/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct_or_ifc(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}
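
/* Illustrative sketch (not part of the pass): for a hypothetical deref of
 * type "struct { vec4 a; float b[2]; }", copy_vars() recurses through the
 * struct and array levels and bottoms out in plain load/store pairs:
 *
 *    dst.a    = load src.a;     // write mask 0xf
 *    dst.b[0] = load src.b[0];  // write mask 0x1
 *    dst.b[1] = load src.b[1];  // write mask 0x1
 */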

#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)

static void
create_gfx_pushconst(nir_shader *nir)
{
#define PUSHCONST_MEMBER(member_idx, field)                                                                     \
fields[member_idx].type =                                                                                       \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}
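
/* Roughly, the variable built above corresponds to a GLSL push-constant
 * block like the following (illustrative only; every member is declared as a
 * uint array whose length comes from SIZEOF_FIELD on struct
 * zink_gfx_push_constant, so the sizes shown here are assumptions):
 *
 *    layout(push_constant) uniform gfx_pushconst {
 *       uint draw_mode_is_indexed;
 *       uint draw_id;
 *       uint framebuffer_is_layered;
 *       uint default_inner_level[2];
 *       uint default_outer_level[4];
 *       uint line_stipple_pattern;
 *       uint viewport_scale[2];
 *       uint line_width;
 *    };
 */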

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
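/* Sketch of the rewrite: a 64-bit input of N components is retyped as a
 * 32-bit vector of 2*N components, and each 64-bit channel is reassembled
 * from two consecutive 32-bit channels:
 *
 *    casted[i] = pack_64_2x32(load.channels[2*i .. 2*i+1])
 *
 * so e.g. a 64-bit uvec2 becomes a 32-bit uvec4 load followed by two
 * pack_64_2x32 ops.
 */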
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
     casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
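/* For example (a sketch of what the instr pass above produces): a dvec4
 * input at driver_location N is rewritten as
 *
 *    dvec2 var  @ location N     ->  load  -> components .xy
 *    dvec2 var2 @ location N+1   ->  load2 -> components .zw
 *    new_vec = vec(load.x, load.y, load2.x, load2.y)
 *
 * and every user of the original load is redirected to new_vec.
 */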
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}

static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}
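
/* In effect (sketch): GL requires gl_BaseVertex to read as zero for
 * non-indexed draws, which Vulkan doesn't provide directly, so every read of
 * base_vertex is rewritten to
 *
 *    draw_mode_is_indexed == 1 ? base_vertex : 0
 *
 * with draw_mode_is_indexed sourced from the gfx push constants.
 */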

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}


static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};

static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   /* this pass emits its own end_primitive calls below, so drop the originals
    * (checked before the emit_vertex filter, which would otherwise skip them)
    */
   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
       intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   b->cursor = nir_before_instr(instr);

   // load the viewport scale from the push constants
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}
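
/* Geometry sketch: each input point at clip position (x, y, z, w) is
 * expanded into a 4-vertex triangle strip covering the point sprite:
 *
 *    (x - half_w, y - half_h)   (x - half_w, y + half_h)
 *    (x + half_w, y - half_h)   (x + half_w, y + half_h)
 *
 * where half_w/half_h are half the point size converted back into clip
 * space via the viewport scale and multiplied by w.
 */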

static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;

   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // find the gl_Position and gl_PointSize outputs
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if gl_Position or gl_PointSize aren't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX][4];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};

static nir_ssa_def*
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
                          state->ring_size);
}
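
/* Indexing sketch: the per-varying arrays act as a ring buffer of ring_size
 * (= vertices_out) entries:
 *
 *    slot = (index + ring_offset) % ring_size
 *
 * so with e.g. ring_size == 4 and ring_offset == 3, logical indices
 * 0,1,2,3 map to slots 3,0,1,2.
 */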

/* Given the final deref in a chain of derefs, this function walks up the chain
 * until it finds a var deref.
 *
 * It then recreates an identical chain that ends with the provided deref.
 */
static nir_deref_instr*
replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
{
   nir_deref_instr *parent = nir_src_as_deref(old->parent);
   switch(old->deref_type) {
   case nir_deref_type_var:
      return new;
   case nir_deref_type_array:
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
   case nir_deref_type_array_wildcard:
   case nir_deref_type_ptr_as_array:
   case nir_deref_type_cast:
      unreachable("unexpected deref type");
   }
   unreachable("impossible deref type");
}
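
/* e.g. (sketch): given an old chain deref(var)->arr[i]->struct.field and a
 * replacement base new_base, this rebuilds new_base->arr[i]->struct.field,
 * reusing the same array indices and struct member indices. */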

static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      assert(state->varyings[location][location_frac]);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
      nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
      // recreate the chain of derefs that led to the store
      nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
      nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}

static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the provoking
    * vertex is either the last or the second vertex, depending on whether the
    * triangle is at an odd or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                            offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
        rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                            nir_isub(b, three,
                                                        odd_prim)),
                                            three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
        rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                                three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b);
   }
}
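
/* Worked example (sketch): for triangles (primitive_vert_count == 3) at an
 * even position within the user strip, vert_maps gives offsets {2, 0, 1},
 * so the three vertices are re-emitted in the order
 *
 *    current_vertex + 2, current_vertex + 0, current_vertex + 1
 *
 * (ring-indexed mod ring_size), which makes the last vertex written the
 * first one emitted and thus the provoking vertex under Vulkan's default
 * first-vertex convention.
 */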

static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   {
      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                                nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
      nir_end_primitive(b);

      nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
   }
   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static unsigned int
lower_pv_mode_vertices_for_prim(enum mesa_prim prim)
{
   switch (prim) {
   case MESA_PRIM_POINTS:
      return 1;
   case MESA_PRIM_LINE_STRIP:
      return 2;
   case MESA_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}

static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   state.ring_size,
                                                   false),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");

   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");

   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");

   state.prim = prim;

   // initialize pos_counter, out_pos_counter and ring_offset
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};

static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
                                        w_recip);
   return nir_fmul(b, ndc_point, scale);
}
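
/* Math sketch: for a clip-space vertex (x, y, z, w) this computes
 *
 *    (x/w, y/w) * scale
 *
 * i.e. window-space coordinates up to a constant offset (scale being the
 * viewport scale from the push constants), which is all the line-stipple
 * and line-smooth distance math below needs.
 */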

static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                    nir_fadd(b, nir_load_var(b, state->stipple_counter),
                                len), 1);
   nir_pop_if(b, NULL);
   // emit stipple out
   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   return true;
}

static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_after_cf_list(&entry->body));

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                        glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);
   nir_push_loop(&b);
   {
      nir_ssa_def *value = nir_load_var(&b, v);
      nir_ssa_def *index = nir_ufind_msb(&b, value);
      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);

      nir_ssa_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
            &nir_build_deref_var(&b, stipple)->dest.ssa, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                                 nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
      nir_ssa_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
         nir_ssa_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}
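
/* Worked example (sketch): with pattern 0x00FF and factor 1, a sample whose
 * interpolated stipple counter is 3.5 evaluates bit (int(3.5 / 1) mod 16)
 * == 3 of the pattern, which is set, so the sample stays on; a counter of
 * 11.0 hits bit 11, which is clear, so that sample's bit is removed from
 * the output sample mask. The loop above does this per covered sample via
 * interp_deref_at_sample.
 */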

struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};

static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location][location_frac]);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}

static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                             nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                             nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
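
/* Geometry sketch: each segment becomes one 8-vertex triangle strip. The
 * first four vertices sit around the previous endpoint (two of them pushed
 * backwards along the line direction to form the leading end-cap) and the
 * last four around the current endpoint (two pushed forwards for the
 * trailing end-cap). line_coord carries signed half-width/half-length
 * coordinates that the AA fragment lowering uses to compute coverage
 * falloff at the edges.
 */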

static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   unsigned location = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (var->data.driver_location >= location)
         location = var->data.driver_location + 1;
   }

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = location;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
   shader->num_outputs++;

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      b = nir_builder_at(nir_before_cf_list(&entry->body));
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                   nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                   .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}
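
/* i.e. (sketch) the vector forms are rewritten into their scalar-pair split
 * forms:
 *
 *    pack_64_2x32(v2)    ->  pack_64_2x32_split(v2.x, v2.y)
 *    unpack_64_2x32(u64) ->  vec2(unpack_64_2x32_split_x(u64),
 *                                 unpack_64_2x32_split_y(u64))
 */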

static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}

nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      /* input vars can't be created for those */
      if (var->data.location == VARYING_SLOT_LAYER ||
          var->data.location == VARYING_SLOT_VIEW_INDEX ||
          /* psiz not needed for quads */
          var->data.location == VARYING_SLOT_PSIZ)
         continue;

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* pick the vertex mapping that preserves the provoking vertex */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
        nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}
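
/* Triangulation sketch: a quad (v0, v1, v2, v3) arriving as lines_adjacency
 * is emitted as two triangles. With first-vertex provoking:
 *
 *    (v0, v1, v2) and (v0, v2, v3)
 *
 * and with last-vertex provoking:
 *
 *    (v0, v1, v3) and (v1, v2, v3)
 *
 * so in either convention the quad's own provoking vertex supplies the
 * flat-shaded attributes for both halves.
 */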

static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                                            nir_imm_int(b, inlined_uniform_offset),
                                            .align_mul = 4, .align_offset = 0,
                                            .range_base = 0, .range = ~0);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
   nir_instr_remove(instr);
   return true;
}
1327
1328 bool
1329 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1330 {
1331    return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
1332                                        nir_metadata_dominance, NULL);
1333 }
1334
1335 void
1336 zink_screen_init_compiler(struct zink_screen *screen)
1337 {
1338    static const struct nir_shader_compiler_options
1339    default_options = {
1340       .lower_ffma16 = true,
1341       .lower_ffma32 = true,
1342       .lower_ffma64 = true,
1343       .lower_scmp = true,
1344       .lower_fdph = true,
1345       .lower_flrp32 = true,
1346       .lower_fpow = true,
1347       .lower_fsat = true,
1348       .lower_extract_byte = true,
1349       .lower_extract_word = true,
1350       .lower_insert_byte = true,
1351       .lower_insert_word = true,
1352
1353       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1354        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1355        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1356        * effectively unused (no instances in shader-db), it's not worth the
1357        * effort to do so.
1358        */
1359       .lower_ldexp = true,
1360
1361       .lower_mul_high = true,
1362       .lower_rotate = true,
1363       .lower_uadd_carry = true,
1364       .lower_usub_borrow = true,
1365       .lower_uadd_sat = true,
1366       .lower_usub_sat = true,
1367       .lower_vector_cmp = true,
1368       .lower_int64_options = 0,
1369       .lower_doubles_options = 0,
1370       .lower_uniforms_to_ubo = true,
1371       .has_fsub = true,
1372       .has_isub = true,
1373       .lower_mul_2x32_64 = true,
1374       .support_16bit_alu = true, /* not quite what it sounds like */
1375       .max_unroll_iterations = 0,
1376    };
1377
1378    screen->nir_options = default_options;
1379
1380    if (!screen->info.feats.features.shaderInt64)
1381       screen->nir_options.lower_int64_options = ~0;
1382
1383    if (!screen->info.feats.features.shaderFloat64) {
1384       screen->nir_options.lower_doubles_options = ~0;
1385       screen->nir_options.lower_flrp64 = true;
1386       screen->nir_options.lower_ffma64 = true;
1387       /* soft fp64 function inlining will blow up loop bodies and effectively
1388        * stop Vulkan drivers from unrolling the loops.
1389        */
1390       screen->nir_options.max_unroll_iterations_fp64 = 32;
1391    }
1392
1393    /* "The OpFRem and OpFMod instructions use cheap approximations of remainder,
1394     *  and the error can be large due to the discontinuity in trunc() and floor().
1395     *  This can produce mathematically unexpected results in some cases, such as
1396     *  FMod(x,x) computing x rather than 0, and can also cause the result to have
1397     *  a different sign than the infinitely precise result."
1398     *
1399     *     - Table 84. Precision of core SPIR-V Instructions
1400     *
1401     * For drivers that are known to have imprecise fmod for doubles, lower dmod.
1402     */
1403    if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1404        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1405        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1406       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1407 }
1408
1409 const void *
1410 zink_get_compiler_options(struct pipe_screen *pscreen,
1411                           enum pipe_shader_ir ir,
1412                           gl_shader_stage shader)
1413 {
1414    assert(ir == PIPE_SHADER_IR_NIR);
1415    return &zink_screen(pscreen)->nir_options;
1416 }
1417
1418 struct nir_shader *
1419 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1420 {
1421    if (zink_debug & ZINK_DEBUG_TGSI) {
1422       fprintf(stderr, "TGSI shader:\n---8<---\n");
1423       tgsi_dump_to_file(tokens, 0, stderr);
1424       fprintf(stderr, "---8<---\n\n");
1425    }
1426
1427    return tgsi_to_nir(tokens, screen, false);
1428 }
1429
1430
1431 static bool
1432 dest_is_64bit(nir_dest *dest, void *state)
1433 {
1434    bool *lower = (bool *)state;
1435    if (dest && (nir_dest_bit_size(*dest) == 64)) {
1436       *lower = true;
1437       return false;
1438    }
1439    return true;
1440 }
1441
1442 static bool
1443 src_is_64bit(nir_src *src, void *state)
1444 {
1445    bool *lower = (bool *)state;
1446    if (src && (nir_src_bit_size(*src) == 64)) {
1447       *lower = true;
1448       return false;
1449    }
1450    return true;
1451 }
1452
1453 static bool
1454 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1455 {
1456    bool lower = false;
1457    /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1458     * doesn't have const variants, so do the ugly const_cast here. */
1459    nir_instr *instr = (nir_instr *)const_instr;
1460
1461    nir_foreach_dest(instr, dest_is_64bit, &lower);
1462    if (lower)
1463       return true;
1464    nir_foreach_src(instr, src_is_64bit, &lower);
1465    return lower;
1466 }
1467
1468 static bool
1469 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1470 {
1471    nir_instr *instr = (nir_instr *)const_instr;
1472    nir_alu_instr *alu = nir_instr_as_alu(instr);
1473    switch (alu->op) {
1474    case nir_op_pack_64_2x32_split:
1475    case nir_op_pack_32_2x16_split:
1476    case nir_op_unpack_32_2x16_split_x:
1477    case nir_op_unpack_32_2x16_split_y:
1478    case nir_op_unpack_64_2x32_split_x:
1479    case nir_op_unpack_64_2x32_split_y:
1480       return true;
1481    default:
1482       break;
1483    }
1484    return false;
1485 }
1486
1487
1488 struct bo_vars {
1489    nir_variable *uniforms[5];
1490    nir_variable *ubo[5];
1491    nir_variable *ssbo[5];
1492    uint32_t first_ubo;
1493    uint32_t first_ssbo;
1494 };
1495
1496 static struct bo_vars
1497 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1498 {
1499    struct bo_vars bo;
1500    memset(&bo, 0, sizeof(bo));
1501    if (zs->ubos_used)
1502       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1503    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1504    if (zs->ssbos_used)
1505       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1506    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1507    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1508       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1509       if (var->data.mode == nir_var_mem_ssbo) {
1510          assert(!bo.ssbo[idx]);
1511          bo.ssbo[idx] = var;
1512       } else {
1513          if (var->data.driver_location) {
1514             assert(!bo.ubo[idx]);
1515             bo.ubo[idx] = var;
1516          } else {
1517             assert(!bo.uniforms[idx]);
1518             bo.uniforms[idx] = var;
1519          }
1520       }
1521    }
1522    return bo;
1523 }
1524
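/* Rewrite block accesses whose constant offset is statically out of bounds
 * of a bounded block (i.e. one with no trailing unsized array): loads are
 * replaced with zero and stores are deleted, e.g. roughly:
 *
 *    x = load_ssbo(idx, <const offset past end>)  ->  x = 0
 */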
1525 static bool
1526 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1527 {
1528    struct bo_vars *bo = data;
1529    if (instr->type != nir_instr_type_intrinsic)
1530       return false;
1531    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1532    nir_variable *var = NULL;
1533    nir_ssa_def *offset = NULL;
1534    bool is_load = true;
1535    b->cursor = nir_before_instr(instr);
1536
1537    switch (intr->intrinsic) {
1538    case nir_intrinsic_store_ssbo:
1539       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1540       offset = intr->src[2].ssa;
1541       is_load = false;
1542       break;
1543    case nir_intrinsic_load_ssbo:
1544       var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1545       offset = intr->src[1].ssa;
1546       break;
1547    case nir_intrinsic_load_ubo:
1548       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1549          var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
1550       else
1551          var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
1552       offset = intr->src[1].ssa;
1553       break;
1554    default:
1555       return false;
1556    }
1557    nir_src offset_src = nir_src_for_ssa(offset);
1558    if (!nir_src_is_const(offset_src))
1559       return false;
1560
1561    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1562    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1563    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1564    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1565    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1566       return false;
1567
1568    unsigned rewrites = 0;
1569    nir_ssa_def *result[2];
1570    for (unsigned i = 0; i < intr->num_components; i++) {
1571       if (offset_bytes + i >= size) {
1572          rewrites++;
1573          if (is_load)
1574             result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
1575       }
1576    }
1577    assert(rewrites == intr->num_components);
1578    if (is_load) {
1579       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1580       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1581    }
1582    nir_instr_remove(instr);
1583    return true;
1584 }
1585
1586 static bool
1587 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1588 {
1589    struct bo_vars bo = get_bo_vars(zs, shader);
1590    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1591 }
1592
1593 static void
1594 optimize_nir(struct nir_shader *s, struct zink_shader *zs)
1595 {
1596    bool progress;
1597    do {
1598       progress = false;
1599       if (s->options->lower_int64_options)
1600          NIR_PASS_V(s, nir_lower_int64);
1601       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1602          NIR_PASS_V(s, lower_64bit_pack);
1603       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1604       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1605       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1606       NIR_PASS(progress, s, nir_copy_prop);
1607       NIR_PASS(progress, s, nir_opt_remove_phis);
1608       if (s->options->lower_int64_options) {
1609          NIR_PASS(progress, s, nir_lower_64bit_phis);
1610          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1611       }
1612       NIR_PASS(progress, s, nir_opt_dce);
1613       NIR_PASS(progress, s, nir_opt_dead_cf);
1614       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1615       NIR_PASS(progress, s, nir_opt_cse);
1616       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1617       NIR_PASS(progress, s, nir_opt_algebraic);
1618       NIR_PASS(progress, s, nir_opt_constant_folding);
1619       NIR_PASS(progress, s, nir_opt_undef);
1620       NIR_PASS(progress, s, zink_nir_lower_b2b);
1621       if (zs)
1622          NIR_PASS(progress, s, bound_bo_access, zs);
1623    } while (progress);
1624
1625    do {
1626       progress = false;
1627       NIR_PASS(progress, s, nir_opt_algebraic_late);
1628       if (progress) {
1629          NIR_PASS_V(s, nir_copy_prop);
1630          NIR_PASS_V(s, nir_opt_dce);
1631          NIR_PASS_V(s, nir_opt_cse);
1632       }
1633    } while (progress);
1634 }
1635
1636 /* - copy the lowered fbfetch variable
1637  * - set the new one up as an input attachment for descriptor 0.6
1638  * - load it as an image
1639  * - overwrite the previous load
1640  */
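/* Illustrative GLSL-level equivalent of the rewrite (names are made up):
 *
 *    vec4 c = gl_LastFragData[0];
 *    // becomes, roughly:
 *    layout (input_attachment_index = 0) uniform subpassInput u_fbfetch;
 *    vec4 c = subpassLoad(u_fbfetch);
 */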
1641 static bool
1642 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1643 {
1644    bool ms = data != NULL;
1645    if (instr->type != nir_instr_type_intrinsic)
1646       return false;
1647    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1648    if (intr->intrinsic != nir_intrinsic_load_deref)
1649       return false;
1650    nir_variable *var = nir_intrinsic_get_var(intr, 0);
1651    if (!var->data.fb_fetch_output)
1652       return false;
1653    b->cursor = nir_after_instr(instr);
1654    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1655    /* If Dim is SubpassData, ... Image Format must be Unknown
1656     * - SPIRV OpTypeImage specification
1657     */
1658    fbfetch->data.image.format = 0;
1659    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1660    fbfetch->data.mode = nir_var_uniform;
1661    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1663    fbfetch->data.sample = ms;
1664    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1665    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1666    nir_shader_add_variable(b->shader, fbfetch);
1667    nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
1668    nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
1669    nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1670    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1671    return true;
1672 }
1673
1674 static bool
1675 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1676 {
1677    nir_foreach_shader_out_variable(var, shader) {
1678       if (var->data.fb_fetch_output) {
1679          *fbfetch = var;
1680          break;
1681       }
1682    }
1683    assert(*fbfetch);
1684    if (!*fbfetch)
1685       return false;
1686    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1687 }
1688
1689 /*
1690  * Add a check for out of bounds LOD for every texel fetch op
1691  * It boils down to:
1692  * - if (lod < query_levels(tex))
1693  * -    res = txf(tex)
1694  * - else
1695  * -    res = (0, 0, 0, 1)
1696  */
1697 static bool
1698 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1699 {
1700    if (in->type != nir_instr_type_tex)
1701       return false;
1702    nir_tex_instr *txf = nir_instr_as_tex(in);
1703    if (txf->op != nir_texop_txf)
1704       return false;
1705
1706    b->cursor = nir_before_instr(in);
1707    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1708    assert(lod_idx >= 0);
1709    nir_src lod_src = txf->src[lod_idx].src;
1710    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1711       return false;
1712
1713    nir_ssa_def *lod = lod_src.ssa;
1714
1715    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1716    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1717    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1718                                                 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1719    levels->op = nir_texop_query_levels;
1720    levels->texture_index = txf->texture_index;
1721    levels->dest_type = nir_type_int | lod->bit_size;
1722    if (offset_idx >= 0) {
1723       levels->src[0].src_type = nir_tex_src_texture_offset;
1724       nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
1725    }
1726    if (handle_idx >= 0) {
1727       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1728       nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1729    }
1730    nir_ssa_dest_init(&levels->instr, &levels->dest,
1731                      nir_tex_instr_dest_size(levels), 32);
1732    nir_builder_instr_insert(b, &levels->instr);
1733
1734    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1735    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1736    nir_builder_instr_insert(b, &new_txf->instr);
1737
1738    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1739    nir_const_value oob_values[4] = {0};
1740    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1741    oob_values[3] = (txf->dest_type & nir_type_float) ?
1742                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1743    nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1744
1745    nir_pop_if(b, lod_oob_else);
1746    nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
1747
1748    nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
1749    nir_instr_remove_v(in);
1750    return true;
1751 }
1752
1753 /* This pass is used to workaround the lack of out of bounds LOD robustness
1754  * for texel fetch ops in VK_EXT_image_robustness.
1755  */
1756 static bool
1757 lower_txf_lod_robustness(nir_shader *shader)
1758 {
1759    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1760 }
1761
1762 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1763 static bool
1764 check_psiz(struct nir_shader *s)
1765 {
1766    bool have_psiz = false;
1767    nir_foreach_shader_out_variable(var, s) {
1768       if (var->data.location == VARYING_SLOT_PSIZ) {
1769          /* genuine PSIZ outputs will have this set */
1770          have_psiz |= !!var->data.explicit_location;
1771       }
1772    }
1773    return have_psiz;
1774 }
1775
1776 static nir_variable *
1777 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1778 {
1779    assert((int)location >= 0);
1780
1781    unsigned found = 0;
1782    if (!location_frac && location != VARYING_SLOT_PSIZ) {
1783       nir_foreach_shader_out_variable(var, nir) {
1784          if (var->data.location == location)
1785             found++;
1786       }
1787    }
1788    if (found) {
1789       /* multiple variables found for this location: find the biggest one */
1790       nir_variable *out = NULL;
1791       unsigned slots = 0;
1792       nir_foreach_shader_out_variable(var, nir) {
1793          if (var->data.location == location) {
1794             unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
1795             if (count_slots > slots) {
1796                slots = count_slots;
1797                out = var;
1798             }
1799          }
1800       }
1801       return out;
1802    } else {
1803       /* only one variable found or this is location_frac */
1804       nir_foreach_shader_out_variable(var, nir) {
1805          if (var->data.location == location &&
1806              (var->data.location_frac == location_frac ||
1807               (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
1808             if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
1809                return var;
1810          }
1811       }
1812    }
1813    return NULL;
1814 }
1815
1816 static bool
1817 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1818 {
1819    for (unsigned i = 0; i < output->num_components; i++)
1820       if (!inlined[output->start_component + i])
1821          return false;
1822    return true;
1823 }
1824
1825 static void
1826 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1827 {
1828    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1829    if (last_output < VARYING_SLOT_VAR0)
1830       last_output = VARYING_SLOT_VAR0;
1831    else
1832       last_output++;
1833    /* this should get fixed up by slot remapping */
1834    psiz->data.location = last_output;
1835 }
1836
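/* Worked example for clamp_slot_type below: a dvec3 occupies two vec4 slots
 * (six 32-bit components), so slot 0 masks off components 0-3 -> vec4 and
 * slot 1 masks off components 4-5 -> vec2.
 */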
1837 static const struct glsl_type *
1838 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1839 {
1840    /* could be dvec/dmat/mat: each member is the same */
1841    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1842    /* determine size of each member type */
1843    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1844    /* normalize slot idx to current type's size */
1845    slot %= slot_count;
1846    unsigned slot_components = glsl_get_components(plain);
1847    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1848       slot_components *= 2;
1849    /* create a vec4 mask of the selected slot's components out of all the components */
1850    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1851    /* return a vecN of the selected components */
1852    slot_components = util_bitcount(mask);
1853    return glsl_vec_type(slot_components);
1854 }
1855
1856 static const struct glsl_type *
1857 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1858 {
1859    const struct glsl_type *type = slot_type;
1860    unsigned slot_count = 0;
1861    unsigned cur_slot = 0;
1862    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1863    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1864       /* use array type for slot counting but return array member type for unroll */
1865       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1866       type = glsl_without_array(arraytype);
1867       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1868    }
1869    *slot_idx -= (cur_slot - slot_count);
1870    if (!glsl_type_is_struct_or_ifc(type))
1871       /* this is a fully unrolled struct: find the number of vec components to output */
1872       type = clamp_slot_type(type, *slot_idx);
1873    return type;
1874 }
1875
1876 static unsigned
1877 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1878 {
1879    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1880    const struct glsl_type *orig_type = var->type;
1881    const struct glsl_type *type = glsl_without_array(var->type);
1882    unsigned slot_idx = slot - so_slot;
1883    if (type != orig_type)
1884       slot_idx %= glsl_count_vec4_slots(type, false, false);
1885    /* need to find the vec4 that's being exported by this slot */
1886    while (glsl_type_is_struct_or_ifc(type))
1887       type = unroll_struct_type(type, &slot_idx);
1888
1889    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1890    unsigned num_components = glsl_get_components(glsl_without_array(type));
1891    /* special handling: clip/cull distance are arrays with vector semantics */
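   /* e.g. float gl_ClipDistance[5] exports 4 components in its first slot
    * and the remaining 1 component in its second
    */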
1892    if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1893       num_components = glsl_array_size(type);
1894       if (slot_idx)
1895          /* this is the second vec4 */
1896          num_components %= 4;
1897       else
1898          /* this is the first vec4 */
1899          num_components = MIN2(num_components, 4);
1900    }
1901    assert(num_components);
1902    /* gallium handles xfb in terms of 32bit units */
1903    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1904       num_components *= 2;
1905    return num_components;
1906 }
1907
1908 static const struct pipe_stream_output *
1909 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1910 {
1911    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1912       const struct pipe_stream_output *packed_output = &so_info->output[i];
1913       if (reverse_map[packed_output->register_index] == slot)
1914          return packed_output;
1915    }
1916    return NULL;
1917 }
1918
1919 static void
1920 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1921                uint64_t outputs_written, bool have_psiz)
1922 {
1923    uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1924    unsigned slot = 0;
1925    /* semi-copied from iris */
1926    while (outputs_written) {
1927       int bit = u_bit_scan64(&outputs_written);
1928       /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1929       if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1930          continue;
1931       reverse_map[slot++] = bit;
1932    }
1933
1934    bool have_fake_psiz = false;
1935    nir_foreach_shader_out_variable(var, nir) {
1936       if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1937          have_fake_psiz = true;
1938    }
1939
1940    bool inlined[VARYING_SLOT_MAX][4] = {0};
1941    uint64_t packed = 0;
1942    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1943    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1944    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1945    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1946    nir_variable *psiz = NULL;
1947    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1948       const struct pipe_stream_output *output = &so_info->output[i];
1949       unsigned slot = reverse_map[output->register_index];
1950       /* always set stride to be used during draw */
1951       zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1952       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1953          nir_variable *var = NULL;
1954          unsigned so_slot;
1955          while (!var)
1956             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1957          if (var->data.location == VARYING_SLOT_PSIZ)
1958             psiz = var;
1959          so_slot = slot + 1;
1960          slot = reverse_map[output->register_index];
1961          if (var->data.explicit_xfb_buffer) {
1962             /* handle dvec3 where gallium splits streamout over 2 registers */
1963             for (unsigned j = 0; j < output->num_components; j++)
1964                inlined[slot][output->start_component + j] = true;
1965          }
1966          if (is_inlined(inlined[slot], output))
1967             continue;
1968          bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1969          unsigned num_components = get_slot_components(var, slot, so_slot);
1970          /* if this is the entire variable, try to blast it out during the initial declaration
1971           * structs must be handled later to ensure accurate analysis
1972           */
1973          if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1974             var->data.explicit_xfb_buffer = 1;
1975             var->data.xfb.buffer = output->output_buffer;
1976             var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1977             var->data.offset = output->dst_offset * 4;
1978             var->data.stream = output->stream;
1979             for (unsigned j = 0; j < output->num_components; j++)
1980                inlined[slot][output->start_component + j] = true;
1981          } else {
1982             /* otherwise store some metadata for later */
1983             packed |= BITFIELD64_BIT(slot);
1984             packed_components[slot] += output->num_components;
1985             packed_streams[slot] |= BITFIELD_BIT(output->stream);
1986             packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1987             for (unsigned j = 0; j < output->num_components; j++)
1988                packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1989          }
1990       }
1991    }
1992
1993    /* if this was flagged as a packed output before, and if all the components are
1994     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1995     * can be consolidated into a single output to conserve locations
1996     */
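   /* e.g. (illustrative) a vec4 varying captured as two vec2 xfb outputs at
    * sequential dst_offsets on the same buffer and stream collapses into a
    * single inlined xfb declaration on the variable
    */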
1997    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1998       const struct pipe_stream_output *output = &so_info->output[i];
1999       unsigned slot = reverse_map[output->register_index];
2000       if (is_inlined(inlined[slot], output))
2001          continue;
2002       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
2003          nir_variable *var = NULL;
2004          while (!var)
2005             var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
2006          /* this is a lowered 64bit variable that can't be exported due to packing */
2007          if (var->data.is_xfb)
2008             goto out;
2009
2010          unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
2011          /* for each variable, iterate over all the variable's slots and inline the outputs */
2012          for (unsigned j = 0; j < num_slots; j++) {
2013             slot = var->data.location + j;
2014             const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
2015             if (!packed_output)
2016                goto out;
2017
2018             /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
2019             if (!(packed & BITFIELD64_BIT(slot)) ||
2020                 util_bitcount(packed_streams[slot]) != 1 ||
2021                 util_bitcount(packed_buffers[slot]) != 1)
2022                goto out;
2023
2024             /* if all the components the variable exports to this slot aren't captured, skip consolidation */
2025             unsigned num_components = get_slot_components(var, slot, var->data.location);
2026             if (num_components != packed_components[slot])
2027                goto out;
2028
2029             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2030             uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
2031             for (unsigned k = 1; k < num_components; k++) {
2032                /* if the offsets are not incrementing as expected, skip consolidation */
2033                if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
2034                   goto out;
2035                prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
2036             }
2037          }
2038          /* this output can be consolidated: blast out all the data inlined */
2039          var->data.explicit_xfb_buffer = 1;
2040          var->data.xfb.buffer = output->output_buffer;
2041          var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
2042          var->data.offset = output->dst_offset * 4;
2043          var->data.stream = output->stream;
2044          /* GLSL specifies that interface blocks are split per-buffer in XFB */
2045          if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
2046             zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
2047          /* mark all slot components inlined to skip subsequent loop iterations */
2048          for (unsigned j = 0; j < num_slots; j++) {
2049             slot = var->data.location + j;
2050             for (unsigned k = 0; k < packed_components[slot]; k++)
2051                inlined[slot][k] = true;
2052             packed &= ~BITFIELD64_BIT(slot);
2053          }
2054          continue;
2055       }
2056 out:
2057       /* these are packed/explicit varyings which can't be exported with normal output */
2058       zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
2059       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2060       zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2061    }
2062    zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2063    /* ensure this doesn't get output in the shader by unsetting location */
2064    if (have_fake_psiz && psiz)
2065       update_psiz_location(nir, psiz);
2066 }
2067
2068 struct decompose_state {
2069   nir_variable **split;
2070   bool needs_w;
2071 };
2072
2073 static bool
2074 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2075 {
2076    struct decompose_state *state = data;
2077    nir_variable **split = state->split;
2078    if (instr->type != nir_instr_type_intrinsic)
2079       return false;
2080    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2081    if (intr->intrinsic != nir_intrinsic_load_deref)
2082       return false;
2083    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2084    nir_variable *var = nir_deref_instr_get_variable(deref);
2085    if (var != split[0])
2086       return false;
2087    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2088    b->cursor = nir_after_instr(instr);
2089    nir_ssa_def *loads[4];
2090    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2091       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2092    if (state->needs_w) {
2093          /* oob load of the w component to get the correct value for int/float */
2094       loads[3] = nir_channel(b, loads[0], 3);
2095       loads[0] = nir_channel(b, loads[0], 0);
2096    }
2097    nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2098    nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2099    nir_instr_remove_v(instr);
2100    return true;
2101 }
2102
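/* Split each flagged vertex input into per-component scalar variables so the
 * vertex state can bind each component as its own attribute. For attributes
 * in decomposed_attrs_without_w, the first split keeps the vec4 type and the
 * w channel is (roughly, per the robustness rules for out-of-bounds vertex
 * fetches) recovered from that variable's oob load; see lower_attrib above.
 */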
2103 static bool
2104 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2105 {
2106    uint32_t bits = 0;
2107    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2108       bits |= BITFIELD_BIT(var->data.driver_location);
2109    bits = ~bits;
2110    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2111       nir_variable *split[5];
2112       struct decompose_state state;
2113       state.split = split;
2114       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2115       assert(var);
2116       split[0] = var;
2117       bits |= BITFIELD_BIT(var->data.driver_location);
2118       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2119       unsigned num_components = glsl_get_vector_elements(var->type);
2120       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2121       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2122          split[i+1] = nir_variable_clone(var, nir);
2123          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2124          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2125             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2126          else
2127             split[i+1]->type = new_type;
2128          split[i+1]->data.driver_location = ffs(bits) - 1;
2129          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2130          nir_shader_add_variable(nir, split[i+1]);
2131       }
2132       var->data.mode = nir_var_shader_temp;
2133       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2134    }
2135    nir_fixup_deref_modes(nir);
2136    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2137    optimize_nir(nir, NULL);
2138    return true;
2139 }
2140
2141 static bool
2142 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2143 {
2144    struct zink_screen *screen = data;
2145    const bool has_int64 = screen->info.feats.features.shaderInt64;
2146    if (instr->type != nir_instr_type_intrinsic)
2147       return false;
2148    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2149    b->cursor = nir_before_instr(instr);
2150    switch (intr->intrinsic) {
2151    case nir_intrinsic_ssbo_atomic:
2152    case nir_intrinsic_ssbo_atomic_swap: {
2153       /* convert offset to uintN_t[idx] */
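      /* e.g. a 64-bit atomic at byte offset 24 becomes index 3 */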
2154       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2155       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2156       return true;
2157    }
2158    case nir_intrinsic_load_ssbo:
2159    case nir_intrinsic_load_ubo: {
2160       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2161       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2162                         nir_src_is_const(intr->src[0]) &&
2163                         nir_src_as_uint(intr->src[0]) == 0 &&
2164                         nir_dest_bit_size(intr->dest) == 64 &&
2165                         nir_intrinsic_align_offset(intr) % 8 != 0;
2166       force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2167       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2168       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2169       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2170       if (force_2x32) {
2171          /* this is always scalarized */
2172          assert(intr->dest.ssa.num_components == 1);
2173          /* rewrite as 2x32 */
2174          nir_ssa_def *load[2];
2175          for (unsigned i = 0; i < 2; i++) {
2176             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2177                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2178             else
2179                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2180             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2181          }
2182          /* cast back to 64bit */
2183          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2184          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2185          nir_instr_remove(instr);
2186       }
2187       return true;
2188    }
2189    case nir_intrinsic_load_shared:
2190       b->cursor = nir_before_instr(instr);
2191       bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2192       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2193       nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2194       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2195       if (force_2x32) {
2196          /* this is always scalarized */
2197          assert(intr->dest.ssa.num_components == 1);
2198          /* rewrite as 2x32 */
2199          nir_ssa_def *load[2];
2200          for (unsigned i = 0; i < 2; i++)
2201             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2202          /* cast back to 64bit */
2203          nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2204          nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2205          nir_instr_remove(instr);
2206          return true;
2207       }
2208       break;
2209    case nir_intrinsic_store_ssbo: {
2210       b->cursor = nir_before_instr(instr);
2211       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2212       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2213       nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2214       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2215       if (force_2x32) {
2216          /* this is always scalarized */
2217          assert(intr->src[0].ssa->num_components == 1);
2218          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2219          for (unsigned i = 0; i < 2; i++)
2220             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2221          nir_instr_remove(instr);
2222       }
2223       return true;
2224    }
2225    case nir_intrinsic_store_shared: {
2226       b->cursor = nir_before_instr(instr);
2227       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2228       nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2229       nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2230       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2231       if (force_2x32) {
2232          /* this is always scalarized */
2233          assert(intr->src[0].ssa->num_components == 1);
2234          nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2235          for (unsigned i = 0; i < 2; i++)
2236             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2237          nir_instr_remove(instr);
2238       }
2239       return true;
2240    }
2241    default:
2242       break;
2243    }
2244    return false;
2245 }
2246
2247 static bool
2248 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2249 {
2250    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2251 }
2252
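/* Fetch (cloning on demand) the bo variable matching this access's bit size,
 * retyping it to roughly:
 *
 *    struct { uintN_t base[len]; uintN_t unsized[]; } name[array_size];
 *
 * so every access can be rewritten as a scalar uintN_t array deref.
 */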
2253 static nir_variable *
2254 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2255 {
2256    nir_variable *var, **ptr;
2257    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2258
2259    if (ssbo)
2260       ptr = &bo->ssbo[bit_size >> 4];
2261    else {
2262       if (!idx) {
2263          ptr = &bo->uniforms[bit_size >> 4];
2264       } else
2265          ptr = &bo->ubo[bit_size >> 4];
2266    }
2267    var = *ptr;
2268    if (!var) {
2269       if (ssbo)
2270          var = bo->ssbo[32 >> 4];
2271       else {
2272          if (!idx)
2273             var = bo->uniforms[32 >> 4];
2274          else
2275             var = bo->ubo[32 >> 4];
2276       }
2277       var = nir_variable_clone(var, shader);
2278       if (ssbo)
2279          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2280       else
2281          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2282       *ptr = var;
2283       nir_shader_add_variable(shader, var);
2284
2285       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2286       fields[0].name = ralloc_strdup(shader, "base");
2287       fields[1].name = ralloc_strdup(shader, "unsized");
2288       unsigned array_size = glsl_get_length(var->type);
2289       const struct glsl_type *bare_type = glsl_without_array(var->type);
2290       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2291       unsigned length = glsl_get_length(array_type);
2292       const struct glsl_type *type;
2293       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2294       if (bit_size > 32) {
2295          assert(bit_size == 64);
2296          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2297       } else {
2298          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2299       }
2300       fields[0].type = type;
2301       fields[1].type = unsized;
2302       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2303       var->data.driver_location = idx;
2304    }
2305    return var;
2306 }
2307
2308 static void
2309 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2310 {
2311    nir_intrinsic_op op;
2312    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2313    if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2314       op = nir_intrinsic_deref_atomic;
2315    else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2316       op = nir_intrinsic_deref_atomic_swap;
2317    else
2318       unreachable("unknown intrinsic");
2319    nir_ssa_def *offset = intr->src[1].ssa;
2320    nir_src *src = &intr->src[0];
2321    nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2322    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2323    nir_ssa_def *idx = src->ssa;
2324    if (bo->first_ssbo)
2325       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2326    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2327    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2328
2329    /* generate new atomic deref ops for every component */
2330    nir_ssa_def *result[4];
2331    unsigned num_components = nir_dest_num_components(intr->dest);
2332    for (unsigned i = 0; i < num_components; i++) {
2333       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2334       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2335       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1,
2336                         nir_dest_bit_size(intr->dest));
2337       nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2338       new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2339       /* deref ops have no offset src, so copy the srcs after it */
2340       for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2341          nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2342       nir_builder_instr_insert(b, &new_instr->instr);
2343
2344       result[i] = &new_instr->dest.ssa;
2345       offset = nir_iadd_imm(b, offset, 1);
2346    }
2347
2348    nir_ssa_def *load = nir_vec(b, result, num_components);
2349    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2350    nir_instr_remove(instr);
2351 }
2352
2353 static bool
2354 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2355 {
2356    struct bo_vars *bo = data;
2357    if (instr->type != nir_instr_type_intrinsic)
2358       return false;
2359    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2360    nir_variable *var = NULL;
2361    nir_ssa_def *offset = NULL;
2362    bool is_load = true;
2363    b->cursor = nir_before_instr(instr);
2364    nir_src *src;
2365    bool ssbo = true;
2366    switch (intr->intrinsic) {
2367    case nir_intrinsic_ssbo_atomic:
2368    case nir_intrinsic_ssbo_atomic_swap:
2369       rewrite_atomic_ssbo_instr(b, instr, bo);
2370       return true;
2371    case nir_intrinsic_store_ssbo:
2372       src = &intr->src[1];
2373       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2374       offset = intr->src[2].ssa;
2375       is_load = false;
2376       break;
2377    case nir_intrinsic_load_ssbo:
2378       src = &intr->src[0];
2379       var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2380       offset = intr->src[1].ssa;
2381       break;
2382    case nir_intrinsic_load_ubo:
2383       src = &intr->src[0];
2384       var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2385       offset = intr->src[1].ssa;
2386       ssbo = false;
2387       break;
2388    default:
2389       return false;
2390    }
2391    assert(var);
2392    assert(offset);
2393    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2394    nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2395    if (!ssbo && bo->first_ubo && var->data.driver_location)
2396       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2397    else if (ssbo && bo->first_ssbo)
2398       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2399    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2400    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2401    assert(intr->num_components <= 2);
2402    if (is_load) {
2403       nir_ssa_def *result[2];
2404       for (unsigned i = 0; i < intr->num_components; i++) {
2405          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2406          result[i] = nir_load_deref(b, deref_arr);
2407          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2408             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2409          offset = nir_iadd_imm(b, offset, 1);
2410       }
2411       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2412       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2413    } else {
2414       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2415       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2416    }
2417    nir_instr_remove(instr);
2418    return true;
2419 }
2420
2421 static bool
2422 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2423 {
2424    struct bo_vars bo = get_bo_vars(zs, shader);
2425    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2426 }
2427
2428 static bool
2429 find_var_deref(nir_shader *nir, nir_variable *var)
2430 {
2431    nir_foreach_function_impl(impl, nir) {
2432       nir_foreach_block(block, impl) {
2433          nir_foreach_instr(instr, block) {
2434             if (instr->type != nir_instr_type_deref)
2435                continue;
2436             nir_deref_instr *deref = nir_instr_as_deref(instr);
2437             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2438                return true;
2439          }
2440       }
2441    }
2442    return false;
2443 }
2444
2445 struct clamp_layer_output_state {
2446    nir_variable *original;
2447    nir_variable *clamped;
2448 };
2449
2450 static void
2451 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2452 {
2453    nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2454                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2455                                                     .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2456    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2457    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2458    nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2459                                   nir_load_deref(b, original_deref),
2460                                   nir_imm_int(b, 0));
2461    nir_store_deref(b, clamped_deref, layer, 0);
2462 }
2463
2464 static bool
2465 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2466 {
2467    struct clamp_layer_output_state *state = data;
2468    switch (instr->type) {
2469    case nir_instr_type_intrinsic: {
2470       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2471       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2472           intr->intrinsic != nir_intrinsic_emit_vertex)
2473          return false;
2474       b->cursor = nir_before_instr(instr);
2475       clamp_layer_output_emit(b, state);
2476       return true;
2477    }
2478    default: return false;
2479    }
2480 }
2481
2482 static bool
2483 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2484 {
2485    switch (vs->info.stage) {
2486    case MESA_SHADER_VERTEX:
2487    case MESA_SHADER_GEOMETRY:
2488    case MESA_SHADER_TESS_EVAL:
2489       break;
2490    default:
2491       unreachable("invalid last vertex stage!");
2492    }
2493    struct clamp_layer_output_state state = {0};
2494    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2495    if (!state.original || !find_var_deref(vs, state.original))
2496       return false;
2497    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2498    state.clamped->data.location = VARYING_SLOT_LAYER;
2499    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2500    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2501       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2502       state.original->data.driver_location = (*next_location)++;
2503       if (fs_var) {
2504          fs_var->data.location = state.original->data.location;
2505          fs_var->data.driver_location = state.original->data.driver_location;
2506       }
2507    } else {
2508       if (state.original->data.explicit_xfb_buffer) {
2509          /* Will xfb the clamped output but still better than nothing */
2510          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2511          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2512          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2513          state.clamped->data.offset = state.original->data.offset;
2514          state.clamped->data.stream = state.original->data.stream;
2515       }
2516       state.original->data.mode = nir_var_shader_temp;
2517       nir_fixup_deref_modes(vs);
2518    }
2519    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2520       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2521    } else {
2522       nir_builder b;
2523       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2524       b = nir_builder_at(nir_after_cf_list(&impl->body));
2525       assert(impl->end_block->predecessors->entries == 1);
2526       clamp_layer_output_emit(&b, &state);
2527       nir_metadata_preserve(impl, nir_metadata_dominance);
2528    }
2529    optimize_nir(vs, NULL);
2530    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2531    return true;
2532 }
2533
2534 static void
2535 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2536 {
2537    unsigned slot = var->data.location;
2538    switch (slot) {
2539    case -1:
2540    case VARYING_SLOT_POS:
2541    case VARYING_SLOT_PSIZ:
2542    case VARYING_SLOT_LAYER:
2543    case VARYING_SLOT_PRIMITIVE_ID:
2544    case VARYING_SLOT_CLIP_DIST0:
2545    case VARYING_SLOT_CULL_DIST0:
2546    case VARYING_SLOT_VIEWPORT:
2547    case VARYING_SLOT_FACE:
2548    case VARYING_SLOT_TESS_LEVEL_OUTER:
2549    case VARYING_SLOT_TESS_LEVEL_INNER:
2550       /* use a sentinel value to avoid counting later */
2551       var->data.driver_location = UINT_MAX;
2552       break;
2553
2554    default:
2555       if (var->data.patch) {
2556          assert(slot >= VARYING_SLOT_PATCH0);
2557          slot -= VARYING_SLOT_PATCH0;
2558       }
2559       if (slot_map[slot] == 0xff) {
2560          assert(*reserved < MAX_VARYING);
2561          unsigned num_slots;
2562          if (nir_is_arrayed_io(var, stage))
2563             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2564          else
2565             num_slots = glsl_count_vec4_slots(var->type, false, false);
2566          assert(*reserved + num_slots <= MAX_VARYING);
2567          for (unsigned i = 0; i < num_slots; i++)
2568             slot_map[slot + i] = (*reserved)++;
2569       }
2570       slot = slot_map[slot];
2571       assert(slot < MAX_VARYING);
2572       var->data.driver_location = slot;
2573    }
2574 }
2575
2576 ALWAYS_INLINE static bool
2577 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2578 {
2579    if (stage != MESA_SHADER_FRAGMENT)
2580       return false;
2581    return var->data.location >= VARYING_SLOT_TEX0 && 
2582           var->data.location <= VARYING_SLOT_TEX7;
2583 }
2584
2585 static bool
2586 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2587 {
2588    unsigned slot = var->data.location;
2589    switch (slot) {
2590    case VARYING_SLOT_POS:
2591    case VARYING_SLOT_PSIZ:
2592    case VARYING_SLOT_LAYER:
2593    case VARYING_SLOT_PRIMITIVE_ID:
2594    case VARYING_SLOT_CLIP_DIST0:
2595    case VARYING_SLOT_CULL_DIST0:
2596    case VARYING_SLOT_VIEWPORT:
2597    case VARYING_SLOT_FACE:
2598    case VARYING_SLOT_TESS_LEVEL_OUTER:
2599    case VARYING_SLOT_TESS_LEVEL_INNER:
2600       /* use a sentinel value to avoid counting later */
2601       var->data.driver_location = UINT_MAX;
2602       break;
2603    default:
2604       if (var->data.patch) {
2605          assert(slot >= VARYING_SLOT_PATCH0);
2606          slot -= VARYING_SLOT_PATCH0;
2607       }
2608       if (slot_map[slot] == (unsigned char)-1) {
2609          /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2610           * so keep for now and eliminate later
2611           */
2612          if (is_texcoord(stage, var)) {
2613             var->data.driver_location = -1;
2614             return true;
2615          }
2616          /* patch variables may be read in the workgroup */
2617          if (stage != MESA_SHADER_TESS_CTRL)
2618             /* dead io */
2619             return false;
2620          unsigned num_slots;
2621          if (nir_is_arrayed_io(var, stage))
2622             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2623          else
2624             num_slots = glsl_count_vec4_slots(var->type, false, false);
2625          assert(*reserved + num_slots <= MAX_VARYING);
2626          for (unsigned i = 0; i < num_slots; i++)
2627             slot_map[slot + i] = (*reserved)++;
2628       }
2629       var->data.driver_location = slot_map[slot];
2630    }
2631    return true;
2632 }
2633
2634
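/* Replace every load of 'var' with a constant: zero in general, (0,0,0,1)
 * for fragment color inputs so reads of eliminated varyings still see the
 * expected GL default color.
 */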
2635 static bool
2636 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2637 {
2638    nir_variable *var = data;
2639    if (instr->type != nir_instr_type_intrinsic)
2640       return false;
2641
2642    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2643    if (intr->intrinsic != nir_intrinsic_load_deref)
2644       return false;
2645    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2646    if (deref_var != var)
2647       return false;
2648    b->cursor = nir_before_instr(instr);
2649    nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2650    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2651       switch (var->data.location) {
2652       case VARYING_SLOT_COL0:
2653       case VARYING_SLOT_COL1:
2654       case VARYING_SLOT_BFC0:
2655       case VARYING_SLOT_BFC1:
2656          /* default color is 0,0,0,1 */
2657          if (nir_dest_num_components(intr->dest) == 4)
2658             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2659          break;
2660       default:
2661          break;
2662       }
2663    }
2664    nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2665    nir_instr_remove(instr);
2666    return true;
2667 }
2668
2669 void
2670 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2671 {
2672    unsigned reserved = 0;
2673    unsigned char slot_map[VARYING_SLOT_MAX];
2674    memset(slot_map, -1, sizeof(slot_map));
2675    bool do_fixup = false;
2676    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2677    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2678       /* remove injected pointsize from all but the last vertex stage */
2679       nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2680       if (var && !var->data.explicit_location && !nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2681          var->data.mode = nir_var_shader_temp;
2682          nir_fixup_deref_modes(producer);
2683          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2684          optimize_nir(producer, NULL);
2685       }
2686    }
2687    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2688       /* never assign from tcs -> tes, always invert */
2689       nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2690          assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2691       nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2692          if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2693             /* this is an output, nothing more needs to be done for it to be dropped */
2694             do_fixup = true;
2695       }
2696    } else {
2697       nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2698          assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2699       nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2700          if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2701             do_fixup = true;
2702             /* input needs to be rewritten */
2703             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2704          }
2705       }
2706       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2707          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2708    }
2709    if (!do_fixup)
2710       return;
2711    nir_fixup_deref_modes(nir);
2712    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2713    optimize_nir(nir, NULL);
2714 }
2715
2716 /* all types that hit this function contain something that is 64bit */
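/* 64bit components are re-expressed as pairs of 32bit components; roughly
 * (assuming doubles_only is false):
 *    dvec2 -> vec4                       (4 x 32bit)
 *    dvec4 -> struct { vec4; vec4; }
 *    dmat2 -> struct { vec4; vec4; }     (columns packed back to back)
 * The matching load/store rewrites happen in lower_64bit_vars_function().
 */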
2717 static const struct glsl_type *
2718 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2719 {
2720    if (glsl_type_is_array(type)) {
2721       const struct glsl_type *child = glsl_get_array_element(type);
2722       unsigned elements = glsl_array_size(type);
2723       unsigned stride = glsl_get_explicit_stride(type);
2724       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2725    }
2726    /* rewrite structs recursively */
2727    if (glsl_type_is_struct_or_ifc(type)) {
2728       unsigned nmembers = glsl_get_length(type);
2729       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2730       unsigned xfb_offset = 0;
2731       for (unsigned i = 0; i < nmembers; i++) {
2732          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2733          fields[i] = *f;
2734          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2735          if (i < nmembers - 1 && xfb_offset % 8 &&
2736              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2737               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2738             var->data.is_xfb = true;
2739          }
2740          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2741       }
2742       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2743    }
2744    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2745       return type;
2746    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2747       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2748    enum glsl_base_type base_type;
2749    switch (glsl_get_base_type(type)) {
2750    case GLSL_TYPE_UINT64:
2751       base_type = GLSL_TYPE_UINT;
2752       break;
2753    case GLSL_TYPE_INT64:
2754       base_type = GLSL_TYPE_INT;
2755       break;
2756    case GLSL_TYPE_DOUBLE:
2757       base_type = GLSL_TYPE_FLOAT;
2758       break;
2759    default:
2760       unreachable("unknown 64-bit vertex attribute format!");
2761    }
2762    if (glsl_type_is_scalar(type))
2763       return glsl_vector_type(base_type, 2);
2764    unsigned num_components;
2765    if (glsl_type_is_matrix(type)) {
2766       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2767       unsigned vec_components = glsl_get_vector_elements(type);
2768       if (vec_components == 3)
2769          vec_components = 4;
2770       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2771    } else {
2772       num_components = glsl_get_vector_elements(type) * 2;
2773       if (num_components <= 4)
2774          return glsl_vector_type(base_type, num_components);
2775    }
2776    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2777    struct glsl_struct_field fields[8] = {0};
2778    unsigned remaining = num_components;
2779    unsigned nfields = 0;
2780    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2781       assert(i < ARRAY_SIZE(fields));
2782       fields[i].name = "";
2783       fields[i].offset = i * 16;
2784       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2785    }
2786    char buf[64];
2787    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2788    return glsl_struct_type(fields, nfields, buf, true);
2789 }
2790
2791 static const struct glsl_type *
2792 deref_is_matrix(nir_deref_instr *deref)
2793 {
2794    if (glsl_type_is_matrix(deref->type))
2795       return deref->type;
2796    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2797    if (parent)
2798       return deref_is_matrix(parent);
2799    return NULL;
2800 }
2801
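/* Rewrite every deref/load/store of 'var' in one function to match its
 * rewritten 32bit type: stores unpack 64bit sources with nir_unpack_64_2x32,
 * loads repack with nir_pack_64_2x32.  Matrix rows reached through possibly
 * indirect indices are handled with an if-ladder over the columns, as the
 * comments in the store and load paths below describe.
 */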
2802 static bool
2803 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
2804                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
2805 {
2806    bool func_progress = false;
2807    nir_builder b = nir_builder_create(impl);
2808    nir_foreach_block(block, impl) {
2809       nir_foreach_instr_safe(instr, block) {
2810          switch (instr->type) {
2811          case nir_instr_type_deref: {
2812             nir_deref_instr *deref = nir_instr_as_deref(instr);
2813             if (!(deref->modes & var->data.mode))
2814                continue;
2815             if (nir_deref_instr_get_variable(deref) != var)
2816                continue;
2817
2818             /* matrix types are special: store the original deref type for later use */
2819             const struct glsl_type *matrix = deref_is_matrix(deref);
2820             nir_deref_instr *parent = nir_deref_instr_parent(deref);
2821             if (!matrix) {
2822                /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
2823                hash_table_foreach(derefs, he) {
2824                   /* propagate parent matrix type to row deref */
2825                   if (he->key == parent)
2826                      matrix = he->data;
2827                }
2828             }
2829             if (matrix)
2830                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2831             if (deref->deref_type == nir_deref_type_var)
2832                deref->type = var->type;
2833             else
2834                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2835          }
2836          break;
2837          case nir_instr_type_intrinsic: {
2838             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2839             if (intr->intrinsic != nir_intrinsic_store_deref &&
2840                   intr->intrinsic != nir_intrinsic_load_deref)
2841                break;
2842             if (nir_intrinsic_get_var(intr, 0) != var)
2843                break;
2844             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2845                   (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2846                break;
2847             b.cursor = nir_before_instr(instr);
2848             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2849             unsigned num_components = intr->num_components * 2;
2850             nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2851             /* this is the stored matrix type from the deref */
2852             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2853             const struct glsl_type *matrix = he ? he->data : NULL;
2854             if (doubles_only && !matrix)
2855                break;
2856             func_progress = true;
2857             if (intr->intrinsic == nir_intrinsic_store_deref) {
2858                /* first, unpack the src data to 32bit vec2 components */
2859                for (unsigned i = 0; i < intr->num_components; i++) {
2860                   nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2861                   comp[i * 2] = nir_channel(&b, ssa, 0);
2862                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2863                }
2864                unsigned wrmask = nir_intrinsic_write_mask(intr);
2865                unsigned mask = 0;
2866                /* expand writemask for doubled components */
2867                for (unsigned i = 0; i < intr->num_components; i++) {
2868                   if (wrmask & BITFIELD_BIT(i))
2869                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2870                }
2871                if (matrix) {
2872                   /* matrix types always come from array (row) derefs */
2873                   assert(deref->deref_type == nir_deref_type_array);
2874                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2875                   /* let optimization clean up consts later */
2876                   nir_ssa_def *index = deref->arr.index.ssa;
2877                   /* this might be an indirect array index:
2878                    * - iterate over matrix columns
2879                    * - add if blocks for each column
2880                    * - perform the store in the block
2881                    */
2882                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2883                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2884                      unsigned vec_components = glsl_get_vector_elements(matrix);
2885                      /* always clamp dvec3 to 4 components */
2886                      if (vec_components == 3)
2887                         vec_components = 4;
2888                      unsigned start_component = idx * vec_components * 2;
2889                      /* struct member */
2890                      unsigned member = start_component / 4;
2891                      /* number of components remaining */
2892                      unsigned remaining = num_components;
2893                      for (unsigned i = 0; i < num_components; member++) {
2894                         if (!(mask & BITFIELD_BIT(i)))
2895                            continue;
2896                         assert(member < glsl_get_length(var_deref->type));
2897                         /* deref the rewritten struct to the appropriate vec4/vec2 */
2898                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2899                         unsigned incr = MIN2(remaining, 4);
2900                         /* assemble the write component vec */
2901                         nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2902                         /* use the number of components being written as the writemask */
2903                         if (glsl_get_vector_elements(strct->type) > val->num_components)
2904                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2905                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2906                         remaining -= incr;
2907                         i += incr;
2908                      }
2909                      nir_pop_if(&b, NULL);
2910                   }
2911                   _mesa_set_add(deletes, &deref->instr);
2912                } else if (num_components <= 4) {
2913                   /* simple store case: just write out the components */
2914                   nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2915                   nir_store_deref(&b, deref, dest, mask);
2916                } else {
2917                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2918                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2919                      if (!(mask & BITFIELD_MASK(4)))
2920                         continue;
2921                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2922                      nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2923                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
2924                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2925                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
2926                      mask >>= 4;
2927                   }
2928                }
2929             } else {
2930                nir_ssa_def *dest = NULL;
2931                if (matrix) {
2932                   /* matrix types always come from array (row) derefs */
2933                   assert(deref->deref_type == nir_deref_type_array);
2934                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2935                   /* let optimization clean up consts later */
2936                   nir_ssa_def *index = deref->arr.index.ssa;
2937                   /* this might be an indirect array index:
2938                    * - iterate over matrix columns
2939                    * - add if blocks for each column
2940                    * - phi the loads using the array index
2941                    */
2942                   unsigned cols = glsl_get_matrix_columns(matrix);
2943                   nir_ssa_def *dests[4];
2944                   for (unsigned idx = 0; idx < cols; idx++) {
2945                      /* don't add an if for the final row: this will be handled in the else */
2946                      if (idx < cols - 1)
2947                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2948                      unsigned vec_components = glsl_get_vector_elements(matrix);
2949                      /* always clamp dvec3 to 4 components */
2950                      if (vec_components == 3)
2951                         vec_components = 4;
2952                      unsigned start_component = idx * vec_components * 2;
2953                      /* struct member */
2954                      unsigned member = start_component / 4;
2955                      /* number of components remaining */
2956                      unsigned remaining = num_components;
2957                      /* component index */
2958                      unsigned comp_idx = 0;
2959                      for (unsigned i = 0; i < num_components; member++) {
2960                         assert(member < glsl_get_length(var_deref->type));
2961                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2962                         nir_ssa_def *load = nir_load_deref(&b, strct);
2963                         unsigned incr = MIN2(remaining, 4);
2964                         /* repack the loads to 64bit */
2965                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
2966                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
2967                         remaining -= incr;
2968                         i += incr;
2969                      }
2970                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
2971                      if (idx < cols - 1)
2972                         nir_push_else(&b, NULL);
2973                   }
2974                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
2975                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
2976                      nir_pop_if(&b, NULL);
2977                      dest = nir_if_phi(&b, dests[idx - 1], dest);
2978                   }
2979                   _mesa_set_add(deletes, &deref->instr);
2980                } else if (num_components <= 4) {
2981                   /* simple load case */
2982                   nir_ssa_def *load = nir_load_deref(&b, deref);
2983                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
2984                   for (unsigned i = 0; i < intr->num_components; i++) {
2985                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
2986                   }
2987                   dest = nir_vec(&b, comp, intr->num_components);
2988                } else {
2989                   /* writing > 4 components: access the struct and load the appropriate vec4 members */
2990                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
2991                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2992                      nir_ssa_def *load = nir_load_deref(&b, strct);
2993                      comp[i * 2] = nir_pack_64_2x32(&b,
2994                                                     nir_trim_vector(&b, load, 2));
2995                      if (num_components > 2)
2996                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
2997                   }
2998                   dest = nir_vec(&b, comp, intr->num_components);
2999                }
3000                nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3001             }
3002             _mesa_set_add(deletes, instr);
3003             break;
3004          }
3005          break;
3006          default: break;
3007          }
3008       }
3009    }
3010    if (func_progress)
3011       nir_metadata_preserve(impl, nir_metadata_none);
3012    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3013    set_foreach_remove(deletes, he)
3014       nir_instr_remove((void*)he->key);
3015    return func_progress;
3016 }
3017
3018 static bool
3019 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3020                       struct set *deletes, bool doubles_only)
3021 {
3022    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3023       return false;
3024    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3025    /* once type is rewritten, rewrite all loads and stores */
3026    nir_foreach_function_impl(impl, shader)
3027       lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3028    return true;
3029 }
3030
3031 /* rewrite all input/output variables using 32bit types and load/stores */
3032 static bool
3033 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3034 {
3035    bool progress = false;
3036    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3037    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3038    nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3039       progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3040    nir_foreach_function_impl(impl, shader) {
3041       nir_foreach_function_temp_variable(var, impl) {
3042          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3043             continue;
3044          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3045          progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3046       }
3047    }
3048    ralloc_free(deletes);
3049    ralloc_free(derefs);
3050    if (progress) {
3051       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3052       nir_lower_phis_to_scalar(shader, false);
3053       optimize_nir(shader, NULL);
3054    }
3055    return progress;
3056 }
3057
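/* Split struct outputs into one variable per member so i/o assignment and
 * SPIRV emission only ever see plain slot-sized variables, e.g. (names
 * illustrative):
 *    out struct { vec4 a; float b; } s;   =>   out vec4 a; out float b;
 * The original struct variable is demoted to a temp and removed later.
 */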
3058 static bool
3059 split_blocks(nir_shader *nir)
3060 {
3061    bool progress = false;
3062    bool changed = false;
3063    do {
3064       progress = false;
3065       nir_foreach_shader_out_variable(var, nir) {
3066          const struct glsl_type *base_type = glsl_without_array(var->type);
3067          nir_variable *members[32]; //can't have more than this without breaking NIR
3068          if (!glsl_type_is_struct(base_type))
3069             continue;
3070          /* TODO: arrays? */
3071          if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3072             continue;
3073          if (glsl_count_attribute_slots(var->type, false) == 1)
3074             continue;
3075          unsigned offset = 0;
3076          for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3077             members[i] = nir_variable_clone(var, nir);
3078             members[i]->type = glsl_get_struct_field(var->type, i);
3079             members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3080             members[i]->data.location += offset;
3081             offset += glsl_count_attribute_slots(members[i]->type, false);
3082             nir_shader_add_variable(nir, members[i]);
3083          }
3084          nir_foreach_function_impl(impl, nir) {
3085             bool func_progress = false;
3086             nir_builder b = nir_builder_create(impl);
3087             nir_foreach_block(block, impl) {
3088                nir_foreach_instr_safe(instr, block) {
3089                   switch (instr->type) {
3090                   case nir_instr_type_deref: {
3091                      nir_deref_instr *deref = nir_instr_as_deref(instr);
3092                      if (!(deref->modes & nir_var_shader_out))
3093                         continue;
3094                      if (nir_deref_instr_get_variable(deref) != var)
3095                         continue;
3096                      if (deref->deref_type != nir_deref_type_struct)
3097                         continue;
3098                      nir_deref_instr *parent = nir_deref_instr_parent(deref);
3099                      if (parent->deref_type != nir_deref_type_var)
3100                         continue;
3101                      deref->modes = nir_var_shader_temp;
3102                      parent->modes = nir_var_shader_temp;
3103                      b.cursor = nir_before_instr(instr);
3104                      nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3105                      nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3106                      nir_instr_remove(&deref->instr);
3107                      func_progress = true;
3108                      break;
3109                   }
3110                   default: break;
3111                   }
3112                }
3113             }
3114             if (func_progress)
3115                nir_metadata_preserve(impl, nir_metadata_none);
3116          }
3117          var->data.mode = nir_var_shader_temp;
3118          changed = true;
3119          progress = true;
3120       }
3121    } while (progress);
3122    return changed;
3123 }
3124
3125 static void
3126 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3127 {
3128    FILE *fp = fopen(file, "wb");
3129    if (fp) {
3130       fwrite(words, 1, size, fp);
3131       fclose(fp);
3132       fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3133    }
3134 }
3135
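/* VkShaderCreateInfoEXT::nextStage wants the set of stages that may follow
 * this one; derive it from the GL pipeline order.
 */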
3136 static VkShaderStageFlagBits
3137 zink_get_next_stage(gl_shader_stage stage)
3138 {
3139    switch (stage) {
3140    case MESA_SHADER_VERTEX:
3141       return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3142              VK_SHADER_STAGE_GEOMETRY_BIT |
3143              VK_SHADER_STAGE_FRAGMENT_BIT;
3144    case MESA_SHADER_TESS_CTRL:
3145       return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3146    case MESA_SHADER_TESS_EVAL:
3147       return VK_SHADER_STAGE_GEOMETRY_BIT |
3148              VK_SHADER_STAGE_FRAGMENT_BIT;
3149    case MESA_SHADER_GEOMETRY:
3150       return VK_SHADER_STAGE_FRAGMENT_BIT;
3151    case MESA_SHADER_FRAGMENT:
3152    case MESA_SHADER_COMPUTE:
3153       return 0;
3154    default:
3155       unreachable("invalid shader stage");
3156    }
3157 }
3158
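/* Compile SPIRV words into a Vulkan handle: a VkShaderEXT when shader
 * objects can be used, otherwise a VkShaderModule.  When validation
 * debugging is enabled, the SPIRV is also round-tripped through
 * spirv_to_nir as a sanity check before it ever reaches the driver.
 */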
3159 struct zink_shader_object
3160 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3161 {
3162    VkShaderModuleCreateInfo smci = {0};
3163    VkShaderCreateInfoEXT sci = {0};
3164
3165    if (!spirv)
3166       spirv = zs->spirv;
3167
3168    if (zink_debug & ZINK_DEBUG_SPIRV) {
3169       char buf[256];
3170       static int i;
3171       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3172       zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3173    }
3174
3175    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3176    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3177    sci.nextStage = zink_get_next_stage(zs->info.stage);
3178    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3179    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3180    sci.pCode = spirv->words;
3181    sci.pName = "main";
3182    VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3183    if (pg) {
3184       sci.setLayoutCount = pg->num_dsl;
3185       sci.pSetLayouts = pg->dsl;
3186    } else {
3187       sci.setLayoutCount = zs->info.stage + 1;
3188       dsl[zs->info.stage] = zs->precompile.dsl;
3189       sci.pSetLayouts = dsl;
3190    }
3191    VkPushConstantRange pcr;
3192    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3193    pcr.offset = 0;
3194    pcr.size = sizeof(struct zink_gfx_push_constant);
3195    sci.pushConstantRangeCount = 1;
3196    sci.pPushConstantRanges = &pcr;
3197
3198    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3199    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3200    smci.pCode = spirv->words;
3201
3202 #ifndef NDEBUG
3203    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3204       static const struct spirv_to_nir_options spirv_options = {
3205          .environment = NIR_SPIRV_VULKAN,
3206          .caps = {
3207             .float64 = true,
3208             .int16 = true,
3209             .int64 = true,
3210             .tessellation = true,
3211             .float_controls = true,
3212             .image_ms_array = true,
3213             .image_read_without_format = true,
3214             .image_write_without_format = true,
3215             .storage_image_ms = true,
3216             .geometry_streams = true,
3217             .storage_8bit = true,
3218             .storage_16bit = true,
3219             .variable_pointers = true,
3220             .stencil_export = true,
3221             .post_depth_coverage = true,
3222             .transform_feedback = true,
3223             .device_group = true,
3224             .draw_parameters = true,
3225             .shader_viewport_index_layer = true,
3226             .multiview = true,
3227             .physical_storage_buffer_address = true,
3228             .int64_atomics = true,
3229             .subgroup_arithmetic = true,
3230             .subgroup_basic = true,
3231             .subgroup_ballot = true,
3232             .subgroup_quad = true,
3233             .subgroup_shuffle = true,
3234             .subgroup_vote = true,
3235             .vk_memory_model = true,
3236             .vk_memory_model_device_scope = true,
3237             .int8 = true,
3238             .float16 = true,
3239             .demote_to_helper_invocation = true,
3240             .sparse_residency = true,
3241             .min_lod = true,
3242          },
3243          .ubo_addr_format = nir_address_format_32bit_index_offset,
3244          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3245          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3246          .push_const_addr_format = nir_address_format_logical,
3247          .shared_addr_format = nir_address_format_32bit_offset,
3248       };
3249       uint32_t num_spec_entries = 0;
3250       struct nir_spirv_specialization *spec_entries = NULL;
3251       VkSpecializationInfo sinfo = {0};
3252       VkSpecializationMapEntry me[3];
3253       uint32_t size[3] = {1,1,1};
3254       if (!zs->info.workgroup_size[0]) {
3255          sinfo.mapEntryCount = 3;
3256          sinfo.pMapEntries = &me[0];
3257          sinfo.dataSize = sizeof(uint32_t) * 3;
3258          sinfo.pData = size;
3259          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3260          for (int i = 0; i < 3; i++) {
3261             me[i].size = sizeof(uint32_t);
3262             me[i].constantID = ids[i];
3263             me[i].offset = i * sizeof(uint32_t);
3264          }
3265          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3266       }
3267       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3268                          spec_entries, num_spec_entries,
3269                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3270       assert(nir);
3271       ralloc_free(nir);
3272       free(spec_entries);
3273    }
3274 #endif
3275
3276    VkResult ret;
3277    struct zink_shader_object obj = {0};
3278    if (!can_shobj || !screen->info.have_EXT_shader_object)
3279       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3280    else
3281       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3282    bool success = zink_screen_handle_vkresult(screen, ret);
3283    assert(success);
3284    return obj;
3285 }
3286
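/* Demote shader_in/shader_out variables that no instruction actually derefs
 * so they don't reach SPIRV emission.
 */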
3287 static void
3288 prune_io(nir_shader *nir)
3289 {
3290    nir_foreach_shader_in_variable_safe(var, nir) {
3291       if (!find_var_deref(nir, var))
3292          var->data.mode = nir_var_shader_temp;
3293    }
3294    nir_foreach_shader_out_variable_safe(var, nir) {
3295       if (!find_var_deref(nir, var))
3296          var->data.mode = nir_var_shader_temp;
3297    }
3298    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3299 }
3300
3301 static void
3302 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3303 {
3304    /* unconvert from zink_binding() */
3305    uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3306    assert(sampler_id < 32); //bitfield size for tracking
3307    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3308 }
3309
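/* Make a tex instruction's dest match the sampler variable's declared result
 * type: if they disagree (e.g. a 16bit result type), shrink the dest and
 * convert back to the original bit size after the instruction.  Also detects
 * legacy (vec4-result) shadow sampling so that a swizzling recompile can be
 * flagged via flag_shadow_tex().
 */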
3310 static nir_ssa_def *
3311 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3312 {
3313    assert(var);
3314    const struct glsl_type *type = glsl_without_array(var->type);
3315    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3316    bool is_int = glsl_base_type_is_integer(ret_type);
3317    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3318    unsigned dest_size = nir_dest_bit_size(tex->dest);
3319    b->cursor = nir_after_instr(&tex->instr);
3320    unsigned num_components = nir_dest_num_components(tex->dest);
3321    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3322    if (bit_size == dest_size && !rewrite_depth)
3323       return NULL;
3324    nir_ssa_def *dest = &tex->dest.ssa;
3325    if (rewrite_depth && zs) {
3326       /* If only .x is used in the NIR, then it's effectively not a legacy depth
3327        * sample anyway and we don't want to ask for shader recompiles.  This is
3328        * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3329        * LUMINANCE, so apps just use the first channel.
3330        */
3331       if (nir_ssa_def_components_read(dest) & ~1) {
3332          if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3333             flag_shadow_tex(var, zs);
3334          else
3335             mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3336       }
3337       return NULL;
3338    }
3339    if (bit_size != dest_size) {
3340       tex->dest.ssa.bit_size = bit_size;
3341       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3342
3343       if (is_int) {
3344          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3345             dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3346          else
3347             dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3348       } else {
3349          dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3350       }
3351       if (rewrite_depth)
3352          return dest;
3353       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3354    }
3357    return dest;
3358 }
3359
3360 struct lower_zs_swizzle_state {
3361    bool shadow_only;
3362    unsigned base_sampler_id;
3363    const struct zink_zs_swizzle_key *swizzle;
3364 };
3365
3366 static bool
3367 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3368 {
3369    struct lower_zs_swizzle_state *state = data;
3370    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3371    assert(state->shadow_only || swizzle_key);
3372    if (instr->type != nir_instr_type_tex)
3373       return false;
3374    nir_tex_instr *tex = nir_instr_as_tex(instr);
3375    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3376        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3377       return false;
3378    if (tex->is_shadow && tex->op == nir_texop_tg4)
3379       /* Will not even try to emulate the shadow comparison */
3380       return false;
3381    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3382    nir_variable *var = NULL;
3383    if (handle != -1)
3384       /* gtfo bindless depth texture mode */
3385       return false;
3386    nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3387       if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3388          unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3389          if (tex->texture_index >= img->data.driver_location &&
3390                tex->texture_index < img->data.driver_location + size) {
3391             var = img;
3392             break;
3393          }
3394       }
3395    }
3396    assert(var);
3397    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3398    const struct glsl_type *type = glsl_without_array(var->type);
3399    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3400    bool is_int = glsl_base_type_is_integer(ret_type);
3401    unsigned num_components = nir_dest_num_components(tex->dest);
3402    if (tex->is_shadow)
3403       tex->is_new_style_shadow = true;
3404    nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3405    assert(dest || !state->shadow_only);
3406    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3407       return false;
3408    else if (!dest)
3409       dest = &tex->dest.ssa;
3410    else
3411       tex->dest.ssa.num_components = 1;
3412    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3413       /* these require manual swizzles */
3414       if (tex->op == nir_texop_tg4) {
3415          assert(!tex->is_shadow);
3416          nir_ssa_def *swizzle;
3417          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3418          case PIPE_SWIZZLE_0:
3419             swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3420             break;
3421          case PIPE_SWIZZLE_1:
3422             /* gather returns a constant 1 in all four components */
3423             swizzle = is_int ? nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest))
3424                              : nir_imm_floatN_t(b, 1.0, nir_dest_bit_size(tex->dest));
3425             swizzle = nir_vec4(b, swizzle, swizzle, swizzle, swizzle);
3426             break;
3427          default:
3428             if (!tex->component)
3429                return false;
3430             tex->component = 0;
3431             return true;
3432          }
3433          nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3434          return true;
3435       }
3436       nir_ssa_def *vec[4];
3437       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3438          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3439          case PIPE_SWIZZLE_0:
3440             vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3441             break;
3442          case PIPE_SWIZZLE_1:
3443             if (is_int)
3444                vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3445             else
3446                vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3447             break;
3448          default:
3449             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3450             break;
3451          }
3452       }
3453       nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3454       nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3455    } else {
3456       assert(tex->is_shadow);
3457       nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3458       nir_ssa_def *splat = nir_vec(b, vec, num_components);
3459       nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3460    }
3461    return true;
3462 }
3463
3464 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3465  *
3466  * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3467  * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3468  * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3469  * shader to expand out to vec4.  Since this depends on sampler state, it's a
3470  * draw-time shader recompile to do so.
3471  *
3472  * We may also need to apply shader swizzles for
3473  * driver_workarounds.needs_zs_shader_swizzle.
3474  */
3475 static bool
3476 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3477 {
3478    /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3479    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3480    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3481    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3482 }
3483
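/* Vulkan's PointCoord origin is the top-left; when the GL point sprite state
 * wants the lower-left convention, rewrite loads as (x, 1 - y).
 */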
3484 static bool
3485 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3486 {
3487    if (instr->type != nir_instr_type_intrinsic)
3488       return false;
3489    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3490    if (intr->intrinsic != nir_intrinsic_load_point_coord)
3491       return false;
3492    b->cursor = nir_after_instr(instr);
3493    nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3494                                   nir_fsub_imm(b, 1.0, nir_channel(b, &intr->dest.ssa, 1)));
3495    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3496    return true;
3497 }
3498
3499 static bool
3500 invert_point_coord(nir_shader *nir)
3501 {
3502    if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3503       return false;
3504    return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
3505 }
3506
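/* Common tail of every compile path: prune unused i/o, leave SSA form for
 * the backend, translate to SPIRV, and hand the words to
 * zink_shader_spirv_compile().
 */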
3507 static struct zink_shader_object
3508 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3509 {
3510    struct zink_shader_info *sinfo = &zs->sinfo;
3511    prune_io(nir);
3512
3513    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3514
3515    if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3516       nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3517    if (zink_debug & ZINK_DEBUG_NIR) {
3518       fprintf(stderr, "NIR shader:\n---8<---\n");
3519       nir_print_shader(nir, stderr);
3520       fprintf(stderr, "---8<---\n");
3521    }
3522
3523    struct zink_shader_object obj;
3524    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3525    if (spirv)
3526       obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3527
3528    /* TODO: determine if there's any reason to cache spirv output? */
3529    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3530       zs->spirv = spirv;
3531    else
3532       obj.spirv = spirv;
3533    return obj;
3534 }
3535
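/* Apply all shader-key-driven lowering (decomposed attribs, line stipple and
 * smoothing, point smoothing, sample shading, zs swizzles, nonseamless
 * cubemaps, ...), inline uniforms when requested, then hand the result to
 * compile_module().
 */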
3536 struct zink_shader_object
3537 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
3538                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3539 {
3540    struct zink_shader_info *sinfo = &zs->sinfo;
3541    bool need_optimize = false;
3542    bool inlined_uniforms = false;
3543
3544    if (key) {
3545       if (key->inline_uniforms) {
3546          NIR_PASS_V(nir, nir_inline_uniforms,
3547                     nir->info.num_inlinable_uniforms,
3548                     key->base.inlined_uniform_values,
3549                     nir->info.inlinable_uniform_dw_offsets);
3550
3551          inlined_uniforms = true;
3552       }
3553
3554       /* TODO: use a separate mem ctx here for ralloc */
3555
3556       if (!screen->optimal_keys) {
3557          switch (zs->info.stage) {
3558          case MESA_SHADER_VERTEX: {
3559             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3560             const struct zink_vs_key *vs_key = zink_vs_key(key);
3561             switch (vs_key->size) {
3562             case 4:
3563                decomposed_attrs = vs_key->u32.decomposed_attrs;
3564                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3565                break;
3566             case 2:
3567                decomposed_attrs = vs_key->u16.decomposed_attrs;
3568                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3569                break;
3570             case 1:
3571                decomposed_attrs = vs_key->u8.decomposed_attrs;
3572                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3573                break;
3574             default: break;
3575             }
3576             if (decomposed_attrs || decomposed_attrs_without_w)
3577                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3578             break;
3579          }
3580
3581          case MESA_SHADER_GEOMETRY:
3582             if (zink_gs_key(key)->lower_line_stipple) {
3583                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3584                NIR_PASS_V(nir, nir_lower_var_copies);
3585                need_optimize = true;
3586             }
3587
3588             if (zink_gs_key(key)->lower_line_smooth) {
3589                NIR_PASS_V(nir, lower_line_smooth_gs);
3590                NIR_PASS_V(nir, nir_lower_var_copies);
3591                need_optimize = true;
3592             }
3593
3594             if (zink_gs_key(key)->lower_gl_point) {
3595                NIR_PASS_V(nir, lower_gl_point_gs);
3596                need_optimize = true;
3597             }
3598
3599             if (zink_gs_key(key)->lower_pv_mode) {
3600                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3601                need_optimize = true; //TODO verify that this is required
3602             }
3603             break;
3604
3605          default:
3606             break;
3607          }
3608       }
3609
3610       switch (zs->info.stage) {
3611       case MESA_SHADER_VERTEX:
3612       case MESA_SHADER_TESS_EVAL:
3613       case MESA_SHADER_GEOMETRY:
3614          if (zink_vs_key_base(key)->last_vertex_stage) {
3615             if (zs->sinfo.have_xfb)
3616                sinfo->last_vertex = true;
3617
3618             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3619                NIR_PASS_V(nir, nir_lower_clip_halfz);
3620             }
3621             if (zink_vs_key_base(key)->push_drawid) {
3622                NIR_PASS_V(nir, lower_drawid);
3623             }
3624          }
3625          if (zink_vs_key_base(key)->robust_access)
3626             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3627          break;
3628       case MESA_SHADER_FRAGMENT:
3629          if (zink_fs_key(key)->lower_line_smooth) {
3630             NIR_PASS_V(nir, lower_line_smooth_fs,
3631                        zink_fs_key(key)->lower_line_stipple);
3632             need_optimize = true;
3633          } else if (zink_fs_key(key)->lower_line_stipple)
3634                NIR_PASS_V(nir, lower_line_stipple_fs);
3635
3636          if (zink_fs_key(key)->lower_point_smooth) {
3637             NIR_PASS_V(nir, nir_lower_point_smooth);
3638             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3639             nir->info.fs.uses_discard = true;
3640             need_optimize = true;
3641          }
3642
3643          if (zink_fs_key(key)->robust_access)
3644             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3645
3646          if (!zink_fs_key_base(key)->samples &&
3647             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3648             /* VK will always use gl_SampleMask[] values even if sample count is 0,
3649              * so we need to skip this write here to mimic GL's behavior of ignoring it
3650              */
3651             nir_foreach_shader_out_variable(var, nir) {
3652                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3653                   var->data.mode = nir_var_shader_temp;
3654             }
3655             nir_fixup_deref_modes(nir);
3656             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3657             need_optimize = true;
3658          }
3659          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3660             NIR_PASS_V(nir, lower_dual_blend);
3661          }
3662          if (zink_fs_key_base(key)->coord_replace_bits)
3663             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
3664          if (zink_fs_key_base(key)->point_coord_yinvert)
3665             NIR_PASS_V(nir, invert_point_coord);
3666          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3667             nir_foreach_shader_in_variable(var, nir)
3668                var->data.sample = true;
3669             nir->info.fs.uses_sample_qualifier = true;
3670             nir->info.fs.uses_sample_shading = true;
3671          }
3672          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3673             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3674          if (nir->info.fs.uses_fbfetch_output) {
3675             nir_variable *fbfetch = NULL;
3676             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3677             /* old variable must be deleted to avoid spirv errors */
3678             fbfetch->data.mode = nir_var_shader_temp;
3679             nir_fixup_deref_modes(nir);
3680             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3681             need_optimize = true;
3682          }
3683          nir_foreach_shader_in_variable_safe(var, nir) {
3684             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3685                continue;
3686             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3687             var->data.mode = nir_var_shader_temp;
3688             nir_fixup_deref_modes(nir);
3689             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3690             need_optimize = true;
3691          }
3692          break;
3693       case MESA_SHADER_COMPUTE:
3694          if (zink_cs_key(key)->robust_access)
3695             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3696          break;
3697       default: break;
3698       }
3699       if (key->base.needs_zs_shader_swizzle) {
3700          assert(extra_data);
3701          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3702       }
3703       if (key->base.nonseamless_cube_mask) {
3704          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3705          need_optimize = true;
3706       }
3707    }
3708    if (screen->driconf.inline_uniforms) {
3709       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
3710       NIR_PASS_V(nir, rewrite_bo_access, screen);
3711       NIR_PASS_V(nir, remove_bo_access, zs);
3712       need_optimize = true;
3713    }
3714    if (inlined_uniforms) {
3715       optimize_nir(nir, zs);
3716
3717       /* This must be done again. */
3718       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3719                                                        nir_var_shader_out);
3720
3721       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3722       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3723          zs->can_inline = false;
3724    } else if (need_optimize)
3725       optimize_nir(nir, zs);
3726    
3727    struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
3728    ralloc_free(nir);
3729    return obj;
3730 }
3731
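/* Precompile a separable shader on its own (shader object / pipeline library
 * style): with no linked program available, descriptor sets and bindings are
 * remapped to the shader's own per-stage layout first.
 */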
3732 struct zink_shader_object
3733 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3734 {
3735    nir_shader *nir = zink_shader_deserialize(screen, zs);
3736    /* TODO: maybe compile multiple variants for different set counts for compact mode? */
3737    int set = zs->info.stage == MESA_SHADER_FRAGMENT;
3738    if (screen->info.have_EXT_shader_object)
3739       set = zs->info.stage;
3740    unsigned offsets[4];
3741    zink_descriptor_shader_get_binding_offsets(zs, offsets);
3742    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3743       if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
3744          continue;
3745       var->data.descriptor_set = set;
3746       switch (var->data.mode) {
3747       case nir_var_mem_ubo:
3748          var->data.binding = !!var->data.driver_location;
3749          break;
3750       case nir_var_uniform:
3751          if (glsl_type_is_sampler(glsl_without_array(var->type)))
3752             var->data.binding += offsets[1];
3753          break;
3754       case nir_var_mem_ssbo:
3755          var->data.binding += offsets[2];
3756          break;
3757       case nir_var_image:
3758          var->data.binding += offsets[3];
3759          break;
3760       default: break;
3761       }
3762    }
3763    if (screen->driconf.inline_uniforms) {
3764       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
3765       NIR_PASS_V(nir, rewrite_bo_access, screen);
3766       NIR_PASS_V(nir, remove_bo_access, zs);
3767    }
3768    optimize_nir(nir, zs);
3769    zink_descriptor_shader_init(screen, zs);
3770    zs->sinfo.last_vertex = zs->sinfo.have_xfb;
3771    nir_shader *nir_clone = NULL;
3772    if (screen->info.have_EXT_shader_object)
3773       nir_clone = nir_shader_clone(nir, nir);
3774    struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
3775    if (screen->info.have_EXT_shader_object && !zs->info.internal) {
3776       /* always try to pre-generate a tcs in case it's needed */
3777       if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
3778          nir_shader *nir_tcs = NULL;
3779          /* use max pcp for compat */
3780          zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
3781          nir_tcs->info.separate_shader = true;
3782          zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
3783          ralloc_free(nir_tcs);
3784       }
3785       if (zs->info.stage == MESA_SHADER_VERTEX || zs->info.stage == MESA_SHADER_TESS_EVAL) {
3786          /* create a second variant with PSIZ removed:
3787           * this works around a bug in drivers using nir_assign_io_var_locations()
3788           * where builtins that aren't read by following stages get assigned
3789           * driver locations before varyings and break the i/o interface between shaders even
3790           * though zink has correctly assigned all locations
3791           */
3792          nir_variable *var = nir_find_variable_with_location(nir_clone, nir_var_shader_out, VARYING_SLOT_PSIZ);
3793          if (var && !var->data.explicit_location) {
3794             var->data.mode = nir_var_shader_temp;
3795             nir_fixup_deref_modes(nir_clone);
3796             NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3797             optimize_nir(nir_clone, NULL);
3798             zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
3799             spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
3800             zs->precompile.no_psiz_obj.spirv = NULL;
3801          }
3802       }
3803    }
3804    ralloc_free(nir);
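        /* the spirv blob was only needed to create the module, so free it now */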
3805    spirv_shader_delete(obj.spirv);
3806    obj.spirv = NULL;
3807    return obj;
3808 }
3809
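     /* GL's gl_InstanceID does not include the base instance, but Vulkan's
      * InstanceIndex does, so subtract load_base_instance from every load
      */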
3810 static bool
3811 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3812 {
3813    if (instr->type != nir_instr_type_intrinsic)
3814       return false;
3815    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3816    if (intr->intrinsic != nir_intrinsic_load_instance_id)
3817       return false;
3818    b->cursor = nir_after_instr(instr);
3819    nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3820    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3821    return true;
3822 }
3823
3824 static bool
3825 lower_baseinstance(nir_shader *shader)
3826 {
3827    if (shader->info.stage != MESA_SHADER_VERTEX)
3828       return false;
3829    return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3830 }
3831
3832 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
3833  * so instead we delete all those broken variables and just make new ones
3834  */
3835 static bool
3836 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
3837 {
3838    uint64_t max_ssbo_size = 0;
3839    uint64_t max_ubo_size = 0;
3840    uint64_t max_uniform_size = 0;
3841
3842    if (!shader->info.num_ssbos && !shader->info.num_ubos)
3843       return false;
3844
3845    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3846       const struct glsl_type *type = glsl_without_array(var->type);
3847       if (type_is_counter(type))
3848          continue;
3849       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
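           /* sizes here are counted in vec4 slots; explicit byte sizes are converted below */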
3850       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3851       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3852       if (interface_type) {
3853          unsigned block_size = glsl_get_explicit_size(interface_type, true);
3854          if (glsl_get_length(interface_type) == 1) {
3855             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3856             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3857             if (glsl_type_is_array(f) && !glsl_array_size(f))
3858                block_size = 0;
3859          }
3860          if (block_size) {
3861             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3862             size = MAX2(size, block_size);
3863          }
3864       }
3865       if (var->data.mode == nir_var_mem_ubo) {
3866          if (var->data.driver_location)
3867             max_ubo_size = MAX2(max_ubo_size, size);
3868          else
3869             max_uniform_size = MAX2(max_uniform_size, size);
3870       } else {
3871          max_ssbo_size = MAX2(max_ssbo_size, size);
3872          if (interface_type) {
3873             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
3874                needs_size = true;
3875          }
3876       }
3877       var->data.mode = nir_var_shader_temp;
3878    }
3879    nir_fixup_deref_modes(shader);
3880    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3881    optimize_nir(shader, NULL);
3882
3883    struct glsl_struct_field field = {0};
3884    field.name = ralloc_strdup(shader, "base");
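        /* every replacement variable has the same rough shape (GLSL-style):
         *
         *    struct { uint base[N]; } name[count];
         *
         * with N counted in uints, so BO access can be rewritten as indexed
         * 32-bit scalar loads/stores (see rewrite_bo_access)
         */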
3885    if (shader->info.num_ubos) {
3886       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
3887          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3888          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3889                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3890                                                  "uniform_0@32");
3891          var->interface_type = var->type;
3892          var->data.mode = nir_var_mem_ubo;
3893          var->data.driver_location = 0;
3894       }
3895
3896       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3897       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3898       if (num_ubos && ubos_used) {
3899          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3900          /* shrink array as much as possible */
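              /* bit 0 (the default ubo) was masked off above, so ffs() is always >= 2 */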
3901          unsigned first_ubo = ffs(ubos_used) - 2;
3902          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3903          num_ubos -= first_ubo;
3904          assert(num_ubos);
3905          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3906                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3907                                    "ubos@32");
3908          var->interface_type = var->type;
3909          var->data.mode = nir_var_mem_ubo;
3910          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3911       }
3912    }
3913    if (shader->info.num_ssbos && zs->ssbos_used) {
3914       /* shrink array as much as possible */
3915       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3916       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3917       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
3918       assert(num_ssbos);
3919       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3920       field.type = ssbo_type;
3921       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3922                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3923                                               "ssbos@32");
3924       var->interface_type = var->type;
3925       var->data.mode = nir_var_mem_ssbo;
3926       var->data.driver_location = first_ssbo;
3927    }
3928    return true;
3929 }
3930
3931 static uint32_t
3932 get_src_mask_ssbo(unsigned total, nir_src src)
3933 {
3934    if (nir_src_is_const(src))
3935       return BITFIELD_BIT(nir_src_as_uint(src));
3936    return BITFIELD_MASK(total);
3937 }
3938
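     /* as above, but an indirect ubo index can never reach ubo 0: the default
      * uniform block is not part of any array in the source shader
      */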
3939 static uint32_t
3940 get_src_mask_ubo(unsigned total, nir_src src)
3941 {
3942    if (nir_src_is_const(src))
3943       return BITFIELD_BIT(nir_src_as_uint(src));
3944    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
3945 }
3946
3947 static bool
3948 analyze_io(struct zink_shader *zs, nir_shader *shader)
3949 {
3950    bool ret = false;
3951    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3952    nir_foreach_block(block, impl) {
3953       nir_foreach_instr(instr, block) {
3954          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3955             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3956             nir_tex_instr *tex = nir_instr_as_tex(instr);
3957             nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3958                if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3959                   unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3960                   if (tex->texture_index >= img->data.driver_location &&
3961                      tex->texture_index < img->data.driver_location + size) {
3962                      BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3963                      break;
3964                   }
3965                }
3966             }
3967             continue;
3968          }
3969          if (instr->type != nir_instr_type_intrinsic)
3970             continue;
3971
3972          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3973          switch (intrin->intrinsic) {
3974          case nir_intrinsic_store_ssbo:
3975             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3976             break;
3977
3978          case nir_intrinsic_get_ssbo_size: {
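                 /* a size query requires a runtime-sized ssbo array: the return
                  * value feeds 'needs_size' in the caller
                  */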
3979             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3980             ret = true;
3981             break;
3982          }
3983          case nir_intrinsic_ssbo_atomic:
3984          case nir_intrinsic_ssbo_atomic_swap:
3985          case nir_intrinsic_load_ssbo:
3986             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3987             break;
3988          case nir_intrinsic_load_ubo:
3989          case nir_intrinsic_load_ubo_vec4:
3990             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
3991             break;
3992          default:
3993             break;
3994          }
3995       }
3996    }
3997    return ret;
3998 }
3999
4000 struct zink_bindless_info {
4001    nir_variable *bindless[4]; /* [0] texture, [1] texel buffer, [2] image, [3] image buffer */
4002    unsigned bindless_set;
4003 };
4004
4005 /* this is a "default" bindless texture used if the shader has no texture variables */
4006 static nir_variable *
4007 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4008 {
4009    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4010    nir_variable *var;
4011
4012    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4013    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4014    var->data.descriptor_set = descriptor_set;
4015    var->data.driver_location = var->data.binding = binding;
4016    return var;
4017 }
4018
4019 /* this is a "default" bindless image used if the shader has no image variables */
4020 static nir_variable *
4021 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4022 {
4023    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4024    nir_variable *var;
4025
4026    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4027    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4028    var->data.descriptor_set = descriptor_set;
4029    var->data.driver_location = var->data.binding = binding;
4030    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4031    return var;
4032 }
4033
4034 /* rewrite bindless instructions as array deref instructions */
4035 static bool
4036 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4037 {
4038    struct zink_bindless_info *bindless = data;
4039
4040    if (in->type == nir_instr_type_tex) {
4041       nir_tex_instr *tex = nir_instr_as_tex(in);
4042       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4043       if (idx == -1)
4044          return false;
4045
4046       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4047       if (!var)
4048          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4049       b->cursor = nir_before_instr(in);
4050       nir_deref_instr *deref = nir_build_deref_var(b, var);
4051       if (glsl_type_is_array(var->type))
4052          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4053       nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
4054
4055       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4056        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4057        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4058        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4059        *
4060        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4061        * - Warhammer 40k: Dawn of War III
4062        */
4063       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4064       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4065       unsigned coord_components = nir_src_num_components(tex->src[c].src);
4066       if (coord_components < needed_components) {
4067          nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4068          nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
4069          tex->coord_components = needed_components;
4070       }
4071       return true;
4072    }
4073    if (in->type != nir_instr_type_intrinsic)
4074       return false;
4075    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4076
4077    nir_intrinsic_op op;
4078 #define OP_SWAP(OP) \
4079    case nir_intrinsic_bindless_image_##OP: \
4080       op = nir_intrinsic_image_deref_##OP; \
4081       break;
4082
4083
4084    /* convert bindless intrinsics to deref intrinsics */
4085    switch (instr->intrinsic) {
4086    OP_SWAP(atomic)
4087    OP_SWAP(atomic_swap)
4088    OP_SWAP(format)
4089    OP_SWAP(load)
4090    OP_SWAP(order)
4091    OP_SWAP(samples)
4092    OP_SWAP(size)
4093    OP_SWAP(store)
4094    default:
4095       return false;
4096    }
4097
4098    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4099    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4100    if (!var)
4101       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4102    instr->intrinsic = op;
4103    b->cursor = nir_before_instr(in);
4104    nir_deref_instr *deref = nir_build_deref_var(b, var);
4105    if (glsl_type_is_array(var->type))
4106       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4107    nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
4108    return true;
4109 }
4110
4111 static bool
4112 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4113 {
4114    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4115       return false;
4116    nir_fixup_deref_modes(shader);
4117    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4118    optimize_nir(shader, NULL);
4119    return true;
4120 }
4121
4122 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4123 static bool
4124 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4125 {
4126    if (in->type != nir_instr_type_intrinsic)
4127       return false;
4128    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4129    if (instr->intrinsic != nir_intrinsic_load_deref &&
4130        instr->intrinsic != nir_intrinsic_store_deref)
4131       return false;
4132
4133    nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4134    nir_variable *var = nir_deref_instr_get_variable(src_deref);
4135    if (var->data.bindless)
4136       return false;
4137    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4138       return false;
4139    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4140       return false;
4141
4142    var->type = glsl_int64_t_type();
4143    var->data.bindless = 1;
4144    b->cursor = nir_before_instr(in);
4145    nir_deref_instr *deref = nir_build_deref_var(b, var);
4146    if (instr->intrinsic == nir_intrinsic_load_deref) {
4147        nir_ssa_def *def = nir_load_deref(b, deref);
4148        nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4149        nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4150    } else {
4151       nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4152    }
4153    nir_instr_remove(in);
4154    nir_instr_remove(&src_deref->instr);
4155    return true;
4156 }
4157
4158 static bool
4159 lower_bindless_io(nir_shader *shader)
4160 {
4161    return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
4162 }
4163
4164 static uint32_t
4165 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4166 {
4167    if (stage == MESA_SHADER_NONE) {
4168       unreachable("not supported");
4169    } else {
4170       unsigned base = stage;
4171       /* clamp compute bindings for better driver efficiency */
4172       if (gl_shader_stage_is_compute(stage))
4173          base = 0;
4174       switch (type) {
4175       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4176       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4177          return base * 2 + !!index; /* e.g. fragment (stage 4): binding 8 for ubo 0, 9 for the rest */
4178
4179       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4180          assert(stage == MESA_SHADER_KERNEL);
4181          FALLTHROUGH;
4182       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4183          if (stage == MESA_SHADER_KERNEL) {
4184             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4185             return index + PIPE_MAX_SAMPLERS;
4186          }
4187          FALLTHROUGH;
4188       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4189          assert(index < PIPE_MAX_SAMPLERS);
4190          assert(stage != MESA_SHADER_KERNEL);
4191          return (base * PIPE_MAX_SAMPLERS) + index;
4192
4193       case VK_DESCRIPTOR_TYPE_SAMPLER:
4194          assert(index < PIPE_MAX_SAMPLERS);
4195          assert(stage == MESA_SHADER_KERNEL);
4196          return index;
4197
4198       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4199          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4200
4201       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4202       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4203          assert(index < ZINK_MAX_SHADER_IMAGES);
4204          if (stage == MESA_SHADER_KERNEL)
4205             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4206          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4207
4208       default:
4209          unreachable("unexpected type");
4210       }
4211    }
4212 }
4213
4214 static void
4215 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4216 {
4217    if (glsl_type_is_struct(type)) {
4218       for (unsigned i = 0; i < glsl_get_length(type); i++)
4219          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4220       return;
4221    }
4222
4223    /* just a random scalar in a struct */
4224    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4225       return;
4226
4227    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4228    unsigned binding;
4229    switch (vktype) {
4230       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4231          binding = 0;
4232          break;
4233       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4234          binding = 1;
4235          break;
4236       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4237          binding = 2;
4238          break;
4239       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4240          binding = 3;
4241          break;
4242       default:
4243          unreachable("unknown");
4244    }
4245    if (!bindless->bindless[binding]) {
4246       bindless->bindless[binding] = nir_variable_clone(var, nir);
4247       bindless->bindless[binding]->data.bindless = 0;
4248       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4249       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4250       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4251       if (!bindless->bindless[binding]->data.image.format)
4252          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4253       nir_shader_add_variable(nir, bindless->bindless[binding]);
4254    } else {
4255       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4256    }
4257    var->data.mode = nir_var_shader_temp;
4258 }
4259
4260 static bool
4261 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4262 {
4263    struct zink_screen *screen = data;
4264    if (instr->type != nir_instr_type_tex)
4265       return false;
4266    nir_tex_instr *tex = nir_instr_as_tex(instr);
4267    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4268       return false;
4269    if (tex->is_sparse && screen->need_2D_sparse) {
4270       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4271       mesa_loge("unhandled/unsupported 1D sparse texture!");
4272       abort();
4273    }
4274    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4275    b->cursor = nir_before_instr(instr);
4276    tex->coord_components++;
4277    unsigned srcs[] = {
4278       nir_tex_src_coord,
4279       nir_tex_src_offset,
4280       nir_tex_src_ddx,
4281       nir_tex_src_ddy,
4282    };
4283    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4284       int c = nir_tex_instr_src_index(tex, srcs[i]);
4285       if (c == -1)
4286          continue;
4287       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4288          continue;
4289       nir_ssa_def *def;
4290       nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
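           /* pad for the 2D promotion: 1D (x) -> (x, 0), 1D-array (x, layer) -> (x, 0, layer) */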
4291       if (tex->src[c].src.ssa->num_components == 1)
4292          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4293       else
4294          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4295       nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4296    }
4297    b->cursor = nir_after_instr(instr);
4298    unsigned needed_components = nir_tex_instr_dest_size(tex);
4299    unsigned num_components = tex->dest.ssa.num_components;
4300    if (needed_components > num_components) {
4301       tex->dest.ssa.num_components = needed_components;
4302       assert(num_components < 3);
4303       /* take either xz or just x since this is promoted to 2D from 1D */
4304       uint32_t mask = num_components == 2 ? (1|4) : 1;
4305       nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4306       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
4307    }
4308    return true;
4309 }
4310
4311 static bool
4312 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4313 {
4314    bool found = false;
4315    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4316       const struct glsl_type *type = glsl_without_array(var->type);
4317       unsigned length = glsl_get_length(var->type);
4318       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4319          continue;
4320       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4321       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4322
4323       found = true;
4324    }
4325    if (found)
4326       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4327    return found;
4328 }
4329
4330 static void
4331 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4332 {
4333    nir_foreach_function_impl(impl, shader) {
4334       nir_foreach_block_safe(block, impl) {
4335          nir_foreach_instr_safe(instr, block) {
4336             if (instr->type == nir_instr_type_tex) {
4337                nir_tex_instr *tex = nir_instr_as_tex(instr);
4338                zs->sinfo.have_sparse |= tex->is_sparse;
4339             }
4340             if (instr->type != nir_instr_type_intrinsic)
4341                continue;
4342             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4343             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4344                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4345                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4346                 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4347                 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4348                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4349                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4350                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4351                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4352
4353                 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4354
4355                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4356                 const unsigned size =
4357                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4358                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4359                                  var->data.binding + (MAX2(size, 1) - 1));
4360             }
4361             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4362                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4363                zs->sinfo.have_sparse = true;
4364
4365             static bool warned = false;
4366             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4367                switch (intr->intrinsic) {
4368                case nir_intrinsic_image_deref_atomic: {
4369                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4370                   if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4371                       util_format_is_float(var->data.image.format)) {
4372                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                         /* set the flag so the warning only fires once */
                         warned = true;
                      }
4373                   break;
4374                }
4375                default:
4376                   break;
4377                }
4378             }
4379          }
4380       }
4381    }
4382 }
4383
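     /* walk up the alu chain feeding this value: a residency code comes either
      * straight from a sparse tex instr (true) or from an already-lowered
      * is_sparse_texels_resident intrinsic (false)
      */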
4384 static bool
4385 is_residency_code(nir_ssa_def *src)
4386 {
4387    nir_instr *parent = src->parent_instr;
4388    while (1) {
4389       if (parent->type == nir_instr_type_intrinsic) {
4390          ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4391          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4392          return false;
4393       }
4394       if (parent->type == nir_instr_type_tex)
4395          return true;
4396       assert(parent->type == nir_instr_type_alu);
4397       nir_alu_instr *alu = nir_instr_as_alu(parent);
4398       parent = alu->src[0].src.ssa->parent_instr;
4399    }
4400 }
4401
4402 static bool
4403 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4404 {
4405    if (in->type != nir_instr_type_intrinsic)
4406       return false;
4407    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4408    if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4409       b->cursor = nir_before_instr(&instr->instr);
4410       nir_ssa_def *src0;
4411       if (is_residency_code(instr->src[0].ssa))
4412          src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4413       else
4414          src0 = instr->src[0].ssa;
4415       nir_ssa_def *src1;
4416       if (is_residency_code(instr->src[1].ssa))
4417          src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4418       else
4419          src1 = instr->src[1].ssa;
4420       nir_ssa_def *def = nir_iand(b, src0, src1);
4421       nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4422       nir_instr_remove(in);
4423       return true;
4424    }
4425    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4426       return false;
4427
4428    /* vulkan vec can only be a vec4, but this is (maybe) vec5,
4429     * so just rewrite as the first component since ntv is going to use a different
4430     * method for storing the residency value anyway
4431     */
4432    b->cursor = nir_before_instr(&instr->instr);
4433    nir_instr *parent = instr->src[0].ssa->parent_instr;
4434    if (is_residency_code(instr->src[0].ssa)) {
4435       assert(parent->type == nir_instr_type_alu);
4436       nir_alu_instr *alu = nir_instr_as_alu(parent);
4437       nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4438       nir_instr_remove(parent);
4439    } else {
4440       nir_ssa_def *src;
4441       if (parent->type == nir_instr_type_intrinsic) {
4442          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4443          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4444          src = intr->src[0].ssa;
4445       } else {
4446          assert(parent->type == nir_instr_type_alu);
4447          nir_alu_instr *alu = nir_instr_as_alu(parent);
4448          src = alu->src[0].src.ssa;
4449       }
4450       if (instr->dest.ssa.bit_size != 32) {
4451          if (instr->dest.ssa.bit_size == 1)
4452             src = nir_ieq_imm(b, src, 1);
4453          else
4454             src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4455       }
4456       nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4457       nir_instr_remove(in);
4458    }
4459    return true;
4460 }
4461
4462 static bool
4463 lower_sparse(nir_shader *shader)
4464 {
4465    return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
4466 }
4467
4468 static bool
4469 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4470 {
4471    if (in->type != nir_instr_type_tex)
4472       return false;
4473    nir_tex_instr *tex = nir_instr_as_tex(in);
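        /* size/lod queries return fixed types; only sampled results need their dests rewritten */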
4474    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4475       return false;
4476    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4477    nir_variable *var = NULL;
4478    if (handle != -1) {
4479       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4480    } else {
4481       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4482          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4483             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4484             if (tex->texture_index >= img->data.driver_location &&
4485                 tex->texture_index < img->data.driver_location + size) {
4486                var = img;
4487                break;
4488             }
4489          }
4490       }
4491    }
4492    return !!rewrite_tex_dest(b, tex, var, data);
4493 }
4494
4495 static bool
4496 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4497 {
4498    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4499 }
4500
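     /* nir's bitfield ops take per-component offset/bits operands, but the
      * corresponding SPIR-V OpBitField* instructions require scalars, so
      * vector ops are emitted one component at a time
      */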
4501 static bool
4502 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4503 {
4504    if (in->type != nir_instr_type_alu)
4505       return false;
4506    nir_alu_instr *alu = nir_instr_as_alu(in);
4507    switch (alu->op) {
4508    case nir_op_ubitfield_extract:
4509    case nir_op_ibitfield_extract:
4510    case nir_op_bitfield_insert:
4511       break;
4512    default:
4513       return false;
4514    }
4515    unsigned num_components = nir_dest_num_components(alu->dest.dest);
4516    if (num_components == 1)
4517       return false;
4518    b->cursor = nir_before_instr(in);
4519    nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
4520    for (unsigned i = 0; i < num_components; i++) {
4521       if (alu->op == nir_op_bitfield_insert)
4522          dests[i] = nir_bitfield_insert(b,
4523                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4524                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4525                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4526                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4527       else if (alu->op == nir_op_ubitfield_extract)
4528          dests[i] = nir_ubitfield_extract(b,
4529                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4530                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4531                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4532       else
4533          dests[i] = nir_ibitfield_extract(b,
4534                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4535                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4536                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4537    }
4538    nir_ssa_def *dest = nir_vec(b, dests, num_components);
4539    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4540    nir_instr_remove(in);
4541    return true;
4542 }
4543
4544
4545 static bool
4546 split_bitfields(nir_shader *shader)
4547 {
4548    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4549 }
4550
4551 static void
4552 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4553 {
4554    nir_foreach_function_impl(impl, nir) {
4555       nir_foreach_block(block, impl) {
4556          nir_foreach_instr_safe(instr, block) {
4557             if (instr->type != nir_instr_type_deref)
4558                continue;
4559             nir_deref_instr *deref = nir_instr_as_deref(instr);
4560             nir_variable *img = nir_deref_instr_get_variable(deref);
4561             if (img != var)
4562                continue;
4563             if (glsl_type_is_array(var->type)) {
4564                if (deref->deref_type == nir_deref_type_array)
4565                   deref->type = glsl_without_array(var->type);
4566                else
4567                   deref->type = var->type;
4568             } else {
4569                deref->type = var->type;
4570             }
4571          }
4572       }
4573    }
4574 }
4575
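     /* infer a CL image's result type from its uses: typed load/store/atomic ops
      * set it directly, size-only images default to uint, and images with no
      * uses at all are demoted to temps and deleted
      */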
4576 static void
4577 type_image(nir_shader *nir, nir_variable *var)
4578 {
4579    nir_foreach_function_impl(impl, nir) {
4580       nir_foreach_block(block, impl) {
4581          nir_foreach_instr_safe(instr, block) {
4582             if (instr->type != nir_instr_type_intrinsic)
4583                continue;
4584             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4585             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4586                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4587                intr->intrinsic == nir_intrinsic_image_deref_store ||
4588                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4589                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4590                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4591                intr->intrinsic == nir_intrinsic_image_deref_format ||
4592                intr->intrinsic == nir_intrinsic_image_deref_order) {
4593                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4594                nir_variable *img = nir_deref_instr_get_variable(deref);
4595                if (img != var)
4596                   continue;
4597                nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4598                const struct glsl_type *type = glsl_without_array(var->type);
4599                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4600                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4601                   continue;
4602                }
4603                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4604                if (glsl_type_is_array(var->type))
4605                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4606                var->type = img_type;
4607                rewrite_cl_derefs(nir, var);
4608                return;
4609             }
4610          }
4611       }
4612    }
4613    nir_foreach_function_impl(impl, nir) {
4614       nir_foreach_block(block, impl) {
4615          nir_foreach_instr_safe(instr, block) {
4616             if (instr->type != nir_instr_type_intrinsic)
4617                continue;
4618             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4619             if (intr->intrinsic != nir_intrinsic_image_deref_size)
4620                continue;
4621             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4622             nir_variable *img = nir_deref_instr_get_variable(deref);
4623             if (img != var)
4624                continue;
4625             nir_alu_type alu_type = nir_type_uint32;
4626             const struct glsl_type *type = glsl_without_array(var->type);
4627             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4628                continue;
4629             }
4630             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4631             if (glsl_type_is_array(var->type))
4632                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4633             var->type = img_type;
4634             rewrite_cl_derefs(nir, var);
4635             return;
4636          }
4637       }
4638    }
4639    var->data.mode = nir_var_shader_temp;
4640 }
4641
4642 static nir_variable *
4643 find_sampler_var(nir_shader *nir, unsigned texture_index)
4644 {
4645    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4646       unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4647       if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4648           (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4649          return var;
4650    }
4651    return NULL;
4652 }
4653
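     /* give sampler views a concrete result type based on their tex instrs:
      * sampling ops are handled first since they carry the dest type that
      * matters, then query-only samplers are typed as a fallback
      */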
4654 static bool
4655 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4656 {
4657    bool progress = false;
4658    nir_foreach_function_impl(impl, nir) {
4659       nir_foreach_block(block, impl) {
4660          nir_foreach_instr(instr, block) {
4661             if (instr->type != nir_instr_type_tex)
4662                continue;
4663             nir_tex_instr *tex = nir_instr_as_tex(instr);
4664             switch (tex->op) {
4665             case nir_texop_lod:
4666             case nir_texop_txs:
4667             case nir_texop_query_levels:
4668             case nir_texop_texture_samples:
4669             case nir_texop_samples_identical:
4670                continue;
4671             default:
4672                break;
4673             }
4674             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4675             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4676             assert(var);
4677             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4678                continue;
4679             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4680             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4681             if (size > 1)
4682                img_type = glsl_array_type(img_type, size, 0);
4683             var->type = img_type;
4684             progress = true;
4685          }
4686       }
4687    }
4688    nir_foreach_function_impl(impl, nir) {
4689       nir_foreach_block(block, impl) {
4690          nir_foreach_instr(instr, block) {
4691             if (instr->type != nir_instr_type_tex)
4692                continue;
4693             nir_tex_instr *tex = nir_instr_as_tex(instr);
4694             switch (tex->op) {
4695             case nir_texop_lod:
4696             case nir_texop_txs:
4697             case nir_texop_query_levels:
4698             case nir_texop_texture_samples:
4699             case nir_texop_samples_identical:
4700                break;
4701             default:
4702                continue;
4703             }
4704             *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4705             nir_variable *var = find_sampler_var(nir, tex->texture_index);
4706             assert(var);
4707             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4708                continue;
4709             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4710             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4711             if (size > 1)
4712                img_type = glsl_array_type(img_type, size, 0);
4713             var->type = img_type;
4714             progress = true;
4715          }
4716       }
4717    }
4718    return progress;
4719 }
4720
4721 static bool
4722 delete_samplers(nir_shader *nir)
4723 {
4724    bool progress = false;
4725    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4726       if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4727          var->data.mode = nir_var_shader_temp;
4728          progress = true;
4729       }
4730    }
4731    return progress;
4732 }
4733
4734 static bool
4735 type_images(nir_shader *nir, unsigned *sampler_mask)
4736 {
4737    bool progress = false;
4738    progress |= delete_samplers(nir);
4739    progress |= type_sampler_vars(nir, sampler_mask);
4740    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4741       type_image(nir, var);
4742       progress = true;
4743    }
4744    return progress;
4745 }
4746
4747 /* attempt to assign io for separate shaders */
4748 static bool
4749 fixup_io_locations(nir_shader *nir)
4750 {
4751    nir_variable_mode modes;
4752    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
4753       modes = nir_var_shader_in | nir_var_shader_out;
4754    else
4755       modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4756    u_foreach_bit(mode, modes) {
4757       nir_variable_mode m = BITFIELD_BIT(mode);
4758       if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
4759           (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
4760          /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
4761           * - i/o interface blocks don't need to match
4762           * - any location can be present or not
4763           * - it just has to work
4764           *
4765           * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
4766           * since it's a builtin and yolo it with all the other legacy crap
4767           */
4768          nir_foreach_variable_with_modes(var, nir, m) {
4769             if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
4770                continue;
4771             if (var->data.location == VARYING_SLOT_VAR0)
4772                var->data.driver_location = 0;
4773             else
4774                var->data.driver_location = var->data.location;
4775          }
4776          return true;
4777       }
4778       /* i/o interface blocks are required to be EXACT matches between stages:
4779        * iterate over all locations and set locations incrementally
4780        */
4781       unsigned slot = 0;
4782       for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4783          if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4784             continue;
4785          bool found = false;
4786          unsigned size = 0;
4787          nir_foreach_variable_with_modes(var, nir, m) {
4788             if (var->data.location != i)
4789                continue;
4790             /* only add slots for non-component vars or first-time component vars */
4791             if (!var->data.location_frac || !size) {
4792                /* ensure variable is given enough slots */
4793                if (nir_is_arrayed_io(var, nir->info.stage))
4794                   size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4795                else
4796                   size += glsl_count_vec4_slots(var->type, false, false);
4797             }
4798             var->data.driver_location = slot;
4799             found = true;
4800          }
4801          slot += size;
4802          if (found) {
4803             /* ensure the consumed slots aren't double iterated */
4804             i += size - 1;
4805          } else {
4806             /* locations used between stages are not required to be contiguous */
4807             if (i >= VARYING_SLOT_VAR0)
4808                slot++;
4809          }
4810       }
4811    }
4812    return true;
4813 }
4814
4815 static uint32_t
4816 zink_flat_flags(struct nir_shader *shader)
4817 {
4818    uint32_t flat_flags = 0, c = 0;
4819    nir_foreach_shader_in_variable(var, shader) {
4820       if (var->data.interpolation == INTERP_MODE_FLAT)
4821          flat_flags |= 1u << (c++);
4822    }
4823
4824    return flat_flags;
4825 }
4826
4827 struct zink_shader *
4828 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4829                    const struct pipe_stream_output_info *so_info)
4830 {
4831    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4832    bool have_psiz = false;
4833
4834    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4835                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4836
4837    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4838    ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
4839    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4840
4841    util_queue_fence_init(&ret->precompile.fence);
4842    util_dynarray_init(&ret->pipeline_libs, ret);
4843    ret->hash = _mesa_hash_pointer(ret);
4844
4845    ret->programs = _mesa_pointer_set_create(NULL);
4846    simple_mtx_init(&ret->lock, mtx_plain);
4847
4848    nir_variable_mode indirect_derefs_modes = 0;
4849    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4850        nir->info.stage == MESA_SHADER_TESS_EVAL)
4851       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4852
4853    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4854               UINT32_MAX);
4855
4856    if (nir->info.stage < MESA_SHADER_COMPUTE)
4857       create_gfx_pushconst(nir);
4858
4859    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4860        nir->info.stage == MESA_SHADER_TESS_EVAL)
4861       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4862
4863    if (nir->info.stage < MESA_SHADER_FRAGMENT)
4864       have_psiz = check_psiz(nir);
4865    if (nir->info.stage == MESA_SHADER_FRAGMENT)
4866       ret->flat_flags = zink_flat_flags(nir);
4867
4868    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4869       NIR_PASS_V(nir, fixup_io_locations);
4870
4871    NIR_PASS_V(nir, lower_basevertex);
4872    NIR_PASS_V(nir, lower_baseinstance);
4873    NIR_PASS_V(nir, lower_sparse);
4874    NIR_PASS_V(nir, split_bitfields);
4875    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4876
4877    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4878       NIR_PASS_V(nir, nir_lower_discard_or_demote,
4879                  screen->driconf.glsl_correct_derivatives_after_discard ||
4880                  nir->info.use_legacy_math_rules);
4881    }
4882
4883    if (screen->need_2D_zs)
4884       NIR_PASS_V(nir, lower_1d_shadow, screen);
4885
4886    {
4887       nir_lower_subgroups_options subgroup_options = {0};
4888       subgroup_options.lower_to_scalar = true;
4889       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4890       subgroup_options.ballot_bit_size = 32;
4891       subgroup_options.ballot_components = 4;
4892       subgroup_options.lower_subgroup_masks = true;
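           /* if the driver can't do subgroup ops in this stage, treat the
            * subgroup as a single invocation and lower votes trivially
            */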
4893       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4894          subgroup_options.subgroup_size = 1;
4895          subgroup_options.lower_vote_trivial = true;
4896       }
4897       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4898    }
4899
4900    if (so_info && so_info->num_outputs)
4901       NIR_PASS_V(nir, split_blocks);
4902
4903    optimize_nir(nir, NULL);
4904    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4905    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4906                                           nir_lower_demote_if_to_cf |
4907                                           nir_lower_terminate_if_to_cf));
4908    NIR_PASS_V(nir, nir_lower_fragcolor,
4909          nir->info.fs.color_is_dual_source ? 1 : 8);
4910
4911    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4912    bool needs_size = analyze_io(ret, nir);
4913    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4914    /* if uniforms may be inlined later, these passes are deferred to compile time */
4915    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4916       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4917       NIR_PASS_V(nir, rewrite_bo_access, screen);
4918       NIR_PASS_V(nir, remove_bo_access, ret);
4919    }
4920
4921    struct zink_bindless_info bindless = {0};
4922    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4923    bool has_bindless_io = false;
4924    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4925       var->data.is_xfb = false;
4926       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4927          has_bindless_io = true;
4928          break;
4929       }
4930    }
4931    if (has_bindless_io)
4932       NIR_PASS_V(nir, lower_bindless_io);
4933
4934    optimize_nir(nir, NULL);
4935    prune_io(nir);
4936
4937    scan_nir(screen, nir, ret);
4938    unsigned sampler_mask = 0;
4939    if (nir->info.stage == MESA_SHADER_KERNEL) {
4940       NIR_PASS_V(nir, type_images, &sampler_mask);
4941       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4942       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4943       u_foreach_bit(s, sampler_mask) {
4944          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4945          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4946          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4947          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4948          ret->num_bindings[ztype]++;
4949       }
4950       ret->sinfo.sampler_mask = sampler_mask;
4951    }
4952
4953    unsigned ubo_binding_mask = 0;
4954    unsigned ssbo_binding_mask = 0;
4955    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4956       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4957                                         nir_var_image |
4958                                         nir_var_mem_ubo |
4959                                         nir_var_mem_ssbo)) {
4960          enum zink_descriptor_type ztype;
4961          const struct glsl_type *type = glsl_without_array(var->type);
4962          if (var->data.mode == nir_var_mem_ubo) {
4963             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4964             /* buffer 0 is a push descriptor */
4965             var->data.descriptor_set = !!var->data.driver_location;
4966             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4967                                 zink_binding(nir->info.stage,
4968                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4969                                              var->data.driver_location,
4970                                              screen->compact_descriptors);
4971             assert(var->data.driver_location || var->data.binding < 10);
4972             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4973             int binding = var->data.binding;
4974
4975             if (!var->data.driver_location) {
4976                ret->has_uniforms = true;
4977             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4978                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4979                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4980                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4981                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4982                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4983                ret->num_bindings[ztype]++;
4984                ubo_binding_mask |= BITFIELD_BIT(binding);
4985             }
4986          } else if (var->data.mode == nir_var_mem_ssbo) {
4987             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4988             var->data.descriptor_set = screen->desc_set_id[ztype];
4989             var->data.binding = zink_binding(nir->info.stage,
4990                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4991                                              var->data.driver_location,
4992                                              screen->compact_descriptors);
4993             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4994                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4995                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4996                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4997                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4998                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4999                ret->num_bindings[ztype]++;
5000                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
5001             }
5002          } else {
5003             assert(var->data.mode == nir_var_uniform ||
5004                    var->data.mode == nir_var_image);
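            /* bindless resources are only counted here and rewritten later by
             * lower_bindless(); plain samplers/images get a set/binding now,
             * and anything else left in nir_var_uniform is dead and removed
             */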
5005             if (var->data.bindless) {
5006                ret->bindless = true;
5007                handle_bindless_var(nir, var, type, &bindless);
5008             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
5009                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
5010                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
5011                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
5012                ztype = zink_desc_type_from_vktype(vktype);
5013                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
5014                   ret->num_texel_buffers++;
5015                var->data.driver_location = var->data.binding;
5016                var->data.descriptor_set = screen->desc_set_id[ztype];
5017                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
5018                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5019                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
5020                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5021                if (glsl_type_is_array(var->type))
5022                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
5023                else
5024                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
5025                ret->num_bindings[ztype]++;
5026             } else if (var->data.mode == nir_var_uniform) {
5027                /* this is a dead uniform */
5028                var->data.mode = 0;
5029                exec_node_remove(&var->node);
5030             }
5031          }
5032       }
5033    }
5034    bool bindless_lowered = false;
5035    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
5036    ret->bindless |= bindless_lowered;
5037
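   /* if the device is missing either 64-bit shader feature, lower 64-bit
    * variables (the pass is told whether native int64 is still available);
    * match_tex_dests is skipped for kernels
    */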
5038    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5039       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
5040    if (nir->info.stage != MESA_SHADER_KERNEL)
5041       NIR_PASS_V(nir, match_tex_dests, ret);
5042
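   /* transform feedback: clear stale xfb flags on non-internal shaders, then
    * either rebuild the streamout info from so_info or, when both a
    * driver-added and an explicitly-located gl_PointSize exist, delete the
    * explicit one so only a single PSIZ output survives
    */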
5043    if (!nir->info.internal)
5044       nir_foreach_shader_out_variable(var, nir)
5045          var->data.explicit_xfb_buffer = 0;
5046    if (so_info && so_info->num_outputs && nir->info.outputs_written)
5047       update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
5048    else if (have_psiz) {
5049       bool have_fake_psiz = false;
5050       nir_variable *psiz = NULL;
5051       nir_foreach_shader_out_variable(var, nir) {
5052          if (var->data.location == VARYING_SLOT_PSIZ) {
5053             if (!var->data.explicit_location)
5054                have_fake_psiz = true;
5055             else
5056                psiz = var;
5057          }
5058       }
5059       if (have_fake_psiz && psiz) {
5060          psiz->data.mode = nir_var_shader_temp;
5061          nir_fixup_deref_modes(nir);
5062          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5063       }
5064    }
5065    zink_shader_serialize_blob(nir, &ret->blob);
5066    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5067
5068    ret->can_inline = true;
5069
5070    return ret;
5071 }
5072
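/* gallium finalize_nir hook: lower texture ops the backend can't emit
 * (projectors on dims SPIR-V can't project, tg4 offsets without
 * shaderImageGatherExtended), run the first optimization loop, and collect
 * inlinable uniforms when the driconf option asks for them; returns NULL
 * (no error)
 */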
5073 char *
5074 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5075 {
5076    struct zink_screen *screen = zink_screen(pscreen);
5077    nir_shader *nir = nirptr;
5078
5079    nir_lower_tex_options tex_opts = {
5080       .lower_invalid_implicit_lod = true,
5081    };
5082    /*
5083       Sampled Image must be an object whose type is OpTypeSampledImage.
5084       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5085       or Rect, and the Arrayed and MS operands must be 0.
5086       - SPIR-V spec, OpImageSampleProj* opcodes
5087     */
5088    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5089                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5090    tex_opts.lower_txp_array = true;
5091    if (!screen->info.feats.features.shaderImageGatherExtended)
5092       tex_opts.lower_tg4_offsets = true;
5093    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5094    optimize_nir(nir, NULL);
5095    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5096    if (screen->driconf.inline_uniforms)
5097       nir_find_inlinable_uniforms(nir);
5098
5099    return NULL;
5100 }
5101
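/* destroy a shader's Vulkan objects (shader object, or module + pipeline
 * library) and all CPU-side storage; by this point no program is expected to
 * still hold a reference
 */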
5102 void
5103 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5104 {
5105    _mesa_set_destroy(shader->programs, NULL);
5106    util_queue_fence_wait(&shader->precompile.fence);
5107    util_queue_fence_destroy(&shader->precompile.fence);
5108    zink_descriptor_shader_deinit(screen, shader);
5109    if (screen->info.have_EXT_shader_object) {
5110       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
5111       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.no_psiz_obj.obj, NULL);
5112    } else {
5113       if (shader->precompile.obj.mod)
5114          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5115       if (shader->precompile.gpl)
5116          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5117    }
5118    blob_finish(&shader->blob);
5119    ralloc_free(shader->spirv);
5120    free(shader->precompile.bindings);
5121    ralloc_free(shader);
5122 }
5123
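/* tear down a graphics-stage shader: unlink it from every program that uses
 * it, evicting live programs and pipeline libs from their caches, free any
 * driver-generated tcs/gs shaders it owns, then release the shader itself
 */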
5124 void
5125 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5126 {
5127    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5128    util_queue_fence_wait(&shader->precompile.fence);
5129    set_foreach(shader->programs, entry) {
5130       struct zink_gfx_program *prog = (void*)entry->key;
5131       gl_shader_stage stage = shader->info.stage;
5132       assert(stage < ZINK_GFX_SHADER_COUNT);
5133       unsigned stages_present = prog->stages_present;
5134       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5135             prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5136          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5137       unsigned idx = zink_program_cache_stages(stages_present);
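      /* if the program is still reachable from the context's cache, remove it
       * and wait for outstanding pipeline compile jobs so none of them race
       * against the teardown below
       */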
5138       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5139           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
5140          struct hash_table *ht = &prog->ctx->program_cache[idx];
5141          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5142          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5143          assert(he && he->data == prog);
5144          _mesa_hash_table_remove(ht, he);
5145          prog->base.removed = true;
5146          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5147          util_queue_fence_wait(&prog->base.cache_fence);
5148
5149          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5150             for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5151                hash_table_foreach(&prog->pipelines[r][i], entry) {
5152                   struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
5153
5154                   util_queue_fence_wait(&pc_entry->fence);
5155                }
5156             }
5157          }
5158
5159       }
5160       while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5161          struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5162          //only unref libs that cover this program's stages: every bit set in
5163          //stages_present must also be set in libs->stages_present
5164          if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
5165             continue;
5166          if (!libs->removed) {
5167             libs->removed = true;
5168             simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5169             _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5170             simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5171          }
5172          zink_gfx_lib_cache_unref(screen, libs);
5173       }
5174       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5175          prog->shaders[stage] = NULL;
5176          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5177       }
5178       /* only remove generated tcs during parent tes destruction */
5179       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5180          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
5181       if (stage != MESA_SHADER_FRAGMENT &&
5182           prog->shaders[MESA_SHADER_GEOMETRY] &&
5183           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5184           shader) {
5185          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5186       }
5187       zink_gfx_program_reference(screen, &prog, NULL);
5188    }
5189    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5190        shader->non_fs.generated_tcs) {
5191       /* automatically destroy generated tcs shaders when tes is destroyed */
5192       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5193       shader->non_fs.generated_tcs = NULL;
5194    }
5195    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5196       for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5197          if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5198              shader->non_fs.generated_gs[i][j]) {
5199             /* automatically destroy generated gs shaders when owner is destroyed */
5200             zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5201             shader->non_fs.generated_gs[i][j] = NULL;
5202          }
5203       }
5204    }
5205    zink_shader_free(screen, shader);
5206 }
5207
5208
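/* recompile the generated tcs for a new patch-vertex count by patching the
 * recorded OutputVertices word directly in the cached SPIR-V
 */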
5209 struct zink_shader_object
5210 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
5211 {
5212    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5213    /* shortcut all the nir passes since we just have to change this one word */
5214    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5215    return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
5216 }
5217
5218 /* creating a passthrough tcs shader that's roughly:
5219
5220 #version 150
5221 #extension GL_ARB_tessellation_shader : require
5222
5223 in vec4 some_var[gl_MaxPatchVertices];
5224 out vec4 some_var_out;
5225
5226 layout(push_constant) uniform tcsPushConstants {
5227     layout(offset = 0) float TessLevelInner[2];
5228     layout(offset = 8) float TessLevelOuter[4];
5229 } u_tcsPushConstants;
5230 layout(vertices = $vertices_per_patch) out;
5231 void main()
5232 {
5233   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5234   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5235   some_var_out = some_var[gl_InvocationID];
5236 }
5237
5238 */
5239 struct zink_shader *
5240 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5241 {
5242    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5243    util_queue_fence_init(&ret->precompile.fence);
5244    ret->hash = _mesa_hash_pointer(ret);
5245    ret->programs = _mesa_pointer_set_create(NULL);
5246    simple_mtx_init(&ret->lock, mtx_plain);
5247
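   /* build the passthrough tcs by hand in nir: create main(), then mirror
    * every tes input as an arrayed in/out pair copied per invocation
    */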
5248    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5249    nir_function *fn = nir_function_create(nir, "main");
5250    fn->is_entrypoint = true;
5251    nir_function_impl *impl = nir_function_impl_create(fn);
5252
5253    nir_builder b = nir_builder_at(nir_before_block(nir_start_block(impl)));
5254
5255    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
5256
5257    nir_foreach_shader_in_variable(var, tes) {
5258       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5259          continue;
5260       const struct glsl_type *in_type = var->type;
5261       const struct glsl_type *out_type = var->type;
5262       char buf[1024];
5263       snprintf(buf, sizeof(buf), "%s_out", var->name);
5264       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
5265          const struct glsl_type *type = var->type;
5266          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5267          out_type = glsl_array_type(type, vertices_per_patch, 0);
5268       }
5269
5270       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5271       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5272       out->data.location = in->data.location = var->data.location;
5273       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5274
5275       /* gl_in[] receives values from equivalent built-in output
5276          variables written by the vertex shader (section 2.14.7).  Each array
5277          element of gl_in[] is a structure holding values for a specific vertex of
5278          the input patch.  The length of gl_in[] is equal to the
5279          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5280          - ARB_tessellation_shader
5281        */
5282       /* we need to load the invocation-specific value of the vertex output and then store it to this invocation's element of the arrayed tcs output */
5283       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5284       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5285       copy_vars(&b, out_value, in_value);
5286    }
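   /* the tess factors are per-patch outputs fed from push constants below */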
5287    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5288    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5289    gl_TessLevelInner->data.patch = 1;
5290    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5291    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5292    gl_TessLevelOuter->data.patch = 1;
5293
5294    create_gfx_pushconst(nir);
5295
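   /* fetch the driver-supplied default inner/outer tessellation levels from
    * the gfx push-constant block
    */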
5296    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
5297                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
5298                                                     .base = 1, .range = 8);
5299    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
5300                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
5301                                                     .base = 2, .range = 16);
5302
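   /* gl_TessLevelInner/Outer are float arrays, so scatter the loaded vec2 and
    * vec4 one channel per array element
    */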
5303    for (unsigned i = 0; i < 2; i++) {
5304       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5305       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5306    }
5307    for (unsigned i = 0; i < 4; i++) {
5308       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5309       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5310    }
5311
5312    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5313    nir_validate_shader(nir, "created");
5314
5315    optimize_nir(nir, NULL);
5316    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5317    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5318
5319    *nir_ret = nir;
5320    zink_shader_serialize_blob(nir, &ret->blob);
5321    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5322    ret->non_fs.is_generated = true;
5323    return ret;
5324 }
5325
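/* report whether the shader samples any cube textures, letting callers decide
 * if cube-related lowering is needed
 */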
5326 bool
5327 zink_shader_has_cubes(nir_shader *nir)
5328 {
5329    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5330       const struct glsl_type *type = glsl_without_array(var->type);
5331       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
5332          return true;
5333    }
5334    return false;
5335 }
5336
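/* rebuild a nir_shader from a serialized blob using this screen's compiler options */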
5337 nir_shader *
5338 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5339 {
5340    struct blob_reader blob_reader;
5341    blob_reader_init(&blob_reader, blob->data, blob->size);
5342    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
5343 }
5344
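/* convenience wrapper: deserialize the nir stored in a zink_shader's blob */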
5345 nir_shader *
5346 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5347 {
5348    return zink_shader_blob_deserialize(screen, &zs->blob);
5349 }
5350
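/* serialize the shader into a freshly initialized blob; debug builds keep
 * names and other non-essential info whenever a shader-dump debug flag is
 * active, release builds always strip it
 */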
5351 void
5352 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5353 {
5354    blob_init(blob);
5355 #ifndef NDEBUG
5356    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5357 #else
5358    bool strip = false;
5359 #endif
5360    nir_serialize(blob, nir, strip);
5361 }
5362
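/* debug helper: deserialize a shader's nir and print it to the given stream */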
5363 void
5364 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5365 {
5366    nir_shader *nir = zink_shader_deserialize(screen, zs);
5367    nir_print_shader(nir, fp);
5368    ralloc_free(nir);
5369 }