/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"

#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"
bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
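
/* helper to recursively copy a value between two derefs: struct members and
 * array/matrix elements are copied member-by-member, scalar/vector leaves by
 * a plain load/store
 */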
static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}
#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
static void
create_gfx_pushconst(nir_shader *nir)
{
#define PUSHCONST_MEMBER(member_idx, field)                                                                     \
fields[member_idx].type =                                                                                       \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}
/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}
/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
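/* e.g., roughly (names here are illustrative only):
 *
 *    in dvec3 attr;           // consumes locations N and N+1
 *    ... = attr;              // single dvec3 load
 * becomes:
 *    in dvec2 attr;           // location N
 *    in double attr_hi;       // location N+1
 *    ... = dvec3(attr.x, attr.y, attr_hi);
 */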
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}
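
/* GL guarantees gl_BaseVertex reads 0 for non-indexed draws, which Vulkan does not,
 * so select between the real base vertex and 0 using the draw_mode_is_indexed
 * push constant
 */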
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}
static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}
static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}
struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};
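
/* point-size emulation for a geometry shader: every emitted point is expanded into
 * a 4-vertex triangle strip quad offset by +/- half of gl_PointSize, converted from
 * pixels to clip space using the viewport-scale push constants
 */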
static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
       intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   b->cursor = nir_before_instr(instr);

   // viewport-map endpoints
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}
static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;
   nir_builder b;

   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // Gets the gl_Position in and out
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if position in or gl_PointSize aren't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}
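
/* provoking-vertex emulation: the machinery below buffers every user-gs output in
 * ring-buffer arrays large enough to hold a whole strip, then re-emits each primitive
 * on end_primitive with its vertices rotated so the intended provoking vertex lands
 * in the slot Vulkan will use
 */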
struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};
static nir_ssa_def *
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod(b, nir_iadd(b, index, ring_offset),
                      nir_imm_int(b, state->ring_size));
}
/* Given the final deref of a chain of derefs, this function will walk up the chain
 * until it finds a var deref.
 *
 * It will then recreate an identical chain that ends with the provided deref.
 */
static nir_deref_instr*
replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
{
   nir_deref_instr *parent = nir_src_as_deref(old->parent);
   switch(old->deref_type) {
   case nir_deref_type_var:
      return new;
   case nir_deref_type_array:
      assert(old->arr.index.is_ssa);
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
   case nir_deref_type_array_wildcard:
   case nir_deref_type_ptr_as_array:
   case nir_deref_type_cast:
      unreachable("unexpected deref type");
   }
   unreachable("impossible deref type");
}
static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      assert(state->varyings[location]);
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location]);
      nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
      // recreate the chain of derefs that led to the store
      nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
      nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}
static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
    * is either the last or the second, depending on whether the triangle is at an odd
    * or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                         offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                             nir_imul(b, odd_prim, two)),
                                 three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                                 three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);

      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         if (state->varyings[location]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b, 0);
   }
}
static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   {
      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                                nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
      nir_end_primitive(b);

      nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
   }
   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}
static unsigned int
lower_pv_mode_vertices_for_prim(enum shader_prim prim)
{
   switch (prim) {
   case SHADER_PRIM_POINTS:
      return 1;
   case SHADER_PRIM_LINE_STRIP:
      return 2;
   case SHADER_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}
static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
      state.varyings[location] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   state.ring_size,
                                                   false),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");

   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");

   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");

   state.prim = prim;

   // initialize pos_counter and out_pos_counter
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}
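
/* line-stipple emulation, geometry-shader half: accumulate the screen-space length
 * of the line into a noperspective varying that the fragment shader converts into
 * an index into the stipple pattern
 */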
struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};
static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
                                        w_recip);
   return nir_fmul(b, ndc_point, scale);
}
static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                    nir_fadd(b, nir_load_var(b, state->stipple_counter),
                                len), 1);
   nir_pop_if(b, NULL);

   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   return true;
}
static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}
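
/* line-stipple emulation, fragment-shader half: for every set bit in the input
 * sample mask, interpolate the stipple counter at that sample and clear the sample
 * from the output mask when its pattern bit is 0
 */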
static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                            glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   b.cursor = nir_after_cf_list(&entry->body);

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);
   nir_push_loop(&b);
   {
      nir_ssa_def *value = nir_load_var(&b, v);
      nir_ssa_def *index = nir_ufind_msb(&b, value);
      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);

      nir_ssa_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
            &nir_build_deref_var(&b, stipple)->dest.ssa, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                                 nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
      nir_ssa_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
         nir_ssa_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}
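
/* smooth-line emulation: each line segment is expanded into a triangle-strip quad
 * plus two end caps (8 vertices total), with a noperspective line-coord varying
 * that the fragment shader uses for coverage-based alpha falloff
 */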
struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};
static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location]);
         assert(intrin->src[1].is_ssa);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}
static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                             nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                             nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                    nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                                    1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}
static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = shader->num_outputs++;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}
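
/* fragment-shader half of the smooth-line path: this defers to nir_lower_aaline_fs,
 * optionally wiring up the stipple counter/pattern so stippling is applied in the
 * same pass
 */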
static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      nir_builder_init(&b, entry);
      b.cursor = nir_before_cf_list(&entry->body);
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                    .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}
static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}
static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}
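
/* create a passthrough geometry shader that expands 4-vertex lines_adjacency
 * "quads" into two triangles, picking the vertex order from the provoking-vertex
 * convention
 */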
static nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* swap indices 2 and 3 */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
         nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}
static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                                            nir_imm_int(b, inlined_uniform_offset),
                                            .align_mul = 4, .align_offset = 0,
                                            .range_base = 0, .range = ~0);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
   nir_instr_remove(instr);
   return true;
}

bool
zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
                                       nir_metadata_dominance, NULL);
}
void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,

      /* We can only support 32-bit ldexp, but NIR doesn't have a flag
       * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
       * ldexp, so we don't just always lower it in NIR). Given that ldexp is
       * effectively unused (no instances in shader-db), it's not worth the
       * effort to do so.
       */
      .lower_ldexp = true,

      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_usub_borrow = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
      .max_unroll_iterations = 0,
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
      /* soft fp64 function inlining will blow up loop bodies and effectively
       * stop Vulkan drivers from unrolling the loops.
       */
      screen->nir_options.max_unroll_iterations_fp64 = 32;
   }

   /*
       The OpFRem and OpFMod instructions use cheap approximations of remainder,
       and the error can be large due to the discontinuity in trunc() and floor().
       This can produce mathematically unexpected results in some cases, such as
       FMod(x,x) computing x rather than 0, and can also cause the result to have
       a different sign than the infinitely precise result.

       -Table 84. Precision of core SPIR-V Instructions
       * for drivers that are known to have imprecise fmod for doubles, lower dmod
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}
static const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          gl_shader_stage shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}
struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}
static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}
static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}
static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return false;
   default:
      break;
   }
   return true;
}
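
/* zink lowers UBO/SSBO access to derefs of array variables (one per access
 * bit-size); the helpers below map buffer intrinsics back to those variables so
 * that statically out-of-bounds accesses can be dropped (loads become zero)
 */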
struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};
static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}
static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}
static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}
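
/* the core optimization loop: run until no pass makes progress, then do late
 * algebraic cleanup
 */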
static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
         NIR_PASS_V(s, lower_64bit_pack);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}
/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}
/**
 * Add a check for out of bounds LOD for every texel fetch op
 * It boils down to:
 * - if (lod < query_levels(tex))
 * -    res = txf(tex)
 * - else
 * -    res = (0, 0, 0, 1)
 */
static bool
lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *txf = nir_instr_as_tex(in);
   if (txf->op != nir_texop_txf)
      return false;

   b->cursor = nir_before_instr(in);
   int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
   assert(lod_idx >= 0);
   nir_src lod_src = txf->src[lod_idx].src;
   if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
      return false;

   assert(lod_src.is_ssa);
   nir_ssa_def *lod = lod_src.ssa;

   int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
   int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
   nir_tex_instr *levels = nir_tex_instr_create(b->shader,
                                                !!(offset_idx >= 0) + !!(handle_idx >= 0));
   levels->op = nir_texop_query_levels;
   levels->texture_index = txf->texture_index;
   levels->dest_type = nir_type_int | lod->bit_size;
   if (offset_idx >= 0) {
      levels->src[0].src_type = nir_tex_src_texture_offset;
      nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
   }
   if (handle_idx >= 0) {
      levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
   }
   nir_ssa_dest_init(&levels->instr, &levels->dest,
                     nir_tex_instr_dest_size(levels), 32, NULL);
   nir_builder_instr_insert(b, &levels->instr);

   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
   nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
   nir_builder_instr_insert(b, &new_txf->instr);

   nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
   nir_const_value oob_values[4] = {0};
   unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
   oob_values[3] = (txf->dest_type & nir_type_float) ?
                   nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
   nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);

   nir_pop_if(b, lod_oob_else);
   nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);

   nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
   nir_instr_remove_v(in);
   return true;
}

/* This pass is used to workaround the lack of out of bounds LOD robustness
 * for texel fetch ops in VK_EXT_image_robustness.
 */
static bool
lower_txf_lod_robustness(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
}
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}
static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   assert((int)location >= 0);

   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}
1817 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1819 for (unsigned i = 0; i < output->num_components; i++)
1820 if (!inlined[output->start_component + i])
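/* Move a fake (injected) PSIZ output off VARYING_SLOT_PSIZ and past the
 * last written output so it no longer masquerades as a real gl_PointSize
 * write; slot remapping later assigns the final location.
 */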
1826 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1828 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1829 if (last_output < VARYING_SLOT_VAR0)
1830 last_output = VARYING_SLOT_VAR0;
1833 /* this should get fixed up by slot remapping */
1834 psiz->data.location = last_output;
1837 static const struct glsl_type *
1838 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1840 /* could be dvec/dmat/mat: each member is the same */
1841 const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1842 /* determine size of each member type */
1843 unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1844 /* normalize slot idx to current type's size */
1846 unsigned slot_components = glsl_get_components(plain);
1847 if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1848 slot_components *= 2;
1849 /* create a vec4 mask of the selected slot's components out of all the components */
1850 uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1851 /* return a vecN of the selected components */
1852 slot_components = util_bitcount(mask);
1853 return glsl_vec_type(slot_components);
1856 static const struct glsl_type *
1857 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1859 const struct glsl_type *type = slot_type;
1860 unsigned slot_count = 0;
1861 unsigned cur_slot = 0;
1862 /* iterate over all the members in the struct, stopping once the slot idx is reached */
1863 for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1864 /* use array type for slot counting but return array member type for unroll */
1865 const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1866 type = glsl_without_array(arraytype);
1867 slot_count = glsl_count_vec4_slots(arraytype, false, false);
1869 *slot_idx -= (cur_slot - slot_count);
1870 if (!glsl_type_is_struct_or_ifc(type))
1871 /* this is a fully unrolled struct: find the number of vec components to output */
1872 type = clamp_slot_type(type, *slot_idx);
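/* Compute how many 32-bit components a variable exports at a given vec4
 * slot: structs are unrolled member by member, clip/cull distances use
 * their array length, and 64-bit types count double since gallium tracks
 * xfb in 32-bit units.
 */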
1877 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1879 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1880 const struct glsl_type *orig_type = var->type;
1881 const struct glsl_type *type = glsl_without_array(var->type);
1882 unsigned slot_idx = slot - so_slot;
1883 if (type != orig_type)
1884 slot_idx %= glsl_count_vec4_slots(type, false, false);
1885 /* need to find the vec4 that's being exported by this slot */
1886 while (glsl_type_is_struct_or_ifc(type))
1887 type = unroll_struct_type(type, &slot_idx);
1889 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1890 unsigned num_components = glsl_get_components(glsl_without_array(type));
1891 /* special handling: clip/cull distance are arrays with vector semantics */
1892 if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1893 num_components = glsl_array_size(type);
1895 /* this is the second vec4 */
1896 num_components %= 4;
1898 /* this is the first vec4 */
1899 num_components = MIN2(num_components, 4);
1901 assert(num_components);
1902 /* gallium handles xfb in terms of 32bit units */
1903 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1904 num_components *= 2;
1905 return num_components;
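/* Look up the stream output entry whose (gallium) register index maps back
 * to the given varying slot, using the reverse map built in update_so_info().
 */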
1908 static const struct pipe_stream_output *
1909 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1911 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1912 const struct pipe_stream_output *packed_output = &so_info->output[i];
1913 if (reverse_map[packed_output->register_index] == slot)
1914 return packed_output;
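/* Analyze the gallium stream output info against the shader's outputs:
 * whole variables that exactly match an output are marked for inline xfb
 * at their declaration, packed/partial captures are recorded and then
 * consolidated when all their components share one stream/buffer with
 * sequentially incrementing offsets, and anything left over is stored in
 * zs->sinfo for manual emission later.
 */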
1920 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1921 uint64_t outputs_written, bool have_psiz)
1923 uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1925 /* semi-copied from iris */
1926 while (outputs_written) {
1927 int bit = u_bit_scan64(&outputs_written);
1928 /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1929 if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1931 reverse_map[slot++] = bit;
1934 bool have_fake_psiz = false;
1935 nir_foreach_shader_out_variable(var, nir) {
1936 if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1937 have_fake_psiz = true;
1940 bool inlined[VARYING_SLOT_MAX][4] = {0};
1941 uint64_t packed = 0;
1942 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1943 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1944 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1945 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1946 nir_variable *psiz = NULL;
1947 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1948 const struct pipe_stream_output *output = &so_info->output[i];
1949 unsigned slot = reverse_map[output->register_index];
1950 /* always set stride to be used during draw */
1951 zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1952 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1953 nir_variable *var = NULL;
1956 var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1957 if (var->data.location == VARYING_SLOT_PSIZ)
1960 slot = reverse_map[output->register_index];
1961 if (var->data.explicit_xfb_buffer) {
1962 /* handle dvec3 where gallium splits streamout over 2 registers */
1963 for (unsigned j = 0; j < output->num_components; j++)
1964 inlined[slot][output->start_component + j] = true;
1966 if (is_inlined(inlined[slot], output))
1968 bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1969 unsigned num_components = get_slot_components(var, slot, so_slot);
1970 /* if this is the entire variable, try to blast it out during the initial declaration;
1971 * structs must be handled later to ensure accurate analysis
1973 if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1974 var->data.explicit_xfb_buffer = 1;
1975 var->data.xfb.buffer = output->output_buffer;
1976 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1977 var->data.offset = output->dst_offset * 4;
1978 var->data.stream = output->stream;
1979 for (unsigned j = 0; j < output->num_components; j++)
1980 inlined[slot][output->start_component + j] = true;
1982 /* otherwise store some metadata for later */
1983 packed |= BITFIELD64_BIT(slot);
1984 packed_components[slot] += output->num_components;
1985 packed_streams[slot] |= BITFIELD_BIT(output->stream);
1986 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1987 for (unsigned j = 0; j < output->num_components; j++)
1988 packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1993 /* if this was flagged as a packed output before, and if all the components are
1994 * being output with the same stream on the same buffer with increasing offsets, this entire variable
1995 * can be consolidated into a single output to conserve locations
1997 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1998 const struct pipe_stream_output *output = &so_info->output[i];
1999 unsigned slot = reverse_map[output->register_index];
2000 if (is_inlined(inlined[slot], output))
2002 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
2003 nir_variable *var = NULL;
2005 var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
2006 /* this is a lowered 64bit variable that can't be exported due to packing */
2007 if (var->data.is_xfb)
2010 unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
2011 /* for each variable, iterate over all the variable's slots and inline the outputs */
2012 for (unsigned j = 0; j < num_slots; j++) {
2013 slot = var->data.location + j;
2014 const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
2018 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
2019 if (!(packed & BITFIELD64_BIT(slot)) ||
2020 util_bitcount(packed_streams[slot]) != 1 ||
2021 util_bitcount(packed_buffers[slot]) != 1)
2024 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
2025 unsigned num_components = get_slot_components(var, slot, var->data.location);
2026 if (num_components != packed_components[slot])
2029 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2030 uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
2031 for (unsigned k = 1; k < num_components; k++) {
2032 /* if the offsets are not incrementing as expected, skip consolidation */
2033 if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
2035 prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
2038 /* this output can be consolidated: blast out all the data inlined */
2039 var->data.explicit_xfb_buffer = 1;
2040 var->data.xfb.buffer = output->output_buffer;
2041 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
2042 var->data.offset = output->dst_offset * 4;
2043 var->data.stream = output->stream;
2044 /* GLSL specifies that interface blocks are split per-buffer in XFB */
2045 if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
2046 zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
2047 /* mark all slot components inlined to skip subsequent loop iterations */
2048 for (unsigned j = 0; j < num_slots; j++) {
2049 slot = var->data.location + j;
2050 for (unsigned k = 0; k < packed_components[slot]; k++)
2051 inlined[slot][k] = true;
2052 packed &= ~BITFIELD64_BIT(slot);
2057 /* these are packed/explicit varyings which can't be exported with normal output */
2058 zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
2059 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2060 zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2062 zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2063 /* ensure this doesn't get output in the shader by unsetting location */
2064 if (have_fake_psiz && psiz)
2065 update_psiz_location(nir, psiz);
2068 struct decompose_state {
2069 nir_variable **split;
2074 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2076 struct decompose_state *state = data;
2077 nir_variable **split = state->split;
2078 if (instr->type != nir_instr_type_intrinsic)
2080 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2081 if (intr->intrinsic != nir_intrinsic_load_deref)
2083 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2084 nir_variable *var = nir_deref_instr_get_variable(deref);
2085 if (var != split[0])
2087 unsigned num_components = glsl_get_vector_elements(split[0]->type);
2088 b->cursor = nir_after_instr(instr);
2089 nir_ssa_def *loads[4];
2090 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2091 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2092 if (state->needs_w) {
2093 /* oob load of the w component to get the correct value for int/float */
2094 loads[3] = nir_channel(b, loads[0], 3);
2095 loads[0] = nir_channel(b, loads[0], 0);
2097 nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2098 nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2099 nir_instr_remove_v(instr);
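/* Split each decomposed vertex attribute into per-component variables so
 * unsupported vertex formats can be fetched channel by channel. Roughly
 * (GLSL names illustrative only):
 *
 *   in vec4 attr;        // original
 * becomes
 *   in vec4 attr_split0; // provides .x (plus an oob .w read when needs_w)
 *   in T    attr_split1; // provides .y
 *   in T    attr_split2; // provides .z
 *
 * and every load of the original is rewritten as a vec() of the splits.
 */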
2104 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2107 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2108 bits |= BITFIELD_BIT(var->data.driver_location);
2110 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2111 nir_variable *split[5];
2112 struct decompose_state state;
2113 state.split = split;
2114 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2117 bits |= BITFIELD_BIT(var->data.driver_location);
2118 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2119 unsigned num_components = glsl_get_vector_elements(var->type);
2120 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2121 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2122 split[i+1] = nir_variable_clone(var, nir);
2123 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2124 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2125 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2127 split[i+1]->type = new_type;
2128 split[i+1]->data.driver_location = ffs(bits) - 1;
2129 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2130 nir_shader_add_variable(nir, split[i+1]);
2132 var->data.mode = nir_var_shader_temp;
2133 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2135 nir_fixup_deref_modes(nir);
2136 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2137 optimize_nir(nir, NULL);
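/* Convert byte-based UBO/SSBO/shared offsets into typed array indices
 * matching the uint arrays that ntv emits, e.g. a 64-bit access at byte
 * offset N becomes array index N/8. When shaderInt64 is missing (or ubo0
 * does an unaligned 64-bit load), the access is split into two 32-bit ops
 * and packed back together.
 */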
2142 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2144 struct zink_screen *screen = data;
2145 const bool has_int64 = screen->info.feats.features.shaderInt64;
2146 if (instr->type != nir_instr_type_intrinsic)
2148 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2149 b->cursor = nir_before_instr(instr);
2150 switch (intr->intrinsic) {
2151 case nir_intrinsic_ssbo_atomic_fadd:
2152 case nir_intrinsic_ssbo_atomic_add:
2153 case nir_intrinsic_ssbo_atomic_umin:
2154 case nir_intrinsic_ssbo_atomic_imin:
2155 case nir_intrinsic_ssbo_atomic_umax:
2156 case nir_intrinsic_ssbo_atomic_imax:
2157 case nir_intrinsic_ssbo_atomic_and:
2158 case nir_intrinsic_ssbo_atomic_or:
2159 case nir_intrinsic_ssbo_atomic_xor:
2160 case nir_intrinsic_ssbo_atomic_exchange:
2161 case nir_intrinsic_ssbo_atomic_comp_swap: {
2162 /* convert offset to uintN_t[idx] */
2163 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2164 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2167 case nir_intrinsic_load_ssbo:
2168 case nir_intrinsic_load_ubo: {
2169 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2170 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2171 nir_src_is_const(intr->src[0]) &&
2172 nir_src_as_uint(intr->src[0]) == 0 &&
2173 nir_dest_bit_size(intr->dest) == 64 &&
2174 nir_intrinsic_align_offset(intr) % 8 != 0;
2175 force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2176 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2177 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2178 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2180 /* this is always scalarized */
2181 assert(intr->dest.ssa.num_components == 1);
2182 /* rewrite as 2x32 */
2183 nir_ssa_def *load[2];
2184 for (unsigned i = 0; i < 2; i++) {
2185 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2186 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2188 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2189 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2191 /* cast back to 64bit */
2192 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2193 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2194 nir_instr_remove(instr);
2198 case nir_intrinsic_load_shared:
2199 b->cursor = nir_before_instr(instr);
2200 bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2201 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2202 nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2203 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2205 /* this is always scalarized */
2206 assert(intr->dest.ssa.num_components == 1);
2207 /* rewrite as 2x32 */
2208 nir_ssa_def *load[2];
2209 for (unsigned i = 0; i < 2; i++)
2210 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2211 /* cast back to 64bit */
2212 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2213 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2214 nir_instr_remove(instr);
2218 case nir_intrinsic_store_ssbo: {
2219 b->cursor = nir_before_instr(instr);
2220 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2221 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2222 nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2223 /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
2225 /* this is always scalarized */
2226 assert(intr->src[0].ssa->num_components == 1);
2227 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2228 for (unsigned i = 0; i < 2; i++)
2229 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2230 nir_instr_remove(instr);
2234 case nir_intrinsic_store_shared: {
2235 b->cursor = nir_before_instr(instr);
2236 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2237 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2238 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2239 /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
2240 if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2241 /* this is always scalarized */
2242 assert(intr->src[0].ssa->num_components == 1);
2243 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2244 for (unsigned i = 0; i < 2; i++)
2245 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2246 nir_instr_remove(instr);
2257 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2259 return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
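/* Fetch (or lazily clone from the 32-bit base variable) the ssbo/ubo/
 * uniform_0 variable matching a given bit size, rewriting its type to
 * roughly struct { uintN_t base[len]; uintN_t unsized[]; } arrays so
 * derefs of any bit size can be generated against it.
 */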
2262 static nir_variable *
2263 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2265 nir_variable *var, **ptr;
2266 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2269 ptr = &bo->ssbo[bit_size >> 4];
2272 ptr = &bo->uniforms[bit_size >> 4];
2274 ptr = &bo->ubo[bit_size >> 4];
2279 var = bo->ssbo[32 >> 4];
2282 var = bo->uniforms[32 >> 4];
2284 var = bo->ubo[32 >> 4];
2286 var = nir_variable_clone(var, shader);
2288 var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2290 var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2292 nir_shader_add_variable(shader, var);
2294 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2295 fields[0].name = ralloc_strdup(shader, "base");
2296 fields[1].name = ralloc_strdup(shader, "unsized");
2297 unsigned array_size = glsl_get_length(var->type);
2298 const struct glsl_type *bare_type = glsl_without_array(var->type);
2299 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2300 unsigned length = glsl_get_length(array_type);
2301 const struct glsl_type *type;
2302 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2303 if (bit_size > 32) {
2304 assert(bit_size == 64);
2305 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2307 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2309 fields[0].type = type;
2310 fields[1].type = unsized;
2311 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2312 var->data.driver_location = idx;
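/* Rewrite an ssbo atomic as deref atomics on the typed bo variable: one
 * deref chain per component, with the remaining intrinsic srcs copied
 * over (deref atomics have no explicit offset src).
 */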
2318 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2320 nir_intrinsic_op op;
2321 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2322 switch (intr->intrinsic) {
2323 case nir_intrinsic_ssbo_atomic_fadd:
2324 op = nir_intrinsic_deref_atomic_fadd;
2326 case nir_intrinsic_ssbo_atomic_fmin:
2327 op = nir_intrinsic_deref_atomic_fmin;
2329 case nir_intrinsic_ssbo_atomic_fmax:
2330 op = nir_intrinsic_deref_atomic_fmax;
2332 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2333 op = nir_intrinsic_deref_atomic_fcomp_swap;
2335 case nir_intrinsic_ssbo_atomic_add:
2336 op = nir_intrinsic_deref_atomic_add;
2338 case nir_intrinsic_ssbo_atomic_umin:
2339 op = nir_intrinsic_deref_atomic_umin;
2341 case nir_intrinsic_ssbo_atomic_imin:
2342 op = nir_intrinsic_deref_atomic_imin;
2344 case nir_intrinsic_ssbo_atomic_umax:
2345 op = nir_intrinsic_deref_atomic_umax;
2347 case nir_intrinsic_ssbo_atomic_imax:
2348 op = nir_intrinsic_deref_atomic_imax;
2350 case nir_intrinsic_ssbo_atomic_and:
2351 op = nir_intrinsic_deref_atomic_and;
2353 case nir_intrinsic_ssbo_atomic_or:
2354 op = nir_intrinsic_deref_atomic_or;
2356 case nir_intrinsic_ssbo_atomic_xor:
2357 op = nir_intrinsic_deref_atomic_xor;
2359 case nir_intrinsic_ssbo_atomic_exchange:
2360 op = nir_intrinsic_deref_atomic_exchange;
2362 case nir_intrinsic_ssbo_atomic_comp_swap:
2363 op = nir_intrinsic_deref_atomic_comp_swap;
2366 unreachable("unknown intrinsic");
2368 nir_ssa_def *offset = intr->src[1].ssa;
2369 nir_src *src = &intr->src[0];
2370 nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2371 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2372 nir_ssa_def *idx = src->ssa;
2374 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2375 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2376 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2378 /* generate new atomic deref ops for every component */
2379 nir_ssa_def *result[4];
2380 unsigned num_components = nir_dest_num_components(intr->dest);
2381 for (unsigned i = 0; i < num_components; i++) {
2382 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2383 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2384 nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2385 new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2386 /* deref ops have no offset src, so copy the srcs after it */
2387 for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2388 nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2389 nir_builder_instr_insert(b, &new_instr->instr);
2391 result[i] = &new_instr->dest.ssa;
2392 offset = nir_iadd_imm(b, offset, 1);
2395 nir_ssa_def *load = nir_vec(b, result, num_components);
2396 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2397 nir_instr_remove(instr);
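/* Replace explicit ubo/ssbo intrinsics with variable derefs into the
 * typed bo variables so ntv only ever sees deref-based access; loads and
 * stores are emitted one component at a time.
 */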
2401 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2403 struct bo_vars *bo = data;
2404 if (instr->type != nir_instr_type_intrinsic)
2406 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2407 nir_variable *var = NULL;
2408 nir_ssa_def *offset = NULL;
2409 bool is_load = true;
2410 b->cursor = nir_before_instr(instr);
2413 switch (intr->intrinsic) {
2414 case nir_intrinsic_ssbo_atomic_fadd:
2415 case nir_intrinsic_ssbo_atomic_fmin:
2416 case nir_intrinsic_ssbo_atomic_fmax:
2417 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2418 case nir_intrinsic_ssbo_atomic_add:
2419 case nir_intrinsic_ssbo_atomic_umin:
2420 case nir_intrinsic_ssbo_atomic_imin:
2421 case nir_intrinsic_ssbo_atomic_umax:
2422 case nir_intrinsic_ssbo_atomic_imax:
2423 case nir_intrinsic_ssbo_atomic_and:
2424 case nir_intrinsic_ssbo_atomic_or:
2425 case nir_intrinsic_ssbo_atomic_xor:
2426 case nir_intrinsic_ssbo_atomic_exchange:
2427 case nir_intrinsic_ssbo_atomic_comp_swap:
2428 rewrite_atomic_ssbo_instr(b, instr, bo);
2430 case nir_intrinsic_store_ssbo:
2431 src = &intr->src[1];
2432 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2433 offset = intr->src[2].ssa;
2436 case nir_intrinsic_load_ssbo:
2437 src = &intr->src[0];
2438 var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2439 offset = intr->src[1].ssa;
2441 case nir_intrinsic_load_ubo:
2442 src = &intr->src[0];
2443 var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2444 offset = intr->src[1].ssa;
2452 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2453 nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2454 if (!ssbo && bo->first_ubo && var->data.driver_location)
2455 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2456 else if (ssbo && bo->first_ssbo)
2457 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2458 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2459 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2460 assert(intr->num_components <= 2);
2462 nir_ssa_def *result[2];
2463 for (unsigned i = 0; i < intr->num_components; i++) {
2464 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2465 result[i] = nir_load_deref(b, deref_arr);
2466 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2467 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2468 offset = nir_iadd_imm(b, offset, 1);
2470 nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2471 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2473 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2474 nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2476 nir_instr_remove(instr);
2481 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2483 struct bo_vars bo = get_bo_vars(zs, shader);
2484 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2488 find_var_deref(nir_shader *nir, nir_variable *var)
2490 nir_foreach_function(function, nir) {
2491 if (!function->impl)
2494 nir_foreach_block(block, function->impl) {
2495 nir_foreach_instr(instr, block) {
2496 if (instr->type != nir_instr_type_deref)
2498 nir_deref_instr *deref = nir_instr_as_deref(instr);
2499 if (deref->deref_type == nir_deref_type_var && deref->var == var)
2507 struct clamp_layer_output_state {
2508 nir_variable *original;
2509 nir_variable *clamped;
2513 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2515 nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2516 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2517 .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2518 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2519 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2520 nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2521 nir_load_deref(b, original_deref),
2523 nir_store_deref(b, clamped_deref, layer, 0);
2527 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2529 struct clamp_layer_output_state *state = data;
2530 switch (instr->type) {
2531 case nir_instr_type_intrinsic: {
2532 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2533 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2534 intr->intrinsic != nir_intrinsic_emit_vertex)
2536 b->cursor = nir_before_instr(instr);
2537 clamp_layer_output_emit(b, state);
2540 default: return false;
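/* Driver workaround (needs_sanitised_layer): writing gl_Layer for a
 * non-layered framebuffer misbehaves on some implementations, so move the
 * real layer output to a generic slot and emit a second output that only
 * passes the value through when the framebuffer_is_layered push constant
 * is set.
 */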
2545 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2547 switch (vs->info.stage) {
2548 case MESA_SHADER_VERTEX:
2549 case MESA_SHADER_GEOMETRY:
2550 case MESA_SHADER_TESS_EVAL:
2553 unreachable("invalid last vertex stage!");
2555 struct clamp_layer_output_state state = {0};
2556 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2557 if (!state.original || !find_var_deref(vs, state.original))
2559 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2560 state.clamped->data.location = VARYING_SLOT_LAYER;
2561 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2562 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2563 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2564 state.original->data.driver_location = (*next_location)++;
2566 fs_var->data.location = state.original->data.location;
2567 fs_var->data.driver_location = state.original->data.driver_location;
2570 if (state.original->data.explicit_xfb_buffer) {
2571 /* This will xfb the clamped output instead of the original, but that's still better than nothing */
2572 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2573 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2574 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2575 state.clamped->data.offset = state.original->data.offset;
2576 state.clamped->data.stream = state.original->data.stream;
2578 state.original->data.mode = nir_var_shader_temp;
2579 nir_fixup_deref_modes(vs);
2581 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2582 nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2585 nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2586 nir_builder_init(&b, impl);
2587 assert(impl->end_block->predecessors->entries == 1);
2588 b.cursor = nir_after_cf_list(&impl->body);
2589 clamp_layer_output_emit(&b, &state);
2590 nir_metadata_preserve(impl, nir_metadata_dominance);
2592 optimize_nir(vs, NULL);
2593 NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
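/* Assign a packed driver_location for a producer-stage output: built-in
 * slots get a sentinel and are skipped, patch slots are rebased to
 * PATCH0, and everything else takes the next free entries in slot_map
 * (one per vec4 slot the variable consumes).
 */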
2598 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2600 unsigned slot = var->data.location;
2603 case VARYING_SLOT_POS:
2604 case VARYING_SLOT_PNTC:
2605 case VARYING_SLOT_PSIZ:
2606 case VARYING_SLOT_LAYER:
2607 case VARYING_SLOT_PRIMITIVE_ID:
2608 case VARYING_SLOT_CLIP_DIST0:
2609 case VARYING_SLOT_CULL_DIST0:
2610 case VARYING_SLOT_VIEWPORT:
2611 case VARYING_SLOT_FACE:
2612 case VARYING_SLOT_TESS_LEVEL_OUTER:
2613 case VARYING_SLOT_TESS_LEVEL_INNER:
2614 /* use a sentinel value to avoid counting later */
2615 var->data.driver_location = UINT_MAX;
2619 if (var->data.patch) {
2620 assert(slot >= VARYING_SLOT_PATCH0);
2621 slot -= VARYING_SLOT_PATCH0;
2623 if (slot_map[slot] == 0xff) {
2624 assert(*reserved < MAX_VARYING);
2626 if (nir_is_arrayed_io(var, stage))
2627 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2629 num_slots = glsl_count_vec4_slots(var->type, false, false);
2630 assert(*reserved + num_slots <= MAX_VARYING);
2631 for (unsigned i = 0; i < num_slots; i++)
2632 slot_map[slot + i] = (*reserved)++;
2634 slot = slot_map[slot];
2635 assert(slot < MAX_VARYING);
2636 var->data.driver_location = slot;
2640 ALWAYS_INLINE static bool
2641 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2643 if (stage != MESA_SHADER_FRAGMENT)
2645 return var->data.location >= VARYING_SLOT_TEX0 &&
2646 var->data.location <= VARYING_SLOT_TEX7;
2650 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2652 unsigned slot = var->data.location;
2654 case VARYING_SLOT_POS:
2655 case VARYING_SLOT_PNTC:
2656 case VARYING_SLOT_PSIZ:
2657 case VARYING_SLOT_LAYER:
2658 case VARYING_SLOT_PRIMITIVE_ID:
2659 case VARYING_SLOT_CLIP_DIST0:
2660 case VARYING_SLOT_CULL_DIST0:
2661 case VARYING_SLOT_VIEWPORT:
2662 case VARYING_SLOT_FACE:
2663 case VARYING_SLOT_TESS_LEVEL_OUTER:
2664 case VARYING_SLOT_TESS_LEVEL_INNER:
2665 /* use a sentinel value to avoid counting later */
2666 var->data.driver_location = UINT_MAX;
2669 if (var->data.patch) {
2670 assert(slot >= VARYING_SLOT_PATCH0);
2671 slot -= VARYING_SLOT_PATCH0;
2673 if (slot_map[slot] == (unsigned char)-1) {
2674 /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2675 * so keep for now and eliminate later
2677 if (is_texcoord(stage, var)) {
2678 var->data.driver_location = -1;
2681 if (stage != MESA_SHADER_TESS_CTRL)
2684 /* patch variables may be read in the workgroup */
2685 slot_map[slot] = (*reserved)++;
2687 var->data.driver_location = slot_map[slot];
2694 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2696 nir_variable *var = data;
2697 if (instr->type != nir_instr_type_intrinsic)
2700 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2701 if (intr->intrinsic != nir_intrinsic_load_deref)
2703 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2704 if (deref_var != var)
2706 b->cursor = nir_before_instr(instr);
2707 nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2708 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2709 switch (var->data.location) {
2710 case VARYING_SLOT_COL0:
2711 case VARYING_SLOT_COL1:
2712 case VARYING_SLOT_BFC0:
2713 case VARYING_SLOT_BFC1:
2714 /* default color is 0,0,0,1 */
2715 if (nir_dest_num_components(intr->dest) == 4)
2716 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2722 nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2723 nir_instr_remove(instr);
2728 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2730 unsigned reserved = 0;
2731 unsigned char slot_map[VARYING_SLOT_MAX];
2732 memset(slot_map, -1, sizeof(slot_map));
2733 bool do_fixup = false;
2734 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2735 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2736 /* remove injected pointsize from all but the last vertex stage */
2737 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2738 if (var && !var->data.explicit_location) {
2739 var->data.mode = nir_var_shader_temp;
2740 nir_fixup_deref_modes(producer);
2741 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2742 optimize_nir(producer, NULL);
2745 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2746 /* never assign from tcs -> tes, always invert */
2747 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2748 assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2749 nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2750 if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2751 /* this is an output, nothing more needs to be done for it to be dropped */
2755 nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2756 assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2757 nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2758 if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2760 /* input needs to be rewritten */
2761 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2764 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2765 do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2769 nir_fixup_deref_modes(nir);
2770 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2771 optimize_nir(nir, NULL);
2774 /* all types that hit this function contain something that is 64bit */
2775 static const struct glsl_type *
2776 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2778 if (glsl_type_is_array(type)) {
2779 const struct glsl_type *child = glsl_get_array_element(type);
2780 unsigned elements = glsl_array_size(type);
2781 unsigned stride = glsl_get_explicit_stride(type);
2782 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2784 /* rewrite structs recursively */
2785 if (glsl_type_is_struct_or_ifc(type)) {
2786 unsigned nmembers = glsl_get_length(type);
2787 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2788 unsigned xfb_offset = 0;
2789 for (unsigned i = 0; i < nmembers; i++) {
2790 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2792 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2793 if (i < nmembers - 1 && xfb_offset % 8 &&
2794 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2795 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2796 var->data.is_xfb = true;
2798 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2800 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2802 if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2804 if (doubles_only && glsl_type_is_vector_or_scalar(type))
2805 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2806 enum glsl_base_type base_type;
2807 switch (glsl_get_base_type(type)) {
2808 case GLSL_TYPE_UINT64:
2809 base_type = GLSL_TYPE_UINT;
2811 case GLSL_TYPE_INT64:
2812 base_type = GLSL_TYPE_INT;
2814 case GLSL_TYPE_DOUBLE:
2815 base_type = GLSL_TYPE_FLOAT;
2818 unreachable("unknown 64-bit vertex attribute format!");
2820 if (glsl_type_is_scalar(type))
2821 return glsl_vector_type(base_type, 2);
2822 unsigned num_components;
2823 if (glsl_type_is_matrix(type)) {
2824 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2825 unsigned vec_components = glsl_get_vector_elements(type);
2826 if (vec_components == 3)
2828 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2830 num_components = glsl_get_vector_elements(type) * 2;
2831 if (num_components <= 4)
2832 return glsl_vector_type(base_type, num_components);
2834 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2835 struct glsl_struct_field fields[8] = {0};
2836 unsigned remaining = num_components;
2837 unsigned nfields = 0;
2838 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2839 assert(i < ARRAY_SIZE(fields));
2840 fields[i].name = "";
2841 fields[i].offset = i * 16;
2842 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2845 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2846 return glsl_struct_type(fields, nfields, buf, true);
2849 static const struct glsl_type *
2850 deref_is_matrix(nir_deref_instr *deref)
2852 if (glsl_type_is_matrix(deref->type))
2854 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2856 return deref_is_matrix(parent);
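/* Walk one function and rewrite every deref/load/store of a 64-bit
 * variable against its rewritten 32-bit type: stores unpack each 64-bit
 * channel into a pair of 32-bit values, loads pack pairs back together,
 * and (possibly indirectly indexed) matrix rows get per-column if-blocks.
 */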
2861 lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
2862 struct hash_table *derefs, struct set *deletes, bool doubles_only)
2864 bool func_progress = false;
2865 if (!function->impl)
2868 nir_builder_init(&b, function->impl);
2869 nir_foreach_block(block, function->impl) {
2870 nir_foreach_instr_safe(instr, block) {
2871 switch (instr->type) {
2872 case nir_instr_type_deref: {
2873 nir_deref_instr *deref = nir_instr_as_deref(instr);
2874 if (!(deref->modes & var->data.mode))
2876 if (nir_deref_instr_get_variable(deref) != var)
2879 /* matrix types are special: store the original deref type for later use */
2880 const struct glsl_type *matrix = deref_is_matrix(deref);
2881 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2883 /* if this isn't a direct matrix deref, it may be a matrix row deref */
2884 hash_table_foreach(derefs, he) {
2885 /* propagate parent matrix type to row deref */
2886 if (he->key == parent)
2891 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2892 if (deref->deref_type == nir_deref_type_var)
2893 deref->type = var->type;
2895 deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2898 case nir_instr_type_intrinsic: {
2899 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2900 if (intr->intrinsic != nir_intrinsic_store_deref &&
2901 intr->intrinsic != nir_intrinsic_load_deref)
2903 if (nir_intrinsic_get_var(intr, 0) != var)
2905 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2906 (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2908 b.cursor = nir_before_instr(instr);
2909 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2910 unsigned num_components = intr->num_components * 2;
2911 nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2912 /* this is the stored matrix type from the deref */
2913 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2914 const struct glsl_type *matrix = he ? he->data : NULL;
2915 if (doubles_only && !matrix)
2917 func_progress = true;
2918 if (intr->intrinsic == nir_intrinsic_store_deref) {
2919 /* first, unpack the src data to 32bit vec2 components */
2920 for (unsigned i = 0; i < intr->num_components; i++) {
2921 nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2922 comp[i * 2] = nir_channel(&b, ssa, 0);
2923 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2925 unsigned wrmask = nir_intrinsic_write_mask(intr);
2927 /* expand writemask for doubled components */
2928 for (unsigned i = 0; i < intr->num_components; i++) {
2929 if (wrmask & BITFIELD_BIT(i))
2930 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2933 /* matrix types always come from array (row) derefs */
2934 assert(deref->deref_type == nir_deref_type_array);
2935 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2936 /* let optimization clean up consts later */
2937 nir_ssa_def *index = deref->arr.index.ssa;
2938 /* this might be an indirect array index:
2939 * - iterate over matrix columns
2940 * - add if blocks for each column
2941 * - perform the store in the block
2943 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2944 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2945 unsigned vec_components = glsl_get_vector_elements(matrix);
2946 /* always clamp dvec3 to 4 components */
2947 if (vec_components == 3)
2949 unsigned start_component = idx * vec_components * 2;
2951 unsigned member = start_component / 4;
2952 /* number of components remaining */
2953 unsigned remaining = num_components;
2954 for (unsigned i = 0; i < num_components; member++) {
2955 if (!(mask & BITFIELD_BIT(i)))
2957 assert(member < glsl_get_length(var_deref->type));
2958 /* deref the rewritten struct to the appropriate vec4/vec2 */
2959 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2960 unsigned incr = MIN2(remaining, 4);
2961 /* assemble the write component vec */
2962 nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2963 /* use the number of components being written as the writemask */
2964 if (glsl_get_vector_elements(strct->type) > val->num_components)
2965 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2966 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2970 nir_pop_if(&b, NULL);
2972 _mesa_set_add(deletes, &deref->instr);
2973 } else if (num_components <= 4) {
2974 /* simple store case: just write out the components */
2975 nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2976 nir_store_deref(&b, deref, dest, mask);
2978 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2979 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2980 if (!(mask & BITFIELD_MASK(4)))
2982 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2983 nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2984 if (glsl_get_vector_elements(strct->type) > dest->num_components)
2985 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2986 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
2991 nir_ssa_def *dest = NULL;
2993 /* matrix types always come from array (row) derefs */
2994 assert(deref->deref_type == nir_deref_type_array);
2995 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2996 /* let optimization clean up consts later */
2997 nir_ssa_def *index = deref->arr.index.ssa;
2998 /* this might be an indirect array index:
2999 * - iterate over matrix columns
3000 * - add if blocks for each column
3001 * - phi the loads using the array index
3003 unsigned cols = glsl_get_matrix_columns(matrix);
3004 nir_ssa_def *dests[4];
3005 for (unsigned idx = 0; idx < cols; idx++) {
3006 /* don't add an if for the final row: this will be handled in the else */
3008 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3009 unsigned vec_components = glsl_get_vector_elements(matrix);
3010 /* always clamp dvec3 to 4 components */
3011 if (vec_components == 3)
3013 unsigned start_component = idx * vec_components * 2;
3015 unsigned member = start_component / 4;
3016 /* number of components remaining */
3017 unsigned remaining = num_components;
3018 /* component index */
3019 unsigned comp_idx = 0;
3020 for (unsigned i = 0; i < num_components; member++) {
3021 assert(member < glsl_get_length(var_deref->type));
3022 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3023 nir_ssa_def *load = nir_load_deref(&b, strct);
3024 unsigned incr = MIN2(remaining, 4);
3025 /* repack the loads to 64bit */
3026 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3027 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3031 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3033 nir_push_else(&b, NULL);
3035 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3036 for (unsigned idx = cols - 1; idx >= 1; idx--) {
3037 nir_pop_if(&b, NULL);
3038 dest = nir_if_phi(&b, dests[idx - 1], dest);
3040 _mesa_set_add(deletes, &deref->instr);
3041 } else if (num_components <= 4) {
3042 /* simple load case */
3043 nir_ssa_def *load = nir_load_deref(&b, deref);
3044 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3045 for (unsigned i = 0; i < intr->num_components; i++) {
3046 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3048 dest = nir_vec(&b, comp, intr->num_components);
3050 /* loading > 4 components: access the struct and load the appropriate vec4 members */
3051 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3052 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3053 nir_ssa_def *load = nir_load_deref(&b, strct);
3054 comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
3055 if (num_components > 2)
3056 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3058 dest = nir_vec(&b, comp, intr->num_components);
3060 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3062 _mesa_set_add(deletes, instr);
3071 nir_metadata_preserve(function->impl, nir_metadata_none);
3072 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3073 set_foreach_remove(deletes, he)
3074 nir_instr_remove((void*)he->key);
3075 return func_progress;
3079 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3080 struct set *deletes, bool doubles_only)
3082 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3084 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3085 /* once type is rewritten, rewrite all loads and stores */
3086 nir_foreach_function(function, shader)
3087 lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3091 /* rewrite all input/output variables using 32bit types and load/stores */
3093 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3095 bool progress = false;
3096 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3097 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3098 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3099 progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3100 nir_foreach_function(function, shader) {
3101 nir_foreach_function_temp_variable(var, function->impl) {
3102 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3104 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3105 progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3108 ralloc_free(deletes);
3109 ralloc_free(derefs);
3111 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3112 nir_lower_phis_to_scalar(shader, false);
3113 optimize_nir(shader, NULL);
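/* Split struct-typed output blocks into one variable per member,
 * rewriting struct derefs to target the new variables; this repeats until
 * no multi-slot struct outputs remain.
 */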
3119 split_blocks(nir_shader *nir)
3121 bool progress = false;
3122 bool changed = true;
3125 nir_foreach_shader_out_variable(var, nir) {
3126 const struct glsl_type *base_type = glsl_without_array(var->type);
3127 nir_variable *members[32]; // can't have more than this without breaking NIR
3128 if (!glsl_type_is_struct(base_type))
3131 if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3133 if (glsl_count_attribute_slots(var->type, false) == 1)
3135 unsigned offset = 0;
3136 for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3137 members[i] = nir_variable_clone(var, nir);
3138 members[i]->type = glsl_get_struct_field(var->type, i);
3139 members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3140 members[i]->data.location += offset;
3141 offset += glsl_count_attribute_slots(members[i]->type, false);
3142 nir_shader_add_variable(nir, members[i]);
3144 nir_foreach_function(function, nir) {
3145 bool func_progress = false;
3146 if (!function->impl)
3149 nir_builder_init(&b, function->impl);
3150 nir_foreach_block(block, function->impl) {
3151 nir_foreach_instr_safe(instr, block) {
3152 switch (instr->type) {
3153 case nir_instr_type_deref: {
3154 nir_deref_instr *deref = nir_instr_as_deref(instr);
3155 if (!(deref->modes & nir_var_shader_out))
3157 if (nir_deref_instr_get_variable(deref) != var)
3159 if (deref->deref_type != nir_deref_type_struct)
3161 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3162 if (parent->deref_type != nir_deref_type_var)
3164 deref->modes = nir_var_shader_temp;
3165 parent->modes = nir_var_shader_temp;
3166 b.cursor = nir_before_instr(instr);
3167 nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3168 nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3169 nir_instr_remove(&deref->instr);
3170 func_progress = true;
3178 nir_metadata_preserve(function->impl, nir_metadata_none);
3180 var->data.mode = nir_var_shader_temp;
3189 zink_shader_dump(void *words, size_t size, const char *file)
3191 FILE *fp = fopen(file, "wb");
3193 fwrite(words, 1, size, fp);
3195 fprintf(stderr, "wrote '%s'...\n", file);
3199 static struct zink_shader_object
3200 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3202 VkShaderModuleCreateInfo smci = {0};
3203 VkShaderCreateInfoEXT sci = {0};
3208 if (zink_debug & ZINK_DEBUG_SPIRV) {
3211 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3212 zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3215 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3216 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3217 if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3218 sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3219 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3220 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3221 sci.pCode = spirv->words;
3223 sci.setLayoutCount = 2;
3224 VkDescriptorSetLayout dsl[2] = {0};
3225 dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3226 sci.pSetLayouts = dsl;
3227 VkPushConstantRange pcr;
3228 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3230 pcr.size = sizeof(struct zink_gfx_push_constant);
3231 sci.pushConstantRangeCount = 1;
3232 sci.pPushConstantRanges = &pcr;
3234 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3235 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3236 smci.pCode = spirv->words;
3239 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3240 static const struct spirv_to_nir_options spirv_options = {
3241 .environment = NIR_SPIRV_VULKAN,
3246 .tessellation = true,
3247 .float_controls = true,
3248 .image_ms_array = true,
3249 .image_read_without_format = true,
3250 .image_write_without_format = true,
3251 .storage_image_ms = true,
3252 .geometry_streams = true,
3253 .storage_8bit = true,
3254 .storage_16bit = true,
3255 .variable_pointers = true,
3256 .stencil_export = true,
3257 .post_depth_coverage = true,
3258 .transform_feedback = true,
3259 .device_group = true,
3260 .draw_parameters = true,
3261 .shader_viewport_index_layer = true,
3263 .physical_storage_buffer_address = true,
3264 .int64_atomics = true,
3265 .subgroup_arithmetic = true,
3266 .subgroup_basic = true,
3267 .subgroup_ballot = true,
3268 .subgroup_quad = true,
3269 .subgroup_shuffle = true,
3270 .subgroup_vote = true,
3271 .vk_memory_model = true,
3272 .vk_memory_model_device_scope = true,
3275 .demote_to_helper_invocation = true,
3276 .sparse_residency = true,
3279 .ubo_addr_format = nir_address_format_32bit_index_offset,
3280 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3281 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3282 .push_const_addr_format = nir_address_format_logical,
3283 .shared_addr_format = nir_address_format_32bit_offset,
3285 uint32_t num_spec_entries = 0;
3286 struct nir_spirv_specialization *spec_entries = NULL;
3287 VkSpecializationInfo sinfo = {0};
3288 VkSpecializationMapEntry me[3];
3289 uint32_t size[3] = {1,1,1};
3290 if (!zs->info.workgroup_size[0]) {
3291 sinfo.mapEntryCount = 3;
3292 sinfo.pMapEntries = &me[0];
3293 sinfo.dataSize = sizeof(uint32_t) * 3;
3295 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3296 for (int i = 0; i < 3; i++) {
3297 me[i].size = sizeof(uint32_t);
3298 me[i].constantID = ids[i];
3299 me[i].offset = i * sizeof(uint32_t);
3301 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3303 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3304 spec_entries, num_spec_entries,
3305 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3313 struct zink_shader_object obj;
3314 if (!separate || !screen->info.have_EXT_shader_object)
3315 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3317 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3318 bool success = zink_screen_handle_vkresult(screen, ret);
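/* Demote shader inputs/outputs that are never dereferenced to temporaries
 * and let dead-variable removal delete them.
 */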
3324 prune_io(nir_shader *nir)
3326 nir_foreach_shader_in_variable_safe(var, nir) {
3327 if (!find_var_deref(nir, var))
3328 var->data.mode = nir_var_shader_temp;
3330 nir_foreach_shader_out_variable_safe(var, nir) {
3331 if (!find_var_deref(nir, var))
3332 var->data.mode = nir_var_shader_temp;
3334 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3338 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3340 /* unconvert from zink_binding() */
3341 uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3342 assert(sampler_id < 32); //bitfield size for tracking
3343 zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
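/* worked example of the "unconvert" above, assuming gallium's
 * PIPE_MAX_SAMPLERS == 32 and MESA_SHADER_FRAGMENT == 4: a fragment sampler
 * with index 3 was given binding 4 * 32 + 3 == 131 by zink_binding(), so
 * here sampler_id == 131 - (32 * 4) == 3, small enough for the 32-bit mask.
 */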
3346 static nir_ssa_def *
3347 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
3350 const struct glsl_type *type = glsl_without_array(var->type);
3351 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3352 bool is_int = glsl_base_type_is_integer(ret_type);
3353 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3354 unsigned dest_size = nir_dest_bit_size(tex->dest);
3355 b->cursor = nir_after_instr(&tex->instr);
3356 unsigned num_components = nir_dest_num_components(tex->dest);
3357 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3358 if (bit_size == dest_size && !rewrite_depth)
3359 return NULL;
3360 nir_ssa_def *dest = &tex->dest.ssa;
3361 if (rewrite_depth && data) {
3362 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3363 flag_shadow_tex(var, data);
3364 else
3365 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3366 return NULL;
3367 }
3368 if (bit_size != dest_size) {
3369 tex->dest.ssa.bit_size = bit_size;
3370 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3372 if (is_int) {
3373 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3374 dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3375 else
3376 dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3377 } else {
3378 dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3379 }
3382 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3383 } else if (rewrite_depth) {
3389 struct lower_zs_swizzle_state {
3391 unsigned base_sampler_id;
3392 const struct zink_zs_swizzle_key *swizzle;
3396 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3398 struct lower_zs_swizzle_state *state = data;
3399 const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3400 assert(state->shadow_only || swizzle_key);
3401 if (instr->type != nir_instr_type_tex)
3402 return false;
3403 nir_tex_instr *tex = nir_instr_as_tex(instr);
3404 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3405 (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3406 return false;
3407 if (tex->is_shadow && tex->op == nir_texop_tg4)
3408 /* Will not even try to emulate the shadow comparison */
3409 return false;
3410 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3411 nir_variable *var = NULL;
3412 if (handle != -1)
3413 /* gtfo bindless depth texture mode */
3414 return false;
3415 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3416 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3417 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3418 if (tex->texture_index >= img->data.driver_location &&
3419 tex->texture_index < img->data.driver_location + size) {
3426 uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3427 const struct glsl_type *type = glsl_without_array(var->type);
3428 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3429 bool is_int = glsl_base_type_is_integer(ret_type);
3430 unsigned num_components = nir_dest_num_components(tex->dest);
3432 tex->is_new_style_shadow = true;
3433 nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3434 assert(dest || !state->shadow_only);
3435 if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3436 return false;
3437 else if (!dest)
3438 dest = &tex->dest.ssa;
3439 else
3440 tex->dest.ssa.num_components = 1;
3441 if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3442 /* these require manual swizzles */
3443 if (tex->op == nir_texop_tg4) {
3444 assert(!tex->is_shadow);
3445 nir_ssa_def *swizzle;
3446 switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3447 case PIPE_SWIZZLE_0:
3448 swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3449 break;
3450 case PIPE_SWIZZLE_1:
3451 if (is_int)
3452 swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
3453 else
3454 swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
3455 break;
3456 default:
3457 if (!tex->component)
3462 nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3465 nir_ssa_def *vec[4];
3466 for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3467 switch (swizzle_key->swizzle[sampler_id].s[i]) {
3468 case PIPE_SWIZZLE_0:
3469 vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3470 break;
3471 case PIPE_SWIZZLE_1:
3472 if (is_int)
3473 vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3474 else
3475 vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3476 break;
3477 default:
3478 vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3482 nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3483 nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3485 assert(tex->is_shadow);
3486 nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3487 nir_ssa_def *splat = nir_vec(b, vec, num_components);
3488 nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3494 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3496 unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3497 struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3498 return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
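/* illustrative case for this pass (hypothetical GLSL with legacy
 * DEPTH_TEXTURE_MODE-style swizzling):
 *
 *    uniform sampler2DShadow s;
 *    gl_FragColor = shadow2D(s, p);   // GL expects a vec4 result
 *
 * Vulkan depth comparison returns a single scalar, so the scalar result is
 * splatted, or swizzled per the zink_zs_swizzle_key, to rebuild the vec4.
 */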
3502 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3504 if (instr->type != nir_instr_type_intrinsic)
3505 return false;
3506 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3507 if (intr->intrinsic != nir_intrinsic_load_deref)
3508 return false;
3509 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3510 if (deref_var->data.location != VARYING_SLOT_PNTC)
3511 return false;
3512 b->cursor = nir_after_instr(instr);
3513 nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3514 nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
3515 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3516 return true;
3517 }
3519 static bool
3520 invert_point_coord(nir_shader *nir)
3522 if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
3523 return false;
3524 return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
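/* net effect (sketch): every load of gl_PointCoord (VARYING_SLOT_PNTC)
 * becomes vec2(pc.x, 1.0 - pc.y), flipping the y origin when the point
 * sprite convention requested by the state tracker disagrees with Vulkan's.
 */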
3527 static struct zink_shader_object
3528 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
3530 struct zink_shader_info *sinfo = &zs->sinfo;
3533 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3535 struct zink_shader_object obj;
3536 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3538 obj = zink_shader_spirv_compile(screen, zs, spirv, separate);
3540 /* TODO: determine whether there's any reason to cache spirv output */
3541 if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3549 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3550 nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
3552 struct zink_shader_info *sinfo = &zs->sinfo;
3553 bool need_optimize = false;
3554 bool inlined_uniforms = false;
3557 if (key->inline_uniforms) {
3558 NIR_PASS_V(nir, nir_inline_uniforms,
3559 nir->info.num_inlinable_uniforms,
3560 key->base.inlined_uniform_values,
3561 nir->info.inlinable_uniform_dw_offsets);
3563 inlined_uniforms = true;
3566 /* TODO: use a separate mem ctx here for ralloc */
3568 if (!screen->optimal_keys) {
3569 switch (zs->info.stage) {
3570 case MESA_SHADER_VERTEX: {
3571 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3572 const struct zink_vs_key *vs_key = zink_vs_key(key);
3573 switch (vs_key->size) {
3575 decomposed_attrs = vs_key->u32.decomposed_attrs;
3576 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3579 decomposed_attrs = vs_key->u16.decomposed_attrs;
3580 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3583 decomposed_attrs = vs_key->u8.decomposed_attrs;
3584 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3588 if (decomposed_attrs || decomposed_attrs_without_w)
3589 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3593 case MESA_SHADER_GEOMETRY:
3594 if (zink_gs_key(key)->lower_line_stipple) {
3595 NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3596 NIR_PASS_V(nir, nir_lower_var_copies);
3597 need_optimize = true;
3600 if (zink_gs_key(key)->lower_line_smooth) {
3601 NIR_PASS_V(nir, lower_line_smooth_gs);
3602 NIR_PASS_V(nir, nir_lower_var_copies);
3603 need_optimize = true;
3606 if (zink_gs_key(key)->lower_gl_point) {
3607 NIR_PASS_V(nir, lower_gl_point_gs);
3608 need_optimize = true;
3611 if (zink_gs_key(key)->lower_pv_mode) {
3612 NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3613 need_optimize = true; //TODO verify that this is required
3622 switch (zs->info.stage) {
3623 case MESA_SHADER_VERTEX:
3624 case MESA_SHADER_TESS_EVAL:
3625 case MESA_SHADER_GEOMETRY:
3626 if (zink_vs_key_base(key)->last_vertex_stage) {
3627 if (zs->sinfo.have_xfb)
3628 sinfo->last_vertex = true;
3630 if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3631 NIR_PASS_V(nir, nir_lower_clip_halfz);
3633 if (zink_vs_key_base(key)->push_drawid) {
3634 NIR_PASS_V(nir, lower_drawid);
3637 if (zink_vs_key_base(key)->robust_access)
3638 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3640 case MESA_SHADER_FRAGMENT:
3641 if (zink_fs_key(key)->lower_line_smooth) {
3642 NIR_PASS_V(nir, lower_line_smooth_fs,
3643 zink_fs_key(key)->lower_line_stipple);
3644 need_optimize = true;
3645 } else if (zink_fs_key(key)->lower_line_stipple)
3646 NIR_PASS_V(nir, lower_line_stipple_fs);
3648 if (zink_fs_key(key)->lower_point_smooth) {
3649 NIR_PASS_V(nir, nir_lower_point_smooth);
3650 NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3651 nir->info.fs.uses_discard = true;
3652 need_optimize = true;
3655 if (zink_fs_key(key)->robust_access)
3656 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3658 if (!zink_fs_key_base(key)->samples &&
3659 nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3660 /* VK will always use gl_SampleMask[] values even if sample count is 0,
3661 * so we need to skip this write here to mimic GL's behavior of ignoring it
3662 */
3663 nir_foreach_shader_out_variable(var, nir) {
3664 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3665 var->data.mode = nir_var_shader_temp;
3667 nir_fixup_deref_modes(nir);
3668 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3669 need_optimize = true;
3671 if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3672 NIR_PASS_V(nir, lower_dual_blend);
3674 if (zink_fs_key_base(key)->single_sample) {
3675 NIR_PASS_V(nir, nir_lower_single_sampled);
3677 if (zink_fs_key_base(key)->coord_replace_bits)
3678 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
3679 if (zink_fs_key_base(key)->point_coord_yinvert)
3680 NIR_PASS_V(nir, invert_point_coord);
3681 if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3682 nir_foreach_shader_in_variable(var, nir)
3683 var->data.sample = true;
3684 nir->info.fs.uses_sample_qualifier = true;
3685 nir->info.fs.uses_sample_shading = true;
3687 if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3688 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3689 if (nir->info.fs.uses_fbfetch_output) {
3690 nir_variable *fbfetch = NULL;
3691 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3692 /* old variable must be deleted to avoid spirv errors */
3693 fbfetch->data.mode = nir_var_shader_temp;
3694 nir_fixup_deref_modes(nir);
3695 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3696 need_optimize = true;
3698 nir_foreach_shader_in_variable_safe(var, nir) {
3699 if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3700 continue;
3701 nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3702 var->data.mode = nir_var_shader_temp;
3703 nir_fixup_deref_modes(nir);
3704 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3705 need_optimize = true;
3708 case MESA_SHADER_COMPUTE:
3709 if (zink_cs_key(key)->robust_access)
3710 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3714 if (key->base.needs_zs_shader_swizzle) {
3716 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3718 if (key->base.nonseamless_cube_mask) {
3719 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3720 need_optimize = true;
3723 if (screen->driconf.inline_uniforms) {
3724 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3725 NIR_PASS_V(nir, rewrite_bo_access, screen);
3726 NIR_PASS_V(nir, remove_bo_access, zs);
3727 need_optimize = true;
3729 if (inlined_uniforms) {
3730 optimize_nir(nir, zs);
3732 /* inlining + optimization can expose new constant io offsets, so this must be done again */
3733 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3734 nir_var_shader_out);
3736 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3737 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3738 zs->can_inline = false;
3739 } else if (need_optimize)
3740 optimize_nir(nir, zs);
3742 struct zink_shader_object obj = compile_module(screen, zs, nir, false);
3747 struct zink_shader_object
3748 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3750 nir_shader *nir = zink_shader_deserialize(screen, zs);
3751 int set = nir->info.stage == MESA_SHADER_FRAGMENT;
3752 unsigned offsets[4];
3753 zink_descriptor_shader_get_binding_offsets(zs, offsets);
3754 nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3755 if (var->data.bindless)
3757 var->data.descriptor_set = set;
3758 switch (var->data.mode) {
3759 case nir_var_mem_ubo:
3760 var->data.binding = !!var->data.driver_location;
3762 case nir_var_uniform:
3763 if (glsl_type_is_sampler(glsl_without_array(var->type)))
3764 var->data.binding += offsets[1];
3766 case nir_var_mem_ssbo:
3767 var->data.binding += offsets[2];
3770 var->data.binding += offsets[3];
3775 if (screen->driconf.inline_uniforms) {
3776 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3777 NIR_PASS_V(nir, rewrite_bo_access, screen);
3778 NIR_PASS_V(nir, remove_bo_access, zs);
3780 optimize_nir(nir, zs);
3781 zink_descriptor_shader_init(screen, zs);
3782 struct zink_shader_object obj = compile_module(screen, zs, nir, true);
3788 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3790 if (instr->type != nir_instr_type_intrinsic)
3791 return false;
3792 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3793 if (intr->intrinsic != nir_intrinsic_load_instance_id)
3794 return false;
3795 b->cursor = nir_after_instr(instr);
3796 nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3797 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
3802 lower_baseinstance(nir_shader *shader)
3804 if (shader->info.stage != MESA_SHADER_VERTEX)
3805 return false;
3806 return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3809 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
3810 * so instead we delete all those broken variables and just make new ones
3811 */
3812 static bool
3813 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
3815 uint64_t max_ssbo_size = 0;
3816 uint64_t max_ubo_size = 0;
3817 uint64_t max_uniform_size = 0;
3819 if (!shader->info.num_ssbos && !shader->info.num_ubos)
3820 return false;
3822 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3823 const struct glsl_type *type = glsl_without_array(var->type);
3824 if (type_is_counter(type))
3825 continue;
3826 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
3827 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3828 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3829 if (interface_type) {
3830 unsigned block_size = glsl_get_explicit_size(interface_type, true);
3831 if (glsl_get_length(interface_type) == 1) {
3832 /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3833 const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3834 if (glsl_type_is_array(f) && !glsl_array_size(f))
3838 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3839 size = MAX2(size, block_size);
3842 if (var->data.mode == nir_var_mem_ubo) {
3843 if (var->data.driver_location)
3844 max_ubo_size = MAX2(max_ubo_size, size);
3846 max_uniform_size = MAX2(max_uniform_size, size);
3848 max_ssbo_size = MAX2(max_ssbo_size, size);
3849 if (interface_type) {
3850 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
3854 var->data.mode = nir_var_shader_temp;
3856 nir_fixup_deref_modes(shader);
3857 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3858 optimize_nir(shader, NULL);
3860 struct glsl_struct_field field = {0};
3861 field.name = ralloc_strdup(shader, "base");
3862 if (shader->info.num_ubos) {
3863 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
3864 field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3865 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3866 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3868 var->interface_type = var->type;
3869 var->data.mode = nir_var_mem_ubo;
3870 var->data.driver_location = 0;
3873 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3874 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3875 if (num_ubos && ubos_used) {
3876 field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3877 /* shrink array as much as possible */
3878 unsigned first_ubo = ffs(ubos_used) - 2;
3879 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3880 num_ubos -= first_ubo;
3882 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3883 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3885 var->interface_type = var->type;
3886 var->data.mode = nir_var_mem_ubo;
3887 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3890 if (shader->info.num_ssbos && zs->ssbos_used) {
3891 /* shrink array as much as possible */
3892 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3893 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3894 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
3896 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3897 field.type = ssbo_type;
3898 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3899 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3901 var->interface_type = var->type;
3902 var->data.mode = nir_var_mem_ssbo;
3903 var->data.driver_location = first_ssbo;
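/* resulting shape (sketch): each rebuilt buffer variable is an array of
 * single-member struct blocks, roughly
 *
 *    struct { uint base[max_size * 4]; } buffers[num_slots];
 *
 * (unsized, i.e. base[], for SSBOs when needs_size is set), so all access
 * can later be lowered to plain 32-bit index/offset addressing.
 */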
3909 get_src_mask_ssbo(unsigned total, nir_src src)
3911 if (nir_src_is_const(src))
3912 return BITFIELD_BIT(nir_src_as_uint(src));
3913 return BITFIELD_MASK(total);
3917 get_src_mask_ubo(unsigned total, nir_src src)
3919 if (nir_src_is_const(src))
3920 return BITFIELD_BIT(nir_src_as_uint(src));
3921 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
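/* worked example (annotation): a constant buffer index of 2 yields
 * BITFIELD_BIT(2) == 0x4, while an indirect index must conservatively mark
 * every slot; with total == 4 that is BITFIELD_MASK(4) == 0xf for SSBOs and
 * 0xf & ~BITFIELD_BIT(0) == 0xe for UBOs, since UBO slot 0 (the uniform
 * block) is tracked separately.
 */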
3925 analyze_io(struct zink_shader *zs, nir_shader *shader)
3928 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3929 nir_foreach_block(block, impl) {
3930 nir_foreach_instr(instr, block) {
3931 if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3932 /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3933 nir_tex_instr *tex = nir_instr_as_tex(instr);
3934 nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3935 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3936 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3937 if (tex->texture_index >= img->data.driver_location &&
3938 tex->texture_index < img->data.driver_location + size) {
3939 BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3946 if (instr->type != nir_instr_type_intrinsic)
3949 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3950 switch (intrin->intrinsic) {
3951 case nir_intrinsic_store_ssbo:
3952 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3953 break;
3955 case nir_intrinsic_get_ssbo_size: {
3956 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3957 break;
3958 }
3960 case nir_intrinsic_ssbo_atomic_fadd:
3961 case nir_intrinsic_ssbo_atomic_add:
3962 case nir_intrinsic_ssbo_atomic_imin:
3963 case nir_intrinsic_ssbo_atomic_umin:
3964 case nir_intrinsic_ssbo_atomic_imax:
3965 case nir_intrinsic_ssbo_atomic_umax:
3966 case nir_intrinsic_ssbo_atomic_and:
3967 case nir_intrinsic_ssbo_atomic_or:
3968 case nir_intrinsic_ssbo_atomic_xor:
3969 case nir_intrinsic_ssbo_atomic_exchange:
3970 case nir_intrinsic_ssbo_atomic_comp_swap:
3971 case nir_intrinsic_ssbo_atomic_fmin:
3972 case nir_intrinsic_ssbo_atomic_fmax:
3973 case nir_intrinsic_ssbo_atomic_fcomp_swap:
3974 case nir_intrinsic_load_ssbo:
3975 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3976 break;
3977 case nir_intrinsic_load_ubo:
3978 case nir_intrinsic_load_ubo_vec4:
3979 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
3980 break;
3989 struct zink_bindless_info {
3990 nir_variable *bindless[4];
3991 unsigned bindless_set;
3994 /* this is a "default" bindless texture used if the shader has no texture variables */
3995 static nir_variable *
3996 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
3998 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4001 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4002 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4003 var->data.descriptor_set = descriptor_set;
4004 var->data.driver_location = var->data.binding = binding;
4008 /* this is a "default" bindless image used if the shader has no image variables */
4009 static nir_variable *
4010 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4012 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4015 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4016 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4017 var->data.descriptor_set = descriptor_set;
4018 var->data.driver_location = var->data.binding = binding;
4019 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4023 /* rewrite bindless instructions as array deref instructions */
4025 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4027 struct zink_bindless_info *bindless = data;
4029 if (in->type == nir_instr_type_tex) {
4030 nir_tex_instr *tex = nir_instr_as_tex(in);
4031 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4035 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4036 if (!var)
4037 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4038 b->cursor = nir_before_instr(in);
4039 nir_deref_instr *deref = nir_build_deref_var(b, var);
4040 if (glsl_type_is_array(var->type))
4041 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4042 nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
4044 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4045 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4046 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4047 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4049 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4050 * - Warhammer 40k: Dawn of War III
4051 */
4052 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4053 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4054 unsigned coord_components = nir_src_num_components(tex->src[c].src);
4055 if (coord_components < needed_components) {
4056 nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4057 nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
4058 tex->coord_components = needed_components;
4062 if (in->type != nir_instr_type_intrinsic)
4064 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4066 nir_intrinsic_op op;
4067 #define OP_SWAP(OP) \
4068 case nir_intrinsic_bindless_image_##OP: \
4069 op = nir_intrinsic_image_deref_##OP; \
4070 break;
4073 /* convert bindless intrinsics to deref intrinsics */
4074 switch (instr->intrinsic) {
4075 OP_SWAP(atomic_add)
4076 OP_SWAP(atomic_and)
4077 OP_SWAP(atomic_comp_swap)
4078 OP_SWAP(atomic_dec_wrap)
4079 OP_SWAP(atomic_exchange)
4080 OP_SWAP(atomic_fadd)
4081 OP_SWAP(atomic_fmax)
4082 OP_SWAP(atomic_fmin)
4083 OP_SWAP(atomic_imax)
4084 OP_SWAP(atomic_imin)
4085 OP_SWAP(atomic_inc_wrap)
4086 OP_SWAP(atomic_or)
4087 OP_SWAP(atomic_umax)
4088 OP_SWAP(atomic_umin)
4100 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4101 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4102 if (!var)
4103 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4104 instr->intrinsic = op;
4105 b->cursor = nir_before_instr(in);
4106 nir_deref_instr *deref = nir_build_deref_var(b, var);
4107 if (glsl_type_is_array(var->type))
4108 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4109 nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
4114 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4116 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4117 return false;
4118 nir_fixup_deref_modes(shader);
4119 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4120 optimize_nir(shader, NULL);
4124 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4126 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4128 if (in->type != nir_instr_type_intrinsic)
4129 return false;
4130 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4131 if (instr->intrinsic != nir_intrinsic_load_deref &&
4132 instr->intrinsic != nir_intrinsic_store_deref)
4133 return false;
4135 nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4136 nir_variable *var = nir_deref_instr_get_variable(src_deref);
4137 if (var->data.bindless)
4138 return false;
4139 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4140 return false;
4141 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4142 return false;
4144 var->type = glsl_int64_t_type();
4145 var->data.bindless = 1;
4146 b->cursor = nir_before_instr(in);
4147 nir_deref_instr *deref = nir_build_deref_var(b, var);
4148 if (instr->intrinsic == nir_intrinsic_load_deref) {
4149 nir_ssa_def *def = nir_load_deref(b, deref);
4150 nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4151 nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4153 nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4155 nir_instr_remove(in);
4156 nir_instr_remove(&src_deref->instr);
4161 lower_bindless_io(nir_shader *shader)
4163 return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
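/* illustrative i/o transform (hypothetical GLSL):
 *
 *    flat in sampler2D tex;   // bindless handle arriving as a varying
 *
 * the variable is retyped to a plain int64_t and the load/store moves the
 * raw handle bits, since SPIR-V i/o cannot carry opaque sampler types.
 */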
4167 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4169 if (stage == MESA_SHADER_NONE) {
4170 unreachable("not supported");
4171 }
4172 unsigned base = stage;
4173 /* clamp compute bindings for better driver efficiency */
4174 if (gl_shader_stage_is_compute(stage))
4175 base = 0;
4176 switch (type) {
4177 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4178 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4179 return base * 2 + !!index;
4181 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4182 assert(stage == MESA_SHADER_KERNEL);
4183 FALLTHROUGH;
4184 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4185 if (stage == MESA_SHADER_KERNEL) {
4186 assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4187 return index + PIPE_MAX_SAMPLERS;
4188 }
4189 FALLTHROUGH;
4190 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4191 assert(index < PIPE_MAX_SAMPLERS);
4192 assert(stage != MESA_SHADER_KERNEL);
4193 return (base * PIPE_MAX_SAMPLERS) + index;
4195 case VK_DESCRIPTOR_TYPE_SAMPLER:
4196 assert(index < PIPE_MAX_SAMPLERS);
4197 assert(stage == MESA_SHADER_KERNEL);
4198 return index;
4200 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4201 return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4203 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4204 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4205 assert(index < ZINK_MAX_SHADER_IMAGES);
4206 if (stage == MESA_SHADER_KERNEL)
4207 return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4208 return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4210 default:
4211 unreachable("unexpected type");
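/* worked example (annotation, assuming PIPE_MAX_SAMPLERS == 32 and
 * non-compact descriptors): a combined image/sampler at index 5 maps to
 * binding 4 * 32 + 5 == 133 in a fragment shader (base ==
 * MESA_SHADER_FRAGMENT == 4) but 0 * 32 + 5 == 5 in a vertex shader,
 * keeping per-stage ranges disjoint within the shared set layout.
 */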
4217 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4219 if (glsl_type_is_struct(type)) {
4220 for (unsigned i = 0; i < glsl_get_length(type); i++)
4221 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4225 /* just a random scalar in a struct */
4226 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4229 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4230 unsigned binding;
4231 switch (vktype) {
4232 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4233 binding = 0;
4234 break;
4235 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4236 binding = 1;
4237 break;
4238 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4239 binding = 2;
4240 break;
4241 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4242 binding = 3;
4243 break;
4244 default:
4245 unreachable("unknown");
4247 if (!bindless->bindless[binding]) {
4248 bindless->bindless[binding] = nir_variable_clone(var, nir);
4249 bindless->bindless[binding]->data.bindless = 0;
4250 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4251 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4252 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4253 if (!bindless->bindless[binding]->data.image.format)
4254 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4255 nir_shader_add_variable(nir, bindless->bindless[binding]);
4257 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4259 var->data.mode = nir_var_shader_temp;
4263 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4265 struct zink_screen *screen = data;
4266 if (instr->type != nir_instr_type_tex)
4268 nir_tex_instr *tex = nir_instr_as_tex(instr);
4269 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4271 if (tex->is_sparse && screen->need_2D_sparse) {
4272 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4273 mesa_loge("unhandled/unsupported 1D sparse texture!");
4276 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4277 b->cursor = nir_before_instr(instr);
4278 tex->coord_components++;
4285 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4286 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4287 if (c == -1)
4288 continue;
4289 if (tex->src[c].src.ssa->num_components == tex->coord_components)
4290 continue;
4291 nir_ssa_def *def;
4292 nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4293 if (tex->src[c].src.ssa->num_components == 1)
4294 def = nir_vec2(b, tex->src[c].src.ssa, zero);
4295 else
4296 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4297 nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4299 b->cursor = nir_after_instr(instr);
4300 unsigned needed_components = nir_tex_instr_dest_size(tex);
4301 unsigned num_components = tex->dest.ssa.num_components;
4302 if (needed_components > num_components) {
4303 tex->dest.ssa.num_components = needed_components;
4304 assert(num_components < 3);
4305 /* take either xz or just x since this is promoted to 2D from 1D */
4306 uint32_t mask = num_components == 2 ? (1|4) : 1;
4307 nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4308 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
4314 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4317 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4318 const struct glsl_type *type = glsl_without_array(var->type);
4319 unsigned length = glsl_get_length(var->type);
4320 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4322 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4323 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4328 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4333 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4335 nir_foreach_function(function, shader) {
4336 if (!function->impl)
4338 nir_foreach_block_safe(block, function->impl) {
4339 nir_foreach_instr_safe(instr, block) {
4340 if (instr->type == nir_instr_type_tex) {
4341 nir_tex_instr *tex = nir_instr_as_tex(instr);
4342 zs->sinfo.have_sparse |= tex->is_sparse;
4344 if (instr->type != nir_instr_type_intrinsic)
4346 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4347 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4348 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4349 intr->intrinsic == nir_intrinsic_image_deref_store ||
4350 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4351 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4352 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4353 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4354 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4355 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4356 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4357 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4358 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4359 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4360 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4361 intr->intrinsic == nir_intrinsic_image_deref_size ||
4362 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4363 intr->intrinsic == nir_intrinsic_image_deref_format ||
4364 intr->intrinsic == nir_intrinsic_image_deref_order) {
4367 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
4369 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4370 const unsigned size =
4371 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4372 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4373 var->data.binding + (MAX2(size, 1) - 1));
4375 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4376 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4377 zs->sinfo.have_sparse = true;
4379 static bool warned = false;
4380 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4381 switch (intr->intrinsic) {
4382 case nir_intrinsic_image_deref_atomic_add: {
4383 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4384 if (util_format_is_float(var->data.image.format))
4385 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4398 is_residency_code(nir_ssa_def *src)
4400 nir_instr *parent = src->parent_instr;
4402 if (parent->type == nir_instr_type_intrinsic) {
4403 ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4404 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4407 if (parent->type == nir_instr_type_tex)
4409 assert(parent->type == nir_instr_type_alu);
4410 nir_alu_instr *alu = nir_instr_as_alu(parent);
4411 parent = alu->src[0].src.ssa->parent_instr;
4416 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4418 if (in->type != nir_instr_type_intrinsic)
4420 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4421 if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4422 b->cursor = nir_before_instr(&instr->instr);
4423 nir_ssa_def *src0;
4424 if (is_residency_code(instr->src[0].ssa))
4425 src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4426 else
4427 src0 = instr->src[0].ssa;
4428 nir_ssa_def *src1;
4429 if (is_residency_code(instr->src[1].ssa))
4430 src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4431 else
4432 src1 = instr->src[1].ssa;
4433 nir_ssa_def *def = nir_iand(b, src0, src1);
4434 nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4435 nir_instr_remove(in);
4436 return true;
4437 }
4438 if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4439 return false;
4441 /* vulkan vec can only be a vec4, but this is (maybe) vec5,
4442 * so just rewrite as the first component since ntv is going to use a different
4443 * method for storing the residency value anyway
4444 */
4445 b->cursor = nir_before_instr(&instr->instr);
4446 nir_instr *parent = instr->src[0].ssa->parent_instr;
4447 if (is_residency_code(instr->src[0].ssa)) {
4448 assert(parent->type == nir_instr_type_alu);
4449 nir_alu_instr *alu = nir_instr_as_alu(parent);
4450 nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4451 nir_instr_remove(parent);
4452 } else {
4453 nir_ssa_def *src;
4454 if (parent->type == nir_instr_type_intrinsic) {
4455 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4456 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4457 src = intr->src[0].ssa;
4459 assert(parent->type == nir_instr_type_alu);
4460 nir_alu_instr *alu = nir_instr_as_alu(parent);
4461 src = alu->src[0].src.ssa;
4463 if (instr->dest.ssa.bit_size != 32) {
4464 if (instr->dest.ssa.bit_size == 1)
4465 src = nir_ieq_imm(b, src, 1);
4467 src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4469 nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4470 nir_instr_remove(in);
4476 lower_sparse(nir_shader *shader)
4478 return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
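/* net effect (sketch): residency feedback is reduced to one scalar per
 * query, e.g. the pattern produced by GLSL's
 *    sparseTexelsResidentARB(a) && sparseTexelsResidentARB(b)
 * collapses into a single is_sparse_texels_resident over an iand, and the
 * (maybe) vec5 sparse payload is narrowed to its first component for ntv.
 */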
4482 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4484 if (in->type != nir_instr_type_tex)
4486 nir_tex_instr *tex = nir_instr_as_tex(in);
4487 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4489 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4490 nir_variable *var = NULL;
4492 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4494 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4495 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4496 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4497 if (tex->texture_index >= img->data.driver_location &&
4498 tex->texture_index < img->data.driver_location + size) {
4505 return !!rewrite_tex_dest(b, tex, var, data);
4509 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4511 return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4515 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4517 if (in->type != nir_instr_type_alu)
4519 nir_alu_instr *alu = nir_instr_as_alu(in);
4521 case nir_op_ubitfield_extract:
4522 case nir_op_ibitfield_extract:
4523 case nir_op_bitfield_insert:
4528 unsigned num_components = nir_dest_num_components(alu->dest.dest);
4529 if (num_components == 1)
4531 b->cursor = nir_before_instr(in);
4532 nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
4533 for (unsigned i = 0; i < num_components; i++) {
4534 if (alu->op == nir_op_bitfield_insert)
4535 dests[i] = nir_bitfield_insert(b,
4536 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4537 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4538 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4539 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4540 else if (alu->op == nir_op_ubitfield_extract)
4541 dests[i] = nir_ubitfield_extract(b,
4542 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4543 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4544 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4546 dests[i] = nir_ibitfield_extract(b,
4547 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4548 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4549 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4551 nir_ssa_def *dest = nir_vec(b, dests, num_components);
4552 nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4553 nir_instr_remove(in);
4559 split_bitfields(nir_shader *shader)
4561 return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
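/* example (illustrative): a two-component op such as
 *    dst.xy = ubitfield_extract(val.xy, offset.xy, bits.xy)
 * is rebuilt above as two scalar ubitfield_extract ops plus a final vec2;
 * NIR permits per-component offset/bits, but the SPIR-V BitField*
 * instructions apply one scalar offset/count to every component.
 */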
4565 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4567 nir_foreach_function(function, nir) {
4568 nir_foreach_block(block, function->impl) {
4569 nir_foreach_instr_safe(instr, block) {
4570 if (instr->type != nir_instr_type_deref)
4572 nir_deref_instr *deref = nir_instr_as_deref(instr);
4573 nir_variable *img = nir_deref_instr_get_variable(deref);
4576 if (glsl_type_is_array(var->type)) {
4577 if (deref->deref_type == nir_deref_type_array)
4578 deref->type = glsl_without_array(var->type);
4580 deref->type = var->type;
4582 deref->type = var->type;
4590 type_image(nir_shader *nir, nir_variable *var)
4592 nir_foreach_function(function, nir) {
4593 nir_foreach_block(block, function->impl) {
4594 nir_foreach_instr_safe(instr, block) {
4595 if (instr->type != nir_instr_type_intrinsic)
4597 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4598 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4599 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4600 intr->intrinsic == nir_intrinsic_image_deref_store ||
4601 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4602 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4603 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4604 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4605 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4606 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4607 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4608 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4609 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4610 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4611 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4612 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4613 intr->intrinsic == nir_intrinsic_image_deref_format ||
4614 intr->intrinsic == nir_intrinsic_image_deref_order) {
4615 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4616 nir_variable *img = nir_deref_instr_get_variable(deref);
4619 nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4620 const struct glsl_type *type = glsl_without_array(var->type);
4621 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4622 assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4625 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4626 if (glsl_type_is_array(var->type))
4627 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4628 var->type = img_type;
4629 rewrite_cl_derefs(nir, var);
4635 nir_foreach_function(function, nir) {
4636 nir_foreach_block(block, function->impl) {
4637 nir_foreach_instr_safe(instr, block) {
4638 if (instr->type != nir_instr_type_intrinsic)
4640 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4641 if (intr->intrinsic != nir_intrinsic_image_deref_size)
4643 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4644 nir_variable *img = nir_deref_instr_get_variable(deref);
4647 nir_alu_type alu_type = nir_type_uint32;
4648 const struct glsl_type *type = glsl_without_array(var->type);
4649 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4652 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4653 if (glsl_type_is_array(var->type))
4654 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4655 var->type = img_type;
4656 rewrite_cl_derefs(nir, var);
4661 var->data.mode = nir_var_shader_temp;
4664 static nir_variable *
4665 find_sampler_var(nir_shader *nir, unsigned texture_index)
4667 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4668 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4669 if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4670 (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
4677 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4679 bool progress = false;
4680 nir_foreach_function(function, nir) {
4681 nir_foreach_block(block, function->impl) {
4682 nir_foreach_instr(instr, block) {
4683 if (instr->type != nir_instr_type_tex)
4685 nir_tex_instr *tex = nir_instr_as_tex(instr);
4689 case nir_texop_query_levels:
4690 case nir_texop_texture_samples:
4691 case nir_texop_samples_identical:
4696 *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4697 nir_variable *var = find_sampler_var(nir, tex->texture_index);
4699 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4701 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4702 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4704 img_type = glsl_array_type(img_type, size, 0);
4705 var->type = img_type;
4710 nir_foreach_function(function, nir) {
4711 nir_foreach_block(block, function->impl) {
4712 nir_foreach_instr(instr, block) {
4713 if (instr->type != nir_instr_type_tex)
4715 nir_tex_instr *tex = nir_instr_as_tex(instr);
4719 case nir_texop_query_levels:
4720 case nir_texop_texture_samples:
4721 case nir_texop_samples_identical:
4726 *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4727 nir_variable *var = find_sampler_var(nir, tex->texture_index);
4729 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4731 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4732 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4734 img_type = glsl_array_type(img_type, size, 0);
4735 var->type = img_type;
4744 delete_samplers(nir_shader *nir)
4746 bool progress = false;
4747 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4748 if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4749 var->data.mode = nir_var_shader_temp;
4757 type_images(nir_shader *nir, unsigned *sampler_mask)
4759 bool progress = false;
4760 progress |= delete_samplers(nir);
4761 progress |= type_sampler_vars(nir, sampler_mask);
4762 nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4763 type_image(nir, var);
4769 /* attempt to assign io for separate shaders */
4771 fixup_io_locations(nir_shader *nir)
4773 nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4774 /* i/o interface blocks are required to be EXACT matches between stages:
4775 * iterate over all locations and set locations incrementally
4776 */
4777 unsigned slot = 0;
4778 for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4779 if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4780 continue;
4781 nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4782 if (!var) {
4783 /* locations used between stages are not required to be contiguous */
4784 if (i >= VARYING_SLOT_VAR0)
4785 slot++;
4786 continue;
4787 }
4788 unsigned size;
4789 /* ensure variable is given enough slots */
4790 if (nir_is_arrayed_io(var, nir->info.stage))
4791 size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4792 else
4793 size = glsl_count_vec4_slots(var->type, false, false);
4794 var->data.driver_location = slot;
4795 slot += size;
4796 /* ensure the consumed slots aren't double iterated */
4797 i += size - 1;
4803 zink_flat_flags(struct nir_shader *shader)
4805 uint32_t flat_flags = 0, c = 0;
4806 nir_foreach_shader_in_variable(var, shader) {
4807 if (var->data.interpolation == INTERP_MODE_FLAT)
4808 flat_flags |= 1u << (c++);
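/* example (annotation): with inputs [color0 (flat), uv (smooth),
 * color1 (flat)] this yields flat_flags == 0x3 -- note the bit index
 * advances only when a flat input is seen (c++ inside the branch), so the
 * mask is dense over flat inputs rather than over all inputs.
 */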
4814 struct zink_shader *
4815 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4816 const struct pipe_stream_output_info *so_info)
4818 struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4819 bool have_psiz = false;
4821 ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4822 nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4824 ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4825 ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4827 util_queue_fence_init(&ret->precompile.fence);
4828 util_dynarray_init(&ret->pipeline_libs, ret);
4829 ret->hash = _mesa_hash_pointer(ret);
4831 ret->programs = _mesa_pointer_set_create(NULL);
4832 simple_mtx_init(&ret->lock, mtx_plain);
4834 nir_variable_mode indirect_derefs_modes = 0;
4835 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4836 nir->info.stage == MESA_SHADER_TESS_EVAL)
4837 indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4839 NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4842 if (nir->info.stage < MESA_SHADER_COMPUTE)
4843 create_gfx_pushconst(nir);
4845 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4846 nir->info.stage == MESA_SHADER_TESS_EVAL)
4847 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4849 if (nir->info.stage < MESA_SHADER_FRAGMENT)
4850 have_psiz = check_psiz(nir);
4851 if (nir->info.stage == MESA_SHADER_FRAGMENT)
4852 ret->flat_flags = zink_flat_flags(nir);
4854 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4855 NIR_PASS_V(nir, fixup_io_locations);
4857 NIR_PASS_V(nir, lower_basevertex);
4858 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4859 NIR_PASS_V(nir, lower_baseinstance);
4860 NIR_PASS_V(nir, lower_sparse);
4861 NIR_PASS_V(nir, split_bitfields);
4862 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4864 if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4865 NIR_PASS_V(nir, nir_lower_discard_or_demote,
4866 screen->driconf.glsl_correct_derivatives_after_discard ||
4867 nir->info.use_legacy_math_rules);
4870 if (screen->need_2D_zs)
4871 NIR_PASS_V(nir, lower_1d_shadow, screen);
4874 nir_lower_subgroups_options subgroup_options = {0};
4875 subgroup_options.lower_to_scalar = true;
4876 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4877 subgroup_options.ballot_bit_size = 32;
4878 subgroup_options.ballot_components = 4;
4879 subgroup_options.lower_subgroup_masks = true;
4880 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4881 subgroup_options.subgroup_size = 1;
4882 subgroup_options.lower_vote_trivial = true;
4884 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4887 if (so_info && so_info->num_outputs)
4888 NIR_PASS_V(nir, split_blocks);
4890 optimize_nir(nir, NULL);
4891 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4892 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4893 nir_lower_demote_if_to_cf |
4894 nir_lower_terminate_if_to_cf));
4895 NIR_PASS_V(nir, nir_lower_fragcolor,
4896 nir->info.fs.color_is_dual_source ? 1 : 8);
4897 NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4898 bool needs_size = analyze_io(ret, nir);
4899 NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4900 /* run in compile if there could be inlined uniforms */
4901 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4902 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4903 NIR_PASS_V(nir, rewrite_bo_access, screen);
4904 NIR_PASS_V(nir, remove_bo_access, ret);
4907 if (zink_debug & ZINK_DEBUG_NIR) {
4908 fprintf(stderr, "NIR shader:\n---8<---\n");
4909 nir_print_shader(nir, stderr);
4910 fprintf(stderr, "---8<---\n");
4913 struct zink_bindless_info bindless = {0};
4914 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4915 bool has_bindless_io = false;
4916 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4917 var->data.is_xfb = false;
4918 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4919 has_bindless_io = true;
4923 if (has_bindless_io)
4924 NIR_PASS_V(nir, lower_bindless_io);
4926 optimize_nir(nir, NULL);
4929 scan_nir(screen, nir, ret);
4930 unsigned sampler_mask = 0;
4931 if (nir->info.stage == MESA_SHADER_KERNEL) {
4932 NIR_PASS_V(nir, type_images, &sampler_mask);
4933 enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4934 VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4935 u_foreach_bit(s, sampler_mask) {
4936 ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4937 ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4938 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4939 ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4940 ret->num_bindings[ztype]++;
4942 ret->sinfo.sampler_mask = sampler_mask;
4945 unsigned ubo_binding_mask = 0;
4946 unsigned ssbo_binding_mask = 0;
4947 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4948 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4951 nir_var_mem_ssbo)) {
4952 enum zink_descriptor_type ztype;
4953 const struct glsl_type *type = glsl_without_array(var->type);
4954 if (var->data.mode == nir_var_mem_ubo) {
4955 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4956 /* buffer 0 is a push descriptor */
4957 var->data.descriptor_set = !!var->data.driver_location;
4958 var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4959 zink_binding(nir->info.stage,
4960 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4961 var->data.driver_location,
4962 screen->compact_descriptors);
4963 assert(var->data.driver_location || var->data.binding < 10);
4964 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4965 int binding = var->data.binding;
4967 if (!var->data.driver_location) {
4968 ret->has_uniforms = true;
4969 } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4970 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4971 ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4972 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4973 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4974 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4975 ret->num_bindings[ztype]++;
4976 ubo_binding_mask |= BITFIELD_BIT(binding);
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = screen->desc_set_id[ztype];
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
               ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
               assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
               ret->num_bindings[ztype]++;
               ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
            }
         } else {
            assert(var->data.mode == nir_var_uniform ||
                   var->data.mode == nir_var_image);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, &bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
                  vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = screen->desc_set_id[ztype];
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            } else if (var->data.mode == nir_var_uniform) {
               /* this is a dead uniform */
               var->data.mode = 0;
               exec_node_remove(&var->node);
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
   ret->bindless |= bindless_lowered;
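   /* 64-bit int/float support is optional in Vulkan: if the device lacks
    * either feature, 64-bit variables are lowered to 32-bit equivalents */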
   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
      NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
   if (nir->info.stage != MESA_SHADER_KERNEL)
      NIR_PASS_V(nir, match_tex_dests, ret);

   if (!nir->info.internal)
      nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
   if (so_info && so_info->num_outputs)
      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
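   /* with no streamout info, a pointsize output that was injected by lowering
    * (no explicit location) can be demoted to a temp and deleted by DCE */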
   else if (have_psiz) {
      bool have_fake_psiz = false;
      nir_variable *psiz = NULL;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == VARYING_SLOT_PSIZ) {
            if (!var->data.explicit_location)
               have_fake_psiz = true;
            psiz = var;
         }
      }
      if (have_fake_psiz && psiz) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }

   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));

   ret->can_inline = true;

   return ret;
}
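/* pipe_screen::finalize_nir hook: applies the texture lowering zink always
 * needs before the state tracker caches the shader */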
void
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   nir_lower_tex_options tex_opts = {
      .lower_invalid_implicit_lod = true,
   };
   /*
      Sampled Image must be an object whose type is OpTypeSampledImage.
      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
      or Rect, and the Arrayed and MS operands must be 0.
      - SPIRV, OpImageSampleProj* opcodes
    */
   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
   tex_opts.lower_txp_array = true;
   if (!screen->info.feats.features.shaderImageGatherExtended)
      tex_opts.lower_tg4_offsets = true;
   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   optimize_nir(nir, NULL);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);
}
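/* destroys the shader's Vulkan objects (shader object or module plus any
 * precompiled pipeline library), then frees its serialized blob and SPIR-V */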
void
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   _mesa_set_destroy(shader->programs, NULL);
   util_queue_fence_wait(&shader->precompile.fence);
   util_queue_fence_destroy(&shader->precompile.fence);
   zink_descriptor_shader_deinit(screen, shader);
   if (screen->info.have_EXT_shader_object) {
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
   } else {
      if (shader->precompile.obj.mod)
         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
      if (shader->precompile.gpl)
         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
   }
   blob_finish(&shader->blob);
   ralloc_free(shader->spirv);
   free(shader->precompile.bindings);
   ralloc_free(shader);
}
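/* unlinks a gfx shader from every program and pipeline-lib cache entry that
 * still references it, then frees it; generated passthrough tcs/gs shaders
 * are destroyed along with the shader that owns them */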
void
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   util_queue_fence_wait(&shader->precompile.fence);
   set_foreach(shader->programs, entry) {
      struct zink_gfx_program *prog = (void*)entry->key;
      gl_shader_stage stage = shader->info.stage;
      assert(stage < ZINK_GFX_SHADER_COUNT);
      unsigned stages_present = prog->stages_present;
      if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
         stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
      unsigned idx = zink_program_cache_stages(stages_present);
      if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
         struct hash_table *ht = &prog->ctx->program_cache[idx];
         simple_mtx_lock(&prog->ctx->program_lock[idx]);
         struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
         assert(he && he->data == prog);
         _mesa_hash_table_remove(ht, he);
         prog->base.removed = true;
         simple_mtx_unlock(&prog->ctx->program_lock[idx]);
         util_queue_fence_wait(&prog->base.cache_fence);

         for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
            for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
               hash_table_foreach(&prog->pipelines[r][i], entry) {
                  struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

                  util_queue_fence_wait(&pc_entry->fence);
               }
            }
         }
      }
      while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
         struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
         //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
         //stages_present_i implies libs->stages_present_i
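         //e.g. stages_present = VS|FS skips a lib whose stages_present is
         //only VS, but matches a lib containing VS|GS|FS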
         if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
            continue;
         if (!libs->removed) {
            libs->removed = true;
            simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
            _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
            simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
         }
         zink_gfx_lib_cache_unref(screen, libs);
      }
      if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
         prog->shaders[stage] = NULL;
         prog->stages_remaining &= ~BITFIELD_BIT(stage);
      }
      /* only remove generated tcs during parent tes destruction */
      if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
         prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
      if (stage != MESA_SHADER_FRAGMENT &&
          prog->shaders[MESA_SHADER_GEOMETRY] &&
          prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
          shader) {
         prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
      }
      zink_gfx_program_reference(screen, &prog, NULL);
   }
   if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
       shader->non_fs.generated_tcs) {
      /* automatically destroy generated tcs shaders when tes is destroyed */
      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
      shader->non_fs.generated_tcs = NULL;
   }
   for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
      for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
             shader->non_fs.generated_gs[i][j]) {
            /* automatically destroy generated gs shaders when owner is destroyed */
            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
            shader->non_fs.generated_gs[i][j] = NULL;
         }
      }
   }
   zink_shader_free(screen, shader);
}
struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
{
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
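   /* tcs_vertices_out_word records where the OutputVertices execution-mode
    * operand lives in the SPIR-V words, so patching that single word is
    * enough to retarget the shader to a new patch size */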
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
   return zink_shader_spirv_compile(screen, zs, NULL, false);
}
/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
   some_var_out = some_var[gl_InvocationID];
}
*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
{
   struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
   util_queue_fence_init(&ret->precompile.fence);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
   nir_foreach_shader_in_variable(var, tes) {
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
         continue;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
         const struct glsl_type *type = var->type;
         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
         out_type = glsl_array_type(type, vertices_per_patch, 0);
      }

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7).  Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch.  The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
      nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
      nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
      copy_vars(&b, out_value, in_value);
   }
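   /* the tess levels aren't copied from the tes inputs; they're written from
    * the default levels the frontend supplies (pipe_context::set_tess_state),
    * passed down through the gfx push constants below */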
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   create_gfx_pushconst(nir);
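   /* the offsets passed to nir_load_push_constant below are member indices of
    * the push-constant struct built by create_gfx_pushconst, which the ntv
    * push-constant loader maps to the real byte offsets */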
   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
                                                    .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
                                                    .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }
   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   *nir_ret = nir;
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));
   ret->non_fs.is_generated = true;
   return ret;
}
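/* reports whether the shader samples any cube textures; callers use this to
 * decide whether cube lowering (e.g. for non-seamless cubes) is needed */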
bool
zink_shader_has_cubes(nir_shader *nir)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
         return true;
   }
   return false;
}
nir_shader *
zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
{
   struct blob_reader blob_reader;
   blob_reader_init(&blob_reader, blob->data, blob->size);
   return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
}

nir_shader *
zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
{
   return zink_shader_blob_deserialize(screen, &zs->blob);
}
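/* serializes the NIR into the given blob; debug info is kept only when a
 * ZINK_DEBUG flag that needs to print shaders is set */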
void
zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
{
   blob_init(blob);
#ifndef NDEBUG
   bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
#else
   bool strip = true;
#endif
   nir_serialize(blob, nir, strip);
}
void
zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   nir_print_shader(nir, fp);
   ralloc_free(nir);
}