/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"

#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
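
/* helper to recursively copy between two derefs: structs, arrays, and
 * matrices are copied element-by-element, anything else as a load+store
 */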
static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct_or_ifc(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}

#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)

static void
create_gfx_pushconst(nir_shader *nir)
{
#define PUSHCONST_MEMBER(member_idx, field)                                                                     \
fields[member_idx].type =                                                                                       \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}
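
/* instruction-level worker for lower_64bit_vertex_attribs (see the pass
 * comment below): splits a dvec3/dvec4 input load across two variables
 */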
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}
207 /* "64-bit three- and four-component vectors consume two consecutive locations."
208 * - 14.1.4. Location Assignment
210 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
211 * are assigned to consecutive locations, loaded separately, and then assembled back into a
212 * composite value that's used in place of the original loaded ssa src
215 lower_64bit_vertex_attribs(nir_shader *shader)
217 if (shader->info.stage != MESA_SHADER_VERTEX)
220 bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
221 progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
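
/* gl_BaseVertex is defined to be 0 for non-indexed draws, which Vulkan can't
 * express directly, so select between the loaded value and 0 based on the
 * "draw mode is indexed" push constant
 */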
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
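
/* replace gl_DrawID reads with a load of the draw id push constant */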
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32);
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}
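
/* geometry shader lowering for emulated point rendering: each point is
 * expanded into a 4-vertex triangle strip sized from gl_PointSize and the
 * viewport scale push constant
 */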
struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};

static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
       intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   b->cursor = nir_before_instr(instr);

   // viewport-map endpoints
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}

static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;

   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // Gets the gl_Position in and out
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if position or gl_PointSize aren't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}
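
/* state for provoking vertex mode emulation: emitted vertices are buffered
 * in per-varying ring arrays (see lower_pv_mode_gs below) so that each
 * primitive can be re-emitted with its vertices rotated
 */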
struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX][4];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};

static nir_ssa_def *
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
                       state->ring_size);
}

/* Given the final deref of a chain of derefs, this function will walk up the
 * chain until it finds a var deref.
 *
 * It will then recreate an identical chain that ends with the provided deref.
 */
static nir_deref_instr*
replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
{
   nir_deref_instr *parent = nir_src_as_deref(old->parent);
   switch(old->deref_type) {
   case nir_deref_type_var:
      return new;
   case nir_deref_type_array:
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
   case nir_deref_type_array_wildcard:
   case nir_deref_type_ptr_as_array:
   case nir_deref_type_cast:
      unreachable("unexpected deref type");
   }
   unreachable("impossible deref type");
}

static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      assert(state->varyings[location][location_frac]);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
      nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
      // recreate the chain of derefs that led to the store.
      nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
      nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}

static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the provoking vertex
    * is either the last or the second, depending on whether the triangle is at an odd
    * or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                         offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                          nir_imul(b, odd_prim, two)),
                              three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                              three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b);
   }
}

static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
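
/* on end_primitive, drain the buffered vertices: emit one rotated primitive
 * per strip position, then reset the counters
 */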
static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   {
      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                             nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
      nir_end_primitive(b);

      nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
   }
   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static unsigned
lower_pv_mode_vertices_for_prim(enum mesa_prim prim)
{
   switch (prim) {
   case MESA_PRIM_POINTS:
      return 1;
   case MESA_PRIM_LINE_STRIP:
      return 2;
   case MESA_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}

static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   shader->info.gs.vertices_out,
                                                   0),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");
   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");
   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");
   state.prim = prim;

   // initialize pos_counter and out_pos_counter
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}

struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};
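
/* perspective-divide a clip-space position and apply the viewport scale to
 * get coordinates usable for screen-space length math
 */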
static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
                                     w_recip);
   return nir_fmul(b, ndc_point, scale);
}

static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                 nir_fadd(b, nir_load_var(b, state->stipple_counter),
                          len), 1);
   nir_pop_if(b, NULL);

   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                              1), 1);

   return true;
}

static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}
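
/* fragment-side line stipple: walk the coverage mask, evaluate the stipple
 * pattern at each covered sample, and clear the mask bits of samples whose
 * pattern bit is 0
 */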
static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_after_cf_list(&entry->body));

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                            glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);
   nir_push_loop(&b);
   {
      nir_ssa_def *value = nir_load_var(&b, v);
      nir_ssa_def *index = nir_ufind_msb(&b, value);
      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);

      nir_ssa_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
                                    &nir_build_deref_var(&b, stipple)->dest.ssa, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                             nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
      nir_ssa_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
         nir_ssa_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}

struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};

static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location]);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}
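
/* on each emitted vertex after the first, expand the segment from the
 * previous position to the current one into an 8-vertex triangle strip:
 * four vertices per end-cap, offset along the line direction and its
 * screen-space tangent
 */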
static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                             nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                             nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                              1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   unsigned location = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (var->data.driver_location >= location)
         location = var->data.driver_location + 1;
   }

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = location;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
   shader->num_outputs++;

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   b = nir_builder_at(nir_before_cf_list(&entry->body));
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}

static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "__stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      b = nir_builder_at(nir_before_cf_list(&entry->body));
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                    .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}
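
/* gallium expresses the second dual-source blend output as FRAG_RESULT_DATA1;
 * remap it to location 0 with index 1, which is how Vulkan/SPIR-V address
 * dual-source outputs
 */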
static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
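
/* rewrite the vector [un]pack_64_2x32 ALU ops in terms of their split forms */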
static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}

static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}
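
/* generate a passthrough gs that receives each quad as 4-vertex lines
 * adjacency and emits it as two triangles, honoring the provoking vertex
 * convention via nir_load_provoking_last
 */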
nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      /* input vars can't be created for those */
      if (var->data.location == VARYING_SLOT_LAYER ||
          var->data.location == VARYING_SLOT_VIEW_INDEX ||
          /* psiz not needed for quads */
          var->data.location == VARYING_SLOT_PSIZ)
         continue;

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* swap indices 2 and 3 */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
         nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}

static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                                            nir_imm_int(b, inlined_uniform_offset),
                                            .align_mul = 4, .align_offset = 0,
                                            .range_base = 0, .range = ~0);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
   nir_instr_remove(instr);
   return true;
}

bool
zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
                                       nir_metadata_dominance, NULL);
}

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_flrp32 = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,

      /* We can only support 32-bit ldexp, but NIR doesn't have a flag
       * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
       * ldexp, so we don't just always lower it in NIR). Given that ldexp is
       * effectively unused (no instances in shader-db), it's not worth the
       * effort.
       */
      .lower_ldexp = true,

      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_usub_borrow = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
      .max_unroll_iterations = 0,
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
      /* soft fp64 function inlining will blow up loop bodies and effectively
       * stop Vulkan drivers from unrolling the loops.
       */
      screen->nir_options.max_unroll_iterations_fp64 = 32;
   }

   /* "The OpFRem and OpFMod instructions use cheap approximations of remainder,
    * and the error can be large due to the discontinuity in trunc() and floor().
    * This can produce mathematically unexpected results in some cases, such as
    * FMod(x,x) computing x rather than 0, and can also cause the result to have
    * a different sign than the infinitely precise result."
    *
    * - Table 84. Precision of core SPIR-V Instructions
    *
    * for drivers that are known to have imprecise fmod for doubles, lower dmod
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}

static const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          gl_shader_stage shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}
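
/* callbacks for nir_lower_alu_to_scalar: restrict scalarization to
 * instructions that produce or consume 64-bit values
 */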
static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}

static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return true;
   default:
      break;
   }
   return false;
}
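
/* per-bitsize bo variables as generated for ssbo/ubo/uniform access,
 * indexed by bit size >> 4 (16 -> 1, 32 -> 2, 64 -> 4)
 */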
struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};

static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}

static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}

static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}
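
/* standard nir optimization loop: run until no pass reports progress, then
 * do a late-algebraic cleanup pass
 */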
static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
         NIR_PASS_V(s, lower_64bit_pack);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}

/* Add a check for out of bounds LOD for every texel fetch op
 * It boils down to:
 * - if (lod < query_levels(tex))
 * -    res = txf(tex)
 * - else
 * -    res = (0, 0, 0, 1)
 */
static bool
lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *txf = nir_instr_as_tex(in);
   if (txf->op != nir_texop_txf)
      return false;

   b->cursor = nir_before_instr(in);
   int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
   assert(lod_idx >= 0);
   nir_src lod_src = txf->src[lod_idx].src;
   if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
      return false;

   nir_ssa_def *lod = lod_src.ssa;

   int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
   int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
   nir_tex_instr *levels = nir_tex_instr_create(b->shader,
                                                !!(offset_idx >= 0) + !!(handle_idx >= 0));
   levels->op = nir_texop_query_levels;
   levels->texture_index = txf->texture_index;
   levels->dest_type = nir_type_int | lod->bit_size;
   if (offset_idx >= 0) {
      levels->src[0].src_type = nir_tex_src_texture_offset;
      nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
   }
   if (handle_idx >= 0) {
      levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
   }
   nir_ssa_dest_init(&levels->instr, &levels->dest,
                     nir_tex_instr_dest_size(levels), 32);
   nir_builder_instr_insert(b, &levels->instr);

   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
   nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
   nir_builder_instr_insert(b, &new_txf->instr);

   nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
   nir_const_value oob_values[4] = {0};
   unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
   oob_values[3] = (txf->dest_type & nir_type_float) ?
                   nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
   nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);

   nir_pop_if(b, lod_oob_else);
   nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);

   nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
   nir_instr_remove_v(in);
   return true;
}

/* This pass is used to workaround the lack of out of bounds LOD robustness
 * for texel fetch ops in VK_EXT_image_robustness.
 */
static bool
lower_txf_lod_robustness(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}

static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   assert((int)location >= 0);

   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found > 1) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}
static bool
is_inlined(const bool *inlined, const struct pipe_stream_output *output)
{
   for (unsigned i = 0; i < output->num_components; i++)
      if (!inlined[output->start_component + i])
         return false;
   return true;
}
static void
update_psiz_location(nir_shader *nir, nir_variable *psiz)
{
   uint32_t last_output = util_last_bit64(nir->info.outputs_written);
   if (last_output < VARYING_SLOT_VAR0)
      last_output = VARYING_SLOT_VAR0;
   else
      last_output++;
   /* this should get fixed up by slot remapping */
   psiz->data.location = last_output;
}
static const struct glsl_type *
clamp_slot_type(const struct glsl_type *type, unsigned slot)
{
   /* could be dvec/dmat/mat: each member is the same */
   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
   /* determine size of each member type */
   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
   /* normalize slot idx to current type's size */
   slot %= slot_count;
   unsigned slot_components = glsl_get_components(plain);
   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
      slot_components *= 2;
   /* create a vec4 mask of the selected slot's components out of all the components */
   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
   /* return a vecN of the selected components */
   slot_components = util_bitcount(mask);
   return glsl_vec_type(slot_components);
}
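/* worked example (illustrative only): a dvec3 has 3 * 2 = 6 32bit components,
 * so selecting slot 1 yields
 *
 *    mask = BITFIELD_MASK(6) & BITFIELD_RANGE(4, 4)   -> bits 4-5
 *
 * and util_bitcount(mask) == 2, i.e. the second slot of a dvec3 is a vec2.
 */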
static const struct glsl_type *
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
{
   const struct glsl_type *type = slot_type;
   unsigned slot_count = 0;
   unsigned cur_slot = 0;
   /* iterate over all the members in the struct, stopping once the slot idx is reached */
   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
      /* use array type for slot counting but return array member type for unroll */
      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
      type = glsl_without_array(arraytype);
      slot_count = glsl_count_vec4_slots(arraytype, false, false);
   }
   *slot_idx -= (cur_slot - slot_count);
   if (!glsl_type_is_struct_or_ifc(type))
      /* this is a fully unrolled struct: find the number of vec components to output */
      type = clamp_slot_type(type, *slot_idx);
   return type;
}
static unsigned
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
{
   assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
   const struct glsl_type *orig_type = var->type;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned slot_idx = slot - so_slot;
   if (type != orig_type)
      slot_idx %= glsl_count_vec4_slots(type, false, false);
   /* need to find the vec4 that's being exported by this slot */
   while (glsl_type_is_struct_or_ifc(type))
      type = unroll_struct_type(type, &slot_idx);

   /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
   unsigned num_components = glsl_get_components(glsl_without_array(type));
   /* special handling: clip/cull distance are arrays with vector semantics */
   if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
      num_components = glsl_array_size(type);
      if (slot_idx)
         /* this is the second vec4 */
         num_components %= 4;
      else
         /* this is the first vec4 */
         num_components = MIN2(num_components, 4);
   }
   assert(num_components);
   /* gallium handles xfb in terms of 32bit units */
   if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
      num_components *= 2;
   return num_components;
}
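/* e.g. (illustrative): gl_ClipDistance[5] spans two slots; the first slot
 * reports MIN2(5, 4) = 4 components and the second reports 5 % 4 = 1, while
 * a dvec4 reports 4 * 2 = 8 since gallium counts 32bit units.
 */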
static const struct pipe_stream_output *
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *packed_output = &so_info->output[i];
      if (reverse_map[packed_output->register_index] == slot)
         return packed_output;
   }
   return NULL;
}
static void
update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   bool have_fake_psiz = false;
   nir_foreach_shader_out_variable(var, nir) {
      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
         have_fake_psiz = true;
   }

   bool inlined[VARYING_SLOT_MAX][4] = {0};
   uint64_t packed = 0;
   uint8_t packed_components[VARYING_SLOT_MAX] = {0};
   uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
   uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
   uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
   nir_variable *psiz = NULL;
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         unsigned so_slot;
         while (!var)
            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
         if (var->data.location == VARYING_SLOT_PSIZ)
            psiz = var;
         so_slot = slot + 1;
         slot = reverse_map[output->register_index];
         if (var->data.explicit_xfb_buffer) {
            /* handle dvec3 where gallium splits streamout over 2 registers */
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         }
         if (is_inlined(inlined[slot], output))
            continue;
         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
         unsigned num_components = get_slot_components(var, slot, so_slot);
         /* if this is the entire variable, try to blast it out during the initial declaration;
          * structs must be handled later to ensure accurate analysis
          */
         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         } else {
            /* otherwise store some metadata for later */
            packed |= BITFIELD64_BIT(slot);
            packed_components[slot] += output->num_components;
            packed_streams[slot] |= BITFIELD_BIT(output->stream);
            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
            for (unsigned j = 0; j < output->num_components; j++)
               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
         }
      }
   }

   /* if this was flagged as a packed output before, and if all the components are
    * being output with the same stream on the same buffer with increasing offsets, this entire variable
    * can be consolidated into a single output to conserve locations
    */
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;

         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
            if (!packed_output)
               goto out;

            /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
            if (!(packed & BITFIELD64_BIT(slot)) ||
                util_bitcount(packed_streams[slot]) != 1 ||
                util_bitcount(packed_buffers[slot]) != 1)
               goto out;

            /* if all the components the variable exports to this slot aren't captured, skip consolidation */
            unsigned num_components = get_slot_components(var, slot, var->data.location);
            if (num_components != packed_components[slot])
               goto out;

            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
                  goto out;
               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
         var->data.xfb.buffer = output->output_buffer;
         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
         var->data.offset = output->dst_offset * 4;
         var->data.stream = output->stream;
         /* GLSL specifies that interface blocks are split per-buffer in XFB */
         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            for (unsigned k = 0; k < packed_components[slot]; k++)
               inlined[slot][k] = true;
            packed &= ~BITFIELD64_BIT(slot);
         }
         continue;
      }
out:
      /* these are packed/explicit varyings which can't be exported with normal output */
      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
   /* ensure this doesn't get output in the shader by unsetting location */
   if (have_fake_psiz && psiz)
      update_psiz_location(nir, psiz);
}
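/* e.g. (illustrative summary, not from the original source): capturing all
 * four components of a vec4 varying in one pipe_stream_output lets it be
 * emitted directly via explicit_xfb_buffer at declaration time; a struct or
 * a partially captured slot instead takes the packed-metadata path above and
 * lands in zs->sinfo.so_info for manual emission.
 */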
struct decompose_state {
   nir_variable **split;
   bool needs_w;
};
static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}
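/* e.g. (illustrative): with needs_w set, a load of the original vec4
 * attribute is rebuilt roughly as
 *
 *    vec4(split[1].x, split[2], split[3], split[1].w)
 *
 * where the .w read is the deliberate out-of-bounds channel that returns the
 * format-appropriate default for int vs float attributes.
 */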
static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
   return true;
}
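/* e.g. (illustrative): for a vec4 attribute flagged in
 * decomposed_attrs_without_w, the loop above creates one vec4 clone (reused
 * for .x and the oob .w read) plus scalar clones for .y/.z, each claiming a
 * free driver_location via ffs(bits) before lower_attrib rewrites the loads.
 */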
static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   const bool has_int64 = screen->info.feats.features.shaderInt64;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap: {
      /* convert offset to uintN_t[idx] */
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      return true;
   }
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ubo: {
      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
                        nir_dest_bit_size(intr->dest) == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
            else
               load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_load_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   default:
      break;
   }
   return false;
}
static bool
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}
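/* e.g. (illustrative): a 32bit load_ssbo at byte offset 16 becomes index
 * 16 / (32 / 8) = 4 into the uint[] view, and a 64bit load without
 * shaderInt64 becomes two 32bit loads at indices 4 and 5 that are repacked
 * with nir_pack_64_2x32_split.
 */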
static nir_variable *
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
{
   nir_variable *var, **ptr;
   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;

   if (ssbo)
      ptr = &bo->ssbo[bit_size >> 4];
   else {
      if (!idx)
         ptr = &bo->uniforms[bit_size >> 4];
      else
         ptr = &bo->ubo[bit_size >> 4];
   }
   var = *ptr;
   if (!var) {
      if (ssbo)
         var = bo->ssbo[32 >> 4];
      else {
         if (!idx)
            var = bo->uniforms[32 >> 4];
         else
            var = bo->ubo[32 >> 4];
      }
      var = nir_variable_clone(var, shader);
      if (ssbo)
         var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
      else
         var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
      *ptr = var;
      nir_shader_add_variable(shader, var);

      struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
      fields[0].name = ralloc_strdup(shader, "base");
      fields[1].name = ralloc_strdup(shader, "unsized");
      unsigned array_size = glsl_get_length(var->type);
      const struct glsl_type *bare_type = glsl_without_array(var->type);
      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
      unsigned length = glsl_get_length(array_type);
      const struct glsl_type *type;
      const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
      if (bit_size > 32) {
         assert(bit_size == 64);
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
      } else {
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
      }
      fields[0].type = type;
      fields[1].type = unsized;
      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
      var->data.driver_location = idx;
   }
   return var;
}
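/* e.g. (illustrative): for bit_size == 16 with a 32bit base array of length
 * 64, the rewritten variable is roughly
 *
 *    struct { uint16_t base[128]; uint16_t unsized[]; } ssbos@16[array_size];
 *
 * since length * (32 / 16) doubles the element count while halving the width,
 * and 64bit uses length / 2 instead.
 */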
static void
rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
{
   nir_intrinsic_op op;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
      op = nir_intrinsic_deref_atomic;
   else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
      op = nir_intrinsic_deref_atomic_swap;
   else
      unreachable("unknown intrinsic");
   nir_ssa_def *offset = intr->src[1].ssa;
   nir_src *src = &intr->src[0];
   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = src->ssa;
   if (bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);

   /* generate new atomic deref ops for every component */
   nir_ssa_def *result[4];
   unsigned num_components = nir_dest_num_components(intr->dest);
   for (unsigned i = 0; i < num_components; i++) {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1,
                        nir_dest_bit_size(intr->dest));
      nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
      /* deref ops have no offset src, so copy the srcs after it */
      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
         nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
      nir_builder_instr_insert(b, &new_instr->instr);

      result[i] = &new_instr->dest.ssa;
      offset = nir_iadd_imm(b, offset, 1);
   }

   nir_ssa_def *load = nir_vec(b, result, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   nir_instr_remove(instr);
}
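/* illustrative pseudo-NIR of the rewrite above (an approximation, not
 * actual pass output):
 *
 *    %s = deref_var &ssbos@32 -> [idx] -> .base -> [offset]
 *    %r = deref_atomic(%s, %data) (atomic_op=iadd)
 *
 * with one deref_atomic emitted per component and the results revectorized.
 */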
static bool
remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);
   nir_src *src;
   bool ssbo = true;
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      rewrite_atomic_ssbo_instr(b, instr, bo);
      return true;
   case nir_intrinsic_store_ssbo:
      src = &intr->src[1];
      var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      ssbo = false;
      break;
   default:
      return false;
   }
   assert(var);
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
   if (!ssbo && bo->first_ubo && var->data.driver_location)
      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
   else if (ssbo && bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
   assert(intr->num_components <= 2);
   if (is_load) {
      nir_ssa_def *result[2];
      for (unsigned i = 0; i < intr->num_components; i++) {
         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
         offset = nir_iadd_imm(b, offset, 1);
      }
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   } else {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
   }
   nir_instr_remove(instr);
   return true;
}
static bool
remove_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
}
static bool
find_var_deref(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_var && deref->var == var)
               return true;
         }
      }
   }
   return false;
}
struct clamp_layer_output_state {
   nir_variable *original;
   nir_variable *clamped;
};
static void
clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
{
   nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
                                                    nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
                                                    .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
   nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
   nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
   nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
                                  nir_load_deref(b, original_deref),
                                  nir_imm_int(b, 0));
   nir_store_deref(b, clamped_deref, layer, 0);
}
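/* in GLSL terms this emits roughly (illustrative only):
 *
 *    layer_clamped = (framebuffer_is_layered == 1) ? gl_Layer : 0;
 *
 * with framebuffer_is_layered sourced from the gfx push constant block.
 */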
static bool
clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct clamp_layer_output_state *state = data;
   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
          intr->intrinsic != nir_intrinsic_emit_vertex)
         return false;
      b->cursor = nir_before_instr(instr);
      clamp_layer_output_emit(b, state);
      return true;
   }
   default: return false;
   }
}
static bool
clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
{
   switch (vs->info.stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_TESS_EVAL:
      break;
   default:
      unreachable("invalid last vertex stage!");
   }
   struct clamp_layer_output_state state = {0};
   state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
   if (!state.original || !find_var_deref(vs, state.original))
      return false;
   state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
   state.clamped->data.location = VARYING_SLOT_LAYER;
   nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
   if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
      state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
      state.original->data.driver_location = (*next_location)++;
      if (fs_var) {
         fs_var->data.location = state.original->data.location;
         fs_var->data.driver_location = state.original->data.driver_location;
      }
   } else {
      if (state.original->data.explicit_xfb_buffer) {
         /* Will xfb the clamped output but still better than nothing */
         state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
         state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
         state.clamped->data.xfb.stride = state.original->data.xfb.stride;
         state.clamped->data.offset = state.original->data.offset;
         state.clamped->data.stream = state.original->data.stream;
      }
      state.original->data.mode = nir_var_shader_temp;
      nir_fixup_deref_modes(vs);
   }
   if (vs->info.stage == MESA_SHADER_GEOMETRY) {
      nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
   } else {
      nir_builder b;
      nir_function_impl *impl = nir_shader_get_entrypoint(vs);
      b = nir_builder_at(nir_after_cf_list(&impl->body));
      assert(impl->end_block->predecessors->entries == 1);
      clamp_layer_output_emit(&b, &state);
      nir_metadata_preserve(impl, nir_metadata_dominance);
   }
   optimize_nir(vs, NULL);
   NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   return true;
}
static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (slot) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(slot >= VARYING_SLOT_PATCH0);
         slot -= VARYING_SLOT_PATCH0;
      }
      if (slot_map[slot] == 0xff) {
         assert(*reserved < MAX_VARYING);
         unsigned num_slots;
         if (nir_is_arrayed_io(var, stage))
            num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
         else
            num_slots = glsl_count_vec4_slots(var->type, false, false);
         assert(*reserved + num_slots <= MAX_VARYING);
         for (unsigned i = 0; i < num_slots; i++)
            slot_map[slot + i] = (*reserved)++;
      }
      slot = slot_map[slot];
      assert(slot < MAX_VARYING);
      var->data.driver_location = slot;
      break;
   }
}
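/* e.g. (illustrative): with *reserved starting at 0, the first unmapped slot
 * encountered gets slot_map[slot] = 0, a vec4[2] output reserves two
 * consecutive entries, and the builtins short-circuited above never consume
 * slot_map space at all.
 */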
ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   return var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}
static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (slot) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(slot >= VARYING_SLOT_PATCH0);
         slot -= VARYING_SLOT_PATCH0;
      }
      if (slot_map[slot] == (unsigned char)-1) {
         /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
          * so keep for now and eliminate later
          */
         if (is_texcoord(stage, var)) {
            var->data.driver_location = -1;
            return true;
         }
         /* patch variables may be read in the workgroup */
         if (stage != MESA_SHADER_TESS_CTRL)
            return false;
         unsigned num_slots;
         if (nir_is_arrayed_io(var, stage))
            num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
         else
            num_slots = glsl_count_vec4_slots(var->type, false, false);
         assert(*reserved + num_slots <= MAX_VARYING);
         for (unsigned i = 0; i < num_slots; i++)
            slot_map[slot + i] = (*reserved)++;
      }
      var->data.driver_location = slot_map[slot];
      break;
   }
   return true;
}
static bool
rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable *var = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var != var)
      return false;
   b->cursor = nir_before_instr(instr);
   nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
      switch (var->data.location) {
      case VARYING_SLOT_COL0:
      case VARYING_SLOT_COL1:
      case VARYING_SLOT_BFC0:
      case VARYING_SLOT_BFC1:
         /* default color is 0,0,0,1 */
         if (nir_dest_num_components(intr->dest) == 4)
            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
         break;
      default:
         break;
      }
   }
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
   nir_instr_remove(instr);
   return true;
}
void
zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
      /* remove injected pointsize from all but the last vertex stage */
      nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
      if (var && !var->data.explicit_location && !nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
         var->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(producer);
         NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
         optimize_nir(producer, NULL);
      }
   }
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map)) {
            /* this is an output, nothing more needs to be done for it to be dropped */
            var->data.mode = nir_var_shader_temp;
            do_fixup = true;
         }
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten */
            nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
         }
      }
      if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
         do_fixup |= clamp_layer_output(producer, consumer, &reserved);
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
}
/* all types that hit this function contain something that is 64bit */
static const struct glsl_type *
rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
{
   if (glsl_type_is_array(type)) {
      const struct glsl_type *child = glsl_get_array_element(type);
      unsigned elements = glsl_array_size(type);
      unsigned stride = glsl_get_explicit_stride(type);
      return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
   }
   /* rewrite structs recursively */
   if (glsl_type_is_struct_or_ifc(type)) {
      unsigned nmembers = glsl_get_length(type);
      struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
      unsigned xfb_offset = 0;
      for (unsigned i = 0; i < nmembers; i++) {
         const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
         fields[i] = *f;
         xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
         if (i < nmembers - 1 && xfb_offset % 8 &&
             (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
              (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
            var->data.is_xfb = true;
         }
         fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
      }
      return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
   }
   if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
      return type;
   if (doubles_only && glsl_type_is_vector_or_scalar(type))
      return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   if (glsl_type_is_scalar(type))
      return glsl_vector_type(base_type, 2);
   unsigned num_components;
   if (glsl_type_is_matrix(type)) {
      /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
      unsigned vec_components = glsl_get_vector_elements(type);
      if (vec_components == 3)
         vec_components = 4;
      num_components = vec_components * 2 * glsl_get_matrix_columns(type);
   } else {
      num_components = glsl_get_vector_elements(type) * 2;
      if (num_components <= 4)
         return glsl_vector_type(base_type, num_components);
   }
   /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
   struct glsl_struct_field fields[8] = {0};
   unsigned remaining = num_components;
   unsigned nfields = 0;
   for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
      assert(i < ARRAY_SIZE(fields));
      fields[i].name = "";
      fields[i].offset = i * 16;
      fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
   }
   char buf[64];
   snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
   return glsl_struct_type(fields, nfields, buf, true);
}
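/* e.g. (illustrative): dvec2 -> vec4 (2 * 2 = 4 32bit components), dvec3
 * (6 components) exceeds a vec4 and becomes struct(dvec3) { vec4; vec2; },
 * and dmat3 pads each column to vec4 width for 4 * 2 * 3 = 24 components.
 */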
static const struct glsl_type *
deref_is_matrix(nir_deref_instr *deref)
{
   if (glsl_type_is_matrix(deref->type))
      return deref->type;
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent)
      return deref_is_matrix(parent);
   return NULL;
}
static bool
lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
                          struct hash_table *derefs, struct set *deletes, bool doubles_only)
{
   bool func_progress = false;
   nir_builder b = nir_builder_create(impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->modes & var->data.mode))
               continue;
            if (nir_deref_instr_get_variable(deref) != var)
               continue;

            /* matrix types are special: store the original deref type for later use */
            const struct glsl_type *matrix = deref_is_matrix(deref);
            nir_deref_instr *parent = nir_deref_instr_parent(deref);
            if (!matrix) {
               /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
               hash_table_foreach(derefs, he) {
                  /* propagate parent matrix type to row deref */
                  if (he->key == parent)
                     matrix = he->data;
               }
            }
            _mesa_hash_table_insert(derefs, deref, (void*)matrix);
            if (deref->deref_type == nir_deref_type_var)
               deref->type = var->type;
            else
               deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
            break;
         }
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_store_deref &&
                intr->intrinsic != nir_intrinsic_load_deref)
               break;
            if (nir_intrinsic_get_var(intr, 0) != var)
               break;
            if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
                (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
               break;
            b.cursor = nir_before_instr(instr);
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned num_components = intr->num_components * 2;
            nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
            /* this is the stored matrix type from the deref */
            struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
            const struct glsl_type *matrix = he ? he->data : NULL;
            if (doubles_only && !matrix)
               break;
            func_progress = true;
            if (intr->intrinsic == nir_intrinsic_store_deref) {
               /* first, unpack the src data to 32bit vec2 components */
               for (unsigned i = 0; i < intr->num_components; i++) {
                  nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
                  comp[i * 2] = nir_channel(&b, ssa, 0);
                  comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
               }
               unsigned wrmask = nir_intrinsic_write_mask(intr);
               unsigned mask = 0;
               /* expand writemask for doubled components */
               for (unsigned i = 0; i < intr->num_components; i++) {
                  if (wrmask & BITFIELD_BIT(i))
                     mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
               }
               if (matrix) {
                  /* matrix types always come from array (row) derefs */
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
                  nir_ssa_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
                   * - perform the store in the block
                   */
                  for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
                     nir_push_if(&b, nir_ieq_imm(&b, index, idx));
                     unsigned vec_components = glsl_get_vector_elements(matrix);
                     /* always clamp dvec3 to 4 components */
                     if (vec_components == 3)
                        vec_components = 4;
                     unsigned start_component = idx * vec_components * 2;
                     /* struct member to deref */
                     unsigned member = start_component / 4;
                     /* number of components remaining */
                     unsigned remaining = num_components;
                     for (unsigned i = 0; i < num_components; member++) {
                        if (!(mask & BITFIELD_BIT(i)))
                           continue;
                        assert(member < glsl_get_length(var_deref->type));
                        /* deref the rewritten struct to the appropriate vec4/vec2 */
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                        unsigned incr = MIN2(remaining, 4);
                        /* assemble the write component vec */
                        nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
                        /* use the number of components being written as the writemask */
                        if (glsl_get_vector_elements(strct->type) > val->num_components)
                           val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
                        nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
                        remaining -= incr;
                        i += incr;
                     }
                     nir_pop_if(&b, NULL);
                  }
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple store case: just write out the components */
                  nir_ssa_def *dest = nir_vec(&b, comp, num_components);
                  nir_store_deref(&b, deref, dest, mask);
               } else {
                  /* writing > 4 components: access the struct and write to the appropriate vec4 members */
                  for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
                     if (!(mask & BITFIELD_MASK(4)))
                        continue;
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
                     nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
                     if (glsl_get_vector_elements(strct->type) > dest->num_components)
                        dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
                     nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
                     mask >>= 4;
                  }
               }
            } else {
               nir_ssa_def *dest = NULL;
               if (matrix) {
                  /* matrix types always come from array (row) derefs */
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
                  nir_ssa_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
                   * - phi the loads using the array index
                   */
                  unsigned cols = glsl_get_matrix_columns(matrix);
                  nir_ssa_def *dests[4];
                  for (unsigned idx = 0; idx < cols; idx++) {
                     /* don't add an if for the final row: this will be handled in the else */
                     if (idx < cols - 1)
                        nir_push_if(&b, nir_ieq_imm(&b, index, idx));
                     unsigned vec_components = glsl_get_vector_elements(matrix);
                     /* always clamp dvec3 to 4 components */
                     if (vec_components == 3)
                        vec_components = 4;
                     unsigned start_component = idx * vec_components * 2;
                     /* struct member to deref */
                     unsigned member = start_component / 4;
                     /* number of components remaining */
                     unsigned remaining = num_components;
                     /* component index */
                     unsigned comp_idx = 0;
                     for (unsigned i = 0; i < num_components; member++) {
                        assert(member < glsl_get_length(var_deref->type));
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                        nir_ssa_def *load = nir_load_deref(&b, strct);
                        unsigned incr = MIN2(remaining, 4);
                        /* repack the loads to 64bit */
                        for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
                           comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
                        remaining -= incr;
                        i += incr;
                     }
                     dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
                     if (idx < cols - 1)
                        nir_push_else(&b, NULL);
                  }
                  /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
                  for (unsigned idx = cols - 1; idx >= 1; idx--) {
                     nir_pop_if(&b, NULL);
                     dest = nir_if_phi(&b, dests[idx - 1], dest);
                  }
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple load case */
                  nir_ssa_def *load = nir_load_deref(&b, deref);
                  /* pack 32bit loads into 64bit: this will automagically get optimized out later */
                  for (unsigned i = 0; i < intr->num_components; i++) {
                     comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
                  }
                  dest = nir_vec(&b, comp, intr->num_components);
               } else {
                  /* reading > 4 components: access the struct and load the appropriate vec4 members */
                  for (unsigned i = 0; i < 2; i++, num_components -= 4) {
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
                     nir_ssa_def *load = nir_load_deref(&b, strct);
                     comp[i * 2] = nir_pack_64_2x32(&b,
                                                    nir_trim_vector(&b, load, 2));
                     if (num_components > 2)
                        comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
                  }
                  dest = nir_vec(&b, comp, intr->num_components);
               }
               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
            }
            _mesa_set_add(deletes, instr);
            break;
         }
         default:
            break;
         }
      }
   }
   if (func_progress)
      nir_metadata_preserve(impl, nir_metadata_none);
   /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
   set_foreach_remove(deletes, he)
      nir_instr_remove((void*)he->key);
   return func_progress;
}
static bool
lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
                      struct set *deletes, bool doubles_only)
{
   if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
      return false;
   var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
   /* once type is rewritten, rewrite all loads and stores */
   nir_foreach_function_impl(impl, shader)
      lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
   return true;
}
/* rewrite all input/output variables using 32bit types and load/stores */
static bool
lower_64bit_vars(nir_shader *shader, bool doubles_only)
{
   bool progress = false;
   struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
      progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_function_temp_variable(var, impl) {
         if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
            continue;
         var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
         progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
      }
   }
   ralloc_free(deletes);
   ralloc_free(derefs);
   if (progress) {
      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
      nir_lower_phis_to_scalar(shader, false);
      optimize_nir(shader, NULL);
   }
   return progress;
}
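/* e.g. (illustrative): after this pass a dvec4 shader_out is typed as a
 * 32bit struct, each 64bit load/store on it has been split and repacked with
 * pack/unpack ops, and the scalarization above lets optimize_nir() fold the
 * resulting pack/unpack chains away.
 */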
static bool
split_blocks(nir_shader *nir)
{
   bool progress = false;
   bool changed = true;
   do {
      changed = false;
      nir_foreach_shader_out_variable(var, nir) {
         const struct glsl_type *base_type = glsl_without_array(var->type);
         nir_variable *members[32]; //can't have more than this without breaking NIR
         if (!glsl_type_is_struct(base_type))
            continue;
         if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
            continue;
         if (glsl_count_attribute_slots(var->type, false) == 1)
            continue;
         unsigned offset = 0;
         for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
            members[i] = nir_variable_clone(var, nir);
            members[i]->type = glsl_get_struct_field(var->type, i);
            members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
            members[i]->data.location += offset;
            offset += glsl_count_attribute_slots(members[i]->type, false);
            nir_shader_add_variable(nir, members[i]);
         }
         nir_foreach_function_impl(impl, nir) {
            bool func_progress = false;
            nir_builder b = nir_builder_create(impl);
            nir_foreach_block(block, impl) {
               nir_foreach_instr_safe(instr, block) {
                  switch (instr->type) {
                  case nir_instr_type_deref: {
                     nir_deref_instr *deref = nir_instr_as_deref(instr);
                     if (!(deref->modes & nir_var_shader_out))
                        continue;
                     if (nir_deref_instr_get_variable(deref) != var)
                        continue;
                     if (deref->deref_type != nir_deref_type_struct)
                        continue;
                     nir_deref_instr *parent = nir_deref_instr_parent(deref);
                     if (parent->deref_type != nir_deref_type_var)
                        continue;
                     deref->modes = nir_var_shader_temp;
                     parent->modes = nir_var_shader_temp;
                     b.cursor = nir_before_instr(instr);
                     nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
                     nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
                     nir_instr_remove(&deref->instr);
                     func_progress = true;
                     break;
                  }
                  default:
                     break;
                  }
               }
            }
            if (func_progress)
               nir_metadata_preserve(impl, nir_metadata_none);
         }
         var->data.mode = nir_var_shader_temp;
         changed = true;
         progress = true;
      }
   } while (changed);
   return progress;
}
static void
zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
{
   FILE *fp = fopen(file, "wb");
   if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
      fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
   }
}
static VkShaderStageFlagBits
zink_get_next_stage(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
             VK_SHADER_STAGE_GEOMETRY_BIT |
             VK_SHADER_STAGE_FRAGMENT_BIT;
   case MESA_SHADER_TESS_CTRL:
      return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
   case MESA_SHADER_TESS_EVAL:
      return VK_SHADER_STAGE_GEOMETRY_BIT |
             VK_SHADER_STAGE_FRAGMENT_BIT;
   case MESA_SHADER_GEOMETRY:
      return VK_SHADER_STAGE_FRAGMENT_BIT;
   case MESA_SHADER_FRAGMENT:
   case MESA_SHADER_COMPUTE:
      return 0;
   default:
      unreachable("invalid shader stage");
   }
}
struct zink_shader_object
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
{
   VkShaderModuleCreateInfo smci = {0};
   VkShaderCreateInfoEXT sci = {0};

   if (!spirv)
      spirv = zs->spirv;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
   }

   sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
   sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
   sci.nextStage = zink_get_next_stage(zs->info.stage);
   sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
   sci.codeSize = spirv->num_words * sizeof(uint32_t);
   sci.pCode = spirv->words;
   sci.pName = "main";
   VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
   if (pg) {
      sci.setLayoutCount = pg->num_dsl;
      sci.pSetLayouts = pg->dsl;
   } else {
      sci.setLayoutCount = zs->info.stage + 1;
      dsl[zs->info.stage] = zs->precompile.dsl;
      sci.pSetLayouts = dsl;
   }
   VkPushConstantRange pcr;
   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
   pcr.offset = 0;
   pcr.size = sizeof(struct zink_gfx_push_constant);
   sci.pushConstantRangeCount = 1;
   sci.pPushConstantRanges = &pcr;

   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (zink_debug & ZINK_DEBUG_VALIDATION) {
      static const struct spirv_to_nir_options spirv_options = {
         .environment = NIR_SPIRV_VULKAN,
         .caps = {
            .tessellation = true,
            .float_controls = true,
            .image_ms_array = true,
            .image_read_without_format = true,
            .image_write_without_format = true,
            .storage_image_ms = true,
            .geometry_streams = true,
            .storage_8bit = true,
            .storage_16bit = true,
            .variable_pointers = true,
            .stencil_export = true,
            .post_depth_coverage = true,
            .transform_feedback = true,
            .device_group = true,
            .draw_parameters = true,
            .shader_viewport_index_layer = true,
            .physical_storage_buffer_address = true,
            .int64_atomics = true,
            .subgroup_arithmetic = true,
            .subgroup_basic = true,
            .subgroup_ballot = true,
            .subgroup_quad = true,
            .subgroup_shuffle = true,
            .subgroup_vote = true,
            .vk_memory_model = true,
            .vk_memory_model_device_scope = true,
            .demote_to_helper_invocation = true,
            .sparse_residency = true,
         },
         .ubo_addr_format = nir_address_format_32bit_index_offset,
         .ssbo_addr_format = nir_address_format_32bit_index_offset,
         .phys_ssbo_addr_format = nir_address_format_64bit_global,
         .push_const_addr_format = nir_address_format_logical,
         .shared_addr_format = nir_address_format_32bit_offset,
      };
      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries = NULL;
      VkSpecializationInfo sinfo = {0};
      VkSpecializationMapEntry me[3];
      uint32_t size[3] = {1,1,1};
      if (!zs->info.workgroup_size[0]) {
         sinfo.mapEntryCount = 3;
         sinfo.pMapEntries = &me[0];
         sinfo.dataSize = sizeof(uint32_t) * 3;
         sinfo.pData = size;
         uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
         for (int i = 0; i < 3; i++) {
            me[i].size = sizeof(uint32_t);
            me[i].constantID = ids[i];
            me[i].offset = i * sizeof(uint32_t);
         }
         spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
      }
      nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
                                     spec_entries, num_spec_entries,
                                     clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
      assert(nir);
      ralloc_free(nir);
      free(spec_entries);
   }

   VkResult ret;
   struct zink_shader_object obj = {0};
   if (!can_shobj || !screen->info.have_EXT_shader_object)
      ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
   else
      ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
   bool success = zink_screen_handle_vkresult(screen, ret);
   assert(success);
   return obj;
}
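/* e.g. (illustrative): when EXT_shader_object is available and can_shobj is
 * set, the SPIR-V becomes a VkShaderEXT in obj.obj; otherwise a classic
 * VkShaderModule lands in obj.mod for pipeline-based compilation.
 */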
static void
prune_io(nir_shader *nir)
{
   nir_foreach_shader_in_variable_safe(var, nir) {
      if (!find_var_deref(nir, var))
         var->data.mode = nir_var_shader_temp;
   }
   nir_foreach_shader_out_variable_safe(var, nir) {
      if (!find_var_deref(nir, var))
         var->data.mode = nir_var_shader_temp;
   }
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
}
static void
flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
{
   /* unconvert from zink_binding() */
   uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
   assert(sampler_id < 32); //bitfield size for tracking
   zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
}
static nir_ssa_def *
rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
{
   assert(var);
   const struct glsl_type *type = glsl_without_array(var->type);
   enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
   bool is_int = glsl_base_type_is_integer(ret_type);
   unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
   unsigned dest_size = nir_dest_bit_size(tex->dest);
   b->cursor = nir_after_instr(&tex->instr);
   unsigned num_components = nir_dest_num_components(tex->dest);
   bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
   if (bit_size == dest_size && !rewrite_depth)
      return NULL;
   nir_ssa_def *dest = &tex->dest.ssa;
   if (rewrite_depth && zs) {
      /* If only .x is used in the NIR, then it's effectively not a legacy depth
       * sample anyway and we don't want to ask for shader recompiles. This is
       * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
       * LUMINANCE, so apps just use the first channel.
       */
      if (nir_ssa_def_components_read(dest) & ~1) {
         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
            flag_shadow_tex(var, zs);
         else
            mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
      }
      return NULL;
   }
   if (bit_size != dest_size) {
      tex->dest.ssa.bit_size = bit_size;
      tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);

      if (is_int) {
         if (glsl_unsigned_base_type_of(ret_type) == ret_type)
            dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
         else
            dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
      } else {
         dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
      }
      if (rewrite_depth)
         return dest;
      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
   } else if (rewrite_depth) {
      return dest;
   }
   return dest;
}
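/* Example of the conversion above: a sampler whose GLSL result type is
 * 16-bit but whose NIR dest is 32-bit gets its tex dest narrowed to 16
 * bits, then a u2u32/i2i32/f2f32 is emitted right after the instruction to
 * convert back to the original bit size and every other use is rewritten to
 * read the converted value; legacy (vec4-result) depth sampling is instead
 * flagged for a draw-time recompile via flag_shadow_tex().
 */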
struct lower_zs_swizzle_state {
   bool shadow_only;
   unsigned base_sampler_id;
   const struct zink_zs_swizzle_key *swizzle;
};
static bool
lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_zs_swizzle_state *state = data;
   const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
   assert(state->shadow_only || swizzle_key);
   if (instr->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
       (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
      return false;
   if (tex->is_shadow && tex->op == nir_texop_tg4)
      /* Will not even try to emulate the shadow comparison */
      return false;
   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   nir_variable *var = NULL;
   if (handle != -1)
      /* gtfo bindless depth texture mode */
      return false;
   nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
      if (glsl_type_is_sampler(glsl_without_array(img->type))) {
         unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
         if (tex->texture_index >= img->data.driver_location &&
             tex->texture_index < img->data.driver_location + size) {
            var = img;
            break;
         }
      }
   }
   assert(var);
   uint32_t sampler_id = var->data.binding - state->base_sampler_id;
   const struct glsl_type *type = glsl_without_array(var->type);
   enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
   bool is_int = glsl_base_type_is_integer(ret_type);
   unsigned num_components = nir_dest_num_components(tex->dest);
   if (tex->is_shadow)
      tex->is_new_style_shadow = true;
   nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
   assert(dest || !state->shadow_only);
   if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
      return false;
   else if (!dest)
      dest = &tex->dest.ssa;
   else
      tex->dest.ssa.num_components = 1;
   if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
      /* these require manual swizzles */
      if (tex->op == nir_texop_tg4) {
         assert(!tex->is_shadow);
         nir_ssa_def *swizzle;
         switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
         case PIPE_SWIZZLE_0:
            swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
               swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
            else
               swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
            break;
         default:
            if (!tex->component)
               return false;
            tex->component = 0;
            return true;
         }
         nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
      } else {
         nir_ssa_def *vec[4];
         for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
            switch (swizzle_key->swizzle[sampler_id].s[i]) {
            case PIPE_SWIZZLE_0:
               vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
               break;
            case PIPE_SWIZZLE_1:
               if (is_int)
                  vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
               else
                  vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
               break;
            default:
               vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
               break;
            }
         }
         nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
         nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
      }
   } else {
      assert(tex->is_shadow);
      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
      nir_ssa_def *splat = nir_vec(b, vec, num_components);
      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
   }
   return true;
}
/* Applies in-shader swizzles when necessary for depth/shadow sampling.
 *
 * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
 * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
 * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
 * shader to expand out to vec4. Since this depends on sampler state, it's a
 * draw-time shader recompile to do so.
 *
 * We may also need to apply shader swizzles for
 * driver_workarounds.needs_zs_shader_swizzle.
 */
static bool
lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
{
   /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
   unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
   struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
   return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
}
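/* For example, with GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE a legacy shadow
 * sample must yield (r, r, r, 1.0): the pass above turns the tex instr into
 * a scalar new-style shadow sample and then hand-builds the vec4 from the
 * zink_zs_swizzle_key captured from sampler state, which is why this is a
 * draw-time recompile.
 */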
static bool
invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_point_coord)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
                               nir_fsub_imm(b, 1.0, nir_channel(b, &intr->dest.ssa, 1)));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}
static bool
invert_point_coord(nir_shader *nir)
{
   if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
      return false;
   return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
}
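/* i.e. when the fs key sets point_coord_yinvert, each gl_PointCoord read is
 * replaced with vec2(x, 1.0 - y) immediately after the load, flipping the Y
 * origin of the point sprite coordinates.
 */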
static struct zink_shader_object
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
{
   struct zink_shader_info *sinfo = &zs->sinfo;
   prune_io(nir);

   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
      nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

   struct zink_shader_object obj;
   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
   if (spirv)
      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);

   /* TODO: determine if there's any reason to cache spirv output? */
   if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zs->spirv = spirv;
   else
      obj.spirv = spirv;
   return obj;
}
struct zink_shader_object
zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
{
   struct zink_shader_info *sinfo = &zs->sinfo;
   bool need_optimize = false;
   bool inlined_uniforms = false;

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);
         inlined_uniforms = true;
      }

      /* TODO: use a separate mem ctx here for ralloc */

      if (!screen->optimal_keys) {
         switch (zs->info.stage) {
         case MESA_SHADER_VERTEX: {
            uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
            const struct zink_vs_key *vs_key = zink_vs_key(key);
            switch (vs_key->size) {
            case 4:
               decomposed_attrs = vs_key->u32.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
               break;
            case 2:
               decomposed_attrs = vs_key->u16.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
               break;
            case 1:
               decomposed_attrs = vs_key->u8.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
               break;
            }
            if (decomposed_attrs || decomposed_attrs_without_w)
               NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
            break;
         }

         case MESA_SHADER_GEOMETRY:
            if (zink_gs_key(key)->lower_line_stipple) {
               NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
               NIR_PASS_V(nir, nir_lower_var_copies);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_line_smooth) {
               NIR_PASS_V(nir, lower_line_smooth_gs);
               NIR_PASS_V(nir, nir_lower_var_copies);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_gl_point) {
               NIR_PASS_V(nir, lower_gl_point_gs);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_pv_mode) {
               NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
               need_optimize = true; //TODO verify that this is required
            }
            break;

         default:
            break;
         }
      }

      switch (zs->info.stage) {
      case MESA_SHADER_VERTEX:
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->sinfo.have_xfb)
               sinfo->last_vertex = true;

            if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         if (zink_vs_key_base(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
         break;
      case MESA_SHADER_FRAGMENT:
         if (zink_fs_key(key)->lower_line_smooth) {
            NIR_PASS_V(nir, lower_line_smooth_fs,
                       zink_fs_key(key)->lower_line_stipple);
            need_optimize = true;
         } else if (zink_fs_key(key)->lower_line_stipple)
            NIR_PASS_V(nir, lower_line_stipple_fs);

         if (zink_fs_key(key)->lower_point_smooth) {
            NIR_PASS_V(nir, nir_lower_point_smooth);
            NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
            nir->info.fs.uses_discard = true;
            need_optimize = true;
         }

         if (zink_fs_key(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);

         if (!zink_fs_key_base(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key_base(key)->coord_replace_bits)
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
         if (zink_fs_key_base(key)->point_coord_yinvert)
            NIR_PASS_V(nir, invert_point_coord);
         if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
            nir_foreach_shader_in_variable(var, nir)
               var->data.sample = true;
            nir->info.fs.uses_sample_qualifier = true;
            nir->info.fs.uses_sample_shading = true;
         }
         if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
            NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         nir_foreach_shader_in_variable_safe(var, nir) {
            if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
               continue;
            nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
            var->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         break;
      case MESA_SHADER_COMPUTE:
         if (zink_cs_key(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
         break;
      default:
         break;
      }
      if (key->base.needs_zs_shader_swizzle) {
         assert(extra_data);
         NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
      }
      if (key->base.nonseamless_cube_mask) {
         NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
         need_optimize = true;
      }
   }
   if (screen->driconf.inline_uniforms) {
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
      NIR_PASS_V(nir, rewrite_bo_access, screen);
      NIR_PASS_V(nir, remove_bo_access, zs);
      need_optimize = true;
   }
   if (inlined_uniforms) {
      optimize_nir(nir, zs);

      /* This must be done again. */
      NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                       nir_var_shader_out);

      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
         zs->can_inline = false;
   } else if (need_optimize)
      optimize_nir(nir, zs);

   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
   ralloc_free(nir);
   return obj;
}
struct zink_shader_object
zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   /* TODO: maybe compile multiple variants for different set counts for compact mode? */
   int set = zs->info.stage == MESA_SHADER_FRAGMENT;
   if (screen->info.have_EXT_shader_object)
      set = zs->info.stage;
   unsigned offsets[4];
   zink_descriptor_shader_get_binding_offsets(zs, offsets);
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
      if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
         continue;
      var->data.descriptor_set = set;
      switch (var->data.mode) {
      case nir_var_mem_ubo:
         var->data.binding = !!var->data.driver_location;
         break;
      case nir_var_uniform:
         if (glsl_type_is_sampler(glsl_without_array(var->type)))
            var->data.binding += offsets[1];
         break;
      case nir_var_mem_ssbo:
         var->data.binding += offsets[2];
         break;
      case nir_var_image:
         var->data.binding += offsets[3];
         break;
      default:
         break;
      }
   }
   if (screen->driconf.inline_uniforms) {
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
      NIR_PASS_V(nir, rewrite_bo_access, screen);
      NIR_PASS_V(nir, remove_bo_access, zs);
   }
   optimize_nir(nir, zs);
   zink_descriptor_shader_init(screen, zs);
   zs->sinfo.last_vertex = zs->sinfo.have_xfb;
   nir_shader *nir_clone = NULL;
   if (screen->info.have_EXT_shader_object)
      nir_clone = nir_shader_clone(nir, nir);
   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
      /* always try to pre-generate a tcs in case it's needed */
      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
         nir_shader *nir_tcs = NULL;
         /* use max pcp for compat */
         zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
         nir_tcs->info.separate_shader = true;
         zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
         ralloc_free(nir_tcs);
      }
      if (zs->info.stage == MESA_SHADER_VERTEX || zs->info.stage == MESA_SHADER_TESS_EVAL) {
         /* create a second variant with PSIZ removed:
          * this works around a bug in drivers using nir_assign_io_var_locations()
          * where builtins that aren't read by following stages get assigned
          * driver locations before varyings and break the i/o interface between shaders even
          * though zink has correctly assigned all locations
          */
         nir_variable *var = nir_find_variable_with_location(nir_clone, nir_var_shader_out, VARYING_SLOT_PSIZ);
         if (var && !var->data.explicit_location) {
            var->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir_clone);
            NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir_clone, NULL);
            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
            spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
            zs->precompile.no_psiz_obj.spirv = NULL;
         }
      }
   }
   ralloc_free(nir);
   spirv_shader_delete(obj.spirv);
   obj.spirv = NULL;
   return obj;
}
static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}
static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}
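/* Vulkan's InstanceIndex builtin includes firstInstance while GL's
 * gl_InstanceID does not, so e.g. in a draw with firstInstance == 3 the
 * instance with InstanceIndex == 5 must report gl_InstanceID == 2; the isub
 * above subtracts the base instance to recover GL semantics.
 */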
/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
{
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   uint64_t max_uniform_size = 0;

   if (!shader->info.num_ssbos && !shader->info.num_ubos)
      return false;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
      if (interface_type) {
         unsigned block_size = glsl_get_explicit_size(interface_type, true);
         if (glsl_get_length(interface_type) == 1) {
            /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
            const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
            if (glsl_type_is_array(f) && !glsl_array_size(f))
               block_size = 0;
         }
         if (block_size) {
            block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
            size = MAX2(size, block_size);
         }
      }
      if (var->data.mode == nir_var_mem_ubo) {
         if (var->data.driver_location)
            max_ubo_size = MAX2(max_ubo_size, size);
         else
            max_uniform_size = MAX2(max_uniform_size, size);
      } else {
         max_ssbo_size = MAX2(max_ssbo_size, size);
         if (interface_type) {
            if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
               needs_size = true;
         }
      }
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);

   struct glsl_struct_field field = {0};
   field.name = ralloc_strdup(shader, "base");
   if (shader->info.num_ubos) {
      if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
         field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
                                                 "uniform_0");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = 0;
      }

      unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
      uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
      if (num_ubos && ubos_used) {
         field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
         /* shrink array as much as possible */
         unsigned first_ubo = ffs(ubos_used) - 2;
         assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
         num_ubos -= first_ubo;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
                                                 "ubos");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
      }
   }
   if (shader->info.num_ssbos && zs->ssbos_used) {
      /* shrink array as much as possible */
      unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
      assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
      unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
      field.type = ssbo_type;
      nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                              glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
                                              "ssbos");
      var->interface_type = var->type;
      var->data.mode = nir_var_mem_ssbo;
      var->data.driver_location = first_ssbo;
   }
   return true;
}
static uint32_t
get_src_mask_ssbo(unsigned total, nir_src src)
{
   if (nir_src_is_const(src))
      return BITFIELD_BIT(nir_src_as_uint(src));
   return BITFIELD_MASK(total);
}

static uint32_t
get_src_mask_ubo(unsigned total, nir_src src)
{
   if (nir_src_is_const(src))
      return BITFIELD_BIT(nir_src_as_uint(src));
   return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
}
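/* Examples: a constant index like ssbo[2] marks only bit 2 as used, while a
 * dynamically indexed access conservatively marks every buffer; for UBOs the
 * dynamic case never marks bit 0, since the default uniform block is handled
 * as a push descriptor and tracked separately.
 */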
static bool
analyze_io(struct zink_shader *zs, nir_shader *shader)
{
   bool ret = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
            /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
               if (glsl_type_is_sampler(glsl_without_array(img->type))) {
                  unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
                  if (tex->texture_index >= img->data.driver_location &&
                      tex->texture_index < img->data.driver_location + size) {
                     BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
                     break;
                  }
               }
            }
         }
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
            break;

         case nir_intrinsic_get_ssbo_size: {
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
            ret = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic:
         case nir_intrinsic_ssbo_atomic_swap:
         case nir_intrinsic_load_ssbo:
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
            break;
         default:
            break;
         }
      }
   }
   return ret;
}
struct zink_bindless_info {
   nir_variable *bindless[4];
   unsigned bindless_set;
};
/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   return var;
}
/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}
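/* Together the two helpers above pin down the binding layout of the bindless
 * descriptor set, matching the indexing of zink_bindless_info::bindless[4]:
 *    0 = sampled images, 1 = sampled buffer textures,
 *    2 = storage images,  3 = storage texel buffers.
 */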
/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   struct zink_bindless_info *bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;

   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic)
   OP_SWAP(atomic_swap)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim, bindless->bindless_set);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}
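/* Sketch of the rewrite performed above:
 *    bindless_image_load(handle, ...) ->
 *    image_deref_load(&bindless_image[u2u32(handle)], ...)
 * i.e. the 64-bit bindless handle becomes an index into the big bindless
 * array variable and the intrinsic is swapped to its deref form; tex
 * instructions get the same treatment through their texture_handle src.
 */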
static bool
lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);
   return true;
}
/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}
static unsigned
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      unsigned base = stage;
      /* clamp compute bindings for better driver efficiency */
      if (gl_shader_stage_is_compute(stage))
         base = 0;
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         return base * 2 + !!index;

      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         assert(stage == MESA_SHADER_KERNEL);
         FALLTHROUGH;
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         if (stage == MESA_SHADER_KERNEL) {
            assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
            return index + PIPE_MAX_SAMPLERS;
         }
         FALLTHROUGH;
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         assert(index < PIPE_MAX_SAMPLERS);
         assert(stage != MESA_SHADER_KERNEL);
         return (base * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_SAMPLER:
         assert(index < PIPE_MAX_SAMPLERS);
         assert(stage == MESA_SHADER_KERNEL);
         return index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < ZINK_MAX_SHADER_IMAGES);
         if (stage == MESA_SHADER_KERNEL)
            return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
         return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));

      default:
         unreachable("unexpected type");
      }
   }
}
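/* Worked example (compact_descriptors off): since MESA_SHADER_FRAGMENT is
 * stage 4, a fragment-stage UBO at driver_location 1 maps to binding
 * 4 * 2 + 1 = 9, and a fragment sampler at index i maps to binding
 * 4 * PIPE_MAX_SAMPLERS + i; compute-like stages are clamped to base 0 so
 * their bindings start at the bottom of their own set.
 */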
static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   unsigned binding;
   switch (vktype) {
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      binding = 0;
      break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      binding = 1;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      binding = 2;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      binding = 3;
      break;
   default:
      unreachable("unknown");
   }
   if (!bindless->bindless[binding]) {
      bindless->bindless[binding] = nir_variable_clone(var, nir);
      bindless->bindless[binding]->data.bindless = 0;
      bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
      bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
      if (!bindless->bindless[binding]->data.image.format)
         bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless->bindless[binding]);
   } else {
      assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}
static bool
convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   if (instr->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
      return false;
   if (tex->is_sparse && screen->need_2D_sparse) {
      /* no known case of this exists: only nvidia can hit it, and nothing uses it */
      mesa_loge("unhandled/unsupported 1D sparse texture!");
      return false;
   }
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   b->cursor = nir_before_instr(instr);
   tex->coord_components++;
   unsigned srcs[] = {
      nir_tex_src_coord,
      nir_tex_src_offset,
      nir_tex_src_ddx,
      nir_tex_src_ddy,
   };
   for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
      unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
      if (c == -1)
         continue;
      if (tex->src[c].src.ssa->num_components == tex->coord_components)
         continue;
      nir_ssa_def *def;
      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
      if (tex->src[c].src.ssa->num_components == 1)
         def = nir_vec2(b, tex->src[c].src.ssa, zero);
      else
         def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
   }
   b->cursor = nir_after_instr(instr);
   unsigned needed_components = nir_tex_instr_dest_size(tex);
   unsigned num_components = tex->dest.ssa.num_components;
   if (needed_components > num_components) {
      tex->dest.ssa.num_components = needed_components;
      assert(num_components < 3);
      /* take either xz or just x since this is promoted to 2D from 1D */
      uint32_t mask = num_components == 2 ? (1|4) : 1;
      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
   }
   return true;
}
static bool
lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
{
   bool found = false;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
      const struct glsl_type *type = glsl_without_array(var->type);
      unsigned length = glsl_get_length(var->type);
      if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
         continue;
      const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
      var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
      found = true;
   }
   if (found)
      nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
   return found;
}
static void
scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block_safe(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
               zs->sinfo.have_sparse |= tex->is_sparse;
            }
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_size ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {
               nir_variable *var = nir_intrinsic_get_var(intr, 0);

               /* Structs have been lowered already, so get_aoa_size is sufficient. */
               const unsigned size =
                  glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
               BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
                                var->data.binding + (MAX2(size, 1) - 1));
            }
            if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
               zs->sinfo.have_sparse = true;

            static bool warned = false;
            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
               switch (intr->intrinsic) {
               case nir_intrinsic_image_deref_atomic: {
                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
                      util_format_is_float(var->data.image.format))
                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                  break;
               }
               default:
                  break;
               }
            }
         }
      }
   }
}
static bool
is_residency_code(nir_ssa_def *src)
{
   nir_instr *parent = src->parent_instr;
   while (true) {
      if (parent->type == nir_instr_type_intrinsic) {
         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
         return false;
      }
      if (parent->type == nir_instr_type_tex)
         return true;
      assert(parent->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      parent = alu->src[0].src.ssa->parent_instr;
   }
}
static bool
lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
      b->cursor = nir_before_instr(&instr->instr);
      nir_ssa_def *src0;
      if (is_residency_code(instr->src[0].ssa))
         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
      else
         src0 = instr->src[0].ssa;
      nir_ssa_def *src1;
      if (is_residency_code(instr->src[1].ssa))
         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
      else
         src1 = instr->src[1].ssa;
      nir_ssa_def *def = nir_iand(b, src0, src1);
      nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
      nir_instr_remove(in);
      return true;
   }
   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
      return false;

   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
    * so just rewrite as the first component since ntv is going to use a different
    * method for storing the residency value anyway
    */
   b->cursor = nir_before_instr(&instr->instr);
   nir_instr *parent = instr->src[0].ssa->parent_instr;
   if (is_residency_code(instr->src[0].ssa)) {
      assert(parent->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
      nir_instr_remove(parent);
      return true;
   }
   nir_ssa_def *src;
   if (parent->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
      assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
      src = intr->src[0].ssa;
   } else {
      assert(parent->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      src = alu->src[0].src.ssa;
   }
   if (instr->dest.ssa.bit_size != 32) {
      if (instr->dest.ssa.bit_size == 1)
         src = nir_ieq_imm(b, src, 1);
      else
         src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
   }
   nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
   nir_instr_remove(in);
   return true;
}

static bool
lower_sparse(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
}
static bool
match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(in);
   if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
      return false;
   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   nir_variable *var = NULL;
   if (handle != -1) {
      var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
   } else {
      nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
         if (glsl_type_is_sampler(glsl_without_array(img->type))) {
            unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
            if (tex->texture_index >= img->data.driver_location &&
                tex->texture_index < img->data.driver_location + size) {
               var = img;
               break;
            }
         }
      }
   }
   return !!rewrite_tex_dest(b, tex, var, data);
}

static bool
match_tex_dests(nir_shader *shader, struct zink_shader *zs)
{
   return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
}
static bool
split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu = nir_instr_as_alu(in);
   switch (alu->op) {
   case nir_op_ubitfield_extract:
   case nir_op_ibitfield_extract:
   case nir_op_bitfield_insert:
      break;
   default:
      return false;
   }
   unsigned num_components = nir_dest_num_components(alu->dest.dest);
   if (num_components == 1)
      return false;
   b->cursor = nir_before_instr(in);
   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++) {
      if (alu->op == nir_op_bitfield_insert)
         dests[i] = nir_bitfield_insert(b,
                                        nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                        nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                        nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
                                        nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
      else if (alu->op == nir_op_ubitfield_extract)
         dests[i] = nir_ubitfield_extract(b,
                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
      else
         dests[i] = nir_ibitfield_extract(b,
                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
   }
   nir_ssa_def *dest = nir_vec(b, dests, num_components);
   nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
   nir_instr_remove(in);
   return true;
}

static bool
split_bitfields(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
}
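/* Example: a vec2 ubitfield_extract(value, offset, bits) is rebuilt above as
 * two scalar ubitfield_extract ops (one per channel, honoring each source's
 * swizzle) recombined with nir_vec(); SPIR-V's BitField* opcodes only accept
 * scalar Offset/Count operands, so the vector form can't be emitted
 * directly.
 */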
static void
rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            nir_variable *img = nir_deref_instr_get_variable(deref);
            if (img != var)
               continue;
            if (glsl_type_is_array(var->type)) {
               if (deref->deref_type == nir_deref_type_array)
                  deref->type = glsl_without_array(var->type);
               else
                  deref->type = var->type;
            } else {
               deref->type = var->type;
            }
         }
      }
   }
}
static void
type_image(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {
               nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
               nir_variable *img = nir_deref_instr_get_variable(deref);
               if (img != var)
                  continue;
               nir_alu_type alu_type = nir_intrinsic_src_type(intr);
               const struct glsl_type *type = glsl_without_array(var->type);
               if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
                  assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
                  continue;
               }
               const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
               if (glsl_type_is_array(var->type))
                  img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
               var->type = img_type;
               rewrite_cl_derefs(nir, var);
               return;
            }
         }
      }
   }
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_image_deref_size)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            nir_variable *img = nir_deref_instr_get_variable(deref);
            if (img != var)
               continue;
            nir_alu_type alu_type = nir_type_uint32;
            const struct glsl_type *type = glsl_without_array(var->type);
            if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID)
               return;
            const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
            if (glsl_type_is_array(var->type))
               img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
            var->type = img_type;
            rewrite_cl_derefs(nir, var);
            return;
         }
      }
   }
   var->data.mode = nir_var_shader_temp;
}
static nir_variable *
find_sampler_var(nir_shader *nir, unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}
static bool
type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
{
   bool progress = false;
   /* first type samplers based on real sampling ops, which carry a dest_type */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            switch (tex->op) {
            case nir_texop_txs:
            case nir_texop_query_levels:
            case nir_texop_texture_samples:
            case nir_texop_samples_identical:
               continue;
            default:
               break;
            }
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
            assert(var);
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
            if (size > 1)
               img_type = glsl_array_type(img_type, size, 0);
            var->type = img_type;
            progress = true;
         }
      }
   }
   /* then fall back to query ops for any samplers that are still untyped */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            switch (tex->op) {
            case nir_texop_txs:
            case nir_texop_query_levels:
            case nir_texop_texture_samples:
            case nir_texop_samples_identical:
               break;
            default:
               continue;
            }
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
            assert(var);
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
            if (size > 1)
               img_type = glsl_array_type(img_type, size, 0);
            var->type = img_type;
            progress = true;
         }
      }
   }
   return progress;
}
static bool
delete_samplers(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(glsl_without_array(var->type))) {
         var->data.mode = nir_var_shader_temp;
         progress = true;
      }
   }
   return progress;
}
static bool
type_images(nir_shader *nir, unsigned *sampler_mask)
{
   bool progress = false;
   progress |= delete_samplers(nir);
   progress |= type_sampler_vars(nir, sampler_mask);
   nir_foreach_variable_with_modes(var, nir, nir_var_image) {
      type_image(nir, var);
      progress = true;
   }
   return progress;
}
/* attempt to assign io for separate shaders */
static bool
fixup_io_locations(nir_shader *nir)
{
   nir_variable_mode modes;
   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
      modes = nir_var_shader_in | nir_var_shader_out;
   else
      modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
   u_foreach_bit(mode, modes) {
      nir_variable_mode m = BITFIELD_BIT(mode);
      if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
          (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
         /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
          * - i/o interface blocks don't need to match
          * - any location can be present or not
          * - it just has to work
          *
          * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
          * since it's a builtin and yolo it with all the other legacy crap
          */
         nir_foreach_variable_with_modes(var, nir, m) {
            if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
               continue;
            if (var->data.location == VARYING_SLOT_VAR0)
               var->data.driver_location = 0;
            else
               var->data.driver_location = var->data.location;
         }
         continue;
      }
      /* i/o interface blocks are required to be EXACT matches between stages:
       * iterate over all locations and set locations incrementally
       */
      unsigned slot = 0;
      for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
         if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
            continue;
         bool found = false;
         unsigned size = 0;
         nir_foreach_variable_with_modes(var, nir, m) {
            if (var->data.location != i)
               continue;
            /* only add slots for non-component vars or first-time component vars */
            if (!var->data.location_frac || !size) {
               /* ensure variable is given enough slots */
               if (nir_is_arrayed_io(var, nir->info.stage))
                  size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
               else
                  size += glsl_count_vec4_slots(var->type, false, false);
            }
            var->data.driver_location = slot;
            found = true;
         }
         if (found) {
            slot += size;
            /* ensure the consumed slots aren't double iterated */
            i += size - 1;
         } else {
            /* locations used between stages are not required to be contiguous */
            if (i >= VARYING_SLOT_VAR0)
               slot++;
         }
      }
   }
   return true;
}
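/* Example of the incremental assignment above: for a separate VS writing
 * VAR0 plus a two-slot array at VAR1, VAR0 gets driver_location 0 and the
 * array gets 1 (consuming slots 1-2), while sysval outputs like POS are
 * skipped entirely; the FS reading those locations runs the same loop and
 * lands on identical driver locations without any link-time information.
 */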
static uint32_t
zink_flat_flags(struct nir_shader *shader)
{
   uint32_t flat_flags = 0, c = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (var->data.interpolation == INTERP_MODE_FLAT)
         flat_flags |= 1u << (c++);
   }
   return flat_flags;
}
4827 struct zink_shader *
4828 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4829 const struct pipe_stream_output_info *so_info)
4831 struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4832 bool have_psiz = false;
4834 ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4835 nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4837 ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4838 ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
4839 ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4841 util_queue_fence_init(&ret->precompile.fence);
4842 util_dynarray_init(&ret->pipeline_libs, ret);
4843 ret->hash = _mesa_hash_pointer(ret);
4845 ret->programs = _mesa_pointer_set_create(NULL);
4846 simple_mtx_init(&ret->lock, mtx_plain);
4848 nir_variable_mode indirect_derefs_modes = 0;
4849 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4850 nir->info.stage == MESA_SHADER_TESS_EVAL)
4851 indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4853 NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4856 if (nir->info.stage < MESA_SHADER_COMPUTE)
4857 create_gfx_pushconst(nir);
4859 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4860 nir->info.stage == MESA_SHADER_TESS_EVAL)
4861 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4863 if (nir->info.stage < MESA_SHADER_FRAGMENT)
4864 have_psiz = check_psiz(nir);
4865 if (nir->info.stage == MESA_SHADER_FRAGMENT)
4866 ret->flat_flags = zink_flat_flags(nir);
4868 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4869 NIR_PASS_V(nir, fixup_io_locations);
4871 NIR_PASS_V(nir, lower_basevertex);
4872 NIR_PASS_V(nir, lower_baseinstance);
4873 NIR_PASS_V(nir, lower_sparse);
4874 NIR_PASS_V(nir, split_bitfields);
4875 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4877 if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4878 NIR_PASS_V(nir, nir_lower_discard_or_demote,
4879 screen->driconf.glsl_correct_derivatives_after_discard ||
4880 nir->info.use_legacy_math_rules);
4883 if (screen->need_2D_zs)
4884 NIR_PASS_V(nir, lower_1d_shadow, screen);
4887 nir_lower_subgroups_options subgroup_options = {0};
4888 subgroup_options.lower_to_scalar = true;
4889 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4890 subgroup_options.ballot_bit_size = 32;
4891 subgroup_options.ballot_components = 4;
4892 subgroup_options.lower_subgroup_masks = true;
4893 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4894 subgroup_options.subgroup_size = 1;
4895 subgroup_options.lower_vote_trivial = true;
4897 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4900 if (so_info && so_info->num_outputs)
4901 NIR_PASS_V(nir, split_blocks);
4903 optimize_nir(nir, NULL);
4904 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4905 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4906 nir_lower_demote_if_to_cf |
4907 nir_lower_terminate_if_to_cf));
4908 NIR_PASS_V(nir, nir_lower_fragcolor,
4909 nir->info.fs.color_is_dual_source ? 1 : 8);
4911 NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4912 bool needs_size = analyze_io(ret, nir);
4913 NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4914 /* run in compile if there could be inlined uniforms */
4915 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4916 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4917 NIR_PASS_V(nir, rewrite_bo_access, screen);
4918 NIR_PASS_V(nir, remove_bo_access, ret);
4921 struct zink_bindless_info bindless = {0};
4922 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4923 bool has_bindless_io = false;
4924 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4925 var->data.is_xfb = false;
4926 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4927 has_bindless_io = true;
4931 if (has_bindless_io)
4932 NIR_PASS_V(nir, lower_bindless_io);
4934 optimize_nir(nir, NULL);
4937 scan_nir(screen, nir, ret);
4938 unsigned sampler_mask = 0;
4939 if (nir->info.stage == MESA_SHADER_KERNEL) {
4940 NIR_PASS_V(nir, type_images, &sampler_mask);
4941 enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4942 VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4943 u_foreach_bit(s, sampler_mask) {
4944 ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4945 ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4946 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4947 ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4948 ret->num_bindings[ztype]++;
4950 ret->sinfo.sampler_mask = sampler_mask;
4953 unsigned ubo_binding_mask = 0;
4954 unsigned ssbo_binding_mask = 0;
4955 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4956 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4959 nir_var_mem_ssbo)) {
4960 enum zink_descriptor_type ztype;
4961 const struct glsl_type *type = glsl_without_array(var->type);
4962 if (var->data.mode == nir_var_mem_ubo) {
4963 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4964 /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            if (!var->data.driver_location) {
               ret->has_uniforms = true;
            } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
               assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
               ret->num_bindings[ztype]++;
               ubo_binding_mask |= BITFIELD_BIT(binding);
            }
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = screen->desc_set_id[ztype];
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
               ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
               assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
               ret->num_bindings[ztype]++;
               ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
            }
         } else {
            assert(var->data.mode == nir_var_uniform ||
                   var->data.mode == nir_var_image);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, &bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
                  vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = screen->desc_set_id[ztype];
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
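               /* e.g. (illustration): `uniform sampler2D s[4][2];` has
                * glsl_get_aoa_size == 8, so the whole array-of-arrays maps to
                * one binding spanning 8 consecutive descriptors
                */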
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            } else if (var->data.mode == nir_var_uniform) {
               /* this is a dead uniform */
               var->data.mode = 0;
               exec_node_remove(&var->node);
            }
         }
      }
   }

   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
   ret->bindless |= bindless_lowered;

   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
      NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
   if (nir->info.stage != MESA_SHADER_KERNEL)
      NIR_PASS_V(nir, match_tex_dests, ret);

   if (!nir->info.internal)
      nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
   if (so_info && so_info->num_outputs && nir->info.outputs_written)
      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
   else if (have_psiz) {
      bool have_fake_psiz = false;
      nir_variable *psiz = NULL;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == VARYING_SLOT_PSIZ) {
            if (!var->data.explicit_location)
               have_fake_psiz = true;
            else
               psiz = var;
         }
      }
      if (have_fake_psiz && psiz) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }
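   /* a sketch of the case above: a lowering pass injected a gl_PointSize
    * output with no explicit location (the "fake" psiz) while the shader also
    * declares an explicitly-located one; demoting the explicit variable to a
    * temp lets the dead-variable pass delete it so a single PSIZ export remains
    */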
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));

   ret->can_inline = true;

   return ret;
}

void
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   nir_lower_tex_options tex_opts = {
      .lower_invalid_implicit_lod = true,
   };
   /*
      Sampled Image must be an object whose type is OpTypeSampledImage.
      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
      or Rect, and the Arrayed and MS operands must be 0.
      - SPIR-V, OpImageSampleProj* opcodes
    */
   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
   tex_opts.lower_txp_array = true;
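   /* roughly what the lowering requests (an illustration): for dims SPIR-V
    * cannot project-sample, txp becomes a divide plus an ordinary tex, i.e.
    *    txp(coord) -> tex(coord.xyz / coord.w)
    */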
   if (!screen->info.feats.features.shaderImageGatherExtended)
      tex_opts.lower_tg4_offsets = true;
   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   optimize_nir(nir, NULL);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);
}

void
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   _mesa_set_destroy(shader->programs, NULL);
   util_queue_fence_wait(&shader->precompile.fence);
   util_queue_fence_destroy(&shader->precompile.fence);
   zink_descriptor_shader_deinit(screen, shader);
   if (screen->info.have_EXT_shader_object) {
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.no_psiz_obj.obj, NULL);
   } else {
      if (shader->precompile.obj.mod)
         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
      if (shader->precompile.gpl)
         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
   }
   blob_finish(&shader->blob);
   ralloc_free(shader->spirv);
   free(shader->precompile.bindings);
   ralloc_free(shader);
}

void
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   util_queue_fence_wait(&shader->precompile.fence);
   set_foreach(shader->programs, entry) {
      struct zink_gfx_program *prog = (void*)entry->key;
      gl_shader_stage stage = shader->info.stage;
      assert(stage < ZINK_GFX_SHADER_COUNT);
      unsigned stages_present = prog->stages_present;
      if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
         stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
      unsigned idx = zink_program_cache_stages(stages_present);
      if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
         struct hash_table *ht = &prog->ctx->program_cache[idx];
         simple_mtx_lock(&prog->ctx->program_lock[idx]);
         struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
         assert(he && he->data == prog);
         _mesa_hash_table_remove(ht, he);
         prog->base.removed = true;
         simple_mtx_unlock(&prog->ctx->program_lock[idx]);
         util_queue_fence_wait(&prog->base.cache_fence);

         for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
            for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
               hash_table_foreach(&prog->pipelines[r][i], entry) {
                  struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

                  util_queue_fence_wait(&pc_entry->fence);
               }
            }
         }
      }
      while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
         struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
         //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
         //stages_present_i implies libs->stages_present_i
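         //e.g. with made-up masks: stages_present = 0b00101 against
         //libs->stages_present = 0b10101 gives 0b00101 & ~0b00101 == 0 (keep),
         //while libs->stages_present = 0b10001 gives 0b00100 != 0 (skip)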
         if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
            continue;
         if (!libs->removed) {
            libs->removed = true;
            simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
            _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
            simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
         }
         zink_gfx_lib_cache_unref(screen, libs);
      }
      if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
         prog->shaders[stage] = NULL;
         prog->stages_remaining &= ~BITFIELD_BIT(stage);
      }
      /* only remove generated tcs during parent tes destruction */
      if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
         prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
      if (stage != MESA_SHADER_FRAGMENT &&
          prog->shaders[MESA_SHADER_GEOMETRY] &&
          prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
          shader) {
         prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
      }
      zink_gfx_program_reference(screen, &prog, NULL);
   }
   if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
       shader->non_fs.generated_tcs) {
      /* automatically destroy generated tcs shaders when tes is destroyed */
      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
      shader->non_fs.generated_tcs = NULL;
   }
   for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
      for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
             shader->non_fs.generated_gs[i][j]) {
            /* automatically destroy generated gs shaders when owner is destroyed */
            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
            shader->non_fs.generated_gs[i][j] = NULL;
         }
      }
   }
   zink_shader_free(screen, shader);
}

struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
{
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
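   /* conceptually, the word being patched is the literal operand of
    * (a sketch, not literal disassembly):
    *    OpExecutionMode %main OutputVertices <patch_vertices>
    * so the cached SPIR-V can be reused for any patch size without rerunning
    * the nir pipeline
    */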
   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
}

/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
   some_var_out = some_var[gl_InvocationID];
}
*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
{
   struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
   util_queue_fence_init(&ret->precompile.fence);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b = nir_builder_at(nir_before_block(nir_start_block(impl)));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

   nir_foreach_shader_in_variable(var, tes) {
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
         continue;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024]; /* output name buffer; the exact size is an assumed detail */
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
         const struct glsl_type *type = var->type;
         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
         out_type = glsl_array_type(type, vertices_per_patch, 0);
      }

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7). Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch. The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      /* load the invocation-specific value of the vertex output and store it to the corresponding per-vertex tcs output */
      nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
      nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
      copy_vars(&b, out_value, in_value);
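      /* in GLSL terms this is roughly
       *    some_var_out[gl_InvocationID] = some_var[gl_InvocationID];
       * i.e. each of the $vertices_per_patch invocations passes one vertex through
       */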
   }
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   create_gfx_pushconst(nir);

   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
                                                    .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
                                                    .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   optimize_nir(nir, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   *nir_ret = nir;
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));
   ret->non_fs.is_generated = true;
   return ret;
}

bool
zink_shader_has_cubes(nir_shader *nir)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
         return true;
   }
   return false;
}

nir_shader *
zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
{
   struct blob_reader blob_reader;
   blob_reader_init(&blob_reader, blob->data, blob->size);
   return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
}

nir_shader *
zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
{
   return zink_shader_blob_deserialize(screen, &zs->blob);
}

void
zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
{
   blob_init(blob);
#ifndef NDEBUG
   bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
#else
   bool strip = false;
#endif
   nir_serialize(blob, nir, strip);
}

void
zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   nir_print_shader(nir, fp);
   ralloc_free(nir);
}