2 * Copyright 2018 Collabora Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
32 #include "pipe/p_state.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43 #include "tgsi/tgsi_from_mesa.h"
45 #include "util/u_memory.h"
47 #include "compiler/spirv/nir_spirv.h"
48 #include "vulkan/util/vk_util.h"
51 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
/* Recursively copy the value behind deref `src` into deref `dst`.
 * Structs are copied member-by-member, arrays/matrices element-by-element
 * (matrices by column), and any remaining leaf is moved with one
 * load + full-write-mask store. Both derefs must have the same bare type.
 * NOTE(review): some interior lines (closing braces) are missing from this
 * extraction of the file. */
55 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
57 assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
58 if (glsl_type_is_struct(dst->type)) {
59 for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
60 copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
62 } else if (glsl_type_is_array_or_matrix(dst->type)) {
/* arrays are walked by element, matrices by column */
63 unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
64 for (unsigned i = 0; i < count; i++) {
65 copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
/* leaf case: scalar/vector — one masked store covering all components */
68 nir_ssa_def *load = nir_load_deref(b, src);
69 nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
/* sizeof of a struct member without needing an instance */
73 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
/* Create the gfx push-constant variable on `nir`, laid out as a struct of
 * uint arrays whose member offsets mirror struct zink_gfx_push_constant so
 * the ntv (nir-to-spirv) push-constant loader can address each field by
 * member index. */
76 create_gfx_pushconst(nir_shader *nir)
78 #define PUSHCONST_MEMBER(member_idx, field) \
79 fields[member_idx].type = \
80 glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
81 fields[member_idx].name = ralloc_asprintf(nir, #field); \
82 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
84 nir_variable *pushconst;
85 /* create compatible layout for the ntv push constant loader */
86 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
87 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
88 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
89 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
90 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
91 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
92 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
93 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
94 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
96 pushconst = nir_variable_create(nir, nir_var_mem_push_const,
97 glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
99 pushconst->data.location = INT_MAX; //doesn't really matter
101 #undef PUSHCONST_MEMBER
/* Instruction callback for lower_64bit_vertex_attribs(): splits a load of a
 * 64-bit 3/4-component vertex input variable into two loads — the original
 * variable clamped to a dvec2 plus a cloned variable holding the remaining
 * 1/2 components at the next driver location — then reassembles the full
 * vector for all existing uses. Only acts on load_deref of shader_in vars
 * that are 64-bit vectors with >= 3 components. */
105 lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
107 if (instr->type != nir_instr_type_intrinsic)
109 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
110 if (intr->intrinsic != nir_intrinsic_load_deref)
112 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
113 if (var->data.mode != nir_var_shader_in)
115 if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
118 /* create second variable for the split */
119 nir_variable *var2 = nir_variable_clone(var, b->shader);
120 /* split new variable into second slot */
121 var2->data.driver_location++;
122 nir_shader_add_variable(b->shader, var2);
124 unsigned total_num_components = glsl_get_vector_elements(var->type);
125 /* new variable is the second half of the dvec */
126 var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
127 /* clamp original variable to a dvec2 */
128 var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
130 b->cursor = nir_after_instr(instr);
132 /* this is the first load instruction for the first half of the dvec3/4 components */
133 nir_ssa_def *load = nir_load_var(b, var);
134 /* this is the second load instruction for the second half of the dvec3/4 components */
135 nir_ssa_def *load2 = nir_load_var(b, var2);
138 /* create a new dvec3/4 comprised of all the loaded components from both variables */
139 def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
140 def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
141 def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
142 if (total_num_components == 4)
143 def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
144 nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
145 /* use the assembled dvec3/4 for all other uses of the load */
146 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
147 new_vec->parent_instr);
149 /* remove the original instr and its deref chain */
150 nir_instr *parent = intr->src[0].ssa->parent_instr;
151 nir_instr_remove(instr);
152 nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
157 /* mesa/gallium always provides UINT versions of 64bit formats:
158 * - rewrite loads as 32bit vec loads
159 * - cast back to 64bit
/* Instruction callback: retypes a 64-bit shader_in variable to a 32-bit
 * vector with twice the components, then packs each consecutive 32-bit
 * pair back into a 64-bit channel for existing uses of the load. */
162 lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
164 if (instr->type != nir_instr_type_intrinsic)
166 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
167 if (intr->intrinsic != nir_intrinsic_load_deref)
169 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
170 if (var->data.mode != nir_var_shader_in)
172 if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
175 unsigned num_components = glsl_get_vector_elements(var->type);
176 enum glsl_base_type base_type;
/* map 64-bit base type to its 32-bit counterpart */
177 switch (glsl_get_base_type(var->type)) {
178 case GLSL_TYPE_UINT64:
179 base_type = GLSL_TYPE_UINT;
181 case GLSL_TYPE_INT64:
182 base_type = GLSL_TYPE_INT;
184 case GLSL_TYPE_DOUBLE:
185 base_type = GLSL_TYPE_FLOAT;
188 unreachable("unknown 64-bit vertex attribute format!");
/* double the component count at half the bit size */
190 var->type = glsl_vector_type(base_type, num_components * 2);
192 b->cursor = nir_after_instr(instr);
194 nir_ssa_def *load = nir_load_var(b, var);
195 nir_ssa_def *casted[2];
196 for (unsigned i = 0; i < num_components; i++)
197 casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
198 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
200 /* remove the original instr and its deref chain */
201 nir_instr *parent = intr->src[0].ssa->parent_instr;
202 nir_instr_remove(instr);
203 nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
208 /* "64-bit three- and four-component vectors consume two consecutive locations."
209 * - 14.1.4. Location Assignment
211 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
212 * are assigned to consecutive locations, loaded separately, and then assembled back into a
213 * composite value that's used in place of the original loaded ssa src
/* Pass entry point: vertex stage only; runs the dvec3/4 split pass followed
 * by the 64-bit->2x32 uint attrib pass, accumulating progress. */
216 lower_64bit_vertex_attribs(nir_shader *shader)
218 if (shader->info.stage != MESA_SHADER_VERTEX)
221 bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
222 progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
/* Instruction callback for lower_basevertex(): after each load_base_vertex,
 * loads the draw_mode_is_indexed push constant and selects (bcsel) between
 * values based on whether the draw is indexed, rewriting later uses of the
 * base-vertex value. NOTE(review): some bcsel operand lines are missing
 * from this extraction. */
227 lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
229 if (in->type != nir_instr_type_intrinsic)
231 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
232 if (instr->intrinsic != nir_intrinsic_load_base_vertex)
235 b->cursor = nir_after_instr(&instr->instr);
/* hand-rolled push-constant load of the draw_mode_is_indexed field */
236 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
237 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
238 nir_intrinsic_set_range(load, 4);
239 load->num_components = 1;
240 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
241 nir_builder_instr_insert(b, &load->instr);
243 nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
244 nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
/* rewrite uses that come after the newly built select */
249 nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
250 composite->parent_instr);
/* Pass entry point: only runs on vertex shaders that actually read
 * SYSTEM_VALUE_BASE_VERTEX. */
255 lower_basevertex(nir_shader *shader)
257 if (shader->info.stage != MESA_SHADER_VERTEX)
260 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
263 return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
/* Instruction callback for lower_drawid(): replaces each load_draw_id with
 * a load of the draw_id push constant. */
268 lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
270 if (in->type != nir_instr_type_intrinsic)
272 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
273 if (instr->intrinsic != nir_intrinsic_load_draw_id)
276 b->cursor = nir_before_instr(&instr->instr);
277 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
278 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
279 nir_intrinsic_set_range(load, 4);
280 load->num_components = 1;
281 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
282 nir_builder_instr_insert(b, &load->instr);
/* all uses of the system value now read the push constant instead */
284 nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
/* Pass entry point: only runs on vertex shaders that actually read
 * SYSTEM_VALUE_DRAW_ID. */
290 lower_drawid(nir_shader *shader)
292 if (shader->info.stage != MESA_SHADER_VERTEX)
295 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
298 return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
/* State shared by the lower_gl_point_gs pass callbacks. */
301 struct lower_gl_point_state {
302 nir_variable *gl_pos_out;   /* gl_Position output being rewritten */
303 nir_variable *gl_point_size; /* gl_PointSize output used to size the quad */
/* Instruction callback for lower_gl_point_gs(): at each emit_vertex on
 * stream 0, expands the point into a screen-aligned quad (triangle strip of
 * 4 vertices) sized from gl_PointSize and the viewport-scale push constant;
 * end_primitive intrinsics are dropped since the pass emits its own. */
307 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
309 struct lower_gl_point_state *state = data;
310 nir_ssa_def *vp_scale, *pos;
312 if (instr->type != nir_instr_type_intrinsic)
315 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
316 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
317 intrin->intrinsic != nir_intrinsic_emit_vertex)
320 if (nir_intrinsic_stream_id(intrin) != 0)
/* drop user end_primitive; the pass ends its own strip per point */
323 if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
324 intrin->intrinsic == nir_intrinsic_end_primitive) {
325 nir_instr_remove(&intrin->instr);
329 b->cursor = nir_before_instr(instr);
331 // viewport-map endpoints
332 nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
333 vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);
335 // Load point info values
336 nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
337 nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);
339 // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
340 nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
341 w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
342 // half_w_delta = w_delta / 2
343 nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));
345 // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
346 nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
347 h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
348 // half_h_delta = h_delta / 2
349 nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));
/* corner directions for the four quad vertices */
351 nir_ssa_def *point_dir[4][2] = {
352 { nir_imm_float(b, -1), nir_imm_float(b, -1) },
353 { nir_imm_float(b, -1), nir_imm_float(b, 1) },
354 { nir_imm_float(b, 1), nir_imm_float(b, -1) },
355 { nir_imm_float(b, 1), nir_imm_float(b, 1) }
358 nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
359 nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);
/* emit one offset gl_Position per corner */
361 for (size_t i = 0; i < 4; i++) {
363 nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
364 nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
365 nir_channel(b, point_pos, 2),
366 nir_channel(b, point_pos, 3));
368 nir_store_var(b, state->gl_pos_out, pos, 0xf);
373 nir_end_primitive(b);
/* the original emit_vertex is fully replaced by the quad above */
375 nir_instr_remove(&intrin->instr);
/* Pass entry point: converts a point-emitting GS into one that emits
 * triangle-strip quads (4x vertices_out). Bails when gl_Position or
 * gl_PointSize is never written. */
381 lower_gl_point_gs(nir_shader *shader)
383 struct lower_gl_point_state state;
/* output primitive becomes a strip; each point now costs 4 vertices */
386 shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
387 shader->info.gs.vertices_out *= 4;
389 // Gets the gl_Position in and out
391 nir_find_variable_with_location(shader, nir_var_shader_out,
393 state.gl_point_size =
394 nir_find_variable_with_location(shader, nir_var_shader_out,
397 // if position in or gl_PointSize aren't written, we have nothing to do
398 if (!state.gl_pos_out || !state.gl_point_size)
401 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
402 nir_builder_init(&b, entry);
403 b.cursor = nir_before_cf_list(&entry->body);
405 return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
406 nir_metadata_dominance, &state);
/* State shared by the lower_pv_mode_gs pass callbacks. Vertices are buffered
 * in per-varying ring arrays so primitives can be re-emitted with a rotated
 * provoking vertex. */
409 struct lower_pv_mode_state {
410 nir_variable *varyings[VARYING_SLOT_MAX]; /* ring-buffer array per output varying */
411 nir_variable *pos_counter;     /* vertices emitted by the user GS so far */
412 nir_variable *out_pos_counter; /* vertices already flushed out of the ring */
413 nir_variable *ring_offset;     /* rotation offset applied to ring indices */
415 unsigned primitive_vert_count; /* verts per output primitive (1/2/3) */
/* Map a logical vertex index to its slot in the varying ring buffers:
 * (index + ring_offset) mod ring_size. */
420 lower_pv_mode_gs_ring_index(nir_builder *b,
421 struct lower_pv_mode_state *state,
424 nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
425 return nir_imod(b, nir_iadd(b, index, ring_offset),
426 nir_imm_int(b, state->ring_size));
/* Redirect a store to a shader_out varying into that varying's ring buffer
 * at the slot for the current pos_counter, then remove the original store. */
430 lower_pv_mode_gs_store(nir_builder *b,
431 nir_intrinsic_instr *intrin,
432 struct lower_pv_mode_state *state)
434 b->cursor = nir_before_instr(&intrin->instr);
435 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
436 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
437 nir_variable *var = nir_deref_instr_get_variable(deref);
439 gl_varying_slot location = var->data.location;
440 assert(state->varyings[location]);
441 assert(intrin->src[1].is_ssa);
442 nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
443 nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
/* preserve the original write mask on the ring-buffer store */
444 nir_store_array_var(b, state->varyings[location],
445 index, intrin->src[1].ssa,
446 nir_intrinsic_write_mask(intrin));
447 nir_instr_remove(&intrin->instr);
/* Emit one primitive from the varying ring buffers with its vertices rotated
 * so the last vertex becomes the provoking (first) one, accounting for how
 * strip/fan input primitives alternate winding/ordering. */
455 lower_pv_mode_emit_rotated_prim(nir_builder *b,
456 struct lower_pv_mode_state *state,
457 nir_ssa_def *current_vertex)
459 nir_ssa_def *two = nir_imm_int(b, 2);
460 nir_ssa_def *three = nir_imm_int(b, 3);
461 bool is_triangle = state->primitive_vert_count == 3;
462 /* This shader will always see the last three vertices emitted by the user gs.
463 * The following table is used to rotate primitives within a strip generated
464 * by the user gs such that the last vertex becomes the first.
466 * [lines, tris][even/odd index][vertex mod 3]
468 static const unsigned vert_maps[2][2][3] = {
469 {{1, 0, 0}, {1, 0, 0}},
470 {{2, 0, 1}, {2, 1, 0}}
472 /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
473 * is either the last or the second, depending on whether the triangle is at an odd
474 * or even position within the strip.
476 * odd or even primitive within draw
478 nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
479 for (unsigned i = 0; i < state->primitive_vert_count; i++) {
480 /* odd or even triangle within strip emitted by user GS
481 * this is handled using the table
483 nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
484 unsigned offset_even = vert_maps[is_triangle][0][i];
485 unsigned offset_odd = vert_maps[is_triangle][1][i];
486 nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
487 nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
488 nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
489 offset_odd_value, offset_even_value);
490 /* Here we account for how triangles are provided to the gs from a strip.
491 * For even primitives we rotate by 3, meaning we do nothing.
492 * For odd primitives we rotate by 2, combined with the previous rotation this
493 * means the second vertex becomes the last.
495 if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
496 rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
500 /* Triangles that come from fans are provided to the gs the same way as
501 * odd triangles from a strip so always rotate by 2.
503 else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
504 rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
506 rotated_i = nir_iadd(b, rotated_i, current_vertex);
/* copy every buffered varying for this rotated vertex to the real outputs */
507 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
508 gl_varying_slot location = var->data.location;
509 if (state->varyings[location]) {
510 nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
511 nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
512 copy_vars(b, nir_build_deref_var(b, var), value);
/* emit_vertex handler: the vertex data was already captured into the ring
 * buffers by lower_pv_mode_gs_store(), so just bump pos_counter and drop
 * the original intrinsic. */
520 lower_pv_mode_gs_emit_vertex(nir_builder *b,
521 nir_intrinsic_instr *intrin,
522 struct lower_pv_mode_state *state)
524 b->cursor = nir_before_instr(&intrin->instr);
526 // increment pos_counter
527 nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
528 nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
530 nir_instr_remove(&intrin->instr);
/* end_primitive handler: drains the ring buffers in a loop, re-emitting each
 * buffered primitive with rotated provoking vertex, then resets the counters
 * and sets ring_offset so index 0 next maps to the last written slot. */
535 lower_pv_mode_gs_end_primitive(nir_builder *b,
536 nir_intrinsic_instr *intrin,
537 struct lower_pv_mode_state *state)
539 b->cursor = nir_before_instr(&intrin->instr);
541 nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
/* loop until fewer than primitive_vert_count vertices remain unflushed */
544 nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
545 nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
546 nir_imm_int(b, state->primitive_vert_count)));
547 nir_jump(b, nir_jump_break);
550 lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
551 nir_end_primitive(b);
553 nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
555 nir_pop_loop(b, NULL);
556 /* Set the ring offset such that when position 0 is
557 * read we get the last value written
559 nir_store_var(b, state->ring_offset, pos_counter, 1);
560 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
561 nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
563 nir_instr_remove(&intrin->instr);
/* Instruction dispatcher for lower_pv_mode_gs(): routes output stores,
 * emit_vertex and end_primitive to their handlers. copy_deref must already
 * have been lowered to store_deref before this pass runs. */
568 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
570 if (instr->type != nir_instr_type_intrinsic)
573 struct lower_pv_mode_state *state = data;
574 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
576 switch (intrin->intrinsic) {
577 case nir_intrinsic_store_deref:
578 return lower_pv_mode_gs_store(b, intrin, state);
579 case nir_intrinsic_copy_deref:
580 unreachable("should be lowered");
581 case nir_intrinsic_emit_vertex_with_counter:
582 case nir_intrinsic_emit_vertex:
583 return lower_pv_mode_gs_emit_vertex(b, intrin, state);
584 case nir_intrinsic_end_primitive:
585 case nir_intrinsic_end_primitive_with_counter:
586 return lower_pv_mode_gs_end_primitive(b, intrin, state);
/* Vertex count per emitted primitive for the supported GS output topologies
 * (points/line strips/triangle strips); anything else is a bug.
 * NOTE(review): the returned constants are on lines missing from this
 * extraction — presumably 1, 2 and 3 respectively. */
593 lower_pv_mode_vertices_for_prim(enum shader_prim prim)
596 case SHADER_PRIM_POINTS:
598 case SHADER_PRIM_LINE_STRIP:
600 case SHADER_PRIM_TRIANGLE_STRIP:
603 unreachable("unsupported primitive for gs output");
/* Pass entry point: sets up per-varying ring-buffer locals plus the counter
 * locals, initializes them at the top of the entrypoint, adjusts the
 * declared vertices_out for the re-emission scheme, and runs the
 * instruction dispatcher. `prim` selects the provoking-vertex rotation
 * behavior (see lower_pv_mode_emit_rotated_prim). */
608 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
611 struct lower_pv_mode_state state;
612 memset(state.varyings, 0, sizeof(state.varyings));
614 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
615 nir_builder_init(&b, entry);
616 b.cursor = nir_before_cf_list(&entry->body);
618 state.primitive_vert_count =
619 lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
620 state.ring_size = shader->info.gs.vertices_out;
/* one local array per written output varying, sized to the ring */
622 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
623 gl_varying_slot location = var->data.location;
626 snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
627 state.varyings[location] =
628 nir_local_variable_create(entry,
629 glsl_array_type(var->type,
635 state.pos_counter = nir_local_variable_create(entry,
639 state.out_pos_counter = nir_local_variable_create(entry,
641 "__out_pos_counter");
643 state.ring_offset = nir_local_variable_create(entry,
649 // initialize pos_counter and out_pos_counter
650 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
651 nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
652 nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
/* each input primitive's worth of vertices is re-emitted per primitive */
654 shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
655 (state.primitive_vert_count - 1)) *
656 state.primitive_vert_count;
657 return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
658 nir_metadata_dominance, &state);
/* State shared by the lower_line_stipple_gs pass callbacks. */
661 struct lower_line_stipple_state {
662 nir_variable *pos_out;         /* gl_Position output */
663 nir_variable *stipple_out;     /* added noperspective varying fed to the FS */
664 nir_variable *prev_pos;        /* previous vertex position (temp) */
665 nir_variable *pos_counter;     /* vertices emitted so far (temp) */
666 nir_variable *stipple_counter; /* accumulated stipple distance (temp) */
667 bool line_rectangular;         /* euclidean vs. max-axis length metric */
/* Project a clip-space position to 2D viewport coordinates: divide xy by w,
 * then multiply by the viewport scale. */
671 viewport_map(nir_builder *b, nir_ssa_def *vert,
674 nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
675 nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
677 return nir_fmul(b, ndc_point, scale);
681 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
683 struct lower_line_stipple_state *state = data;
684 if (instr->type != nir_instr_type_intrinsic)
687 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
688 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
689 intrin->intrinsic != nir_intrinsic_emit_vertex)
692 b->cursor = nir_before_instr(instr);
694 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
695 // viewport-map endpoints
696 nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
697 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
700 nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
701 nir_ssa_def *curr = nir_load_var(b, state->pos_out);
702 prev = viewport_map(b, prev, vp_scale);
703 curr = viewport_map(b, curr, vp_scale);
705 // calculate length of line
707 if (state->line_rectangular)
708 len = nir_fast_distance(b, prev, curr);
710 nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
711 len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
713 // update stipple_counter
714 nir_store_var(b, state->stipple_counter,
715 nir_fadd(b, nir_load_var(b, state->stipple_counter),
719 nir_copy_var(b, state->stipple_out, state->stipple_counter);
720 nir_copy_var(b, state->prev_pos, state->pos_out);
722 // update prev_pos and pos_counter for next vertex
723 b->cursor = nir_after_instr(instr);
724 nir_store_var(b, state->pos_counter,
725 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
/* Pass entry point: adds a noperspective stipple-counter varying (placed
 * after the last written output slot), creates the temp state variables,
 * zero-initializes the counters at the entrypoint, and runs the callback. */
732 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
735 struct lower_line_stipple_state state;
738 nir_find_variable_with_location(shader, nir_var_shader_out,
741 // if position isn't written, we have nothing to do
745 state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
748 state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
749 state.stipple_out->data.driver_location = shader->num_outputs++;
/* use the first free generic varying slot */
750 state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
751 shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
753 // create temp variables
754 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
757 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
760 state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
762 "__stipple_counter");
764 state.line_rectangular = line_rectangular;
765 // initialize pos_counter and stipple_counter
766 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
767 nir_builder_init(&b, entry);
768 b.cursor = nir_before_cf_list(&entry->body);
769 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
770 nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
772 return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
773 nir_metadata_dominance, &state);
/* Fragment-shader half of line stippling: reads the stipple counter varying
 * produced by lower_line_stipple_gs and, per covered sample, evaluates the
 * 16-bit stipple pattern (factor in the high 16 bits of the push constant)
 * at that sample's interpolated position, clearing the sample from the
 * output sample mask when the pattern bit is 0. */
777 lower_line_stipple_fs(nir_shader *shader)
780 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
781 nir_builder_init(&b, entry);
783 // create stipple counter
784 nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
787 stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
788 stipple->data.driver_location = shader->num_inputs++;
/* must land on the same slot the GS chose for its stipple output */
789 stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
790 shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
792 nir_variable *sample_mask_out =
793 nir_find_variable_with_location(shader, nir_var_shader_out,
794 FRAG_RESULT_SAMPLE_MASK);
795 if (!sample_mask_out) {
796 sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
797 glsl_uint_type(), "sample_mask");
798 sample_mask_out->data.driver_location = shader->num_outputs++;
799 sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
802 b.cursor = nir_after_cf_list(&entry->body);
/* pattern: low 16 bits = stipple bits, high 16 bits = repeat factor */
804 nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
805 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
807 nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
808 pattern = nir_iand_imm(&b, pattern, 0xffff);
810 nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
811 nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
812 nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
813 nir_store_var(&b, v, sample_mask_in, 1);
814 nir_store_var(&b, sample_mask, sample_mask_in, 1);
/* loop: peel off the highest set sample bit each iteration until none left */
817 nir_ssa_def *value = nir_load_var(&b, v);
818 nir_ssa_def *index = nir_ufind_msb(&b, value);
819 nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
820 nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
821 nir_store_var(&b, v, new_value, 1);
822 nir_push_if(&b, nir_ieq_imm(&b, value, 0));
823 nir_jump(&b, nir_jump_break);
824 nir_pop_if(&b, NULL);
/* evaluate the stipple position at this specific sample */
826 nir_ssa_def *stipple_pos =
827 nir_interp_deref_at_sample(&b, 1, 32,
828 &nir_build_deref_var(&b, stipple)->dest.ssa, index);
829 stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
830 nir_imm_float(&b, 16.0));
831 stipple_pos = nir_f2i32(&b, stipple_pos);
833 nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
/* pattern bit 0 -> knock this sample out of the mask */
834 nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
836 nir_ssa_def *value = nir_load_var(&b, sample_mask);
837 value = nir_ixor(&b, value, index_mask);
838 nir_store_var(&b, sample_mask, value, 1);
840 nir_pop_if(&b, NULL);
842 nir_pop_loop(&b, NULL);
843 nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
/* State shared by the lower_line_smooth_gs pass callbacks. Varyings are
 * buffered per (slot, location_frac) pair so both line endpoints' values
 * are available when the expanded geometry is emitted. */
848 struct lower_line_smooth_state {
849 nir_variable *pos_out;        /* gl_Position output */
850 nir_variable *line_coord_out; /* per-vertex line coord fed to the FS */
851 nir_variable *prev_pos;       /* previous vertex position (temp) */
852 nir_variable *pos_counter;    /* vertices emitted so far (temp) */
853 nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
854 *varyings[VARYING_SLOT_MAX][4]; // location_frac
/* Redirect a store to a non-position shader_out varying into the temp copy
 * for its (slot, component) pair; position is handled by the emit-vertex
 * callback instead. */
858 lower_line_smooth_gs_store(nir_builder *b,
859 nir_intrinsic_instr *intrin,
860 struct lower_line_smooth_state *state)
862 b->cursor = nir_before_instr(&intrin->instr);
863 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
864 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
865 nir_variable *var = nir_deref_instr_get_variable(deref);
867 // we take care of position elsewhere
868 gl_varying_slot location = var->data.location;
869 unsigned location_frac = var->data.location_frac;
870 if (location != VARYING_SLOT_POS) {
871 assert(state->varyings[location]);
872 assert(intrin->src[1].is_ssa);
873 nir_store_var(b, state->varyings[location][location_frac],
875 nir_intrinsic_write_mask(intrin));
876 nir_instr_remove(&intrin->instr);
/* emit_vertex handler for smooth lines: once two endpoints are available,
 * expands the segment into 8 vertices (two end-caps plus the body) offset
 * along the line tangent/direction, writing a line_coord varying per vertex
 * for FS coverage computation, then latches current state into prev_* for
 * the next segment. Uses the viewport-scale and line-width push constants.
 * NOTE(review): `line_offets` is a misspelling of `line_offsets` in the
 * original source — left untouched here. */
885 lower_line_smooth_gs_emit_vertex(nir_builder *b,
886 nir_intrinsic_instr *intrin,
887 struct lower_line_smooth_state *state)
889 b->cursor = nir_before_instr(&intrin->instr);
/* skip the very first vertex — there is no previous endpoint yet */
891 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
892 nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
893 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
896 nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
897 nir_ssa_def *curr = nir_load_var(b, state->pos_out);
898 nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
899 nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);
901 nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
902 nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
/* +0.5 pixel fuzz for the smoothed edge */
904 nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
906 const unsigned yx[2] = { 1, 0 };
907 nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
908 nir_ssa_def *len = nir_fast_length(b, vec);
909 nir_ssa_def *dir = nir_normalize(b, vec);
910 nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
911 half_length = nir_fadd_imm(b, half_length, 0.5);
/* tangent = perpendicular of dir, scaled back into clip space */
913 nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
914 nir_ssa_def *tangent =
917 nir_swizzle(b, dir, yx, 2),
918 nir_imm_vec2(b, 1.0, -1.0)),
920 tangent = nir_fmul(b, tangent, half_width);
921 tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
922 dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
/* per-vertex clip-space offsets: 4 cap vertices each end, sharing the body */
924 nir_ssa_def *line_offets[8] = {
925 nir_fadd(b, tangent, nir_fneg(b, dir)),
926 nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
928 nir_fneg(b, tangent),
930 nir_fneg(b, tangent),
931 nir_fadd(b, tangent, dir),
932 nir_fadd(b, nir_fneg(b, tangent), dir),
934 nir_ssa_def *line_coord =
935 nir_vec4(b, half_width, half_width, half_length, half_length);
936 nir_ssa_def *line_coords[8] = {
937 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
938 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
939 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
940 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
941 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
942 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
943 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
944 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
947 /* emit first end-cap, and start line */
948 for (int i = 0; i < 4; ++i) {
949 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
950 gl_varying_slot location = var->data.location;
951 unsigned location_frac = var->data.location_frac;
952 if (state->prev_varyings[location][location_frac])
953 nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
955 nir_store_var(b, state->pos_out,
956 nir_fadd(b, prev, nir_fmul(b, line_offets[i],
957 nir_channel(b, prev, 3))), 0xf);
958 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
962 /* finish line and emit last end-cap */
963 for (int i = 4; i < 8; ++i) {
964 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
965 gl_varying_slot location = var->data.location;
966 unsigned location_frac = var->data.location_frac;
967 if (state->varyings[location][location_frac])
968 nir_copy_var(b, var, state->varyings[location][location_frac]);
970 nir_store_var(b, state->pos_out,
971 nir_fadd(b, curr, nir_fmul(b, line_offets[i],
972 nir_channel(b, curr, 3))), 0xf);
973 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
976 nir_end_primitive(b);
/* latch current vertex state for the next segment */
980 nir_copy_var(b, state->prev_pos, state->pos_out);
981 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
982 gl_varying_slot location = var->data.location;
983 unsigned location_frac = var->data.location_frac;
984 if (state->varyings[location][location_frac])
985 nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
988 // update prev_pos and pos_counter for next vertex
989 b->cursor = nir_after_instr(&intrin->instr);
990 nir_store_var(b, state->pos_counter,
991 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
994 nir_instr_remove(&intrin->instr);
/* Handle an end_primitive intrinsic inside the line-smooth GS lowering:
 * the per-line vertex counter is reset to 0 so the next strip starts
 * fresh, and the original intrinsic is removed (the lowered emit-vertex
 * path emits its own end_primitive calls).
 * NOTE(review): this view of the source is elided; interior lines are
 * missing — confirm against the full file before relying on details.
 */
999 lower_line_smooth_gs_end_primitive(nir_builder *b,
1000 nir_intrinsic_instr *intrin,
1001 struct lower_line_smooth_state *state)
1003 b->cursor = nir_before_instr(&intrin->instr);
1005 // reset line counter
1006 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
1008 nir_instr_remove(&intrin->instr);
/* Per-instruction callback for the line-smooth GS lowering pass
 * (driven by nir_shader_instructions_pass in lower_line_smooth_gs).
 * Dispatches store/emit/end intrinsics to their dedicated handlers;
 * copy_deref is expected to have been lowered away before this pass.
 */
1013 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
1015 if (instr->type != nir_instr_type_intrinsic)
1018 struct lower_line_smooth_state *state = data;
1019 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1021 switch (intrin->intrinsic) {
1022 case nir_intrinsic_store_deref:
1023 return lower_line_smooth_gs_store(b, intrin, state);
1024 case nir_intrinsic_copy_deref:
1025 unreachable("should be lowered");
1026 case nir_intrinsic_emit_vertex_with_counter:
1027 case nir_intrinsic_emit_vertex:
1028 return lower_line_smooth_gs_emit_vertex(b, intrin, state);
1029 case nir_intrinsic_end_primitive:
1030 case nir_intrinsic_end_primitive_with_counter:
1031 return lower_line_smooth_gs_end_primitive(b, intrin, state);
/* Rewrite a geometry shader that outputs lines so that each line segment
 * is expanded into a triangle strip suitable for smooth (anti-aliased)
 * line rendering:
 * - shader_temp copies of every output varying (current and previous
 *   vertex) are created so a full line segment can be re-emitted;
 * - a noperspective LINE_COORD output is added for the fragment-side
 *   coverage computation;
 * - gs.vertices_out is multiplied by 8 (8 emitted vertices per input
 *   vertex — two end-caps plus the strip body) and the output primitive
 *   becomes a triangle strip.
 * Returns the result of the instruction-rewriting pass.
 * NOTE(review): elided view — early-out when POS isn't written and some
 * variable setup lines are not visible here; verify in the full file.
 */
1038 lower_line_smooth_gs(nir_shader *shader)
1041 struct lower_line_smooth_state state;
1043 memset(state.varyings, 0, sizeof(state.varyings));
1044 memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
1045 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1046 gl_varying_slot location = var->data.location;
1047 unsigned location_frac = var->data.location_frac;
1048 if (location == VARYING_SLOT_POS)
1052 snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
1053 state.varyings[location][location_frac] =
1054 nir_variable_create(shader, nir_var_shader_temp,
1057 snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
1058 state.prev_varyings[location][location_frac] =
1059 nir_variable_create(shader, nir_var_shader_temp,
1064 nir_find_variable_with_location(shader, nir_var_shader_out,
1067 // if position isn't written, we have nothing to do
1071 state.line_coord_out =
1072 nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
/* extra varying carrying line coordinates to the FS; must not be
 * perspective-interpolated */
1074 state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1075 state.line_coord_out->data.driver_location = shader->num_outputs++;
1076 state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
1077 shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
1079 // create temp variables
1080 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
1083 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
1087 // initialize pos_counter
1088 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1089 nir_builder_init(&b, entry);
1090 b.cursor = nir_before_cf_list(&entry->body);
1091 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
/* every input vertex now produces up to 8 output vertices */
1093 shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
1094 shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
1096 return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
1097 nir_metadata_dominance, &state);
/* Fragment-shader side of smooth-line lowering. When lower_stipple is
 * set, a noperspective stipple-counter input and a shader_temp stipple
 * pattern (loaded from the ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN push
 * constant at shader entry) are created and handed to
 * nir_lower_aaline_fs, which does the actual coverage/stipple lowering.
 * NOTE(review): elided view — the declaration of `dummy` and some setup
 * lines are not visible here.
 */
1101 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
1106 nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
1107 if (lower_stipple) {
1108 stipple_counter = nir_variable_create(shader, nir_var_shader_in,
/* counter must match the GS-side noperspective output */
1111 stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1112 stipple_counter->data.driver_location = shader->num_inputs++;
1113 stipple_counter->data.location =
1114 MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1115 shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1117 stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1121 // initialize stipple_pattern
1122 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1123 nir_builder_init(&b, entry);
1124 b.cursor = nir_before_cf_list(&entry->body);
1125 nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
1126 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
1128 nir_store_var(&b, stipple_pattern, pattern, 1);
1131 nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
/* Dual-source blending fixup: if the shader writes FRAG_RESULT_DATA1,
 * remap that output to FRAG_RESULT_DATA0 with index 1 (the second blend
 * source), as expected by Vulkan dual-source blending. Metadata is
 * preserved since no instructions are touched.
 */
1136 lower_dual_blend(nir_shader *shader)
1138 bool progress = false;
1139 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1141 var->data.location = FRAG_RESULT_DATA0;
1142 var->data.index = 1; /* second source for dual-source blend */
1145 nir_shader_preserve_all_metadata(shader);
/* Rewrite vector pack_64_2x32 / unpack_64_2x32 ALU ops into their
 * scalar *_split equivalents, which the SPIR-V backend can emit
 * directly. Other instructions are left untouched.
 */
1150 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1152 if (instr->type != nir_instr_type_alu)
1154 nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1155 if (alu_instr->op != nir_op_pack_64_2x32 &&
1156 alu_instr->op != nir_op_unpack_64_2x32)
1158 b->cursor = nir_before_instr(&alu_instr->instr);
1159 nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1161 switch (alu_instr->op) {
1162 case nir_op_pack_64_2x32:
1163 dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1165 case nir_op_unpack_64_2x32:
1166 dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1169 unreachable("Impossible opcode");
1171 nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
1172 nir_instr_remove(&alu_instr->instr);
/* Shader-wide driver for lower_64bit_pack_instr; block-index and
 * dominance metadata are preserved since only ALU ops are replaced
 * in place. */
1177 lower_64bit_pack(nir_shader *shader)
1179 return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1180 nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Build a passthrough geometry shader that emulates GL_QUADS: it takes
 * lines_adjacency input (4 vertices) and emits two triangles (6
 * vertices) as a strip. The triangle vertex order is chosen at runtime
 * from a UBO word at last_pv_vert_offset so provoking-vertex semantics
 * (first vs last) are honored. All outputs of prev_stage are mirrored
 * as in/out pairs, and xfb info is carried over so transform feedback
 * still works.
 * NOTE(review): elided view — some declarations (e.g. `char name[...]`)
 * and conditionals are not visible here.
 */
1184 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1185 const nir_shader *prev_stage,
1186 int last_pv_vert_offset)
1188 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1192 nir_shader *nir = b.shader;
1193 nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
1194 nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
1195 nir->info.gs.vertices_in = 4;
1196 nir->info.gs.vertices_out = 6;
1197 nir->info.gs.invocations = 1;
1198 nir->info.gs.active_stream_mask = 1;
/* propagate transform-feedback state from the previous stage */
1200 nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1201 memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1202 if (prev_stage->xfb_info) {
1203 nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
1206 nir_variable *in_vars[VARYING_SLOT_MAX];
1207 nir_variable *out_vars[VARYING_SLOT_MAX];
1208 unsigned num_vars = 0;
1210 /* Create input/output variables. */
1211 nir_foreach_shader_out_variable(var, prev_stage) {
1212 assert(!var->data.patch);
1216 snprintf(name, sizeof(name), "in_%s", var->name);
1218 snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
/* GS inputs are per-vertex arrays: wrap the type in a [4] array */
1220 nir_variable *in = nir_variable_clone(var, nir);
1221 ralloc_free(in->name);
1222 in->name = ralloc_strdup(in, name);
1223 in->type = glsl_array_type(var->type, 4, false);
1224 in->data.mode = nir_var_shader_in;
1225 nir_shader_add_variable(nir, in);
1228 snprintf(name, sizeof(name), "out_%s", var->name);
1230 snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1232 nir_variable *out = nir_variable_clone(var, nir);
1233 ralloc_free(out->name);
1234 out->name = ralloc_strdup(out, name);
1235 out->data.mode = nir_var_shader_out;
1236 nir_shader_add_variable(nir, out);
1238 in_vars[num_vars] = in;
1239 out_vars[num_vars++] = out;
/* quad -> two triangles; the two tables differ in which vertex is
 * the provoking one for each triangle */
1242 int mapping_first[] = {0, 1, 2, 0, 2, 3};
1243 int mapping_last[] = {0, 1, 3, 1, 2, 3};
1244 nir_ssa_def *last_pv_vert_def = nir_load_ubo(&b, 1, 32,
1245 nir_imm_int(&b, 0), nir_imm_int(&b, last_pv_vert_offset),
1246 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
1247 last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1248 for (unsigned i = 0; i < 6; ++i) {
1249 /* swap indices 2 and 3 */
1250 nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
1251 nir_imm_int(&b, mapping_last[i]),
1252 nir_imm_int(&b, mapping_first[i]));
1253 /* Copy inputs to outputs. */
1254 for (unsigned j = 0; j < num_vars; ++j) {
1255 if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1258 nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1259 copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1261 nir_emit_vertex(&b, 0);
1263 nir_end_primitive(&b, 0);
1266 nir_end_primitive(&b, 0);
1267 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1268 nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
/* Initialize the per-screen nir_shader_compiler_options: start from a
 * static table of lowering flags suitable for SPIR-V emission, then
 * relax/tighten based on the device's reported Vulkan features
 * (shaderInt64, shaderFloat64) and known driver quirks (imprecise
 * double fmod on AMD-family drivers).
 */
1273 zink_screen_init_compiler(struct zink_screen *screen)
1275 static const struct nir_shader_compiler_options
1277 .lower_ffma16 = true,
1278 .lower_ffma32 = true,
1279 .lower_ffma64 = true,
1282 .lower_flrp32 = true,
1285 .lower_extract_byte = true,
1286 .lower_extract_word = true,
1287 .lower_insert_byte = true,
1288 .lower_insert_word = true,
1290 /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1291 * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1292 * ldexp, so we don't just always lower it in NIR). Given that ldexp is
1293 * effectively unused (no instances in shader-db), it's not worth the
1296 .lower_ldexp = true,
1298 .lower_mul_high = true,
1299 .lower_rotate = true,
1300 .lower_uadd_carry = true,
1301 .lower_usub_borrow = true,
1302 .lower_uadd_sat = true,
1303 .lower_usub_sat = true,
1304 .lower_vector_cmp = true,
1305 .lower_int64_options = 0,
1306 .lower_doubles_options = 0,
1307 .lower_uniforms_to_ubo = true,
1311 .lower_mul_2x32_64 = true,
1312 .support_16bit_alu = true, /* not quite what it sounds like */
1313 .max_unroll_iterations = 0,
1316 screen->nir_options = default_options;
/* no native int64: lower every 64-bit integer op */
1318 if (!screen->info.feats.features.shaderInt64)
1319 screen->nir_options.lower_int64_options = ~0;
/* no native fp64: full software lowering for doubles */
1321 if (!screen->info.feats.features.shaderFloat64) {
1322 screen->nir_options.lower_doubles_options = ~0;
1323 screen->nir_options.lower_flrp64 = true;
1324 screen->nir_options.lower_ffma64 = true;
1325 /* soft fp64 function inlining will blow up loop bodies and effectively
1326 * stop Vulkan drivers from unrolling the loops.
1328 screen->nir_options.max_unroll_iterations_fp64 = 32;
1332 The OpFRem and OpFMod instructions use cheap approximations of remainder,
1333 and the error can be large due to the discontinuity in trunc() and floor().
1334 This can produce mathematically unexpected results in some cases, such as
1335 FMod(x,x) computing x rather than 0, and can also cause the result to have
1336 a different sign than the infinitely precise result.
1338 -Table 84. Precision of core SPIR-V Instructions
1339 * for drivers that are known to have imprecise fmod for doubles, lower dmod
1341 if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1342 screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1343 screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1344 screen->nir_options.lower_doubles_options = nir_lower_dmod;
/* pipe_screen::get_compiler_options hook: zink only consumes NIR, so
 * assert the IR kind and return the screen's option table set up in
 * zink_screen_init_compiler. */
1348 zink_get_compiler_options(struct pipe_screen *pscreen,
1349 enum pipe_shader_ir ir,
1350 gl_shader_stage shader)
1352 assert(ir == PIPE_SHADER_IR_NIR);
1353 return &zink_screen(pscreen)->nir_options;
/* Convert an incoming TGSI shader to NIR, optionally dumping the TGSI
 * text to stderr when ZINK_DEBUG_TGSI is set. */
1357 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1359 if (zink_debug & ZINK_DEBUG_TGSI) {
1360 fprintf(stderr, "TGSI shader:\n---8<---\n");
1361 tgsi_dump_to_file(tokens, 0, stderr);
1362 fprintf(stderr, "---8<---\n\n");
1365 return tgsi_to_nir(tokens, screen, false);
/* nir_foreach_dest callback: set *state (a bool) when the dest is
 * 64-bit, so the caller knows the instruction touches 64-bit values. */
1370 dest_is_64bit(nir_dest *dest, void *state)
1372 bool *lower = (bool *)state;
1373 if (dest && (nir_dest_bit_size(*dest) == 64)) {
/* nir_foreach_src callback: set *state (a bool) when the source is
 * 64-bit; counterpart to dest_is_64bit. */
1381 src_is_64bit(nir_src *src, void *state)
1383 bool *lower = (bool *)state;
1384 if (src && (nir_src_bit_size(*src) == 64)) {
/* Filter for nir_lower_alu_to_scalar: matches instructions with any
 * 64-bit source or destination so only those get scalarized (used when
 * int64 ops must be lowered). */
1392 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1395 /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_*
1396 * doesn't have const variants, so do the ugly const_cast here. */
1397 nir_instr *instr = (nir_instr *)const_instr;
1399 nir_foreach_dest(instr, dest_is_64bit, &lower);
1402 nir_foreach_src(instr, src_is_64bit, &lower);
/* Filter for nir_lower_alu_to_scalar: the *_split pack/unpack opcodes
 * listed here are inherently scalar and must be skipped by the
 * scalarization pass.
 * NOTE(review): elided view — the `switch (alu->op)` line and the
 * instr-type guard are not visible here.
 */
1407 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1409 nir_instr *instr = (nir_instr *)const_instr;
1410 nir_alu_instr *alu = nir_instr_as_alu(instr);
1412 case nir_op_pack_64_2x32_split:
1413 case nir_op_pack_32_2x16_split:
1414 case nir_op_unpack_32_2x16_split_x:
1415 case nir_op_unpack_32_2x16_split_y:
1416 case nir_op_unpack_64_2x32_split_x:
1417 case nir_op_unpack_64_2x32_split_y:
/* Lookup table of buffer-object variables, indexed by bit-size class
 * (see get_bo_vars: index is derived from bit size >> 4 elsewhere).
 * first_ubo / first_ssbo record the lowest bound slot of each kind. */
1427 nir_variable *uniforms[5];
1428 nir_variable *ubo[5];
1429 nir_variable *ssbo[5];
1431 uint32_t first_ssbo;
/* Scan the shader's UBO/SSBO variables and sort them into a bo_vars
 * table by access size (the index is recovered from the struct field's
 * explicit stride). Uniform block 0 (driver_location == 0) is kept
 * separate from user UBOs. first_ubo/first_ssbo come from the shader's
 * used-slot masks (UBO slot 0 is excluded from the ffs scan).
 */
1434 static struct bo_vars
1435 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1438 memset(&bo, 0, sizeof(bo));
1440 bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1441 assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS);
1443 bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1444 assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1445 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
/* recover the size-class index encoded in the field's stride */
1446 unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1447 if (var->data.mode == nir_var_mem_ssbo) {
1448 assert(!bo.ssbo[idx]);
1451 if (var->data.driver_location) {
1452 assert(!bo.ubo[idx]);
/* driver_location 0 is the uniform block, not a user UBO */
1455 assert(!bo.uniforms[idx]);
1456 bo.uniforms[idx] = var;
/* Robustness rewrite for UBO/SSBO access with constant offsets: when a
 * constant offset would read past the variable's declared (sized)
 * array, replace the out-of-bounds components of the load with zero
 * immediates and rewrite uses. Unsized (runtime-length) arrays are
 * exempt. The bo_vars table from get_bo_vars maps the access bit size
 * to the right variable.
 * NOTE(review): elided view — the store path and the per-component
 * in-bounds load are not fully visible here.
 */
1464 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1466 struct bo_vars *bo = data;
1467 if (instr->type != nir_instr_type_intrinsic)
1469 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1470 nir_variable *var = NULL;
1471 nir_ssa_def *offset = NULL;
1472 bool is_load = true;
1473 b->cursor = nir_before_instr(instr);
1475 switch (intr->intrinsic) {
1476 case nir_intrinsic_store_ssbo:
/* bit_size >> 4 picks the size-class slot (see get_bo_vars) */
1477 var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1478 offset = intr->src[2].ssa;
1481 case nir_intrinsic_load_ssbo:
1482 var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
1483 offset = intr->src[1].ssa;
1485 case nir_intrinsic_load_ubo:
/* UBO binding 0 is the uniform block */
1486 if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1487 var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
1489 var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
1490 offset = intr->src[1].ssa;
/* only constant offsets can be bounds-checked statically */
1495 nir_src offset_src = nir_src_for_ssa(offset);
1496 if (!nir_src_is_const(offset_src))
1499 unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1500 const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1501 unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
/* a trailing zero-length array means runtime-sized: skip the check */
1502 bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1503 if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1506 unsigned rewrites = 0;
1507 nir_ssa_def *result[2];
1508 for (unsigned i = 0; i < intr->num_components; i++) {
1509 if (offset_bytes + i >= size) {
/* out-of-bounds component reads as zero */
1512 result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
1515 assert(rewrites == intr->num_components);
1517 nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1518 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1520 nir_instr_remove(instr);
/* Shader-wide driver for bound_bo_access_instr: builds the bo_vars
 * lookup table once and runs the rewrite over all instructions. */
1525 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1527 struct bo_vars bo = get_bo_vars(zs, shader);
1528 return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
/* Main NIR optimization loop for zink: one-time lowering passes first
 * (int64, soft-fp64 pack ops, vars-to-ssa), then the usual
 * copy-prop/DCE/CSE/algebraic cycle (presumably iterated while
 * `progress` is set — the loop construct is not visible in this elided
 * view), finishing with late algebraic and a final cleanup.
 */
1532 optimize_nir(struct nir_shader *s, struct zink_shader *zs)
1537 if (s->options->lower_int64_options)
1538 NIR_PASS_V(s, nir_lower_int64);
1539 if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1540 NIR_PASS_V(s, lower_64bit_pack);
1541 NIR_PASS_V(s, nir_lower_vars_to_ssa);
/* scalarize everything except inherently-scalar pack/unpack splits */
1542 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1543 NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1544 NIR_PASS(progress, s, nir_copy_prop);
1545 NIR_PASS(progress, s, nir_opt_remove_phis);
1546 if (s->options->lower_int64_options) {
1547 NIR_PASS(progress, s, nir_lower_64bit_phis);
1548 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1550 NIR_PASS(progress, s, nir_opt_dce);
1551 NIR_PASS(progress, s, nir_opt_dead_cf);
1552 NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1553 NIR_PASS(progress, s, nir_opt_cse);
1554 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1555 NIR_PASS(progress, s, nir_opt_algebraic);
1556 NIR_PASS(progress, s, nir_opt_constant_folding);
1557 NIR_PASS(progress, s, nir_opt_undef);
1558 NIR_PASS(progress, s, zink_nir_lower_b2b);
1560 NIR_PASS(progress, s, bound_bo_access, zs);
1565 NIR_PASS(progress, s, nir_opt_algebraic_late);
1567 NIR_PASS_V(s, nir_copy_prop);
1568 NIR_PASS_V(s, nir_opt_dce);
1569 NIR_PASS_V(s, nir_opt_cse);
/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
/* Per-instruction framebuffer-fetch lowering: any load_deref of a
 * fb_fetch_output variable is replaced by an image load from a cloned
 * variable retyped as a subpass input attachment (SubpassData /
 * SubpassDataMS when multisampled, with format Unknown as SPIR-V
 * requires). `data != NULL` selects the multisample path. */
1580 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1582 bool ms = data != NULL;
1583 if (instr->type != nir_instr_type_intrinsic)
1585 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1586 if (intr->intrinsic != nir_intrinsic_load_deref)
1588 nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
1589 if (!var->data.fb_fetch_output)
1591 b->cursor = nir_after_instr(instr);
1592 nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1593 /* If Dim is SubpassData, ... Image Format must be Unknown
1594 * - SPIRV OpTypeImage specification
1596 fbfetch->data.image.format = 0;
1597 fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1598 fbfetch->data.mode = nir_var_uniform;
1599 fbfetch->data.binding = ZINK_FBFETCH_BINDING;
/* NOTE(review): duplicated assignment below appears in the original */
1600 fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1601 fbfetch->data.sample = ms;
1602 enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1603 fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1604 nir_shader_add_variable(b->shader, fbfetch);
1605 nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
/* sample id only matters for the multisampled subpass input */
1606 nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
1607 nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1608 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
/* Shader-wide driver for lower_fbfetch_instr: locates the
 * fb_fetch_output variable (returned through *fbfetch) and runs the
 * rewrite; the `ms` flag is smuggled through the pass's void* data. */
1613 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1615 nir_foreach_shader_out_variable(var, shader) {
1616 if (var->data.fb_fetch_output) {
1624 return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
/*
 * Add a check for out of bounds LOD for every texel fetch op
 * - if (lod < query_levels(tex))
 * - res = (0, 0, 0, 1)
 */
/* Wraps each txf in `if (lod < query_levels) real_txf else oob_value`,
 * where the out-of-bounds value is (0,0,0,1) in the dest type; a phi
 * merges the two and replaces all uses. Constant lod 0 is always in
 * bounds and skipped. texture_offset/handle sources are copied onto
 * the query_levels instruction so it targets the same texture. */
1636 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1638 if (in->type != nir_instr_type_tex)
1640 nir_tex_instr *txf = nir_instr_as_tex(in);
1641 if (txf->op != nir_texop_txf)
1644 b->cursor = nir_before_instr(in);
1645 int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1646 assert(lod_idx >= 0);
1647 nir_src lod_src = txf->src[lod_idx].src;
/* lod 0 always exists: nothing to guard */
1648 if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1651 assert(lod_src.is_ssa);
1652 nir_ssa_def *lod = lod_src.ssa;
1654 int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1655 int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
/* query_levels needs the same offset/handle sources as the txf */
1656 nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1657 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1658 levels->op = nir_texop_query_levels;
1659 levels->texture_index = txf->texture_index;
1660 levels->dest_type = nir_type_int | lod->bit_size;
1661 if (offset_idx >= 0) {
1662 levels->src[0].src_type = nir_tex_src_texture_offset;
1663 nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
1665 if (handle_idx >= 0) {
1666 levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1667 nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
1669 nir_ssa_dest_init(&levels->instr, &levels->dest,
1670 nir_tex_instr_dest_size(levels), 32, NULL);
1671 nir_builder_instr_insert(b, &levels->instr);
/* then-branch: lod is valid, run a clone of the original fetch */
1673 nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
1674 nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1675 nir_builder_instr_insert(b, &new_txf->instr);
/* else-branch: synthesize (0,0,0,1) in the fetch's dest type */
1677 nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1678 nir_const_value oob_values[4] = {0};
1679 unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1680 oob_values[3] = (txf->dest_type & nir_type_float) ?
1681 nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1682 nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1684 nir_pop_if(b, lod_oob_else);
1685 nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
1687 nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
1688 nir_instr_remove_v(in);
/* This pass is used to workaround the lack of out of bounds LOD robustness
 * for texel fetch ops in VK_EXT_image_robustness.
 */
1696 lower_txf_lod_robustness(nir_shader *shader)
/* metadata_none: the per-instr rewrite inserts control flow (if/else) */
1698 return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
/* Returns true when the shader has a user-written PSIZ output:
 * genuine ones carry explicit_location, injected ones do not. */
1703 check_psiz(struct nir_shader *s)
1705 bool have_psiz = false;
1706 nir_foreach_shader_out_variable(var, s) {
1707 if (var->data.location == VARYING_SLOT_PSIZ) {
1708 /* genuine PSIZ outputs will have this set */
1709 have_psiz |= !!var->data.explicit_location;
/* Find the output variable covering (location, location_frac).
 * For frac 0 on non-PSIZ slots, when several variables share a
 * location, the one spanning the most vec4 slots is chosen; otherwise
 * a variable matches if its location_frac equals the requested one or
 * its component count reaches past it. have_psiz disambiguates
 * genuine vs injected PSIZ (see check_psiz).
 * NOTE(review): elided view — returns and some declarations (e.g.
 * `unsigned slots`) are not visible here.
 */
static nir_variable *
1716 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
1718 assert((int)location >= 0);
1721 if (!location_frac && location != VARYING_SLOT_PSIZ) {
1722 nir_foreach_shader_out_variable(var, nir) {
1723 if (var->data.location == location)
1728 /* multiple variables found for this location: find the biggest one */
1729 nir_variable *out = NULL;
1731 nir_foreach_shader_out_variable(var, nir) {
1732 if (var->data.location == location) {
1733 unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
1734 if (count_slots > slots) {
1735 slots = count_slots;
1742 /* only one variable found or this is location_frac */
1743 nir_foreach_shader_out_variable(var, nir) {
1744 if (var->data.location == location &&
1745 (var->data.location_frac == location_frac ||
1746 (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
1747 if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
/* True when every component this stream output covers has already been
 * marked inlined in the per-slot bool array. */
1756 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1758 for (unsigned i = 0; i < output->num_components; i++)
1759 if (!inlined[output->start_component + i])
/* Move the PSIZ variable to the first slot past all written outputs
 * (at least VARYING_SLOT_VAR0); final slot assignment happens later
 * during slot remapping. */
1765 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1767 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1768 if (last_output < VARYING_SLOT_VAR0)
1769 last_output = VARYING_SLOT_VAR0;
1772 /* this should get fixed up by slot remapping */
1773 psiz->data.location = last_output;
/* Given a (possibly array/matrix) type and a vec4 slot index within it,
 * return the vecN type of just the components that land in that slot:
 * a mask of the member's components is intersected with the slot's
 * 4-component window and the popcount gives the vector width.
 * 64-bit base types occupy two components each.
 */
static const struct glsl_type *
1777 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1779 /* could be dvec/dmat/mat: each member is the same */
1780 const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1781 /* determine size of each member type */
1782 unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1783 /* normalize slot idx to current type's size */
1785 unsigned slot_components = glsl_get_components(plain);
1786 if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1787 slot_components *= 2;
1788 /* create a vec4 mask of the selected slot's components out of all the components */
1789 uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1790 /* return a vecN of the selected components */
1791 slot_components = util_bitcount(mask);
1792 return glsl_vec_type(slot_components);
/* Walk a struct type's members, accumulating vec4 slot counts, until
 * the member containing *slot_idx is found; *slot_idx is rebased to be
 * relative to that member. Non-struct members are further clamped to
 * the exact slot via clamp_slot_type. Returns the (unwrapped) member
 * type covering the requested slot.
 */
static const struct glsl_type *
1796 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1798 const struct glsl_type *type = slot_type;
1799 unsigned slot_count = 0;
1800 unsigned cur_slot = 0;
1801 /* iterate over all the members in the struct, stopping once the slot idx is reached */
1802 for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1803 /* use array type for slot counting but return array member type for unroll */
1804 const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1805 type = glsl_without_array(arraytype);
1806 slot_count = glsl_count_vec4_slots(arraytype, false, false);
/* rebase the slot index to be relative to the matched member */
1808 *slot_idx -= (cur_slot - slot_count);
1809 if (!glsl_type_is_struct_or_ifc(type))
1810 /* this is a fully unrolled struct: find the number of vec components to output */
1811 type = clamp_slot_type(type, *slot_idx);
/* Compute how many 32-bit components the variable exports in a given
 * vec4 slot (slot is absolute; so_slot is the base it is measured
 * from). Structs are unrolled member by member; clip/cull distance
 * arrays get vector semantics (first vec4 holds up to 4, the second
 * holds the remainder); 64-bit types count double since gallium
 * measures xfb in 32-bit units.
 */
1816 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1818 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1819 const struct glsl_type *orig_type = var->type;
1820 const struct glsl_type *type = glsl_without_array(var->type);
1821 unsigned slot_idx = slot - so_slot;
/* for arrays, fold the slot index into a single element's span */
1822 if (type != orig_type)
1823 slot_idx %= glsl_count_vec4_slots(type, false, false);
1824 /* need to find the vec4 that's being exported by this slot */
1825 while (glsl_type_is_struct_or_ifc(type))
1826 type = unroll_struct_type(type, &slot_idx);
1828 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1829 unsigned num_components = glsl_get_components(glsl_without_array(type));
1830 /* special handling: clip/cull distance are arrays with vector semantics */
1831 if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1832 num_components = glsl_array_size(type);
1834 /* this is the second vec4 */
1835 num_components %= 4;
1837 /* this is the first vec4 */
1838 num_components = MIN2(num_components, 4);
1840 assert(num_components);
1841 /* gallium handles xfb in terms of 32bit units */
1842 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1843 num_components *= 2;
1844 return num_components;
/* Linear search of the stream-output table for the entry whose
 * register index maps (via reverse_map) to the requested varying slot.
 * NOTE(review): elided view — the not-found return is not visible. */
static const struct pipe_stream_output *
1848 find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
1850 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1851 const struct pipe_stream_output *packed_output = &so_info->output[i];
1852 if (reverse_map[packed_output->register_index] == slot)
1853 return packed_output;
1859 update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
1860 uint64_t outputs_written, bool have_psiz)
1862 uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
1864 /* semi-copied from iris */
1865 while (outputs_written) {
1866 int bit = u_bit_scan64(&outputs_written);
1867 /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
1868 if (bit == VARYING_SLOT_PSIZ && !have_psiz)
1870 reverse_map[slot++] = bit;
1873 bool have_fake_psiz = false;
1874 nir_foreach_shader_out_variable(var, nir) {
1875 if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1876 have_fake_psiz = true;
1879 bool inlined[VARYING_SLOT_MAX][4] = {0};
1880 uint64_t packed = 0;
1881 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1882 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1883 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1884 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1885 nir_variable *psiz = NULL;
1886 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1887 const struct pipe_stream_output *output = &so_info->output[i];
1888 unsigned slot = reverse_map[output->register_index];
1889 /* always set stride to be used during draw */
1890 zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1891 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1892 nir_variable *var = NULL;
1895 var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1896 if (var->data.location == VARYING_SLOT_PSIZ)
1899 slot = reverse_map[output->register_index];
1900 if (var->data.explicit_xfb_buffer) {
1901 /* handle dvec3 where gallium splits streamout over 2 registers */
1902 for (unsigned j = 0; j < output->num_components; j++)
1903 inlined[slot][output->start_component + j] = true;
1905 if (is_inlined(inlined[slot], output))
1907 bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1908 unsigned num_components = get_slot_components(var, slot, so_slot);
1909 /* if this is the entire variable, try to blast it out during the initial declaration
1910 * structs must be handled later to ensure accurate analysis
1912 if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1913 var->data.explicit_xfb_buffer = 1;
1914 var->data.xfb.buffer = output->output_buffer;
1915 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1916 var->data.offset = output->dst_offset * 4;
1917 var->data.stream = output->stream;
1918 for (unsigned j = 0; j < output->num_components; j++)
1919 inlined[slot][output->start_component + j] = true;
1921 /* otherwise store some metadata for later */
1922 packed |= BITFIELD64_BIT(slot);
1923 packed_components[slot] += output->num_components;
1924 packed_streams[slot] |= BITFIELD_BIT(output->stream);
1925 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1926 for (unsigned j = 0; j < output->num_components; j++)
1927 packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
1932 /* if this was flagged as a packed output before, and if all the components are
1933 * being output with the same stream on the same buffer with increasing offsets, this entire variable
1934 * can be consolidated into a single output to conserve locations
1936 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1937 const struct pipe_stream_output *output = &so_info->output[i];
1938 unsigned slot = reverse_map[output->register_index];
1939 if (is_inlined(inlined[slot], output))
1941 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1942 nir_variable *var = NULL;
1944 var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1945 /* this is a lowered 64bit variable that can't be exported due to packing */
1946 if (var->data.is_xfb)
1949 unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
1950 /* for each variable, iterate over all the variable's slots and inline the outputs */
1951 for (unsigned j = 0; j < num_slots; j++) {
1952 slot = var->data.location + j;
1953 const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
1957 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1958 if (!(packed & BITFIELD64_BIT(slot)) ||
1959 util_bitcount(packed_streams[slot]) != 1 ||
1960 util_bitcount(packed_buffers[slot]) != 1)
1963 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1964 unsigned num_components = get_slot_components(var, slot, var->data.location);
1965 if (num_components != packed_components[slot])
1968 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1969 uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
1970 for (unsigned k = 1; k < num_components; k++) {
1971 /* if the offsets are not incrementing as expected, skip consolidation */
1972 if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
1974 prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
1977 /* this output can be consolidated: blast out all the data inlined */
1978 var->data.explicit_xfb_buffer = 1;
1979 var->data.xfb.buffer = output->output_buffer;
1980 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1981 var->data.offset = output->dst_offset * 4;
1982 var->data.stream = output->stream;
1983 /* GLSL specifies that interface blocks are split per-buffer in XFB */
1984 if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
1985 zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
1986 /* mark all slot components inlined to skip subsequent loop iterations */
1987 for (unsigned j = 0; j < num_slots; j++) {
1988 slot = var->data.location + j;
1989 for (unsigned k = 0; k < packed_components[slot]; k++)
1990 inlined[slot][k] = true;
1991 packed &= ~BITFIELD64_BIT(slot);
1996 /* these are packed/explicit varyings which can't be exported with normal output */
1997 zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
1998 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1999 zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2001 zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2002 /* ensure this doesn't get output in the shader by unsetting location */
2003 if (have_fake_psiz && psiz)
2004 update_psiz_location(nir, psiz);
/* State shared between decompose_attribs() and its lower_attrib() callback.
 * split[0] is the original vector attribute variable; split[1..] are the
 * per-component scalar clones it was decomposed into (see decompose_attribs).
 * NOTE(review): the rest of this struct (a needs_w flag is referenced by
 * lower_attrib) is not visible in this excerpt.
 */
2007 struct decompose_state {
2008 nir_variable **split;
/* nir_shader_instructions_pass callback for decompose_attribs():
 * rewrites each load of the original attribute (split[0]) into a nir_vec
 * assembled from loads of the per-component clone variables (split[1..]).
 * Returns true when an instruction was rewritten (return-false fast paths
 * are not visible in this excerpt).
 */
2013 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2015 struct decompose_state *state = data;
2016 nir_variable **split = state->split;
2017 if (instr->type != nir_instr_type_intrinsic)
2019 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2020 if (intr->intrinsic != nir_intrinsic_load_deref)
2022 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2023 nir_variable *var = nir_deref_instr_get_variable(deref);
/* only loads of the attribute being decomposed are rewritten */
2024 if (var != split[0])
2026 unsigned num_components = glsl_get_vector_elements(split[0]->type);
2027 b->cursor = nir_after_instr(instr);
2028 nir_ssa_def *loads[4];
/* with needs_w, the last component is sourced from split[1]'s .w below
 * instead of its own clone, so load one fewer clone here */
2029 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2030 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2031 if (state->needs_w) {
2032 /* oob load w component to get correct value for int/float */
2033 loads[3] = nir_channel(b, loads[0], 3);
2034 loads[0] = nir_channel(b, loads[0], 0);
2036 nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2037 nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
2038 nir_instr_remove_v(instr);
/* Split the vertex-input attributes named by the two bitmasks into one
 * variable per component, then rewrite their loads via lower_attrib().
 * decomposed_attrs_without_w: 4-component attrs whose .w is synthesized by an
 * oob read of the first clone rather than getting its own clone.
 * NOTE(review): presumably a workaround for vertex formats the device cannot
 * fetch natively — confirm against the caller.
 */
2043 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
/* collect driver_locations already in use by existing inputs */
2046 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2047 bits |= BITFIELD_BIT(var->data.driver_location);
/* NOTE(review): `bits` is consumed as a free-location mask below
 * (ffs + clear); the inversion step is not visible in this excerpt. */
2049 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2050 nir_variable *split[5];
2051 struct decompose_state state;
2052 state.split = split;
2053 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2056 bits |= BITFIELD_BIT(var->data.driver_location);
2057 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2058 unsigned num_components = glsl_get_vector_elements(var->type);
2059 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2060 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2061 split[i+1] = nir_variable_clone(var, nir);
2062 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
/* the first clone of a needs_w attr keeps the full vec4 type so its .w
 * can be read back in lower_attrib */
2063 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2064 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2066 split[i+1]->type = new_type;
/* claim the lowest free driver_location for this clone */
2067 split[i+1]->data.driver_location = ffs(bits) - 1;
2068 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2069 nir_shader_add_variable(nir, split[i+1]);
/* demote the original so dead-variable removal can delete it */
2071 var->data.mode = nir_var_shader_temp;
2072 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2074 nir_fixup_deref_modes(nir);
2075 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2076 optimize_nir(nir, NULL);
/* nir_shader_instructions_pass callback: convert the byte offsets of
 * SSBO/UBO/shared load/store/atomic intrinsics into element indices
 * (offset / (bit_size / 8)) so they match the uintN_t-array BO variable
 * layout built by get_bo_var().  When shaderInt64 is unavailable (or a UBO0
 * 64bit load is misaligned), 64bit accesses are additionally split into
 * 2x32 loads/stores with pack/unpack casts.
 */
2081 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2083 struct zink_screen *screen = data;
2084 const bool has_int64 = screen->info.feats.features.shaderInt64;
2085 if (instr->type != nir_instr_type_intrinsic)
2087 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2088 b->cursor = nir_before_instr(instr);
2089 switch (intr->intrinsic) {
2090 case nir_intrinsic_ssbo_atomic_fadd:
2091 case nir_intrinsic_ssbo_atomic_add:
2092 case nir_intrinsic_ssbo_atomic_umin:
2093 case nir_intrinsic_ssbo_atomic_imin:
2094 case nir_intrinsic_ssbo_atomic_umax:
2095 case nir_intrinsic_ssbo_atomic_imax:
2096 case nir_intrinsic_ssbo_atomic_and:
2097 case nir_intrinsic_ssbo_atomic_or:
2098 case nir_intrinsic_ssbo_atomic_xor:
2099 case nir_intrinsic_ssbo_atomic_exchange:
2100 case nir_intrinsic_ssbo_atomic_comp_swap: {
2101 /* convert offset to uintN_t[idx] */
2102 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
2103 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2106 case nir_intrinsic_load_ssbo:
2107 case nir_intrinsic_load_ubo: {
2108 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2109 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2110 nir_src_is_const(intr->src[0]) &&
2111 nir_src_as_uint(intr->src[0]) == 0 &&
2112 nir_dest_bit_size(intr->dest) == 64 &&
2113 nir_intrinsic_align_offset(intr) % 8 != 0;
2114 force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2115 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2116 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2117 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
/* NOTE(review): the force_2x32 guard for this branch is not visible in
 * this excerpt */
2119 /* this is always scalarized */
2120 assert(intr->dest.ssa.num_components == 1);
2121 /* rewrite as 2x32 */
2122 nir_ssa_def *load[2];
2123 for (unsigned i = 0; i < 2; i++) {
2124 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2125 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2127 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
/* preserve the original access flags (coherent/volatile/etc.) */
2128 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2130 /* cast back to 64bit */
2131 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2132 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2133 nir_instr_remove(instr);
2137 case nir_intrinsic_load_shared:
2138 b->cursor = nir_before_instr(instr);
2139 bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2140 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2141 nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
2142 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2144 /* this is always scalarized */
2145 assert(intr->dest.ssa.num_components == 1);
2146 /* rewrite as 2x32 */
2147 nir_ssa_def *load[2];
2148 for (unsigned i = 0; i < 2; i++)
2149 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2150 /* cast back to 64bit */
2151 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2152 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
2153 nir_instr_remove(instr);
2157 case nir_intrinsic_store_ssbo: {
2158 b->cursor = nir_before_instr(instr);
2159 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2160 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2161 nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
2162 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2164 /* this is always scalarized */
2165 assert(intr->src[0].ssa->num_components == 1);
/* split the 64bit value into low/high 32bit halves and store each */
2166 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2167 for (unsigned i = 0; i < 2; i++)
2168 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2169 nir_instr_remove(instr);
2173 case nir_intrinsic_store_shared: {
2174 b->cursor = nir_before_instr(instr);
2175 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2176 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2177 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
2178 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2179 if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2180 /* this is always scalarized */
2181 assert(intr->src[0].ssa->num_components == 1);
2182 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2183 for (unsigned i = 0; i < 2; i++)
2184 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2185 nir_instr_remove(instr);
/* Run rewrite_bo_access_instr over the whole shader.
 * Returns true if any instruction was rewritten. */
2196 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2198 return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2201 static nir_variable *
/* Return (creating on first use) the BO variable used to access ssbos,
 * ubos, or the uniform_0 block at the requested bit_size.  New variants are
 * cloned from the existing 32bit variable and retyped as an array of
 * struct { uintN_t base[len]; uintN_t unsized[]; } so deref-based access
 * works for any element size.
 * idx selects uniform_0 (0) vs general ubos (1); for ssbos idx is always 0.
 */
2202 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2204 nir_variable *var, **ptr;
2205 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
/* bit_size >> 4 maps 16/32/64 -> slot 1/2/4 in the per-size arrays */
2208 ptr = &bo->ssbo[bit_size >> 4];
2211 ptr = &bo->uniforms[bit_size >> 4];
2213 ptr = &bo->ubo[bit_size >> 4];
/* clone from the canonical 32bit variant, which always exists */
2218 var = bo->ssbo[32 >> 4];
2221 var = bo->uniforms[32 >> 4];
2223 var = bo->ubo[32 >> 4];
2225 var = nir_variable_clone(var, shader);
2227 var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2229 var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2231 nir_shader_add_variable(shader, var);
2233 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2234 fields[0].name = ralloc_strdup(shader, "base");
2235 fields[1].name = ralloc_strdup(shader, "unsized");
2236 unsigned array_size = glsl_get_length(var->type);
2237 const struct glsl_type *bare_type = glsl_without_array(var->type);
2238 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2239 unsigned length = glsl_get_length(array_type);
2240 const struct glsl_type *type;
2241 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
/* rescale element count from the 32bit template to the new element size */
2242 if (bit_size > 32) {
2243 assert(bit_size == 64);
2244 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2246 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2248 fields[0].type = type;
2249 fields[1].type = unsized;
2250 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2251 var->data.driver_location = idx;
/* Convert an ssbo_atomic_* intrinsic into the equivalent deref_atomic_* on
 * the retyped BO variable from get_bo_var(): one atomic op is emitted per
 * dest component, with the element offset incremented between ops, and the
 * results are re-vectorized to replace the original dest.
 */
2257 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2259 nir_intrinsic_op op;
2260 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
/* 1:1 opcode translation table */
2261 switch (intr->intrinsic) {
2262 case nir_intrinsic_ssbo_atomic_fadd:
2263 op = nir_intrinsic_deref_atomic_fadd;
2265 case nir_intrinsic_ssbo_atomic_fmin:
2266 op = nir_intrinsic_deref_atomic_fmin;
2268 case nir_intrinsic_ssbo_atomic_fmax:
2269 op = nir_intrinsic_deref_atomic_fmax;
2271 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2272 op = nir_intrinsic_deref_atomic_fcomp_swap;
2274 case nir_intrinsic_ssbo_atomic_add:
2275 op = nir_intrinsic_deref_atomic_add;
2277 case nir_intrinsic_ssbo_atomic_umin:
2278 op = nir_intrinsic_deref_atomic_umin;
2280 case nir_intrinsic_ssbo_atomic_imin:
2281 op = nir_intrinsic_deref_atomic_imin;
2283 case nir_intrinsic_ssbo_atomic_umax:
2284 op = nir_intrinsic_deref_atomic_umax;
2286 case nir_intrinsic_ssbo_atomic_imax:
2287 op = nir_intrinsic_deref_atomic_imax;
2289 case nir_intrinsic_ssbo_atomic_and:
2290 op = nir_intrinsic_deref_atomic_and;
2292 case nir_intrinsic_ssbo_atomic_or:
2293 op = nir_intrinsic_deref_atomic_or;
2295 case nir_intrinsic_ssbo_atomic_xor:
2296 op = nir_intrinsic_deref_atomic_xor;
2298 case nir_intrinsic_ssbo_atomic_exchange:
2299 op = nir_intrinsic_deref_atomic_exchange;
2301 case nir_intrinsic_ssbo_atomic_comp_swap:
2302 op = nir_intrinsic_deref_atomic_comp_swap;
2305 unreachable("unknown intrinsic");
2307 nir_ssa_def *offset = intr->src[1].ssa;
2308 nir_src *src = &intr->src[0];
2309 nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2310 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2311 nir_ssa_def *idx = src->ssa;
/* rebase the buffer index when ssbo bindings don't start at 0 */
2313 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2314 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
/* struct member 0 is the sized "base" array (see get_bo_var) */
2315 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2317 /* generate new atomic deref ops for every component */
2318 nir_ssa_def *result[4];
2319 unsigned num_components = nir_dest_num_components(intr->dest);
2320 for (unsigned i = 0; i < num_components; i++) {
2321 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2322 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2323 nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2324 new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2325 /* deref ops have no offset src, so copy the srcs after it */
/* NOTE: this inner `i` intentionally shadows the component-loop index */
2326 for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2327 nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2328 nir_builder_instr_insert(b, &new_instr->instr);
2330 result[i] = &new_instr->dest.ssa;
2331 offset = nir_iadd_imm(b, offset, 1);
2334 nir_ssa_def *load = nir_vec(b, result, num_components);
2335 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2336 nir_instr_remove(instr);
/* nir_shader_instructions_pass callback: replace explicit ssbo/ubo
 * load/store/atomic intrinsics with deref-based access to the retyped BO
 * variables from get_bo_var().  Atomics are delegated to
 * rewrite_atomic_ssbo_instr(); loads/stores are emitted one component at a
 * time with incrementing element offsets.
 */
2340 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2342 struct bo_vars *bo = data;
2343 if (instr->type != nir_instr_type_intrinsic)
2345 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2346 nir_variable *var = NULL;
2347 nir_ssa_def *offset = NULL;
2348 bool is_load = true;
2349 b->cursor = nir_before_instr(instr);
2352 switch (intr->intrinsic) {
2353 case nir_intrinsic_ssbo_atomic_fadd:
2354 case nir_intrinsic_ssbo_atomic_fmin:
2355 case nir_intrinsic_ssbo_atomic_fmax:
2356 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2357 case nir_intrinsic_ssbo_atomic_add:
2358 case nir_intrinsic_ssbo_atomic_umin:
2359 case nir_intrinsic_ssbo_atomic_imin:
2360 case nir_intrinsic_ssbo_atomic_umax:
2361 case nir_intrinsic_ssbo_atomic_imax:
2362 case nir_intrinsic_ssbo_atomic_and:
2363 case nir_intrinsic_ssbo_atomic_or:
2364 case nir_intrinsic_ssbo_atomic_xor:
2365 case nir_intrinsic_ssbo_atomic_exchange:
2366 case nir_intrinsic_ssbo_atomic_comp_swap:
2367 rewrite_atomic_ssbo_instr(b, instr, bo);
2369 case nir_intrinsic_store_ssbo:
2370 src = &intr->src[1];
2371 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2372 offset = intr->src[2].ssa;
2375 case nir_intrinsic_load_ssbo:
2376 src = &intr->src[0];
2377 var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2378 offset = intr->src[1].ssa;
2380 case nir_intrinsic_load_ubo:
2381 src = &intr->src[0];
2382 var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
2383 offset = intr->src[1].ssa;
2391 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
/* ubo slot 0 lives in its own variable, so general ubos rebase by 1 */
2392 nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2393 if (!ssbo && bo->first_ubo && var->data.driver_location)
2394 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2395 else if (ssbo && bo->first_ssbo)
2396 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2397 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2398 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
/* accesses are scalarized/split beforehand, hence at most 2 components */
2399 assert(intr->num_components <= 2);
2401 nir_ssa_def *result[2];
2402 for (unsigned i = 0; i < intr->num_components; i++) {
2403 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2404 result[i] = nir_load_deref(b, deref_arr);
2405 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2406 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2407 offset = nir_iadd_imm(b, offset, 1);
2409 nir_ssa_def *load = nir_vec(b, result, intr->num_components);
2410 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
2412 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2413 nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2415 nir_instr_remove(instr);
/* Run remove_bo_access_instr over the whole shader using the zink_shader's
 * BO variable table.  Returns true if any instruction was rewritten. */
2420 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2422 struct bo_vars bo = get_bo_vars(zs, shader);
2423 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
/* Scan every instruction in the shader for a deref_var referencing `var`;
 * used to detect whether a variable is actually read/written anywhere.
 * (The return statements are not visible in this excerpt; callers treat the
 * result as a bool.)
 */
2427 find_var_deref(nir_shader *nir, nir_variable *var)
2429 nir_foreach_function(function, nir) {
2430 if (!function->impl)
2433 nir_foreach_block(block, function->impl) {
2434 nir_foreach_instr(instr, block) {
2435 if (instr->type != nir_instr_type_deref)
2437 nir_deref_instr *deref = nir_instr_as_deref(instr);
2438 if (deref->deref_type == nir_deref_type_var && deref->var == var)
/* State for clamp_layer_output(): `original` is the shader's existing
 * VARYING_SLOT_LAYER output (relocated to a user varying); `clamped` is the
 * replacement gl_Layer output that receives the sanitized value. */
2446 struct clamp_layer_output_state {
2447 nir_variable *original;
2448 nir_variable *clamped;
/* Emit the layer-sanitizing sequence at the builder's cursor: read the
 * "framebuffer is layered" push constant and select either the original
 * layer value or a fallback (the bcsel's false operand is not visible in
 * this excerpt), then store the result to the clamped output. */
2452 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2454 nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2455 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2456 .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2457 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2458 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2459 nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2460 nir_load_deref(b, original_deref),
2462 nir_store_deref(b, clamped_deref, layer, 0);
/* nir_shader_instructions_pass callback for geometry shaders: inject the
 * clamp sequence immediately before every emit_vertex so each emitted
 * vertex carries a sanitized layer value. */
2466 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2468 struct clamp_layer_output_state *state = data;
2469 switch (instr->type) {
2470 case nir_instr_type_intrinsic: {
2471 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2472 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2473 intr->intrinsic != nir_intrinsic_emit_vertex)
2475 b->cursor = nir_before_instr(instr);
2476 clamp_layer_output_emit(b, state);
2479 default: return false;
/* Driver workaround (needs_sanitised_layer): rewrite the last vertex stage
 * so its VARYING_SLOT_LAYER output is replaced by a new "layer_clamped"
 * gl_Layer variable whose value is sanitized against the push-constant
 * layered-framebuffer flag.  The original variable is demoted to a user
 * varying (and the fs input patched to match) so any xfb capture and fs
 * reads keep working.  Returns whether the shaders were modified (the early
 * return is not visible in this excerpt). */
2484 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2486 switch (vs->info.stage) {
2487 case MESA_SHADER_VERTEX:
2488 case MESA_SHADER_GEOMETRY:
2489 case MESA_SHADER_TESS_EVAL:
2492 unreachable("invalid last vertex stage!");
2494 struct clamp_layer_output_state state = {0};
2495 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
/* nothing to do if the shader never actually writes gl_Layer */
2496 if (!state.original || !find_var_deref(vs, state.original))
2498 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2499 state.clamped->data.location = VARYING_SLOT_LAYER;
2500 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2501 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2502 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2503 state.original->data.driver_location = (*next_location)++;
2505 fs_var->data.location = state.original->data.location;
2506 fs_var->data.driver_location = state.original->data.driver_location;
2509 if (state.original->data.explicit_xfb_buffer) {
2510 /* Will xfb the clamped output but still better than nothing */
2511 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2512 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2513 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2514 state.clamped->data.offset = state.original->data.offset;
2515 state.clamped->data.stream = state.original->data.stream;
/* original becomes a temp so dead-variable removal can delete it */
2517 state.original->data.mode = nir_var_shader_temp;
2518 nir_fixup_deref_modes(vs);
/* GS: clamp before every emit; other stages: clamp once at end of main */
2520 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2521 nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2524 nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2525 nir_builder_init(&b, impl);
2526 assert(impl->end_block->predecessors->entries == 1);
2527 b.cursor = nir_after_cf_list(&impl->body);
2528 clamp_layer_output_emit(&b, &state);
2529 nir_metadata_preserve(impl, nir_metadata_dominance);
2531 optimize_nir(vs, NULL);
2532 NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
/* Assign a driver_location to a producer-stage output variable, recording
 * the slot->location mapping in slot_map for the consumer to reuse.
 * Built-in slots get a UINT_MAX sentinel so they are skipped when counting
 * user varyings.  Returns whether the variable is kept (the return
 * statements are not visible in this excerpt). */
2537 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2539 unsigned slot = var->data.location;
2542 case VARYING_SLOT_POS:
2543 case VARYING_SLOT_PNTC:
2544 case VARYING_SLOT_PSIZ:
2545 case VARYING_SLOT_LAYER:
2546 case VARYING_SLOT_PRIMITIVE_ID:
2547 case VARYING_SLOT_CLIP_DIST0:
2548 case VARYING_SLOT_CULL_DIST0:
2549 case VARYING_SLOT_VIEWPORT:
2550 case VARYING_SLOT_FACE:
2551 case VARYING_SLOT_TESS_LEVEL_OUTER:
2552 case VARYING_SLOT_TESS_LEVEL_INNER:
2553 /* use a sentinel value to avoid counting later */
2554 var->data.driver_location = UINT_MAX;
/* patch varyings index from VARYING_SLOT_PATCH0 */
2558 if (var->data.patch) {
2559 assert(slot >= VARYING_SLOT_PATCH0);
2560 slot -= VARYING_SLOT_PATCH0;
/* 0xff == unassigned; reserve consecutive vec4 slots for this var */
2562 if (slot_map[slot] == 0xff) {
2563 assert(*reserved < MAX_VARYING);
/* arrayed io (e.g. tcs outputs) is counted per array element */
2565 if (nir_is_arrayed_io(var, stage))
2566 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2568 num_slots = glsl_count_vec4_slots(var->type, false, false);
2569 assert(*reserved + num_slots <= MAX_VARYING);
2570 for (unsigned i = 0; i < num_slots; i++)
2571 slot_map[slot + i] = (*reserved)++;
2573 slot = slot_map[slot];
2574 assert(slot < MAX_VARYING);
2575 var->data.driver_location = slot;
2579 ALWAYS_INLINE static bool
2580 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2582 if (stage != MESA_SHADER_FRAGMENT)
2584 return var->data.location >= VARYING_SLOT_TEX0 &&
2585 var->data.location <= VARYING_SLOT_TEX7;
/* Assign a driver_location to a consumer-stage input variable from the
 * slot_map filled by assign_producer_var_io.  Built-ins get the UINT_MAX
 * sentinel; inputs the producer never wrote are dropped (fs texcoords are
 * kept for GL_COORD_REPLACE and eliminated later; tcs patch inputs get a
 * fresh slot since they may be read workgroup-wide).  Returns whether the
 * variable is kept (some return statements are not visible in this
 * excerpt). */
2589 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2591 unsigned slot = var->data.location;
2593 case VARYING_SLOT_POS:
2594 case VARYING_SLOT_PNTC:
2595 case VARYING_SLOT_PSIZ:
2596 case VARYING_SLOT_LAYER:
2597 case VARYING_SLOT_PRIMITIVE_ID:
2598 case VARYING_SLOT_CLIP_DIST0:
2599 case VARYING_SLOT_CULL_DIST0:
2600 case VARYING_SLOT_VIEWPORT:
2601 case VARYING_SLOT_FACE:
2602 case VARYING_SLOT_TESS_LEVEL_OUTER:
2603 case VARYING_SLOT_TESS_LEVEL_INNER:
2604 /* use a sentinel value to avoid counting later */
2605 var->data.driver_location = UINT_MAX;
2608 if (var->data.patch) {
2609 assert(slot >= VARYING_SLOT_PATCH0);
2610 slot -= VARYING_SLOT_PATCH0;
2612 if (slot_map[slot] == (unsigned char)-1) {
2613 /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2614 * so keep for now and eliminate later
2616 if (is_texcoord(stage, var)) {
2617 var->data.driver_location = -1;
2620 if (stage != MESA_SHADER_TESS_CTRL)
2623 /* patch variables may be read in the workgroup */
2624 slot_map[slot] = (*reserved)++;
2626 var->data.driver_location = slot_map[slot];
/* nir_shader_instructions_pass callback: replace every load of the given
 * (undriven) input variable with a constant zero.  For fs color inputs the
 * GL default (0,0,0,1) is used instead, so .w is forced to 1. */
2633 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2635 nir_variable *var = data;
2636 if (instr->type != nir_instr_type_intrinsic)
2639 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2640 if (intr->intrinsic != nir_intrinsic_load_deref)
2642 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
2643 if (deref_var != var)
2645 b->cursor = nir_before_instr(instr);
2646 nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2647 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2648 switch (var->data.location) {
2649 case VARYING_SLOT_COL0:
2650 case VARYING_SLOT_COL1:
2651 case VARYING_SLOT_BFC0:
2652 case VARYING_SLOT_BFC1:
2653 /* default color is 0,0,0,1 */
2654 if (nir_dest_num_components(intr->dest) == 4)
2655 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2661 nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
2662 nir_instr_remove(instr);
/* Assign matching driver_locations to a producer/consumer shader pair:
 * producer outputs claim slots first, then consumer inputs look them up
 * (inverted for tcs->tes, where the tes decides).  Inputs the producer
 * never wrote are rewritten to constant 0, and an injected pointsize is
 * stripped from all but the last vertex stage.  Also applies the
 * sanitised-layer workaround for fs consumers when required. */
2667 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2669 unsigned reserved = 0;
2670 unsigned char slot_map[VARYING_SLOT_MAX];
/* 0xff marks a slot as unassigned (see assign_*_var_io) */
2671 memset(slot_map, -1, sizeof(slot_map));
2672 bool do_fixup = false;
2673 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2674 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2675 /* remove injected pointsize from all but the last vertex stage */
2676 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2677 if (var && !var->data.explicit_location) {
2678 var->data.mode = nir_var_shader_temp;
2679 nir_fixup_deref_modes(producer);
2680 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2681 optimize_nir(producer, NULL);
2684 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2685 /* never assign from tcs -> tes, always invert */
2686 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2687 assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map)
2688 nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2689 if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2690 /* this is an output, nothing more needs to be done for it to be dropped */
2694 nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2695 assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2696 nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2697 if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2699 /* input needs to be rewritten */
2700 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2703 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2704 do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2708 nir_fixup_deref_modes(nir);
2709 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2710 optimize_nir(nir, NULL);
2713 /* all types that hit this function contain something that is 64bit */
/* Recursively rewrite a 64bit-containing GLSL type into a 32bit-expressible
 * equivalent: scalars become (u)vec2, vectors/matrices double their
 * component count (dvec3/dvec4 and matrices overflow into a packed struct
 * of vec4s), and arrays/structs recurse per element/member.  With
 * doubles_only, non-double 64bit types are left as uint64 vectors.  Marks
 * var->data.is_xfb when struct member packing would break xfb alignment. */
2714 static const struct glsl_type *
2715 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2717 if (glsl_type_is_array(type)) {
2718 const struct glsl_type *child = glsl_get_array_element(type);
2719 unsigned elements = glsl_array_size(type);
2720 unsigned stride = glsl_get_explicit_stride(type);
2721 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2723 /* rewrite structs recursively */
2724 if (glsl_type_is_struct_or_ifc(type)) {
2725 unsigned nmembers = glsl_get_length(type);
2726 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2727 unsigned xfb_offset = 0;
2728 for (unsigned i = 0; i < nmembers; i++) {
2729 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2731 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
/* a 64bit member following a misaligned xfb offset can't be packed */
2732 if (i < nmembers - 1 && xfb_offset % 8 &&
2733 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2734 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2735 var->data.is_xfb = true;
2737 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2739 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
/* leaf: nothing to do unless the type actually needs rewriting */
2741 if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2743 if (doubles_only && glsl_type_is_vector_or_scalar(type))
2744 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2745 enum glsl_base_type base_type;
2746 switch (glsl_get_base_type(type)) {
2747 case GLSL_TYPE_UINT64:
2748 base_type = GLSL_TYPE_UINT;
2750 case GLSL_TYPE_INT64:
2751 base_type = GLSL_TYPE_INT;
2753 case GLSL_TYPE_DOUBLE:
2754 base_type = GLSL_TYPE_FLOAT;
2757 unreachable("unknown 64-bit vertex attribute format!");
/* each 64bit component expands to two 32bit components */
2759 if (glsl_type_is_scalar(type))
2760 return glsl_vector_type(base_type, 2);
2761 unsigned num_components;
2762 if (glsl_type_is_matrix(type)) {
2763 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2764 unsigned vec_components = glsl_get_vector_elements(type);
2765 if (vec_components == 3)
2767 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2769 num_components = glsl_get_vector_elements(type) * 2;
2770 if (num_components <= 4)
2771 return glsl_vector_type(base_type, num_components);
2773 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2774 struct glsl_struct_field fields[8] = {0};
2775 unsigned remaining = num_components;
2776 unsigned nfields = 0;
2777 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2778 assert(i < ARRAY_SIZE(fields));
2779 fields[i].name = "";
2780 fields[i].offset = i * 16;
2781 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2784 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2785 return glsl_struct_type(fields, nfields, buf, true);
2788 static const struct glsl_type *
/* Walk up the deref chain and return the first matrix type found, so a row
 * deref can be traced back to its matrix; returns NULL when no ancestor is
 * a matrix (the return statements are not visible in this excerpt). */
2789 deref_is_matrix(nir_deref_instr *deref)
2791 if (glsl_type_is_matrix(deref->type))
2793 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2795 return deref_is_matrix(parent);
/* Rewrite every load/store of the 64bit variable 'var' inside 'function' to
 * operate on the 32bit replacement type produced by rewrite_64bit_type():
 * each 64bit component becomes a pair of 32bit components, packed/unpacked
 * with nir_pack_64_2x32 / nir_unpack_64_2x32.
 *
 * 'derefs'  - maps each rewritten deref to its ORIGINAL matrix type (or NULL),
 *             so matrix row accesses can be reconstructed after the type has
 *             been flattened to a struct of vec4s.
 * 'deletes' - set of instructions queued for removal after the walk; deferred
 *             so the same deref is never removed twice.
 * 'doubles_only' - when true, only derefs that are part of a matrix access
 *             are rewritten (non-matrix cases are skipped below).
 * Returns true if any instruction in this function was rewritten.
 */
2800 lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
2801 struct hash_table *derefs, struct set *deletes, bool doubles_only)
2803 bool func_progress = false;
2804 if (!function->impl)
2807 nir_builder_init(&b, function->impl);
2808 nir_foreach_block(block, function->impl) {
2809 nir_foreach_instr_safe(instr, block) {
2810 switch (instr->type) {
/* pass 1 responsibility: retype every deref of 'var' in place */
2811 case nir_instr_type_deref: {
2812 nir_deref_instr *deref = nir_instr_as_deref(instr);
/* skip derefs that can't reference this variable's storage mode */
2813 if (!(deref->modes & var->data.mode))
2815 if (nir_deref_instr_get_variable(deref) != var)
2818 /* matrix types are special: store the original deref type for later use */
2819 const struct glsl_type *matrix = deref_is_matrix(deref);
2820 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2822 /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
2823 hash_table_foreach(derefs, he) {
2824 /* propagate parent matrix type to row deref */
2825 if (he->key == parent)
2830 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
/* var derefs take the (already rewritten) variable type directly;
 * everything else gets its type rewritten component-wise
 */
2831 if (deref->deref_type == nir_deref_type_var)
2832 deref->type = var->type;
2834 deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
/* pass 2 responsibility: rewrite the 64bit loads/stores themselves */
2837 case nir_instr_type_intrinsic: {
2838 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2839 if (intr->intrinsic != nir_intrinsic_store_deref &&
2840 intr->intrinsic != nir_intrinsic_load_deref)
2842 if (nir_intrinsic_get_var(intr, 0) != var)
/* only 64bit accesses need rewriting; 32bit ones are already fine */
2844 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2845 (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
2847 b.cursor = nir_before_instr(instr);
2848 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
/* each 64bit component expands to two 32bit components */
2849 unsigned num_components = intr->num_components * 2;
2850 nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2851 /* this is the stored matrix type from the deref */
2852 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2853 const struct glsl_type *matrix = he ? he->data : NULL;
/* doubles_only mode leaves non-matrix doubles untouched */
2854 if (doubles_only && !matrix)
2856 func_progress = true;
/* ---- store path ---- */
2857 if (intr->intrinsic == nir_intrinsic_store_deref) {
2858 /* first, unpack the src data to 32bit vec2 components */
2859 for (unsigned i = 0; i < intr->num_components; i++) {
2860 nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2861 comp[i * 2] = nir_channel(&b, ssa, 0);
2862 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2864 unsigned wrmask = nir_intrinsic_write_mask(intr);
2866 /* expand writemask for doubled components */
2867 for (unsigned i = 0; i < intr->num_components; i++) {
2868 if (wrmask & BITFIELD_BIT(i))
2869 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2872 /* matrix types always come from array (row) derefs */
2873 assert(deref->deref_type == nir_deref_type_array);
2874 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2875 /* let optimization clean up consts later */
2876 nir_ssa_def *index = deref->arr.index.ssa;
2877 /* this might be an indirect array index:
2878 * - iterate over matrix columns
2879 * - add if blocks for each column
2880 * - perform the store in the block
2882 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2883 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2884 unsigned vec_components = glsl_get_vector_elements(matrix);
2885 /* always clamp dvec3 to 4 components */
2886 if (vec_components == 3)
2888 unsigned start_component = idx * vec_components * 2;
/* which vec4 member of the rewritten struct this column starts in */
2890 unsigned member = start_component / 4;
2891 /* number of components remaining */
2892 unsigned remaining = num_components;
2893 for (unsigned i = 0; i < num_components; member++) {
2894 if (!(mask & BITFIELD_BIT(i)))
2896 assert(member < glsl_get_length(var_deref->type));
2897 /* deref the rewritten struct to the appropriate vec4/vec2 */
2898 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2899 unsigned incr = MIN2(remaining, 4);
2900 /* assemble the write component vec */
2901 nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2902 /* use the number of components being written as the writemask */
2903 if (glsl_get_vector_elements(strct->type) > val->num_components)
2904 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2905 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
2909 nir_pop_if(&b, NULL);
/* the original matrix-row deref is now dead */
2911 _mesa_set_add(deletes, &deref->instr);
2912 } else if (num_components <= 4) {
2913 /* simple store case: just write out the components */
2914 nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2915 nir_store_deref(&b, deref, dest, mask);
2917 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2918 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2919 if (!(mask & BITFIELD_MASK(4)))
2921 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2922 nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2923 if (glsl_get_vector_elements(strct->type) > dest->num_components)
2924 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2925 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
/* ---- load path ---- */
2930 nir_ssa_def *dest = NULL;
2932 /* matrix types always come from array (row) derefs */
2933 assert(deref->deref_type == nir_deref_type_array);
2934 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2935 /* let optimization clean up consts later */
2936 nir_ssa_def *index = deref->arr.index.ssa;
2937 /* this might be an indirect array index:
2938 * - iterate over matrix columns
2939 * - add if blocks for each column
2940 * - phi the loads using the array index
2942 unsigned cols = glsl_get_matrix_columns(matrix);
2943 nir_ssa_def *dests[4];
2944 for (unsigned idx = 0; idx < cols; idx++) {
2945 /* don't add an if for the final row: this will be handled in the else */
2947 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2948 unsigned vec_components = glsl_get_vector_elements(matrix);
2949 /* always clamp dvec3 to 4 components */
2950 if (vec_components == 3)
2952 unsigned start_component = idx * vec_components * 2;
/* which vec4 member of the rewritten struct this column starts in */
2954 unsigned member = start_component / 4;
2955 /* number of components remaining */
2956 unsigned remaining = num_components;
2957 /* component index */
2958 unsigned comp_idx = 0;
2959 for (unsigned i = 0; i < num_components; member++) {
2960 assert(member < glsl_get_length(var_deref->type));
2961 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2962 nir_ssa_def *load = nir_load_deref(&b, strct);
2963 unsigned incr = MIN2(remaining, 4);
2964 /* repack the loads to 64bit */
2965 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
2966 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
2970 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
2972 nir_push_else(&b, NULL);
2974 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
2975 for (unsigned idx = cols - 1; idx >= 1; idx--) {
2976 nir_pop_if(&b, NULL);
2977 dest = nir_if_phi(&b, dests[idx - 1], dest);
2979 _mesa_set_add(deletes, &deref->instr);
2980 } else if (num_components <= 4) {
2981 /* simple load case */
2982 nir_ssa_def *load = nir_load_deref(&b, deref);
2983 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
2984 for (unsigned i = 0; i < intr->num_components; i++) {
2985 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
2987 dest = nir_vec(&b, comp, intr->num_components);
2989 /* writing > 4 components: access the struct and load the appropriate vec4 members */
2990 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
2991 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2992 nir_ssa_def *load = nir_load_deref(&b, strct);
2993 comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
2994 if (num_components > 2)
2995 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
2997 dest = nir_vec(&b, comp, intr->num_components);
/* replace all uses of the old 64bit load result with the repacked value */
2999 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3001 _mesa_set_add(deletes, instr);
/* CFG was restructured (if/else ladders added): invalidate all metadata */
3010 nir_metadata_preserve(function->impl, nir_metadata_none);
3011 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3012 set_foreach_remove(deletes, he)
3013 nir_instr_remove((void*)he->key);
3014 return func_progress;
/* Per-variable driver for the 64bit lowering: rewrites 'var's type and then
 * rewrites all loads/stores of it in every function of the shader.
 * Skips variables without 64bit content (or without doubles when
 * 'doubles_only' is set). Returns whether the variable was rewritten
 * (return statements are outside the visible excerpt).
 */
3018 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3019 struct set *deletes, bool doubles_only)
3021 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3023 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3024 /* once type is rewritten, rewrite all loads and stores */
3025 nir_foreach_function(function, shader)
3026 lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3030 /* rewrite all input/output variables using 32bit types and load/stores */
3032 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3034 bool progress = false;
/* deref->matrix-type map and deferred-deletion set shared across all vars */
3035 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3036 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3037 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3038 progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
/* function temporaries get the same treatment, per-function */
3039 nir_foreach_function(function, shader) {
3040 nir_foreach_function_temp_variable(var, function->impl) {
3041 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3043 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3044 progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
3047 ralloc_free(deletes);
3048 ralloc_free(derefs);
/* scalarize + re-optimize so the pack/unpack pairs introduced above fold away */
3050 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3051 nir_lower_phis_to_scalar(shader, false);
3052 optimize_nir(shader, NULL);
/* Split struct-typed shader output variables into one variable per struct
 * member, rewriting struct member derefs to point at the new variables.
 * Iterates until fixpoint ('changed' loop) since splitting can expose
 * further splittable structs. The original variable is demoted to
 * nir_var_shader_temp for later dead-variable removal.
 */
3058 split_blocks(nir_shader *nir)
3060 bool progress = false;
3061 bool changed = true;
3064 nir_foreach_shader_out_variable(var, nir) {
3065 const struct glsl_type *base_type = glsl_without_array(var->type);
3066 nir_variable *members[32]; //can't have more than this without breaking NIR
3067 if (!glsl_type_is_struct(base_type))
/* single-member or single-slot structs gain nothing from splitting */
3070 if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
3072 if (glsl_count_attribute_slots(var->type, false) == 1)
3074 unsigned offset = 0;
/* clone one variable per struct field, advancing location by slot count */
3075 for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3076 members[i] = nir_variable_clone(var, nir);
3077 members[i]->type = glsl_get_struct_field(var->type, i);
3078 members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3079 members[i]->data.location += offset;
3080 offset += glsl_count_attribute_slots(members[i]->type, false);
3081 nir_shader_add_variable(nir, members[i]);
3083 nir_foreach_function(function, nir) {
3084 bool func_progress = false;
3085 if (!function->impl)
3088 nir_builder_init(&b, function->impl);
3089 nir_foreach_block(block, function->impl) {
3090 nir_foreach_instr_safe(instr, block) {
3091 switch (instr->type) {
3092 case nir_instr_type_deref: {
3093 nir_deref_instr *deref = nir_instr_as_deref(instr);
3094 if (!(deref->modes & nir_var_shader_out))
3096 if (nir_deref_instr_get_variable(deref) != var)
/* only rewrite direct var->struct-member derefs */
3098 if (deref->deref_type != nir_deref_type_struct)
3100 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3101 if (parent->deref_type != nir_deref_type_var)
/* demote old derefs so dead-variable removal can reap them */
3103 deref->modes = nir_var_shader_temp;
3104 parent->modes = nir_var_shader_temp;
3105 b.cursor = nir_before_instr(instr);
/* redirect uses of the member deref to the new standalone variable */
3106 nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3107 nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3108 nir_instr_remove(&deref->instr);
3109 func_progress = true;
3117 nir_metadata_preserve(function->impl, nir_metadata_none);
3119 var->data.mode = nir_var_shader_temp;
/* Debug helper: dump a raw SPIR-V word buffer to 'file' and log the path.
 * NOTE(review): the fopen-failure guard and fclose are outside the visible
 * excerpt — confirm the stream is checked and closed in the full source.
 */
3128 zink_shader_dump(void *words, size_t size, const char *file)
3130 FILE *fp = fopen(file, "wb");
3132 fwrite(words, 1, size, fp);
3134 fprintf(stderr, "wrote '%s'...\n", file);
/* Create a Vulkan shader object from generated SPIR-V.
 *
 * Two creation paths are prepared:
 *  - VkShaderCreateInfoEXT (VK_EXT_shader_object) for 'separate' shaders
 *    when the extension is available, including layout/push-constant state
 *  - VkShaderModuleCreateInfo (vkCreateShaderModule) otherwise
 * With ZINK_DEBUG=spirv the words are dumped to dumpNN.spv; with validation
 * enabled the SPIR-V is round-tripped through spirv_to_nir as a sanity check.
 * Returns the created zink_shader_object (error handling lines are outside
 * the visible excerpt).
 */
3138 static struct zink_shader_object
3139 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3141 VkShaderModuleCreateInfo smci = {0};
3142 VkShaderCreateInfoEXT sci = {0};
3147 if (zink_debug & ZINK_DEBUG_SPIRV) {
3150 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3151 zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
/* --- VK_EXT_shader_object path --- */
3154 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3155 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3156 if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3157 sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3158 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3159 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3160 sci.pCode = spirv->words;
3162 sci.setLayoutCount = 2;
3163 VkDescriptorSetLayout dsl[2] = {0};
/* fragment shaders use set 1; all other stages use set 0 */
3164 dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3165 sci.pSetLayouts = dsl;
3166 VkPushConstantRange pcr;
3167 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3169 pcr.size = sizeof(struct zink_gfx_push_constant);
3170 sci.pushConstantRangeCount = 1;
3171 sci.pPushConstantRanges = &pcr;
/* --- vkCreateShaderModule path --- */
3173 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3174 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3175 smci.pCode = spirv->words;
/* self-validation: re-ingest the SPIR-V through spirv_to_nir to catch
 * malformed output before handing it to the driver
 */
3178 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3179 static const struct spirv_to_nir_options spirv_options = {
3180 .environment = NIR_SPIRV_VULKAN,
3185 .tessellation = true,
3186 .float_controls = true,
3187 .image_ms_array = true,
3188 .image_read_without_format = true,
3189 .image_write_without_format = true,
3190 .storage_image_ms = true,
3191 .geometry_streams = true,
3192 .storage_8bit = true,
3193 .storage_16bit = true,
3194 .variable_pointers = true,
3195 .stencil_export = true,
3196 .post_depth_coverage = true,
3197 .transform_feedback = true,
3198 .device_group = true,
3199 .draw_parameters = true,
3200 .shader_viewport_index_layer = true,
3202 .physical_storage_buffer_address = true,
3203 .int64_atomics = true,
3204 .subgroup_arithmetic = true,
3205 .subgroup_basic = true,
3206 .subgroup_ballot = true,
3207 .subgroup_quad = true,
3208 .subgroup_shuffle = true,
3209 .subgroup_vote = true,
3210 .vk_memory_model = true,
3211 .vk_memory_model_device_scope = true,
3214 .demote_to_helper_invocation = true,
3215 .sparse_residency = true,
3218 .ubo_addr_format = nir_address_format_32bit_index_offset,
3219 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3220 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3221 .push_const_addr_format = nir_address_format_logical,
3222 .shared_addr_format = nir_address_format_32bit_offset,
3224 uint32_t num_spec_entries = 0;
3225 struct nir_spirv_specialization *spec_entries = NULL;
3226 VkSpecializationInfo sinfo = {0};
3227 VkSpecializationMapEntry me[3];
3228 uint32_t size[3] = {1,1,1};
/* compute shaders without a compile-time workgroup size use spec constants */
3229 if (!zs->info.workgroup_size[0]) {
3230 sinfo.mapEntryCount = 3;
3231 sinfo.pMapEntries = &me[0];
3232 sinfo.dataSize = sizeof(uint32_t) * 3;
3234 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3235 for (int i = 0; i < 3; i++) {
3236 me[i].size = sizeof(uint32_t);
3237 me[i].constantID = ids[i];
3238 me[i].offset = i * sizeof(uint32_t);
3240 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3242 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3243 spec_entries, num_spec_entries,
3244 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3252 struct zink_shader_object obj;
/* choose creation entrypoint based on extension availability */
3253 if (!separate || !screen->info.have_EXT_shader_object)
3254 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3256 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3257 bool success = zink_screen_handle_vkresult(screen, ret);
/* Remove shader in/out variables that are never dereferenced anywhere in
 * the shader: demote them to shader_temp, then run dead-variable removal.
 */
3263 prune_io(nir_shader *nir)
3265 nir_foreach_shader_in_variable_safe(var, nir) {
3266 if (!find_var_deref(nir, var))
3267 var->data.mode = nir_var_shader_temp;
3269 nir_foreach_shader_out_variable_safe(var, nir) {
3270 if (!find_var_deref(nir, var))
3271 var->data.mode = nir_var_shader_temp;
3273 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
/* Record that the fragment-stage sampler bound to 'var' uses legacy
 * (old-style, multi-component) shadow sampling, so the shader key can
 * request a swizzle-aware recompile later.
 */
3277 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3279 /* unconvert from zink_binding() */
3280 uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3281 assert(sampler_id < 32); //bitfield size for tracking
3282 zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
/* Fix up a tex instruction's destination when the sampler's GLSL result
 * type disagrees with the nir dest (bit size mismatch, or an old-style
 * multi-component shadow result). Converts the dest to the sampler's bit
 * size and inserts a u2u/i2i/f2f back-conversion for existing uses.
 * 'data' (a zink_shader *, when non-NULL) enables legacy-shadow tracking.
 * Returns the converted def, or NULL when nothing needed rewriting
 * (the early-return line is outside the visible excerpt — confirm).
 */
3285 static nir_ssa_def *
3286 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
3289 const struct glsl_type *type = glsl_without_array(var->type);
3290 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3291 bool is_int = glsl_base_type_is_integer(ret_type);
3292 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3293 unsigned dest_size = nir_dest_bit_size(tex->dest);
3294 b->cursor = nir_after_instr(&tex->instr);
3295 unsigned num_components = nir_dest_num_components(tex->dest);
/* old-style shadow sampling returns >1 component; tg4/sparse are exempt */
3296 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3297 if (bit_size == dest_size && !rewrite_depth)
3299 nir_ssa_def *dest = &tex->dest.ssa;
3300 if (rewrite_depth && data) {
3301 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3302 flag_shadow_tex(var, data);
3304 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3307 if (bit_size != dest_size) {
/* retype the instruction itself, then convert back for existing users */
3308 tex->dest.ssa.bit_size = bit_size;
3309 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3312 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3313 dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3315 dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3317 dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3321 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3322 } else if (rewrite_depth) {
/* Per-pass state for lower_zs_swizzle_tex(). */
3328 struct lower_zs_swizzle_state {
/* binding offset of the first sampler for this stage (see zink_binding()) */
3330 unsigned base_sampler_id;
/* per-sampler swizzle data from the shader key; may be NULL in shadow-only mode */
3331 const struct zink_zs_swizzle_key *swizzle;
/* Instruction callback for lower_zs_swizzle_tex(): rewrites depth/stencil
 * texture reads that need a shader-side swizzle. Converts old-style shadow
 * samples to new-style single-component form via rewrite_tex_dest(), then
 * applies the key's manual swizzle (for tg4, picking a single component;
 * otherwise building a swizzled vector), or splats the shadow scalar when
 * no swizzle is required.
 */
3335 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3337 struct lower_zs_swizzle_state *state = data;
3338 const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3339 assert(state->shadow_only || swizzle_key);
3340 if (instr->type != nir_instr_type_tex)
3342 nir_tex_instr *tex = nir_instr_as_tex(instr);
/* query ops and already-new-style shadows need no rewriting */
3343 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3344 (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3346 if (tex->is_shadow && tex->op == nir_texop_tg4)
3347 /* Will not even try to emulate the shadow comparison */
3349 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3350 nir_variable *var = NULL;
3352 /* gtfo bindless depth texture mode */
/* find the uniform sampler variable covering this texture index */
3354 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3355 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3356 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3357 if (tex->texture_index >= img->data.driver_location &&
3358 tex->texture_index < img->data.driver_location + size) {
3365 uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3366 const struct glsl_type *type = glsl_without_array(var->type);
3367 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3368 bool is_int = glsl_base_type_is_integer(ret_type);
3369 unsigned num_components = nir_dest_num_components(tex->dest);
3371 tex->is_new_style_shadow = true;
3372 nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3373 assert(dest || !state->shadow_only);
3374 if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3377 dest = &tex->dest.ssa;
/* new-style shadow results are scalar */
3379 tex->dest.ssa.num_components = 1;
3380 if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3381 /* these require manual swizzles */
3382 if (tex->op == nir_texop_tg4) {
3383 assert(!tex->is_shadow);
3384 nir_ssa_def *swizzle;
/* tg4 gathers a single component: resolve that one swizzle entry */
3385 switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3386 case PIPE_SWIZZLE_0:
3387 swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
3389 case PIPE_SWIZZLE_1:
3391 swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
3393 swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
3396 if (!tex->component)
3401 nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
/* non-tg4: rebuild the full result vector per-channel */
3404 nir_ssa_def *vec[4];
3405 for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3406 switch (swizzle_key->swizzle[sampler_id].s[i]) {
3407 case PIPE_SWIZZLE_0:
3408 vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
3410 case PIPE_SWIZZLE_1:
3412 vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
3414 vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
3417 vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3421 nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
3422 nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
/* no manual swizzle: splat the scalar shadow result to all components */
3424 assert(tex->is_shadow);
3425 nir_ssa_def *vec[4] = {dest, dest, dest, dest};
3426 nir_ssa_def *splat = nir_vec(b, vec, num_components);
3427 nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
/* Shader-level entry point: apply lower_zs_swizzle_tex_instr to every
 * instruction. 'swizzle' is the zink_zs_swizzle_key from the shader key;
 * 'shadow_only' restricts the rewrite to shadow samplers.
 */
3433 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3435 unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3436 struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3437 return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
/* Instruction callback: flip the Y axis of gl_PointCoord loads
 * (y' = 1.0 - y) to match GL's point-coord origin convention.
 */
3441 invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
3443 if (instr->type != nir_instr_type_intrinsic)
3445 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3446 if (intr->intrinsic != nir_intrinsic_load_deref)
3448 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
3449 if (deref_var->data.location != VARYING_SLOT_PNTC)
3451 b->cursor = nir_after_instr(instr);
/* keep x, replace y with 1.0 - y, then redirect all existing uses */
3452 nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
3453 nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
3454 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
/* Shader-level entry point: no-op unless the shader actually reads
 * gl_PointCoord (the early-return line is outside the visible excerpt).
 */
3459 invert_point_coord(nir_shader *nir)
3461 if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
3463 return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
/* Final compile step shared by all paths: convert the NIR out of SSA,
 * translate to SPIR-V, and create the Vulkan shader module/object.
 * 'separate' selects the VK_EXT_shader_object path when supported.
 */
3466 static struct zink_shader_object
3467 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
3469 struct zink_shader_info *sinfo = &zs->sinfo;
3472 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3474 struct zink_shader_object obj;
3475 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3477 obj = zink_shader_spirv_compile(screen, zs, spirv, separate);
3479 /* TODO: determine if there's any reason to cache spirv output? */
3480 if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
/* Compile a zink shader variant for a specific shader key.
 *
 * Applies all key-driven NIR lowering before handing off to
 * compile_module(): uniform inlining, vertex attribute decomposition,
 * geometry-stage line/point emulation, last-vertex-stage fixups,
 * fragment-stage smoothing/stipple/sample-mask/point-coord/fbfetch
 * handling, robustness lowering, zs-swizzle emulation, and nonseamless
 * cubemap lowering. 'extra_data' carries the zs-swizzle key when needed.
 * Re-optimizes only when one of the lowerings made progress.
 */
3488 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
3489 nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
3491 struct zink_shader_info *sinfo = &zs->sinfo;
3492 bool need_optimize = false;
3493 bool inlined_uniforms = false;
3496 if (key->inline_uniforms) {
3497 NIR_PASS_V(nir, nir_inline_uniforms,
3498 nir->info.num_inlinable_uniforms,
3499 key->base.inlined_uniform_values,
3500 nir->info.inlinable_uniform_dw_offsets);
3502 inlined_uniforms = true;
3505 /* TODO: use a separate mem ctx here for ralloc */
/* non-optimal-key paths carry extra per-stage lowering state in the key */
3507 if (!screen->optimal_keys) {
3508 switch (zs->info.stage) {
3509 case MESA_SHADER_VERTEX: {
3510 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3511 const struct zink_vs_key *vs_key = zink_vs_key(key);
/* the decomposed-attr bitfields are stored at a size matching the
 * number of vertex inputs; unpack the matching union member
 */
3512 switch (vs_key->size) {
3514 decomposed_attrs = vs_key->u32.decomposed_attrs;
3515 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3518 decomposed_attrs = vs_key->u16.decomposed_attrs;
3519 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3522 decomposed_attrs = vs_key->u8.decomposed_attrs;
3523 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3527 if (decomposed_attrs || decomposed_attrs_without_w)
3528 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3532 case MESA_SHADER_GEOMETRY:
3533 if (zink_gs_key(key)->lower_line_stipple) {
3534 NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3535 NIR_PASS_V(nir, nir_lower_var_copies);
3536 need_optimize = true;
3539 if (zink_gs_key(key)->lower_line_smooth) {
3540 NIR_PASS_V(nir, lower_line_smooth_gs);
3541 NIR_PASS_V(nir, nir_lower_var_copies);
3542 need_optimize = true;
3545 if (zink_gs_key(key)->lower_gl_point) {
3546 NIR_PASS_V(nir, lower_gl_point_gs);
3547 need_optimize = true;
3550 if (zink_gs_key(key)->lower_pv_mode) {
3551 NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3552 need_optimize = true; //TODO verify that this is required
/* key-independent per-stage lowering */
3561 switch (zs->info.stage) {
3562 case MESA_SHADER_VERTEX:
3563 case MESA_SHADER_TESS_EVAL:
3564 case MESA_SHADER_GEOMETRY:
3565 if (zink_vs_key_base(key)->last_vertex_stage) {
3566 if (zs->sinfo.have_xfb)
3567 sinfo->last_vertex = true;
/* GL clip space is [-1,1] but VK is [0,1] unless depth_clip_control exists */
3569 if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3570 NIR_PASS_V(nir, nir_lower_clip_halfz);
3572 if (zink_vs_key_base(key)->push_drawid) {
3573 NIR_PASS_V(nir, lower_drawid);
3576 if (zink_vs_key_base(key)->robust_access)
3577 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3579 case MESA_SHADER_FRAGMENT:
3580 if (zink_fs_key(key)->lower_line_smooth) {
3581 NIR_PASS_V(nir, lower_line_smooth_fs,
3582 zink_fs_key(key)->lower_line_stipple);
3583 need_optimize = true;
3584 } else if (zink_fs_key(key)->lower_line_stipple)
3585 NIR_PASS_V(nir, lower_line_stipple_fs);
3587 if (zink_fs_key(key)->lower_point_smooth) {
3588 NIR_PASS_V(nir, nir_lower_point_smooth);
3589 NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3590 nir->info.fs.uses_discard = true;
3591 need_optimize = true;
3594 if (zink_fs_key(key)->robust_access)
3595 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3597 if (!zink_fs_key_base(key)->samples &&
3598 nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3599 /* VK will always use gl_SampleMask[] values even if sample count is 0,
3600 * so we need to skip this write here to mimic GL's behavior of ignoring it
3602 nir_foreach_shader_out_variable(var, nir) {
3603 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3604 var->data.mode = nir_var_shader_temp;
3606 nir_fixup_deref_modes(nir);
3607 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3608 need_optimize = true;
3610 if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3611 NIR_PASS_V(nir, lower_dual_blend);
3613 if (zink_fs_key_base(key)->single_sample) {
3614 NIR_PASS_V(nir, nir_lower_single_sampled);
3616 if (zink_fs_key_base(key)->coord_replace_bits)
3617 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
3618 if (zink_fs_key_base(key)->point_coord_yinvert)
3619 NIR_PASS_V(nir, invert_point_coord);
3620 if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3621 nir_foreach_shader_in_variable(var, nir)
3622 var->data.sample = true;
3623 nir->info.fs.uses_sample_qualifier = true;
3624 nir->info.fs.uses_sample_shading = true;
3626 if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3627 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3628 if (nir->info.fs.uses_fbfetch_output) {
3629 nir_variable *fbfetch = NULL;
3630 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3631 /* old variable must be deleted to avoid spirv errors */
3632 fbfetch->data.mode = nir_var_shader_temp;
3633 nir_fixup_deref_modes(nir);
3634 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3635 need_optimize = true;
/* unlinked texcoord inputs read as zero rather than garbage */
3637 nir_foreach_shader_in_variable_safe(var, nir) {
3638 if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3640 nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3641 var->data.mode = nir_var_shader_temp;
3642 nir_fixup_deref_modes(nir);
3643 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3644 need_optimize = true;
3647 case MESA_SHADER_COMPUTE:
3648 if (zink_cs_key(key)->robust_access)
3649 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3653 if (key->base.needs_zs_shader_swizzle) {
3655 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
3657 if (key->base.nonseamless_cube_mask) {
3658 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
3659 need_optimize = true;
3662 if (screen->driconf.inline_uniforms) {
3663 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3664 NIR_PASS_V(nir, rewrite_bo_access, screen);
3665 NIR_PASS_V(nir, remove_bo_access, zs);
3666 need_optimize = true;
3668 if (inlined_uniforms) {
3669 optimize_nir(nir, zs);
3671 /* This must be done again. */
3672 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3673 nir_var_shader_out);
/* shaders that blew up past the inline limit are not worth re-inlining */
3675 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3676 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
3677 zs->can_inline = false;
3678 } else if (need_optimize)
3679 optimize_nir(nir, zs);
3681 struct zink_shader_object obj = compile_module(screen, zs, nir, false);
/* Compile a shader as a separable (VK_EXT_shader_object style) shader.
 * Deserializes the stored NIR, remaps all resource bindings onto the
 * shader's own descriptor set (set 1 for fragment, set 0 otherwise) using
 * the offsets from zink_descriptor_shader_get_binding_offsets(), then
 * optimizes and compiles with separate=true.
 */
3686 struct zink_shader_object
3687 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
3689 nir_shader *nir = zink_shader_deserialize(screen, zs);
3690 int set = nir->info.stage == MESA_SHADER_FRAGMENT;
/* offsets[]: per-descriptor-type base binding within the shader's set */
3691 unsigned offsets[4];
3692 zink_descriptor_shader_get_binding_offsets(zs, offsets);
3693 nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
3694 if (var->data.bindless)
3696 var->data.descriptor_set = set;
3697 switch (var->data.mode) {
3698 case nir_var_mem_ubo:
/* binding 0 is the push/default ubo; all others collapse to binding 1 */
3699 var->data.binding = !!var->data.driver_location;
3701 case nir_var_uniform:
3702 if (glsl_type_is_sampler(glsl_without_array(var->type)))
3703 var->data.binding += offsets[1];
3705 case nir_var_mem_ssbo:
3706 var->data.binding += offsets[2];
/* remaining mode is nir_var_image */
3709 var->data.binding += offsets[3];
3714 if (screen->driconf.inline_uniforms) {
3715 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3716 NIR_PASS_V(nir, rewrite_bo_access, screen);
3717 NIR_PASS_V(nir, remove_bo_access, zs);
3719 optimize_nir(nir, zs);
3720 zink_descriptor_shader_init(screen, zs);
3721 struct zink_shader_object obj = compile_module(screen, zs, nir, true);
/* Instruction callback: rewrite gl_InstanceID loads as
 * (instance_id - base_instance), converting Vulkan's base-inclusive
 * instance index to GL semantics.
 */
3727 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
3729 if (instr->type != nir_instr_type_intrinsic)
3731 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3732 if (intr->intrinsic != nir_intrinsic_load_instance_id)
3734 b->cursor = nir_after_instr(instr);
3735 nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
3736 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
/* Shader-level entry point: only vertex shaders read gl_InstanceID
 * (the non-vertex early-return line is outside the visible excerpt).
 */
3741 lower_baseinstance(nir_shader *shader)
3743 if (shader->info.stage != MESA_SHADER_VERTEX)
3745 return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
3748 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
3749 * so instead we delete all those broken variables and just make new ones
 *
 * Strategy: scan all UBO/SSBO variables to find the largest size needed per
 * category (uniform block 0, other UBOs, SSBOs), mark the originals as
 * shader_temp so dead-variable removal deletes them, then create one merged
 * array-of-struct variable per category whose element is a uint array big
 * enough to cover the largest original block.
 */
3752 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
     /* sizes are tracked in vec4 (16-byte) slots, see the DIV_ROUND_UP below */
3754 uint64_t max_ssbo_size = 0;
3755 uint64_t max_ubo_size = 0;
3756 uint64_t max_uniform_size = 0;
3758 if (!shader->info.num_ssbos && !shader->info.num_ubos)
3761 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
3762 const struct glsl_type *type = glsl_without_array(var->type);
     /* atomic counters are handled elsewhere; skip them here */
3763 if (type_is_counter(type))
3765 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
3766 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
3767 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
3768 if (interface_type) {
3769 unsigned block_size = glsl_get_explicit_size(interface_type, true);
3770 if (glsl_get_length(interface_type) == 1) {
3771 /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
3772 const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
3773 if (glsl_type_is_array(f) && !glsl_array_size(f))
     /* convert byte size to vec4 slots to match glsl_count_attribute_slots units */
3777 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
3778 size = MAX2(size, block_size);
     /* driver_location 0 is the default uniform block; track it separately */
3781 if (var->data.mode == nir_var_mem_ubo) {
3782 if (var->data.driver_location)
3783 max_ubo_size = MAX2(max_ubo_size, size);
3785 max_uniform_size = MAX2(max_uniform_size, size);
3787 max_ssbo_size = MAX2(max_ssbo_size, size);
3788 if (interface_type) {
3789 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
     /* demote the broken variable so nir_remove_dead_variables deletes it */
3793 var->data.mode = nir_var_shader_temp;
3795 nir_fixup_deref_modes(shader);
3796 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3797 optimize_nir(shader, NULL);
     /* all replacement variables share one struct field: a uint array named "base" */
3799 struct glsl_struct_field field = {0};
3800 field.name = ralloc_strdup(shader, "base");
3801 if (shader->info.num_ubos) {
     /* default uniform block (binding 0), only if actually used */
3802 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
     /* max_uniform_size is in vec4 slots -> *4 uints, std430-aligned (stride 4) */
3803 field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
3804 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3805 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
3807 var->interface_type = var->type;
3808 var->data.mode = nir_var_mem_ubo;
3809 var->data.driver_location = 0;
     /* remaining UBOs (excluding the default block handled above) */
3812 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
3813 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
3814 if (num_ubos && ubos_used) {
3815 field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
3816 /* shrink array as much as possible */
     /* -2: ffs is 1-based and bit 0 (default block) was masked off above */
3817 unsigned first_ubo = ffs(ubos_used) - 2;
3818 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
3819 num_ubos -= first_ubo;
3821 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
3822 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
3824 var->interface_type = var->type;
3825 var->data.mode = nir_var_mem_ubo;
3826 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
3829 if (shader->info.num_ssbos && zs->ssbos_used) {
3830 /* shrink array as much as possible */
3831 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
3832 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
3833 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
     /* runtime-sized (length 0) when the shader queries ssbo size, else fixed */
3835 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
3836 field.type = ssbo_type;
3837 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
3838 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
3840 var->interface_type = var->type;
3841 var->data.mode = nir_var_mem_ssbo;
3842 var->data.driver_location = first_ssbo;
/* Return a bitmask of which SSBO slots a buffer-index src may address:
 * a single bit for a constant index, otherwise all 'total' slots. */
3848 get_src_mask_ssbo(unsigned total, nir_src src)
3850 if (nir_src_is_const(src))
3851 return BITFIELD_BIT(nir_src_as_uint(src))
3852 return BITFIELD_MASK(total);
/* Same as get_src_mask_ssbo, but for UBOs: a non-constant index can never
 * reach slot 0 (the default uniform block), so mask it out. */
3856 get_src_mask_ubo(unsigned total, nir_src src)
3858 if (nir_src_is_const(src))
3859 return BITFIELD_BIT(nir_src_as_uint(src));
3860 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
/* Scan the entrypoint to populate zs->ubos_used / zs->ssbos_used bitmasks
 * (consumed by unbreak_bos) and, for non-kernel stages, to set
 * shader->info.textures_used from texture indices. */
3864 analyze_io(struct zink_shader *zs, nir_shader *shader)
3867 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
3868 nir_foreach_block(block, impl) {
3869 nir_foreach_instr(instr, block) {
3870 if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
3871 /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
3872 nir_tex_instr *tex = nir_instr_as_tex(instr);
     /* find the sampler variable whose driver_location range covers this tex index */
3873 nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
3874 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3875 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3876 if (tex->texture_index >= img->data.driver_location &&
3877 tex->texture_index < img->data.driver_location + size) {
3878 BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
3885 if (instr->type != nir_instr_type_intrinsic)
3888 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3889 switch (intrin->intrinsic) {
     /* store_ssbo carries the buffer index in src[1]; all others use src[0] */
3890 case nir_intrinsic_store_ssbo:
3891 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
3894 case nir_intrinsic_get_ssbo_size: {
3895 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3899 case nir_intrinsic_ssbo_atomic_fadd:
3900 case nir_intrinsic_ssbo_atomic_add:
3901 case nir_intrinsic_ssbo_atomic_imin:
3902 case nir_intrinsic_ssbo_atomic_umin:
3903 case nir_intrinsic_ssbo_atomic_imax:
3904 case nir_intrinsic_ssbo_atomic_umax:
3905 case nir_intrinsic_ssbo_atomic_and:
3906 case nir_intrinsic_ssbo_atomic_or:
3907 case nir_intrinsic_ssbo_atomic_xor:
3908 case nir_intrinsic_ssbo_atomic_exchange:
3909 case nir_intrinsic_ssbo_atomic_comp_swap:
3910 case nir_intrinsic_ssbo_atomic_fmin:
3911 case nir_intrinsic_ssbo_atomic_fmax:
3912 case nir_intrinsic_ssbo_atomic_fcomp_swap:
3913 case nir_intrinsic_load_ssbo:
3914 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
3916 case nir_intrinsic_load_ubo:
3917 case nir_intrinsic_load_ubo_vec4:
3918 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
/* Shared state for the bindless lowering passes below. */
3928 struct zink_bindless_info {
     /* slot usage (see create_bindless_texture/image and lower_bindless_instr):
      * [0] = sampled texture, [1] = texel buffer, [2] = image, [3] = image buffer */
3929 nir_variable *bindless[4];
3930 unsigned bindless_set;
3933 /* this is a "default" bindless texture used if the shader has no texture variables */
3934 static nir_variable *
3935 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
     /* texel buffers use binding 1, all other sampler dims use binding 0 */
3937 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
     /* array of the max number of bindless handles; indexed by handle at runtime */
3940 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
3941 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
3942 var->data.descriptor_set = descriptor_set;
3943 var->data.driver_location = var->data.binding = binding;
3947 /* this is a "default" bindless image used if the shader has no image variables */
3948 static nir_variable *
3949 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
     /* image buffers use binding 3, all other image dims use binding 2 */
3951 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
3954 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
3955 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
3956 var->data.descriptor_set = descriptor_set;
3957 var->data.driver_location = var->data.binding = binding;
     /* arbitrary default format; real format is unknown for a synthesized var */
3958 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
3962 /* rewrite bindless instructions as array deref instructions */
3964 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
3966 struct zink_bindless_info *bindless = data;
3968 if (in->type == nir_instr_type_tex) {
3969 nir_tex_instr *tex = nir_instr_as_tex(in);
3970 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
     /* pick (or lazily create) the bindless array var for this sampler dim */
3974 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
3976 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
3977 b->cursor = nir_before_instr(in);
     /* replace the raw handle src with a deref into the bindless array,
      * using the (32-bit-converted) handle as the array index */
3978 nir_deref_instr *deref = nir_build_deref_var(b, var);
3979 if (glsl_type_is_array(var->type))
3980 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
3981 nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
3983 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
3984 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
3985 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
3986 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
3988 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
3989 * - Warhammer 40k: Dawn of War III
3991 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
3992 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
3993 unsigned coord_components = nir_src_num_components(tex->src[c].src);
3994 if (coord_components < needed_components) {
3995 nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
3996 nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
3997 tex->coord_components = needed_components;
4001 if (in->type != nir_instr_type_intrinsic)
4003 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4005 nir_intrinsic_op op;
     /* map each bindless_image_* intrinsic to its image_deref_* counterpart */
4006 #define OP_SWAP(OP) \
4007 case nir_intrinsic_bindless_image_##OP: \
4008 op = nir_intrinsic_image_deref_##OP; \
4012 /* convert bindless intrinsics to deref intrinsics */
4013 switch (instr->intrinsic) {
4016 OP_SWAP(atomic_comp_swap)
4017 OP_SWAP(atomic_dec_wrap)
4018 OP_SWAP(atomic_exchange)
4019 OP_SWAP(atomic_fadd)
4020 OP_SWAP(atomic_fmax)
4021 OP_SWAP(atomic_fmin)
4022 OP_SWAP(atomic_imax)
4023 OP_SWAP(atomic_imin)
4024 OP_SWAP(atomic_inc_wrap)
4026 OP_SWAP(atomic_umax)
4027 OP_SWAP(atomic_umin)
     /* same deref rewrite as the tex path, but for image intrinsics (src[0] is the handle) */
4039 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4040 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4042 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4043 instr->intrinsic = op;
4044 b->cursor = nir_before_instr(in);
4045 nir_deref_instr *deref = nir_build_deref_var(b, var);
4046 if (glsl_type_is_array(var->type))
4047 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4048 nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
/* Run lower_bindless_instr over the shader; on change, fix up deref modes,
 * drop the now-dead original variables, and re-optimize. */
4053 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4055 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4057 nir_fixup_deref_modes(shader);
4058 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4059 optimize_nir(shader, NULL);
4063 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4065 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
4067 if (in->type != nir_instr_type_intrinsic)
4069 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4070 if (instr->intrinsic != nir_intrinsic_load_deref &&
4071 instr->intrinsic != nir_intrinsic_store_deref)
4074 nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
4075 nir_variable *var = nir_deref_instr_get_variable(src_deref);
     /* already converted */
4076 if (var->data.bindless)
     /* only shader in/out variables carry handles across stages */
4078 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4080 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
     /* retype the variable itself to a 64-bit handle */
4083 var->type = glsl_int64_t_type();
4084 var->data.bindless = 1;
4085 b->cursor = nir_before_instr(in);
     /* rebuild the load/store against the retyped variable, then delete the
      * original instruction and its stale deref */
4086 nir_deref_instr *deref = nir_build_deref_var(b, var);
4087 if (instr->intrinsic == nir_intrinsic_load_deref) {
4088 nir_ssa_def *def = nir_load_deref(b, deref);
4089 nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
4090 nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
4092 nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
4094 nir_instr_remove(in);
4095 nir_instr_remove(&src_deref->instr);
/* Shader pass wrapper for lower_bindless_io_instr. */
4100 lower_bindless_io(nir_shader *shader)
4102 return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
/* Compute the Vulkan descriptor binding index for a resource of the given
 * descriptor type at slot 'index' in 'stage'. The layout packs each stage's
 * bindings into disjoint ranges; compact_descriptors shifts some types so
 * multiple descriptor types can share one set. */
4106 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4108 if (stage == MESA_SHADER_NONE) {
4109 unreachable("not supported");
4111 unsigned base = stage;
4112 /* clamp compute bindings for better driver efficiency */
4113 if (gl_shader_stage_is_compute(stage))
     /* two UBO bindings per stage: slot 0 (default block) and everything else */
4116 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4117 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4118 return base * 2 + !!index;
4120 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4121 assert(stage == MESA_SHADER_KERNEL);
4123 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
     /* kernel (CL) stages place texel buffers after the sampler range */
4124 if (stage == MESA_SHADER_KERNEL) {
4125 assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4126 return index + PIPE_MAX_SAMPLERS;
4129 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4130 assert(index < PIPE_MAX_SAMPLERS);
4131 assert(stage != MESA_SHADER_KERNEL);
4132 return (base * PIPE_MAX_SAMPLERS) + index;
4134 case VK_DESCRIPTOR_TYPE_SAMPLER:
4135 assert(index < PIPE_MAX_SAMPLERS);
4136 assert(stage == MESA_SHADER_KERNEL);
4139 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
     /* compact mode: offset SSBOs past the UBO range (2 bindings per gfx stage) */
4140 return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4142 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4143 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4144 assert(index < ZINK_MAX_SHADER_IMAGES);
4145 if (stage == MESA_SHADER_KERNEL)
4146 return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
     /* compact mode: offset images past the combined-image-sampler range */
4147 return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4150 unreachable("unexpected type");
/* For a bindless-capable uniform variable, clone it into the shared
 * bindless array variable for its descriptor type (creating that array on
 * first use), then demote the original to shader_temp for deletion.
 * Recurses through struct members so nested images/samplers are handled. */
4156 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4158 if (glsl_type_is_struct(type)) {
4159 for (unsigned i = 0; i < glsl_get_length(type); i++)
4160 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4164 /* just a random scalar in a struct */
4165 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
     /* map glsl type -> vk descriptor type -> bindless slot index (0-3) */
4168 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4171 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4174 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4177 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4180 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4184 unreachable("unknown");
     /* first variable of this kind: clone it as the shared bindless array */
4186 if (!bindless->bindless[binding]) {
4187 bindless->bindless[binding] = nir_variable_clone(var, nir);
4188 bindless->bindless[binding]->data.bindless = 0;
4189 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4190 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4191 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4192 if (!bindless->bindless[binding]->data.image.format)
4193 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4194 nir_shader_add_variable(nir, bindless->bindless[binding]);
     /* subsequent variables must agree with the existing array's sampler dim */
4196 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4198 var->data.mode = nir_var_shader_temp;
/* Instruction callback: rewrite a 1D shadow tex op as 2D (for drivers that
 * lack 1D shadow sampling). Coords are widened by inserting a zero second
 * component, and the dest is shrunk back to its original component count. */
4202 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4204 struct zink_screen *screen = data;
4205 if (instr->type != nir_instr_type_tex)
4207 nir_tex_instr *tex = nir_instr_as_tex(instr);
4208 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4210 if (tex->is_sparse && screen->need_2D_sparse) {
4211 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4212 mesa_loge("unhandled/unsupported 1D sparse texture!");
4215 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4216 b->cursor = nir_before_instr(instr);
4217 tex->coord_components++;
     /* widen each coord-like src that hasn't already got the 2D component count */
4224 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4225 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4228 if (tex->src[c].src.ssa->num_components == tex->coord_components)
4231 nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4232 if (tex->src[c].src.ssa->num_components == 1)
4233 def = nir_vec2(b, tex->src[c].src.ssa, zero);
     /* arrayed case: keep the layer in the last component, zero goes in y */
4235 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4236 nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
4238 b->cursor = nir_after_instr(instr);
4239 unsigned needed_components = nir_tex_instr_dest_size(tex);
4240 unsigned num_components = tex->dest.ssa.num_components;
4241 if (needed_components > num_components) {
4242 tex->dest.ssa.num_components = needed_components;
4243 assert(num_components < 3);
4244 /* take either xz or just x since this is promoted to 2D from 1D */
4245 uint32_t mask = num_components == 2 ? (1|4) : 1;
4246 nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
4247 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
/* Retype all 1D shadow sampler variables as 2D, then run
 * convert_1d_shadow_tex to fix the tex instructions that use them. */
4253 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4256 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4257 const struct glsl_type *type = glsl_without_array(var->type);
4258 unsigned length = glsl_get_length(var->type);
4259 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
     /* note: 2nd arg (is_shadow) is true here since only shadow samplers reach this */
4261 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
     /* preserve arrayness of the original declaration */
4262 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4267 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
/* Pre-compile scan: record sparse-resource usage in zs->sinfo, populate
 * shader->info.images_used from image-deref intrinsics, and warn (once) if
 * float image atomics are used without VK_EXT_shader_atomic_float. */
4272 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4274 nir_foreach_function(function, shader) {
4275 if (!function->impl)
4277 nir_foreach_block_safe(block, function->impl) {
4278 nir_foreach_instr_safe(instr, block) {
4279 if (instr->type == nir_instr_type_tex) {
4280 nir_tex_instr *tex = nir_instr_as_tex(instr);
4281 zs->sinfo.have_sparse |= tex->is_sparse;
4283 if (instr->type != nir_instr_type_intrinsic)
4285 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4286 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4287 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4288 intr->intrinsic == nir_intrinsic_image_deref_store ||
4289 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4290 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4291 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4292 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4293 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4294 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4295 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4296 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4297 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4298 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4299 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4300 intr->intrinsic == nir_intrinsic_image_deref_size ||
4301 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4302 intr->intrinsic == nir_intrinsic_image_deref_format ||
4303 intr->intrinsic == nir_intrinsic_image_deref_order) {
4306 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
4308 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4309 const unsigned size =
4310 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4311 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4312 var->data.binding + (MAX2(size, 1) - 1));
4314 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4315 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4316 zs->sinfo.have_sparse = true;
     /* one-shot warning shared across all shader compiles */
4318 static bool warned = false;
4319 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4320 switch (intr->intrinsic) {
4321 case nir_intrinsic_image_deref_atomic_add: {
4322 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4323 if (util_format_is_float(var->data.image.format))
4324 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
/* Walk up a value's def chain to decide whether it is a raw sparse residency
 * code (produced by a tex/image sparse op) rather than an already-converted
 * is_sparse_texels_resident result. Loops through ALU instrs via src[0]. */
4337 is_residency_code(nir_ssa_def *src)
4339 nir_instr *parent = src->parent_instr;
4341 if (parent->type == nir_instr_type_intrinsic) {
     /* the only intrinsic expected here; already-converted -> not a raw code */
4342 ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4343 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4346 if (parent->type == nir_instr_type_tex)
4348 assert(parent->type == nir_instr_type_alu);
4349 nir_alu_instr *alu = nir_instr_as_alu(parent);
4350 parent = alu->src[0].src.ssa->parent_instr;
/* Normalize sparse-residency intrinsics:
 * - sparse_residency_code_and: convert raw codes to resident booleans first,
 *   then AND them.
 * - is_sparse_texels_resident: unwrap to the first component / underlying
 *   value, since ntv stores residency differently (vulkan has no vec5). */
4355 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
4357 if (in->type != nir_instr_type_intrinsic)
4359 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4360 if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
4361 b->cursor = nir_before_instr(&instr->instr);
     /* wrap raw residency codes so both operands are comparable booleans */
4363 if (is_residency_code(instr->src[0].ssa))
4364 src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
4366 src0 = instr->src[0].ssa;
4368 if (is_residency_code(instr->src[1].ssa))
4369 src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
4371 src1 = instr->src[1].ssa;
4372 nir_ssa_def *def = nir_iand(b, src0, src1);
4373 nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
4374 nir_instr_remove(in);
4377 if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
4380 /* vulkan vec can only be a vec4, but this is (maybe) vec5,
4381 * so just rewrite as the first component since ntv is going to use a different
4382 * method for storing the residency value anyway
4384 b->cursor = nir_before_instr(&instr->instr);
4385 nir_instr *parent = instr->src[0].ssa->parent_instr;
4386 if (is_residency_code(instr->src[0].ssa)) {
     /* replace the wrapping ALU with just channel 0 of its source */
4387 assert(parent->type == nir_instr_type_alu);
4388 nir_alu_instr *alu = nir_instr_as_alu(parent);
4389 nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
4390 nir_instr_remove(parent);
4393 if (parent->type == nir_instr_type_intrinsic) {
4394 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4395 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
4396 src = intr->src[0].ssa;
4398 assert(parent->type == nir_instr_type_alu);
4399 nir_alu_instr *alu = nir_instr_as_alu(parent);
4400 src = alu->src[0].src.ssa;
     /* match the consumer's bit size (bool -> compare, else zero-extend/trunc) */
4402 if (instr->dest.ssa.bit_size != 32) {
4403 if (instr->dest.ssa.bit_size == 1)
4404 src = nir_ieq_imm(b, src, 1);
4406 src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
4408 nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
4409 nir_instr_remove(in);
/* Shader pass wrapper for lower_sparse_instr. */
4415 lower_sparse(nir_shader *shader)
4417 return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
/* Instruction callback: find the sampler variable backing a tex op (via its
 * texture-handle deref if bindless, else by scanning driver_location ranges)
 * and let rewrite_tex_dest reconcile the dest type with the variable. */
4421 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4423 if (in->type != nir_instr_type_tex)
4425 nir_tex_instr *tex = nir_instr_as_tex(in);
     /* size/lod queries don't return sampled values; nothing to match */
4426 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4428 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4429 nir_variable *var = NULL;
4431 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4433 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4434 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4435 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4436 if (tex->texture_index >= img->data.driver_location &&
4437 tex->texture_index < img->data.driver_location + size) {
4444 return !!rewrite_tex_dest(b, tex, var, data);
/* Shader pass wrapper for match_tex_dests_instr. */
4448 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4450 return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
/* Instruction callback: scalarize vector bitfield ops (insert/extract) into
 * one per-component op each, then recombine with nir_vec. Presumably needed
 * because SPIR-V's BitField* ops are emitted per-scalar by ntv — TODO confirm. */
4454 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4456 if (in->type != nir_instr_type_alu)
4458 nir_alu_instr *alu = nir_instr_as_alu(in);
4460 case nir_op_ubitfield_extract:
4461 case nir_op_ibitfield_extract:
4462 case nir_op_bitfield_insert:
     /* scalars are already fine */
4467 unsigned num_components = nir_dest_num_components(alu->dest.dest);
4468 if (num_components == 1)
4470 b->cursor = nir_before_instr(in);
4471 nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
     /* emit one scalar op per component, honoring each src's swizzle */
4472 for (unsigned i = 0; i < num_components; i++) {
4473 if (alu->op == nir_op_bitfield_insert)
4474 dests[i] = nir_bitfield_insert(b,
4475 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4476 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4477 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4478 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4479 else if (alu->op == nir_op_ubitfield_extract)
4480 dests[i] = nir_ubitfield_extract(b,
4481 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4482 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4483 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4485 dests[i] = nir_ibitfield_extract(b,
4486 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4487 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4488 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4490 nir_ssa_def *dest = nir_vec(b, dests, num_components);
4491 nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
4492 nir_instr_remove(in);
/* Shader pass wrapper for split_bitfields_instr. */
4498 split_bitfields(nir_shader *shader)
4500 return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
/* After a variable's type was rewritten (see type_image), update all deref
 * instructions referencing it so their types match the new variable type. */
4504 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4506 nir_foreach_function(function, nir) {
4507 nir_foreach_block(block, function->impl) {
4508 nir_foreach_instr_safe(instr, block) {
4509 if (instr->type != nir_instr_type_deref)
4511 nir_deref_instr *deref = nir_instr_as_deref(instr);
4512 nir_variable *img = nir_deref_instr_get_variable(deref);
     /* array derefs get the element type, var derefs keep the full type */
4515 if (glsl_type_is_array(var->type)) {
4516 if (deref->deref_type == nir_deref_type_array)
4517 deref->type = glsl_without_array(var->type);
4519 deref->type = var->type;
4521 deref->type = var->type;
/* Give an untyped (GLSL_TYPE_VOID result) image variable a concrete sampled
 * type by inspecting how it is accessed: first from typed load/store/atomic
 * intrinsics (using their src alu type), then from image_size (defaulting to
 * uint32). Variables with no uses are demoted to shader_temp. */
4529 type_image(nir_shader *nir, nir_variable *var)
4531 nir_foreach_function(function, nir) {
4532 nir_foreach_block(block, function->impl) {
4533 nir_foreach_instr_safe(instr, block) {
4534 if (instr->type != nir_instr_type_intrinsic)
4536 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4537 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4538 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4539 intr->intrinsic == nir_intrinsic_image_deref_store ||
4540 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
4541 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
4542 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
4543 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
4544 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
4545 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
4546 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
4547 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
4548 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
4549 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
4550 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
4551 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4552 intr->intrinsic == nir_intrinsic_image_deref_format ||
4553 intr->intrinsic == nir_intrinsic_image_deref_order) {
4554 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4555 nir_variable *img = nir_deref_instr_get_variable(deref);
     /* derive the sampled base type from the intrinsic's src alu type */
4558 nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4559 const struct glsl_type *type = glsl_without_array(var->type);
4560 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
     /* already typed: just verify consistency */
4561 assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4564 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4565 if (glsl_type_is_array(var->type))
4566 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4567 var->type = img_type;
4568 rewrite_cl_derefs(nir, var);
     /* second sweep: only image_size uses remain; type as uint32 */
4574 nir_foreach_function(function, nir) {
4575 nir_foreach_block(block, function->impl) {
4576 nir_foreach_instr_safe(instr, block) {
4577 if (instr->type != nir_instr_type_intrinsic)
4579 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4580 if (intr->intrinsic != nir_intrinsic_image_deref_size)
4582 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4583 nir_variable *img = nir_deref_instr_get_variable(deref);
4586 nir_alu_type alu_type = nir_type_uint32;
4587 const struct glsl_type *type = glsl_without_array(var->type);
4588 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4591 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4592 if (glsl_type_is_array(var->type))
4593 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4594 var->type = img_type;
4595 rewrite_cl_derefs(nir, var);
     /* no uses at all: let dead-variable removal delete it */
4600 var->data.mode = nir_var_shader_temp;
/* Return the uniform texture/sampler variable whose binding range
 * [binding, binding + array size) contains texture_index, or fall through
 * (no match) otherwise. */
4603 static nir_variable *
4604 find_sampler_var(nir_shader *nir, unsigned texture_index)
4606 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4607 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4608 if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
4609 (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
/* Give untyped sampler variables concrete result types based on the
 * dest_type of the tex instructions that use them, and accumulate the set of
 * used sampler slots into *sampler_mask. Two sweeps over the same pattern —
 * presumably one prioritizes certain texops (the switch arms are elided
 * here) before the general pass; verify against full source. */
4616 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4618 bool progress = false;
4619 nir_foreach_function(function, nir) {
4620 nir_foreach_block(block, function->impl) {
4621 nir_foreach_instr(instr, block) {
4622 if (instr->type != nir_instr_type_tex)
4624 nir_tex_instr *tex = nir_instr_as_tex(instr);
     /* query ops don't carry a useful dest_type for typing the sampler */
4628 case nir_texop_query_levels:
4629 case nir_texop_texture_samples:
4630 case nir_texop_samples_identical:
4635 *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4636 nir_variable *var = find_sampler_var(nir, tex->texture_index);
     /* already typed */
4638 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4640 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4641 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4643 img_type = glsl_array_type(img_type, size, 0);
4644 var->type = img_type;
4649 nir_foreach_function(function, nir) {
4650 nir_foreach_block(block, function->impl) {
4651 nir_foreach_instr(instr, block) {
4652 if (instr->type != nir_instr_type_tex)
4654 nir_tex_instr *tex = nir_instr_as_tex(instr);
4658 case nir_texop_query_levels:
4659 case nir_texop_texture_samples:
4660 case nir_texop_samples_identical:
4665 *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4666 nir_variable *var = find_sampler_var(nir, tex->texture_index);
4668 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
4670 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4671 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4673 img_type = glsl_array_type(img_type, size, 0);
4674 var->type = img_type;
/* Demote all sampler uniform variables to shader_temp so a later
 * dead-variable pass deletes them; returns whether anything changed. */
4683 delete_samplers(nir_shader *nir)
4685 bool progress = false;
4686 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4687 if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4688 var->data.mode = nir_var_shader_temp;
/* Top-level typing pass (CL path): drop sampler variables, type the
 * remaining sampler uses (filling *sampler_mask), then type every image
 * variable via type_image. */
4696 type_images(nir_shader *nir, unsigned *sampler_mask)
4698 bool progress = false;
4699 progress |= delete_samplers(nir);
4700 progress |= type_sampler_vars(nir, sampler_mask);
4701 nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4702 type_image(nir, var);
4708 /* attempt to assign io for separate shaders */
4710 fixup_io_locations(nir_shader *nir)
     /* fragment shaders fix up their inputs; all other stages their outputs */
4712 nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4713 /* i/o interface blocks are required to be EXACT matches between stages:
4714 * iterate over all locations and set locations incrementally
4717 for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
     /* sysval outputs don't consume interface slots */
4718 if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4720 nir_variable *var = nir_find_variable_with_location(nir, mode, i);
4722 /* locations used between stages are not required to be contiguous */
4723 if (i >= VARYING_SLOT_VAR0)
4728 /* ensure variable is given enough slots */
4729 if (nir_is_arrayed_io(var, nir->info.stage))
4730 size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4732 size = glsl_count_vec4_slots(var->type, false, false);
4733 var->data.driver_location = slot;
4735 /* ensure the consumed slots aren't double iterated */
/* Builds a bitmask with one bit per shader input (in iteration order), set
 * when that input uses flat interpolation.  `c` counts inputs; only flat
 * ones set their bit.  NOTE(review): the return statement is elided from
 * this listing — presumably returns `flat_flags`.
 */
4742 zink_flat_flags(struct nir_shader *shader)
4744    uint32_t flat_flags = 0, c = 0;
4745    nir_foreach_shader_in_variable(var, shader) {
4746       if (var->data.interpolation == INTERP_MODE_FLAT)
4747          flat_flags |= 1u << (c++);
/* Creates a zink_shader wrapper for `nir`: runs the zink-specific NIR
 * lowering pipeline, assigns Vulkan descriptor sets/bindings for every
 * UBO/SSBO/sampler/image variable, records the binding tables in `ret`,
 * and finally serializes the lowered NIR into ret->blob.
 *
 * so_info: optional stream-output (transform feedback) description; when it
 * has outputs, blocks are split and update_so_info() is run near the end.
 *
 * NOTE(review): this listing elides many lines (closing braces, `continue`s,
 * the final `return ret;`, and some argument lines of multi-line calls);
 * comments below describe only the visible code.
 */
4753 struct zink_shader *
4754 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4755                    const struct pipe_stream_output_info *so_info)
4757    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4758    bool have_psiz = false;
      /* VS writing VARYING_SLOT_EDGE → remember for edge-flag handling */
4760    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4761                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4763    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4764    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4766    util_queue_fence_init(&ret->precompile.fence);
4767    util_dynarray_init(&ret->pipeline_libs, ret);
      /* identity hash: the pointer itself is the shader's hash key */
4768    ret->hash = _mesa_hash_pointer(ret);
4770    ret->programs = _mesa_pointer_set_create(NULL);
4771    simple_mtx_init(&ret->lock, mtx_plain);
      /* tess stages may index i/o indirectly; lower those derefs away */
4773    nir_variable_mode indirect_derefs_modes = 0;
4774    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4775        nir->info.stage == MESA_SHADER_TESS_EVAL)
4776       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4778    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
      /* all graphics stages get the gfx push-constant block */
4781    if (nir->info.stage < MESA_SHADER_COMPUTE)
4782       create_gfx_pushconst(nir);
4784    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4785        nir->info.stage == MESA_SHADER_TESS_EVAL)
4786       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4788    if (nir->info.stage < MESA_SHADER_FRAGMENT)
4789       have_psiz = check_psiz(nir);
4790    if (nir->info.stage == MESA_SHADER_FRAGMENT)
4791       ret->flat_flags = zink_flat_flags(nir);
      /* separate shaders need deterministic io locations across stages */
4793    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4794       NIR_PASS_V(nir, fixup_io_locations);
4796    NIR_PASS_V(nir, lower_basevertex);
4797    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4798    NIR_PASS_V(nir, lower_baseinstance);
4799    NIR_PASS_V(nir, lower_sparse);
4800    NIR_PASS_V(nir, split_bitfields);
4801    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4803    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4804       NIR_PASS_V(nir, nir_lower_discard_or_demote,
4805                  screen->driconf.glsl_correct_derivatives_after_discard ||
4806                  nir->info.use_legacy_math_rules);
4809    if (screen->need_2D_zs)
4810       NIR_PASS_V(nir, lower_1d_shadow, screen);
      /* subgroup lowering: scalarized, 32-bit x4 ballots; stages without
       * Vulkan subgroup support get trivially-lowered size-1 subgroups */
4813    nir_lower_subgroups_options subgroup_options = {0};
4814    subgroup_options.lower_to_scalar = true;
4815    subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4816    subgroup_options.ballot_bit_size = 32;
4817    subgroup_options.ballot_components = 4;
4818    subgroup_options.lower_subgroup_masks = true;
4819    if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4820       subgroup_options.subgroup_size = 1;
4821       subgroup_options.lower_vote_trivial = true;
4823    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4826    if (so_info && so_info->num_outputs)
4827       NIR_PASS_V(nir, split_blocks);
4829    optimize_nir(nir, NULL);
4830    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4831    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4832                                           nir_lower_demote_if_to_cf |
4833                                           nir_lower_terminate_if_to_cf));
      /* dual-source blending only permits a single color output */
4834    NIR_PASS_V(nir, nir_lower_fragcolor,
4835               nir->info.fs.color_is_dual_source ? 1 : 8);
4836    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4837    bool needs_size = analyze_io(ret, nir);
4838    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4839    /* run in compile if there could be inlined uniforms */
4840    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4841       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4842       NIR_PASS_V(nir, rewrite_bo_access, screen);
4843       NIR_PASS_V(nir, remove_bo_access, ret);
4846    if (zink_debug & ZINK_DEBUG_NIR) {
4847       fprintf(stderr, "NIR shader:\n---8<---\n");
4848       nir_print_shader(nir, stderr);
4849       fprintf(stderr, "---8<---\n");
      /* detect bindless images/samplers used through the i/o interface */
4852    struct zink_bindless_info bindless = {0};
4853    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4854    bool has_bindless_io = false;
4855    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4856       var->data.is_xfb = false;
4857       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4858          has_bindless_io = true;
4862    if (has_bindless_io)
4863       NIR_PASS_V(nir, lower_bindless_io);
4865    optimize_nir(nir, NULL);
4868    scan_nir(screen, nir, ret);
      /* CL kernels: retype images/samplers and emit one SAMPLER binding per
       * sampler index used */
4869    unsigned sampler_mask = 0;
4870    if (nir->info.stage == MESA_SHADER_KERNEL) {
4871       NIR_PASS_V(nir, type_images, &sampler_mask);
4872       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4873       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4874       u_foreach_bit(s, sampler_mask) {
4875          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4876          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4877          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4878          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4879          ret->num_bindings[ztype]++;
4881       ret->sinfo.sampler_mask = sampler_mask;
      /* assign descriptor sets/bindings for every resource variable;
       * reverse iteration — presumably to keep binding order stable with
       * respect to declaration order (confirm against full source) */
4884    unsigned ubo_binding_mask = 0;
4885    unsigned ssbo_binding_mask = 0;
4886    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4887       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4890                                         nir_var_mem_ssbo)) {
4891          enum zink_descriptor_type ztype;
4892          const struct glsl_type *type = glsl_without_array(var->type);
4893          if (var->data.mode == nir_var_mem_ubo) {
4894             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4895             /* buffer 0 is a push descriptor */
4896             var->data.descriptor_set = !!var->data.driver_location;
4897             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4898                                 zink_binding(nir->info.stage,
4899                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4900                                              var->data.driver_location,
4901                                              screen->compact_descriptors);
4902             assert(var->data.driver_location || var->data.binding < 10);
4903             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4904             int binding = var->data.binding;
4906             if (!var->data.driver_location) {
4907                ret->has_uniforms = true;
4908             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
                  /* record each UBO binding exactly once */
4909                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4910                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4911                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4912                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4913                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4914                ret->num_bindings[ztype]++;
4915                ubo_binding_mask |= BITFIELD_BIT(binding);
4917          } else if (var->data.mode == nir_var_mem_ssbo) {
4918             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4919             var->data.descriptor_set = screen->desc_set_id[ztype];
4920             var->data.binding = zink_binding(nir->info.stage,
4921                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4922                                              var->data.driver_location,
4923                                              screen->compact_descriptors);
4924             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4925                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4926                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4927                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4928                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4929                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4930                ret->num_bindings[ztype]++;
4931                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
            /* remaining modes: plain uniforms and images */
4934             assert(var->data.mode == nir_var_uniform ||
4935                    var->data.mode == nir_var_image);
4936             if (var->data.bindless) {
4937                ret->bindless = true;
4938                handle_bindless_var(nir, var, type, &bindless);
4939             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
4940                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
                  /* CL has no combined image+sampler objects */
4941                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4942                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
4943                ztype = zink_desc_type_from_vktype(vktype);
4944                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
4945                   ret->num_texel_buffers++;
4946                var->data.driver_location = var->data.binding;
4947                var->data.descriptor_set = screen->desc_set_id[ztype];
4948                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
4949                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4950                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4951                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
                  /* arrays (incl. arrays-of-arrays) consume aoa_size slots */
4952                if (glsl_type_is_array(var->type))
4953                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
4955                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4956                ret->num_bindings[ztype]++;
4957             } else if (var->data.mode == nir_var_uniform) {
4958                /* this is a dead uniform */
4960                exec_node_remove(&var->node);
4965    bool bindless_lowered = false;
4966    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
4967    ret->bindless |= bindless_lowered;
      /* emulate missing 64-bit int/float support */
4969    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
4970       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
4971    if (nir->info.stage != MESA_SHADER_KERNEL)
4972       NIR_PASS_V(nir, match_tex_dests, ret);
4974    if (!nir->info.internal)
4975       nir_foreach_shader_out_variable(var, nir)
4976          var->data.explicit_xfb_buffer = 0;
4977    if (so_info && so_info->num_outputs)
4978       update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
4979    else if (have_psiz) {
      /* no xfb: a PSIZ output that wasn't explicitly located was injected by
       * check_psiz(); demote it back to a temp and strip it */
4980       bool have_fake_psiz = false;
4981       nir_variable *psiz = NULL;
4982       nir_foreach_shader_out_variable(var, nir) {
4983          if (var->data.location == VARYING_SLOT_PSIZ) {
4984             if (!var->data.explicit_location)
4985                have_fake_psiz = true;
4990       if (have_fake_psiz && psiz) {
4991          psiz->data.mode = nir_var_shader_temp;
4992          nir_fixup_deref_modes(nir);
4993          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      /* freeze the lowered NIR into a blob; later compiles deserialize it */
4996    zink_shader_serialize_blob(nir, &ret->blob);
4997    memcpy(&ret->info, &nir->info, sizeof(nir->info));
4999    ret->can_inline = true;
/* pipe_screen::finalize_nir hook: runs generic texture lowering and a final
 * optimization/gather pass on a freshly-created NIR shader, and scans for
 * inlinable uniforms when the driconf option is enabled.
 * NOTE(review): the return statement is elided from this listing.
 */
5005 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5007    struct zink_screen *screen = zink_screen(pscreen);
5008    nir_shader *nir = nirptr;
5010    nir_lower_tex_options tex_opts = {
5011       .lower_invalid_implicit_lod = true,
      /* projective sampling is invalid in SPIR-V for cube/MS/array images: */
5014       Sampled Image must be an object whose type is OpTypeSampledImage.
5015       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5016       or Rect, and the Arrayed and MS operands must be 0.
5017       - SPIRV, OpImageSampleProj* opcodes
5019    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5020                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5021    tex_opts.lower_txp_array = true;
5022    if (!screen->info.feats.features.shaderImageGatherExtended)
5023       tex_opts.lower_tg4_offsets = true;
5024    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5025    optimize_nir(nir, NULL);
5026    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5027    if (screen->driconf.inline_uniforms)
5028       nir_find_inlinable_uniforms(nir);
/* Final teardown of a zink_shader: waits for any in-flight precompile,
 * releases descriptor state, destroys the precompiled Vulkan shader
 * object / module / GPL pipeline, then frees the NIR blob, SPIR-V, and the
 * shader itself.  Caller must ensure no program still references it
 * (zink_gfx_shader_free handles that for graphics shaders).
 */
5034 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5036    _mesa_set_destroy(shader->programs, NULL);
      /* precompile may still be running on a queue thread */
5037    util_queue_fence_wait(&shader->precompile.fence);
5038    util_queue_fence_destroy(&shader->precompile.fence);
5039    zink_descriptor_shader_deinit(screen, shader);
5040    if (screen->info.have_EXT_shader_object) {
5041       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
      /* non-shader-object path: module + optional graphics pipeline library */
5043       if (shader->precompile.obj.mod)
5044          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5045       if (shader->precompile.gpl)
5046          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5048    blob_finish(&shader->blob);
5049    ralloc_free(shader->spirv);
5050    free(shader->precompile.bindings);
5051    ralloc_free(shader);
/* Detaches a graphics shader from every program that uses it, evicting those
 * programs from the context's program cache and this shader's pipeline-lib
 * cache, then recursively frees any generated TCS/GS shaders it owns before
 * finally calling zink_shader_free().
 * NOTE(review): several closing braces and `continue` statements are elided
 * from this listing; comments describe only the visible statements.
 */
5055 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5057    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5058    util_queue_fence_wait(&shader->precompile.fence);
5059    set_foreach(shader->programs, entry) {
5060       struct zink_gfx_program *prog = (void*)entry->key;
5061       gl_shader_stage stage = shader->info.stage;
5062       assert(stage < ZINK_GFX_SHADER_COUNT);
      /* a driver-generated TCS doesn't count toward the cache-stage key */
5063       unsigned stages_present = prog->stages_present;
5064       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5065           prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5066          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5067       unsigned idx = zink_program_cache_stages(stages_present);
5068       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5069           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
         /* evict the program from the per-context cache under its lock */
5070          struct hash_table *ht = &prog->ctx->program_cache[idx];
5071          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5072          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5073          assert(he && he->data == prog);
5074          _mesa_hash_table_remove(ht, he);
5075          prog->base.removed = true;
5076          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5077          util_queue_fence_wait(&prog->base.cache_fence);
         /* wait out any pipelines still being compiled asynchronously */
5079          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5080             for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5081                hash_table_foreach(&prog->pipelines[r][i], entry) {
5082                   struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
5084                   util_queue_fence_wait(&pc_entry->fence);
      /* drop every pipeline library whose stage set covers this program's */
5090       while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5091          struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5092          //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
5093          //stages_present_i implies libs->stages_present_i
5094          if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
5096          if (!libs->removed) {
5097             libs->removed = true;
5098             simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5099             _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5100             simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5102          zink_gfx_lib_cache_unref(screen, libs);
5104       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5105          prog->shaders[stage] = NULL;
5106          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5108       /* only remove generated tcs during parent tes destruction */
5109       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5110          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
      /* likewise, clear a generated GS when its parent stage goes away */
5111       if (stage != MESA_SHADER_FRAGMENT &&
5112           prog->shaders[MESA_SHADER_GEOMETRY] &&
5113           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5115          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5117       zink_gfx_program_reference(screen, &prog, NULL);
5119    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5120        shader->non_fs.generated_tcs) {
5121       /* automatically destroy generated tcs shaders when tes is destroyed */
5122       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5123       shader->non_fs.generated_tcs = NULL;
5125    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5126       for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5127          if (shader->info.stage != MESA_SHADER_FRAGMENT &&
5128              shader->non_fs.generated_gs[i][j]) {
5129             /* automatically destroy generated gs shaders when owner is destroyed */
5130             zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5131             shader->non_fs.generated_gs[i][j] = NULL;
5135    zink_shader_free(screen, shader);
/* Recompiles a TCS for a new patch-vertex count by patching the single
 * OutputVertices word directly in the cached SPIR-V, then handing it to
 * zink_shader_spirv_compile() — no NIR passes needed.
 */
5139 struct zink_shader_object
5140 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
5142    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5143    /* shortcut all the nir passes since we just have to change this one word */
5144    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5145    return zink_shader_spirv_compile(screen, zs, NULL, false);
5148 /* creating a passthrough tcs shader that's roughly:
5151 #extension GL_ARB_tessellation_shader : require
5153 in vec4 some_var[gl_MaxPatchVertices];
5154 out vec4 some_var_out;
5156 layout(push_constant) uniform tcsPushConstants {
5157 layout(offset = 0) float TessLevelInner[2];
5158 layout(offset = 8) float TessLevelOuter[4];
5159 } u_tcsPushConstants;
5160 layout(vertices = $vertices_per_patch) out;
5163 gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5164 gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5165 some_var_out = some_var[gl_InvocationID];
/* Builds the passthrough TCS described in the comment above: copies each
 * TES input (except the tess levels) from the per-vertex input to the
 * per-vertex output for the current invocation, and writes
 * gl_TessLevelInner/Outer from push constants.  The generated NIR is
 * serialized into the returned zink_shader; *nir_ret receives the NIR
 * (assignment elided from this listing — confirm against full source).
 */
5169 struct zink_shader *
5170 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5172    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5173    util_queue_fence_init(&ret->precompile.fence);
5174    ret->hash = _mesa_hash_pointer(ret);
5175    ret->programs = _mesa_pointer_set_create(NULL);
5176    simple_mtx_init(&ret->lock, mtx_plain);
      /* hand-build a TESS_CTRL shader with a single entrypoint */
5178    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5179    nir_function *fn = nir_function_create(nir, "main");
5180    fn->is_entrypoint = true;
5181    nir_function_impl *impl = nir_function_impl_create(fn);
5184    nir_builder_init(&b, impl);
5185    b.cursor = nir_before_block(nir_start_block(impl));
5187    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
      /* mirror every TES input as a TCS in/out pair; tess levels are
       * produced from push constants instead */
5189    nir_foreach_shader_in_variable(var, tes) {
5190       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5192       const struct glsl_type *in_type = var->type;
5193       const struct glsl_type *out_type = var->type;
5195       snprintf(buf, sizeof(buf), "%s_out", var->name);
5196       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
         /* wrap non-arrayed varyings: gl_MaxPatchVertices-sized input,
          * vertices_per_patch-sized output */
5197          const struct glsl_type *type = var->type;
5198          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5199          out_type = glsl_array_type(type, vertices_per_patch, 0);
5202       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5203       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5204       out->data.location = in->data.location = var->data.location;
5205       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5207       /* gl_in[] receives values from equivalent built-in output
5208          variables written by the vertex shader (section 2.14.7).  Each array
5209          element of gl_in[] is a structure holding values for a specific vertex of
5210          the input patch.  The length of gl_in[] is equal to the
5211          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5212          - ARB_tessellation_shader
5214       /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
5215       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5216       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5217       copy_vars(&b, out_value, in_value);
      /* per-patch tess-level outputs: float[2] inner, float[4] outer */
5219    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5220    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5221    gl_TessLevelInner->data.patch = 1;
5222    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5223    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5224    gl_TessLevelOuter->data.patch = 1;
5226    create_gfx_pushconst(nir);
      /* default tess levels come from the gfx push-constant block */
5228    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
5229                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
5230                                                     .base = 1, .range = 8);
5231    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
5232                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
5233                                                     .base = 2, .range = 16);
5235    for (unsigned i = 0; i < 2; i++) {
5236       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5237       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5239    for (unsigned i = 0; i < 4; i++) {
5240       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5241       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5244    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5245    nir_validate_shader(nir, "created");
5247    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
5248    optimize_nir(nir, NULL);
5249    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5250    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5253    zink_shader_serialize_blob(nir, &ret->blob);
5254    memcpy(&ret->info, &nir->info, sizeof(nir->info));
      /* mark as driver-generated so program teardown treats it specially */
5255    ret->non_fs.is_generated = true;
/* Reports whether the shader declares any cube-dimension sampler uniform
 * (arrays included).  NOTE(review): the `return true`/`return false` lines
 * and return type are elided from this listing.
 */
5260 zink_shader_has_cubes(nir_shader *nir)
5262    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5263       const struct glsl_type *type = glsl_without_array(var->type);
5264       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
/* Deserializes a NIR shader from a serialized blob using the screen's
 * compiler options.  Caller owns (and must ralloc_free) the returned shader.
 */
5271 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5273    struct blob_reader blob_reader;
5274    blob_reader_init(&blob_reader, blob->data, blob->size);
5275    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
/* Convenience wrapper: deserializes the NIR stored in zs->blob. */
5279 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5281    return zink_shader_blob_deserialize(screen, &zs->blob);
/* Serializes `nir` into `blob`; variable names and other debug info are
 * stripped unless a NIR/SPIRV/TGSI debug flag is active (keeping them aids
 * debug dumps).  NOTE(review): blob_init and closing brace are elided here.
 */
5285 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5289    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5293    nir_serialize(blob, nir, strip);
5297 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5299 nir_shader *nir = zink_shader_deserialize(screen, zs);
5300 nir_print_shader(nir, fp);