/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir_xfb_info.h"
#include "nir/nir_draw_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/nir/nir_builtin_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"
bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

static void
copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
   if (glsl_type_is_struct(dst->type)) {
      for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
         copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
      }
   } else if (glsl_type_is_array_or_matrix(dst->type)) {
      unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
      for (unsigned i = 0; i < count; i++) {
         copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
      }
   } else {
      nir_ssa_def *load = nir_load_deref(b, src);
      nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
   }
}
#define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)

static void
create_gfx_pushconst(nir_shader *nir)
{
#define PUSHCONST_MEMBER(member_idx, field) \
fields[member_idx].type = \
   glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
fields[member_idx].name = ralloc_asprintf(nir, #field); \
fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);

   nir_variable *pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
   PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);

   pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                   glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
                                   "gfx_pushconst");
   pushconst->data.location = INT_MAX; //doesn't really matter

#undef PUSHCONST_MEMBER
}
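/* Illustrative note (sketch, not part of the original source): with the
 * layout built above, each member is an array of 32-bit uints, so a pass can
 * load one by its ZINK_GFX_PUSHCONST_* index, e.g.
 *
 *    nir_load_push_constant(b, 1, 32,
 *                           nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID), ...);
 *
 * and ntv resolves the index to the struct offset recorded in the field.
 */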
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}
/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
   return true;
}
208 /* "64-bit three- and four-component vectors consume two consecutive locations."
209 * - 14.1.4. Location Assignment
211 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
212 * are assigned to consecutive locations, loaded separately, and then assembled back into a
213 * composite value that's used in place of the original loaded ssa src
216 lower_64bit_vertex_attribs(nir_shader *shader)
218 if (shader->info.stage != MESA_SHADER_VERTEX)
221 bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
222 progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
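/* Illustrative sketch (not upstream code) of the split performed above: a
 * dvec4 input consuming locations N and N+1 becomes
 *
 *    dvec2 attr_lo;   // driver_location N,   holds .xy
 *    dvec2 attr_hi;   // driver_location N+1, holds .zw
 *
 * and every use of the original load is rewritten to read
 * vec4(attr_lo.x, attr_lo.y, attr_hi.x, attr_hi.y).
 */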
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
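/* Net effect of the bcsel above (sketch): gl_BaseVertex reads as
 *
 *    draw_mode_is_indexed == 1 ? base_vertex : 0
 *
 * matching GL semantics, where non-indexed draws report a base vertex of 0
 * while Vulkan reuses the vertex offset slot for both draw types.
 */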
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}
struct lower_gl_point_state {
   nir_variable *gl_pos_out;
   nir_variable *gl_point_size;
};

static bool
lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_gl_point_state *state = data;
   nir_ssa_def *vp_scale, *pos;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   if (nir_intrinsic_stream_id(intrin) != 0)
      return false;

   if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
       intrin->intrinsic == nir_intrinsic_end_primitive) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   b->cursor = nir_before_instr(instr);

   // viewport-map endpoints
   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);

   // Load point info values
   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);

   // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
   w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
   // half_w_delta = w_delta / 2
   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));

   // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
   h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
   // half_h_delta = h_delta / 2
   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));

   nir_ssa_def *point_dir[4][2] = {
      { nir_imm_float(b, -1), nir_imm_float(b, -1) },
      { nir_imm_float(b, -1), nir_imm_float(b, 1) },
      { nir_imm_float(b, 1), nir_imm_float(b, -1) },
      { nir_imm_float(b, 1), nir_imm_float(b, 1) }
   };

   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);

   for (size_t i = 0; i < 4; i++) {
      pos = nir_vec4(b,
                     nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
                     nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
                     nir_channel(b, point_pos, 2),
                     nir_channel(b, point_pos, 3));

      nir_store_var(b, state->gl_pos_out, pos, 0xf);

      nir_emit_vertex(b, 0);
   }

   nir_end_primitive(b);

   nir_instr_remove(&intrin->instr);

   return true;
}

static bool
lower_gl_point_gs(nir_shader *shader)
{
   struct lower_gl_point_state state;
   nir_builder b;

   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   shader->info.gs.vertices_out *= 4;

   // Gets the gl_Position in and out
   state.gl_pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);
   state.gl_point_size =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_PSIZ);

   // if position in or gl_PointSize aren't written, we have nothing to do
   if (!state.gl_pos_out || !state.gl_point_size)
      return false;

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                       nir_metadata_dominance, &state);
}
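/* Geometry sketch of the expansion above: each point becomes a 4-vertex
 * triangle strip centered on gl_Position, offset by half the point size in
 * each direction (converted back to clip space by multiplying with w):
 *
 *    (-dx, +dy) ---- (+dx, +dy)
 *         |      P       |
 *    (-dx, -dy) ---- (+dx, -dy)
 */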
struct lower_pv_mode_state {
   nir_variable *varyings[VARYING_SLOT_MAX];
   nir_variable *pos_counter;
   nir_variable *out_pos_counter;
   nir_variable *ring_offset;
   unsigned ring_size;
   unsigned primitive_vert_count;
   unsigned prim;
};

static nir_ssa_def *
lower_pv_mode_gs_ring_index(nir_builder *b,
                            struct lower_pv_mode_state *state,
                            nir_ssa_def *index)
{
   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
   return nir_imod(b, nir_iadd(b, index, ring_offset),
                   nir_imm_int(b, state->ring_size));
}

static bool
lower_pv_mode_gs_store(nir_builder *b,
                       nir_intrinsic_instr *intrin,
                       struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      gl_varying_slot location = var->data.location;
      assert(state->varyings[location]);
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
      nir_store_array_var(b, state->varyings[location],
                          index, intrin->src[1].ssa,
                          nir_intrinsic_write_mask(intrin));
      nir_instr_remove(&intrin->instr);
      return true;
   }

   return false;
}
static void
lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                struct lower_pv_mode_state *state,
                                nir_ssa_def *current_vertex)
{
   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *three = nir_imm_int(b, 3);
   bool is_triangle = state->primitive_vert_count == 3;
   /* This shader will always see the last three vertices emitted by the user gs.
    * The following table is used to rotate primitives within a strip generated
    * by the user gs such that the last vertex becomes the first.
    *
    * [lines, tris][even/odd index][vertex mod 3]
    */
   static const unsigned vert_maps[2][2][3] = {
      {{1, 0, 0}, {1, 0, 0}},
      {{2, 0, 1}, {2, 1, 0}}
   };
   /* When the primitive supplied to the gs comes from a strip, the provoking vertex
    * is either the last or the second, depending on whether the triangle is at an odd
    * or even position within the strip.
    *
    * odd or even primitive within draw
    */
   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
   for (unsigned i = 0; i < state->primitive_vert_count; i++) {
      /* odd or even triangle within strip emitted by user GS
       * this is handled using the table
       */
      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
      unsigned offset_even = vert_maps[is_triangle][0][i];
      unsigned offset_odd = vert_maps[is_triangle][1][i];
      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                         offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
       * For odd primitives we rotate by 2, combined with the previous rotation this
       * means the second vertex becomes the last.
       */
      if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
                                          nir_imul(b, odd_prim, two)),
                              three);
      /* Triangles that come from fans are provided to the gs the same way as
       * odd triangles from a strip so always rotate by 2.
       */
      else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
                              three);
      rotated_i = nir_iadd(b, rotated_i, current_vertex);
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         if (state->varyings[location]) {
            nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
            nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location]), index);
            copy_vars(b, nir_build_deref_var(b, var), value);
         }
      }
      nir_emit_vertex(b, 0);
   }
}
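/* Worked example (sketch): for triangles from a strip at an even in-strip
 * index, the map row {2, 0, 1} emits (v2, v0, v1) for user vertices
 * (v0, v1, v2), i.e. the user's last-written vertex becomes the first
 * (provoking) vertex of the emitted primitive.
 */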
static bool
lower_pv_mode_gs_emit_vertex(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // increment pos_counter
   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_pv_mode_gs_end_primitive(nir_builder *b,
                               nir_intrinsic_instr *intrin,
                               struct lower_pv_mode_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
   nir_push_loop(b);
   nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
   nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                          nir_imm_int(b, state->primitive_vert_count)));
   nir_jump(b, nir_jump_break);
   nir_pop_if(b, NULL);

   lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
   nir_end_primitive(b);

   nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);

   nir_pop_loop(b, NULL);
   /* Set the ring offset such that when position 0 is
    * read we get the last value written
    */
   nir_store_var(b, state->ring_offset, pos_counter, 1);
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
   nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
static bool
lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_pv_mode_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_pv_mode_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_pv_mode_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_pv_mode_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}

static unsigned
lower_pv_mode_vertices_for_prim(enum shader_prim prim)
{
   switch (prim) {
   case SHADER_PRIM_POINTS:
      return 1;
   case SHADER_PRIM_LINE_STRIP:
      return 2;
   case SHADER_PRIM_TRIANGLE_STRIP:
      return 3;
   default:
      unreachable("unsupported primitive for gs output");
   }
}

static bool
lower_pv_mode_gs(nir_shader *shader, unsigned prim)
{
   nir_builder b;
   struct lower_pv_mode_state state;
   memset(state.varyings, 0, sizeof(state.varyings));

   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);

   state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
   state.ring_size = shader->info.gs.vertices_out;

   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_primverts_%d", location);
      state.varyings[location] =
         nir_local_variable_create(entry,
                                   glsl_array_type(var->type,
                                                   state.ring_size,
                                                   false),
                                   name);
   }

   state.pos_counter = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__pos_counter");
   state.out_pos_counter = nir_local_variable_create(entry,
                                                     glsl_uint_type(),
                                                     "__out_pos_counter");
   state.ring_offset = nir_local_variable_create(entry,
                                                 glsl_uint_type(),
                                                 "__ring_offset");
   state.prim = prim;

   // initialize pos_counter and out_pos_counter
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
                                   (state.primitive_vert_count - 1)) *
                                  state.primitive_vert_count;
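   /* Worked example of the adjustment above (sketch): a 6-vertex triangle
    * strip contains 6 - (3 - 1) = 4 triangles, and re-emitting each one as a
    * separate primitive requires 4 * 3 = 12 output vertices.
    */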
   return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
                                       nir_metadata_dominance, &state);
}
struct lower_line_stipple_state {
   nir_variable *pos_out;
   nir_variable *stipple_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *stipple_counter;
   bool line_rectangular;
};

static nir_ssa_def *
viewport_map(nir_builder *b, nir_ssa_def *vert,
             nir_ssa_def *scale)
{
   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
                                     w_recip);
   return nir_fmul(b, ndc_point, scale);
}
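/* Math sketch: viewport_map(v, s) = (v.xy / v.w) * s -- a perspective divide
 * to NDC followed by scaling with the VIEWPORT_SCALE push constant, so that
 * lengths computed from the result are in window-space units.
 */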
static bool
lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_line_stipple_state *state = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
       intrin->intrinsic != nir_intrinsic_emit_vertex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   // viewport-map endpoints
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   prev = viewport_map(b, prev, vp_scale);
   curr = viewport_map(b, curr, vp_scale);

   // calculate length of line
   nir_ssa_def *len;
   if (state->line_rectangular)
      len = nir_fast_distance(b, prev, curr);
   else {
      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
   }
   // update stipple_counter
   nir_store_var(b, state->stipple_counter,
                 nir_fadd(b, nir_load_var(b, state->stipple_counter),
                          len), 1);
   nir_pop_if(b, NULL);

   nir_copy_var(b, state->stipple_out, state->stipple_counter);
   nir_copy_var(b, state->prev_pos, state->pos_out);

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(instr);
   nir_store_var(b, state->pos_counter,
                 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                              1), 1);

   return true;
}
static bool
lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
{
   nir_builder b;
   struct lower_line_stipple_state state;

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
                                           glsl_float_type(),
                                           "__stipple");
   state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.stipple_out->data.driver_location = shader->num_outputs++;
   state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");
   state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
                                               glsl_float_type(),
                                               "__stipple_counter");

   state.line_rectangular = line_rectangular;
   // initialize pos_counter and stipple_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);

   return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
                                       nir_metadata_dominance, &state);
}
static bool
lower_line_stipple_fs(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);

   // create stipple counter
   nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
                                               glsl_float_type(),
                                               "__stipple");
   stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   stipple->data.driver_location = shader->num_inputs++;
   stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
   shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);

   nir_variable *sample_mask_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      FRAG_RESULT_SAMPLE_MASK);
   if (!sample_mask_out) {
      sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
                                            glsl_uint_type(), "sample_mask");
      sample_mask_out->data.driver_location = shader->num_outputs++;
      sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
   }

   b.cursor = nir_after_cf_list(&entry->body);

   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                 .base = 1);
   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
   pattern = nir_iand_imm(&b, pattern, 0xffff);

   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
   nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
   nir_store_var(&b, v, sample_mask_in, 1);
   nir_store_var(&b, sample_mask, sample_mask_in, 1);

   nir_push_loop(&b);
   nir_ssa_def *value = nir_load_var(&b, v);
   nir_ssa_def *index = nir_ufind_msb(&b, value);
   nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
   nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
   nir_store_var(&b, v, new_value, 1);
   nir_push_if(&b, nir_ieq_imm(&b, value, 0));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_ssa_def *stipple_pos =
      nir_interp_deref_at_sample(&b, 1, 32,
                                 &nir_build_deref_var(&b, stipple)->dest.ssa, index);
   stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                          nir_imm_float(&b, 16.0));
   stipple_pos = nir_f2i32(&b, stipple_pos);
   nir_ssa_def *bit =
      nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
   nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
   {
      nir_ssa_def *value = nir_load_var(&b, sample_mask);
      value = nir_ixor(&b, value, index_mask);
      nir_store_var(&b, sample_mask, value, 1);
   }
   nir_pop_if(&b, NULL);
   nir_pop_loop(&b, NULL);
   nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);

   return true;
}
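/* Loop sketch (C-like pseudocode of what the builder emits above):
 *
 *    v = sample_mask_in;
 *    while (v) {
 *       i = ufind_msb(v);
 *       v ^= (1u << i);
 *       pos = (int)fmod(interp_at_sample(stipple, i) / factor, 16.0);
 *       if (((pattern >> pos) & 1) == 0)
 *          sample_mask ^= (1u << i);   // kill this sample
 *    }
 */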
struct lower_line_smooth_state {
   nir_variable *pos_out;
   nir_variable *line_coord_out;
   nir_variable *prev_pos;
   nir_variable *pos_counter;
   nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
                *varyings[VARYING_SLOT_MAX][4]; // location_frac
};

static bool
lower_line_smooth_gs_store(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (nir_deref_mode_is(deref, nir_var_shader_out)) {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      // we take care of position elsewhere
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location != VARYING_SLOT_POS) {
         assert(state->varyings[location]);
         assert(intrin->src[1].is_ssa);
         nir_store_var(b, state->varyings[location][location_frac],
                       intrin->src[1].ssa,
                       nir_intrinsic_write_mask(intrin));
         nir_instr_remove(&intrin->instr);
         return true;
      }
   }

   return false;
}
static bool
lower_line_smooth_gs_emit_vertex(nir_builder *b,
                                 nir_intrinsic_instr *intrin,
                                 struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
                                                  .base = 1,
                                                  .range = 2);
   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);

   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
                                               .base = 1);
   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);

   const unsigned yx[2] = { 1, 0 };
   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
   nir_ssa_def *len = nir_fast_length(b, vec);
   nir_ssa_def *dir = nir_normalize(b, vec);
   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
   half_length = nir_fadd_imm(b, half_length, 0.5);

   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
   nir_ssa_def *tangent =
      nir_fmul(b,
               nir_fmul(b,
                        nir_swizzle(b, dir, yx, 2),
                        nir_imm_vec2(b, 1.0, -1.0)),
               vp_scale_rcp);
   tangent = nir_fmul(b, tangent, half_width);
   tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
   dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);

   nir_ssa_def *line_offsets[8] = {
      nir_fadd(b, tangent, nir_fneg(b, dir)),
      nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
      tangent,
      nir_fneg(b, tangent),
      tangent,
      nir_fneg(b, tangent),
      nir_fadd(b, tangent, dir),
      nir_fadd(b, nir_fneg(b, tangent), dir),
   };
   nir_ssa_def *line_coord =
      nir_vec4(b, half_width, half_width, half_length, half_length);
   nir_ssa_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
   };

   /* emit first end-cap, and start line */
   for (int i = 0; i < 4; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->prev_varyings[location][location_frac])
            nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, prev, nir_fmul(b, line_offsets[i],
                                               nir_channel(b, prev, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b, 0);
   }

   /* finish line and emit last end-cap */
   for (int i = 4; i < 8; ++i) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
         gl_varying_slot location = var->data.location;
         unsigned location_frac = var->data.location_frac;
         if (state->varyings[location][location_frac])
            nir_copy_var(b, var, state->varyings[location][location_frac]);
      }
      nir_store_var(b, state->pos_out,
                    nir_fadd(b, curr, nir_fmul(b, line_offsets[i],
                                               nir_channel(b, curr, 3))), 0xf);
      nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
      nir_emit_vertex(b, 0);
   }
   nir_end_primitive(b);

   nir_pop_if(b, NULL);

   nir_copy_var(b, state->prev_pos, state->pos_out);
   nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (state->varyings[location][location_frac])
         nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
   }

   // update prev_pos and pos_counter for next vertex
   b->cursor = nir_after_instr(&intrin->instr);
   nir_store_var(b, state->pos_counter,
                 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
                              1), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}
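/* Geometry sketch: the 8 vertices emitted above form one triangle strip of
 * three quads -- [0..3] the first end-cap, [2..5] the line body, [4..7] the
 * last end-cap -- with line_coord interpolated noperspective so the fragment
 * shader can compute smooth coverage at the edges.
 */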
static bool
lower_line_smooth_gs_end_primitive(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   struct lower_line_smooth_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   // reset line counter
   nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   struct lower_line_smooth_state *state = data;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      return lower_line_smooth_gs_store(b, intrin, state);
   case nir_intrinsic_copy_deref:
      unreachable("should be lowered");
   case nir_intrinsic_emit_vertex_with_counter:
   case nir_intrinsic_emit_vertex:
      return lower_line_smooth_gs_emit_vertex(b, intrin, state);
   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      return lower_line_smooth_gs_end_primitive(b, intrin, state);
   default:
      return false;
   }
}
static bool
lower_line_smooth_gs(nir_shader *shader)
{
   nir_builder b;
   struct lower_line_smooth_state state;

   memset(state.varyings, 0, sizeof(state.varyings));
   memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      gl_varying_slot location = var->data.location;
      unsigned location_frac = var->data.location_frac;
      if (location == VARYING_SLOT_POS)
         continue;

      char name[100];
      snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
      state.varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);

      snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
      state.prev_varyings[location][location_frac] =
         nir_variable_create(shader, nir_var_shader_temp,
                             var->type, name);
   }

   state.pos_out =
      nir_find_variable_with_location(shader, nir_var_shader_out,
                                      VARYING_SLOT_POS);

   // if position isn't written, we have nothing to do
   if (!state.pos_out)
      return false;

   state.line_coord_out =
      nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
                          "__line_coord");
   state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
   state.line_coord_out->data.driver_location = shader->num_outputs++;
   state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
   shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);

   // create temp variables
   state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
                                        glsl_vec4_type(),
                                        "__prev_pos");
   state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
                                           glsl_uint_type(),
                                           "__pos_counter");

   // initialize pos_counter
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   nir_builder_init(&b, entry);
   b.cursor = nir_before_cf_list(&entry->body);
   nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);

   shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;

   return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                       nir_metadata_dominance, &state);
}
static bool
lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
{
   int dummy;
   nir_builder b;

   nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
   if (lower_stipple) {
      stipple_counter = nir_variable_create(shader, nir_var_shader_in,
                                            glsl_float_type(),
                                            "__stipple");
      stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
      stipple_counter->data.driver_location = shader->num_inputs++;
      stipple_counter->data.location =
         MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
      shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);

      stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
                                            glsl_uint_type(),
                                            "__stipple_pattern");

      // initialize stipple_pattern
      nir_function_impl *entry = nir_shader_get_entrypoint(shader);
      nir_builder_init(&b, entry);
      b.cursor = nir_before_cf_list(&entry->body);
      nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
                                                    .base = 1);
      nir_store_var(&b, stipple_pattern, pattern, 1);
   }

   nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
   return true;
}
static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
static bool
lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
   if (alu_instr->op != nir_op_pack_64_2x32 &&
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
   b->cursor = nir_before_instr(&alu_instr->instr);
   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
   nir_ssa_def *dest;
   switch (alu_instr->op) {
   case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
      break;
   case nir_op_unpack_64_2x32:
      dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
      break;
   default:
      unreachable("Impossible opcode");
   }
   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
   nir_instr_remove(&alu_instr->instr);
   return true;
}

static bool
lower_64bit_pack(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
                                       nir_metadata_block_index | nir_metadata_dominance, NULL);
}
nir_shader *
zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
                               const nir_shader *prev_stage)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                  options,
                                                  "filled quad gs");

   nir_shader *nir = b.shader;
   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 4;
   nir->info.gs.vertices_out = 6;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
   memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
   if (prev_stage->xfb_info) {
      nir->xfb_info = mem_dup(prev_stage->xfb_info, sizeof(nir_xfb_info));
   }

   nir_variable *in_vars[VARYING_SLOT_MAX];
   nir_variable *out_vars[VARYING_SLOT_MAX];
   unsigned num_vars = 0;

   /* Create input/output variables. */
   nir_foreach_shader_out_variable(var, prev_stage) {
      assert(!var->data.patch);

      char name[100];
      if (var->name)
         snprintf(name, sizeof(name), "in_%s", var->name);
      else
         snprintf(name, sizeof(name), "in_%d", var->data.driver_location);

      nir_variable *in = nir_variable_clone(var, nir);
      ralloc_free(in->name);
      in->name = ralloc_strdup(in, name);
      in->type = glsl_array_type(var->type, 4, false);
      in->data.mode = nir_var_shader_in;
      nir_shader_add_variable(nir, in);

      if (var->name)
         snprintf(name, sizeof(name), "out_%s", var->name);
      else
         snprintf(name, sizeof(name), "out_%d", var->data.driver_location);

      nir_variable *out = nir_variable_clone(var, nir);
      ralloc_free(out->name);
      out->name = ralloc_strdup(out, name);
      out->data.mode = nir_var_shader_out;
      nir_shader_add_variable(nir, out);

      in_vars[num_vars] = in;
      out_vars[num_vars++] = out;
   }

   int mapping_first[] = {0, 1, 2, 0, 2, 3};
   int mapping_last[] = {0, 1, 3, 1, 2, 3};
   nir_ssa_def *last_pv_vert_def = nir_load_provoking_last(&b);
   last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
   for (unsigned i = 0; i < 6; ++i) {
      /* swap indices 2 and 3 */
      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                   nir_imm_int(&b, mapping_last[i]),
                                   nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
      for (unsigned j = 0; j < num_vars; ++j) {
         if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
            continue;
         }
         nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
         copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
      }
      nir_emit_vertex(&b, 0);
      if (i == 2)
         nir_end_primitive(&b, 0);
   }

   nir_end_primitive(&b, 0);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
   return nir;
}
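/* Mapping sketch: the input quad (v0,v1,v2,v3) arrives as lines_adjacency and
 * is emitted as two triangles. mapping_first = {0,1,2, 0,2,3} keeps v0 as the
 * provoking vertex of both triangles; mapping_last = {0,1,3, 1,2,3} keeps v3
 * provoking when the last-vertex convention is selected at draw time.
 */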
static bool
lower_system_values_to_inlined_uniforms_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   int inlined_uniform_offset;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_flat_mask:
      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
      break;
   case nir_intrinsic_load_provoking_last:
      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
      break;
   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);
   nir_ssa_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                                            nir_imm_int(b, inlined_uniform_offset),
                                            .align_mul = 4, .align_offset = 0,
                                            .range_base = 0, .range = ~0);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest_def);
   nir_instr_remove(instr);
   return true;
}

bool
zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_system_values_to_inlined_uniforms_instr,
                                       nir_metadata_dominance, NULL);
}
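/* In effect (sketch): a load_provoking_last intrinsic becomes
 *
 *    load_ubo(block 0, offset ZINK_INLINE_VAL_PV_LAST_VERT * 4)
 *
 * so the value can be supplied through zink's inlined uniforms instead of a
 * dedicated system value.
 */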
void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_flrp32 = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,

      /* We can only support 32-bit ldexp, but NIR doesn't have a flag
       * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
       * ldexp, so we don't just always lower it in NIR). Given that ldexp is
       * effectively unused (no instances in shader-db), it's not worth the
       * effort to do so.
       */
      .lower_ldexp = true,

      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_usub_borrow = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
      .max_unroll_iterations = 0,
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
      /* soft fp64 function inlining will blow up loop bodies and effectively
       * stop Vulkan drivers from unrolling the loops.
       */
      screen->nir_options.max_unroll_iterations_fp64 = 32;
   }

   /*
      The OpFRem and OpFMod instructions use cheap approximations of remainder,
      and the error can be large due to the discontinuity in trunc() and floor().
      This can produce mathematically unexpected results in some cases, such as
      FMod(x,x) computing x rather than 0, and can also cause the result to have
      a different sign than the infinitely precise result.

      - Table 84. Precision of core SPIR-V Instructions
      * for drivers that are known to have imprecise fmod for doubles, lower dmod
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}
static const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          gl_shader_stage shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}
static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar requires the nir_instr to be const, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}

static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return false;
   default:
      break;
   }
   return true;
}
struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};

static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}
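/* Index convention (sketch): bo slots are keyed by access bit size, with
 * idx = bit_size >> 4, i.e. 16-bit -> 1, 32-bit -> 2, 64-bit -> 4, which is
 * why the uniforms/ubo/ssbo arrays above have 5 entries.
 */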
static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}

static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}
static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
         NIR_PASS_V(s, lower_64bit_pack);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}
/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}
/*
 * Add a check for out of bounds LOD for every texel fetch op
 * It boils down to:
 * - if (lod < query_levels(tex))
 * -    res = txf(tex)
 * - else
 * -    res = (0, 0, 0, 1)
 */
static bool
lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *txf = nir_instr_as_tex(in);
   if (txf->op != nir_texop_txf)
      return false;

   b->cursor = nir_before_instr(in);
   int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
   assert(lod_idx >= 0);
   nir_src lod_src = txf->src[lod_idx].src;
   if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
      return false;

   assert(lod_src.is_ssa);
   nir_ssa_def *lod = lod_src.ssa;

   int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
   int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
   nir_tex_instr *levels = nir_tex_instr_create(b->shader,
                                                !!(offset_idx >= 0) + !!(handle_idx >= 0));
   levels->op = nir_texop_query_levels;
   levels->texture_index = txf->texture_index;
   levels->dest_type = nir_type_int | lod->bit_size;
   if (offset_idx >= 0) {
      levels->src[0].src_type = nir_tex_src_texture_offset;
      nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
   }
   if (handle_idx >= 0) {
      levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
   }
   nir_ssa_dest_init(&levels->instr, &levels->dest,
                     nir_tex_instr_dest_size(levels), 32, NULL);
   nir_builder_instr_insert(b, &levels->instr);

   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
   nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
   nir_builder_instr_insert(b, &new_txf->instr);

   nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
   nir_const_value oob_values[4] = {0};
   unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
   oob_values[3] = (txf->dest_type & nir_type_float) ?
                   nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
   nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);

   nir_pop_if(b, lod_oob_else);
   nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);

   nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
   nir_instr_remove_v(in);
   return true;
}

/* This pass is used to work around the lack of out of bounds LOD robustness
 * for texel fetch ops in VK_EXT_image_robustness.
 */
static bool
lower_txf_lod_robustness(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
}
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}

static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   assert((int)location >= 0);

   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found > 1) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}
1790 is_inlined(const bool *inlined, const struct pipe_stream_output *output)
1792 for (unsigned i = 0; i < output->num_components; i++)
1793 if (!inlined[output->start_component + i])
1799 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1801 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1802 if (last_output < VARYING_SLOT_VAR0)
1803 last_output = VARYING_SLOT_VAR0;
1806 /* this should get fixed up by slot remapping */
1807 psiz->data.location = last_output;
static const struct glsl_type *
clamp_slot_type(const struct glsl_type *type, unsigned slot)
{
   /* could be dvec/dmat/mat: each member is the same */
   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
   /* determine size of each member type */
   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
   /* normalize slot idx to current type's size */
   slot %= slot_count;
   unsigned slot_components = glsl_get_components(plain);
   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
      slot_components *= 2;
   /* create a vec4 mask of the selected slot's components out of all the components */
   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
   /* return a vecN of the selected components */
   slot_components = util_bitcount(mask);
   return glsl_vec_type(slot_components);
}
static const struct glsl_type *
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
{
   const struct glsl_type *type = slot_type;
   unsigned slot_count = 0;
   unsigned cur_slot = 0;
   /* iterate over all the members in the struct, stopping once the slot idx is reached */
   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
      /* use array type for slot counting but return array member type for unroll */
      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
      type = glsl_without_array(arraytype);
      slot_count = glsl_count_vec4_slots(arraytype, false, false);
   }
   *slot_idx -= (cur_slot - slot_count);
   if (!glsl_type_is_struct_or_ifc(type))
      /* this is a fully unrolled struct: find the number of vec components to output */
      type = clamp_slot_type(type, *slot_idx);
   return type;
}
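/* Worked example for get_slot_components() below (illustrative):
 * float gl_ClipDistance[5] has vector semantics across two slots, so slot 0
 * reports MIN2(5, 4) = 4 components and slot 1 reports 5 % 4 = 1.
 */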
static unsigned
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
{
1852 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1853 const struct glsl_type *orig_type = var->type;
1854 const struct glsl_type *type = glsl_without_array(var->type);
1855 unsigned slot_idx = slot - so_slot;
1856 if (type != orig_type)
1857 slot_idx %= glsl_count_vec4_slots(type, false, false);
1858 /* need to find the vec4 that's being exported by this slot */
1859 while (glsl_type_is_struct_or_ifc(type))
1860 type = unroll_struct_type(type, &slot_idx);
1862 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1863 unsigned num_components = glsl_get_components(glsl_without_array(type));
1864 /* special handling: clip/cull distance are arrays with vector semantics */
1865 if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
num_components = glsl_array_size(type);
if (slot_idx)
   /* this is the second vec4 */
   num_components %= 4;
else
   /* this is the first vec4 */
   num_components = MIN2(num_components, 4);
}
1874 assert(num_components);
1875 /* gallium handles xfb in terms of 32bit units */
1876 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1877 num_components *= 2;
1878 return num_components;
static const struct pipe_stream_output *
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *packed_output = &so_info->output[i];
      if (reverse_map[packed_output->register_index] == slot)
         return packed_output;
   }
   return NULL;
}
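/* Annotate NIR outputs with gallium's stream-output (xfb) info. Whenever a
 * captured output covers a whole variable, the xfb data is "inlined" onto the
 * variable (explicit_xfb_buffer/xfb.buffer/xfb.stride/offset/stream below);
 * anything that can't be expressed that way is retained in zs->sinfo.so_info
 * for manual emission later.
 */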
static void
update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }
1907 bool have_fake_psiz = false;
1908 nir_foreach_shader_out_variable(var, nir) {
1909 if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
1910 have_fake_psiz = true;
1913 bool inlined[VARYING_SLOT_MAX][4] = {0};
1914 uint64_t packed = 0;
1915 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1916 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1917 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1918 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1919 nir_variable *psiz = NULL;
1920 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1921 const struct pipe_stream_output *output = &so_info->output[i];
1922 unsigned slot = reverse_map[output->register_index];
1923 /* always set stride to be used during draw */
1924 zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
1925 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1926 nir_variable *var = NULL;
while (!var)
   var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
if (var->data.location == VARYING_SLOT_PSIZ)
   psiz = var;
1933 slot = reverse_map[output->register_index];
1934 if (var->data.explicit_xfb_buffer) {
1935 /* handle dvec3 where gallium splits streamout over 2 registers */
1936 for (unsigned j = 0; j < output->num_components; j++)
1937 inlined[slot][output->start_component + j] = true;
if (is_inlined(inlined[slot], output))
   continue;
1941 bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
1942 unsigned num_components = get_slot_components(var, slot, so_slot);
/* if this is the entire variable, try to blast it out during the initial declaration;
 * structs must be handled later to ensure accurate analysis
 */
1946 if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
1947 var->data.explicit_xfb_buffer = 1;
1948 var->data.xfb.buffer = output->output_buffer;
1949 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
1950 var->data.offset = output->dst_offset * 4;
1951 var->data.stream = output->stream;
1952 for (unsigned j = 0; j < output->num_components; j++)
1953 inlined[slot][output->start_component + j] = true;
1955 /* otherwise store some metadata for later */
1956 packed |= BITFIELD64_BIT(slot);
1957 packed_components[slot] += output->num_components;
1958 packed_streams[slot] |= BITFIELD_BIT(output->stream);
1959 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
1960 for (unsigned j = 0; j < output->num_components; j++)
1961 packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
/* if this was flagged as a packed output before, and if all the components are
 * being output with the same stream on the same buffer with increasing offsets, this entire variable
 * can be consolidated into a single output to conserve locations
 */
1970 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1971 const struct pipe_stream_output *output = &so_info->output[i];
1972 unsigned slot = reverse_map[output->register_index];
if (is_inlined(inlined[slot], output))
   continue;
1975 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1976 nir_variable *var = NULL;
while (!var)
   var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
1979 /* this is a lowered 64bit variable that can't be exported due to packing */
if (var->data.is_xfb)
   continue;
1983 unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
1984 /* for each variable, iterate over all the variable's slots and inline the outputs */
1985 for (unsigned j = 0; j < num_slots; j++) {
1986 slot = var->data.location + j;
const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
if (!packed_output)
   continue;
1991 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1992 if (!(packed & BITFIELD64_BIT(slot)) ||
1993 util_bitcount(packed_streams[slot]) != 1 ||
1994 util_bitcount(packed_buffers[slot]) != 1)
1997 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1998 unsigned num_components = get_slot_components(var, slot, var->data.location);
1999 if (num_components != packed_components[slot])
2002 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2003 uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
2004 for (unsigned k = 1; k < num_components; k++) {
2005 /* if the offsets are not incrementing as expected, skip consolidation */
2006 if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
2008 prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
2011 /* this output can be consolidated: blast out all the data inlined */
2012 var->data.explicit_xfb_buffer = 1;
2013 var->data.xfb.buffer = output->output_buffer;
2014 var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
2015 var->data.offset = output->dst_offset * 4;
2016 var->data.stream = output->stream;
2017 /* GLSL specifies that interface blocks are split per-buffer in XFB */
2018 if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
2019 zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
2020 /* mark all slot components inlined to skip subsequent loop iterations */
2021 for (unsigned j = 0; j < num_slots; j++) {
2022 slot = var->data.location + j;
2023 for (unsigned k = 0; k < packed_components[slot]; k++)
2024 inlined[slot][k] = true;
2025 packed &= ~BITFIELD64_BIT(slot);
2030 /* these are packed/explicit varyings which can't be exported with normal output */
2031 zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
2032 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2033 zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
2035 zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
2036 /* ensure this doesn't get output in the shader by unsetting location */
2037 if (have_fake_psiz && psiz)
2038 update_psiz_location(nir, psiz);
struct decompose_state {
   nir_variable **split;
   bool needs_w;
};
static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
2049 struct decompose_state *state = data;
2050 nir_variable **split = state->split;
if (instr->type != nir_instr_type_intrinsic)
   return false;
2053 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_deref)
   return false;
2056 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2057 nir_variable *var = nir_deref_instr_get_variable(deref);
if (var != split[0])
   return false;
2060 unsigned num_components = glsl_get_vector_elements(split[0]->type);
2061 b->cursor = nir_after_instr(instr);
2062 nir_ssa_def *loads[4];
2063 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2064 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2065 if (state->needs_w) {
/* oob load of the w component to get the correct value for int/float */
2067 loads[3] = nir_channel(b, loads[0], 3);
2068 loads[0] = nir_channel(b, loads[0], 0);
2070 nir_ssa_def *new_load = nir_vec(b, loads, num_components);
2071 nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
nir_instr_remove_v(instr);
return true;
}
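/* Split a multi-component vertex attribute into per-component variables so
 * each component can be fetched separately; e.g. (illustrative) a vec4 input
 * becomes four scalar inputs that lower_attrib() above reassembles with
 * nir_vec().
 */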
static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
2083 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2084 nir_variable *split[5];
2085 struct decompose_state state;
2086 state.split = split;
2087 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2090 bits |= BITFIELD_BIT(var->data.driver_location);
2091 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2092 unsigned num_components = glsl_get_vector_elements(var->type);
2093 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2094 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2095 split[i+1] = nir_variable_clone(var, nir);
2096 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2097 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2098 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2100 split[i+1]->type = new_type;
2101 split[i+1]->data.driver_location = ffs(bits) - 1;
2102 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2103 nir_shader_add_variable(nir, split[i+1]);
2105 var->data.mode = nir_var_shader_temp;
2106 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2108 nir_fixup_deref_modes(nir);
2109 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2110 optimize_nir(nir, NULL);
static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
2117 struct zink_screen *screen = data;
2118 const bool has_int64 = screen->info.feats.features.shaderInt64;
if (instr->type != nir_instr_type_intrinsic)
   return false;
2121 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2122 b->cursor = nir_before_instr(instr);
2123 switch (intr->intrinsic) {
2124 case nir_intrinsic_ssbo_atomic_fadd:
2125 case nir_intrinsic_ssbo_atomic_add:
2126 case nir_intrinsic_ssbo_atomic_umin:
2127 case nir_intrinsic_ssbo_atomic_imin:
2128 case nir_intrinsic_ssbo_atomic_umax:
2129 case nir_intrinsic_ssbo_atomic_imax:
2130 case nir_intrinsic_ssbo_atomic_and:
2131 case nir_intrinsic_ssbo_atomic_or:
2132 case nir_intrinsic_ssbo_atomic_xor:
2133 case nir_intrinsic_ssbo_atomic_exchange:
2134 case nir_intrinsic_ssbo_atomic_comp_swap: {
2135 /* convert offset to uintN_t[idx] */
2136 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
break;
}
2140 case nir_intrinsic_load_ssbo:
2141 case nir_intrinsic_load_ubo: {
2142 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2143 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2144 nir_src_is_const(intr->src[0]) &&
2145 nir_src_as_uint(intr->src[0]) == 0 &&
2146 nir_dest_bit_size(intr->dest) == 64 &&
2147 nir_intrinsic_align_offset(intr) % 8 != 0;
2148 force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2149 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2150 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (force_2x32) {
2153 /* this is always scalarized */
2154 assert(intr->dest.ssa.num_components == 1);
2155 /* rewrite as 2x32 */
2156 nir_ssa_def *load[2];
2157 for (unsigned i = 0; i < 2; i++) {
2158 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2159 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2161 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2162 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2164 /* cast back to 64bit */
2165 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2166 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
   nir_instr_remove(instr);
}
break;
}
2171 case nir_intrinsic_load_shared:
2172 b->cursor = nir_before_instr(instr);
2173 bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
2174 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
2175 nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (force_2x32) {
2178 /* this is always scalarized */
2179 assert(intr->dest.ssa.num_components == 1);
2180 /* rewrite as 2x32 */
2181 nir_ssa_def *load[2];
2182 for (unsigned i = 0; i < 2; i++)
2183 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2184 /* cast back to 64bit */
2185 nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2186 nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
   nir_instr_remove(instr);
}
break;
2191 case nir_intrinsic_store_ssbo: {
2192 b->cursor = nir_before_instr(instr);
2193 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2194 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2195 nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
/* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (force_2x32) {
2198 /* this is always scalarized */
2199 assert(intr->src[0].ssa->num_components == 1);
2200 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2201 for (unsigned i = 0; i < 2; i++)
2202 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
   nir_instr_remove(instr);
}
break;
}
2207 case nir_intrinsic_store_shared: {
2208 b->cursor = nir_before_instr(instr);
2209 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2210 nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2211 nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
/* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
2213 if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2214 /* this is always scalarized */
2215 assert(intr->src[0].ssa->num_components == 1);
2216 nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2217 for (unsigned i = 0; i < 2; i++)
2218 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
      nir_instr_remove(instr);
   }
   break;
}
default:
   return false;
}
return true;
}
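/* Summary of the pass (illustrative example): byte offsets become array
 * indices of the access size, so a 64-bit SSBO load at byte offset 24 turns
 * into index 3 of a uint64_t-typed array, and 64-bit accesses are split into
 * 2x32 when shaderInt64 is unavailable.
 */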
static bool
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}
2235 static nir_variable *
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
{
2238 nir_variable *var, **ptr;
2239 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
if (ssbo)
   ptr = &bo->ssbo[bit_size >> 4];
else {
   if (!idx)
      ptr = &bo->uniforms[bit_size >> 4];
   else
      ptr = &bo->ubo[bit_size >> 4];
}
var = *ptr;
if (!var) {
   if (ssbo)
      var = bo->ssbo[32 >> 4];
   else {
      if (!idx)
         var = bo->uniforms[32 >> 4];
      else
         var = bo->ubo[32 >> 4];
   }
   var = nir_variable_clone(var, shader);
   if (ssbo)
      var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
   else
      var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
   *ptr = var;
   nir_shader_add_variable(shader, var);
2267 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2268 fields[0].name = ralloc_strdup(shader, "base");
2269 fields[1].name = ralloc_strdup(shader, "unsized");
2270 unsigned array_size = glsl_get_length(var->type);
2271 const struct glsl_type *bare_type = glsl_without_array(var->type);
2272 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2273 unsigned length = glsl_get_length(array_type);
2274 const struct glsl_type *type;
2275 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2276 if (bit_size > 32) {
2277 assert(bit_size == 64);
2278 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2280 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2282 fields[0].type = type;
2283 fields[1].type = unsized;
2284 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
   var->data.driver_location = idx;
}
return var;
}
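/* Convert buffer atomics to deref atomics on the variables built by
 * get_bo_var(); e.g. (illustrative) ssbo_atomic_add(buf, offset, x) becomes
 * deref_atomic_add on ssbos[buf].base[offset].
 */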
static void
rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
{
2293 nir_intrinsic_op op;
2294 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_ssbo_atomic_fadd:
   op = nir_intrinsic_deref_atomic_fadd;
   break;
case nir_intrinsic_ssbo_atomic_fmin:
   op = nir_intrinsic_deref_atomic_fmin;
   break;
case nir_intrinsic_ssbo_atomic_fmax:
   op = nir_intrinsic_deref_atomic_fmax;
   break;
case nir_intrinsic_ssbo_atomic_fcomp_swap:
   op = nir_intrinsic_deref_atomic_fcomp_swap;
   break;
case nir_intrinsic_ssbo_atomic_add:
   op = nir_intrinsic_deref_atomic_add;
   break;
case nir_intrinsic_ssbo_atomic_umin:
   op = nir_intrinsic_deref_atomic_umin;
   break;
case nir_intrinsic_ssbo_atomic_imin:
   op = nir_intrinsic_deref_atomic_imin;
   break;
case nir_intrinsic_ssbo_atomic_umax:
   op = nir_intrinsic_deref_atomic_umax;
   break;
case nir_intrinsic_ssbo_atomic_imax:
   op = nir_intrinsic_deref_atomic_imax;
   break;
case nir_intrinsic_ssbo_atomic_and:
   op = nir_intrinsic_deref_atomic_and;
   break;
case nir_intrinsic_ssbo_atomic_or:
   op = nir_intrinsic_deref_atomic_or;
   break;
case nir_intrinsic_ssbo_atomic_xor:
   op = nir_intrinsic_deref_atomic_xor;
   break;
case nir_intrinsic_ssbo_atomic_exchange:
   op = nir_intrinsic_deref_atomic_exchange;
   break;
case nir_intrinsic_ssbo_atomic_comp_swap:
   op = nir_intrinsic_deref_atomic_comp_swap;
   break;
default:
   unreachable("unknown intrinsic");
}
2341 nir_ssa_def *offset = intr->src[1].ssa;
2342 nir_src *src = &intr->src[0];
2343 nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
2344 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2345 nir_ssa_def *idx = src->ssa;
if (bo->first_ssbo)
   idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2348 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2349 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2351 /* generate new atomic deref ops for every component */
2352 nir_ssa_def *result[4];
2353 unsigned num_components = nir_dest_num_components(intr->dest);
2354 for (unsigned i = 0; i < num_components; i++) {
2355 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2356 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2357 nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
2358 new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
2359 /* deref ops have no offset src, so copy the srcs after it */
2360 for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
2361 nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
2362 nir_builder_instr_insert(b, &new_instr->instr);
2364 result[i] = &new_instr->dest.ssa;
2365 offset = nir_iadd_imm(b, offset, 1);
2368 nir_ssa_def *load = nir_vec(b, result, num_components);
2369 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
nir_instr_remove(instr);
}
static bool
remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
2376 struct bo_vars *bo = data;
if (instr->type != nir_instr_type_intrinsic)
   return false;
2379 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2380 nir_variable *var = NULL;
2381 nir_ssa_def *offset = NULL;
2382 bool is_load = true;
b->cursor = nir_before_instr(instr);
nir_src *src;
bool ssbo = true;
switch (intr->intrinsic) {
2387 case nir_intrinsic_ssbo_atomic_fadd:
2388 case nir_intrinsic_ssbo_atomic_fmin:
2389 case nir_intrinsic_ssbo_atomic_fmax:
2390 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2391 case nir_intrinsic_ssbo_atomic_add:
2392 case nir_intrinsic_ssbo_atomic_umin:
2393 case nir_intrinsic_ssbo_atomic_imin:
2394 case nir_intrinsic_ssbo_atomic_umax:
2395 case nir_intrinsic_ssbo_atomic_imax:
2396 case nir_intrinsic_ssbo_atomic_and:
2397 case nir_intrinsic_ssbo_atomic_or:
2398 case nir_intrinsic_ssbo_atomic_xor:
2399 case nir_intrinsic_ssbo_atomic_exchange:
2400 case nir_intrinsic_ssbo_atomic_comp_swap:
rewrite_atomic_ssbo_instr(b, instr, bo);
return true;
2403 case nir_intrinsic_store_ssbo:
2404 src = &intr->src[1];
2405 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
offset = intr->src[2].ssa;
is_load = false;
break;
2409 case nir_intrinsic_load_ssbo:
2410 src = &intr->src[0];
2411 var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
offset = intr->src[1].ssa;
break;
2414 case nir_intrinsic_load_ubo:
2415 src = &intr->src[0];
2416 var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
   offset = intr->src[1].ssa;
   ssbo = false;
   break;
default:
   return false;
}
2425 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2426 nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2427 if (!ssbo && bo->first_ubo && var->data.driver_location)
2428 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2429 else if (ssbo && bo->first_ssbo)
2430 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2431 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
2432 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
assert(intr->num_components <= 2);
if (is_load) {
   nir_ssa_def *result[2];
2436 for (unsigned i = 0; i < intr->num_components; i++) {
2437 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2438 result[i] = nir_load_deref(b, deref_arr);
2439 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2440 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2441 offset = nir_iadd_imm(b, offset, 1);
2443 nir_ssa_def *load = nir_vec(b, result, intr->num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
} else {
   nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
2447 nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
nir_instr_remove(instr);
}
return true;
}
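/* Replace explicit buffer intrinsics with variable derefs so ntv can emit
 * access chains; e.g. (illustrative) load_ssbo(buf, offset) becomes a
 * load_deref of ssbos[buf].base[offset].
 */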
static bool
remove_bo_access(nir_shader *shader, struct zink_shader *zs)
{
2456 struct bo_vars bo = get_bo_vars(zs, shader);
2457 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
static bool
find_var_deref(nir_shader *nir, nir_variable *var)
{
2463 nir_foreach_function(function, nir) {
if (!function->impl)
   continue;
2467 nir_foreach_block(block, function->impl) {
2468 nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_deref)
   continue;
2471 nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (deref->deref_type == nir_deref_type_var && deref->var == var)
            return true;
      }
   }
}
return false;
}
2480 struct clamp_layer_output_state {
2481 nir_variable *original;
nir_variable *clamped;
};
static void
clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
{
2488 nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
2489 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
2490 .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
2491 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2492 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
                               nir_load_deref(b, original_deref),
                               nir_imm_int(b, 0));
nir_store_deref(b, clamped_deref, layer, 0);
}
static bool
clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
{
2502 struct clamp_layer_output_state *state = data;
2503 switch (instr->type) {
2504 case nir_instr_type_intrinsic: {
2505 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2506 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
    intr->intrinsic != nir_intrinsic_emit_vertex)
   return false;
2509 b->cursor = nir_before_instr(instr);
   clamp_layer_output_emit(b, state);
   return true;
}
2513 default: return false;
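/* Some drivers need gl_Layer forced to 0 for non-layered framebuffers
 * (see driver_workarounds.needs_sanitised_layer). Emit a second, clamped
 * layer output selected by a push-constant flag and demote the original
 * output to an ordinary varying.
 */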
static bool
clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
{
2520 switch (vs->info.stage) {
2521 case MESA_SHADER_VERTEX:
2522 case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_EVAL:
   break;
default:
   unreachable("invalid last vertex stage!");
}
2528 struct clamp_layer_output_state state = {0};
2529 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
if (!state.original || !find_var_deref(vs, state.original))
   return false;
2532 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2533 state.clamped->data.location = VARYING_SLOT_LAYER;
2534 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2535 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2536 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2537 state.original->data.driver_location = (*next_location)++;
   if (fs_var) {
      fs_var->data.location = state.original->data.location;
      fs_var->data.driver_location = state.original->data.driver_location;
   }
} else {
2543 if (state.original->data.explicit_xfb_buffer) {
2544 /* Will xfb the clamped output but still better than nothing */
2545 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2546 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2547 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2548 state.clamped->data.offset = state.original->data.offset;
2549 state.clamped->data.stream = state.original->data.stream;
}
state.original->data.mode = nir_var_shader_temp;
2552 nir_fixup_deref_modes(vs);
2554 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
   nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
} else {
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2559 nir_builder_init(&b, impl);
2560 assert(impl->end_block->predecessors->entries == 1);
2561 b.cursor = nir_after_cf_list(&impl->body);
2562 clamp_layer_output_emit(&b, &state);
   nir_metadata_preserve(impl, nir_metadata_dominance);
}
optimize_nir(vs, NULL);
NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
return true;
}
2571 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
unsigned slot = var->data.location;
switch (slot) {
2576 case VARYING_SLOT_POS:
2577 case VARYING_SLOT_PNTC:
2578 case VARYING_SLOT_PSIZ:
2579 case VARYING_SLOT_LAYER:
2580 case VARYING_SLOT_PRIMITIVE_ID:
2581 case VARYING_SLOT_CLIP_DIST0:
2582 case VARYING_SLOT_CULL_DIST0:
2583 case VARYING_SLOT_VIEWPORT:
2584 case VARYING_SLOT_FACE:
2585 case VARYING_SLOT_TESS_LEVEL_OUTER:
2586 case VARYING_SLOT_TESS_LEVEL_INNER:
2587 /* use a sentinel value to avoid counting later */
2588 var->data.driver_location = UINT_MAX;
2592 if (var->data.patch) {
2593 assert(slot >= VARYING_SLOT_PATCH0);
2594 slot -= VARYING_SLOT_PATCH0;
2596 if (slot_map[slot] == 0xff) {
2597 assert(*reserved < MAX_VARYING);
unsigned num_slots;
if (nir_is_arrayed_io(var, stage))
   num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
else
   num_slots = glsl_count_vec4_slots(var->type, false, false);
2603 assert(*reserved + num_slots <= MAX_VARYING);
2604 for (unsigned i = 0; i < num_slots; i++)
2605 slot_map[slot + i] = (*reserved)++;
}
slot = slot_map[slot];
2608 assert(slot < MAX_VARYING);
2609 var->data.driver_location = slot;
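/* The slot_map built above is shared with assign_consumer_var_io() below so
 * both stages agree on locations; e.g. (illustrative) if VARYING_SLOT_VAR3 is
 * the first scratch slot seen, slot_map[VARYING_SLOT_VAR3] becomes 0 and both
 * sides address it as driver_location 0.
 */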
2613 ALWAYS_INLINE static bool
2614 is_texcoord(gl_shader_stage stage, const nir_variable *var)
if (stage != MESA_SHADER_FRAGMENT)
   return false;
2618 return var->data.location >= VARYING_SLOT_TEX0 &&
2619 var->data.location <= VARYING_SLOT_TEX7;
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (slot) {
2627 case VARYING_SLOT_POS:
2628 case VARYING_SLOT_PNTC:
2629 case VARYING_SLOT_PSIZ:
2630 case VARYING_SLOT_LAYER:
2631 case VARYING_SLOT_PRIMITIVE_ID:
2632 case VARYING_SLOT_CLIP_DIST0:
2633 case VARYING_SLOT_CULL_DIST0:
2634 case VARYING_SLOT_VIEWPORT:
2635 case VARYING_SLOT_FACE:
2636 case VARYING_SLOT_TESS_LEVEL_OUTER:
2637 case VARYING_SLOT_TESS_LEVEL_INNER:
2638 /* use a sentinel value to avoid counting later */
   var->data.driver_location = UINT_MAX;
   return true;
}
2642 if (var->data.patch) {
2643 assert(slot >= VARYING_SLOT_PATCH0);
2644 slot -= VARYING_SLOT_PATCH0;
2646 if (slot_map[slot] == (unsigned char)-1) {
/* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
 * so keep for now and eliminate later
 */
   if (is_texcoord(stage, var)) {
      var->data.driver_location = -1;
      return true;
   }
   if (stage != MESA_SHADER_TESS_CTRL)
      return false;
   /* patch variables may be read in the workgroup */
   slot_map[slot] = (*reserved)++;
}
var->data.driver_location = slot_map[slot];
return true;
}
static bool
rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
{
2669 nir_variable *var = data;
if (instr->type != nir_instr_type_intrinsic)
   return false;
2673 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_deref)
   return false;
2676 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
if (deref_var != var)
   return false;
2679 b->cursor = nir_before_instr(instr);
2680 nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
2681 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2682 switch (var->data.location) {
2683 case VARYING_SLOT_COL0:
2684 case VARYING_SLOT_COL1:
2685 case VARYING_SLOT_BFC0:
2686 case VARYING_SLOT_BFC1:
2687 /* default color is 0,0,0,1 */
2688 if (nir_dest_num_components(intr->dest) == 4)
         zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
      break;
   default:
      break;
   }
}
2695 nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
nir_instr_remove(instr);
return true;
}
void
zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
2704 unsigned char slot_map[VARYING_SLOT_MAX];
2705 memset(slot_map, -1, sizeof(slot_map));
2706 bool do_fixup = false;
2707 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2708 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2709 /* remove injected pointsize from all but the last vertex stage */
2710 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2711 if (var && !var->data.explicit_location) {
2712 var->data.mode = nir_var_shader_temp;
2713 nir_fixup_deref_modes(producer);
2714 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2715 optimize_nir(producer, NULL);
2718 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2719 /* never assign from tcs -> tes, always invert */
2720 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
2721 assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
2722 nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
2723 if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
2724 /* this is an output, nothing more needs to be done for it to be dropped */
2728 nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
2729 assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
2730 nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
2731 if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
2733 /* input needs to be rewritten */
2734 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var);
2737 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2738 do_fixup |= clamp_layer_output(producer, consumer, &reserved);
}
if (!do_fixup)
   return;
nir_fixup_deref_modes(nir);
2743 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
optimize_nir(nir, NULL);
}
2747 /* all types that hit this function contain something that is 64bit */
2748 static const struct glsl_type *
rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
{
2751 if (glsl_type_is_array(type)) {
2752 const struct glsl_type *child = glsl_get_array_element(type);
2753 unsigned elements = glsl_array_size(type);
2754 unsigned stride = glsl_get_explicit_stride(type);
2755 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2757 /* rewrite structs recursively */
2758 if (glsl_type_is_struct_or_ifc(type)) {
2759 unsigned nmembers = glsl_get_length(type);
2760 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2761 unsigned xfb_offset = 0;
2762 for (unsigned i = 0; i < nmembers; i++) {
2763 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
fields[i] = *f;
xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2766 if (i < nmembers - 1 && xfb_offset % 8 &&
2767 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2768 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
   var->data.is_xfb = true;
}
2771 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2773 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
   return type;
2777 if (doubles_only && glsl_type_is_vector_or_scalar(type))
2778 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2779 enum glsl_base_type base_type;
2780 switch (glsl_get_base_type(type)) {
2781 case GLSL_TYPE_UINT64:
2782 base_type = GLSL_TYPE_UINT;
2784 case GLSL_TYPE_INT64:
2785 base_type = GLSL_TYPE_INT;
2787 case GLSL_TYPE_DOUBLE:
2788 base_type = GLSL_TYPE_FLOAT;
2791 unreachable("unknown 64-bit vertex attribute format!");
2793 if (glsl_type_is_scalar(type))
2794 return glsl_vector_type(base_type, 2);
2795 unsigned num_components;
2796 if (glsl_type_is_matrix(type)) {
   /* align to vec4 size: a dvec3 column is padded out to dvec4 */
   unsigned vec_components = glsl_get_vector_elements(type);
   if (vec_components == 3)
      vec_components = 4;
   num_components = vec_components * 2 * glsl_get_matrix_columns(type);
} else
2803 num_components = glsl_get_vector_elements(type) * 2;
2804 if (num_components <= 4)
2805 return glsl_vector_type(base_type, num_components);
2807 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2808 struct glsl_struct_field fields[8] = {0};
2809 unsigned remaining = num_components;
2810 unsigned nfields = 0;
2811 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2812 assert(i < ARRAY_SIZE(fields));
2813 fields[i].name = "";
2814 fields[i].offset = i * 16;
2815 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
char buf[128]; /* size assumed; the original declaration was elided */
snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
return glsl_struct_type(fields, nfields, buf, true);
}
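/* Worked example for the rewrite above (illustrative): dvec3 has six 32-bit
 * components, so it is rewritten as struct { vec4; vec2; }; a lone double
 * becomes a vec2 and a dvec2 becomes a vec4.
 */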
static const struct glsl_type *
deref_is_matrix(nir_deref_instr *deref)
{
   if (glsl_type_is_matrix(deref->type))
      return deref->type;
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent)
      return deref_is_matrix(parent);
   return NULL;
}
static bool
lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
                          struct hash_table *derefs, struct set *deletes, bool doubles_only)
{
   bool func_progress = false;
   if (!function->impl)
      return false;
   nir_builder b;
   nir_builder_init(&b, function->impl);
2842 nir_foreach_block(block, function->impl) {
2843 nir_foreach_instr_safe(instr, block) {
2844 switch (instr->type) {
2845 case nir_instr_type_deref: {
2846 nir_deref_instr *deref = nir_instr_as_deref(instr);
if (!(deref->modes & var->data.mode))
   continue;
if (nir_deref_instr_get_variable(deref) != var)
   continue;
2852 /* matrix types are special: store the original deref type for later use */
2853 const struct glsl_type *matrix = deref_is_matrix(deref);
nir_deref_instr *parent = nir_deref_instr_parent(deref);
if (!matrix) {
   /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
   hash_table_foreach(derefs, he) {
      /* propagate parent matrix type to row deref */
      if (he->key == parent)
         matrix = he->data;
   }
}
_mesa_hash_table_insert(derefs, deref, (void*)matrix);
   if (deref->deref_type == nir_deref_type_var)
      deref->type = var->type;
   else
      deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
   break;
}
2871 case nir_instr_type_intrinsic: {
2872 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_store_deref &&
    intr->intrinsic != nir_intrinsic_load_deref)
   break;
if (nir_intrinsic_get_var(intr, 0) != var)
   break;
if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
    (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
   break;
b.cursor = nir_before_instr(instr);
2882 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2883 unsigned num_components = intr->num_components * 2;
2884 nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
2885 /* this is the stored matrix type from the deref */
2886 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2887 const struct glsl_type *matrix = he ? he->data : NULL;
if (doubles_only && !matrix)
   break;
func_progress = true;
2891 if (intr->intrinsic == nir_intrinsic_store_deref) {
2892 /* first, unpack the src data to 32bit vec2 components */
2893 for (unsigned i = 0; i < intr->num_components; i++) {
2894 nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2895 comp[i * 2] = nir_channel(&b, ssa, 0);
2896 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
unsigned wrmask = nir_intrinsic_write_mask(intr);
unsigned mask = 0;
2900 /* expand writemask for doubled components */
2901 for (unsigned i = 0; i < intr->num_components; i++) {
2902 if (wrmask & BITFIELD_BIT(i))
2903 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
}
if (matrix) {
   /* matrix types always come from array (row) derefs */
2907 assert(deref->deref_type == nir_deref_type_array);
2908 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2909 /* let optimization clean up consts later */
2910 nir_ssa_def *index = deref->arr.index.ssa;
/* this might be an indirect array index:
 * - iterate over matrix columns
 * - add if blocks for each column
 * - perform the store in the block
 */
2916 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2917 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2918 unsigned vec_components = glsl_get_vector_elements(matrix);
2919 /* always clamp dvec3 to 4 components */
if (vec_components == 3)
   vec_components = 4;
2922 unsigned start_component = idx * vec_components * 2;
2924 unsigned member = start_component / 4;
2925 /* number of components remaining */
2926 unsigned remaining = num_components;
2927 for (unsigned i = 0; i < num_components; member++) {
2928 if (!(mask & BITFIELD_BIT(i)))
2930 assert(member < glsl_get_length(var_deref->type));
2931 /* deref the rewritten struct to the appropriate vec4/vec2 */
2932 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2933 unsigned incr = MIN2(remaining, 4);
2934 /* assemble the write component vec */
2935 nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
2936 /* use the number of components being written as the writemask */
2937 if (glsl_get_vector_elements(strct->type) > val->num_components)
2938 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
2939 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
   nir_pop_if(&b, NULL);
}
2945 _mesa_set_add(deletes, &deref->instr);
2946 } else if (num_components <= 4) {
2947 /* simple store case: just write out the components */
2948 nir_ssa_def *dest = nir_vec(&b, comp, num_components);
2949 nir_store_deref(&b, deref, dest, mask);
2951 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
2952 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
2953 if (!(mask & BITFIELD_MASK(4)))
2955 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
2956 nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
2957 if (glsl_get_vector_elements(strct->type) > dest->num_components)
2958 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
2959 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
nir_ssa_def *dest = NULL;
if (matrix) {
   /* matrix types always come from array (row) derefs */
2967 assert(deref->deref_type == nir_deref_type_array);
2968 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2969 /* let optimization clean up consts later */
2970 nir_ssa_def *index = deref->arr.index.ssa;
/* this might be an indirect array index:
 * - iterate over matrix columns
 * - add if blocks for each column
 * - phi the loads using the array index
 */
2976 unsigned cols = glsl_get_matrix_columns(matrix);
2977 nir_ssa_def *dests[4];
2978 for (unsigned idx = 0; idx < cols; idx++) {
/* don't add an if for the final row: this will be handled in the else */
if (idx < cols - 1)
   nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2982 unsigned vec_components = glsl_get_vector_elements(matrix);
2983 /* always clamp dvec3 to 4 components */
if (vec_components == 3)
   vec_components = 4;
2986 unsigned start_component = idx * vec_components * 2;
2988 unsigned member = start_component / 4;
2989 /* number of components remaining */
2990 unsigned remaining = num_components;
2991 /* component index */
2992 unsigned comp_idx = 0;
2993 for (unsigned i = 0; i < num_components; member++) {
2994 assert(member < glsl_get_length(var_deref->type));
2995 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
2996 nir_ssa_def *load = nir_load_deref(&b, strct);
2997 unsigned incr = MIN2(remaining, 4);
2998 /* repack the loads to 64bit */
2999 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3000 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
   dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
   if (idx < cols - 1)
      nir_push_else(&b, NULL);
}
3008 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3009 for (unsigned idx = cols - 1; idx >= 1; idx--) {
3010 nir_pop_if(&b, NULL);
3011 dest = nir_if_phi(&b, dests[idx - 1], dest);
3013 _mesa_set_add(deletes, &deref->instr);
3014 } else if (num_components <= 4) {
3015 /* simple load case */
3016 nir_ssa_def *load = nir_load_deref(&b, deref);
3017 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3018 for (unsigned i = 0; i < intr->num_components; i++) {
3019 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3021 dest = nir_vec(&b, comp, intr->num_components);
/* loading > 4 components: access the struct and load the appropriate vec4 members */
3024 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3025 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3026 nir_ssa_def *load = nir_load_deref(&b, strct);
3027 comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
3028 if (num_components > 2)
3029 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3031 dest = nir_vec(&b, comp, intr->num_components);
3033 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
3035 _mesa_set_add(deletes, instr);
3044 nir_metadata_preserve(function->impl, nir_metadata_none);
3045 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3046 set_foreach_remove(deletes, he)
3047 nir_instr_remove((void*)he->key);
return func_progress;
}
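/* Rewrite one 64-bit i/o variable and every load/store touching it; e.g.
 * (illustrative) a dvec4 store becomes two vec4 stores of the unpacked 32-bit
 * halves, and loads are repacked with nir_pack_64_2x32().
 */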
static bool
lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
                      struct set *deletes, bool doubles_only)
{
if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
   return false;
3057 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3058 /* once type is rewritten, rewrite all loads and stores */
3059 nir_foreach_function(function, shader)
   lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
return true;
}
3064 /* rewrite all input/output variables using 32bit types and load/stores */
static bool
lower_64bit_vars(nir_shader *shader, bool doubles_only)
{
   bool progress = false;
3069 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3070 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3071 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
3072 progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
3073 nir_foreach_function(function, shader) {
3074 nir_foreach_function_temp_variable(var, function->impl) {
if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
   continue;
var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3078 progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
   }
}
ralloc_free(deletes);
3082 ralloc_free(derefs);
if (progress) {
   nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
   nir_lower_phis_to_scalar(shader, false);
   optimize_nir(shader, NULL);
}
return progress;
}
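/* Split struct outputs into one variable per member so location assignment
 * can pack them; e.g. (illustrative) out struct { vec4 a; float b; } s;
 * becomes two variables at consecutive locations.
 */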
static bool
split_blocks(nir_shader *nir)
{
3094 bool progress = false;
bool changed = true;
while (changed) {
   changed = false;
   nir_foreach_shader_out_variable(var, nir) {
3099 const struct glsl_type *base_type = glsl_without_array(var->type);
3100 nir_variable *members[32]; //can't have more than this without breaking NIR
if (!glsl_type_is_struct(base_type))
   continue;
if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
   continue;
if (glsl_count_attribute_slots(var->type, false) == 1)
   continue;
3108 unsigned offset = 0;
3109 for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
3110 members[i] = nir_variable_clone(var, nir);
3111 members[i]->type = glsl_get_struct_field(var->type, i);
3112 members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
3113 members[i]->data.location += offset;
3114 offset += glsl_count_attribute_slots(members[i]->type, false);
3115 nir_shader_add_variable(nir, members[i]);
3117 nir_foreach_function(function, nir) {
3118 bool func_progress = false;
if (!function->impl)
   continue;
nir_builder b;
nir_builder_init(&b, function->impl);
3123 nir_foreach_block(block, function->impl) {
3124 nir_foreach_instr_safe(instr, block) {
3125 switch (instr->type) {
3126 case nir_instr_type_deref: {
3127 nir_deref_instr *deref = nir_instr_as_deref(instr);
if (!(deref->modes & nir_var_shader_out))
   continue;
if (nir_deref_instr_get_variable(deref) != var)
   continue;
if (deref->deref_type != nir_deref_type_struct)
   continue;
nir_deref_instr *parent = nir_deref_instr_parent(deref);
if (parent->deref_type != nir_deref_type_var)
   continue;
3137 deref->modes = nir_var_shader_temp;
3138 parent->modes = nir_var_shader_temp;
3139 b.cursor = nir_before_instr(instr);
3140 nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
3141 nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
3142 nir_instr_remove(&deref->instr);
3143 func_progress = true;
3151 nir_metadata_preserve(function->impl, nir_metadata_none);
3153 var->data.mode = nir_var_shader_temp;
static void
zink_shader_dump(void *words, size_t size, const char *file)
{
   FILE *fp = fopen(file, "wb");
   if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", file);
   }
}
3172 static struct zink_shader_object
3173 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
3175 VkShaderModuleCreateInfo smci = {0};
3176 VkShaderCreateInfoEXT sci = {0};
if (zink_debug & ZINK_DEBUG_SPIRV) {
   char buf[256]; /* size assumed; the original declaration was elided */
   static int i;
   snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
   zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
}
3188 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3189 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3190 if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
3191 sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
3192 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3193 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3194 sci.pCode = spirv->words;
3196 sci.setLayoutCount = 2;
3197 VkDescriptorSetLayout dsl[2] = {0};
3198 dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
3199 sci.pSetLayouts = dsl;
3200 VkPushConstantRange pcr;
3201 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
pcr.offset = 0;
pcr.size = sizeof(struct zink_gfx_push_constant);
3204 sci.pushConstantRangeCount = 1;
3205 sci.pPushConstantRanges = &pcr;
3207 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3208 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3209 smci.pCode = spirv->words;
3212 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3213 static const struct spirv_to_nir_options spirv_options = {
3214 .environment = NIR_SPIRV_VULKAN,
3219 .tessellation = true,
3220 .float_controls = true,
3221 .image_ms_array = true,
3222 .image_read_without_format = true,
3223 .image_write_without_format = true,
3224 .storage_image_ms = true,
3225 .geometry_streams = true,
3226 .storage_8bit = true,
3227 .storage_16bit = true,
3228 .variable_pointers = true,
3229 .stencil_export = true,
3230 .post_depth_coverage = true,
3231 .transform_feedback = true,
3232 .device_group = true,
3233 .draw_parameters = true,
3234 .shader_viewport_index_layer = true,
3236 .physical_storage_buffer_address = true,
3237 .int64_atomics = true,
3238 .subgroup_arithmetic = true,
3239 .subgroup_basic = true,
3240 .subgroup_ballot = true,
3241 .subgroup_quad = true,
3242 .subgroup_shuffle = true,
3243 .subgroup_vote = true,
3244 .vk_memory_model = true,
3245 .vk_memory_model_device_scope = true,
3248 .demote_to_helper_invocation = true,
3249 .sparse_residency = true,
3252 .ubo_addr_format = nir_address_format_32bit_index_offset,
3253 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3254 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3255 .push_const_addr_format = nir_address_format_logical,
3256 .shared_addr_format = nir_address_format_32bit_offset,
3258 uint32_t num_spec_entries = 0;
3259 struct nir_spirv_specialization *spec_entries = NULL;
3260 VkSpecializationInfo sinfo = {0};
3261 VkSpecializationMapEntry me[3];
3262 uint32_t size[3] = {1,1,1};
3263 if (!zs->info.workgroup_size[0]) {
3264 sinfo.mapEntryCount = 3;
3265 sinfo.pMapEntries = &me[0];
sinfo.dataSize = sizeof(uint32_t) * 3;
sinfo.pData = size;
3268 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3269 for (int i = 0; i < 3; i++) {
3270 me[i].size = sizeof(uint32_t);
3271 me[i].constantID = ids[i];
3272 me[i].offset = i * sizeof(uint32_t);
3274 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3276 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3277 spec_entries, num_spec_entries,
3278 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
VkResult ret;
struct zink_shader_object obj;
3287 if (!separate || !screen->info.have_EXT_shader_object)
3288 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3290 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3291 bool success = zink_screen_handle_vkresult(screen, ret);
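/* Demote any i/o variable that has no remaining derefs to a temp so
 * nir_remove_dead_variables can drop it from the shader interface.
 */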
static void
prune_io(nir_shader *nir)
{
3299 nir_foreach_shader_in_variable_safe(var, nir) {
3300 if (!find_var_deref(nir, var))
3301 var->data.mode = nir_var_shader_temp;
3303 nir_foreach_shader_out_variable_safe(var, nir) {
3304 if (!find_var_deref(nir, var))
3305 var->data.mode = nir_var_shader_temp;
3307 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
static void
flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
{
   /* unconvert from zink_binding() */
3314 uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3315 assert(sampler_id < 32); //bitfield size for tracking
   zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
}
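/* Match a tex instruction's destination to its sampler's declared return
 * type; e.g. (illustrative) a fetch from a sampler returning 16-bit values
 * gets its dest rewritten to 16 bits and converted back to the original size
 * with u2uN/i2iN/f2fN for the existing consumers.
 */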
3319 static nir_ssa_def *
rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
{
3323 const struct glsl_type *type = glsl_without_array(var->type);
3324 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3325 bool is_int = glsl_base_type_is_integer(ret_type);
3326 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3327 unsigned dest_size = nir_dest_bit_size(tex->dest);
3328 b->cursor = nir_after_instr(&tex->instr);
3329 unsigned num_components = nir_dest_num_components(tex->dest);
3330 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3331 if (bit_size == dest_size && !rewrite_depth)
3333 nir_ssa_def *dest = &tex->dest.ssa;
3334 if (rewrite_depth && data) {
3335 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3336 flag_shadow_tex(var, data);
3338 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3341 if (bit_size != dest_size) {
3342 tex->dest.ssa.bit_size = bit_size;
3343 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3346 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3347 dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
3349 dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
3351 dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
3355 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
3356 } else if (rewrite_depth) {
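/* state for the depth/stencil swizzle emulation below: 'swizzle' comes from the
 * shader key and holds per-sampler channel mappings that can't be expressed in
 * a Vulkan image view, so they're applied manually on the sampled results
 */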
struct lower_zs_swizzle_state {
   bool shadow_only;
   unsigned base_sampler_id;
   const struct zink_zs_swizzle_key *swizzle;
};

static bool
lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct lower_zs_swizzle_state *state = data;
   const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
   assert(state->shadow_only || swizzle_key);
   if (instr->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
       (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
      return false;
   if (tex->is_shadow && tex->op == nir_texop_tg4)
      /* Will not even try to emulate the shadow comparison */
      return false;
   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   nir_variable *var = NULL;
   if (handle != -1)
      /* gtfo bindless depth texture mode */
      return false;
   nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
      if (glsl_type_is_sampler(glsl_without_array(img->type))) {
         unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
         if (tex->texture_index >= img->data.driver_location &&
             tex->texture_index < img->data.driver_location + size) {
            var = img;
            break;
         }
      }
   }
   assert(var);
   uint32_t sampler_id = var->data.binding - state->base_sampler_id;
   const struct glsl_type *type = glsl_without_array(var->type);
   enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
   bool is_int = glsl_base_type_is_integer(ret_type);
   unsigned num_components = nir_dest_num_components(tex->dest);
   if (tex->is_shadow)
      tex->is_new_style_shadow = true;
   nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
   assert(dest || !state->shadow_only);
   if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
      return false;
   else if (!dest)
      dest = &tex->dest.ssa;
   else
      tex->dest.ssa.num_components = 1;
   if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
      /* these require manual swizzles */
      if (tex->op == nir_texop_tg4) {
         assert(!tex->is_shadow);
         nir_ssa_def *swizzle;
         switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
         case PIPE_SWIZZLE_0:
            swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
               swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
            else
               swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
            break;
         default:
            if (!tex->component)
               return false;
            tex->component = 0;
            return true;
         }
         nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
         return true;
      }
      nir_ssa_def *vec[4];
      for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
         switch (swizzle_key->swizzle[sampler_id].s[i]) {
         case PIPE_SWIZZLE_0:
            vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
               vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
            else
               vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
            break;
         default:
            vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
            break;
         }
      }
      nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
      nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
   } else {
      assert(tex->is_shadow);
      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
      nir_ssa_def *splat = nir_vec(b, vec, num_components);
      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
   }
   return true;
}

static bool
lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
{
   unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
   struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
   return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
}
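/* gl_PointCoord is flipped between GL's and Vulkan's window-space conventions,
 * so this pass rewrites PNTC loads as (x, 1 - y) when the shader key requests it
 */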
static bool
invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var->data.location != VARYING_SLOT_PNTC)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
                               nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
invert_point_coord(nir_shader *nir)
{
   if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
      return false;
   return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
}
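/* common back end of every compile path: leave SSA, translate the NIR to
 * SPIR-V, and hand the words to zink_shader_spirv_compile for module creation
 */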
static struct zink_shader_object
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool separate)
{
   struct zink_shader_info *sinfo = &zs->sinfo;
   prune_io(nir);

   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct zink_shader_object obj;
   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
   if (spirv)
      obj = zink_shader_spirv_compile(screen, zs, spirv, separate);

   /* TODO: determine if there's any reason to cache spirv output? */
   if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zs->spirv = spirv;
   else
      ralloc_free(spirv);
   return obj;
}
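/* main per-key compile entrypoint: applies every shader-key-dependent lowering
 * (attribute decomposition, line/point smoothing and stippling, robustness,
 * swizzle emulation, uniform inlining) before handing off to compile_module
 */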
struct zink_shader_object
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
{
   struct zink_shader_info *sinfo = &zs->sinfo;
   bool need_optimize = false;
   bool inlined_uniforms = false;

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);
         inlined_uniforms = true;
      }

      /* TODO: use a separate mem ctx here for ralloc */

      if (!screen->optimal_keys) {
         switch (zs->info.stage) {
         case MESA_SHADER_VERTEX: {
            uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
            const struct zink_vs_key *vs_key = zink_vs_key(key);
            switch (vs_key->size) {
            case 4:
               decomposed_attrs = vs_key->u32.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
               break;
            case 2:
               decomposed_attrs = vs_key->u16.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
               break;
            case 1:
               decomposed_attrs = vs_key->u8.decomposed_attrs;
               decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
               break;
            default:
               break;
            }
            if (decomposed_attrs || decomposed_attrs_without_w)
               NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
            break;
         }

         case MESA_SHADER_GEOMETRY:
            if (zink_gs_key(key)->lower_line_stipple) {
               NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
               NIR_PASS_V(nir, nir_lower_var_copies);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_line_smooth) {
               NIR_PASS_V(nir, lower_line_smooth_gs);
               NIR_PASS_V(nir, nir_lower_var_copies);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_gl_point) {
               NIR_PASS_V(nir, lower_gl_point_gs);
               need_optimize = true;
            }

            if (zink_gs_key(key)->lower_pv_mode) {
               NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
               need_optimize = true; //TODO verify that this is required
            }
            break;

         default:
            break;
         }
      }

      switch (zs->info.stage) {
      case MESA_SHADER_VERTEX:
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->sinfo.have_xfb)
               sinfo->last_vertex = true;

            if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         if (zink_vs_key_base(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
         break;
      case MESA_SHADER_FRAGMENT:
         if (zink_fs_key(key)->lower_line_smooth) {
            NIR_PASS_V(nir, lower_line_smooth_fs,
                       zink_fs_key(key)->lower_line_stipple);
            need_optimize = true;
         } else if (zink_fs_key(key)->lower_line_stipple)
            NIR_PASS_V(nir, lower_line_stipple_fs);

         if (zink_fs_key(key)->lower_point_smooth) {
            NIR_PASS_V(nir, nir_lower_point_smooth);
            NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
            nir->info.fs.uses_discard = true;
            need_optimize = true;
         }

         if (zink_fs_key(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);

         if (!zink_fs_key_base(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key_base(key)->single_sample) {
            NIR_PASS_V(nir, nir_lower_single_sampled);
         }
         if (zink_fs_key_base(key)->coord_replace_bits)
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
         if (zink_fs_key_base(key)->point_coord_yinvert)
            NIR_PASS_V(nir, invert_point_coord);
         if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
            nir_foreach_shader_in_variable(var, nir)
               var->data.sample = true;
            nir->info.fs.uses_sample_qualifier = true;
            nir->info.fs.uses_sample_shading = true;
         }
         if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
            NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         nir_foreach_shader_in_variable_safe(var, nir) {
            if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
               continue;
            nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
            var->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         break;
      case MESA_SHADER_COMPUTE:
         if (zink_cs_key(key)->robust_access)
            NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
         break;
      default:
         break;
      }
      if (key->base.needs_zs_shader_swizzle) {
         assert(extra_data);
         NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
      }
      if (key->base.nonseamless_cube_mask) {
         NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
         need_optimize = true;
      }
   }
   if (screen->driconf.inline_uniforms) {
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
      NIR_PASS_V(nir, rewrite_bo_access, screen);
      NIR_PASS_V(nir, remove_bo_access, zs);
      need_optimize = true;
   }
   if (inlined_uniforms) {
      optimize_nir(nir, zs);

      /* This must be done again. */
      NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                       nir_var_shader_out);

      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
         zs->can_inline = false;
   } else if (need_optimize)
      optimize_nir(nir, zs);

   struct zink_shader_object obj = compile_module(screen, zs, nir, false);
   ralloc_free(nir);
   return obj;
}
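/* compile path for separate shaders (shader objects / pipeline libraries):
 * bindings are first remapped onto the shader's own descriptor layout
 */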
struct zink_shader_object
zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   int set = nir->info.stage == MESA_SHADER_FRAGMENT;
   unsigned offsets[4];
   zink_descriptor_shader_get_binding_offsets(zs, offsets);
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
      if (var->data.bindless)
         continue;
      var->data.descriptor_set = set;
      switch (var->data.mode) {
      case nir_var_mem_ubo:
         var->data.binding = !!var->data.driver_location;
         break;
      case nir_var_uniform:
         if (glsl_type_is_sampler(glsl_without_array(var->type)))
            var->data.binding += offsets[1];
         break;
      case nir_var_mem_ssbo:
         var->data.binding += offsets[2];
         break;
      case nir_var_image:
         var->data.binding += offsets[3];
         break;
      default:
         break;
      }
   }
   if (screen->driconf.inline_uniforms) {
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
      NIR_PASS_V(nir, rewrite_bo_access, screen);
      NIR_PASS_V(nir, remove_bo_access, zs);
   }
   optimize_nir(nir, zs);
   zink_descriptor_shader_init(screen, zs);
   struct zink_shader_object obj = compile_module(screen, zs, nir, true);
   ralloc_free(nir);
   return obj;
}
static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}
/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
{
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   uint64_t max_uniform_size = 0;

   if (!shader->info.num_ssbos && !shader->info.num_ubos)
      return false;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
      if (interface_type) {
         unsigned block_size = glsl_get_explicit_size(interface_type, true);
         if (glsl_get_length(interface_type) == 1) {
            /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
            const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
            if (glsl_type_is_array(f) && !glsl_array_size(f))
               block_size = 0;
         }
         if (block_size) {
            block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
            size = MAX2(size, block_size);
         }
      }
      if (var->data.mode == nir_var_mem_ubo) {
         if (var->data.driver_location)
            max_ubo_size = MAX2(max_ubo_size, size);
         else
            max_uniform_size = MAX2(max_uniform_size, size);
      } else {
         max_ssbo_size = MAX2(max_ssbo_size, size);
         if (interface_type) {
            if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
               needs_size = true;
         }
      }
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);

   struct glsl_struct_field field = {0};
   field.name = ralloc_strdup(shader, "base");
   if (shader->info.num_ubos) {
      if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
         field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
                                                 "uniform_0");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = 0;
      }

      unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
      uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
      if (num_ubos && ubos_used) {
         field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
         /* shrink array as much as possible */
         unsigned first_ubo = ffs(ubos_used) - 2;
         assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
         num_ubos -= first_ubo;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
                                                 "ubos");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
      }
   }
   if (shader->info.num_ssbos && zs->ssbos_used) {
      /* shrink array as much as possible */
      unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
      assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
      unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
      field.type = ssbo_type;
      nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                              glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
                                              "ssbos");
      var->interface_type = var->type;
      var->data.mode = nir_var_mem_ssbo;
      var->data.driver_location = first_ssbo;
   }
   return true;
}
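/* helpers for computing which buffer slots an access may touch: a constant
 * index maps to exactly one bit, while an indirect index conservatively marks
 * every slot (for UBOs, minus buffer 0, which is the push-descriptor uniform block)
 */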
static uint32_t
get_src_mask_ssbo(unsigned total, nir_src src)
{
   if (nir_src_is_const(src))
      return BITFIELD_BIT(nir_src_as_uint(src));
   return BITFIELD_MASK(total);
}

static uint32_t
get_src_mask_ubo(unsigned total, nir_src src)
{
   if (nir_src_is_const(src))
      return BITFIELD_BIT(nir_src_as_uint(src));
   return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
}
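/* scan all buffer access so unbreak_bos() only recreates the UBO/SSBO variables
 * that are actually referenced; the return value indicates whether a runtime
 * (unsized) SSBO length is needed, e.g. for get_ssbo_size
 */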
static bool
analyze_io(struct zink_shader *zs, nir_shader *shader)
{
   bool ret = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
            /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
               if (glsl_type_is_sampler(glsl_without_array(img->type))) {
                  unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
                  if (tex->texture_index >= img->data.driver_location &&
                      tex->texture_index < img->data.driver_location + size) {
                     BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
                     break;
                  }
               }
            }
            continue;
         }
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
            break;

         case nir_intrinsic_get_ssbo_size: {
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
            ret = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_fadd:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
            break;
         default:
            break;
         }
      }
   }
   return ret;
}
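/* bindless access is rewritten to index four large descriptor arrays; by
 * convention here slot 0 holds sampled images, 1 buffer textures, 2 storage
 * images, and 3 storage texel buffers (see the create_bindless_* helpers)
 */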
struct zink_bindless_info {
   nir_variable *bindless[4];
   unsigned bindless_set;
};

/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   return var;
}

/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}

/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   struct zink_bindless_info *bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;

   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic_add)
   OP_SWAP(atomic_and)
   OP_SWAP(atomic_comp_swap)
   OP_SWAP(atomic_dec_wrap)
   OP_SWAP(atomic_exchange)
   OP_SWAP(atomic_fadd)
   OP_SWAP(atomic_fmax)
   OP_SWAP(atomic_fmin)
   OP_SWAP(atomic_imax)
   OP_SWAP(atomic_imin)
   OP_SWAP(atomic_inc_wrap)
   OP_SWAP(atomic_or)
   OP_SWAP(atomic_umax)
   OP_SWAP(atomic_umin)
   OP_SWAP(atomic_xor)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(sparse_load)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim, bindless->bindless_set);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}
static bool
lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);
   return true;
}
/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}
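/* flattens (stage, descriptor type, index) into a Vulkan binding number: gfx
 * stages are strided by stage index while compute is clamped to base 0, and
 * compact_descriptors shifts SSBOs/images so they can share sets with UBOs/samplers
 */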
static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      unsigned base = stage;
      /* clamp compute bindings for better driver efficiency */
      if (gl_shader_stage_is_compute(stage))
         base = 0;
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         return base * 2 + !!index;

      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         assert(stage == MESA_SHADER_KERNEL);
         FALLTHROUGH;
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         if (stage == MESA_SHADER_KERNEL) {
            assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
            return index + PIPE_MAX_SAMPLERS;
         }
         FALLTHROUGH;
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         assert(index < PIPE_MAX_SAMPLERS);
         assert(stage != MESA_SHADER_KERNEL);
         return (base * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_SAMPLER:
         assert(index < PIPE_MAX_SAMPLERS);
         assert(stage == MESA_SHADER_KERNEL);
         return index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < ZINK_MAX_SHADER_IMAGES);
         if (stage == MESA_SHADER_KERNEL)
            return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
         return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));

      default:
         unreachable("unexpected type");
      }
   }
}
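/* fold a bindless variable into the shared bindless descriptor array matching
 * its descriptor type, recursing through struct members first
 */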
static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   unsigned binding;
   switch (vktype) {
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      binding = 0;
      break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      binding = 1;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      binding = 2;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      binding = 3;
      break;
   default:
      unreachable("unknown");
   }
   if (!bindless->bindless[binding]) {
      bindless->bindless[binding] = nir_variable_clone(var, nir);
      bindless->bindless[binding]->data.bindless = 0;
      bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
      bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
      if (!bindless->bindless[binding]->data.image.format)
         bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless->bindless[binding]);
   } else {
      assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}
static bool
convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   if (instr->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
      return false;
   if (tex->is_sparse && screen->need_2D_sparse) {
      /* no known case of this exists: only nvidia can hit it, and nothing uses it */
      mesa_loge("unhandled/unsupported 1D sparse texture!");
      abort();
   }
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   b->cursor = nir_before_instr(instr);
   tex->coord_components++;
   unsigned srcs[] = {
      nir_tex_src_coord,
      nir_tex_src_offset,
      nir_tex_src_ddx,
      nir_tex_src_ddy,
   };
   for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
      unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
      if (c == -1)
         continue;
      if (tex->src[c].src.ssa->num_components == tex->coord_components)
         continue;
      nir_ssa_def *def;
      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
      if (tex->src[c].src.ssa->num_components == 1)
         def = nir_vec2(b, tex->src[c].src.ssa, zero);
      else
         def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
   }
   b->cursor = nir_after_instr(instr);
   unsigned needed_components = nir_tex_instr_dest_size(tex);
   unsigned num_components = tex->dest.ssa.num_components;
   if (needed_components > num_components) {
      tex->dest.ssa.num_components = needed_components;
      assert(num_components < 3);
      /* take either xz or just x since this is promoted to 2D from 1D */
      uint32_t mask = num_components == 2 ? (1|4) : 1;
      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
   }
   return true;
}
static bool
lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
{
   bool found = false;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
      const struct glsl_type *type = glsl_without_array(var->type);
      unsigned length = glsl_get_length(var->type);
      if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
         continue;
      const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
      var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
      found = true;
   }
   if (found)
      nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
   return found;
}
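/* single pre-compile scan: records sparse-texture usage, marks referenced
 * images in shader_info, and warns once if float image atomics are attempted
 * without VK_EXT_shader_atomic_float
 */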
static void
scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      nir_foreach_block_safe(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
               zs->sinfo.have_sparse |= tex->is_sparse;
            }
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
                intr->intrinsic == nir_intrinsic_image_deref_size ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {

               nir_variable *var =
                  nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

               /* Structs have been lowered already, so get_aoa_size is sufficient. */
               const unsigned size =
                  glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
               BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
                                var->data.binding + (MAX2(size, 1) - 1));
            }
            if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
               zs->sinfo.have_sparse = true;

            static bool warned = false;
            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
               switch (intr->intrinsic) {
               case nir_intrinsic_image_deref_atomic_add: {
                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  if (util_format_is_float(var->data.image.format))
                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                  break;
               }
               default:
                  break;
               }
            }
         }
      }
   }
}
static bool
is_residency_code(nir_ssa_def *src)
{
   nir_instr *parent = src->parent_instr;
   while (true) {
      if (parent->type == nir_instr_type_intrinsic) {
         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
         return false;
      }
      if (parent->type == nir_instr_type_tex)
         return true;
      assert(parent->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      parent = alu->src[0].src.ssa->parent_instr;
   }
}
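/* sparse residency codes reach NIR as iand chains over tex results, but ntv
 * stores residency differently, so code_and and is_sparse_texels_resident are
 * rewritten into plain scalar ops below
 */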
static bool
lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
      b->cursor = nir_before_instr(&instr->instr);
      nir_ssa_def *src0;
      if (is_residency_code(instr->src[0].ssa))
         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
      else
         src0 = instr->src[0].ssa;
      nir_ssa_def *src1;
      if (is_residency_code(instr->src[1].ssa))
         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
      else
         src1 = instr->src[1].ssa;
      nir_ssa_def *def = nir_iand(b, src0, src1);
      nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
      nir_instr_remove(in);
      return true;
   }
   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
      return false;

   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
    * so just rewrite as the first component since ntv is going to use a different
    * method for storing the residency value anyway
    */
   b->cursor = nir_before_instr(&instr->instr);
   nir_instr *parent = instr->src[0].ssa->parent_instr;
   if (is_residency_code(instr->src[0].ssa)) {
      assert(parent->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
      nir_instr_remove(parent);
   } else {
      nir_ssa_def *src;
      if (parent->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
         src = intr->src[0].ssa;
      } else {
         assert(parent->type == nir_instr_type_alu);
         nir_alu_instr *alu = nir_instr_as_alu(parent);
         src = alu->src[0].src.ssa;
      }
      if (instr->dest.ssa.bit_size != 32) {
         if (instr->dest.ssa.bit_size == 1)
            src = nir_ieq_imm(b, src, 1);
         else
            src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
      }
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
      nir_instr_remove(in);
   }
   return true;
}

static bool
lower_sparse(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
}
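/* make every tex instruction's dest type/bit size agree with the sampler
 * variable it reads: bindless ops carry the variable via their deref src,
 * otherwise it's found by texture index
 */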
static bool
match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(in);
   if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
      return false;
   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   nir_variable *var = NULL;
   if (handle != -1) {
      var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
   } else {
      nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
         if (glsl_type_is_sampler(glsl_without_array(img->type))) {
            unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
            if (tex->texture_index >= img->data.driver_location &&
                tex->texture_index < img->data.driver_location + size) {
               var = img;
               break;
            }
         }
      }
   }
   return !!rewrite_tex_dest(b, tex, var, data);
}

static bool
match_tex_dests(nir_shader *shader, struct zink_shader *zs)
{
   return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
}
static bool
split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_alu)
      return false;
   nir_alu_instr *alu = nir_instr_as_alu(in);
   switch (alu->op) {
   case nir_op_ubitfield_extract:
   case nir_op_ibitfield_extract:
   case nir_op_bitfield_insert:
      break;
   default:
      return false;
   }
   unsigned num_components = nir_dest_num_components(alu->dest.dest);
   if (num_components == 1)
      return false;
   b->cursor = nir_before_instr(in);
   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++) {
      if (alu->op == nir_op_bitfield_insert)
         dests[i] = nir_bitfield_insert(b,
                                        nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                        nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                        nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
                                        nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
      else if (alu->op == nir_op_ubitfield_extract)
         dests[i] = nir_ubitfield_extract(b,
                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
      else
         dests[i] = nir_ibitfield_extract(b,
                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
   }
   nir_ssa_def *dest = nir_vec(b, dests, num_components);
   nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
   nir_instr_remove(in);
   return true;
}

static bool
split_bitfields(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
}
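/* kernel (CL) images arrive untyped, so the helpers below infer each image or
 * sampler variable's result type from the intrinsics/tex ops that use it and
 * then retype all existing derefs to match
 */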
static void
rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function(function, nir) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            nir_variable *img = nir_deref_instr_get_variable(deref);
            if (img != var)
               continue;
            if (glsl_type_is_array(var->type)) {
               if (deref->deref_type == nir_deref_type_array)
                  deref->type = glsl_without_array(var->type);
               else
                  deref->type = var->type;
            } else {
               deref->type = var->type;
            }
         }
      }
   }
}

static void
type_image(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function(function, nir) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {
               nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
               nir_variable *img = nir_deref_instr_get_variable(deref);
               if (img != var)
                  continue;
               nir_alu_type alu_type = nir_intrinsic_src_type(intr);
               const struct glsl_type *type = glsl_without_array(var->type);
               if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
                  assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
                  continue;
               }
               const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
               if (glsl_type_is_array(var->type))
                  img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
               var->type = img_type;
               rewrite_cl_derefs(nir, var);
               return;
            }
         }
      }
   }
   nir_foreach_function(function, nir) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_image_deref_size)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            nir_variable *img = nir_deref_instr_get_variable(deref);
            if (img != var)
               continue;
            nir_alu_type alu_type = nir_type_uint32;
            const struct glsl_type *type = glsl_without_array(var->type);
            if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
               continue;
            }
            const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
            if (glsl_type_is_array(var->type))
               img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
            var->type = img_type;
            rewrite_cl_derefs(nir, var);
            return;
         }
      }
   }
   var->data.mode = nir_var_shader_temp;
}
static nir_variable *
find_sampler_var(nir_shader *nir, unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}

static bool
type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            switch (tex->op) {
            case nir_texop_lod:
            case nir_texop_txs:
            case nir_texop_query_levels:
            case nir_texop_texture_samples:
            case nir_texop_samples_identical:
               continue;
            default:
               break;
            }
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
            assert(var);
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
            if (size > 1)
               img_type = glsl_array_type(img_type, size, 0);
            var->type = img_type;
            progress = true;
         }
      }
   }
   nir_foreach_function(function, nir) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            switch (tex->op) {
            case nir_texop_lod:
            case nir_texop_txs:
            case nir_texop_query_levels:
            case nir_texop_texture_samples:
            case nir_texop_samples_identical:
               break;
            default:
               continue;
            }
            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
            nir_variable *var = find_sampler_var(nir, tex->texture_index);
            assert(var);
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
            if (size > 1)
               img_type = glsl_array_type(img_type, size, 0);
            var->type = img_type;
            progress = true;
         }
      }
   }
   return progress;
}
static bool
delete_samplers(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(glsl_without_array(var->type))) {
         var->data.mode = nir_var_shader_temp;
         progress = true;
      }
   }
   return progress;
}

static bool
type_images(nir_shader *nir, unsigned *sampler_mask)
{
   bool progress = false;
   progress |= delete_samplers(nir);
   progress |= type_sampler_vars(nir, sampler_mask);
   nir_foreach_variable_with_modes(var, nir, nir_var_image) {
      type_image(nir, var);
      progress = true;
   }
   return progress;
}
/* attempt to assign io for separate shaders */
static bool
fixup_io_locations(nir_shader *nir)
{
   nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
   /* i/o interface blocks are required to be EXACT matches between stages:
    * iterate over all locations and set locations incrementally
    */
   unsigned slot = 0;
   for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
      if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
         continue;
      nir_variable *var = nir_find_variable_with_location(nir, mode, i);
      if (!var) {
         /* locations used between stages are not required to be contiguous */
         if (i >= VARYING_SLOT_VAR0)
            slot++;
         continue;
      }
      unsigned size;
      /* ensure variable is given enough slots */
      if (nir_is_arrayed_io(var, nir->info.stage))
         size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
      else
         size = glsl_count_vec4_slots(var->type, false, false);
      var->data.driver_location = slot;
      slot += size;
      /* ensure the consumed slots aren't double iterated */
      i += size - 1;
   }
   return true;
}
static uint32_t
zink_flat_flags(struct nir_shader *shader)
{
   uint32_t flat_flags = 0, c = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (var->data.interpolation == INTERP_MODE_FLAT)
         flat_flags |= 1u << (c++);
   }
   return flat_flags;
}
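/* main shader-creation entrypoint: runs all key-independent lowering, assigns
 * descriptor set/binding numbers, records binding metadata for layout creation,
 * and serializes the NIR for later per-key compiles
 */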
4787 struct zink_shader *
4788 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
4789 const struct pipe_stream_output_info *so_info)
4791 struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
4792 bool have_psiz = false;
4794 ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
4795 nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
4797 ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
4798 ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4800 util_queue_fence_init(&ret->precompile.fence);
4801 util_dynarray_init(&ret->pipeline_libs, ret);
4802 ret->hash = _mesa_hash_pointer(ret);
4804 ret->programs = _mesa_pointer_set_create(NULL);
4805 simple_mtx_init(&ret->lock, mtx_plain);
4807 nir_variable_mode indirect_derefs_modes = 0;
4808 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4809 nir->info.stage == MESA_SHADER_TESS_EVAL)
4810 indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
4812 NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
4815 if (nir->info.stage < MESA_SHADER_COMPUTE)
4816 create_gfx_pushconst(nir);
4818 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
4819 nir->info.stage == MESA_SHADER_TESS_EVAL)
4820 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
4822 if (nir->info.stage < MESA_SHADER_FRAGMENT)
4823 have_psiz = check_psiz(nir);
4824 if (nir->info.stage == MESA_SHADER_FRAGMENT)
4825 ret->flat_flags = zink_flat_flags(nir);
4827 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
4828 NIR_PASS_V(nir, fixup_io_locations);
4830 NIR_PASS_V(nir, lower_basevertex);
4831 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
4832 NIR_PASS_V(nir, lower_baseinstance);
4833 NIR_PASS_V(nir, lower_sparse);
4834 NIR_PASS_V(nir, split_bitfields);
4835 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
4837 if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
4838 NIR_PASS_V(nir, nir_lower_discard_or_demote,
4839 screen->driconf.glsl_correct_derivatives_after_discard ||
4840 nir->info.use_legacy_math_rules);
4843 if (screen->need_2D_zs)
4844 NIR_PASS_V(nir, lower_1d_shadow, screen);
4847 nir_lower_subgroups_options subgroup_options = {0};
4848 subgroup_options.lower_to_scalar = true;
4849 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
4850 subgroup_options.ballot_bit_size = 32;
4851 subgroup_options.ballot_components = 4;
4852 subgroup_options.lower_subgroup_masks = true;
4853 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
4854 subgroup_options.subgroup_size = 1;
4855 subgroup_options.lower_vote_trivial = true;
4857 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
4860 if (so_info && so_info->num_outputs)
4861 NIR_PASS_V(nir, split_blocks);
4863 optimize_nir(nir, NULL);
4864 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
4865 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
4866 nir_lower_demote_if_to_cf |
4867 nir_lower_terminate_if_to_cf));
4868 NIR_PASS_V(nir, nir_lower_fragcolor,
4869 nir->info.fs.color_is_dual_source ? 1 : 8);
4870 NIR_PASS_V(nir, lower_64bit_vertex_attribs);
4871 bool needs_size = analyze_io(ret, nir);
4872 NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
4873 /* run in compile if there could be inlined uniforms */
4874 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
4875 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
4876 NIR_PASS_V(nir, rewrite_bo_access, screen);
4877 NIR_PASS_V(nir, remove_bo_access, ret);
4880 if (zink_debug & ZINK_DEBUG_NIR) {
4881 fprintf(stderr, "NIR shader:\n---8<---\n");
4882 nir_print_shader(nir, stderr);
4883 fprintf(stderr, "---8<---\n");
4886 struct zink_bindless_info bindless = {0};
4887 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
4888 bool has_bindless_io = false;
4889 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
4890 var->data.is_xfb = false;
4891 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
4892 has_bindless_io = true;
4896 if (has_bindless_io)
4897 NIR_PASS_V(nir, lower_bindless_io);
4899 optimize_nir(nir, NULL);
4902 scan_nir(screen, nir, ret);
4903 unsigned sampler_mask = 0;
4904 if (nir->info.stage == MESA_SHADER_KERNEL) {
4905 NIR_PASS_V(nir, type_images, &sampler_mask);
4906 enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
4907 VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
4908 u_foreach_bit(s, sampler_mask) {
4909 ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
4910 ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
4911 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4912 ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
4913 ret->num_bindings[ztype]++;
4915 ret->sinfo.sampler_mask = sampler_mask;
4918 unsigned ubo_binding_mask = 0;
4919 unsigned ssbo_binding_mask = 0;
4920 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
4921 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
4924 nir_var_mem_ssbo)) {
4925 enum zink_descriptor_type ztype;
4926 const struct glsl_type *type = glsl_without_array(var->type);
4927 if (var->data.mode == nir_var_mem_ubo) {
4928 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
4929 /* buffer 0 is a push descriptor */
4930 var->data.descriptor_set = !!var->data.driver_location;
4931 var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
4932 zink_binding(nir->info.stage,
4933 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
4934 var->data.driver_location,
4935 screen->compact_descriptors);
4936 assert(var->data.driver_location || var->data.binding < 10);
4937 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
4938 int binding = var->data.binding;
4940 if (!var->data.driver_location) {
4941 ret->has_uniforms = true;
4942 } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
4943 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4944 ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
4945 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
4946 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4947 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4948 ret->num_bindings[ztype]++;
4949 ubo_binding_mask |= BITFIELD_BIT(binding);
4951 } else if (var->data.mode == nir_var_mem_ssbo) {
4952 ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
4953 var->data.descriptor_set = screen->desc_set_id[ztype];
4954 var->data.binding = zink_binding(nir->info.stage,
4955 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4956 var->data.driver_location,
4957 screen->compact_descriptors);
4958 if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
4959 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
4960 ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
4961 ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
4962 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
4963 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
4964 ret->num_bindings[ztype]++;
4965 ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
         } else {
            assert(var->data.mode == nir_var_uniform ||
                   var->data.mode == nir_var_image);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, &bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
                  vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = screen->desc_set_id[ztype];
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            } else if (var->data.mode == nir_var_uniform) {
               /* this is a dead uniform */
               var->data.mode = 0;
               exec_node_remove(&var->node);
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
   ret->bindless |= bindless_lowered;

   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
      NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
   if (nir->info.stage != MESA_SHADER_KERNEL)
      NIR_PASS_V(nir, match_tex_dests, ret);

   if (!nir->info.internal)
      nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
   if (so_info && so_info->num_outputs)
      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
   else if (have_psiz) {
      bool have_fake_psiz = false;
      nir_variable *psiz = NULL;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == VARYING_SLOT_PSIZ) {
            if (!var->data.explicit_location)
               have_fake_psiz = true;
            else
               psiz = var;
         }
      }
      if (have_fake_psiz && psiz) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }

   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));

   ret->can_inline = true;

   return ret;
}
void
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   nir_lower_tex_options tex_opts = {
      .lower_invalid_implicit_lod = true,
   };
   /*
      Sampled Image must be an object whose type is OpTypeSampledImage.
      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
      or Rect, and the Arrayed and MS operands must be 0.
      - SPIRV, OpImageSampleProj* opcodes
    */
   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
   tex_opts.lower_txp_array = true;
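   /* lower_txp rewrites projective sampling into an explicit divide by the
    * projector followed by a regular sample, conceptually:
    *
    *    textureProj(s, p)  ->  texture(s, p.xy / p.w)
    *
    * which is needed here because OpImageSampleProj* forbids the dims masked
    * above (see the spec quote). */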
   if (!screen->info.feats.features.shaderImageGatherExtended)
      tex_opts.lower_tg4_offsets = true;
   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   optimize_nir(nir, NULL);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);
}
void
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   _mesa_set_destroy(shader->programs, NULL);
   util_queue_fence_wait(&shader->precompile.fence);
   util_queue_fence_destroy(&shader->precompile.fence);
   zink_descriptor_shader_deinit(screen, shader);
   if (screen->info.have_EXT_shader_object) {
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
   } else {
      if (shader->precompile.obj.mod)
         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
      if (shader->precompile.gpl)
         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
   }
   blob_finish(&shader->blob);
   ralloc_free(shader->spirv);
   free(shader->precompile.bindings);
   ralloc_free(shader);
}
void
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   util_queue_fence_wait(&shader->precompile.fence);
   set_foreach(shader->programs, entry) {
      struct zink_gfx_program *prog = (void*)entry->key;
      gl_shader_stage stage = shader->info.stage;
      assert(stage < ZINK_GFX_SHADER_COUNT);
      unsigned stages_present = prog->stages_present;
      if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
         stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
      unsigned idx = zink_program_cache_stages(stages_present);
      if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
         struct hash_table *ht = &prog->ctx->program_cache[idx];
         simple_mtx_lock(&prog->ctx->program_lock[idx]);
         struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
         assert(he && he->data == prog);
         _mesa_hash_table_remove(ht, he);
         prog->base.removed = true;
         simple_mtx_unlock(&prog->ctx->program_lock[idx]);
         util_queue_fence_wait(&prog->base.cache_fence);
      }

      for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
         for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
            hash_table_foreach(&prog->pipelines[r][i], entry) {
               struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;

               util_queue_fence_wait(&pc_entry->fence);
            }
         }
      }

      while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
         struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
         //this condition is equivalent to verifying that, for each bit stages_present_i in stages_present,
         //stages_present_i implies libs->stages_present_i
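         //e.g., stages_present = VS|GS|FS with libs->stages_present = VS|FS
         //leaves the GS bit set after masking, so that lib is skipped below
         //(illustrative stage names; the bits are MESA_SHADER_* values)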
         if ((stages_present & ~(libs->stages_present & stages_present)) != 0)
            continue;
         if (!libs->removed) {
            libs->removed = true;
            simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
            _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
            simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
         }
         zink_gfx_lib_cache_unref(screen, libs);
      }

      if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
         prog->shaders[stage] = NULL;
         prog->stages_remaining &= ~BITFIELD_BIT(stage);
      }
      /* only remove generated tcs during parent tes destruction */
      if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
         prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
      if (stage != MESA_SHADER_FRAGMENT &&
          prog->shaders[MESA_SHADER_GEOMETRY] &&
          prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
          shader) {
         prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
      }
      zink_gfx_program_reference(screen, &prog, NULL);
   }
   if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
       shader->non_fs.generated_tcs) {
      /* automatically destroy generated tcs shaders when tes is destroyed */
      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
      shader->non_fs.generated_tcs = NULL;
   }
   for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
      for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
             shader->non_fs.generated_gs[i][j]) {
            /* automatically destroy generated gs shaders when owner is destroyed */
            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
            shader->non_fs.generated_gs[i][j] = NULL;
         }
      }
   }
   zink_shader_free(screen, shader);
}
struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
{
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
   return zink_shader_spirv_compile(screen, zs, NULL, false);
}
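/* Context note (an inference from the shortcut above): tcs_vertices_out_word
 * is recorded during SPIR-V emission as the word offset of the
 * OpExecutionMode OutputVertices literal, so re-specializing the patch size
 * is a single in-place word write rather than a full NIR -> SPIR-V recompile. */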
/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
  some_var_out = some_var[gl_InvocationID];
}
*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
{
   struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
   util_queue_fence_init(&ret->precompile.fence);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

   nir_foreach_shader_in_variable(var, tes) {
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
         continue;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
         const struct glsl_type *type = var->type;
         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
         out_type = glsl_array_type(type, vertices_per_patch, 0);
      }

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7). Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch. The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
      nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
      nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
      copy_vars(&b, out_value, in_value);
   }
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   create_gfx_pushconst(nir);

   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
                                                    .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
                                                    .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   *nir_ret = nir;
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));
   ret->non_fs.is_generated = true;
   return ret;
}
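/* Usage sketch (assumed caller flow; names here are illustrative): a TES
 * bound without an app-provided TCS gets a generated passthrough TCS, later
 * specialized to the current patch size:
 *
 *    nir_shader *tcs_nir;
 *    struct zink_shader *tcs =
 *       zink_shader_tcs_create(screen, tes_nir, vertices_per_patch, &tcs_nir);
 *    ...
 *    struct zink_shader_object obj =
 *       zink_shader_tcs_compile(screen, tcs, patch_vertices);
 */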
bool
zink_shader_has_cubes(nir_shader *nir)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
         return true;
   }
   return false;
}
nir_shader *
zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
{
   struct blob_reader blob_reader;
   blob_reader_init(&blob_reader, blob->data, blob->size);
   return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
}
nir_shader *
zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
{
   return zink_shader_blob_deserialize(screen, &zs->blob);
}
void
zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
{
   blob_init(blob);
#ifndef NDEBUG
   bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
#else
   bool strip = false;
#endif
   nir_serialize(blob, nir, strip);
}
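/* Illustrative round-trip (not driver code): shaders live as serialized blobs
 * rather than as NIR, and are rehydrated on demand:
 *
 *    struct blob blob;
 *    zink_shader_serialize_blob(nir, &blob);
 *    ...
 *    nir_shader *clone = zink_shader_blob_deserialize(screen, &blob);
 *    // use clone, then ralloc_free(clone) when finished
 */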
void
zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   nir_print_shader(nir, fp);
   ralloc_free(nir);
}