From 8ed9d38e73d30b94d829c4b72beceef0ef0c5040 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 22 Dec 2021 19:32:06 -0500 Subject: [PATCH] radeonsi: move si_nir_scan_shader into si_shader_info.c Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/meson.build | 1 + src/gallium/drivers/radeonsi/si_shader.h | 4 +- src/gallium/drivers/radeonsi/si_shader_info.c | 572 ++++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader_nir.c | 553 +------------------------ 4 files changed, 577 insertions(+), 553 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_shader_info.c diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 88d8ccc..ce907aa 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -49,6 +49,7 @@ files_libradeonsi = files( 'si_sdma_copy_image.c', 'si_shader.c', 'si_shader.h', + 'si_shader_info.c', 'si_shader_internal.h', 'si_shader_llvm.c', 'si_shader_llvm_gs.c', diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 881653f8..292edf3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -949,6 +949,9 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen, unsigned *lds_s const char *si_get_shader_name(const struct si_shader *shader); void si_shader_binary_clean(struct si_shader_binary *binary); +/* si_shader_info.c */ +void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info); + /* si_shader_llvm_gs.c */ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, @@ -956,7 +959,6 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, struct pipe_debug_callback *debug); /* si_shader_nir.c */ -void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info); void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first); void si_nir_late_opts(nir_shader *nir); char *si_finalize_nir(struct pipe_screen *screen, void *nirptr); diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c new file mode 100644 index 0000000..15ad048 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -0,0 +1,572 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "ac_nir_to_llvm.h" +#include "si_shader.h" +#include "util/mesa-sha1.h" + + +struct si_shader_profile { + uint32_t sha1[SHA1_DIGEST_LENGTH32]; + uint32_t options; +}; + +static struct si_shader_profile profiles[] = +{ + { + /* Plot3D */ + {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451}, + SI_PROFILE_VS_NO_BINNING, + }, + { + /* Viewperf/Energy isn't affected by the discard bug. */ + {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0}, + SI_PROFILE_IGNORE_LLVM_DISCARD_BUG, + }, + { + /* Viewperf/Medical */ + {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0}, + SI_PROFILE_PS_NO_BINNING, + }, + { + /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32, + * probably due to interpolation performance. + */ + {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8}, + SI_PROFILE_WAVE64, + }, +}; + +static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type) +{ + for (unsigned i = 0; i < instr->num_srcs; i++) { + if (instr->src[i].src_type == type) + return &instr->src[i].src; + } + return NULL; +} + +static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr, + bool is_input) +{ + unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ + + if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { + nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); + + if (baryc) { + if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) + interp = nir_intrinsic_interp_mode(baryc); + else + unreachable("unknown barycentric intrinsic"); + } else { + unreachable("unknown barycentric expression"); + } + } + + unsigned mask, bit_size; + bool is_output_load; + + if (nir_intrinsic_has_write_mask(intr)) { + mask = nir_intrinsic_write_mask(intr); /* store */ + bit_size = nir_src_bit_size(intr->src[0]); + is_output_load = false; + } else { + mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ + bit_size = intr->dest.ssa.bit_size; + is_output_load = !is_input; + } + assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered"); + + /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs + * where the mask is untyped. + */ + if (bit_size == 16 && !is_input) { + unsigned new_mask = 0; + for (unsigned i = 0; i < 4; i++) { + if (mask & (1 << i)) + new_mask |= 0x1 << (i / 2); + } + mask = new_mask; + } + + mask <<= nir_intrinsic_component(intr); + + nir_src offset = *nir_get_io_offset_src(intr); + bool indirect = !nir_src_is_const(offset); + if (!indirect) + assert(nir_src_as_uint(offset) == 0); + + unsigned semantic = 0; + /* VS doesn't have semantics. */ + if (info->stage != MESA_SHADER_VERTEX || !is_input) + semantic = nir_intrinsic_io_semantics(intr).location; + + if (info->stage == MESA_SHADER_FRAGMENT && !is_input) { + /* Never use FRAG_RESULT_COLOR directly. */ + if (semantic == FRAG_RESULT_COLOR) + semantic = FRAG_RESULT_DATA0; + semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; + } + + unsigned driver_location = nir_intrinsic_base(intr); + unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1; + + if (is_input) { + assert(driver_location + num_slots <= ARRAY_SIZE(info->input)); + + for (unsigned i = 0; i < num_slots; i++) { + unsigned loc = driver_location + i; + + info->input[loc].semantic = semantic + i; + + if (semantic == VARYING_SLOT_PRIMITIVE_ID) + info->input[loc].interpolate = INTERP_MODE_FLAT; + else + info->input[loc].interpolate = interp; + + if (mask) { + info->input[loc].usage_mask |= mask; + if (bit_size == 16) { + if (nir_intrinsic_io_semantics(intr).high_16bits) + info->input[loc].fp16_lo_hi_valid |= 0x2; + else + info->input[loc].fp16_lo_hi_valid |= 0x1; + } + info->num_inputs = MAX2(info->num_inputs, loc + 1); + } + } + } else { + /* Outputs. */ + assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); + + for (unsigned i = 0; i < num_slots; i++) { + unsigned loc = driver_location + i; + + info->output_semantic[loc] = semantic + i; + + if (is_output_load) { + /* Output loads have only a few things that we need to track. */ + info->output_readmask[loc] |= mask; + } else if (mask) { + /* Output stores. */ + unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams << + (nir_intrinsic_component(intr) * 2); + unsigned new_mask = mask & ~info->output_usagemask[loc]; + + for (unsigned i = 0; i < 4; i++) { + unsigned stream = (gs_streams >> (i * 2)) & 0x3; + + if (new_mask & (1 << i)) { + info->output_streams[loc] |= stream << (i * 2); + info->num_stream_output_components[stream]++; + } + } + + if (nir_intrinsic_has_src_type(intr)) + info->output_type[loc] = nir_intrinsic_src_type(intr); + else if (nir_intrinsic_has_dest_type(intr)) + info->output_type[loc] = nir_intrinsic_dest_type(intr); + else + info->output_type[loc] = nir_type_float32; + + info->output_usagemask[loc] |= mask; + info->num_outputs = MAX2(info->num_outputs, loc + 1); + + if (info->stage == MESA_SHADER_FRAGMENT && + semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) { + unsigned index = semantic - FRAG_RESULT_DATA0; + + if (nir_intrinsic_src_type(intr) == nir_type_float16) + info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2); + else if (nir_intrinsic_src_type(intr) == nir_type_int16) + info->output_color_types |= SI_TYPE_INT16 << (index * 2); + else if (nir_intrinsic_src_type(intr) == nir_type_uint16) + info->output_color_types |= SI_TYPE_UINT16 << (index * 2); + } + } + } + } +} + +static bool is_bindless_handle_indirect(nir_instr *src) +{ + /* Check if the bindless handle comes from indirect load_ubo. */ + if (src->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) { + if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0])) + return true; + } else { + /* Some other instruction. Return the worst-case result. */ + return true; + } + return false; +} + +static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info, + nir_instr *instr) +{ + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle); + + /* Gather the types of used VMEM instructions that return something. */ + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_lod: + case nir_texop_tg4: + info->uses_vmem_return_type_sampler_or_bvh = true; + break; + default: + info->uses_vmem_return_type_other = true; + break; + } + + if (handle) { + info->uses_bindless_samplers = true; + + if (is_bindless_handle_indirect(handle->ssa->parent_instr)) + info->uses_indirect_descriptor = true; + } else { + const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref); + + if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref))) + info->uses_indirect_descriptor = true; + } + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name; + bool is_ssbo = strstr(intr_name, "ssbo"); + bool is_image = strstr(intr_name, "image") == intr_name; + bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name; + + /* Gather the types of used VMEM instructions that return something. */ + if (nir_intrinsic_infos[intr->intrinsic].has_dest) { + switch (intr->intrinsic) { + case nir_intrinsic_load_ubo: + if (!nir_src_is_const(intr->src[1])) + info->uses_vmem_return_type_other = true; + break; + case nir_intrinsic_load_constant: + info->uses_vmem_return_type_other = true; + break; + + case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */ + case nir_intrinsic_load_tess_level_outer: /* TES input read from memory */ + case nir_intrinsic_load_tess_level_inner: /* TES input read from memory */ + info->uses_vmem_return_type_other = true; + break; + + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_per_vertex_input: + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL) + info->uses_vmem_return_type_other = true; + break; + + default: + if (is_image || + is_bindless_image || + is_ssbo || + (strstr(intr_name, "global") == intr_name || + intr->intrinsic == nir_intrinsic_load_global || + intr->intrinsic == nir_intrinsic_store_global) || + strstr(intr_name, "scratch")) + info->uses_vmem_return_type_other = true; + break; + } + } + + if (is_bindless_image) + info->uses_bindless_images = true; + + if (nir_intrinsic_writes_external_memory(intr)) + info->num_memory_stores++; + + if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0]))) + info->uses_indirect_descriptor = true; + + if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr)) + info->uses_indirect_descriptor = true; + + if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo && + !nir_src_is_const(intr->src[0])) + info->uses_indirect_descriptor = true; + + switch (intr->intrinsic) { + case nir_intrinsic_store_ssbo: + if (!nir_src_is_const(intr->src[1])) + info->uses_indirect_descriptor = true; + break; + case nir_intrinsic_load_ubo: + if (!nir_src_is_const(intr->src[0])) + info->uses_indirect_descriptor = true; + break; + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_workgroup_id: { + unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); + while (mask) { + unsigned i = u_bit_scan(&mask); + + if (intr->intrinsic == nir_intrinsic_load_workgroup_id) + info->uses_block_id[i] = true; + else + info->uses_thread_id[i] = true; + } + break; + } + case nir_intrinsic_load_color0: + case nir_intrinsic_load_color1: { + unsigned index = intr->intrinsic == nir_intrinsic_load_color1; + uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa); + info->colors_read |= mask << (index * 4); + + switch (info->color_interpolate[index]) { + case INTERP_MODE_SMOOTH: + if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) + info->uses_persp_sample = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) + info->uses_persp_centroid = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) + info->uses_persp_center = true; + break; + case INTERP_MODE_NOPERSPECTIVE: + if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) + info->uses_linear_sample = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) + info->uses_linear_centroid = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) + info->uses_linear_center = true; + break; + case INTERP_MODE_COLOR: + /* We don't know the final value. This will be FLAT if flatshading is enabled + * in the rasterizer state, otherwise it will be SMOOTH. + */ + info->uses_interp_color = true; + if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) + info->uses_persp_sample_color = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) + info->uses_persp_centroid_color = true; + else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) + info->uses_persp_center_color = true; + break; + } + break; + } + case nir_intrinsic_load_barycentric_at_offset: /* uses center */ + case nir_intrinsic_load_barycentric_at_sample: /* uses center */ + if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT) + break; + + if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) { + info->uses_linear_center = true; + } else { + info->uses_persp_center = true; + } + if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) + info->uses_interp_at_sample = true; + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_interpolated_input: + scan_io_usage(info, intr, true); + break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + scan_io_usage(info, intr, false); + break; + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + /* These can only occur if there is indirect temp indexing. */ + break; + case nir_intrinsic_interp_deref_at_centroid: + case nir_intrinsic_interp_deref_at_sample: + case nir_intrinsic_interp_deref_at_offset: + unreachable("these opcodes should have been lowered"); + break; + default: + break; + } + } +} + +void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info) +{ + nir_function *func; + + info->base = nir->info; + info->stage = nir->info.stage; + + /* Get options from shader profiles. */ + for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) { + if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) { + info->options = profiles[i].options; + break; + } + } + + if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + if (info->base.tess.primitive_mode == GL_ISOLINES) + info->base.tess.primitive_mode = GL_LINES; + } + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + /* post_depth_coverage implies early_fragment_tests */ + info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage; + + info->color_interpolate[0] = nir->info.fs.color0_interp; + info->color_interpolate[1] = nir->info.fs.color1_interp; + for (unsigned i = 0; i < 2; i++) { + if (info->color_interpolate[i] == INTERP_MODE_NONE) + info->color_interpolate[i] = INTERP_MODE_COLOR; + } + + info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : + nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : + TGSI_INTERPOLATE_LOC_CENTER; + info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : + nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : + TGSI_INTERPOLATE_LOC_CENTER; + /* Set an invalid value. Will be determined at draw time if needed when the expected + * conditions are met. + */ + info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff; + } + + info->constbuf0_num_slots = nir->num_uniforms; + + if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir); + } + + info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); + info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); + info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); + info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); + info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); + info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS); + info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); + info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE); + info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); + info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || + nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; + info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); + info->reads_tess_factors = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_TESS_LEVEL_INNER) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_TESS_LEVEL_OUTER); + info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE); + info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); + info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL); + info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE); + info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID); + info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH); + info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); + info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + + info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0; + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { + info->color0_writes_all_cbufs = true; + info->colors_written |= 0x1; + } + if (nir->info.fs.color_is_dual_source) + info->colors_written |= 0x2; + } else { + info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID; + info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT; + info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER; + info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ; + info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX; + info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE; + info->writes_position = nir->info.outputs_written & VARYING_BIT_POS; + } + + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); + nir_foreach_block (block, func->impl) { + nir_foreach_instr (instr, block) + scan_instruction(nir, info, instr); + } + + if (info->stage == MESA_SHADER_VERTEX || info->stage == MESA_SHADER_TESS_EVAL) { + /* Add the PrimitiveID output, but don't increment num_outputs. + * The driver inserts PrimitiveID only when it's used by the pixel shader, + * and si_emit_spi_map uses this unconditionally when such a pixel shader is used. + */ + info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID; + info->output_type[info->num_outputs] = nir_type_uint32; + info->output_usagemask[info->num_outputs] = 0x1; + } + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid || + info->uses_persp_sample || info->uses_linear_center || + info->uses_linear_centroid || info->uses_linear_sample || + info->uses_interp_at_sample || nir->info.writes_memory || + nir->info.fs.uses_fbfetch_output || + nir->info.fs.needs_quad_helper_invocations || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION)); + + /* Add both front and back color inputs. */ + unsigned num_inputs_with_colors = info->num_inputs; + for (unsigned back = 0; back < 2; back++) { + for (unsigned i = 0; i < 2; i++) { + if ((info->colors_read >> (i * 4)) & 0xf) { + unsigned index = num_inputs_with_colors; + + info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i; + info->input[index].interpolate = info->color_interpolate[i]; + info->input[index].usage_mask = info->colors_read >> (i * 4); + num_inputs_with_colors++; + + /* Back-face color don't increment num_inputs. si_emit_spi_map will use + * back-face colors conditionally only when they are needed. + */ + if (!back) + info->num_inputs = num_inputs_with_colors; + } + } + } + } + + /* Trim output read masks based on write masks. */ + for (unsigned i = 0; i < info->num_outputs; i++) + info->output_readmask[i] &= info->output_usagemask[i]; + + info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir); +} diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 521ef47..cca73be 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -22,561 +22,10 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "ac_nir_to_llvm.h" -#include "compiler/nir/nir.h" -#include "compiler/nir/nir_builder.h" -#include "compiler/nir/nir_deref.h" -#include "compiler/nir_types.h" +#include "nir_builder.h" #include "si_pipe.h" -#include "si_shader_internal.h" -#include "tgsi/tgsi_from_mesa.h" -#include "util/mesa-sha1.h" -struct si_shader_profile { - uint32_t sha1[SHA1_DIGEST_LENGTH32]; - uint32_t options; -}; - -static struct si_shader_profile profiles[] = -{ - { - /* Plot3D */ - {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451}, - SI_PROFILE_VS_NO_BINNING, - }, - { - /* Viewperf/Energy isn't affected by the discard bug. */ - {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0}, - SI_PROFILE_IGNORE_LLVM_DISCARD_BUG, - }, - { - /* Viewperf/Medical */ - {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0}, - SI_PROFILE_PS_NO_BINNING, - }, - { - /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32, - * probably due to interpolation performance. - */ - {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8}, - SI_PROFILE_WAVE64, - }, -}; - -static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type) -{ - for (unsigned i = 0; i < instr->num_srcs; i++) { - if (instr->src[i].src_type == type) - return &instr->src[i].src; - } - return NULL; -} - -static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr, - bool is_input) -{ - unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ - - if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { - nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); - - if (baryc) { - if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) - interp = nir_intrinsic_interp_mode(baryc); - else - unreachable("unknown barycentric intrinsic"); - } else { - unreachable("unknown barycentric expression"); - } - } - - unsigned mask, bit_size; - bool is_output_load; - - if (nir_intrinsic_has_write_mask(intr)) { - mask = nir_intrinsic_write_mask(intr); /* store */ - bit_size = nir_src_bit_size(intr->src[0]); - is_output_load = false; - } else { - mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ - bit_size = intr->dest.ssa.bit_size; - is_output_load = !is_input; - } - assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered"); - - /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs - * where the mask is untyped. - */ - if (bit_size == 16 && !is_input) { - unsigned new_mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (mask & (1 << i)) - new_mask |= 0x1 << (i / 2); - } - mask = new_mask; - } - - mask <<= nir_intrinsic_component(intr); - - nir_src offset = *nir_get_io_offset_src(intr); - bool indirect = !nir_src_is_const(offset); - if (!indirect) - assert(nir_src_as_uint(offset) == 0); - - unsigned semantic = 0; - /* VS doesn't have semantics. */ - if (info->stage != MESA_SHADER_VERTEX || !is_input) - semantic = nir_intrinsic_io_semantics(intr).location; - - if (info->stage == MESA_SHADER_FRAGMENT && !is_input) { - /* Never use FRAG_RESULT_COLOR directly. */ - if (semantic == FRAG_RESULT_COLOR) - semantic = FRAG_RESULT_DATA0; - semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; - } - - unsigned driver_location = nir_intrinsic_base(intr); - unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1; - - if (is_input) { - assert(driver_location + num_slots <= ARRAY_SIZE(info->input)); - - for (unsigned i = 0; i < num_slots; i++) { - unsigned loc = driver_location + i; - - info->input[loc].semantic = semantic + i; - - if (semantic == VARYING_SLOT_PRIMITIVE_ID) - info->input[loc].interpolate = INTERP_MODE_FLAT; - else - info->input[loc].interpolate = interp; - - if (mask) { - info->input[loc].usage_mask |= mask; - if (bit_size == 16) { - if (nir_intrinsic_io_semantics(intr).high_16bits) - info->input[loc].fp16_lo_hi_valid |= 0x2; - else - info->input[loc].fp16_lo_hi_valid |= 0x1; - } - info->num_inputs = MAX2(info->num_inputs, loc + 1); - } - } - } else { - /* Outputs. */ - assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); - - for (unsigned i = 0; i < num_slots; i++) { - unsigned loc = driver_location + i; - - info->output_semantic[loc] = semantic + i; - - if (is_output_load) { - /* Output loads have only a few things that we need to track. */ - info->output_readmask[loc] |= mask; - } else if (mask) { - /* Output stores. */ - unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams << - (nir_intrinsic_component(intr) * 2); - unsigned new_mask = mask & ~info->output_usagemask[loc]; - - for (unsigned i = 0; i < 4; i++) { - unsigned stream = (gs_streams >> (i * 2)) & 0x3; - - if (new_mask & (1 << i)) { - info->output_streams[loc] |= stream << (i * 2); - info->num_stream_output_components[stream]++; - } - } - - if (nir_intrinsic_has_src_type(intr)) - info->output_type[loc] = nir_intrinsic_src_type(intr); - else if (nir_intrinsic_has_dest_type(intr)) - info->output_type[loc] = nir_intrinsic_dest_type(intr); - else - info->output_type[loc] = nir_type_float32; - - info->output_usagemask[loc] |= mask; - info->num_outputs = MAX2(info->num_outputs, loc + 1); - - if (info->stage == MESA_SHADER_FRAGMENT && - semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) { - unsigned index = semantic - FRAG_RESULT_DATA0; - - if (nir_intrinsic_src_type(intr) == nir_type_float16) - info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2); - else if (nir_intrinsic_src_type(intr) == nir_type_int16) - info->output_color_types |= SI_TYPE_INT16 << (index * 2); - else if (nir_intrinsic_src_type(intr) == nir_type_uint16) - info->output_color_types |= SI_TYPE_UINT16 << (index * 2); - } - } - } - } -} - -static bool is_bindless_handle_indirect(nir_instr *src) -{ - /* Check if the bindless handle comes from indirect load_ubo. */ - if (src->type == nir_instr_type_intrinsic && - nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) { - if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0])) - return true; - } else { - /* Some other instruction. Return the worst-case result. */ - return true; - } - return false; -} - -static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info, - nir_instr *instr) -{ - if (instr->type == nir_instr_type_tex) { - nir_tex_instr *tex = nir_instr_as_tex(instr); - const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle); - - /* Gather the types of used VMEM instructions that return something. */ - switch (tex->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - case nir_texop_lod: - case nir_texop_tg4: - info->uses_vmem_return_type_sampler_or_bvh = true; - break; - default: - info->uses_vmem_return_type_other = true; - break; - } - - if (handle) { - info->uses_bindless_samplers = true; - - if (is_bindless_handle_indirect(handle->ssa->parent_instr)) - info->uses_indirect_descriptor = true; - } else { - const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref); - - if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref))) - info->uses_indirect_descriptor = true; - } - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name; - bool is_ssbo = strstr(intr_name, "ssbo"); - bool is_image = strstr(intr_name, "image") == intr_name; - bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name; - - /* Gather the types of used VMEM instructions that return something. */ - if (nir_intrinsic_infos[intr->intrinsic].has_dest) { - switch (intr->intrinsic) { - case nir_intrinsic_load_ubo: - if (!nir_src_is_const(intr->src[1])) - info->uses_vmem_return_type_other = true; - break; - case nir_intrinsic_load_constant: - info->uses_vmem_return_type_other = true; - break; - - case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */ - case nir_intrinsic_load_tess_level_outer: /* TES input read from memory */ - case nir_intrinsic_load_tess_level_inner: /* TES input read from memory */ - info->uses_vmem_return_type_other = true; - break; - - case nir_intrinsic_load_input: - case nir_intrinsic_load_input_vertex: - case nir_intrinsic_load_per_vertex_input: - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_TESS_EVAL) - info->uses_vmem_return_type_other = true; - break; - - default: - if (is_image || - is_bindless_image || - is_ssbo || - (strstr(intr_name, "global") == intr_name || - intr->intrinsic == nir_intrinsic_load_global || - intr->intrinsic == nir_intrinsic_store_global) || - strstr(intr_name, "scratch")) - info->uses_vmem_return_type_other = true; - break; - } - } - - if (is_bindless_image) - info->uses_bindless_images = true; - - if (nir_intrinsic_writes_external_memory(intr)) - info->num_memory_stores++; - - if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0]))) - info->uses_indirect_descriptor = true; - - if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr)) - info->uses_indirect_descriptor = true; - - if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo && - !nir_src_is_const(intr->src[0])) - info->uses_indirect_descriptor = true; - - switch (intr->intrinsic) { - case nir_intrinsic_store_ssbo: - if (!nir_src_is_const(intr->src[1])) - info->uses_indirect_descriptor = true; - break; - case nir_intrinsic_load_ubo: - if (!nir_src_is_const(intr->src[0])) - info->uses_indirect_descriptor = true; - break; - case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_workgroup_id: { - unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); - while (mask) { - unsigned i = u_bit_scan(&mask); - - if (intr->intrinsic == nir_intrinsic_load_workgroup_id) - info->uses_block_id[i] = true; - else - info->uses_thread_id[i] = true; - } - break; - } - case nir_intrinsic_load_color0: - case nir_intrinsic_load_color1: { - unsigned index = intr->intrinsic == nir_intrinsic_load_color1; - uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa); - info->colors_read |= mask << (index * 4); - - switch (info->color_interpolate[index]) { - case INTERP_MODE_SMOOTH: - if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) - info->uses_persp_sample = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) - info->uses_persp_centroid = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) - info->uses_persp_center = true; - break; - case INTERP_MODE_NOPERSPECTIVE: - if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) - info->uses_linear_sample = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) - info->uses_linear_centroid = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) - info->uses_linear_center = true; - break; - case INTERP_MODE_COLOR: - /* We don't know the final value. This will be FLAT if flatshading is enabled - * in the rasterizer state, otherwise it will be SMOOTH. - */ - info->uses_interp_color = true; - if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) - info->uses_persp_sample_color = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) - info->uses_persp_centroid_color = true; - else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) - info->uses_persp_center_color = true; - break; - } - break; - } - case nir_intrinsic_load_barycentric_at_offset: /* uses center */ - case nir_intrinsic_load_barycentric_at_sample: /* uses center */ - if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT) - break; - - if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) { - info->uses_linear_center = true; - } else { - info->uses_persp_center = true; - } - if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) - info->uses_interp_at_sample = true; - break; - case nir_intrinsic_load_input: - case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_input_vertex: - case nir_intrinsic_load_interpolated_input: - scan_io_usage(info, intr, true); - break; - case nir_intrinsic_load_output: - case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_store_output: - case nir_intrinsic_store_per_vertex_output: - scan_io_usage(info, intr, false); - break; - case nir_intrinsic_load_deref: - case nir_intrinsic_store_deref: - /* These can only occur if there is indirect temp indexing. */ - break; - case nir_intrinsic_interp_deref_at_centroid: - case nir_intrinsic_interp_deref_at_sample: - case nir_intrinsic_interp_deref_at_offset: - unreachable("these opcodes should have been lowered"); - break; - default: - break; - } - } -} - -void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info) -{ - nir_function *func; - - info->base = nir->info; - info->stage = nir->info.stage; - - /* Get options from shader profiles. */ - for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) { - if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) { - info->options = profiles[i].options; - break; - } - } - - if (nir->info.stage == MESA_SHADER_TESS_EVAL) { - if (info->base.tess.primitive_mode == GL_ISOLINES) - info->base.tess.primitive_mode = GL_LINES; - } - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - /* post_depth_coverage implies early_fragment_tests */ - info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage; - - info->color_interpolate[0] = nir->info.fs.color0_interp; - info->color_interpolate[1] = nir->info.fs.color1_interp; - for (unsigned i = 0; i < 2; i++) { - if (info->color_interpolate[i] == INTERP_MODE_NONE) - info->color_interpolate[i] = INTERP_MODE_COLOR; - } - - info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : - nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : - TGSI_INTERPOLATE_LOC_CENTER; - info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : - nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : - TGSI_INTERPOLATE_LOC_CENTER; - /* Set an invalid value. Will be determined at draw time if needed when the expected - * conditions are met. - */ - info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff; - } - - info->constbuf0_num_slots = nir->num_uniforms; - - if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir); - } - - info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); - info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); - info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); - info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); - info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); - info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS); - info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); - info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE); - info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); - info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || - nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; - info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); - info->reads_tess_factors = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_TESS_LEVEL_INNER) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_TESS_LEVEL_OUTER); - info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE); - info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); - info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL); - info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE); - info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID); - info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH); - info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); - info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); - - info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0; - if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { - info->color0_writes_all_cbufs = true; - info->colors_written |= 0x1; - } - if (nir->info.fs.color_is_dual_source) - info->colors_written |= 0x2; - } else { - info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID; - info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT; - info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER; - info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ; - info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX; - info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE; - info->writes_position = nir->info.outputs_written & VARYING_BIT_POS; - } - - func = (struct nir_function *)exec_list_get_head_const(&nir->functions); - nir_foreach_block (block, func->impl) { - nir_foreach_instr (instr, block) - scan_instruction(nir, info, instr); - } - - if (info->stage == MESA_SHADER_VERTEX || info->stage == MESA_SHADER_TESS_EVAL) { - /* Add the PrimitiveID output, but don't increment num_outputs. - * The driver inserts PrimitiveID only when it's used by the pixel shader, - * and si_emit_spi_map uses this unconditionally when such a pixel shader is used. - */ - info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID; - info->output_type[info->num_outputs] = nir_type_uint32; - info->output_usagemask[info->num_outputs] = 0x1; - } - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid || - info->uses_persp_sample || info->uses_linear_center || - info->uses_linear_centroid || info->uses_linear_sample || - info->uses_interp_at_sample || nir->info.writes_memory || - nir->info.fs.uses_fbfetch_output || - nir->info.fs.needs_quad_helper_invocations || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION)); - - /* Add both front and back color inputs. */ - unsigned num_inputs_with_colors = info->num_inputs; - for (unsigned back = 0; back < 2; back++) { - for (unsigned i = 0; i < 2; i++) { - if ((info->colors_read >> (i * 4)) & 0xf) { - unsigned index = num_inputs_with_colors; - - info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i; - info->input[index].interpolate = info->color_interpolate[i]; - info->input[index].usage_mask = info->colors_read >> (i * 4); - num_inputs_with_colors++; - - /* Back-face color don't increment num_inputs. si_emit_spi_map will use - * back-face colors conditionally only when they are needed. - */ - if (!back) - info->num_inputs = num_inputs_with_colors; - } - } - } - } - - /* Trim output read masks based on write masks. */ - for (unsigned i = 0; i < info->num_outputs; i++) - info->output_readmask[i] &= info->output_usagemask[i]; - - info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir); -} - static bool si_alu_to_scalar_filter(const nir_instr *instr, const void *data) { struct si_screen *sscreen = (struct si_screen *)data; -- 2.7.4