From 25249e8be2c9250e18cf5d5cf574a1414c811562 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 3 May 2022 12:34:44 -0500 Subject: [PATCH] nir/lower_blend: Expand or shrink output variables as needed Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_lower_blend.c | 163 ++++++++++++++++++++++++++------- src/panfrost/ci/panfrost-g52-fails.txt | 43 --------- 2 files changed, 129 insertions(+), 77 deletions(-) diff --git a/src/compiler/nir/nir_lower_blend.c b/src/compiler/nir/nir_lower_blend.c index b16f7e2..b844d70 100644 --- a/src/compiler/nir/nir_lower_blend.c +++ b/src/compiler/nir/nir_lower_blend.c @@ -354,51 +354,43 @@ nir_blend( return nir_vec(b, channels, 4); } -static bool -nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) +static int +color_index_for_var(const nir_variable *var) { - const nir_lower_blend_options *options = data; - if (instr->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_deref) - return false; - - nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); - if (!nir_deref_mode_is(deref, nir_var_shader_out)) - return false; - - /* Indirects must be already lowered and output variables split */ - assert(deref && deref->deref_type == nir_deref_type_var); - nir_variable *var = deref->var; - assert(glsl_type_is_vector_or_scalar(var->type)); - if (var->data.location != FRAG_RESULT_COLOR && var->data.location < FRAG_RESULT_DATA0) - return false; + return -1; + + return (var->data.location == FRAG_RESULT_COLOR) ? 0 : + (var->data.location - FRAG_RESULT_DATA0); +} + +static bool +nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store, + const nir_lower_blend_options *options) +{ + assert(store->intrinsic == nir_intrinsic_store_deref); - /* Determine render target for per-RT blending */ - unsigned rt = - (var->data.location == FRAG_RESULT_COLOR) ? 0 : - (var->data.location - FRAG_RESULT_DATA0); + nir_variable *var = nir_intrinsic_get_var(store, 0); + int rt = color_index_for_var(var); /* No blend lowering requested on this RT */ - if (options->format[rt] == PIPE_FORMAT_NONE) + if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE) return false; - b->cursor = nir_before_instr(instr); + b->cursor = nir_before_instr(&store->instr); - /* Grab the input color */ - unsigned src_num_comps = nir_src_num_components(intr->src[1]); - nir_ssa_def *src = - nir_pad_vector(b, nir_ssa_for_src(b, intr->src[1], src_num_comps), 4); + /* Grab the input color. We always want 4 channels during blend. Dead + * code will clean up any channels we don't need. + */ + assert(store->src[1].is_ssa); + nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4); /* Grab the previous fragment color */ var->data.fb_fetch_output = true; b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location); b->shader->info.fs.uses_fbfetch_output = true; - nir_ssa_def *dst = nir_load_var(b, var); + nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4); /* Blend the two colors per the passed options */ nir_ssa_def *blended = src; @@ -413,14 +405,82 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) /* Apply a colormask */ blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst); - if (src_num_comps != 4) - blended = nir_channels(b, blended, nir_component_mask(src_num_comps)); + const unsigned num_components = glsl_get_vector_elements(var->type); + + /* Shave off any components we don't want to store */ + blended = nir_trim_vector(b, blended, num_components); + + /* Grow or shrink the store destination as needed */ + assert(nir_intrinsic_write_mask(store) == + nir_component_mask(store->num_components)); + store->num_components = num_components; + store->dest.ssa.num_components = num_components; + nir_intrinsic_set_write_mask(store, nir_component_mask(num_components)); /* Write out the final color instead of the input */ - nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended); + nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended); return true; } +static bool +nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) +{ + const nir_lower_blend_options *options = data; + + switch (instr->type) { + case nir_instr_type_deref: { + /* Fix up output deref types, as needed */ + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (!nir_deref_mode_is(deref, nir_var_shader_out)) + return false; + + /* Indirects must be already lowered and output variables split */ + assert(deref->deref_type == nir_deref_type_var); + + if (deref->type == deref->var->type) + return false; + + deref->type = deref->var->type; + return true; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_deref && + intrin->intrinsic != nir_intrinsic_store_deref) + return false; + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (!nir_deref_mode_is(deref, nir_var_shader_out)) + return false; + + assert(glsl_type_is_vector_or_scalar(deref->type)); + + if (intrin->intrinsic == nir_intrinsic_load_deref) { + /* We need to fix up framebuffer if num_components changed */ + const unsigned num_components = glsl_get_vector_elements(deref->type); + if (intrin->num_components == num_components) + return false; + + b->cursor = nir_after_instr(&intrin->instr); + + assert(intrin->dest.is_ssa); + nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa, + num_components); + intrin->num_components = num_components, + nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val, + val->parent_instr); + return true; + } else { + return nir_lower_blend_store(b, intrin, options); + } + } + + default: + return false; + } +} + /** Lower blending to framebuffer fetch and some math * * This pass requires that indirects are lowered and output variables split @@ -435,6 +495,41 @@ nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options) { assert(shader->info.stage == MESA_SHADER_FRAGMENT); + /* Re-type any blended output variables to have the same number of + * components as the image format. The GL 4.6 Spec says: + * + * "If a fragment shader writes to none of gl_FragColor, gl_FragData, + * nor any user-defined output variables, the values of the fragment + * colors following shader execution are undefined, and may differ for + * each fragment color. If some, but not all elements of gl_FragData or + * of theser-defined output variables are written, the values of + * fragment colors corresponding to unwritten elements orariables are + * similarly undefined." + * + * Note the phrase "following shader execution". Those color values are + * then supposed to go into blending which may, depending on the blend + * mode, apply constraints that result in well-defined rendering. It's + * fine if we have to pad out a value with undef but we then need to blend + * that garbage value to ensure correct results. + * + * This may also, depending on output format, be a small optimization + * allowing NIR to dead-code unused calculations. + */ + nir_foreach_shader_out_variable(var, shader) { + int rt = color_index_for_var(var); + + /* No blend lowering requested on this RT */ + if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE) + continue; + + const unsigned num_format_components = + util_format_get_nr_components(options->format[rt]); + + /* Indirects must be already lowered and output variables split */ + assert(glsl_type_is_vector_or_scalar(var->type)); + var->type = glsl_replace_vector_type(var->type, num_format_components); + } + nir_shader_instructions_pass(shader, nir_lower_blend_instr, nir_metadata_block_index | nir_metadata_dominance, diff --git a/src/panfrost/ci/panfrost-g52-fails.txt b/src/panfrost/ci/panfrost-g52-fails.txt index af1c588..bfdec49 100644 --- a/src/panfrost/ci/panfrost-g52-fails.txt +++ b/src/panfrost/ci/panfrost-g52-fails.txt @@ -600,46 +600,3 @@ dEQP-VK.api.copy_and_blit.core.resolve_image.with_regions.4_bit,Crash dEQP-VK.rasterization.interpolation_multisample_4_bit.lines_wide,Fail dEQP-VK.rasterization.interpolation_multisample_4_bit.non_strict_lines_wide,Fail dEQP-VK.rasterization.interpolation_multisample_4_bit.triangles,Fail - -# The following tests started to crash after CI fix, see -# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16325 -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.fragment.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.fragment.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.fragment.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.fragment.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.fragment.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.vertex.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.vertex.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.vertex.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_expression.vertex.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.fragment.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.fragment.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.fragment.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.fragment.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.fragment.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.vertex.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.vertex.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.vertex.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.vertex.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.const_literal.vertex.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.fragment.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.vertex.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.vertex.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.vertex.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.vertex.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.dynamically_uniform.vertex.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.fragment.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.fragment.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.fragment.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.fragment.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.fragment.samplercubeshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.vertex.sampler1darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.vertex.sampler1dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.vertex.sampler2darrayshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.vertex.sampler2dshadow,Crash -dEQP-VK.glsl.opaque_type_indexing.sampler.uniform.vertex.samplercubeshadow,Crash -- 2.7.4