From 74a40cc4b6ed9440a0820c6f4a9cee296a8e191a Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2023 01:23:00 +0300 Subject: [PATCH] intel/fs: move lower of non-uniform at_sample barycentric to NIR We use a non-uniform lowering loop in the backend which we can do better in NIR because we can also use divergence analysis there. This change also limits VGRF usage to a single VGRF to hold the sample ID in the backend. Signed-off-by: Lionel Landwerlin Reviewed-by: Emma Anholt Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 67 ++++--------------- src/intel/compiler/brw_nir.c | 4 ++ src/intel/compiler/brw_nir.h | 2 + ...w_nir_lower_non_uniform_barycentric_at_sample.c | 78 ++++++++++++++++++++++ src/intel/compiler/meson.build | 1 + 5 files changed, 97 insertions(+), 55 deletions(-) create mode 100644 src/intel/compiler/brw_nir_lower_non_uniform_barycentric_at_sample.c diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 3d3a6b6..058e184 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3536,66 +3536,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, const glsl_interp_mode interpolation = (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); + fs_reg msg_data; if (nir_src_is_const(instr->src[0])) { - unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4; - - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dest, - fs_reg(), /* src */ - brw_imm_ud(msg_data), - interpolation); + msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4); } else { const fs_reg sample_src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD); - - if (nir_src_is_always_uniform(instr->src[0])) { - const fs_reg sample_id = bld.emit_uniformize(sample_src); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0) - .SHL(msg_data, sample_id, brw_imm_ud(4u)); - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dest, - fs_reg(), /* src */ - component(msg_data, 0), - interpolation); - } else { - /* Make a loop that sends a message to the pixel interpolater - * for the sample number in each live channel. If there are - * multiple channels with the same sample number then these - * will be handled simultaneously with a single iteration of - * the loop. - */ - bld.emit(BRW_OPCODE_DO); - - /* Get the next live sample number into sample_id_reg */ - const fs_reg sample_id = bld.emit_uniformize(sample_src); - - /* Set the flag register so that we can perform the send - * message on all channels that have the same sample number - */ - bld.CMP(bld.null_reg_ud(), - sample_src, sample_id, - BRW_CONDITIONAL_EQ); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0) - .SHL(msg_data, sample_id, brw_imm_ud(4u)); - fs_inst *inst = - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dest, - fs_reg(), /* src */ - component(msg_data, 0), - interpolation); - set_predicate(BRW_PREDICATE_NORMAL, inst); - - /* Continue the loop if there are any live channels left */ - set_predicate_inv(BRW_PREDICATE_NORMAL, - true, /* inverse */ - bld.emit(BRW_OPCODE_WHILE)); - } + const fs_reg sample_id = bld.emit_uniformize(sample_src); + msg_data = component(bld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD), 0); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u)); } + + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dest, + fs_reg(), /* src */ + msg_data, + interpolation); break; } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 7cbac9c..27a9d74 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1685,6 +1685,10 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, brw_nir_optimize(nir, compiler); } + /* Do this only after the last opt_gcm. GCM will undo this lowering. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT) + OPT(brw_nir_lower_non_uniform_barycentric_at_sample); + /* Clean up LCSSA phis */ OPT(nir_opt_remove_phis); diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 8d0b786..505e147 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -229,6 +229,8 @@ bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir); void brw_nir_apply_tcs_quads_workaround(nir_shader *nir); +bool brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir); + void brw_nir_apply_key(nir_shader *nir, const struct brw_compiler *compiler, const struct brw_base_prog_key *key, diff --git a/src/intel/compiler/brw_nir_lower_non_uniform_barycentric_at_sample.c b/src/intel/compiler/brw_nir_lower_non_uniform_barycentric_at_sample.c new file mode 100644 index 0000000..700d54f --- /dev/null +++ b/src/intel/compiler/brw_nir_lower_non_uniform_barycentric_at_sample.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2023 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * Lower non uniform at sample messages to the interpolator. + * + * This is pretty much identical to what nir_lower_non_uniform_access() does. + * We do it here because otherwise GCM would undo this optimization. Also we + * can assume divergence analysis here. + */ + +#include "brw_nir.h" +#include "compiler/nir/nir_builder.h" + +static bool +brw_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b, + nir_instr *instr, + void *cb_data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample) + return false; + + if (nir_src_is_always_uniform(intrin->src[0]) || + !nir_src_is_divergent(intrin->src[0])) + return false; + + nir_def *sample_id = intrin->src[0].ssa; + + b->cursor = nir_instr_remove(&intrin->instr); + + nir_push_loop(b); + + nir_def *first_sample_id = nir_read_first_invocation(b, sample_id); + + nir_push_if(b, nir_ieq(b, sample_id, first_sample_id)); + + nir_builder_instr_insert(b, &intrin->instr); + + nir_src_rewrite(&intrin->src[0], first_sample_id); + + nir_jump(b, nir_jump_break); + + return true; +} + +bool +brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir) +{ + return nir_shader_instructions_pass( + nir, + brw_nir_lower_non_uniform_barycentric_at_sample_instr, + nir_metadata_none, + NULL); +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 5d42d7f..6898bfa 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -92,6 +92,7 @@ libintel_compiler_files = files( 'brw_nir_lower_alpha_to_coverage.c', 'brw_nir_lower_intersection_shader.c', 'brw_nir_lower_non_uniform_resource_intel.c', + 'brw_nir_lower_non_uniform_barycentric_at_sample.c', 'brw_nir_lower_ray_queries.c', 'brw_nir_lower_rt_intrinsics.c', 'brw_nir_lower_shader_calls.c', -- 2.7.4