From 56474fae937e5cd75ed26f3ea352e7347191416d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Mar 2023 22:57:36 +0200 Subject: [PATCH] intel/fs: fix subgroup invocation read bounds checking MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit nir->info.subgroup_size can be set to an enum value: SUBGROUP_SIZE_VARYING = 0 SUBGROUP_SIZE_UNIFORM = 1 SUBGROUP_SIZE_API_CONSTANT = 2 SUBGROUP_SIZE_FULL_SUBGROUPS = 3 So compute the API subgroup size value and compare it to the dispatch size to determine whether we need bounds checking. Signed-off-by: Lionel Landwerlin Fixes: 9ac192d79d ("intel/fs: bound subgroup invocation read to dispatch size") Reviewed-by: Marcin Ślusarz Part-of: --- src/intel/compiler/brw_fs.h | 3 +++ src/intel/compiler/brw_fs_nir.cpp | 3 ++- src/intel/compiler/brw_fs_visitor.cpp | 11 +++++++++++ src/intel/compiler/brw_nir.c | 7 +++++++ src/intel/compiler/brw_nir.h | 3 +++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 8fb3c3b..1b7b243 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -535,6 +535,9 @@ public: const unsigned dispatch_width; /**< 8, 16 or 32 */ unsigned max_dispatch_width; + /* The API selected subgroup size */ + unsigned api_subgroup_size; /**< 0, 8, 16, 32 */ + struct shader_stats shader_stats; brw::fs_builder bld; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index a443bcb..4709365 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5326,7 +5326,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * FS), bound the invocation to the dispatch size. 
*/ fs_reg bound_invocation; - if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) { + if (api_subgroup_size == 0 || + bld.dispatch_width() < api_subgroup_size) { bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1)); } else { diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index d622966..79865657 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -29,6 +29,7 @@ */ #include "brw_eu.h" #include "brw_fs.h" +#include "brw_nir.h" #include "compiler/glsl_types.h" using namespace brw; @@ -1362,9 +1363,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, performance_analysis(this), needs_register_pressure(needs_register_pressure), dispatch_width(dispatch_width), + api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)), bld(fs_builder(this, dispatch_width).at_end()) { init(); + assert(api_subgroup_size == 0 || + api_subgroup_size == 8 || + api_subgroup_size == 16 || + api_subgroup_size == 32); } fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, @@ -1382,9 +1388,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, performance_analysis(this), needs_register_pressure(needs_register_pressure), dispatch_width(8), + api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)), bld(fs_builder(this, dispatch_width).at_end()) { init(); + assert(api_subgroup_size == 0 || + api_subgroup_size == 8 || + api_subgroup_size == 16 || + api_subgroup_size == 32); } void diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 6c9feaf..8df2c0e 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1689,6 +1689,13 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size) unreachable("Invalid subgroup size type"); } +unsigned 
+brw_nir_api_subgroup_size(const nir_shader *nir, + unsigned hw_subgroup_size) +{ + return get_subgroup_size(&nir->info, hw_subgroup_size); +} + void brw_nir_apply_key(nir_shader *nir, const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 269731a..423b558 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -160,6 +160,9 @@ void brw_nir_apply_key(nir_shader *nir, unsigned max_subgroup_size, bool is_scalar); +unsigned brw_nir_api_subgroup_size(const nir_shader *nir, + unsigned hw_subgroup_size); + enum brw_conditional_mod brw_cmod_for_nir_comparison(nir_op op); enum lsc_opcode lsc_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic); enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo, -- 2.7.4