From 4b99b528f502f09bff8a9032f79dcd152485b725 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 24 Feb 2022 10:27:30 +0100 Subject: [PATCH] nir: Introduce workgroup_index and ability to lower workgroup_id to it. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The workgroup_index is intended for situations when a 3 dimensional workgroup_id is not available on the HW, but a 1 dimensional index is. In this case, we can use lower the 3D ID to use this. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir.c | 4 ++++ src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_divergence_analysis.c | 1 + src/compiler/nir/nir_gather_info.c | 1 + src/compiler/nir/nir_intrinsics.py | 3 +++ src/compiler/nir/nir_lower_system_values.c | 38 ++++++++++++++++++++++++++++-- src/compiler/shader_enums.h | 1 + 7 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 3f17d11..6b2996f 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2419,6 +2419,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_local_invocation_index; case SYSTEM_VALUE_WORKGROUP_ID: return nir_intrinsic_load_workgroup_id; + case SYSTEM_VALUE_WORKGROUP_INDEX: + return nir_intrinsic_load_workgroup_index; case SYSTEM_VALUE_NUM_WORKGROUPS: return nir_intrinsic_load_num_workgroups; case SYSTEM_VALUE_PRIMITIVE_ID: @@ -2556,6 +2558,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_NUM_WORKGROUPS; case nir_intrinsic_load_workgroup_id: return SYSTEM_VALUE_WORKGROUP_ID; + case nir_intrinsic_load_workgroup_index: + return SYSTEM_VALUE_WORKGROUP_INDEX; case nir_intrinsic_load_primitive_id: return SYSTEM_VALUE_PRIMITIVE_ID; case nir_intrinsic_load_tess_coord: diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index a1904e7..c4bd000 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4764,6 +4764,7 @@ typedef struct nir_lower_compute_system_values_options { bool shuffle_local_ids_for_quad_derivatives:1; bool lower_local_invocation_index:1; bool lower_cs_local_id_to_index:1; + bool lower_workgroup_id_to_index:1; } nir_lower_compute_system_values_options; bool nir_lower_compute_system_values(nir_shader *shader, diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index a67af16..9767dcc 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -264,6 +264,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) assert(stage == MESA_SHADER_TESS_CTRL); break; + case nir_intrinsic_load_workgroup_index: case nir_intrinsic_load_workgroup_id: assert(gl_shader_stage_uses_workgroup(stage)); if (stage == MESA_SHADER_COMPUTE) diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 85956f0..7a366ad 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -640,6 +640,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_load_base_global_invocation_id: case nir_intrinsic_load_global_invocation_index: case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_workgroup_index: case nir_intrinsic_load_num_workgroups: case nir_intrinsic_load_workgroup_size: case nir_intrinsic_load_work_dim: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index ec81b19..d256524 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -772,6 +772,9 @@ system_value("local_invocation_index", 1) # non-zero_base indicates the base is included system_value("workgroup_id", 3, bit_sizes=[32, 64]) system_value("workgroup_id_zero_base", 3) +# The workgroup_index is intended for situations when a 3 dimensional +# workgroup_id is not available on the HW, but a 1 dimensional index is. +system_value("workgroup_index", 1) system_value("base_workgroup_id", 3, bit_sizes=[32, 64]) system_value("user_clip_plane", 4, indices=[UCP_ID]) system_value("num_workgroups", 3, bit_sizes=[32, 64]) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 641da27..e469896 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -266,6 +266,35 @@ nir_lower_system_values(nir_shader *shader) } static nir_ssa_def * +lower_id_to_index_no_umod(nir_builder *b, nir_ssa_def *index, + nir_ssa_def *size, unsigned bit_size) +{ + /* We lower ID to Index with the following formula: + * + * id.z = index / (size.x * size.y) + * id.y = (index - (id.z * (size.x * size.y))) / size.x + * id.x = index - ((id.z * (size.x * size.y)) + (id.y * size.x)) + * + * This is more efficient on HW that doesn't have a + * modulo division instruction and when the size is either + * not compile time known or not a power of two. + */ + + nir_ssa_def *size_x = nir_channel(b, size, 0); + nir_ssa_def *size_y = nir_channel(b, size, 1); + nir_ssa_def *size_x_y = nir_imul(b, size_x, size_y); + + nir_ssa_def *id_z = nir_udiv(b, index, size_x_y); + nir_ssa_def *z_portion = nir_imul(b, id_z, size_x_y); + nir_ssa_def *id_y = nir_udiv(b, nir_isub(b, index, z_portion), size_x); + nir_ssa_def *y_portion = nir_imul(b, id_y, size_x); + nir_ssa_def *id_x = nir_isub(b, index, nir_iadd(b, z_portion, y_portion)); + + return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size); +} + + +static nir_ssa_def * lower_id_to_index(nir_builder *b, nir_ssa_def *index, nir_ssa_def *size, unsigned bit_size) { @@ -520,8 +549,13 @@ lower_compute_system_value_instr(nir_builder *b, if (options && options->has_base_workgroup_id) return nir_iadd(b, nir_u2u(b, nir_load_workgroup_id_zero_base(b), bit_size), nir_load_base_workgroup_id(b, bit_size)); - else - return NULL; + else if (options && options->lower_workgroup_id_to_index) + return lower_id_to_index_no_umod(b, nir_load_workgroup_index(b), + nir_load_num_workgroups(b, bit_size), + bit_size); + + return NULL; + } default: diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 270bd77..e07761b 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -766,6 +766,7 @@ typedef enum SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID, SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX, SYSTEM_VALUE_WORKGROUP_ID, + SYSTEM_VALUE_WORKGROUP_INDEX, SYSTEM_VALUE_NUM_WORKGROUPS, SYSTEM_VALUE_WORKGROUP_SIZE, SYSTEM_VALUE_GLOBAL_GROUP_SIZE, -- 2.7.4