return nir_intrinsic_load_local_invocation_index;
case SYSTEM_VALUE_WORKGROUP_ID:
return nir_intrinsic_load_workgroup_id;
+ case SYSTEM_VALUE_WORKGROUP_INDEX:
+ return nir_intrinsic_load_workgroup_index;
case SYSTEM_VALUE_NUM_WORKGROUPS:
return nir_intrinsic_load_num_workgroups;
case SYSTEM_VALUE_PRIMITIVE_ID:
return SYSTEM_VALUE_NUM_WORKGROUPS;
case nir_intrinsic_load_workgroup_id:
return SYSTEM_VALUE_WORKGROUP_ID;
+ case nir_intrinsic_load_workgroup_index:
+ return SYSTEM_VALUE_WORKGROUP_INDEX;
case nir_intrinsic_load_primitive_id:
return SYSTEM_VALUE_PRIMITIVE_ID;
case nir_intrinsic_load_tess_coord:
bool shuffle_local_ids_for_quad_derivatives:1;
bool lower_local_invocation_index:1;
bool lower_cs_local_id_to_index:1;
+ bool lower_workgroup_id_to_index:1;
} nir_lower_compute_system_values_options;
bool nir_lower_compute_system_values(nir_shader *shader,
assert(stage == MESA_SHADER_TESS_CTRL);
break;
+ case nir_intrinsic_load_workgroup_index:
case nir_intrinsic_load_workgroup_id:
assert(gl_shader_stage_uses_workgroup(stage));
if (stage == MESA_SHADER_COMPUTE)
case nir_intrinsic_load_base_global_invocation_id:
case nir_intrinsic_load_global_invocation_index:
case nir_intrinsic_load_workgroup_id:
+ case nir_intrinsic_load_workgroup_index:
case nir_intrinsic_load_num_workgroups:
case nir_intrinsic_load_workgroup_size:
case nir_intrinsic_load_work_dim:
# non-zero_base indicates the base is included
system_value("workgroup_id", 3, bit_sizes=[32, 64])
system_value("workgroup_id_zero_base", 3)
+# The workgroup_index is intended for situations where a 3-dimensional
+# workgroup_id is not available on the HW, but a 1-dimensional index is.
+system_value("workgroup_index", 1)
system_value("base_workgroup_id", 3, bit_sizes=[32, 64])
system_value("user_clip_plane", 4, indices=[UCP_ID])
system_value("num_workgroups", 3, bit_sizes=[32, 64])
}
+/* Reconstruct a 3-component workgroup ID from a flat workgroup index.
+ *
+ * b:        builder positioned where the replacement code is emitted
+ * index:    1-D linear workgroup index (the workgroup_index system value)
+ * size:     3-component workgroup-count vector (from load_num_workgroups)
+ * bit_size: bit size of the returned vec3 (result is nir_u2u-converted)
+ */
static nir_ssa_def *
+lower_id_to_index_no_umod(nir_builder *b, nir_ssa_def *index,
+                          nir_ssa_def *size, unsigned bit_size)
+{
+   /* We lower ID to Index with the following formula:
+    *
+    *    id.z = index / (size.x * size.y)
+    *    id.y = (index - (id.z * (size.x * size.y))) / size.x
+    *    id.x = index - ((id.z * (size.x * size.y)) + (id.y * size.x))
+    *
+    * This is more efficient on HW that doesn't have a
+    * modulo division instruction and when the size is either
+    * not compile time known or not a power of two.
+    */
+
+   nir_ssa_def *size_x = nir_channel(b, size, 0);
+   nir_ssa_def *size_y = nir_channel(b, size, 1);
+   /* size.x * size.y is the number of workgroups in one Z-slice; it is
+    * reused for both the z quotient and the z remainder below.
+    */
+   nir_ssa_def *size_x_y = nir_imul(b, size_x, size_y);
+
+   nir_ssa_def *id_z = nir_udiv(b, index, size_x_y);
+   nir_ssa_def *z_portion = nir_imul(b, id_z, size_x_y);
+   nir_ssa_def *id_y = nir_udiv(b, nir_isub(b, index, z_portion), size_x);
+   nir_ssa_def *y_portion = nir_imul(b, id_y, size_x);
+   nir_ssa_def *id_x = nir_isub(b, index, nir_iadd(b, z_portion, y_portion));
+
+   return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size);
+}
+
+
+static nir_ssa_def *
lower_id_to_index(nir_builder *b, nir_ssa_def *index, nir_ssa_def *size,
unsigned bit_size)
{
if (options && options->has_base_workgroup_id)
return nir_iadd(b, nir_u2u(b, nir_load_workgroup_id_zero_base(b), bit_size),
nir_load_base_workgroup_id(b, bit_size));
- else
- return NULL;
+ else if (options && options->lower_workgroup_id_to_index)
+ return lower_id_to_index_no_umod(b, nir_load_workgroup_index(b),
+ nir_load_num_workgroups(b, bit_size),
+ bit_size);
+
+ return NULL;
+
}
default:
SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID,
SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX,
SYSTEM_VALUE_WORKGROUP_ID,
+ SYSTEM_VALUE_WORKGROUP_INDEX,
SYSTEM_VALUE_NUM_WORKGROUPS,
SYSTEM_VALUE_WORKGROUP_SIZE,
SYSTEM_VALUE_GLOBAL_GROUP_SIZE,