intel/compiler/mesh: align payload size to the size of vec4
author Marcin Ślusarz <marcin.slusarz@intel.com>
Mon, 5 Dec 2022 11:27:38 +0000 (12:27 +0100)
committer Marge Bot <emma+marge@anholt.net>
Tue, 6 Dec 2022 16:31:11 +0000 (16:31 +0000)
This reduces the number of instructions in task shaders when payload
size is not aligned to vec4 and payload_in_shared WA is enabled,
because nir_lower_task_shader will not need to handle the unaligned
size case.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20080>

src/intel/compiler/brw_mesh.cpp

index 3d3a36411afdaf2338085e21568506dd210e0b3e..4843f94f278a35d2649acd378cf2f15168984e7c 100644 (file)
@@ -238,6 +238,37 @@ brw_nir_adjust_payload(nir_shader *shader, const struct brw_compiler *compiler)
       NIR_PASS(_, shader, nir_opt_constant_folding);
 }
 
+/**
+ * Per-instruction callback used with nir_shader_instructions_pass.
+ *
+ * Rounds the "range" index of a launch_mesh_workgroups intrinsic up to a
+ * multiple of 16 (the size of a vec4). Every other instruction is left
+ * untouched.
+ *
+ * \param b      unused
+ * \param instr  instruction being visited
+ * \param data   unused
+ * \return true when the range of \p instr was changed, false otherwise
+ */
+static bool
+brw_nir_align_launch_mesh_workgroups_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   if (intrin->intrinsic != nir_intrinsic_launch_mesh_workgroups)
+      return false;
+
+   /* nir_lower_task_shader uses "range" as task payload size. */
+   const unsigned range = nir_intrinsic_range(intrin);
+
+   /* Aligning the size avoids the special case in nir_lower_task_shader
+    * that deals with a payload that is not vec4-aligned when the
+    * payload_in_shared workaround is enabled.
+    */
+   const unsigned aligned_range = ALIGN(range, 16);
+
+   /* Only report progress when the intrinsic was actually modified. */
+   if (aligned_range == range)
+      return false;
+
+   nir_intrinsic_set_range(intrin, aligned_range);
+
+   return true;
+}
+
+/**
+ * Runs brw_nir_align_launch_mesh_workgroups_instr over the instructions of
+ * \p nir via nir_shader_instructions_pass and returns that pass's result.
+ */
+static bool
+brw_nir_align_launch_mesh_workgroups(nir_shader *nir)
+{
+   /* The callback takes no user data, hence the trailing NULL. */
+   const bool progress =
+      nir_shader_instructions_pass(nir,
+                                   brw_nir_align_launch_mesh_workgroups_instr,
+                                   nir_metadata_block_index |
+                                   nir_metadata_dominance,
+                                   NULL);
+
+   return progress;
+}
+
 const unsigned *
 brw_compile_task(const struct brw_compiler *compiler,
                  void *mem_ctx,
@@ -250,6 +281,8 @@ brw_compile_task(const struct brw_compiler *compiler,
 
    brw_nir_lower_tue_outputs(nir, &prog_data->map);
 
+   NIR_PASS(_, nir, brw_nir_align_launch_mesh_workgroups);
+
    nir_lower_task_shader_options lower_ts_opt = {
       .payload_to_shared_for_atomics = true,
       .payload_to_shared_for_small_types = true,