radeonsi: vectorize IO for better ALU vectorization
authorMarek Olšák <marek.olsak@amd.com>
Fri, 24 Jul 2020 20:16:46 +0000 (16:16 -0400)
committerMarge Bot <eric+marge@anholt.net>
Tue, 22 Sep 2020 02:44:53 +0000 (02:44 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6622>

src/gallium/drivers/radeonsi/si_shader_nir.c

index 8ed4044..043482d 100644 (file)
@@ -627,6 +627,30 @@ static void si_lower_io(struct nir_shader *nir)
       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
    }
 
+   /* The vectorization must be done after nir_lower_io_to_temporaries, because
+    * nir_lower_io_to_temporaries after vectorization breaks:
+    *    piglit/bin/arb_gpu_shader5-interpolateAtOffset -auto -fbo
+    * TODO: It's probably a bug in nir_lower_io_to_temporaries.
+    *
+    * The vectorizer can only vectorize this:
+    *    op src0.x, src1.x
+    *    op src0.y, src1.y
+    *
+    * So it requires that inputs are already vectors and it must be the same
+    * vector between instructions. The vectorizer doesn't create vectors
+    * from independent scalar sources, so vectorize inputs.
+    *
+    * TODO: The pass fails this for VS: assert(b.shader->info.stage != MESA_SHADER_VERTEX);
+    */
+   if (nir->info.stage != MESA_SHADER_VERTEX)
+      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_in);
+
+   /* Vectorize outputs, so that we don't split vectors before storing outputs. */
+   /* TODO: The pass fails an assertion for other shader stages. */
+   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
+       nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
+
    if (nir->info.stage == MESA_SHADER_FRAGMENT)
       si_nir_lower_color(nir);