intel/fs: prevent large vector ops generated by peephole_ffma
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Thu, 9 Mar 2023 10:12:54 +0000 (12:12 +0200)
committerMarge Bot <emma+marge@anholt.net>
Tue, 14 Mar 2023 10:38:50 +0000 (10:38 +0000)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21782>

src/intel/compiler/brw_nir.c

index b340c09..6c9feaf 100644 (file)
@@ -1458,8 +1458,19 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
       brw_nir_optimize(nir, compiler, is_scalar);
 
    if (devinfo->ver >= 6) {
-      /* Try and fuse multiply-adds */
-      OPT(brw_nir_opt_peephole_ffma);
+      /* Try and fuse multiply-adds, if successful, run shrink_vectors to
+       * avoid peephole_ffma to generate things like this :
+       *    vec16 ssa_0 = ...
+       *    vec16 ssa_1 = fneg ssa_0
+       *    vec1  ssa_2 = ffma ssa_1, ...
+       *
+       * We want this instead :
+       *    vec16 ssa_0 = ...
+       *    vec1  ssa_1 = fneg ssa_0.x
+       *    vec1  ssa_2 = ffma ssa_1, ...
+       */
+      if (OPT(brw_nir_opt_peephole_ffma))
+         OPT(nir_opt_shrink_vectors);
    }
 
    if (is_scalar)