__builtin_shuffle sometimes should produce zip1 rather than TBL (PR82199)

author Dmitrij Pochepko <dmitrij.pochepko@bell-sw.com>

Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>

Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)
author Dmitrij Pochepko <dmitrij.pochepko@bell-sw.com>
Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 6ef2e39..110f49d 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20020,6 +20020,8 @@ struct expand_vec_perm_d
    bool testing_p;
  };
  
+static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+
  /* Generate a variable permutation.  */
  
  static void
@@ -20205,6 +20207,59 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
    return true;
  }
  
+/* Try to re-encode D's permutation on elements twice as wide and retry the
+   full suite of patterns, e.g. {0, 1, 4, 5}/V4SF becomes {0, 2}/V2DI.  Return
+   true if that retry succeeds; every index pair must be (2k, 2k + 1).  */
+static bool
+aarch64_evpc_reencode (struct expand_vec_perm_d *d)
+{
+  expand_vec_perm_d newd;
+  unsigned HOST_WIDE_INT nelt;
+
+  if (d->vec_flags != VEC_ADVSIMD)
+    return false;
+
+  /* Get the new mode: a vector of the same total size whose integer
+     elements are twice as wide, so there are half as many of them.  */
+  poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
+  unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
+  auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
+  machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
+
+  if (new_mode == word_mode)
+    return false;
+
+  /* to_constant is safe because anything other than an Advanced SIMD
+     vector was rejected above.  */
+  nelt = d->perm.length ().to_constant ();
+
+  vec_perm_builder newpermconst;
+  newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+
+  /* Map each index pair (2k, 2k + 1) to k; give up on any other pair.  */
+  for (unsigned int i = 0; i < nelt; i += 2)
+    {
+      poly_int64 elt0 = d->perm[i];
+      poly_int64 elt1 = d->perm[i + 1];
+      poly_int64 newelt;
+      if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1))
+       return false;
+      newpermconst.quick_push (newelt.to_constant ());
+    }
+  newpermconst.finalize ();
+
+  newd.vmode = new_mode;
+  newd.vec_flags = VEC_ADVSIMD;
+  newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
+  newd.op0 = d->op0 ? gen_lowpart (new_mode, d->op0) : NULL;
+  newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
+  newd.testing_p = d->testing_p;
+  newd.one_vector_p = d->one_vector_p;
+
+  newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2);
+  return aarch64_expand_vec_perm_const_1 (&newd);
+}
+
  /* Recognize patterns suitable for the UZP instructions.  */
  static bool
  aarch64_evpc_uzp (struct expand_vec_perm_d *d)
@@ -20602,6 +20657,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
         return true;
        else if (aarch64_evpc_sel (d))
         return true;
+      else if (aarch64_evpc_reencode (d))
+       return true;
        if (d->vec_flags == VEC_SVE_DATA)
         return aarch64_evpc_sve_tbl (d);
        else if (d->vec_flags == VEC_ADVSIMD)
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_3.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_3.c

new file mode 100644 (file)

index 0000000..5234f5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+/* Each repeats one adjacent float pair of A, so should be a DUP on .2d.  */
+vector float f(vector float a, vector float b)
+{
+  return __builtin_shuffle (a, a, (vector int){0, 1, 0, 1});
+}
+vector float f1(vector float a, vector float b)
+{
+  return __builtin_shuffle (a, a, (vector int){2, 3, 2, 3});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*dup[ \t]+v[0-9]+\.2d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vzip_1.c b/gcc/testsuite/gcc.target/aarch64/vzip_1.c

new file mode 100644 (file)

index 0000000..837adb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vzip_1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(2*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+  return __builtin_shuffle (a, b, (vector int){0, 2}); /* {a[0], b[0]}.  */
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2s} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vzip_2.c b/gcc/testsuite/gcc.target/aarch64/vzip_2.c

new file mode 100644 (file)

index 0000000..2287d81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vzip_2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+  /* Pairing adjacent floats turns {0, 1, 4, 5} into {0, 2}: zip1 on .2d.  */
+  return __builtin_shuffle (a, b, (vector int){0, 1, 4, 5});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vzip_3.c b/gcc/testsuite/gcc.target/aarch64/vzip_3.c

new file mode 100644 (file)

index 0000000..91c444b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vzip_3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+  /* Pairing adjacent floats turns {4, 5, 0, 1} into {2, 0}: zip1 .2d of B, A.  */
+  return __builtin_shuffle (a, b, (vector int){4, 5, 0, 1});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip1[ \t]+v[0-9]+\.2d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vzip_4.c b/gcc/testsuite/gcc.target/aarch64/vzip_4.c

new file mode 100644 (file)

index 0000000..b4047e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vzip_4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#define vector __attribute__((vector_size(4*sizeof(float))))
+
+vector float f(vector float a, vector float b)
+{
+  /* Pairing adjacent floats turns {2, 3, 6, 7} into {1, 3}: zip2 on .2d.  */
+  return __builtin_shuffle (a, b, (vector int){2, 3, 6, 7});
+}
+
+/* { dg-final { scan-assembler-times {[ \t]*zip2[ \t]+v[0-9]+\.2d} 1 } } */
author	Dmitrij Pochepko <dmitrij.pochepko@bell-sw.com>
	Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 17 Jul 2020 09:20:12 +0000 (10:20 +0100)
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/vdup_n_3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vzip_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vzip_2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vzip_3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vzip_4.c	[new file with mode: 0644]	patch \| blob