testsuite: Adjust possibly fragile slp-perm-9.c [PR104015]
author Kewen Lin <linkw@linux.ibm.com>
Wed, 19 Jan 2022 02:31:46 +0000 (20:31 -0600)
committer Kewen Lin <linkw@linux.ibm.com>
Wed, 19 Jan 2022 06:04:02 +0000 (00:04 -0600)
As Richard pointed out in PR104015, the test case slp-perm-9.c
can be fragile when the vectorizer tries to use different
vectorisation strategies.

As suggested, this patch makes the check insensitive to the number
of re-tries by dropping the count from the check.  To still retain
test coverage for unnecessary re-trying (for example, the re-try
that exposed PR104015 on Power9), I added two test cases to the
powerpc testsuite.

gcc/testsuite/ChangeLog:

PR tree-optimization/104015
* gcc.dg/vect/slp-perm-9.c: Adjust.
* gcc.target/powerpc/pr104015-1.c: New test.
* gcc.target/powerpc/pr104015-2.c: New test.

gcc/testsuite/gcc.dg/vect/slp-perm-9.c
gcc/testsuite/gcc.target/powerpc/pr104015-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/powerpc/pr104015-2.c [new file with mode: 0644]

index 873eddf..154c00a 100644 (file)
@@ -61,9 +61,7 @@ int main (int argc, const char* argv[])
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_perm_short || vect32 } || vect_load_lanes } } } } */
 /* We don't try permutes with a group size of 3 for variable-length
    vectors.  */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { { ! vect_perm3_short } && { ! vect_partial_vectors_usage_1 } } } xfail vect_variable_length } } } */
-/* Try to vectorize the epilogue using partial vectors.  */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 2 "vect" { target { vect_perm_short && { { ! vect_perm3_short } && vect_partial_vectors_usage_1 } } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump "permutation requires at least three vectors" "vect" { target { vect_perm_short && { ! vect_perm3_short } } xfail vect_variable_length } } } */
 /* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { ! { vect_perm3_short || vect32 } } || vect_load_lanes } } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { vect_perm3_short || vect32 } && { ! vect_load_lanes } } } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104015-1.c b/gcc/testsuite/gcc.target/powerpc/pr104015-1.c
new file mode 100644 (file)
index 0000000..895c243
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -fdump-tree-vect-details" } */
+
+/* As in PR104015, we don't expect the vectorizer to re-try some vector
+   modes for epilogues on Power9, since Power9 doesn't support partial
+   vectors by default.  */
+
+#include <stdarg.h>
+#define N 200
+
+void __attribute__((noinline))
+foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ pOutput)
+{
+  unsigned short i, a, b, c;
+
+  for (i = 0; i < N / 3; i++)
+    {
+       a = *pInput++;
+       b = *pInput++;
+       c = *pInput++;
+
+       *pOutput++ = a + b + c + 3;
+       *pOutput++ = a + b + c + 12;
+       *pOutput++ = a + b + c + 1;
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "Re-trying epilogue analysis with vector mode" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104015-2.c b/gcc/testsuite/gcc.target/powerpc/pr104015-2.c
new file mode 100644 (file)
index 0000000..ab482b1
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-require-effective-target power10_ok } */
+/* Vector with length instructions lxvl/stxvl are only enabled for 64 bit.  */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fdump-tree-vect-details" } */
+
+/* Power10 supports partial vectors for epilogues by default, so the
+   vectorizer is expected to re-try for it once.  */
+
+#include <stdarg.h>
+#define N 200
+
+void __attribute__((noinline))
+foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ pOutput)
+{
+  unsigned short i, a, b, c;
+
+  for (i = 0; i < N / 3; i++)
+    {
+       a = *pInput++;
+       b = *pInput++;
+       c = *pInput++;
+
+       *pOutput++ = a + b + c + 3;
+       *pOutput++ = a + b + c + 12;
+       *pOutput++ = a + b + c + 1;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Re-trying epilogue analysis with vector mode" 1 "vect" } } */