+2017-05-12 Thomas Schwinge <thomas@codesourcery.com>
+
+ * c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
+ * c-c++-common/goacc/classify-kernels.c: Likewise.
+ * c-c++-common/goacc/classify-parallel.c: Likewise.
+ * c-c++-common/goacc/classify-routine.c: Likewise.
+ * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+ * gfortran.dg/goacc/classify-kernels.f95: Likewise.
+ * gfortran.dg/goacc/classify-parallel.f95: Likewise.
+ * gfortran.dg/goacc/classify-routine.f95: Likewise.
+
2017-05-11 Nathan Sidwell <nathan@acm.org>
* lib/gcc-dg.exp (schedule-cleanups): Add lang dump capability.
--- /dev/null
+/* Check offloaded function's attributes and classification for unparallelized
+ OpenACC kernels. */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-parloops1-all" }
+ { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024 /* Loop bound and extent of the array sections in the data clauses.  */
+
+extern unsigned int *__restrict a; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict b; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict c; /* Written in the loop; listed in the copyout clause.  */
+
+/* An "extern"al mapping of loop iterations/array indices makes the loop
+ unparallelizable. */
+extern unsigned int f (unsigned int);
+
+void KERNELS () /* Contains the single kernels construct of this test case.  */
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[f (i)] + b[f (i)]; /* Both reads are indexed through f, which has no visible definition.  */
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+ can't be parallelized.
+ { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
--- /dev/null
+/* Check offloaded function's attributes and classification for OpenACC
+ kernels. */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-parloops1-all" }
+ { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024 /* Loop bound and extent of the array sections in the data clauses.  */
+
+extern unsigned int *__restrict a; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict b; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict c; /* Written in the loop; listed in the copyout clause.  */
+
+void KERNELS () /* Contains the single kernels construct of this test case.  */
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[i] + b[i]; /* Element-wise sum; every access is indexed directly by i.  */
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+ can be parallelized.
+ { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
--- /dev/null
+/* Check offloaded function's attributes and classification for OpenACC
+ parallel. */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024 /* Loop bound and extent of the array sections in the data clauses.  */
+
+extern unsigned int *__restrict a; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict b; /* Read in the loop; listed in the copyin clause.  */
+extern unsigned int *__restrict c; /* Written in the loop; listed in the copyout clause.  */
+
+void PARALLEL () /* Contains the single combined parallel loop construct of this test case.  */
+{
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[i] + b[i]; /* Element-wise sum; every access is indexed directly by i.  */
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
--- /dev/null
+/* Check offloaded function's attributes and classification for OpenACC
+ routine. */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024 /* Loop bound used inside ROUTINE.  */
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c; /* a, b and c are all named in the declare directive that follows.  */
+#pragma acc declare copyin (a, b) create (c)
+
+#pragma acc routine worker
+void ROUTINE () /* Subject of the routine directive above (worker clause); body is one loop with a loop directive.  */
+{
+#pragma acc loop
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[i] + b[i]; /* Element-wise sum; every access is indexed directly by i.  */
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
--- /dev/null
+! Check offloaded function's attributes and classification for unparallelized
+! OpenACC kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main ! Contains the single kernels region of this test case.
+ implicit none
+ integer, parameter :: n = 1024 ! Array extent; the loop below runs over 0 .. n-1.
+ integer, dimension (0:n-1) :: a, b, c ! Lower bound 0, upper bound n-1.
+ integer :: i
+
+ ! An "external" mapping of loop iterations/array indices makes the loop
+ ! unparallelizable.
+ integer, external :: f
+
+ call setup(a, b) ! NOTE(review): setup is not defined here; presumably it initializes a and b -- confirm.
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do i = 0, n - 1
+ c(i) = a(f (i)) + b(f (i)) ! Both reads are indexed through the external function f.
+ end do
+ !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can't be parallelized.
+! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
--- /dev/null
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main ! Contains the single kernels region of this test case.
+ implicit none
+ integer, parameter :: n = 1024 ! Array extent; the loop below runs over 0 .. n-1.
+ integer, dimension (0:n-1) :: a, b, c ! Lower bound 0, upper bound n-1.
+ integer :: i
+
+ call setup(a, b) ! NOTE(review): setup is not defined here; presumably it initializes a and b -- confirm.
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do i = 0, n - 1
+ c(i) = a(i) + b(i) ! Element-wise sum; every access is indexed directly by i.
+ end do
+ !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
--- /dev/null
+! Check offloaded function's attributes and classification for OpenACC
+! parallel.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main ! Contains the single combined parallel loop region of this test case.
+ implicit none
+ integer, parameter :: n = 1024 ! Array extent; the loop below runs over 0 .. n-1.
+ integer, dimension (0:n-1) :: a, b, c ! Lower bound 0, upper bound n-1.
+ integer :: i
+
+ call setup(a, b) ! NOTE(review): setup is not defined here; presumably it initializes a and b -- confirm.
+
+ !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do i = 0, n - 1
+ c(i) = a(i) + b(i) ! Element-wise sum; every access is indexed directly by i.
+ end do
+ !$acc end parallel loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
--- /dev/null
+! Check offloaded function's attributes and classification for OpenACC
+! routine.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+subroutine ROUTINE ! Subject of the routine directive on the next line (worker clause).
+ !$acc routine worker
+ integer, parameter :: n = 1024 ! Array extent; the loop below runs over 0 .. n-1.
+ integer, dimension (0:n-1) :: a, b, c ! Local arrays; lower bound 0, upper bound n-1.
+ integer :: i
+
+ call setup(a, b) ! NOTE(review): setup is not defined here; presumably it initializes a and b -- confirm.
+
+ !$acc loop
+ do i = 0, n - 1
+ c(i) = a(i) + b(i) ! Element-wise sum; every access is indexed directly by i.
+ end do
+end subroutine ROUTINE
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }