From 2cb7a501ab0be001dca4bac0d9af9298c0bfabbc Mon Sep 17 00:00:00 2001
From: Tom de Vries
Date: Sat, 12 Jan 2019 22:18:27 +0000
Subject: [PATCH] [nvptx] Enable large vectors -- reduction testcases

Add various reduction test-cases with vector length 128.

2019-01-12  Tom de Vries

	* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
	* testsuite/libgomp.oacc-fortran/gemm.f90: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test.

From-SVN: r267892
---
 libgomp/ChangeLog                                  |  6 ++
 .../vector-length-128-10.c                         | 39 +++++++++++
 .../libgomp.oacc-c-c++-common/vred2d-128.c         | 55 +++++++++++++++
 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90    | 79 ++++++++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 9cd9a79..b785e8a 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,5 +1,11 @@
 2019-01-12  Tom de Vries
 
+	* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
+	* testsuite/libgomp.oacc-fortran/gemm.f90: New test.
+	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test.
+
+2019-01-12  Tom de Vries
+
 	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: New test.
 	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: New test.
 	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: New test.
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
new file mode 100644
index 0000000..0658cfd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+
+  unsigned int res = 1;  /* Both reductions start from 1, not 0.  */
+  unsigned long long res2 = 1;
+#pragma acc parallel vector_length (128) copyin (a,b) reduction (+:res, res2) copy (res, res2)
+  {
+#pragma acc loop vector reduction (+:res, res2)
+    for (unsigned int i = 0; i < n; i++)
+      {
+        res += ((a[i] + b[i]) % 2);  /* Adds 1 when a[i] + b[i] is odd.  */
+        res2 += ((a[i] + b[i]) % 2);
+      }
+  }
+
+  if (res != 478)  /* 477 odd sums for i in [0, N) plus the initial 1.  */
+    abort ();
+  if (res2 != 478)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
new file mode 100644
index 0000000..86171d4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
@@ -0,0 +1,55 @@
+/* Test large vector lengths: inner-loop +/- reductions in a loop nest.  */
+
+#include <assert.h>
+
+#define n 10000
+int a1[n], a2[n];
+
+#define gentest(name, outer, inner) \
+  void name () \
+  { \
+    long i, j, t1, t2, t3; \
+    _Pragma(outer) \
+    for (i = 0; i < n; i++) \
+      { \
+        t1 = 0; \
+        t2 = 0; \
+        _Pragma(inner) \
+        for (j = i; j < n; j++) /* n-i iterations.  */ \
+          { \
+            t1++; \
+            t2--; \
+          } \
+        a1[i] = t1; \
+        a2[i] = t2; \
+      } \
+    for (i = 0; i < n; i++) \
+      { \
+        assert (a1[i] == n-i); /* One increment per inner iteration.  */ \
+        assert (a2[i] == -(n-i)); /* One decrement per inner iteration.  */ \
+      } \
+  } \
+
+gentest (test1, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+         "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test2, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+         "acc loop worker vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test3, "acc parallel loop gang worker vector_length (128) firstprivate (t1, t2)",
+         "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test4, "acc parallel loop firstprivate (t1, t2)",
+         "acc loop reduction(+:t1) reduction(-:t2)")
+
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
new file mode 100644
index 0000000..de78148
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
@@ -0,0 +1,79 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+
+! Explicitly set vector_length to 128 using a vector_length clause.
+subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)  ! c(i,j) = alpha*sum_l a(l,i)*b(l,j) + beta*c(i,j)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp)
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then  ! beta is zero: c(i,j) is overwritten without being read
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm_128
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)  ! same loop nest as openacc_sgemm_128, no OpenACC directives
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  do j = 1, n
+     do i = 1, m
+        temp = 0.0
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine host_sgemm
+
+program main
+  integer, parameter :: M = 100, N = 50, K = 2000
+  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)  ! c is zeroed below but passed to neither routine
+  real alpha, beta
+  integer i, j
+
+  a(:,:) = 1.0
+  b(:,:) = 0.25
+
+  c(:,:) = 0.0
+  d(:,:) = 0.0
+  e(:,:) = 0.0
+
+  alpha = 1.05
+  beta = 1.25
+
+  call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d)
+  call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+  do i = 1, m  ! m and n name the parameters M and N (Fortran is case-insensitive)
+     do j = 1, n
+        if (d(i,j) /= e(i,j)) call abort  ! exact comparison: OpenACC result d vs. host result e
+     end do
+  end do
+end program main
-- 
2.7.4