From 1404af62dc414cc6b06e6c8c94a9922e04a7986a Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 14 Dec 2018 21:42:40 +0100 Subject: [PATCH] [PR88407] [OpenACC] Correctly handle unseen async-arguments ... which turn the operation into a no-op. libgomp/ PR libgomp/88407 * plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait) (nvptx_wait_async): Unseen async-argument is a no-op. * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update. * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into... * testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this. Update. * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into... * testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this. Update From-SVN: r267150 --- libgomp/ChangeLog | 13 ++ libgomp/plugin/plugin-nvptx.c | 13 +- .../libgomp.oacc-c-c++-common/async_queue-1.c | 30 +++++ .../libgomp.oacc-c-c++-common/data-2-lib.c | 2 + .../testsuite/libgomp.oacc-c-c++-common/data-2.c | 2 + .../testsuite/libgomp.oacc-c-c++-common/lib-69.c | 7 ++ .../testsuite/libgomp.oacc-c-c++-common/lib-71.c | 122 ------------------ .../testsuite/libgomp.oacc-c-c++-common/lib-74.c | 4 + .../testsuite/libgomp.oacc-c-c++-common/lib-77.c | 138 --------------------- .../testsuite/libgomp.oacc-c-c++-common/lib-79.c | 24 ++++ libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 | 5 + 11 files changed, 93 insertions(+), 267 deletions(-) delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index d84c3f4..c1f98d7 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,5 +1,18 @@ 2018-12-14 Thomas Schwinge + PR libgomp/88407 + * plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait) + (nvptx_wait_async): Unseen async-argument is a no-op. + * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update. + * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. + * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into... + * testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this. Update. + * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into... + * testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this. Update + * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Revise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 7d0d38e..6f9b166 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1539,9 +1539,8 @@ nvptx_async_test (int async) struct ptx_stream *s; s = select_stream_for_async (async, pthread_self (), false, NULL); - if (!s) - GOMP_PLUGIN_fatal ("unknown async %d", async); + return 1; r = CUDA_CALL_NOCHECK (cuStreamQuery, s->stream); if (r == CUDA_SUCCESS) @@ -1596,7 +1595,7 @@ nvptx_wait (int async) s = select_stream_for_async (async, pthread_self (), false, NULL); if (!s) - GOMP_PLUGIN_fatal ("unknown async %d", async); + return; CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream); @@ -1610,14 +1609,14 @@ nvptx_wait_async (int async1, int async2) struct ptx_stream *s1, *s2; pthread_t self = pthread_self (); + s1 = select_stream_for_async (async1, self, false, NULL); + if (!s1) + return; + /* The stream that is waiting (rather than being waited for) doesn't necessarily have to exist already. */ s2 = select_stream_for_async (async2, self, true, NULL); - s1 = select_stream_for_async (async1, self, false, NULL); - if (!s1) - GOMP_PLUGIN_fatal ("invalid async 1\n"); - if (s1 == s2) GOMP_PLUGIN_fatal ("identical parameters"); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c index 48e1846..544b19f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c @@ -41,6 +41,36 @@ int main(void) assert (queues[i].cuda_stream == NULL); } + /* No-ops still don't initialize them. */ + { + size_t i = 0; + /* Find the first non-special async-argument. */ + while (queues[i].async < 0) + ++i; + assert (i < queues_n); + +#pragma acc wait(queues[i].async) // no-op + + ++i; + assert (i < queues_n); +#pragma acc parallel wait(queues[i].async) // no-op + ; + + ++i; + assert (i < queues_n); + acc_wait(queues[i].async); // no-op + + i += 2; + assert (i < queues_n); + acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up + + for (size_t i = 0; i < queues_n; ++i) + { + queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async); + assert (queues[i].cuda_stream == NULL); + } + } + for (size_t i = 0; i < queues_n; ++i) { /* Use the queue to initialize it. */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c index e432f8d..e9d1eda 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c @@ -69,6 +69,8 @@ main (int argc, char **argv) acc_memcpy_from_device_async (b, d_b, nbytes, 1); acc_wait (1); + /* Test unseen async-argument. */ + acc_wait (10); for (i = 0; i < N; i++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c index c0f36d3..2fc4a59 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c @@ -65,6 +65,8 @@ main (int argc, char **argv) #pragma acc update self (b[0:N]) async (1) #pragma acc wait (1) + /* Test unseen async-argument. */ +#pragma acc wait (10) for (i = 0; i < N; i++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c index 5462f12..c10beba 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c @@ -103,6 +103,13 @@ main (int argc, char **argv) abort (); } + /* Test unseen async-argument. */ + if (acc_async_test (1) != 1) + { + fprintf (stderr, "acc_async_test failed on unseen async-argument\n"); + abort (); + } + sleep (1); if (acc_async_test (0) != 1) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c deleted file mode 100644 index c85e824..0000000 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c +++ /dev/null @@ -1,122 +0,0 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-lcuda" } */ - -#include -#include -#include -#include - -int -main (int argc, char **argv) -{ - CUdevice dev; - CUfunction delay; - CUmodule module; - CUresult r; - CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; - - acc_init (acc_device_nvidia); - - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuModuleLoad (&module, "subr.ptx"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); - abort (); - } - - r = cuModuleGetFunction (&delay, module, "delay"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); - abort (); - } - - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - acc_set_cuda_stream (0, stream); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuLaunchKernel failed: %d\n", r); - abort (); - } - - fprintf (stderr, "CheCKpOInT\n"); - if (acc_async_test (1) != 0) - { - fprintf (stderr, "asynchronous operation not running\n"); - abort (); - } - - sleep ((int) (dtime / 1000.0f) + 1); - - if (acc_async_test (1) != 1) - { - fprintf (stderr, "found asynchronous operation still running\n"); - abort (); - } - - acc_unmap_data (a); - - free (a); - acc_free (d_a); - - acc_shutdown (acc_device_nvidia); - - return 0; -} - -/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "unknown async \[0-9\]+" } */ -/* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c index 0726ee4..0efcf0d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c @@ -103,6 +103,8 @@ main (int argc, char **argv) } acc_wait (0); + /* Test unseen async-argument. */ + acc_wait (1); atime = stop_timer (0); @@ -115,6 +117,8 @@ main (int argc, char **argv) start_timer (0); acc_wait (0); + /* Test unseen async-argument. */ + acc_wait (1); atime = stop_timer (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c deleted file mode 100644 index f4f196d..0000000 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c +++ /dev/null @@ -1,138 +0,0 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-lcuda" } */ - -#include -#include -#include -#include -#include -#include "timer.h" - -int -main (int argc, char **argv) -{ - CUdevice dev; - CUfunction delay; - CUmodule module; - CUresult r; - CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; - - acc_init (acc_device_nvidia); - - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuModuleLoad (&module, "subr.ptx"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); - abort (); - } - - r = cuModuleGetFunction (&delay, module, "delay"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); - abort (); - } - - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - acc_set_cuda_stream (0, stream); - - init_timers (1); - - start_timer (0); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuLaunchKernel failed: %d\n", r); - abort (); - } - - fprintf (stderr, "CheCKpOInT\n"); - acc_wait (1); - - atime = stop_timer (0); - - if (atime < dtime) - { - fprintf (stderr, "actual time < delay time\n"); - abort (); - } - - start_timer (0); - - acc_wait (1); - - atime = stop_timer (0); - - if (0.010 < atime) - { - fprintf (stderr, "actual time < delay time\n"); - abort (); - } - - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - - acc_shutdown (acc_device_nvidia); - - return 0; -} - -/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "unknown async \[0-9\]+" } */ -/* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c index ef3df13..b2e2687 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c @@ -122,6 +122,13 @@ main (int argc, char **argv) } } + if (acc_async_test (0) != 0) + abort (); + + /* Test unseen async-argument. */ + if (acc_async_test (1) != 1) + abort (); + acc_wait_async (0, 1); if (acc_async_test (0) != 0) @@ -130,6 +137,23 @@ main (int argc, char **argv) if (acc_async_test (1) != 0) abort (); + /* Test unseen async-argument. */ + { + if (acc_async_test (2) != 1) + abort (); + + acc_wait_async (2, 1); + + if (acc_async_test (0) != 0) + abort (); + + if (acc_async_test (1) != 0) + abort (); + + if (acc_async_test (2) != 1) + abort (); + } + acc_wait (1); atime = stop_timer (0); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 index 6912f67..4cf62f2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 @@ -17,9 +17,14 @@ program main call acc_wait_async (0, 1) + ! Test unseen async-argument. + if (acc_async_test (2) .neqv. .TRUE.) call abort + call acc_wait_async (2, 1) + call acc_wait (1) if (acc_async_test (0) .neqv. .TRUE.) call abort if (acc_async_test (1) .neqv. .TRUE.) call abort + if (acc_async_test (2) .neqv. .TRUE.) call abort end program -- 2.7.4