/* OpenACC Runtime Library Definitions.
- Copyright (C) 2013-2018 Free Software Foundation, Inc.
+ Copyright (C) 2013-2020 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
<http://www.gnu.org/licenses/>. */
#include <assert.h>
+#include <string.h>
#include "openacc.h"
#include "libgomp.h"
#include "oacc-int.h"
-int
-acc_async_test (int async)
+static struct goacc_thread *
+get_goacc_thread (void)
{
- if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
-
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
- return thr->dev->openacc.async_test_func (async);
+ return thr;
+}
+
+static int
+validate_async_val (int async)
+{
+ if (!async_valid_p (async))
+ gomp_fatal ("invalid async-argument: %d", async);
+
+ if (async == acc_async_sync)
+ return -1;
+
+ if (async == acc_async_noval)
+ return 0;
+
+ if (async >= 0)
+ /* TODO: we reserve 0 for acc_async_noval before we can clarify the
+ semantics of "default_async". */
+ return 1 + async;
+ else
+ __builtin_unreachable ();
+}
+
+/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
+ might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
+ create the asyncqueue if it doesn't exist yet.
+
+ Unless CREATE, this will not generate any OpenACC Profiling Interface
+ events. */
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+ async = validate_async_val (async);
+ if (async < 0)
+ return NULL;
+
+ struct goacc_asyncqueue *ret_aq = NULL;
+ struct gomp_device_descr *dev = thr->dev;
+
+ gomp_mutex_lock (&dev->openacc.async.lock);
+
+ if (!create
+ && (async >= dev->openacc.async.nasyncqueue
+ || !dev->openacc.async.asyncqueue[async]))
+ goto end;
+
+ if (async >= dev->openacc.async.nasyncqueue)
+ {
+ int diff = async + 1 - dev->openacc.async.nasyncqueue;
+ dev->openacc.async.asyncqueue
+ = gomp_realloc (dev->openacc.async.asyncqueue,
+ sizeof (goacc_aq) * (async + 1));
+ memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+ 0, sizeof (goacc_aq) * diff);
+ dev->openacc.async.nasyncqueue = async + 1;
+ }
+
+ if (!dev->openacc.async.asyncqueue[async])
+ {
+ dev->openacc.async.asyncqueue[async]
+ = dev->openacc.async.construct_func (dev->target_id);
+
+ if (!dev->openacc.async.asyncqueue[async])
+ {
+ gomp_mutex_unlock (&dev->openacc.async.lock);
+ gomp_fatal ("async %d creation failed", async);
+ }
+
+ /* Link new async queue into active list. */
+ goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+ n->aq = dev->openacc.async.asyncqueue[async];
+ n->next = dev->openacc.async.active;
+ dev->openacc.async.active = n;
+ }
+
+ ret_aq = dev->openacc.async.asyncqueue[async];
+
+ end:
+ gomp_mutex_unlock (&dev->openacc.async.lock);
+ return ret_aq;
+}
+
+/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
+ might return NULL if no asyncqueue is to be used. Otherwise, create the
+ asyncqueue if it doesn't exist yet. */
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+ struct goacc_thread *thr = get_goacc_thread ();
+ return lookup_goacc_asyncqueue (thr, true, async);
}
int
-acc_async_test_all (void)
+acc_async_test (int async)
{
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
- return thr->dev->openacc.async_test_all_func ();
+ goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+ if (!aq)
+ return 1;
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ int res = thr->dev->openacc.async.test_func (aq);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ return res;
+}
+
+int
+acc_async_test_all (void)
+{
+ struct goacc_thread *thr = get_goacc_thread ();
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ int ret = 1;
+ gomp_mutex_lock (&thr->dev->openacc.async.lock);
+ for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+ if (!thr->dev->openacc.async.test_func (l->aq))
+ {
+ ret = 0;
+ break;
+ }
+ gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ return ret;
}
void
acc_wait (int async)
{
- if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
+ struct goacc_thread *thr = get_goacc_thread ();
- struct goacc_thread *thr = goacc_thread ();
+ goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+ if (!aq)
+ return;
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ if (!thr->dev->openacc.async.synchronize_func (aq))
+ gomp_fatal ("wait on %d failed", async);
- thr->dev->openacc.async_wait_func (async);
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */
void
acc_wait_async (int async1, int async2)
{
- struct goacc_thread *thr = goacc_thread ();
+ struct goacc_thread *thr = get_goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
+ /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
+ we'll always be synchronous anyways? */
+ if (!aq1)
+ return;
- thr->dev->openacc.async_wait_async_func (async1, async2);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async2;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
+ /* An async queue is always synchronized with itself. */
+ if (aq1 == aq2)
+ goto out_prof;
+
+ if (aq2)
+ {
+ if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
+ gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
+ }
+ else
+ {
+ /* TODO: Local thread synchronization.
+ Necessary for the "async2 == acc_async_sync" case, or can just skip? */
+ if (!thr->dev->openacc.async.synchronize_func (aq1))
+ gomp_fatal ("wait on %d failed", async1);
+ }
+
+ out_prof:
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
{
struct goacc_thread *thr = goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- thr->dev->openacc.async_wait_all_func ();
+ bool ret = true;
+ gomp_mutex_lock (&thr->dev->openacc.async.lock);
+ for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+ ret &= thr->dev->openacc.async.synchronize_func (l->aq);
+ gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ if (!ret)
+ gomp_fatal ("wait all failed");
}
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */
void
acc_wait_all_async (int async)
{
- if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
+ struct goacc_thread *thr = get_goacc_thread ();
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
+
+ bool ret = true;
+ gomp_mutex_lock (&thr->dev->openacc.async.lock);
+ for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+ {
+ if (waiting_queue)
+ ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
+ else
+ /* TODO: Local thread synchronization.
+ Necessary for the "async2 == acc_async_sync" case, or can just skip? */
+ ret &= thr->dev->openacc.async.synchronize_func (l->aq);
+ }
+ gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ if (!ret)
+ gomp_fatal ("wait all async(%d) failed", async);
+}
+
+void
+GOACC_wait (int async, int num_waits, ...)
+{
+ goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ /* No nesting. */
+ assert (thr->prof_info == NULL);
+ assert (thr->api_info == NULL);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ if (num_waits)
+ {
+ va_list ap;
+
+ va_start (ap, num_waits);
+ goacc_wait (async, num_waits, &ap);
+ va_end (ap);
+ }
+ else if (async == acc_async_sync)
+ acc_wait_all ();
+ else
+ acc_wait_all_async (async);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+}
+
+attribute_hidden void
+goacc_wait (int async, int num_waits, va_list *ap)
+{
+ while (num_waits--)
+ {
+ int qid = va_arg (*ap, int);
+
+ /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
+ if (qid == acc_async_noval)
+ {
+ if (async == acc_async_sync)
+ acc_wait_all ();
+ else
+ acc_wait_all_async (async);
+ break;
+ }
+
+ if (acc_async_test (qid))
+ continue;
- thr->dev->openacc.async_wait_all_async_func (async);
+ if (async == acc_async_sync)
+ acc_wait (qid);
+ else if (qid == async)
+ /* If we're waiting on the same asynchronous queue as we're
+ launching on, the queue itself will order work as
+ required, so there's no need to wait explicitly. */
+ ;
+ else
+ acc_wait_async (qid, async);
+ }
+}
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, void *ptr)
+{
+ if (!aq)
+ free (ptr);
+ else
+ devicep->openacc.async.queue_callback_func (aq, free, ptr);
+}
+
+/* This function initializes the asyncqueues for the device specified by
+ DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
+ return. */
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+ devicep->openacc.async.nasyncqueue = 0;
+ devicep->openacc.async.asyncqueue = NULL;
+ devicep->openacc.async.active = NULL;
+ gomp_mutex_init (&devicep->openacc.async.lock);
+}
+
+/* This function finalizes the asyncqueues for the device specified by DEVICEP.
+ TODO DEVICEP must be locked on entry, and remains locked on return. */
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
+{
+ bool ret = true;
+ gomp_mutex_lock (&devicep->openacc.async.lock);
+ if (devicep->openacc.async.nasyncqueue > 0)
+ {
+ goacc_aq_list next;
+ for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+ {
+ ret &= devicep->openacc.async.destruct_func (l->aq);
+ next = l->next;
+ free (l);
+ }
+ free (devicep->openacc.async.asyncqueue);
+ devicep->openacc.async.nasyncqueue = 0;
+ devicep->openacc.async.asyncqueue = NULL;
+ devicep->openacc.async.active = NULL;
+ }
+ gomp_mutex_unlock (&devicep->openacc.async.lock);
+ gomp_mutex_destroy (&devicep->openacc.async.lock);
+ return ret;
}