1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
/* File-scope state of the OpenACC runtime.
   NOTE(review): every line of this listing carries a stray leading number
   and short lines (blank lines, braces, "#else"/"#endif", ...) appear to be
   missing; confirm the exact layout against the pristine source.  */
38 /* This lock is used to protect access to cached_base_dev, dispatchers and
39 the (abstract) initialisation state of attached offloading devices. */
41 static gomp_mutex_t acc_device_lock;
43 /* A cached version of the dispatcher for the global "current" accelerator type,
44 e.g. used as the default when creating new host threads. This is the
45 device-type equivalent of goacc_device_num (which specifies which device to
46 use out of potentially several of the same type). If there are several
47 devices of a given type, this points at the first one. */
49 static struct gomp_device_descr *cached_base_dev = NULL;
/* Per-thread pointer to the calling thread's goacc_thread block when
   thread-local storage is available.  */
51 #if defined HAVE_TLS || defined USE_EMUTLS
52 __thread struct goacc_thread *goacc_tls_data;
/* NOTE(review): presumably the non-TLS alternative of the conditional above
   (this key is only created under the negated condition in
   goacc_runtime_initialize); the "#else"/"#endif" lines are not visible.  */
54 pthread_key_t goacc_tls_key;
/* Key created with goacc_destroy_thread as its destructor (see
   goacc_runtime_initialize); goacc_new_thread stores the thread block in it.  */
56 static pthread_key_t goacc_cleanup_key;
/* Head of the list of goacc_thread blocks (linked through ->next) and the
   lock taken around accesses to that list.  */
58 static struct goacc_thread *goacc_threads;
59 static gomp_mutex_t goacc_thread_lock;
61 /* An array of dispatchers for device types, indexed by the type. This array
62 only references "base" devices, and other instances of the same type are
63 found by simply indexing from each such device (which are stored linearly,
64 grouped by device in target.c:devices). */
65 static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
/* Record DISP as the dispatcher for its device type.  The slot
   dispatchers[DISP->type] is asserted to be empty and is then filled while
   acc_device_lock is held.
   NOTE(review): the return type, the braces and the statement guarded by the
   target_id test are not visible in this listing.  */
68 goacc_register (struct gomp_device_descr *disp)
70 /* Only register the 0th device here. */
71 if (disp->target_id != 0)
74 gomp_mutex_lock (&acc_device_lock);
/* The pseudo types none/default/not_host must never be registered.  */
76 assert (acc_device_type (disp->type) != acc_device_none
77 && acc_device_type (disp->type) != acc_device_default
78 && acc_device_type (disp->type) != acc_device_not_host);
/* Each device type may be registered only once.  */
79 assert (!dispatchers[disp->type]);
80 dispatchers[disp->type] = disp;
82 gomp_mutex_unlock (&acc_device_lock);
/* Return whether D lies in the range [0, _ACC_device_hwm), which is the
   valid index range of the dispatchers array.
   NOTE(review): the return type line is not visible in this listing.  */
86 known_device_type_p (acc_device_t d)
88 return d >= 0 && d < _ACC_device_hwm;
/* Report INVALID_TYPE as an unknown device type through gomp_fatal.
   NOTE(review): INVALID_TYPE is an acc_device_t but is formatted with "%u";
   confirm the enum's underlying type is unsigned or add an explicit cast.  */
92 unknown_device_type_error (acc_device_t invalid_type)
94 gomp_fatal ("unknown device type %u", invalid_type);
97 /* OpenACC names some things a little differently. */
/* Compare NAME against the strings "gcn" and "nvptx".
   NOTE(review): the value returned in each case, and for any other NAME, is
   not visible in this listing; presumably the OpenACC spelling of the device
   type -- confirm against the pristine source.  NAME must not be NULL, since
   it is passed straight to strcmp.  */
100 get_openacc_name (const char *name)
102 if (strcmp (name, "gcn") == 0)
104 else if (strcmp (name, "nvptx") == 0)
/* Return a printable name for device type TYPE.  A value without a case
   label below is reported through unknown_device_type_error.
   NOTE(review): the return type and the switch header are not visible in
   this listing.  */
111 name_of_acc_device_t (enum acc_device_t type)
115 case acc_device_none: return "none";
116 case acc_device_default: return "default";
117 case acc_device_host: return "host";
118 case acc_device_not_host: return "not_host";
119 case acc_device_nvidia: return "nvidia";
120 case acc_device_radeon: return "radeon";
121 default: unknown_device_type_error (type);
/* NOTE(review): relies on unknown_device_type_error never returning;
   confirm gomp_fatal is declared noreturn.  */
123 __builtin_unreachable ();
126 /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR
127 is true, this function raises an error if there are no devices of type D,
128 otherwise it returns NULL in that case. */
/* Returns the dispatchers[] entry selected for D.
   NOTE(review): the switch header, several branch conditions, the
   "return NULL" paths and the targets of the goto statements are not visible
   in this listing; the comments below describe only what is shown.  */
130 static struct gomp_device_descr *
131 resolve_device (acc_device_t d, bool fail_is_error)
/* Remember the requested type; D itself is advanced while scanning.  */
133 acc_device_t d_arg = d;
137 case acc_device_default:
/* A device type name was configured: look for a registered dispatcher whose
   OpenACC name matches it (case-insensitively) and which reports at least
   one device.  */
139 if (goacc_device_type)
141 /* Lookup the named device. */
142 while (known_device_type_p (++d))
144 && !strcasecmp (goacc_device_type,
145 get_openacc_name (dispatchers[d]->name))
146 && dispatchers[d]->get_num_devices_func () > 0)
/* The lock is released before the fatal error is raised.  */
151 gomp_mutex_unlock (&acc_device_lock);
152 gomp_fatal ("device type %s not supported", goacc_device_type);
158 /* No default device specified, so start scanning for any non-host
159 device that is available. */
160 d = acc_device_not_host;
164 case acc_device_not_host:
165 /* Find the first available device after acc_device_not_host. */
166 while (known_device_type_p (++d))
167 if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
/* NOTE(review): the action taken when the original request was
   acc_device_default is not visible here.  */
169 if (d_arg == acc_device_default)
176 gomp_mutex_unlock (&acc_device_lock);
177 gomp_fatal ("no device found");
183 case acc_device_host:
187 if (!known_device_type_p (d))
190 goto unsupported_device;
/* By this point D must name a concrete device type.  */
198 assert (d != acc_device_none
199 && d != acc_device_default
200 && d != acc_device_not_host);
/* No dispatcher registered for D: fatal only when the caller asked for it;
   otherwise the NULL entry is returned below.  */
202 if (dispatchers[d] == NULL && fail_is_error)
205 gomp_mutex_unlock (&acc_device_lock);
206 gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
209 return dispatchers[d];
212 /* Emit a suitable error if no device of a particular type is available, or
213 the given device number is out-of-range. */
/* D is the device type, ORD the requested device number and NDEVS the number
   of devices of that type.
   NOTE(review): the condition choosing between the two messages is not
   visible in this listing.  ORD is an int but is formatted with "%u", so a
   negative ORD would print as a large unsigned value; consider "%d".  */
215 acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
218 gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
220 gomp_fatal ("device %u out of range", ord);
223 /* This is called when plugins have been initialized, and serves to call
224 (indirectly) the target's device_init hook. Calling multiple times without
225 an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be
226 held before calling this function. */
/* D is the requested device type; PARENT_CONSTRUCT and IMPLICIT are copied
   into the profiling event record.  The device actually initialised is
   instance goacc_device_num of the resolved type.
   NOTE(review): the conditions guarding the profiling code, and the return
   statement, are not visible in this listing.  */
228 static struct gomp_device_descr *
229 acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
231 bool check_not_nested_p;
234 /* In the implicit case, something should (TODO: must?) already have been
235 set up for an outer construct. */
236 check_not_nested_p = false;
240 check_not_nested_p = true;
241 /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')?
242 The problem is, that we don't have 'thr' yet? (So,
243 'check_not_nested_p = true' also is pointless actually.) */
245 bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p);
/* Fill in the profiling record for the device_init_start event; no source
   location or thread information is available here.  */
247 acc_prof_info prof_info;
250 prof_info.event_type = acc_ev_device_init_start;
251 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
252 prof_info.version = _ACC_PROF_INFO_VERSION;
253 prof_info.device_type = d;
254 prof_info.device_number = goacc_device_num;
255 prof_info.thread_id = -1;
256 prof_info.async = acc_async_sync;
257 prof_info.async_queue = prof_info.async;
258 prof_info.src_file = NULL;
259 prof_info.func_name = NULL;
260 prof_info.line_no = -1;
261 prof_info.end_line_no = -1;
262 prof_info.func_line_no = -1;
263 prof_info.func_end_line_no = -1;
/* Event-specific data accompanying the profiling record.  */
265 acc_event_info device_init_event_info;
268 device_init_event_info.other_event.event_type = prof_info.event_type;
269 device_init_event_info.other_event.valid_bytes
270 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
271 device_init_event_info.other_event.parent_construct = parent_construct;
272 device_init_event_info.other_event.implicit = implicit;
273 device_init_event_info.other_event.tool_info = NULL;
/* API data: no device API, handles or vendor are known before the device
   has been initialised.  */
275 acc_api_info api_info;
278 api_info.device_api = acc_device_api_none;
279 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
280 api_info.device_type = prof_info.device_type;
281 api_info.vendor = -1;
282 api_info.device_handle = NULL;
283 api_info.context_handle = NULL;
284 api_info.async_handle = NULL;
/* Dispatch the "start" event.  */
288 goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info);
290 struct gomp_device_descr *base_dev, *acc_dev;
/* Resolve the base device for D (fatal if unavailable) and check that the
   global device number is valid for it.  */
293 base_dev = resolve_device (d, true);
295 ndevs = base_dev->get_num_devices_func ();
297 if (ndevs <= 0 || goacc_device_num >= ndevs)
298 acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
300 acc_dev = &base_dev[goacc_device_num];
/* Initialise the device under its own lock; a device that is already
   initialised is a fatal error (the lock is dropped first).  */
302 gomp_mutex_lock (&acc_dev->lock);
303 if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
305 gomp_mutex_unlock (&acc_dev->lock);
306 gomp_fatal ("device already active");
309 gomp_init_device (acc_dev);
310 gomp_mutex_unlock (&acc_dev->lock);
/* Re-use the same records to dispatch the matching "end" event.  */
314 prof_info.event_type = acc_ev_device_init_end;
315 device_init_event_info.other_event.event_type = prof_info.event_type;
316 goacc_profiling_dispatch (&prof_info, &device_init_event_info,
323 /* ACC_DEVICE_LOCK must be held before calling this function. */
/* Shut down every device of type D: unload the devices, detach all host
   threads recorded in goacc_threads, then finalise each device that is in
   the initialised state.
   NOTE(review): some declarations (e.g. of ndevs, i and ret) and several
   conditions are not visible in this listing.  */
326 acc_shutdown_1 (acc_device_t d)
328 struct gomp_device_descr *base_dev;
329 struct goacc_thread *walk;
331 bool devices_active = false;
333 /* Get the base device for this device type. */
334 base_dev = resolve_device (d, true);
336 ndevs = base_dev->get_num_devices_func ();
338 /* Unload all the devices of this type that have been opened. */
339 for (i = 0; i < ndevs; i++)
341 struct gomp_device_descr *acc_dev = &base_dev[i];
343 gomp_mutex_lock (&acc_dev->lock);
344 gomp_unload_device (acc_dev);
345 gomp_mutex_unlock (&acc_dev->lock);
/* The thread list is walked with goacc_thread_lock held.  */
348 gomp_mutex_lock (&goacc_thread_lock);
350 /* Free target-specific TLS data and close all devices. */
351 for (walk = goacc_threads; walk != NULL; walk = walk->next)
/* NOTE(review): the destroy hook of BASE_DEV is used for every listed
   thread; no check that WALK is attached to a device of this type is
   visible here -- confirm.  */
353 if (walk->target_tls)
354 base_dev->openacc.destroy_thread_data_func (walk->target_tls);
356 walk->target_tls = NULL;
358 /* This would mean the user is shutting down OpenACC in the middle of an
359 "acc data" pragma. Likely not intentional. */
360 if (walk->mapped_data)
362 gomp_mutex_unlock (&goacc_thread_lock);
363 gomp_fatal ("shutdown in 'acc data' region");
366 /* Similarly, if this happens then user code has done something weird. */
367 if (walk->saved_bound_dev)
369 gomp_mutex_unlock (&goacc_thread_lock);
370 gomp_fatal ("shutdown during host fallback");
/* Drain the device's mapping table by repeatedly removing the entry at its
   root, under the device lock.
   NOTE(review): any NULL guards on walk->dev and k->aux are not visible in
   this listing.  */
375 gomp_mutex_lock (&walk->dev->lock);
377 while (walk->dev->mem_map.root)
379 splay_tree_key k = &walk->dev->mem_map.root->key;
381 k->aux->link_key = NULL;
382 gomp_remove_var (walk->dev, k);
385 gomp_mutex_unlock (&walk->dev->lock);
388 walk->base_dev = NULL;
392 gomp_mutex_unlock (&goacc_thread_lock);
394 /* Close all the devices of this type that have been opened. */
396 for (i = 0; i < ndevs; i++)
398 struct gomp_device_descr *acc_dev = &base_dev[i];
399 gomp_mutex_lock (&acc_dev->lock);
400 if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
/* Record that at least one device was active and accumulate the result of
   finalising it; the state is reset regardless of that result.  */
402 devices_active = true;
403 ret &= gomp_fini_device (acc_dev);
404 acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
406 gomp_mutex_unlock (&acc_dev->lock);
/* NOTE(review): presumably guarded by !ret and !devices_active
   respectively; the conditions are not visible here.  */
410 gomp_fatal ("device finalization failed");
413 gomp_fatal ("no device initialized");
/* Allocate a goacc_thread block for the calling host thread, publish it via
   thread-local storage or goacc_tls_key, store it under goacc_cleanup_key
   (whose destructor is goacc_destroy_thread), and link it into the
   goacc_threads list under goacc_thread_lock.
   NOTE(review): only thr->next is visibly set; the update of the list head,
   the "#else"/"#endif" lines and the return statement are not visible in
   this listing.  The results of pthread_setspecific are ignored.  */
416 static struct goacc_thread *
417 goacc_new_thread (void)
419 struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
421 #if defined HAVE_TLS || defined USE_EMUTLS
422 goacc_tls_data = thr;
424 pthread_setspecific (goacc_tls_key, thr);
427 pthread_setspecific (goacc_cleanup_key, thr);
429 gomp_mutex_lock (&goacc_thread_lock);
430 thr->next = goacc_threads;
432 gomp_mutex_unlock (&goacc_thread_lock);
/* Destructor registered for goacc_cleanup_key (see
   goacc_runtime_initialize).  DATA is the goacc_thread block stored under
   that key.  With goacc_thread_lock held, it releases the thread's
   target-specific data through the device's destroy hook and unlinks the
   block from the goacc_threads list.
   NOTE(review): any NULL check on DATA, the list-match condition and the
   release of the block itself are not visible in this listing.  */
438 goacc_destroy_thread (void *data)
440 struct goacc_thread *thr = data, *walk, *prev;
442 gomp_mutex_lock (&goacc_thread_lock);
446 struct gomp_device_descr *acc_dev = thr->dev;
448 if (acc_dev && thr->target_tls)
450 acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
451 thr->target_tls = NULL;
/* A thread must not have mapped data left when it is destroyed.  */
454 assert (!thr->mapped_data);
456 /* Remove from thread list. */
457 for (prev = NULL, walk = goacc_threads; walk;
458 prev = walk, walk = walk->next)
/* Unlink either at the head of the list or after PREV.  */
462 goacc_threads = walk->next;
464 prev->next = walk->next;
474 gomp_mutex_unlock (&goacc_thread_lock);
477 /* Use the ORD'th device instance for the current host thread (or -1 for the
478 current global default). The device (and the runtime) must be initialised
479 before calling this function. */
/* NOTE(review): the return type, braces, several conditions (e.g. the guard
   on "ord = goacc_device_num" and on the goacc_new_thread call) and the
   left-hand side of the final assignment are not visible in this listing.  */
482 goacc_attach_host_thread_to_device (int ord)
484 struct goacc_thread *thr = goacc_thread ();
485 struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
/* Tests whether the thread already has a device and either that device's
   target_id equals ORD or ORD is negative; presumably nothing needs doing
   in that case -- the guarded statement is not visible.  */
488 if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
492 ord = goacc_device_num;
/* NOTE(review): the closing line of the comment that starts on the next
   line is missing from this listing, so as written it runs on to the next
   comment terminator and swallows the code in between; restore it from the
   pristine source.  That code picks the thread's own base_dev if set, else
   cached_base_dev, range-checks ORD against the device count, creates the
   thread block and resets its fields.  */
494 /* Decide which type of device to use. If the current thread has a device
495 type already (e.g. set by acc_set_device_type), use that, else use the
497 if (thr && thr->base_dev)
498 base_dev = thr->base_dev;
501 assert (cached_base_dev);
502 base_dev = cached_base_dev;
505 num_devices = base_dev->get_num_devices_func ();
506 if (num_devices <= 0 || ord >= num_devices)
507 acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
511 thr = goacc_new_thread ();
513 thr->base_dev = base_dev;
514 thr->dev = acc_dev = &base_dev[ord];
515 thr->saved_bound_dev = NULL;
516 thr->mapped_data = NULL;
517 thr->prof_info = NULL;
518 thr->api_info = NULL;
519 /* Initially, all callbacks for all events are enabled. */
520 thr->prof_callbacks_enabled = true;
/* Result of the device's create_thread_data hook for instance ORD.  */
523 = acc_dev->openacc.create_thread_data_func (ord);
526 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
527 init/shutdown is per-process or per-thread. We choose per-process. */
/* Initialise the device of type D: reject unknown types, make sure the
   offload targets are set up, run acc_init_1 under acc_device_lock (caching
   its result as the global base device), then attach the calling thread to
   the default device instance.
   NOTE(review): the return type, braces and any alias declaration are not
   visible in this listing.  */
530 acc_init (acc_device_t d)
532 if (!known_device_type_p (d))
533 unknown_device_type_error (d);
535 gomp_init_targets_once ();
537 gomp_mutex_lock (&acc_device_lock);
538 cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0);
539 gomp_mutex_unlock (&acc_device_lock);
541 goacc_attach_host_thread_to_device (-1);
/* Shut down devices of type D.  Unknown types are rejected first and the
   offload targets are set up before acc_device_lock is taken.
   NOTE(review): the statement executed while the lock is held (presumably
   the call to acc_shutdown_1) is not visible in this listing.  */
547 acc_shutdown (acc_device_t d)
549 if (!known_device_type_p (d))
550 unknown_device_type_error (d);
552 gomp_init_targets_once ();
554 gomp_mutex_lock (&acc_device_lock);
558 gomp_mutex_unlock (&acc_device_lock);
561 ialias (acc_shutdown)
/* Query the number of devices of type D.  Unknown types are rejected; the
   device is resolved with FAIL_IS_ERROR false, so resolve_device may return
   NULL instead of raising an error.
   NOTE(review): the declaration of n, the handling of acc_device_none, the
   NULL check on acc_dev and the return statements are not visible in this
   listing.  */
564 acc_get_num_devices (acc_device_t d)
566 if (!known_device_type_p (d))
567 unknown_device_type_error (d);
570 struct gomp_device_descr *acc_dev;
572 if (d == acc_device_none)
575 gomp_init_targets_once ();
577 gomp_mutex_lock (&acc_device_lock);
578 acc_dev = resolve_device (d, false);
579 gomp_mutex_unlock (&acc_device_lock);
584 n = acc_dev->get_num_devices_func ();
591 ialias (acc_get_num_devices)
593 /* Set the device type for the current thread only (using the current global
594 default device number), initialising that device if necessary. Also set the
595 default device type for new threads to D. */
/* NOTE(review): the return type, braces and the conditions guarding the
   profiling statements are not visible in this listing.  */
598 acc_set_device_type (acc_device_t d)
600 if (!known_device_type_p (d))
601 unknown_device_type_error (d);
603 struct gomp_device_descr *base_dev, *acc_dev;
604 struct goacc_thread *thr = goacc_thread ();
606 acc_prof_info prof_info;
607 acc_api_info api_info;
608 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
610 prof_info.device_type = d;
612 gomp_init_targets_once ();
614 gomp_mutex_lock (&acc_device_lock);
/* The resolved base device also becomes the global default.
   NOTE(review): base_dev is indexed by goacc_device_num without comparing
   it to get_num_devices_func (), unlike acc_init_1 and acc_set_device_num;
   confirm the index cannot be out of range for the new type.  */
616 cached_base_dev = base_dev = resolve_device (d, true);
617 acc_dev = &base_dev[goacc_device_num];
/* Initialise the device on first use, under its own lock.  */
619 gomp_mutex_lock (&acc_dev->lock);
620 if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
621 gomp_init_device (acc_dev);
622 gomp_mutex_unlock (&acc_dev->lock);
624 gomp_mutex_unlock (&acc_device_lock);
626 /* We're changing device type: invalidate the current thread's dev and
627 base_dev pointers. */
628 if (thr && thr->base_dev != base_dev)
630 thr->base_dev = thr->dev = NULL;
631 if (thr->mapped_data)
632 gomp_fatal ("acc_set_device_type in 'acc data' region");
/* Attach the calling thread to the default instance of the new type.  */
635 goacc_attach_host_thread_to_device (-1);
/* Drop the pointers to the local profiling records.  */
639 thr->prof_info = NULL;
640 thr->api_info = NULL;
644 ialias (acc_set_device_type)
/* Return the device type in use by the calling thread.  When the thread has
   a base device, its type is used directly; the remaining code resolves
   acc_device_default under acc_device_lock and uses that device's type.
   NOTE(review): the return type, the "else" separating the two paths, the
   profiling guards and the return statement are not visible in this
   listing.  */
647 acc_get_device_type (void)
649 acc_device_t res = acc_device_none;
650 struct gomp_device_descr *dev;
651 struct goacc_thread *thr = goacc_thread ();
653 if (thr && thr->base_dev)
654 res = acc_device_type (thr->base_dev->type);
657 acc_prof_info prof_info;
658 acc_api_info api_info;
659 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
661 gomp_init_targets_once ();
663 gomp_mutex_lock (&acc_device_lock);
664 dev = resolve_device (acc_device_default, true);
665 gomp_mutex_unlock (&acc_device_lock);
666 res = acc_device_type (dev->type);
/* Drop the pointers to the local profiling records.  */
670 thr->prof_info = NULL;
671 thr->api_info = NULL;
/* The result must be a concrete device type, never a pseudo type.  */
675 assert (res != acc_device_default
676 && res != acc_device_not_host
677 && res != acc_device_current);
682 ialias (acc_get_device_type)
/* Return the device number in use for device type D: the target_id of the
   calling thread's device when the thread is attached to a device of that
   type, otherwise the global goacc_device_num.
   NOTE(review): the return type, braces and the profiling guards are not
   visible in this listing.  */
685 acc_get_device_num (acc_device_t d)
687 if (!known_device_type_p (d))
688 unknown_device_type_error (d);
690 const struct gomp_device_descr *dev;
691 struct goacc_thread *thr = goacc_thread ();
693 acc_prof_info prof_info;
694 acc_api_info api_info;
695 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
697 prof_info.device_type = d;
699 gomp_init_targets_once ();
/* Resolve D to its base device; a missing device is a fatal error.  */
701 gomp_mutex_lock (&acc_device_lock);
702 dev = resolve_device (d, true);
703 gomp_mutex_unlock (&acc_device_lock);
/* Drop the pointers to the local profiling records.  */
707 thr->prof_info = NULL;
708 thr->api_info = NULL;
711 if (thr && thr->base_dev == dev && thr->dev)
712 return thr->dev->target_id;
714 return goacc_device_num;
717 ialias (acc_get_device_num)
/* Select device instance ORD of device type D.  Unknown types are rejected.
   The visible code has two attach paths: one that only re-attaches the
   calling thread to instance ORD, and one that resolves D, range-checks
   ORD, initialises the device if needed and then attaches the thread.
   Finally the global goacc_device_num is set to ORD.
   NOTE(review): the conditions selecting between these paths and guarding
   "ord = goacc_device_num", and the declaration of num_devices, are not
   visible in this listing.  */
720 acc_set_device_num (int ord, acc_device_t d)
722 if (!known_device_type_p (d))
723 unknown_device_type_error (d);
725 struct gomp_device_descr *base_dev, *acc_dev;
728 gomp_init_targets_once ();
731 ord = goacc_device_num;
734 /* Set whatever device is being used by the current host thread to use
735 device instance ORD. It's unclear if this is supposed to affect other
736 host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */
737 goacc_attach_host_thread_to_device (ord);
740 gomp_mutex_lock (&acc_device_lock);
/* The resolved base device also becomes the global default.  */
742 cached_base_dev = base_dev = resolve_device (d, true);
744 num_devices = base_dev->get_num_devices_func ();
746 if (num_devices <= 0 || ord >= num_devices)
747 acc_dev_num_out_of_range (d, ord, num_devices);
749 acc_dev = &base_dev[ord];
/* Initialise the device on first use, under its own lock.  */
751 gomp_mutex_lock (&acc_dev->lock);
752 if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
753 gomp_init_device (acc_dev);
754 gomp_mutex_unlock (&acc_dev->lock);
756 gomp_mutex_unlock (&acc_device_lock);
758 goacc_attach_host_thread_to_device (ord);
761 goacc_device_num = ord;
764 ialias (acc_set_device_num)
/* Query property PROP of device ORD of type D through the device's
   get_property hook.  For acc_device_current with an attached thread, the
   thread's own device is queried and ORD is not used.
   NOTE(review): the visible range check rejects ORD >= num_devices but not
   a negative ORD, and the step that selects instance ORD from the resolved
   base device is not visible in this listing -- confirm both against the
   pristine source.  */
766 static union goacc_property_value
767 get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
/* Make sure the runtime and the calling thread are initialised.  */
769 goacc_lazy_initialize ();
770 struct goacc_thread *thr = goacc_thread ();
772 if (d == acc_device_current && thr && thr->dev)
773 return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
775 gomp_mutex_lock (&acc_device_lock);
777 struct gomp_device_descr *dev = resolve_device (d, true);
779 int num_devices = dev->get_num_devices_func ();
781 if (num_devices <= 0 || ord >= num_devices)
782 acc_dev_num_out_of_range (d, ord, num_devices);
/* Initialise the device on first use, under its own lock.  */
786 gomp_mutex_lock (&dev->lock);
787 if (dev->state == GOMP_DEVICE_UNINITIALIZED)
788 gomp_init_device (dev);
789 gomp_mutex_unlock (&dev->lock);
791 gomp_mutex_unlock (&acc_device_lock);
795 return dev->openacc.get_property_func (dev->target_id, prop);
/* Return the "val" member of the property value obtained from
   get_property_any for device ORD of type D.  Unknown device types are
   rejected first.
   NOTE(review): the return type and the statement taken when PROP has
   GOACC_PROPERTY_STRING_MASK bits set are not visible in this listing.  */
799 acc_get_property (int ord, acc_device_t d, acc_device_property_t prop)
801 if (!known_device_type_p (d))
802 unknown_device_type_error(d);
804 if (prop & GOACC_PROPERTY_STRING_MASK)
807 return get_property_any (ord, d, prop).val;
810 ialias (acc_get_property)
/* When PROP has GOACC_PROPERTY_STRING_MASK bits set, return the "ptr"
   member of the property value obtained from get_property_any for device
   ORD of type D.  Unknown device types are rejected first.
   NOTE(review): the return type and the value returned for other
   properties are not visible in this listing.  */
813 acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop)
815 if (!known_device_type_p (d))
816 unknown_device_type_error(d);
818 if (prop & GOACC_PROPERTY_STRING_MASK)
819 return get_property_any (ord, d, prop).ptr;
824 ialias (acc_get_property_string)
826 /* For -O and higher, the compiler always attempts to expand acc_on_device, but
827 if the user disables the builtin, or calls it via a pointer, we'll need this
830 Compile this with optimization, so that the compiler expands
831 this, rather than generating infinitely recursive code.
833 The function just forwards its argument to __builtin_acc_on_device. It does
834 not verify that the argument is a valid acc_device_t enumeration value. */
/* Out-of-line definition; the optimize attribute forces O2 for this
   function only, for the reason given above.  */
836 int __attribute__ ((__optimize__ ("O2")))
837 acc_on_device (acc_device_t dev)
839 return __builtin_acc_on_device (dev);
842 ialias (acc_on_device)
/* One-time set-up of the OpenACC runtime state: initialise the two locks,
   create the pthread keys (goacc_cleanup_key gets goacc_destroy_thread as
   its destructor) and reset cached_base_dev and the thread list.
   NOTE(review): the results of pthread_key_create are ignored, and the
   statement following the final comment is not visible in this listing.  */
844 attribute_hidden void
845 goacc_runtime_initialize (void)
847 gomp_mutex_init (&acc_device_lock);
/* goacc_tls_key is only needed when thread-local storage is unavailable.  */
849 #if !(defined HAVE_TLS || defined USE_EMUTLS)
850 pthread_key_create (&goacc_tls_key, NULL);
853 pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
855 cached_base_dev = NULL;
857 goacc_threads = NULL;
858 gomp_mutex_init (&goacc_thread_lock);
860 /* Initialize and register the 'host' device type. */
/* Registered through the destructor attribute; deletes the pthread keys
   created by goacc_runtime_initialize.
   NOTE(review): the "#endif" closing the conditional is not visible in this
   listing.  */
864 static void __attribute__((destructor))
865 goacc_runtime_deinitialize (void)
867 #if !(defined HAVE_TLS || defined USE_EMUTLS)
868 pthread_key_delete (goacc_tls_key);
870 pthread_key_delete (goacc_cleanup_key);
873 /* Compiler helper functions */
/* Save the calling thread's current device in saved_bound_dev and switch
   the thread to the dispatcher registered for device type D.  Nesting is
   not supported: a device must not already be saved.
   NOTE(review): dispatchers[d] is read without acc_device_lock and without
   checking D via known_device_type_p, and THR is dereferenced without a
   NULL check; confirm callers guarantee a valid D and an initialised
   thread.  */
875 attribute_hidden void
876 goacc_save_and_set_bind (acc_device_t d)
878 struct goacc_thread *thr = goacc_thread ();
880 assert (!thr->saved_bound_dev);
882 thr->saved_bound_dev = thr->dev;
883 thr->dev = dispatchers[d];
/* Undo goacc_save_and_set_bind: make the saved device the calling thread's
   current device again and clear the saved slot.
   NOTE(review): THR is dereferenced without a NULL check; presumably the
   thread was initialised by the earlier goacc_save_and_set_bind call.  */
886 attribute_hidden void
887 goacc_restore_bind (void)
889 struct goacc_thread *thr = goacc_thread ();
891 thr->dev = thr->saved_bound_dev;
892 thr->saved_bound_dev = NULL;
895 /* This is called from any OpenACC support function that may need to implicitly
896 initialize the libgomp runtime, either globally or from a new host thread.
897 On exit "goacc_thread" will return a valid & populated thread block. */
/* NOTE(review): the early exit that presumably follows the goacc_thread
   lookup (for a thread that is already set up) is not visible in this
   listing.  */
899 attribute_hidden void
900 goacc_lazy_initialize (void)
902 struct goacc_thread *thr = goacc_thread ();
907 gomp_init_targets_once ();
/* Implicitly initialise the default device type once, recording it as the
   global base device; done under acc_device_lock.  */
909 gomp_mutex_lock (&acc_device_lock);
910 if (!cached_base_dev)
911 cached_base_dev = acc_init_1 (acc_device_default,
912 acc_construct_parallel, 1);
913 gomp_mutex_unlock (&acc_device_lock);
/* Attach the calling thread to the default device instance.  */
915 goacc_attach_host_thread_to_device (-1);