1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 static void gomp_target_init (void);
49 /* The whole initialization code for offloading plugins is only run one. */
50 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 /* Mutex for offload image registration. */
53 static gomp_mutex_t register_lock;
55 /* This structure describes an offload image.
56 It contains type of the target device, pointer to host table descriptor, and
57 pointer to target data. */
58 struct offload_image_descr {
59 enum offload_target_type type;
60 const void *host_table;
61 const void *target_data;
64 /* Array of descriptors of offload images. */
65 static struct offload_image_descr *offload_images;
67 /* Total number of offload images. */
68 static int num_offload_images;
70 /* Array of descriptors for all available devices. */
71 static struct gomp_device_descr *devices;
73 /* Total number of available devices. */
74 static int num_devices;
76 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
77 static int num_devices_openmp;
79 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 gomp_realloc_unlock (void *old, size_t size)
84 void *ret = realloc (old, size);
87 gomp_mutex_unlock (®ister_lock);
88 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
93 /* The comparison function. */
96 splay_compare (splay_tree_key x, splay_tree_key y)
98 if (x->host_start == x->host_end
99 && y->host_start == y->host_end)
101 if (x->host_end <= y->host_start)
103 if (x->host_start >= y->host_end)
108 #include "splay-tree.h"
110 attribute_hidden void
111 gomp_init_targets_once (void)
113 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
117 gomp_get_num_devices (void)
119 gomp_init_targets_once ();
120 return num_devices_openmp;
123 static struct gomp_device_descr *
124 resolve_device (int device_id)
126 if (device_id == GOMP_DEVICE_ICV)
128 struct gomp_task_icv *icv = gomp_icv (false);
129 device_id = icv->default_device_var;
132 if (device_id < 0 || device_id >= gomp_get_num_devices ())
135 return &devices[device_id];
139 /* Handle the case where splay_tree_lookup found oldn for newn.
140 Helper function of gomp_map_vars. */
143 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
144 splay_tree_key newn, unsigned char kind)
146 if ((kind & GOMP_MAP_FLAG_FORCE)
147 || oldn->host_start > newn->host_start
148 || oldn->host_end < newn->host_end)
150 gomp_mutex_unlock (&devicep->lock);
151 gomp_fatal ("Trying to map into device [%p..%p) object when "
152 "[%p..%p) is already mapped",
153 (void *) newn->host_start, (void *) newn->host_end,
154 (void *) oldn->host_start, (void *) oldn->host_end);
160 get_kind (bool is_openacc, void *kinds, int idx)
162 return is_openacc ? ((unsigned short *) kinds)[idx]
163 : ((unsigned char *) kinds)[idx];
167 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
168 uintptr_t target_offset, uintptr_t bias)
170 struct gomp_device_descr *devicep = tgt->device_descr;
171 struct splay_tree_s *mem_map = &devicep->mem_map;
172 struct splay_tree_key_s cur_node;
174 cur_node.host_start = host_ptr;
175 if (cur_node.host_start == (uintptr_t) NULL)
177 cur_node.tgt_offset = (uintptr_t) NULL;
178 /* FIXME: see comment about coalescing host/dev transfers below. */
179 devicep->host2dev_func (devicep->target_id,
180 (void *) (tgt->tgt_start + target_offset),
181 (void *) &cur_node.tgt_offset,
185 /* Add bias to the pointer value. */
186 cur_node.host_start += bias;
187 cur_node.host_end = cur_node.host_start + 1;
188 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
191 /* Could be possibly zero size array section. */
193 n = splay_tree_lookup (mem_map, &cur_node);
196 cur_node.host_start--;
197 n = splay_tree_lookup (mem_map, &cur_node);
198 cur_node.host_start++;
203 gomp_mutex_unlock (&devicep->lock);
204 gomp_fatal ("Pointer target of array section wasn't mapped");
206 cur_node.host_start -= n->host_start;
208 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
209 /* At this point tgt_offset is target address of the
210 array section. Now subtract bias to get what we want
211 to initialize the pointer with. */
212 cur_node.tgt_offset -= bias;
213 /* FIXME: see comment about coalescing host/dev transfers below. */
214 devicep->host2dev_func (devicep->target_id,
215 (void *) (tgt->tgt_start + target_offset),
216 (void *) &cur_node.tgt_offset,
220 attribute_hidden struct target_mem_desc *
221 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
222 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
223 bool is_openacc, bool is_target)
225 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
226 const int rshift = is_openacc ? 8 : 3;
227 const int typemask = is_openacc ? 0xff : 0x7;
228 struct splay_tree_s *mem_map = &devicep->mem_map;
229 struct splay_tree_key_s cur_node;
230 struct target_mem_desc *tgt
231 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
232 tgt->list_count = mapnum;
234 tgt->device_descr = devicep;
239 tgt_align = sizeof (void *);
243 size_t align = 4 * sizeof (void *);
245 tgt_size = mapnum * sizeof (void *);
248 gomp_mutex_lock (&devicep->lock);
250 for (i = 0; i < mapnum; i++)
252 int kind = get_kind (is_openacc, kinds, i);
253 if (hostaddrs[i] == NULL)
258 cur_node.host_start = (uintptr_t) hostaddrs[i];
259 if (!GOMP_MAP_POINTER_P (kind & typemask))
260 cur_node.host_end = cur_node.host_start + sizes[i];
262 cur_node.host_end = cur_node.host_start + sizeof (void *);
263 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
267 gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
273 size_t align = (size_t) 1 << (kind >> rshift);
275 if (tgt_align < align)
277 tgt_size = (tgt_size + align - 1) & ~(align - 1);
278 tgt_size += cur_node.host_end - cur_node.host_start;
279 if ((kind & typemask) == GOMP_MAP_TO_PSET)
282 for (j = i + 1; j < mapnum; j++)
283 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
286 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
287 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
288 > cur_node.host_end))
303 gomp_mutex_unlock (&devicep->lock);
304 gomp_fatal ("unexpected aggregation");
306 tgt->to_free = devaddrs[0];
307 tgt->tgt_start = (uintptr_t) tgt->to_free;
308 tgt->tgt_end = tgt->tgt_start + sizes[0];
310 else if (not_found_cnt || is_target)
312 /* Allocate tgt_align aligned tgt_size block of memory. */
313 /* FIXME: Perhaps change interface to allocate properly aligned
315 tgt->to_free = devicep->alloc_func (devicep->target_id,
316 tgt_size + tgt_align - 1);
317 tgt->tgt_start = (uintptr_t) tgt->to_free;
318 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
319 tgt->tgt_end = tgt->tgt_start + tgt_size;
330 tgt_size = mapnum * sizeof (void *);
335 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
336 splay_tree_node array = tgt->array;
339 for (i = 0; i < mapnum; i++)
340 if (tgt->list[i] == NULL)
342 int kind = get_kind (is_openacc, kinds, i);
343 if (hostaddrs[i] == NULL)
345 splay_tree_key k = &array->key;
346 k->host_start = (uintptr_t) hostaddrs[i];
347 if (!GOMP_MAP_POINTER_P (kind & typemask))
348 k->host_end = k->host_start + sizes[i];
350 k->host_end = k->host_start + sizeof (void *);
351 splay_tree_key n = splay_tree_lookup (mem_map, k);
355 gomp_map_vars_existing (devicep, n, k, kind & typemask);
359 size_t align = (size_t) 1 << (kind >> rshift);
361 tgt_size = (tgt_size + align - 1) & ~(align - 1);
363 k->tgt_offset = tgt_size;
364 tgt_size += k->host_end - k->host_start;
365 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
367 k->async_refcount = 0;
371 splay_tree_insert (mem_map, array);
372 switch (kind & typemask)
376 case GOMP_MAP_FORCE_ALLOC:
377 case GOMP_MAP_FORCE_FROM:
380 case GOMP_MAP_TOFROM:
381 case GOMP_MAP_FORCE_TO:
382 case GOMP_MAP_FORCE_TOFROM:
383 /* FIXME: Perhaps add some smarts, like if copying
384 several adjacent fields from host to target, use some
385 host buffer to avoid sending each var individually. */
386 devicep->host2dev_func (devicep->target_id,
387 (void *) (tgt->tgt_start
389 (void *) k->host_start,
390 k->host_end - k->host_start);
392 case GOMP_MAP_POINTER:
393 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
394 k->tgt_offset, sizes[i]);
396 case GOMP_MAP_TO_PSET:
397 /* FIXME: see above FIXME comment. */
398 devicep->host2dev_func (devicep->target_id,
399 (void *) (tgt->tgt_start
401 (void *) k->host_start,
402 k->host_end - k->host_start);
404 for (j = i + 1; j < mapnum; j++)
405 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
408 else if ((uintptr_t) hostaddrs[j] < k->host_start
409 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
416 gomp_map_pointer (tgt,
417 (uintptr_t) *(void **) hostaddrs[j],
419 + ((uintptr_t) hostaddrs[j]
425 case GOMP_MAP_FORCE_PRESENT:
427 /* We already looked up the memory region above and it
429 size_t size = k->host_end - k->host_start;
430 gomp_mutex_unlock (&devicep->lock);
431 #ifdef HAVE_INTTYPES_H
432 gomp_fatal ("present clause: !acc_is_present (%p, "
433 "%"PRIu64" (0x%"PRIx64"))",
434 (void *) k->host_start,
435 (uint64_t) size, (uint64_t) size);
437 gomp_fatal ("present clause: !acc_is_present (%p, "
438 "%lu (0x%lx))", (void *) k->host_start,
439 (unsigned long) size, (unsigned long) size);
443 case GOMP_MAP_FORCE_DEVICEPTR:
444 assert (k->host_end - k->host_start == sizeof (void *));
446 devicep->host2dev_func (devicep->target_id,
447 (void *) (tgt->tgt_start
449 (void *) k->host_start,
453 gomp_mutex_unlock (&devicep->lock);
454 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
464 for (i = 0; i < mapnum; i++)
466 if (tgt->list[i] == NULL)
467 cur_node.tgt_offset = (uintptr_t) NULL;
469 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
470 + tgt->list[i]->tgt_offset;
471 /* FIXME: see above FIXME comment. */
472 devicep->host2dev_func (devicep->target_id,
473 (void *) (tgt->tgt_start
474 + i * sizeof (void *)),
475 (void *) &cur_node.tgt_offset,
480 gomp_mutex_unlock (&devicep->lock);
485 gomp_unmap_tgt (struct target_mem_desc *tgt)
487 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
489 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
495 /* Decrease the refcount for a set of mapped variables, and queue asychronous
496 copies from the device back to the host after any work that has been issued.
497 Because the regions are still "live", increment an asynchronous reference
498 count to indicate that they should not be unmapped from host-side data
499 structures until the asynchronous copy has completed. */
501 attribute_hidden void
502 gomp_copy_from_async (struct target_mem_desc *tgt)
504 struct gomp_device_descr *devicep = tgt->device_descr;
507 gomp_mutex_lock (&devicep->lock);
509 for (i = 0; i < tgt->list_count; i++)
510 if (tgt->list[i] == NULL)
512 else if (tgt->list[i]->refcount > 1)
514 tgt->list[i]->refcount--;
515 tgt->list[i]->async_refcount++;
519 splay_tree_key k = tgt->list[i];
521 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
522 (void *) (k->tgt->tgt_start + k->tgt_offset),
523 k->host_end - k->host_start);
526 gomp_mutex_unlock (&devicep->lock);
529 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
530 variables back from device to host: if it is false, it is assumed that this
531 has been done already, i.e. by gomp_copy_from_async above. */
533 attribute_hidden void
534 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
536 struct gomp_device_descr *devicep = tgt->device_descr;
538 if (tgt->list_count == 0)
544 gomp_mutex_lock (&devicep->lock);
547 for (i = 0; i < tgt->list_count; i++)
548 if (tgt->list[i] == NULL)
550 else if (tgt->list[i]->refcount > 1)
551 tgt->list[i]->refcount--;
552 else if (tgt->list[i]->async_refcount > 0)
553 tgt->list[i]->async_refcount--;
556 splay_tree_key k = tgt->list[i];
557 if (k->copy_from && do_copyfrom)
558 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
559 (void *) (k->tgt->tgt_start + k->tgt_offset),
560 k->host_end - k->host_start);
561 splay_tree_remove (&devicep->mem_map, k);
562 if (k->tgt->refcount > 1)
565 gomp_unmap_tgt (k->tgt);
568 if (tgt->refcount > 1)
571 gomp_unmap_tgt (tgt);
573 gomp_mutex_unlock (&devicep->lock);
577 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
578 size_t *sizes, void *kinds, bool is_openacc)
581 struct splay_tree_key_s cur_node;
582 const int typemask = is_openacc ? 0xff : 0x7;
590 gomp_mutex_lock (&devicep->lock);
591 for (i = 0; i < mapnum; i++)
594 cur_node.host_start = (uintptr_t) hostaddrs[i];
595 cur_node.host_end = cur_node.host_start + sizes[i];
596 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
599 int kind = get_kind (is_openacc, kinds, i);
600 if (n->host_start > cur_node.host_start
601 || n->host_end < cur_node.host_end)
603 gomp_mutex_unlock (&devicep->lock);
604 gomp_fatal ("Trying to update [%p..%p) object when "
605 "only [%p..%p) is mapped",
606 (void *) cur_node.host_start,
607 (void *) cur_node.host_end,
608 (void *) n->host_start,
609 (void *) n->host_end);
611 if (GOMP_MAP_COPY_TO_P (kind & typemask))
612 devicep->host2dev_func (devicep->target_id,
613 (void *) (n->tgt->tgt_start
615 + cur_node.host_start
617 (void *) cur_node.host_start,
618 cur_node.host_end - cur_node.host_start);
619 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
620 devicep->dev2host_func (devicep->target_id,
621 (void *) cur_node.host_start,
622 (void *) (n->tgt->tgt_start
624 + cur_node.host_start
626 cur_node.host_end - cur_node.host_start);
630 gomp_mutex_unlock (&devicep->lock);
631 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
632 (void *) cur_node.host_start,
633 (void *) cur_node.host_end);
636 gomp_mutex_unlock (&devicep->lock);
639 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
640 And insert to splay tree the mapping between addresses from HOST_TABLE and
641 from loaded target image. */
644 gomp_offload_image_to_device (struct gomp_device_descr *devicep,
645 const void *host_table, const void *target_data,
646 bool is_register_lock)
648 void **host_func_table = ((void ***) host_table)[0];
649 void **host_funcs_end = ((void ***) host_table)[1];
650 void **host_var_table = ((void ***) host_table)[2];
651 void **host_vars_end = ((void ***) host_table)[3];
653 /* The func table contains only addresses, the var table contains addresses
654 and corresponding sizes. */
655 int num_funcs = host_funcs_end - host_func_table;
656 int num_vars = (host_vars_end - host_var_table) / 2;
658 /* Load image to device and get target addresses for the image. */
659 struct addr_pair *target_table = NULL;
660 int i, num_target_entries
661 = devicep->load_image_func (devicep->target_id, target_data, &target_table);
663 if (num_target_entries != num_funcs + num_vars)
665 gomp_mutex_unlock (&devicep->lock);
666 if (is_register_lock)
667 gomp_mutex_unlock (®ister_lock);
668 gomp_fatal ("Can't map target functions or variables");
671 /* Insert host-target address mapping into splay tree. */
672 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
673 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
680 tgt->device_descr = devicep;
681 splay_tree_node array = tgt->array;
683 for (i = 0; i < num_funcs; i++)
685 splay_tree_key k = &array->key;
686 k->host_start = (uintptr_t) host_func_table[i];
687 k->host_end = k->host_start + 1;
689 k->tgt_offset = target_table[i].start;
691 k->async_refcount = 0;
692 k->copy_from = false;
695 splay_tree_insert (&devicep->mem_map, array);
699 for (i = 0; i < num_vars; i++)
701 struct addr_pair *target_var = &target_table[num_funcs + i];
702 if (target_var->end - target_var->start
703 != (uintptr_t) host_var_table[i * 2 + 1])
705 gomp_mutex_unlock (&devicep->lock);
706 if (is_register_lock)
707 gomp_mutex_unlock (®ister_lock);
708 gomp_fatal ("Can't map target variables (size mismatch)");
711 splay_tree_key k = &array->key;
712 k->host_start = (uintptr_t) host_var_table[i * 2];
713 k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
715 k->tgt_offset = target_var->start;
717 k->async_refcount = 0;
718 k->copy_from = false;
721 splay_tree_insert (&devicep->mem_map, array);
728 /* This function should be called from every offload image while loading.
729 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
730 the target, and TARGET_DATA needed by target plugin. */
733 GOMP_offload_register (const void *host_table,
734 enum offload_target_type target_type,
735 const void *target_data)
738 gomp_mutex_lock (®ister_lock);
740 /* Load image to all initialized devices. */
741 for (i = 0; i < num_devices; i++)
743 struct gomp_device_descr *devicep = &devices[i];
744 gomp_mutex_lock (&devicep->lock);
745 if (devicep->type == target_type && devicep->is_initialized)
746 gomp_offload_image_to_device (devicep, host_table, target_data, true);
747 gomp_mutex_unlock (&devicep->lock);
750 /* Insert image to array of pending images. */
752 = gomp_realloc_unlock (offload_images,
753 (num_offload_images + 1)
754 * sizeof (struct offload_image_descr));
755 offload_images[num_offload_images].type = target_type;
756 offload_images[num_offload_images].host_table = host_table;
757 offload_images[num_offload_images].target_data = target_data;
759 num_offload_images++;
760 gomp_mutex_unlock (®ister_lock);
763 /* This function should be called from every offload image while unloading.
764 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
765 the target, and TARGET_DATA needed by target plugin. */
768 GOMP_offload_unregister (const void *host_table,
769 enum offload_target_type target_type,
770 const void *target_data)
772 void **host_func_table = ((void ***) host_table)[0];
773 void **host_funcs_end = ((void ***) host_table)[1];
774 void **host_var_table = ((void ***) host_table)[2];
775 void **host_vars_end = ((void ***) host_table)[3];
778 /* The func table contains only addresses, the var table contains addresses
779 and corresponding sizes. */
780 int num_funcs = host_funcs_end - host_func_table;
781 int num_vars = (host_vars_end - host_var_table) / 2;
783 gomp_mutex_lock (®ister_lock);
785 /* Unload image from all initialized devices. */
786 for (i = 0; i < num_devices; i++)
789 struct gomp_device_descr *devicep = &devices[i];
790 gomp_mutex_lock (&devicep->lock);
791 if (devicep->type != target_type || !devicep->is_initialized)
793 gomp_mutex_unlock (&devicep->lock);
797 devicep->unload_image_func (devicep->target_id, target_data);
799 /* Remove mapping from splay tree. */
800 struct splay_tree_key_s k;
801 splay_tree_key node = NULL;
804 k.host_start = (uintptr_t) host_func_table[0];
805 k.host_end = k.host_start + 1;
806 node = splay_tree_lookup (&devicep->mem_map, &k);
808 else if (num_vars > 0)
810 k.host_start = (uintptr_t) host_var_table[0];
811 k.host_end = k.host_start + (uintptr_t) host_var_table[1];
812 node = splay_tree_lookup (&devicep->mem_map, &k);
815 for (j = 0; j < num_funcs; j++)
817 k.host_start = (uintptr_t) host_func_table[j];
818 k.host_end = k.host_start + 1;
819 splay_tree_remove (&devicep->mem_map, &k);
822 for (j = 0; j < num_vars; j++)
824 k.host_start = (uintptr_t) host_var_table[j * 2];
825 k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
826 splay_tree_remove (&devicep->mem_map, &k);
835 gomp_mutex_unlock (&devicep->lock);
838 /* Remove image from array of pending images. */
839 for (i = 0; i < num_offload_images; i++)
840 if (offload_images[i].target_data == target_data)
842 offload_images[i] = offload_images[--num_offload_images];
846 gomp_mutex_unlock (®ister_lock);
849 /* This function initializes the target device, specified by DEVICEP. DEVICEP
850 must be locked on entry, and remains locked on return. */
852 attribute_hidden void
853 gomp_init_device (struct gomp_device_descr *devicep)
856 devicep->init_device_func (devicep->target_id);
858 /* Load to device all images registered by the moment. */
859 for (i = 0; i < num_offload_images; i++)
861 struct offload_image_descr *image = &offload_images[i];
862 if (image->type == devicep->type)
863 gomp_offload_image_to_device (devicep, image->host_table,
864 image->target_data, false);
867 devicep->is_initialized = true;
870 /* Free address mapping tables. MM must be locked on entry, and remains locked
873 attribute_hidden void
874 gomp_free_memmap (struct splay_tree_s *mem_map)
876 while (mem_map->root)
878 struct target_mem_desc *tgt = mem_map->root->key.tgt;
880 splay_tree_remove (mem_map, &mem_map->root->key);
886 /* This function de-initializes the target device, specified by DEVICEP.
887 DEVICEP must be locked on entry, and remains locked on return. */
889 attribute_hidden void
890 gomp_fini_device (struct gomp_device_descr *devicep)
892 if (devicep->is_initialized)
893 devicep->fini_device_func (devicep->target_id);
895 devicep->is_initialized = false;
898 /* Called when encountering a target directive. If DEVICE
899 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
900 GOMP_DEVICE_HOST_FALLBACK (or any value
901 larger than last available hw device), use host fallback.
902 FN is address of host code, UNUSED is part of the current ABI, but
903 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
904 with MAPNUM entries, with addresses of the host objects,
905 sizes of the host objects (resp. for pointer kind pointer bias
906 and assumed sizeof (void *) size) and kinds. */
909 GOMP_target (int device, void (*fn) (void *), const void *unused,
910 size_t mapnum, void **hostaddrs, size_t *sizes,
911 unsigned char *kinds)
913 struct gomp_device_descr *devicep = resolve_device (device);
916 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
919 struct gomp_thread old_thr, *thr = gomp_thread ();
921 memset (thr, '\0', sizeof (*thr));
922 if (gomp_places_list)
924 thr->place = old_thr.place;
925 thr->ts.place_partition_len = gomp_places_list_len;
928 gomp_free_thread (thr);
933 gomp_mutex_lock (&devicep->lock);
934 if (!devicep->is_initialized)
935 gomp_init_device (devicep);
936 gomp_mutex_unlock (&devicep->lock);
940 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
941 fn_addr = (void *) fn;
944 gomp_mutex_lock (&devicep->lock);
945 struct splay_tree_key_s k;
946 k.host_start = (uintptr_t) fn;
947 k.host_end = k.host_start + 1;
948 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
951 gomp_mutex_unlock (&devicep->lock);
952 gomp_fatal ("Target function wasn't mapped");
954 gomp_mutex_unlock (&devicep->lock);
956 fn_addr = (void *) tgt_fn->tgt_offset;
959 struct target_mem_desc *tgt_vars
960 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
962 struct gomp_thread old_thr, *thr = gomp_thread ();
964 memset (thr, '\0', sizeof (*thr));
965 if (gomp_places_list)
967 thr->place = old_thr.place;
968 thr->ts.place_partition_len = gomp_places_list_len;
970 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
971 gomp_free_thread (thr);
973 gomp_unmap_vars (tgt_vars, true);
977 GOMP_target_data (int device, const void *unused, size_t mapnum,
978 void **hostaddrs, size_t *sizes, unsigned char *kinds)
980 struct gomp_device_descr *devicep = resolve_device (device);
983 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
986 struct gomp_task_icv *icv = gomp_icv (false);
987 if (icv->target_data)
989 /* Even when doing a host fallback, if there are any active
990 #pragma omp target data constructs, need to remember the
991 new #pragma omp target data, otherwise GOMP_target_end_data
992 would get out of sync. */
993 struct target_mem_desc *tgt
994 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
995 tgt->prev = icv->target_data;
996 icv->target_data = tgt;
1001 gomp_mutex_lock (&devicep->lock);
1002 if (!devicep->is_initialized)
1003 gomp_init_device (devicep);
1004 gomp_mutex_unlock (&devicep->lock);
1006 struct target_mem_desc *tgt
1007 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1009 struct gomp_task_icv *icv = gomp_icv (true);
1010 tgt->prev = icv->target_data;
1011 icv->target_data = tgt;
1015 GOMP_target_end_data (void)
1017 struct gomp_task_icv *icv = gomp_icv (false);
1018 if (icv->target_data)
1020 struct target_mem_desc *tgt = icv->target_data;
1021 icv->target_data = tgt->prev;
1022 gomp_unmap_vars (tgt, true);
1027 GOMP_target_update (int device, const void *unused, size_t mapnum,
1028 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1030 struct gomp_device_descr *devicep = resolve_device (device);
1033 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1036 gomp_mutex_lock (&devicep->lock);
1037 if (!devicep->is_initialized)
1038 gomp_init_device (devicep);
1039 gomp_mutex_unlock (&devicep->lock);
1041 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1045 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1049 struct gomp_task_icv *icv = gomp_icv (true);
1050 icv->thread_limit_var
1051 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1056 #ifdef PLUGIN_SUPPORT
1058 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1060 The handles of the found functions are stored in the corresponding fields
1061 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1064 gomp_load_plugin_for_device (struct gomp_device_descr *device,
1065 const char *plugin_name)
1067 const char *err = NULL, *last_missing = NULL;
1068 int optional_present, optional_total;
1070 /* Clear any existing error. */
1073 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
1080 /* Check if all required functions are available in the plugin and store
1085 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
1091 /* Similar, but missing functions are not an error. */
1092 #define DLSYM_OPT(f, n) \
1095 const char *tmp_err; \
1096 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
1097 tmp_err = dlerror (); \
1098 if (tmp_err == NULL) \
1099 optional_present++; \
1101 last_missing = #n; \
1109 DLSYM (get_num_devices);
1110 DLSYM (init_device);
1111 DLSYM (fini_device);
1113 DLSYM (unload_image);
1118 device->capabilities = device->get_caps_func ();
1119 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1121 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1123 optional_present = optional_total = 0;
1124 DLSYM_OPT (openacc.exec, openacc_parallel);
1125 DLSYM_OPT (openacc.register_async_cleanup,
1126 openacc_register_async_cleanup);
1127 DLSYM_OPT (openacc.async_test, openacc_async_test);
1128 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
1129 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
1130 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
1131 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
1132 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
1133 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
1134 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
1135 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
1136 /* Require all the OpenACC handlers if we have
1137 GOMP_OFFLOAD_CAP_OPENACC_200. */
1138 if (optional_present != optional_total)
1140 err = "plugin missing OpenACC handler function";
1143 optional_present = optional_total = 0;
1144 DLSYM_OPT (openacc.cuda.get_current_device,
1145 openacc_get_current_cuda_device);
1146 DLSYM_OPT (openacc.cuda.get_current_context,
1147 openacc_get_current_cuda_context);
1148 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1149 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1150 /* Make sure all the CUDA functions are there if any of them are. */
1151 if (optional_present && optional_present != optional_total)
1153 err = "plugin missing OpenACC CUDA handler function";
1163 gomp_error ("while loading %s: %s", plugin_name, err);
1165 gomp_error ("missing function was %s", last_missing);
1167 dlclose (plugin_handle);
1172 /* This function initializes the runtime needed for offloading.
1173 It parses the list of offload targets and tries to load the plugins for
1174 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1175 will be set, and the array DEVICES initialized, containing descriptors for
1176 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1180 gomp_target_init (void)
1182 const char *prefix ="libgomp-plugin-";
1183 const char *suffix = SONAME_SUFFIX (1);
1184 const char *cur, *next;
1186 int i, new_num_devices;
1191 cur = OFFLOAD_TARGETS;
1195 struct gomp_device_descr current_device;
1197 next = strchr (cur, ',');
1199 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1200 + strlen (prefix) + strlen (suffix));
1207 strcpy (plugin_name, prefix);
1208 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1209 strcat (plugin_name, suffix);
1211 if (gomp_load_plugin_for_device (¤t_device, plugin_name))
1213 new_num_devices = current_device.get_num_devices_func ();
1214 if (new_num_devices >= 1)
1216 /* Augment DEVICES and NUM_DEVICES. */
1218 devices = realloc (devices, (num_devices + new_num_devices)
1219 * sizeof (struct gomp_device_descr));
1227 current_device.name = current_device.get_name_func ();
1228 /* current_device.capabilities has already been set. */
1229 current_device.type = current_device.get_type_func ();
1230 current_device.mem_map.root = NULL;
1231 current_device.is_initialized = false;
1232 current_device.openacc.data_environ = NULL;
1233 for (i = 0; i < new_num_devices; i++)
1235 current_device.target_id = i;
1236 devices[num_devices] = current_device;
1237 gomp_mutex_init (&devices[num_devices].lock);
1248 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1249 NUM_DEVICES_OPENMP. */
1250 struct gomp_device_descr *devices_s
1251 = malloc (num_devices * sizeof (struct gomp_device_descr));
1258 num_devices_openmp = 0;
1259 for (i = 0; i < num_devices; i++)
1260 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1261 devices_s[num_devices_openmp++] = devices[i];
1262 int num_devices_after_openmp = num_devices_openmp;
1263 for (i = 0; i < num_devices; i++)
1264 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1265 devices_s[num_devices_after_openmp++] = devices[i];
1267 devices = devices_s;
1269 for (i = 0; i < num_devices; i++)
1271 /* The 'devices' array can be moved (by the realloc call) until we have
1272 found all the plugins, so registering with the OpenACC runtime (which
1273 takes a copy of the pointer argument) must be delayed until now. */
1274 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1275 goacc_register (&devices[i]);
1279 #else /* PLUGIN_SUPPORT */
1280 /* If dlfcn.h is unavailable we always fallback to host execution.
1281 GOMP_target* routines are just stubs for this case. */
1283 gomp_target_init (void)
1286 #endif /* PLUGIN_SUPPORT */