1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
30 #include "oacc-plugin.h"
32 #include "gomp-constants.h"
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h> /* For PRIu64. */
45 #include "plugin-suffix.h"
48 static void gomp_target_init (void);
50 /* The whole initialization code for offloading plugins is only run one. */
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53 /* Mutex for offload image registration. */
54 static gomp_mutex_t register_lock;
56 /* This structure describes an offload image.
57 It contains type of the target device, pointer to host table descriptor, and
58 pointer to target data. */
59 struct offload_image_descr {
61 enum offload_target_type type;
62 const void *host_table;
63 const void *target_data;
66 /* Array of descriptors of offload images. */
67 static struct offload_image_descr *offload_images;
69 /* Total number of offload images. */
70 static int num_offload_images;
72 /* Array of descriptors for all available devices. */
73 static struct gomp_device_descr *devices;
75 /* Total number of available devices. */
76 static int num_devices;
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 static int num_devices_openmp;
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
84 gomp_realloc_unlock (void *old, size_t size)
86 void *ret = realloc (old, size);
89 gomp_mutex_unlock (®ister_lock);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
96 gomp_init_targets_once (void)
98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
102 gomp_get_num_devices (void)
104 gomp_init_targets_once ();
105 return num_devices_openmp;
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
111 if (device_id == GOMP_DEVICE_ICV)
113 struct gomp_task_icv *icv = gomp_icv (false);
114 device_id = icv->default_device_var;
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
120 gomp_mutex_lock (&devices[device_id].lock);
121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122 gomp_init_device (&devices[device_id]);
123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125 gomp_mutex_unlock (&devices[device_id].lock);
128 gomp_mutex_unlock (&devices[device_id].lock);
130 return &devices[device_id];
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
137 if (key->host_start != key->host_end)
138 return splay_tree_lookup (mem_map, key);
141 splay_tree_key n = splay_tree_lookup (mem_map, key);
146 n = splay_tree_lookup (mem_map, key);
150 return splay_tree_lookup (mem_map, key);
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 if (key->host_start != key->host_end)
157 return splay_tree_lookup (mem_map, key);
160 splay_tree_key n = splay_tree_lookup (mem_map, key);
166 gomp_device_copy (struct gomp_device_descr *devicep,
167 bool (*copy_func) (int, void *, const void *, size_t),
168 const char *dst, void *dstaddr,
169 const char *src, const void *srcaddr,
172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174 gomp_mutex_unlock (&devicep->lock);
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
181 goacc_device_copy_async (struct gomp_device_descr *devicep,
182 bool (*copy_func) (int, void *, const void *, size_t,
183 struct goacc_asyncqueue *),
184 const char *dst, void *dstaddr,
185 const char *src, const void *srcaddr,
186 size_t size, struct goacc_asyncqueue *aq)
188 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
190 gomp_mutex_unlock (&devicep->lock);
191 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
192 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
196 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
197 host to device memory transfers. */
199 struct gomp_coalesce_chunk
201 /* The starting and ending point of a coalesced chunk of memory. */
205 struct gomp_coalesce_buf
207 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
208 it will be copied to the device. */
210 struct target_mem_desc *tgt;
211 /* Array with offsets, chunks[i].start is the starting offset and
212 chunks[i].end ending offset relative to tgt->tgt_start device address
213 of chunks which are to be copied to buf and later copied to device. */
214 struct gomp_coalesce_chunk *chunks;
215 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
218 /* During construction of chunks array, how many memory regions are within
219 the last chunk. If there is just one memory region for a chunk, we copy
220 it directly to device rather than going through buf. */
224 /* Maximum size of memory region considered for coalescing. Larger copies
225 are performed directly. */
226 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
228 /* Maximum size of a gap in between regions to consider them being copied
229 within the same chunk. All the device offsets considered are within
230 newly allocated device memory, so it isn't fatal if we copy some padding
231 in between from host to device. The gaps come either from alignment
232 padding or from memory regions which are not supposed to be copied from
233 host to device (e.g. map(alloc:), map(from:) etc.). */
234 #define MAX_COALESCE_BUF_GAP (4 * 1024)
236 /* Add region with device tgt_start relative offset and length to CBUF. */
239 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
241 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
245 if (cbuf->chunk_cnt < 0)
247 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
249 cbuf->chunk_cnt = -1;
252 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
254 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
258 /* If the last chunk is only used by one mapping, discard it,
259 as it will be one host to device copy anyway and
260 memcpying it around will only waste cycles. */
261 if (cbuf->use_cnt == 1)
264 cbuf->chunks[cbuf->chunk_cnt].start = start;
265 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
270 /* Return true for mapping kinds which need to copy data from the
271 host to device for regions that weren't previously mapped. */
274 gomp_to_device_kind_p (int kind)
280 case GOMP_MAP_FORCE_ALLOC:
281 case GOMP_MAP_ALWAYS_FROM:
288 attribute_hidden void
289 gomp_copy_host2dev (struct gomp_device_descr *devicep,
290 struct goacc_asyncqueue *aq,
291 void *d, const void *h, size_t sz,
292 struct gomp_coalesce_buf *cbuf)
296 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
297 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
300 long last = cbuf->chunk_cnt - 1;
301 while (first <= last)
303 long middle = (first + last) >> 1;
304 if (cbuf->chunks[middle].end <= doff)
306 else if (cbuf->chunks[middle].start <= doff)
308 if (doff + sz > cbuf->chunks[middle].end)
309 gomp_fatal ("internal libgomp cbuf error");
310 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
319 if (__builtin_expect (aq != NULL, 0))
320 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
321 "dev", d, "host", h, sz, aq);
323 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
326 attribute_hidden void
327 gomp_copy_dev2host (struct gomp_device_descr *devicep,
328 struct goacc_asyncqueue *aq,
329 void *h, const void *d, size_t sz)
331 if (__builtin_expect (aq != NULL, 0))
332 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
333 "host", h, "dev", d, sz, aq);
335 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
339 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
341 if (!devicep->free_func (devicep->target_id, devptr))
343 gomp_mutex_unlock (&devicep->lock);
344 gomp_fatal ("error in freeing device memory block at %p", devptr);
348 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
349 gomp_map_0len_lookup found oldn for newn.
350 Helper function of gomp_map_vars. */
353 gomp_map_vars_existing (struct gomp_device_descr *devicep,
354 struct goacc_asyncqueue *aq, splay_tree_key oldn,
355 splay_tree_key newn, struct target_var_desc *tgt_var,
356 unsigned char kind, struct gomp_coalesce_buf *cbuf)
359 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
360 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
361 tgt_var->offset = newn->host_start - oldn->host_start;
362 tgt_var->length = newn->host_end - newn->host_start;
364 if ((kind & GOMP_MAP_FLAG_FORCE)
365 || oldn->host_start > newn->host_start
366 || oldn->host_end < newn->host_end)
368 gomp_mutex_unlock (&devicep->lock);
369 gomp_fatal ("Trying to map into device [%p..%p) object when "
370 "[%p..%p) is already mapped",
371 (void *) newn->host_start, (void *) newn->host_end,
372 (void *) oldn->host_start, (void *) oldn->host_end);
375 if (GOMP_MAP_ALWAYS_TO_P (kind))
376 gomp_copy_host2dev (devicep, aq,
377 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
378 + newn->host_start - oldn->host_start),
379 (void *) newn->host_start,
380 newn->host_end - newn->host_start, cbuf);
382 if (oldn->refcount != REFCOUNT_INFINITY)
387 get_kind (bool short_mapkind, void *kinds, int idx)
389 return short_mapkind ? ((unsigned short *) kinds)[idx]
390 : ((unsigned char *) kinds)[idx];
394 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
395 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
396 struct gomp_coalesce_buf *cbuf)
398 struct gomp_device_descr *devicep = tgt->device_descr;
399 struct splay_tree_s *mem_map = &devicep->mem_map;
400 struct splay_tree_key_s cur_node;
402 cur_node.host_start = host_ptr;
403 if (cur_node.host_start == (uintptr_t) NULL)
405 cur_node.tgt_offset = (uintptr_t) NULL;
406 gomp_copy_host2dev (devicep, aq,
407 (void *) (tgt->tgt_start + target_offset),
408 (void *) &cur_node.tgt_offset,
409 sizeof (void *), cbuf);
412 /* Add bias to the pointer value. */
413 cur_node.host_start += bias;
414 cur_node.host_end = cur_node.host_start;
415 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
418 gomp_mutex_unlock (&devicep->lock);
419 gomp_fatal ("Pointer target of array section wasn't mapped");
421 cur_node.host_start -= n->host_start;
423 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
424 /* At this point tgt_offset is target address of the
425 array section. Now subtract bias to get what we want
426 to initialize the pointer with. */
427 cur_node.tgt_offset -= bias;
428 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
429 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
433 gomp_map_fields_existing (struct target_mem_desc *tgt,
434 struct goacc_asyncqueue *aq, splay_tree_key n,
435 size_t first, size_t i, void **hostaddrs,
436 size_t *sizes, void *kinds,
437 struct gomp_coalesce_buf *cbuf)
439 struct gomp_device_descr *devicep = tgt->device_descr;
440 struct splay_tree_s *mem_map = &devicep->mem_map;
441 struct splay_tree_key_s cur_node;
443 const bool short_mapkind = true;
444 const int typemask = short_mapkind ? 0xff : 0x7;
446 cur_node.host_start = (uintptr_t) hostaddrs[i];
447 cur_node.host_end = cur_node.host_start + sizes[i];
448 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
449 kind = get_kind (short_mapkind, kinds, i);
452 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
454 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
455 &tgt->list[i], kind & typemask, cbuf);
460 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
462 cur_node.host_start--;
463 n2 = splay_tree_lookup (mem_map, &cur_node);
464 cur_node.host_start++;
467 && n2->host_start - n->host_start
468 == n2->tgt_offset - n->tgt_offset)
470 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
471 &tgt->list[i], kind & typemask, cbuf);
476 n2 = splay_tree_lookup (mem_map, &cur_node);
480 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
482 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
483 kind & typemask, cbuf);
487 gomp_mutex_unlock (&devicep->lock);
488 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
489 "other mapped elements from the same structure weren't mapped "
490 "together with it", (void *) cur_node.host_start,
491 (void *) cur_node.host_end);
494 static inline uintptr_t
495 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
497 if (tgt->list[i].key != NULL)
498 return tgt->list[i].key->tgt->tgt_start
499 + tgt->list[i].key->tgt_offset
500 + tgt->list[i].offset;
501 if (tgt->list[i].offset == ~(uintptr_t) 0)
502 return (uintptr_t) hostaddrs[i];
503 if (tgt->list[i].offset == ~(uintptr_t) 1)
505 if (tgt->list[i].offset == ~(uintptr_t) 2)
506 return tgt->list[i + 1].key->tgt->tgt_start
507 + tgt->list[i + 1].key->tgt_offset
508 + tgt->list[i + 1].offset
509 + (uintptr_t) hostaddrs[i]
510 - (uintptr_t) hostaddrs[i + 1];
511 return tgt->tgt_start + tgt->list[i].offset;
514 static inline __attribute__((always_inline)) struct target_mem_desc *
515 gomp_map_vars_internal (struct gomp_device_descr *devicep,
516 struct goacc_asyncqueue *aq, size_t mapnum,
517 void **hostaddrs, void **devaddrs, size_t *sizes,
518 void *kinds, bool short_mapkind,
519 enum gomp_map_vars_kind pragma_kind)
521 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
522 bool has_firstprivate = false;
523 const int rshift = short_mapkind ? 8 : 3;
524 const int typemask = short_mapkind ? 0xff : 0x7;
525 struct splay_tree_s *mem_map = &devicep->mem_map;
526 struct splay_tree_key_s cur_node;
527 struct target_mem_desc *tgt
528 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
529 tgt->list_count = mapnum;
530 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
531 tgt->device_descr = devicep;
532 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
541 tgt_align = sizeof (void *);
547 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
549 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
550 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
553 if (pragma_kind == GOMP_MAP_VARS_TARGET)
555 size_t align = 4 * sizeof (void *);
557 tgt_size = mapnum * sizeof (void *);
559 cbuf.use_cnt = 1 + (mapnum > 1);
560 cbuf.chunks[0].start = 0;
561 cbuf.chunks[0].end = tgt_size;
564 gomp_mutex_lock (&devicep->lock);
565 if (devicep->state == GOMP_DEVICE_FINALIZED)
567 gomp_mutex_unlock (&devicep->lock);
572 for (i = 0; i < mapnum; i++)
574 int kind = get_kind (short_mapkind, kinds, i);
575 if (hostaddrs[i] == NULL
576 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
578 tgt->list[i].key = NULL;
579 tgt->list[i].offset = ~(uintptr_t) 0;
582 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
584 cur_node.host_start = (uintptr_t) hostaddrs[i];
585 cur_node.host_end = cur_node.host_start;
586 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
589 gomp_mutex_unlock (&devicep->lock);
590 gomp_fatal ("use_device_ptr pointer wasn't mapped");
592 cur_node.host_start -= n->host_start;
594 = (void *) (n->tgt->tgt_start + n->tgt_offset
595 + cur_node.host_start);
596 tgt->list[i].key = NULL;
597 tgt->list[i].offset = ~(uintptr_t) 0;
600 else if ((kind & typemask) == GOMP_MAP_STRUCT)
602 size_t first = i + 1;
603 size_t last = i + sizes[i];
604 cur_node.host_start = (uintptr_t) hostaddrs[i];
605 cur_node.host_end = (uintptr_t) hostaddrs[last]
607 tgt->list[i].key = NULL;
608 tgt->list[i].offset = ~(uintptr_t) 2;
609 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
612 size_t align = (size_t) 1 << (kind >> rshift);
613 if (tgt_align < align)
615 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
616 tgt_size = (tgt_size + align - 1) & ~(align - 1);
617 tgt_size += cur_node.host_end - cur_node.host_start;
618 not_found_cnt += last - i;
619 for (i = first; i <= last; i++)
621 tgt->list[i].key = NULL;
622 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
624 gomp_coalesce_buf_add (&cbuf,
625 tgt_size - cur_node.host_end
626 + (uintptr_t) hostaddrs[i],
632 for (i = first; i <= last; i++)
633 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
638 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
640 tgt->list[i].key = NULL;
641 tgt->list[i].offset = ~(uintptr_t) 1;
642 has_firstprivate = true;
645 cur_node.host_start = (uintptr_t) hostaddrs[i];
646 if (!GOMP_MAP_POINTER_P (kind & typemask))
647 cur_node.host_end = cur_node.host_start + sizes[i];
649 cur_node.host_end = cur_node.host_start + sizeof (void *);
650 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
652 tgt->list[i].key = NULL;
654 size_t align = (size_t) 1 << (kind >> rshift);
655 if (tgt_align < align)
657 tgt_size = (tgt_size + align - 1) & ~(align - 1);
658 gomp_coalesce_buf_add (&cbuf, tgt_size,
659 cur_node.host_end - cur_node.host_start);
660 tgt_size += cur_node.host_end - cur_node.host_start;
661 has_firstprivate = true;
665 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
667 n = gomp_map_0len_lookup (mem_map, &cur_node);
670 tgt->list[i].key = NULL;
671 tgt->list[i].offset = ~(uintptr_t) 1;
676 n = splay_tree_lookup (mem_map, &cur_node);
677 if (n && n->refcount != REFCOUNT_LINK)
678 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
679 kind & typemask, NULL);
682 tgt->list[i].key = NULL;
684 size_t align = (size_t) 1 << (kind >> rshift);
686 if (tgt_align < align)
688 tgt_size = (tgt_size + align - 1) & ~(align - 1);
689 if (gomp_to_device_kind_p (kind & typemask))
690 gomp_coalesce_buf_add (&cbuf, tgt_size,
691 cur_node.host_end - cur_node.host_start);
692 tgt_size += cur_node.host_end - cur_node.host_start;
693 if ((kind & typemask) == GOMP_MAP_TO_PSET)
696 for (j = i + 1; j < mapnum; j++)
697 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
700 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
701 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
702 > cur_node.host_end))
706 tgt->list[j].key = NULL;
717 gomp_mutex_unlock (&devicep->lock);
718 gomp_fatal ("unexpected aggregation");
720 tgt->to_free = devaddrs[0];
721 tgt->tgt_start = (uintptr_t) tgt->to_free;
722 tgt->tgt_end = tgt->tgt_start + sizes[0];
724 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
726 /* Allocate tgt_align aligned tgt_size block of memory. */
727 /* FIXME: Perhaps change interface to allocate properly aligned
729 tgt->to_free = devicep->alloc_func (devicep->target_id,
730 tgt_size + tgt_align - 1);
733 gomp_mutex_unlock (&devicep->lock);
734 gomp_fatal ("device memory allocation fail");
737 tgt->tgt_start = (uintptr_t) tgt->to_free;
738 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
739 tgt->tgt_end = tgt->tgt_start + tgt_size;
741 if (cbuf.use_cnt == 1)
743 if (cbuf.chunk_cnt > 0)
746 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
762 if (pragma_kind == GOMP_MAP_VARS_TARGET)
763 tgt_size = mapnum * sizeof (void *);
766 if (not_found_cnt || has_firstprivate)
769 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
770 splay_tree_node array = tgt->array;
771 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
772 uintptr_t field_tgt_base = 0;
774 for (i = 0; i < mapnum; i++)
775 if (tgt->list[i].key == NULL)
777 int kind = get_kind (short_mapkind, kinds, i);
778 if (hostaddrs[i] == NULL)
780 switch (kind & typemask)
782 size_t align, len, first, last;
784 case GOMP_MAP_FIRSTPRIVATE:
785 align = (size_t) 1 << (kind >> rshift);
786 tgt_size = (tgt_size + align - 1) & ~(align - 1);
787 tgt->list[i].offset = tgt_size;
789 gomp_copy_host2dev (devicep, aq,
790 (void *) (tgt->tgt_start + tgt_size),
791 (void *) hostaddrs[i], len, cbufp);
794 case GOMP_MAP_FIRSTPRIVATE_INT:
795 case GOMP_MAP_USE_DEVICE_PTR:
796 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
798 case GOMP_MAP_STRUCT:
801 cur_node.host_start = (uintptr_t) hostaddrs[i];
802 cur_node.host_end = (uintptr_t) hostaddrs[last]
804 if (tgt->list[first].key != NULL)
806 n = splay_tree_lookup (mem_map, &cur_node);
809 size_t align = (size_t) 1 << (kind >> rshift);
810 tgt_size -= (uintptr_t) hostaddrs[first]
811 - (uintptr_t) hostaddrs[i];
812 tgt_size = (tgt_size + align - 1) & ~(align - 1);
813 tgt_size += (uintptr_t) hostaddrs[first]
814 - (uintptr_t) hostaddrs[i];
815 field_tgt_base = (uintptr_t) hostaddrs[first];
816 field_tgt_offset = tgt_size;
817 field_tgt_clear = last;
818 tgt_size += cur_node.host_end
819 - (uintptr_t) hostaddrs[first];
822 for (i = first; i <= last; i++)
823 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
824 sizes, kinds, cbufp);
827 case GOMP_MAP_ALWAYS_POINTER:
828 cur_node.host_start = (uintptr_t) hostaddrs[i];
829 cur_node.host_end = cur_node.host_start + sizeof (void *);
830 n = splay_tree_lookup (mem_map, &cur_node);
832 || n->host_start > cur_node.host_start
833 || n->host_end < cur_node.host_end)
835 gomp_mutex_unlock (&devicep->lock);
836 gomp_fatal ("always pointer not mapped");
838 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
839 != GOMP_MAP_ALWAYS_POINTER)
840 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
841 if (cur_node.tgt_offset)
842 cur_node.tgt_offset -= sizes[i];
843 gomp_copy_host2dev (devicep, aq,
844 (void *) (n->tgt->tgt_start
846 + cur_node.host_start
848 (void *) &cur_node.tgt_offset,
849 sizeof (void *), cbufp);
850 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
851 + cur_node.host_start - n->host_start;
856 splay_tree_key k = &array->key;
857 k->host_start = (uintptr_t) hostaddrs[i];
858 if (!GOMP_MAP_POINTER_P (kind & typemask))
859 k->host_end = k->host_start + sizes[i];
861 k->host_end = k->host_start + sizeof (void *);
862 splay_tree_key n = splay_tree_lookup (mem_map, k);
863 if (n && n->refcount != REFCOUNT_LINK)
864 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
865 kind & typemask, cbufp);
869 if (n && n->refcount == REFCOUNT_LINK)
871 /* Replace target address of the pointer with target address
872 of mapped object in the splay tree. */
873 splay_tree_remove (mem_map, n);
876 size_t align = (size_t) 1 << (kind >> rshift);
877 tgt->list[i].key = k;
879 if (field_tgt_clear != ~(size_t) 0)
881 k->tgt_offset = k->host_start - field_tgt_base
883 if (i == field_tgt_clear)
884 field_tgt_clear = ~(size_t) 0;
888 tgt_size = (tgt_size + align - 1) & ~(align - 1);
889 k->tgt_offset = tgt_size;
890 tgt_size += k->host_end - k->host_start;
892 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
893 tgt->list[i].always_copy_from
894 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
895 tgt->list[i].offset = 0;
896 tgt->list[i].length = k->host_end - k->host_start;
898 k->dynamic_refcount = 0;
902 splay_tree_insert (mem_map, array);
903 switch (kind & typemask)
907 case GOMP_MAP_FORCE_ALLOC:
908 case GOMP_MAP_FORCE_FROM:
909 case GOMP_MAP_ALWAYS_FROM:
912 case GOMP_MAP_TOFROM:
913 case GOMP_MAP_FORCE_TO:
914 case GOMP_MAP_FORCE_TOFROM:
915 case GOMP_MAP_ALWAYS_TO:
916 case GOMP_MAP_ALWAYS_TOFROM:
917 gomp_copy_host2dev (devicep, aq,
918 (void *) (tgt->tgt_start
920 (void *) k->host_start,
921 k->host_end - k->host_start, cbufp);
923 case GOMP_MAP_POINTER:
924 gomp_map_pointer (tgt, aq,
925 (uintptr_t) *(void **) k->host_start,
926 k->tgt_offset, sizes[i], cbufp);
928 case GOMP_MAP_TO_PSET:
929 gomp_copy_host2dev (devicep, aq,
930 (void *) (tgt->tgt_start
932 (void *) k->host_start,
933 k->host_end - k->host_start, cbufp);
935 for (j = i + 1; j < mapnum; j++)
936 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
940 else if ((uintptr_t) hostaddrs[j] < k->host_start
941 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
946 tgt->list[j].key = k;
947 tgt->list[j].copy_from = false;
948 tgt->list[j].always_copy_from = false;
949 if (k->refcount != REFCOUNT_INFINITY)
951 gomp_map_pointer (tgt, aq,
952 (uintptr_t) *(void **) hostaddrs[j],
954 + ((uintptr_t) hostaddrs[j]
960 case GOMP_MAP_FORCE_PRESENT:
962 /* We already looked up the memory region above and it
964 size_t size = k->host_end - k->host_start;
965 gomp_mutex_unlock (&devicep->lock);
966 #ifdef HAVE_INTTYPES_H
967 gomp_fatal ("present clause: !acc_is_present (%p, "
968 "%"PRIu64" (0x%"PRIx64"))",
969 (void *) k->host_start,
970 (uint64_t) size, (uint64_t) size);
972 gomp_fatal ("present clause: !acc_is_present (%p, "
973 "%lu (0x%lx))", (void *) k->host_start,
974 (unsigned long) size, (unsigned long) size);
978 case GOMP_MAP_FORCE_DEVICEPTR:
979 assert (k->host_end - k->host_start == sizeof (void *));
980 gomp_copy_host2dev (devicep, aq,
981 (void *) (tgt->tgt_start
983 (void *) k->host_start,
984 sizeof (void *), cbufp);
987 gomp_mutex_unlock (&devicep->lock);
988 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
994 /* Set link pointer on target to the device address of the
996 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
997 /* We intentionally do not use coalescing here, as it's not
998 data allocated by the current call to this function. */
999 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1000 &tgt_addr, sizeof (void *), NULL);
1007 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1009 for (i = 0; i < mapnum; i++)
1011 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1012 gomp_copy_host2dev (devicep, aq,
1013 (void *) (tgt->tgt_start + i * sizeof (void *)),
1014 (void *) &cur_node.tgt_offset, sizeof (void *),
1022 for (c = 0; c < cbuf.chunk_cnt; ++c)
1023 gomp_copy_host2dev (devicep, aq,
1024 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1025 (char *) cbuf.buf + (cbuf.chunks[c].start
1026 - cbuf.chunks[0].start),
1027 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
1033 /* If the variable from "omp target enter data" map-list was already mapped,
1034 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1036 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1042 gomp_mutex_unlock (&devicep->lock);
1046 attribute_hidden struct target_mem_desc *
1047 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1048 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1049 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1051 return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1052 sizes, kinds, short_mapkind, pragma_kind);
1055 attribute_hidden struct target_mem_desc *
1056 gomp_map_vars_async (struct gomp_device_descr *devicep,
1057 struct goacc_asyncqueue *aq, size_t mapnum,
1058 void **hostaddrs, void **devaddrs, size_t *sizes,
1059 void *kinds, bool short_mapkind,
1060 enum gomp_map_vars_kind pragma_kind)
1062 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1063 sizes, kinds, short_mapkind, pragma_kind);
1066 attribute_hidden void
1067 gomp_unmap_tgt (struct target_mem_desc *tgt)
1069 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1071 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1077 attribute_hidden bool
1078 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1080 bool is_tgt_unmapped = false;
1081 splay_tree_remove (&devicep->mem_map, k);
1083 splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
1084 if (k->tgt->refcount > 1)
1088 is_tgt_unmapped = true;
1089 gomp_unmap_tgt (k->tgt);
1091 return is_tgt_unmapped;
1095 gomp_unref_tgt (void *ptr)
1097 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1099 if (tgt->refcount > 1)
1102 gomp_unmap_tgt (tgt);
1105 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1106 variables back from device to host: if it is false, it is assumed that this
1107 has been done already. */
1109 static inline __attribute__((always_inline)) void
1110 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1111 struct goacc_asyncqueue *aq)
1113 struct gomp_device_descr *devicep = tgt->device_descr;
1115 if (tgt->list_count == 0)
1121 gomp_mutex_lock (&devicep->lock);
1122 if (devicep->state == GOMP_DEVICE_FINALIZED)
1124 gomp_mutex_unlock (&devicep->lock);
1131 for (i = 0; i < tgt->list_count; i++)
1133 splay_tree_key k = tgt->list[i].key;
1137 bool do_unmap = false;
1138 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1140 else if (k->refcount == 1)
1146 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1147 || tgt->list[i].always_copy_from)
1148 gomp_copy_dev2host (devicep, aq,
1149 (void *) (k->host_start + tgt->list[i].offset),
1150 (void *) (k->tgt->tgt_start + k->tgt_offset
1151 + tgt->list[i].offset),
1152 tgt->list[i].length);
1154 gomp_remove_var (devicep, k);
1158 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
1161 gomp_unref_tgt ((void *) tgt);
1163 gomp_mutex_unlock (&devicep->lock);
1166 attribute_hidden void
1167 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1169 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
1172 attribute_hidden void
1173 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1174 struct goacc_asyncqueue *aq)
1176 gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
1180 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1181 size_t *sizes, void *kinds, bool short_mapkind)
1184 struct splay_tree_key_s cur_node;
1185 const int typemask = short_mapkind ? 0xff : 0x7;
1193 gomp_mutex_lock (&devicep->lock);
1194 if (devicep->state == GOMP_DEVICE_FINALIZED)
1196 gomp_mutex_unlock (&devicep->lock);
1200 for (i = 0; i < mapnum; i++)
1203 cur_node.host_start = (uintptr_t) hostaddrs[i];
1204 cur_node.host_end = cur_node.host_start + sizes[i];
1205 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1208 int kind = get_kind (short_mapkind, kinds, i);
1209 if (n->host_start > cur_node.host_start
1210 || n->host_end < cur_node.host_end)
1212 gomp_mutex_unlock (&devicep->lock);
1213 gomp_fatal ("Trying to update [%p..%p) object when "
1214 "only [%p..%p) is mapped",
1215 (void *) cur_node.host_start,
1216 (void *) cur_node.host_end,
1217 (void *) n->host_start,
1218 (void *) n->host_end);
1222 void *hostaddr = (void *) cur_node.host_start;
1223 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1224 + cur_node.host_start - n->host_start);
1225 size_t size = cur_node.host_end - cur_node.host_start;
1227 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1228 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1230 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1231 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1234 gomp_mutex_unlock (&devicep->lock);
1237 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1238 And insert to splay tree the mapping between addresses from HOST_TABLE and
1239 from loaded target image. We rely in the host and device compiler
1240 emitting variable and functions in the same order. */
1243 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1244 const void *host_table, const void *target_data,
1245 bool is_register_lock)
1247 void **host_func_table = ((void ***) host_table)[0];
1248 void **host_funcs_end = ((void ***) host_table)[1];
1249 void **host_var_table = ((void ***) host_table)[2];
1250 void **host_vars_end = ((void ***) host_table)[3];
1252 /* The func table contains only addresses, the var table contains addresses
1253 and corresponding sizes. */
1254 int num_funcs = host_funcs_end - host_func_table;
1255 int num_vars = (host_vars_end - host_var_table) / 2;
1257 /* Load image to device and get target addresses for the image. */
1258 struct addr_pair *target_table = NULL;
1259 int i, num_target_entries;
1262 = devicep->load_image_func (devicep->target_id, version,
1263 target_data, &target_table);
1265 if (num_target_entries != num_funcs + num_vars)
1267 gomp_mutex_unlock (&devicep->lock);
1268 if (is_register_lock)
1269 gomp_mutex_unlock (®ister_lock);
1270 gomp_fatal ("Cannot map target functions or variables"
1271 " (expected %u, have %u)", num_funcs + num_vars,
1272 num_target_entries);
1275 /* Insert host-target address mapping into splay tree. */
1276 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1277 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1278 tgt->refcount = REFCOUNT_INFINITY;
1281 tgt->to_free = NULL;
1283 tgt->list_count = 0;
1284 tgt->device_descr = devicep;
1285 splay_tree_node array = tgt->array;
1287 for (i = 0; i < num_funcs; i++)
1289 splay_tree_key k = &array->key;
1290 k->host_start = (uintptr_t) host_func_table[i];
1291 k->host_end = k->host_start + 1;
1293 k->tgt_offset = target_table[i].start;
1294 k->refcount = REFCOUNT_INFINITY;
1297 array->right = NULL;
1298 splay_tree_insert (&devicep->mem_map, array);
1302 /* Most significant bit of the size in host and target tables marks
1303 "omp declare target link" variables. */
1304 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1305 const uintptr_t size_mask = ~link_bit;
1307 for (i = 0; i < num_vars; i++)
1309 struct addr_pair *target_var = &target_table[num_funcs + i];
1310 uintptr_t target_size = target_var->end - target_var->start;
1312 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1314 gomp_mutex_unlock (&devicep->lock);
1315 if (is_register_lock)
1316 gomp_mutex_unlock (®ister_lock);
1317 gomp_fatal ("Cannot map target variables (size mismatch)");
1320 splay_tree_key k = &array->key;
1321 k->host_start = (uintptr_t) host_var_table[i * 2];
1323 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1325 k->tgt_offset = target_var->start;
1326 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1329 array->right = NULL;
1330 splay_tree_insert (&devicep->mem_map, array);
1334 free (target_table);
1337 /* Unload the mappings described by target_data from device DEVICE_P.
1338 The device must be locked. */
1341 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1343 const void *host_table, const void *target_data)
1345 void **host_func_table = ((void ***) host_table)[0];
1346 void **host_funcs_end = ((void ***) host_table)[1];
1347 void **host_var_table = ((void ***) host_table)[2];
1348 void **host_vars_end = ((void ***) host_table)[3];
1350 /* The func table contains only addresses, the var table contains addresses
1351 and corresponding sizes. */
1352 int num_funcs = host_funcs_end - host_func_table;
1353 int num_vars = (host_vars_end - host_var_table) / 2;
1355 struct splay_tree_key_s k;
1356 splay_tree_key node = NULL;
1358 /* Find mapping at start of node array */
1359 if (num_funcs || num_vars)
1361 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1362 : (uintptr_t) host_var_table[0]);
1363 k.host_end = k.host_start + 1;
1364 node = splay_tree_lookup (&devicep->mem_map, &k);
1367 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1369 gomp_mutex_unlock (&devicep->lock);
1370 gomp_fatal ("image unload fail");
1373 /* Remove mappings from splay tree. */
1375 for (i = 0; i < num_funcs; i++)
1377 k.host_start = (uintptr_t) host_func_table[i];
1378 k.host_end = k.host_start + 1;
1379 splay_tree_remove (&devicep->mem_map, &k);
1382 /* Most significant bit of the size in host and target tables marks
1383 "omp declare target link" variables. */
1384 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1385 const uintptr_t size_mask = ~link_bit;
1386 bool is_tgt_unmapped = false;
1388 for (i = 0; i < num_vars; i++)
1390 k.host_start = (uintptr_t) host_var_table[i * 2];
1392 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1394 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1395 splay_tree_remove (&devicep->mem_map, &k);
1398 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1399 is_tgt_unmapped = gomp_remove_var (devicep, n);
1403 if (node && !is_tgt_unmapped)
1410 /* This function should be called from every offload image while loading.
1411 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1412 the target, and TARGET_DATA needed by target plugin. */
1415 GOMP_offload_register_ver (unsigned version, const void *host_table,
1416 int target_type, const void *target_data)
1420 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1421 gomp_fatal ("Library too old for offload (version %u < %u)",
1422 GOMP_VERSION, GOMP_VERSION_LIB (version));
1424 gomp_mutex_lock (®ister_lock);
1426 /* Load image to all initialized devices. */
1427 for (i = 0; i < num_devices; i++)
1429 struct gomp_device_descr *devicep = &devices[i];
1430 gomp_mutex_lock (&devicep->lock);
1431 if (devicep->type == target_type
1432 && devicep->state == GOMP_DEVICE_INITIALIZED)
1433 gomp_load_image_to_device (devicep, version,
1434 host_table, target_data, true);
1435 gomp_mutex_unlock (&devicep->lock);
1438 /* Insert image to array of pending images. */
1440 = gomp_realloc_unlock (offload_images,
1441 (num_offload_images + 1)
1442 * sizeof (struct offload_image_descr));
1443 offload_images[num_offload_images].version = version;
1444 offload_images[num_offload_images].type = target_type;
1445 offload_images[num_offload_images].host_table = host_table;
1446 offload_images[num_offload_images].target_data = target_data;
1448 num_offload_images++;
1449 gomp_mutex_unlock (®ister_lock);
1453 GOMP_offload_register (const void *host_table, int target_type,
1454 const void *target_data)
1456 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1459 /* This function should be called from every offload image while unloading.
1460 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1461 the target, and TARGET_DATA needed by target plugin. */
1464 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1465 int target_type, const void *target_data)
1469 gomp_mutex_lock (®ister_lock);
1471 /* Unload image from all initialized devices. */
1472 for (i = 0; i < num_devices; i++)
1474 struct gomp_device_descr *devicep = &devices[i];
1475 gomp_mutex_lock (&devicep->lock);
1476 if (devicep->type == target_type
1477 && devicep->state == GOMP_DEVICE_INITIALIZED)
1478 gomp_unload_image_from_device (devicep, version,
1479 host_table, target_data);
1480 gomp_mutex_unlock (&devicep->lock);
1483 /* Remove image from array of pending images. */
1484 for (i = 0; i < num_offload_images; i++)
1485 if (offload_images[i].target_data == target_data)
1487 offload_images[i] = offload_images[--num_offload_images];
1491 gomp_mutex_unlock (®ister_lock);
1495 GOMP_offload_unregister (const void *host_table, int target_type,
1496 const void *target_data)
1498 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1501 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1502 must be locked on entry, and remains locked on return. */
1504 attribute_hidden void
1505 gomp_init_device (struct gomp_device_descr *devicep)
1508 if (!devicep->init_device_func (devicep->target_id))
1510 gomp_mutex_unlock (&devicep->lock);
1511 gomp_fatal ("device initialization failed");
1514 /* Load to device all images registered by the moment. */
1515 for (i = 0; i < num_offload_images; i++)
1517 struct offload_image_descr *image = &offload_images[i];
1518 if (image->type == devicep->type)
1519 gomp_load_image_to_device (devicep, image->version,
1520 image->host_table, image->target_data,
1524 /* Initialize OpenACC asynchronous queues. */
1525 goacc_init_asyncqueues (devicep);
1527 devicep->state = GOMP_DEVICE_INITIALIZED;
1530 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1531 must be locked on entry, and remains locked on return. */
1533 attribute_hidden bool
1534 gomp_fini_device (struct gomp_device_descr *devicep)
1536 bool ret = goacc_fini_asyncqueues (devicep);
1537 ret &= devicep->fini_device_func (devicep->target_id);
1538 devicep->state = GOMP_DEVICE_FINALIZED;
1542 attribute_hidden void
1543 gomp_unload_device (struct gomp_device_descr *devicep)
1545 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1549 /* Unload from device all images registered at the moment. */
1550 for (i = 0; i < num_offload_images; i++)
1552 struct offload_image_descr *image = &offload_images[i];
1553 if (image->type == devicep->type)
1554 gomp_unload_image_from_device (devicep, image->version,
1556 image->target_data);
1561 /* Free address mapping tables. MM must be locked on entry, and remains locked
1564 attribute_hidden void
1565 gomp_free_memmap (struct splay_tree_s *mem_map)
1567 while (mem_map->root)
1569 struct target_mem_desc *tgt = mem_map->root->key.tgt;
1571 splay_tree_remove (mem_map, &mem_map->root->key);
1577 /* Host fallback for GOMP_target{,_ext} routines. */
1580 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1582 struct gomp_thread old_thr, *thr = gomp_thread ();
1584 memset (thr, '\0', sizeof (*thr));
1585 if (gomp_places_list)
1587 thr->place = old_thr.place;
1588 thr->ts.place_partition_len = gomp_places_list_len;
1591 gomp_free_thread (thr);
1595 /* Calculate alignment and size requirements of a private copy of data shared
1596 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1599 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1600 unsigned short *kinds, size_t *tgt_align,
1604 for (i = 0; i < mapnum; i++)
1605 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1607 size_t align = (size_t) 1 << (kinds[i] >> 8);
1608 if (*tgt_align < align)
1610 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1611 *tgt_size += sizes[i];
1615 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1618 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1619 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1622 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1624 tgt += tgt_align - al;
1627 for (i = 0; i < mapnum; i++)
1628 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1630 size_t align = (size_t) 1 << (kinds[i] >> 8);
1631 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1632 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1633 hostaddrs[i] = tgt + tgt_size;
1634 tgt_size = tgt_size + sizes[i];
1638 /* Helper function of GOMP_target{,_ext} routines. */
1641 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1642 void (*host_fn) (void *))
1644 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1645 return (void *) host_fn;
1648 gomp_mutex_lock (&devicep->lock);
1649 if (devicep->state == GOMP_DEVICE_FINALIZED)
1651 gomp_mutex_unlock (&devicep->lock);
1655 struct splay_tree_key_s k;
1656 k.host_start = (uintptr_t) host_fn;
1657 k.host_end = k.host_start + 1;
1658 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1659 gomp_mutex_unlock (&devicep->lock);
1663 return (void *) tgt_fn->tgt_offset;
1667 /* Called when encountering a target directive. If DEVICE
1668 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1669 GOMP_DEVICE_HOST_FALLBACK (or any value
1670 larger than last available hw device), use host fallback.
1671 FN is address of host code, UNUSED is part of the current ABI, but
1672 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1673 with MAPNUM entries, with addresses of the host objects,
1674 sizes of the host objects (resp. for pointer kind pointer bias
1675 and assumed sizeof (void *) size) and kinds. */
1678 GOMP_target (int device, void (*fn) (void *), const void *unused,
1679 size_t mapnum, void **hostaddrs, size_t *sizes,
1680 unsigned char *kinds)
1682 struct gomp_device_descr *devicep = resolve_device (device);
1686 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1687 /* All shared memory devices should use the GOMP_target_ext function. */
1688 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1689 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1690 return gomp_target_fallback (fn, hostaddrs);
1692 struct target_mem_desc *tgt_vars
1693 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1694 GOMP_MAP_VARS_TARGET);
1695 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1697 gomp_unmap_vars (tgt_vars, true);
1700 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1701 and several arguments have been added:
1702 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1703 DEPEND is array of dependencies, see GOMP_task for details.
1705 ARGS is a pointer to an array consisting of a variable number of both
1706 device-independent and device-specific arguments, which can take one two
1707 elements where the first specifies for which device it is intended, the type
1708 and optionally also the value. If the value is not present in the first
1709 one, the whole second element the actual value. The last element of the
1710 array is a single NULL. Among the device independent can be for example
1711 NUM_TEAMS and THREAD_LIMIT.
1713 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1714 that value, or 1 if teams construct is not present, or 0, if
1715 teams construct does not have num_teams clause and so the choice is
1716 implementation defined, and -1 if it can't be determined on the host
1717 what value will GOMP_teams have on the device.
1718 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1719 body with that value, or 0, if teams construct does not have thread_limit
1720 clause or the teams construct is not present, or -1 if it can't be
1721 determined on the host what value will GOMP_teams have on the device. */
1724 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1725 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1726 unsigned int flags, void **depend, void **args)
1728 struct gomp_device_descr *devicep = resolve_device (device);
1729 size_t tgt_align = 0, tgt_size = 0;
1730 bool fpc_done = false;
1732 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1734 struct gomp_thread *thr = gomp_thread ();
1735 /* Create a team if we don't have any around, as nowait
1736 target tasks make sense to run asynchronously even when
1737 outside of any parallel. */
1738 if (__builtin_expect (thr->ts.team == NULL, 0))
1740 struct gomp_team *team = gomp_new_team (1);
1741 struct gomp_task *task = thr->task;
1742 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1743 team->prev_ts = thr->ts;
1744 thr->ts.team = team;
1745 thr->ts.team_id = 0;
1746 thr->ts.work_share = &team->work_shares[0];
1747 thr->ts.last_work_share = NULL;
1748 #ifdef HAVE_SYNC_BUILTINS
1749 thr->ts.single_count = 0;
1751 thr->ts.static_trip = 0;
1752 thr->task = &team->implicit_task[0];
1753 gomp_init_task (thr->task, NULL, icv);
1759 thr->task = &team->implicit_task[0];
1762 pthread_setspecific (gomp_thread_destructor, thr);
1765 && !thr->task->final_task)
1767 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1768 sizes, kinds, flags, depend, args,
1769 GOMP_TARGET_TASK_BEFORE_MAP);
1774 /* If there are depend clauses, but nowait is not present
1775 (or we are in a final task), block the parent task until the
1776 dependencies are resolved and then just continue with the rest
1777 of the function as if it is a merged task. */
1780 struct gomp_thread *thr = gomp_thread ();
1781 if (thr->task && thr->task->depend_hash)
1783 /* If we might need to wait, copy firstprivate now. */
1784 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1785 &tgt_align, &tgt_size);
1788 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1789 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1790 tgt_align, tgt_size);
1793 gomp_task_maybe_wait_for_dependencies (depend);
1799 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1800 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1801 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1805 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1806 &tgt_align, &tgt_size);
1809 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1810 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1811 tgt_align, tgt_size);
1814 gomp_target_fallback (fn, hostaddrs);
1818 struct target_mem_desc *tgt_vars;
1819 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1823 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1824 &tgt_align, &tgt_size);
1827 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1828 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1829 tgt_align, tgt_size);
1835 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1836 true, GOMP_MAP_VARS_TARGET);
1837 devicep->run_func (devicep->target_id, fn_addr,
1838 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1841 gomp_unmap_vars (tgt_vars, true);
1844 /* Host fallback for GOMP_target_data{,_ext} routines. */
1847 gomp_target_data_fallback (void)
1849 struct gomp_task_icv *icv = gomp_icv (false);
1850 if (icv->target_data)
1852 /* Even when doing a host fallback, if there are any active
1853 #pragma omp target data constructs, need to remember the
1854 new #pragma omp target data, otherwise GOMP_target_end_data
1855 would get out of sync. */
1856 struct target_mem_desc *tgt
1857 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1858 GOMP_MAP_VARS_DATA);
1859 tgt->prev = icv->target_data;
1860 icv->target_data = tgt;
1865 GOMP_target_data (int device, const void *unused, size_t mapnum,
1866 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1868 struct gomp_device_descr *devicep = resolve_device (device);
1871 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1872 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1873 return gomp_target_data_fallback ();
1875 struct target_mem_desc *tgt
1876 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1877 GOMP_MAP_VARS_DATA);
1878 struct gomp_task_icv *icv = gomp_icv (true);
1879 tgt->prev = icv->target_data;
1880 icv->target_data = tgt;
1884 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1885 size_t *sizes, unsigned short *kinds)
1887 struct gomp_device_descr *devicep = resolve_device (device);
1890 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1891 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1892 return gomp_target_data_fallback ();
1894 struct target_mem_desc *tgt
1895 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1896 GOMP_MAP_VARS_DATA);
1897 struct gomp_task_icv *icv = gomp_icv (true);
1898 tgt->prev = icv->target_data;
1899 icv->target_data = tgt;
1903 GOMP_target_end_data (void)
1905 struct gomp_task_icv *icv = gomp_icv (false);
1906 if (icv->target_data)
1908 struct target_mem_desc *tgt = icv->target_data;
1909 icv->target_data = tgt->prev;
1910 gomp_unmap_vars (tgt, true);
1915 GOMP_target_update (int device, const void *unused, size_t mapnum,
1916 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1918 struct gomp_device_descr *devicep = resolve_device (device);
1921 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1922 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1925 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1929 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1930 size_t *sizes, unsigned short *kinds,
1931 unsigned int flags, void **depend)
1933 struct gomp_device_descr *devicep = resolve_device (device);
1935 /* If there are depend clauses, but nowait is not present,
1936 block the parent task until the dependencies are resolved
1937 and then just continue with the rest of the function as if it
1938 is a merged task. Until we are able to schedule task during
1939 variable mapping or unmapping, ignore nowait if depend clauses
1943 struct gomp_thread *thr = gomp_thread ();
1944 if (thr->task && thr->task->depend_hash)
1946 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1948 && !thr->task->final_task)
1950 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1951 mapnum, hostaddrs, sizes, kinds,
1952 flags | GOMP_TARGET_FLAG_UPDATE,
1953 depend, NULL, GOMP_TARGET_TASK_DATA))
1958 struct gomp_team *team = thr->ts.team;
1959 /* If parallel or taskgroup has been cancelled, don't start new
1961 if (__builtin_expect (gomp_cancel_var, 0) && team)
1963 if (gomp_team_barrier_cancelled (&team->barrier))
1965 if (thr->task->taskgroup)
1967 if (thr->task->taskgroup->cancelled)
1969 if (thr->task->taskgroup->workshare
1970 && thr->task->taskgroup->prev
1971 && thr->task->taskgroup->prev->cancelled)
1976 gomp_task_maybe_wait_for_dependencies (depend);
1982 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1983 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1986 struct gomp_thread *thr = gomp_thread ();
1987 struct gomp_team *team = thr->ts.team;
1988 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1989 if (__builtin_expect (gomp_cancel_var, 0) && team)
1991 if (gomp_team_barrier_cancelled (&team->barrier))
1993 if (thr->task->taskgroup)
1995 if (thr->task->taskgroup->cancelled)
1997 if (thr->task->taskgroup->workshare
1998 && thr->task->taskgroup->prev
1999 && thr->task->taskgroup->prev->cancelled)
2004 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2008 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2009 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2011 const int typemask = 0xff;
2013 gomp_mutex_lock (&devicep->lock);
2014 if (devicep->state == GOMP_DEVICE_FINALIZED)
2016 gomp_mutex_unlock (&devicep->lock);
2020 for (i = 0; i < mapnum; i++)
2022 struct splay_tree_key_s cur_node;
2023 unsigned char kind = kinds[i] & typemask;
2027 case GOMP_MAP_ALWAYS_FROM:
2028 case GOMP_MAP_DELETE:
2029 case GOMP_MAP_RELEASE:
2030 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2031 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2032 cur_node.host_start = (uintptr_t) hostaddrs[i];
2033 cur_node.host_end = cur_node.host_start + sizes[i];
2034 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2035 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2036 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2037 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2041 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2043 if ((kind == GOMP_MAP_DELETE
2044 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2045 && k->refcount != REFCOUNT_INFINITY)
2048 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2049 || kind == GOMP_MAP_ALWAYS_FROM)
2050 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2051 (void *) (k->tgt->tgt_start + k->tgt_offset
2052 + cur_node.host_start
2054 cur_node.host_end - cur_node.host_start);
2055 if (k->refcount == 0)
2057 splay_tree_remove (&devicep->mem_map, k);
2059 splay_tree_insert (&devicep->mem_map,
2060 (splay_tree_node) k->link_key);
2061 if (k->tgt->refcount > 1)
2064 gomp_unmap_tgt (k->tgt);
2069 gomp_mutex_unlock (&devicep->lock);
2070 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2075 gomp_mutex_unlock (&devicep->lock);
2079 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2080 size_t *sizes, unsigned short *kinds,
2081 unsigned int flags, void **depend)
2083 struct gomp_device_descr *devicep = resolve_device (device);
2085 /* If there are depend clauses, but nowait is not present,
2086 block the parent task until the dependencies are resolved
2087 and then just continue with the rest of the function as if it
2088 is a merged task. Until we are able to schedule task during
2089 variable mapping or unmapping, ignore nowait if depend clauses
2093 struct gomp_thread *thr = gomp_thread ();
2094 if (thr->task && thr->task->depend_hash)
2096 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2098 && !thr->task->final_task)
2100 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2101 mapnum, hostaddrs, sizes, kinds,
2102 flags, depend, NULL,
2103 GOMP_TARGET_TASK_DATA))
2108 struct gomp_team *team = thr->ts.team;
2109 /* If parallel or taskgroup has been cancelled, don't start new
2111 if (__builtin_expect (gomp_cancel_var, 0) && team)
2113 if (gomp_team_barrier_cancelled (&team->barrier))
2115 if (thr->task->taskgroup)
2117 if (thr->task->taskgroup->cancelled)
2119 if (thr->task->taskgroup->workshare
2120 && thr->task->taskgroup->prev
2121 && thr->task->taskgroup->prev->cancelled)
2126 gomp_task_maybe_wait_for_dependencies (depend);
2132 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2133 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2136 struct gomp_thread *thr = gomp_thread ();
2137 struct gomp_team *team = thr->ts.team;
2138 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2139 if (__builtin_expect (gomp_cancel_var, 0) && team)
2141 if (gomp_team_barrier_cancelled (&team->barrier))
2143 if (thr->task->taskgroup)
2145 if (thr->task->taskgroup->cancelled)
2147 if (thr->task->taskgroup->workshare
2148 && thr->task->taskgroup->prev
2149 && thr->task->taskgroup->prev->cancelled)
2155 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2156 for (i = 0; i < mapnum; i++)
2157 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2159 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2160 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2164 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2165 true, GOMP_MAP_VARS_ENTER_DATA);
2167 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2171 gomp_target_task_fn (void *data)
2173 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2174 struct gomp_device_descr *devicep = ttask->devicep;
2176 if (ttask->fn != NULL)
2180 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2181 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2182 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2184 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2185 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2189 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2192 gomp_unmap_vars (ttask->tgt, true);
2196 void *actual_arguments;
2197 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2200 actual_arguments = ttask->hostaddrs;
2204 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2205 NULL, ttask->sizes, ttask->kinds, true,
2206 GOMP_MAP_VARS_TARGET);
2207 actual_arguments = (void *) ttask->tgt->tgt_start;
2209 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2211 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2212 ttask->args, (void *) ttask);
2215 else if (devicep == NULL
2216 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2217 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2221 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2222 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2223 ttask->kinds, true);
2224 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2225 for (i = 0; i < ttask->mapnum; i++)
2226 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2228 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2229 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2230 GOMP_MAP_VARS_ENTER_DATA);
2231 i += ttask->sizes[i];
2234 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2235 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2237 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2243 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2247 struct gomp_task_icv *icv = gomp_icv (true);
2248 icv->thread_limit_var
2249 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2255 omp_target_alloc (size_t size, int device_num)
2257 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2258 return malloc (size);
2263 struct gomp_device_descr *devicep = resolve_device (device_num);
2264 if (devicep == NULL)
2267 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2268 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2269 return malloc (size);
2271 gomp_mutex_lock (&devicep->lock);
2272 void *ret = devicep->alloc_func (devicep->target_id, size);
2273 gomp_mutex_unlock (&devicep->lock);
2278 omp_target_free (void *device_ptr, int device_num)
2280 if (device_ptr == NULL)
2283 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2292 struct gomp_device_descr *devicep = resolve_device (device_num);
2293 if (devicep == NULL)
2296 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2297 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2303 gomp_mutex_lock (&devicep->lock);
2304 gomp_free_device_memory (devicep, device_ptr);
2305 gomp_mutex_unlock (&devicep->lock);
2309 omp_target_is_present (const void *ptr, int device_num)
2314 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2320 struct gomp_device_descr *devicep = resolve_device (device_num);
2321 if (devicep == NULL)
2324 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2325 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2328 gomp_mutex_lock (&devicep->lock);
2329 struct splay_tree_s *mem_map = &devicep->mem_map;
2330 struct splay_tree_key_s cur_node;
2332 cur_node.host_start = (uintptr_t) ptr;
2333 cur_node.host_end = cur_node.host_start;
2334 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2335 int ret = n != NULL;
2336 gomp_mutex_unlock (&devicep->lock);
2341 omp_target_memcpy (void *dst, const void *src, size_t length,
2342 size_t dst_offset, size_t src_offset, int dst_device_num,
2345 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2348 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2350 if (dst_device_num < 0)
2353 dst_devicep = resolve_device (dst_device_num);
2354 if (dst_devicep == NULL)
2357 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2358 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2361 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2363 if (src_device_num < 0)
2366 src_devicep = resolve_device (src_device_num);
2367 if (src_devicep == NULL)
2370 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2371 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2374 if (src_devicep == NULL && dst_devicep == NULL)
2376 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2379 if (src_devicep == NULL)
2381 gomp_mutex_lock (&dst_devicep->lock);
2382 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2383 (char *) dst + dst_offset,
2384 (char *) src + src_offset, length);
2385 gomp_mutex_unlock (&dst_devicep->lock);
2386 return (ret ? 0 : EINVAL);
2388 if (dst_devicep == NULL)
2390 gomp_mutex_lock (&src_devicep->lock);
2391 ret = src_devicep->dev2host_func (src_devicep->target_id,
2392 (char *) dst + dst_offset,
2393 (char *) src + src_offset, length);
2394 gomp_mutex_unlock (&src_devicep->lock);
2395 return (ret ? 0 : EINVAL);
2397 if (src_devicep == dst_devicep)
2399 gomp_mutex_lock (&src_devicep->lock);
2400 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2401 (char *) dst + dst_offset,
2402 (char *) src + src_offset, length);
2403 gomp_mutex_unlock (&src_devicep->lock);
2404 return (ret ? 0 : EINVAL);
2410 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2411 int num_dims, const size_t *volume,
2412 const size_t *dst_offsets,
2413 const size_t *src_offsets,
2414 const size_t *dst_dimensions,
2415 const size_t *src_dimensions,
2416 struct gomp_device_descr *dst_devicep,
2417 struct gomp_device_descr *src_devicep)
2419 size_t dst_slice = element_size;
2420 size_t src_slice = element_size;
2421 size_t j, dst_off, src_off, length;
2426 if (__builtin_mul_overflow (element_size, volume[0], &length)
2427 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2428 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2430 if (dst_devicep == NULL && src_devicep == NULL)
2432 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2436 else if (src_devicep == NULL)
2437 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2438 (char *) dst + dst_off,
2439 (const char *) src + src_off,
2441 else if (dst_devicep == NULL)
2442 ret = src_devicep->dev2host_func (src_devicep->target_id,
2443 (char *) dst + dst_off,
2444 (const char *) src + src_off,
2446 else if (src_devicep == dst_devicep)
2447 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2448 (char *) dst + dst_off,
2449 (const char *) src + src_off,
2453 return ret ? 0 : EINVAL;
2456 /* FIXME: it would be nice to have some plugin function to handle
2457 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2458 be handled in the generic recursion below, and for host-host it
2459 should be used even for any num_dims >= 2. */
2461 for (i = 1; i < num_dims; i++)
2462 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2463 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2465 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2466 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2468 for (j = 0; j < volume[0]; j++)
2470 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2471 (const char *) src + src_off,
2472 element_size, num_dims - 1,
2473 volume + 1, dst_offsets + 1,
2474 src_offsets + 1, dst_dimensions + 1,
2475 src_dimensions + 1, dst_devicep,
2479 dst_off += dst_slice;
2480 src_off += src_slice;
2486 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2487 int num_dims, const size_t *volume,
2488 const size_t *dst_offsets,
2489 const size_t *src_offsets,
2490 const size_t *dst_dimensions,
2491 const size_t *src_dimensions,
2492 int dst_device_num, int src_device_num)
2494 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2499 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2501 if (dst_device_num < 0)
2504 dst_devicep = resolve_device (dst_device_num);
2505 if (dst_devicep == NULL)
2508 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2509 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2512 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2514 if (src_device_num < 0)
2517 src_devicep = resolve_device (src_device_num);
2518 if (src_devicep == NULL)
2521 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2522 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2526 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2530 gomp_mutex_lock (&src_devicep->lock);
2531 else if (dst_devicep)
2532 gomp_mutex_lock (&dst_devicep->lock);
2533 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2534 volume, dst_offsets, src_offsets,
2535 dst_dimensions, src_dimensions,
2536 dst_devicep, src_devicep);
2538 gomp_mutex_unlock (&src_devicep->lock);
2539 else if (dst_devicep)
2540 gomp_mutex_unlock (&dst_devicep->lock);
2545 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2546 size_t size, size_t device_offset, int device_num)
2548 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2554 struct gomp_device_descr *devicep = resolve_device (device_num);
2555 if (devicep == NULL)
2558 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2559 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2562 gomp_mutex_lock (&devicep->lock);
2564 struct splay_tree_s *mem_map = &devicep->mem_map;
2565 struct splay_tree_key_s cur_node;
2568 cur_node.host_start = (uintptr_t) host_ptr;
2569 cur_node.host_end = cur_node.host_start + size;
2570 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2573 if (n->tgt->tgt_start + n->tgt_offset
2574 == (uintptr_t) device_ptr + device_offset
2575 && n->host_start <= cur_node.host_start
2576 && n->host_end >= cur_node.host_end)
2581 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2582 tgt->array = gomp_malloc (sizeof (*tgt->array));
2586 tgt->to_free = NULL;
2588 tgt->list_count = 0;
2589 tgt->device_descr = devicep;
2590 splay_tree_node array = tgt->array;
2591 splay_tree_key k = &array->key;
2592 k->host_start = cur_node.host_start;
2593 k->host_end = cur_node.host_end;
2595 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2596 k->refcount = REFCOUNT_INFINITY;
2598 array->right = NULL;
2599 splay_tree_insert (&devicep->mem_map, array);
2602 gomp_mutex_unlock (&devicep->lock);
2607 omp_target_disassociate_ptr (const void *ptr, int device_num)
2609 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2615 struct gomp_device_descr *devicep = resolve_device (device_num);
2616 if (devicep == NULL)
2619 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2622 gomp_mutex_lock (&devicep->lock);
2624 struct splay_tree_s *mem_map = &devicep->mem_map;
2625 struct splay_tree_key_s cur_node;
2628 cur_node.host_start = (uintptr_t) ptr;
2629 cur_node.host_end = cur_node.host_start;
2630 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2632 && n->host_start == cur_node.host_start
2633 && n->refcount == REFCOUNT_INFINITY
2634 && n->tgt->tgt_start == 0
2635 && n->tgt->to_free == NULL
2636 && n->tgt->refcount == 1
2637 && n->tgt->list_count == 0)
2639 splay_tree_remove (&devicep->mem_map, n);
2640 gomp_unmap_tgt (n->tgt);
2644 gomp_mutex_unlock (&devicep->lock);
2649 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2652 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2653 return gomp_pause_host ();
2654 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2656 /* Do nothing for target devices for now. */
2661 omp_pause_resource_all (omp_pause_resource_t kind)
2664 if (gomp_pause_host ())
2666 /* Do nothing for target devices for now. */
2670 ialias (omp_pause_resource)
2671 ialias (omp_pause_resource_all)
2673 #ifdef PLUGIN_SUPPORT
2675 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2677 The handles of the found functions are stored in the corresponding fields
2678 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2681 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2682 const char *plugin_name)
2684 const char *err = NULL, *last_missing = NULL;
2686 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2690 /* Check if all required functions are available in the plugin and store
2691 their handlers. None of the symbols can legitimately be NULL,
2692 so we don't need to check dlerror all the time. */
2694 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2696 /* Similar, but missing functions are not an error. Return false if
2697 failed, true otherwise. */
2698 #define DLSYM_OPT(f, n) \
2699 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2700 || (last_missing = #n, 0))
2703 if (device->version_func () != GOMP_VERSION)
2705 err = "plugin version mismatch";
2712 DLSYM (get_num_devices);
2713 DLSYM (init_device);
2714 DLSYM (fini_device);
2716 DLSYM (unload_image);
2721 device->capabilities = device->get_caps_func ();
2722 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2726 DLSYM_OPT (can_run, can_run);
2729 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2731 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2732 || !DLSYM_OPT (openacc.create_thread_data,
2733 openacc_create_thread_data)
2734 || !DLSYM_OPT (openacc.destroy_thread_data,
2735 openacc_destroy_thread_data)
2736 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
2737 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
2738 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
2739 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
2740 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
2741 || !DLSYM_OPT (openacc.async.queue_callback,
2742 openacc_async_queue_callback)
2743 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
2744 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
2745 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
2747 /* Require all the OpenACC handlers if we have
2748 GOMP_OFFLOAD_CAP_OPENACC_200. */
2749 err = "plugin missing OpenACC handler function";
2754 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2755 openacc_cuda_get_current_device);
2756 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2757 openacc_cuda_get_current_context);
2758 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2759 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2760 if (cuda && cuda != 4)
2762 /* Make sure all the CUDA functions are there if any of them are. */
2763 err = "plugin missing OpenACC CUDA handler function";
2775 gomp_error ("while loading %s: %s", plugin_name, err);
2777 gomp_error ("missing function was %s", last_missing);
2779 dlclose (plugin_handle);
2784 /* This function finalizes all initialized devices. */
2787 gomp_target_fini (void)
2790 for (i = 0; i < num_devices; i++)
2793 struct gomp_device_descr *devicep = &devices[i];
2794 gomp_mutex_lock (&devicep->lock);
2795 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2796 ret = gomp_fini_device (devicep);
2797 gomp_mutex_unlock (&devicep->lock);
2799 gomp_fatal ("device finalization failed");
2803 /* This function initializes the runtime for offloading.
2804 It parses the list of offload plugins, and tries to load these.
2805 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2806 will be set, and the array DEVICES initialized, containing descriptors for
2807 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2811 gomp_target_init (void)
2813 const char *prefix ="libgomp-plugin-";
2814 const char *suffix = SONAME_SUFFIX (1);
2815 const char *cur, *next;
2817 int i, new_num_devices;
2822 cur = OFFLOAD_PLUGINS;
2826 struct gomp_device_descr current_device;
2827 size_t prefix_len, suffix_len, cur_len;
2829 next = strchr (cur, ',');
2831 prefix_len = strlen (prefix);
2832 cur_len = next ? next - cur : strlen (cur);
2833 suffix_len = strlen (suffix);
2835 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2842 memcpy (plugin_name, prefix, prefix_len);
2843 memcpy (plugin_name + prefix_len, cur, cur_len);
2844 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2846 if (gomp_load_plugin_for_device (¤t_device, plugin_name))
2848 new_num_devices = current_device.get_num_devices_func ();
2849 if (new_num_devices >= 1)
2851 /* Augment DEVICES and NUM_DEVICES. */
2853 devices = realloc (devices, (num_devices + new_num_devices)
2854 * sizeof (struct gomp_device_descr));
2862 current_device.name = current_device.get_name_func ();
2863 /* current_device.capabilities has already been set. */
2864 current_device.type = current_device.get_type_func ();
2865 current_device.mem_map.root = NULL;
2866 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2867 current_device.openacc.data_environ = NULL;
2868 for (i = 0; i < new_num_devices; i++)
2870 current_device.target_id = i;
2871 devices[num_devices] = current_device;
2872 gomp_mutex_init (&devices[num_devices].lock);
2883 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2884 NUM_DEVICES_OPENMP. */
2885 struct gomp_device_descr *devices_s
2886 = malloc (num_devices * sizeof (struct gomp_device_descr));
2893 num_devices_openmp = 0;
2894 for (i = 0; i < num_devices; i++)
2895 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2896 devices_s[num_devices_openmp++] = devices[i];
2897 int num_devices_after_openmp = num_devices_openmp;
2898 for (i = 0; i < num_devices; i++)
2899 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2900 devices_s[num_devices_after_openmp++] = devices[i];
2902 devices = devices_s;
2904 for (i = 0; i < num_devices; i++)
2906 /* The 'devices' array can be moved (by the realloc call) until we have
2907 found all the plugins, so registering with the OpenACC runtime (which
2908 takes a copy of the pointer argument) must be delayed until now. */
2909 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2910 goacc_register (&devices[i]);
2913 if (atexit (gomp_target_fini) != 0)
2914 gomp_fatal ("atexit failed");
2917 #else /* PLUGIN_SUPPORT */
2918 /* If dlfcn.h is unavailable we always fallback to host execution.
2919 GOMP_target* routines are just stubs for this case. */
2921 gomp_target_init (void)
2924 #endif /* PLUGIN_SUPPORT */