/* libgomp/loop_ull.c */
/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)
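
/* Reader's note (a sketch of the libgomp.h convention rather than anything
   defined in this file): ialias emits an internal alias for the exported
   symbol so calls from inside libgomp cannot be interposed, and
   ialias_redirect makes references to the named function resolve to such
   an internal alias defined elsewhere in the library.  */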

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
                    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
                    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
                ? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (up, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size_ull)
                                  < 1ULL << (sizeof (gomp_ull)
                                             * __CHAR_BIT__ / 2 - 1), 1))
              ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
                                        - (nthreads + 1) * ws->chunk_size_ull);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
                                   < 1ULL << (sizeof (gomp_ull)
                                              * __CHAR_BIT__ / 2 - 1), 1))
          ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
                                    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
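
/* A note on the MODE field set above (a sketch of how the bits appear to be
   consumed by the iterator routines in iter_ull.c, not a normative
   description): bit 1 marks a downward loop (!UP); bit 0 is set only when
   the "cheap overflow protection" test proves that NTHREADS threads each
   grabbing a chunk cannot wrap the 64-bit iteration space, which lets the
   dynamic iterator hand out chunks with a single atomic fetch-and-add
   rather than a compare-and-swap retry loop.  */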

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
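
/* A hypothetical sketch of the calls a compiler might emit for a loop such
   as "#pragma omp for schedule(runtime)" over an unsigned long long
   iteration variable (illustrative only; the exact expansion is up to the
   compiler, and GOMP_loop_end / GOMP_loop_end_nowait are defined elsewhere
   in libgomp):

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_runtime_start (true, 0, n, 1, &istart, &iend))
       do
         for (i = istart; i < iend; i++)
           body (i);
       while (GOMP_loop_ull_runtime_next (&istart, &iend));
     GOMP_loop_end ();

   with GOMP_loop_end_nowait () used instead when the loop has a nowait
   clause.  */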

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
                                          icv->run_sched_chunk_size,
                                          istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         0, istart, iend);
    default:
      abort ();
    }
}

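/* Translate the schedule kind supplied by the compiler into the kind this
   work share will actually use: GFS_RUNTIME (with or without the
   GFS_MONOTONIC flag), and GFS_AUTO when it stands for a runtime schedule
   with the nonmonotonic modifier, are resolved from the run-sched-var ICV,
   and *CHUNK_SIZE is updated from the ICV where appropriate.  */
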
static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

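/* For example (assuming the usual mapping of the OMP_SCHEDULE environment
   variable onto the run-sched-var ICV, and purely for illustration): with
   OMP_SCHEDULE="guided,4" a schedule(runtime) clause reaches
   gomp_adjust_sched as GFS_RUNTIME and is rewritten to GFS_GUIDED with
   *CHUNK_SIZE = 4, while OMP_SCHEDULE="auto" makes it fall back to
   GFS_STATIC with *CHUNK_SIZE = 0.

   GOMP_loop_ull_start below is the combined entry point used together with
   the newer OpenMP features: SCHED and CHUNK_SIZE come straight from the
   schedule clause and may still need resolving through gomp_adjust_sched;
   REDUCTIONS, when non-NULL, points at task-reduction data to register
   with the taskgroup; and when MEM is non-NULL, *MEM on entry holds a
   number of bytes of zero-initialized, long long-aligned per-work-share
   scratch space to reserve, and on return it is replaced by a pointer to
   that space (a mechanical description of the code below).  */
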
bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
                     gomp_ull incr, long sched, gomp_ull chunk_size,
                     gomp_ull *istart, gomp_ull *iend,
                     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)          \
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (size > (sizeof (struct gomp_work_share)
                      - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 0, istart, iend);
    default:
      abort ();
    }
}

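/* GOMP_loop_ull_ordered_start is the combined entry point for ordered
   loops: as above, SCHED/CHUNK_SIZE may still need resolving through
   gomp_adjust_sched and REDUCTIONS registers task reductions.  When MEM is
   non-NULL, the value in *MEM enlarges the ordered allocation passed to
   gomp_work_share_start, and on return *MEM points just past the
   per-thread ordered_team_ids entries, aligned for long long (again a
   mechanical description of the code below, not of any documented ABI).  */
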
bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, long sched, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend,
                             uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, the worksharing loop always iterates from 0 to COUNTS[0] - 1, and
   the remaining COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */
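
/* Purely for illustration (the exact expansion is the compiler's business):
   a doacross nest such as

     #pragma omp for ordered(2)
     for (unsigned long long i = 0; i < n; i++)
       for (unsigned long long j = 0; j < m; j++)
         {
           ...
         }

   might reach one of the routines below with NCOUNTS == 2 and COUNTS
   holding { n, m }; the work share then only distributes the n logical
   iterations of the outermost loop, while the cross-iteration dependences
   are handled separately through the library's doacross post/wait entry
   points.  */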

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
                                                   icv->run_sched_chunk_size,
                                                   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  0, istart, iend);
    default:
      abort ();
    }
}

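/* GOMP_loop_ull_doacross_start parallels GOMP_loop_ull_start: REDUCTIONS
   registers task reductions, and when MEM is non-NULL the value in *MEM is
   the amount of extra doacross storage to request from
   gomp_doacross_ull_init, with *MEM rewritten on return to point at that
   storage (again simply describing the code that follows).  */
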
bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
                              long sched, gomp_ull chunk_size,
                              gomp_ull *istart, gomp_ull *iend,
                              uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
        __attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
        __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
        __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
        __attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
        __attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
        __attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
        __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
        __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull chunk_size,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
                                         gomp_ull incr, gomp_ull chunk_size,
                                         gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                                gomp_ull end, gomp_ull incr,
                                                gomp_ull *istart,
                                                gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
                                               istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
                                               gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif