Update copyright years.
[platform/upstream/gcc.git] / libgomp / team.c
/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
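
/* The matching accessor is defined in libgomp.h.  A minimal sketch of
   its two variants, corresponding to the #if above:

     static inline struct gomp_thread *
     gomp_thread (void)
     {
     #if defined HAVE_TLS || defined USE_EMUTLS
       return &gomp_tls_data;
     #else
       return pthread_getspecific (gomp_tls_key);
     #endif
     }

   (Illustrative only; see libgomp.h for the authoritative definition.)  */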


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};
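
/* Note: gomp_team_start below allocates an array of these on its own
   stack with gomp_alloca and hands one slot to each new thread.  That
   is safe because gomp_thread_start copies everything it needs out of
   its slot before the startup barrier that gomp_team_start waits on,
   so the memory is never referenced after gomp_team_start returns.  */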


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
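
/* In the non-nested path above, a worker parks on pool->threads_dock
   between teams.  The master's dispatch side (in gomp_team_start below)
   is, in rough outline:

     nthr->fn = fn;     <- closure for the next parallel region
     nthr->data = data;
     gomp_barrier_wait (&pool->threads_dock);    <- undock the workers

   A worker released from the dock with thr->fn still NULL (the next
   team needs fewer threads) falls out of the do-while loop and exits;
   pool shutdown instead dispatches gomp_free_pool_helper to every
   worker (see gomp_free_thread below).  */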

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
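
/* For context: the compiler expands "#pragma omp parallel" into a call
   to GOMP_parallel (see parallel.c), which drives this file roughly as
   follows (slightly simplified from the real implementation):

     void
     GOMP_parallel (void (*fn) (void *), void *data,
                    unsigned num_threads, unsigned int flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
                        gomp_new_team (num_threads));
       fn (data);
       GOMP_parallel_end ();
     }
*/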


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
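
/* The helper above runs on each pool thread during pool teardown.
   gomp_barrier_wait_last registers the thread's arrival at the dock
   without blocking for a release (unless it is the last arriver), so
   each helper can proceed straight to pthread_exit while the second
   barrier wait in gomp_free_thread below completes once every helper
   has arrived.  */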

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;
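
  /* Worked example for the spread case above (illustrative numbers,
     not from the source): with nthreads == 4 and a master partition of
     8 places, T <= P gives s = 2 and rest = 0, so every thread gets a
     2-place subpartition and the master's partition is narrowed to the
     2-place block containing its current place.  With nthreads == 4
     and 3 places, T > P gives s = 1 and rest = 1: every subpartition
     is a single place and one place ends up with two threads.  */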

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }
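
  /* A private copy of the attributes is used here so that the
     per-thread hooks below (gomp_init_thread_affinity and
     gomp_adjust_thread_attr) can adjust attributes for one thread
     without clobbering the shared gomp_thread_attr; only the stack
     size is inherited, and the copy is likewise created detached.  */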

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT if non-zero will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
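
/* To summarize gomp_team_start: team members come from three sources;
   the master itself (team_id 0), idle threads re-dispatched from the
   pool's dock, and freshly created pthreads.  The do_release barrier
   is what actually releases them all into FN.  */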


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit tasks.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
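
/* Note the caching handshake with get_last_team above: a finished
   top-level team is parked in pool->last_team rather than freed, so
   that an immediately following parallel region of the same size can
   reuse it without reallocating it or reinitializing its barrier and
   locks.  Nested teams and single-thread teams are freed eagerly.  */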


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}
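
/* gomp_new_icv lazily gives the initial (non-libgomp-created) thread a
   task in which to store its internal control variables.  It is reached
   via gomp_icv (true) in libgomp.h the first time such a thread writes
   an ICV, e.g. by calling omp_set_num_threads before any parallel
   region.  Registering gomp_thread_destructor here ensures the task and
   any thread pool acquired later are cleaned up at thread exit.  */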