libgomp/team.c
/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
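  /* Without TLS support, the per-thread state lives on this thread's own
     stack and is published through the pthread key so that gomp_thread ()
     can still find it.  */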
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
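      /* Idle loop: run the work handed to us for the current team, then
         dock again on the pool barrier.  When released, either run the
         new fn the master installed, or exit if fn was left NULL.  */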
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
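  /* The team embeds work_share_chunk (8) inline work shares; the first is
     already in use, so chain shares 1 through 7 into the allocation free
     list.  */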
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool. */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof(struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
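  /* First parallel region on this thread: create its thread pool lazily
     and register the thread with the destructor key so gomp_free_thread
     releases the pool at thread exit.  */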
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      if (bind == omp_proc_bind_false)
        bind = omp_proc_bind_true;
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_false:
          bind = omp_proc_bind_true;
          /* FALLTHRU */
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread_data *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));
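  /* The start data is allocated on the master's stack; it stays valid
     because the master waits at the release barrier below until every
     newly created thread has copied the fields it needs.  */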

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
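      /* The team was cancelled: walk the chain of work shares that were
         still live and finalize each one before the team goes away.  */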
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
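      /* Defer freeing the team: its workers may still be inside the team
         barrier, so stash it on the pool and free it the next time round
         (or when the pool itself is freed).  */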
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}