[gomp] Add thread attribute customization
[platform/upstream/gcc.git] / libgomp / team.c
1 /* Copyright (C) 2005-2015 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25
26 /* This file handles the maintenance of threads in response to team
27    creation and termination.  */
28
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
33
34 /* This attribute contains PTHREAD_CREATE_DETACHED.  */
35 pthread_attr_t gomp_thread_attr;
36
37 /* This key is for the thread destructor.  */
38 pthread_key_t gomp_thread_destructor;
39
40
41 /* This is the libgomp per-thread data structure.  */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread struct gomp_thread gomp_tls_data;
44 #else
45 pthread_key_t gomp_tls_key;
46 #endif
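/* For reference: the gomp_thread () accessor used throughout this file
   resolves to the per-thread data declared above.  Assuming libgomp.h keeps
   its usual shape (the authoritative definition lives there), it looks
   roughly like this:

     #if defined HAVE_TLS || defined USE_EMUTLS
     static inline struct gomp_thread *gomp_thread (void)
     {
       return &gomp_tls_data;
     }
     #else
     static inline struct gomp_thread *gomp_thread (void)
     {
       return pthread_getspecific (gomp_tls_key);
     }
     #endif
   */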
47
48
49 /* This structure is used to communicate across pthread_create.  */
50
51 struct gomp_thread_start_data
52 {
53   void (*fn) (void *);
54   void *fn_data;
55   struct gomp_team_state ts;
56   struct gomp_task *task;
57   struct gomp_thread_pool *thread_pool;
58   unsigned int place;
59   bool nested;
60 };
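/* Lifetime note: these records are carved out of a gomp_alloca'd array on the
   stack of the thread that calls gomp_team_start below.  Each new thread only
   reads its record before its first barrier wait in gomp_thread_start, and the
   creating thread does not leave gomp_team_start until it has passed the same
   barrier, so the stack frame stays valid exactly as long as it is needed.  */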
61
62
63 /* This function is a pthread_create entry point.  It contains the idle
64    loop in which a thread waits to be called up to become part of a team.  */
65
66 static void *
67 gomp_thread_start (void *xdata)
68 {
69   struct gomp_thread_start_data *data = xdata;
70   struct gomp_thread *thr;
71   struct gomp_thread_pool *pool;
72   void (*local_fn) (void *);
73   void *local_data;
74
75 #if defined HAVE_TLS || defined USE_EMUTLS
76   thr = &gomp_tls_data;
77 #else
78   struct gomp_thread local_thr;
79   thr = &local_thr;
80   pthread_setspecific (gomp_tls_key, thr);
81 #endif
82   gomp_sem_init (&thr->release, 0);
83
84   /* Extract what we need from data.  */
85   local_fn = data->fn;
86   local_data = data->fn_data;
87   thr->thread_pool = data->thread_pool;
88   thr->ts = data->ts;
89   thr->task = data->task;
90   thr->place = data->place;
91
92   thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
93
94   /* Cache the thread pool pointer in a local variable.  */
95   pool = thr->thread_pool;
96
97   if (data->nested)
98     {
99       struct gomp_team *team = thr->ts.team;
100       struct gomp_task *task = thr->task;
101
102       gomp_barrier_wait (&team->barrier);
103
104       local_fn (local_data);
105       gomp_team_barrier_wait_final (&team->barrier);
106       gomp_finish_task (task);
107       gomp_barrier_wait_last (&team->barrier);
108     }
109   else
110     {
111       pool->threads[thr->ts.team_id] = thr;
112
113       gomp_barrier_wait (&pool->threads_dock);
114       do
115         {
116           struct gomp_team *team = thr->ts.team;
117           struct gomp_task *task = thr->task;
118
119           local_fn (local_data);
120           gomp_team_barrier_wait_final (&team->barrier);
121           gomp_finish_task (task);
122
123           gomp_barrier_wait (&pool->threads_dock);
124
125           local_fn = thr->fn;
126           local_data = thr->data;
127           thr->fn = NULL;
128         }
129       while (local_fn);
130     }
131
132   gomp_sem_destroy (&thr->release);
133   thr->thread_pool = NULL;
134   thr->task = NULL;
135   return NULL;
136 }
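/* In outline, the dock protocol used by the non-nested branch above: a pooled
   thread parks at gomp_barrier_wait (&pool->threads_dock).  To hand it a new
   parallel region, gomp_team_start below stores the job into the thread's
   descriptor and then joins the same barrier, roughly:

     nthr->fn = fn;        // outlined body of the next parallel region
     nthr->data = data;
     ...
     gomp_barrier_wait (&pool->threads_dock);   // releases the docked threads

   The released thread picks up thr->fn/thr->data and loops.  It leaves the
   loop, and thus exits, when it is undocked without having been given a new
   fn, or when gomp_free_thread points fn at gomp_free_pool_helper during pool
   shutdown.  */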
137
138 static inline struct gomp_team *
139 get_last_team (unsigned nthreads)
140 {
141   struct gomp_thread *thr = gomp_thread ();
142   if (thr->ts.team == NULL)
143     {
144       struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
145       struct gomp_team *last_team = pool->last_team;
146       if (last_team != NULL && last_team->nthreads == nthreads)
147         {
148           pool->last_team = NULL;
149           return last_team;
150         }
151     }
152   return NULL;
153 }
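/* Reuse note: pool->last_team caches the most recently finished team (see
   gomp_team_end below), so consecutive parallel regions with the same thread
   count skip the allocation and the barrier/mutex initialization in
   gomp_new_team and only reset the per-use fields.  The cache is consulted
   only when the calling thread is not itself inside a team
   (thr->ts.team == NULL), i.e. for non-nested regions.  */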
154
155 /* Create a new team data structure.  */
156
157 struct gomp_team *
158 gomp_new_team (unsigned nthreads)
159 {
160   struct gomp_team *team;
161   int i;
162
163   team = get_last_team (nthreads);
164   if (team == NULL)
165     {
166       size_t extra = sizeof (team->ordered_release[0])
167                      + sizeof (team->implicit_task[0]);
168       team = gomp_malloc (sizeof (*team) + nthreads * extra);
169
170 #ifndef HAVE_SYNC_BUILTINS
171       gomp_mutex_init (&team->work_share_list_free_lock);
172 #endif
173       gomp_barrier_init (&team->barrier, nthreads);
174       gomp_mutex_init (&team->task_lock);
175
176       team->nthreads = nthreads;
177     }
178
179   team->work_share_chunk = 8;
180 #ifdef HAVE_SYNC_BUILTINS
181   team->single_count = 0;
182 #endif
183   team->work_shares_to_free = &team->work_shares[0];
184   gomp_init_work_share (&team->work_shares[0], false, nthreads);
185   team->work_shares[0].next_alloc = NULL;
186   team->work_share_list_free = NULL;
187   team->work_share_list_alloc = &team->work_shares[1];
188   for (i = 1; i < 7; i++)
189     team->work_shares[i].next_free = &team->work_shares[i + 1];
190   team->work_shares[i].next_free = NULL;
191
192   gomp_sem_init (&team->master_release, 0);
193   team->ordered_release = (void *) &team->implicit_task[nthreads];
194   team->ordered_release[0] = &team->master_release;
195
196   team->task_queue = NULL;
197   team->task_count = 0;
198   team->task_queued_count = 0;
199   team->task_running_count = 0;
200   team->work_share_cancelled = 0;
201   team->team_cancelled = 0;
202
203   return team;
204 }
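/* Layout note: a team is a single allocation.  Assuming struct gomp_team ends
   with the flexible implicit_task[] array as declared in libgomp.h, the block
   allocated above is laid out as

     | struct gomp_team | implicit_task[0..nthreads-1] | ordered_release[0..nthreads-1] |

   which is why ordered_release is pointed at &team->implicit_task[nthreads]
   and why the malloc adds nthreads copies of both trailing element types.  */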
205
206
207 /* Free a team data structure.  */
208
209 static void
210 free_team (struct gomp_team *team)
211 {
212 #ifndef HAVE_SYNC_BUILTINS
213   gomp_mutex_destroy (&team->work_share_list_free_lock);
214 #endif
215   gomp_barrier_destroy (&team->barrier);
216   gomp_mutex_destroy (&team->task_lock);
217   free (team);
218 }
219
220 static void
221 gomp_free_pool_helper (void *thread_pool)
222 {
223   struct gomp_thread *thr = gomp_thread ();
224   struct gomp_thread_pool *pool
225     = (struct gomp_thread_pool *) thread_pool;
226   gomp_barrier_wait_last (&pool->threads_dock);
227   gomp_sem_destroy (&thr->release);
228   thr->thread_pool = NULL;
229   thr->task = NULL;
230   pthread_exit (NULL);
231 }
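/* Note: gomp_free_thread below is registered as the destructor of
   gomp_thread_destructor in initialize_team at the bottom of this file (and
   gomp_new_icv sets that key), so it runs automatically when a thread that
   owns a thread pool exits.  */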
232
233 /* Free a thread pool and release its threads. */
234
235 void
236 gomp_free_thread (void *arg __attribute__((unused)))
237 {
238   struct gomp_thread *thr = gomp_thread ();
239   struct gomp_thread_pool *pool = thr->thread_pool;
240   if (pool)
241     {
242       if (pool->threads_used > 0)
243         {
244           int i;
245           for (i = 1; i < pool->threads_used; i++)
246             {
247               struct gomp_thread *nthr = pool->threads[i];
248               nthr->fn = gomp_free_pool_helper;
249               nthr->data = pool;
250             }
251           /* This barrier undocks threads docked on pool->threads_dock.  */
252           gomp_barrier_wait (&pool->threads_dock);
253           /* And this waits until all threads have called gomp_barrier_wait_last
254              in gomp_free_pool_helper.  */
255           gomp_barrier_wait (&pool->threads_dock);
256           /* Now it is safe to destroy the barrier and free the pool.  */
257           gomp_barrier_destroy (&pool->threads_dock);
258
259 #ifdef HAVE_SYNC_BUILTINS
260           __sync_fetch_and_add (&gomp_managed_threads,
261                                 1L - pool->threads_used);
262 #else
263           gomp_mutex_lock (&gomp_managed_threads_lock);
264           gomp_managed_threads -= pool->threads_used - 1L;
265           gomp_mutex_unlock (&gomp_managed_threads_lock);
266 #endif
267         }
268       free (pool->threads);
269       if (pool->last_team)
270         free_team (pool->last_team);
271       free (pool);
272       thr->thread_pool = NULL;
273     }
274   if (thr->task != NULL)
275     {
276       struct gomp_task *task = thr->task;
277       gomp_end_task ();
278       free (task);
279     }
280 }
281
282 /* Launch a team.  */
283
284 void
285 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
286                  unsigned flags, struct gomp_team *team)
287 {
288   struct gomp_thread_start_data *start_data;
289   struct gomp_thread *thr, *nthr;
290   struct gomp_task *task;
291   struct gomp_task_icv *icv;
292   bool nested;
293   struct gomp_thread_pool *pool;
294   unsigned i, n, old_threads_used = 0;
295   pthread_attr_t thread_attr, *attr;
296   unsigned long nthreads_var;
297   char bind, bind_var;
298   unsigned int s = 0, rest = 0, p = 0, k = 0;
299   unsigned int affinity_count = 0;
300   struct gomp_thread **affinity_thr = NULL;
301
302   thr = gomp_thread ();
303   nested = thr->ts.team != NULL;
304   pool = thr->thread_pool;
305   task = thr->task;
306   icv = task ? &task->icv : &gomp_global_icv;
307   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
308     gomp_init_affinity ();
309
310   /* Always save the previous state, even if this isn't a nested team.
311      In particular, we should save any work share state from an outer
312      orphaned work share construct.  */
313   team->prev_ts = thr->ts;
314
315   thr->ts.team = team;
316   thr->ts.team_id = 0;
317   ++thr->ts.level;
318   if (nthreads > 1)
319     ++thr->ts.active_level;
320   thr->ts.work_share = &team->work_shares[0];
321   thr->ts.last_work_share = NULL;
322 #ifdef HAVE_SYNC_BUILTINS
323   thr->ts.single_count = 0;
324 #endif
325   thr->ts.static_trip = 0;
326   thr->task = &team->implicit_task[0];
327   nthreads_var = icv->nthreads_var;
328   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
329       && thr->ts.level < gomp_nthreads_var_list_len)
330     nthreads_var = gomp_nthreads_var_list[thr->ts.level];
331   bind_var = icv->bind_var;
332   if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
333     bind_var = flags & 7;
334   bind = bind_var;
335   if (__builtin_expect (gomp_bind_var_list != NULL, 0)
336       && thr->ts.level < gomp_bind_var_list_len)
337     bind_var = gomp_bind_var_list[thr->ts.level];
338   gomp_init_task (thr->task, task, icv);
339   team->implicit_task[0].icv.nthreads_var = nthreads_var;
340   team->implicit_task[0].icv.bind_var = bind_var;
341
342   if (nthreads == 1)
343     return;
344
345   i = 1;
346
347   if (__builtin_expect (gomp_places_list != NULL, 0))
348     {
349       /* Depending on the chosen proc_bind model, set the subpartition
350          for the master thread and initialize helper variables
351          P and optionally S, K and/or REST used by later place
352          computation for each additional thread.  */
353       p = thr->place - 1;
354       switch (bind)
355         {
356         case omp_proc_bind_true:
357         case omp_proc_bind_close:
358           if (nthreads > thr->ts.place_partition_len)
359             {
360               /* T > P.  S threads will be placed in each place,
361                  and the final REST threads are placed one by one
362                  into the already occupied places.  */
363               s = nthreads / thr->ts.place_partition_len;
364               rest = nthreads % thr->ts.place_partition_len;
365             }
366           else
367             s = 1;
368           k = 1;
369           break;
370         case omp_proc_bind_master:
371           /* Each thread will be bound to master's place.  */
372           break;
373         case omp_proc_bind_spread:
374           if (nthreads <= thr->ts.place_partition_len)
375             {
376               /* T <= P.  Each subpartition will have in between s
377                  and s+1 places (subpartitions starting at or
378                  after rest will have s places, earlier s+1 places),
379                  each thread will be bound to the first place in
380                  its subpartition (except for the master thread
381                  that can be bound to another place in its
382                  subpartition).  */
383               s = thr->ts.place_partition_len / nthreads;
384               rest = thr->ts.place_partition_len % nthreads;
385               rest = (s + 1) * rest + thr->ts.place_partition_off;
386               if (p < rest)
387                 {
388                   p -= (p - thr->ts.place_partition_off) % (s + 1);
389                   thr->ts.place_partition_len = s + 1;
390                 }
391               else
392                 {
393                   p -= (p - rest) % s;
394                   thr->ts.place_partition_len = s;
395                 }
396               thr->ts.place_partition_off = p;
397             }
398           else
399             {
400               /* T > P.  Each subpartition will have just a single
401                  place and we'll place between s and s+1
402                  threads into each subpartition.  */
403               s = nthreads / thr->ts.place_partition_len;
404               rest = nthreads % thr->ts.place_partition_len;
405               thr->ts.place_partition_off = p;
406               thr->ts.place_partition_len = 1;
407               k = 1;
408             }
409           break;
410         }
411     }
412   else
413     bind = omp_proc_bind_false;
414
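/* A worked example of the bookkeeping above and in the loops below, with
   illustrative numbers: for a 4-place partition and nthreads = 10 under
   omp_proc_bind_close, T > P gives s = 10 / 4 = 2 and rest = 10 % 4 = 2, so
   threads are dealt out two per place and the final two (the REST threads)
   are then added one by one to the first places:

     place        0    1    2    3
     threads      3    3    2    2

   Under omp_proc_bind_spread with nthreads = 2 and the same 4 places, T <= P
   instead splits the partition into subpartitions of s = 2 places each; every
   additional thread is bound to the first place of its subpartition, while
   the master keeps its current place.  */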
415   /* We only allow the reuse of idle threads for non-nested PARALLEL
416      regions.  This appears to be implied by the semantics of
417      threadprivate variables, but perhaps that's reading too much into
418      things.  Certainly it does prevent any locking problems, since
419      only the initial program thread will modify pool->threads.  */
420   if (!nested)
421     {
422       old_threads_used = pool->threads_used;
423
424       if (nthreads <= old_threads_used)
425         n = nthreads;
426       else if (old_threads_used == 0)
427         {
428           n = 0;
429           gomp_barrier_init (&pool->threads_dock, nthreads);
430         }
431       else
432         {
433           n = old_threads_used;
434
435           /* Increase the barrier threshold to make sure all new
436              threads arrive before the team is released.  */
437           gomp_barrier_reinit (&pool->threads_dock, nthreads);
438         }
439
440       /* Not true yet, but soon will be.  We're going to release all
441          threads from the dock, and those that aren't part of the
442          team will exit.  */
443       pool->threads_used = nthreads;
444
445       /* If necessary, expand the size of the pool->threads array.  It is
446          expected that changes in the number of threads are rare, thus we
447          make no effort to expand pool->threads_size geometrically.  */
448       if (nthreads >= pool->threads_size)
449         {
450           pool->threads_size = nthreads + 1;
451           pool->threads
452             = gomp_realloc (pool->threads,
453                             pool->threads_size
454                             * sizeof (struct gomp_thread_data *));
455         }
456
457       /* Release existing idle threads.  */
458       for (; i < n; ++i)
459         {
460           unsigned int place_partition_off = thr->ts.place_partition_off;
461           unsigned int place_partition_len = thr->ts.place_partition_len;
462           unsigned int place = 0;
463           if (__builtin_expect (gomp_places_list != NULL, 0))
464             {
465               switch (bind)
466                 {
467                 case omp_proc_bind_true:
468                 case omp_proc_bind_close:
469                   if (k == s)
470                     {
471                       ++p;
472                       if (p == (team->prev_ts.place_partition_off
473                                 + team->prev_ts.place_partition_len))
474                         p = team->prev_ts.place_partition_off;
475                       k = 1;
476                       if (i == nthreads - rest)
477                         s = 1;
478                     }
479                   else
480                     ++k;
481                   break;
482                 case omp_proc_bind_master:
483                   break;
484                 case omp_proc_bind_spread:
485                   if (k == 0)
486                     {
487                       /* T <= P.  */
488                       if (p < rest)
489                         p += s + 1;
490                       else
491                         p += s;
492                       if (p == (team->prev_ts.place_partition_off
493                                 + team->prev_ts.place_partition_len))
494                         p = team->prev_ts.place_partition_off;
495                       place_partition_off = p;
496                       if (p < rest)
497                         place_partition_len = s + 1;
498                       else
499                         place_partition_len = s;
500                     }
501                   else
502                     {
503                       /* T > P.  */
504                       if (k == s)
505                         {
506                           ++p;
507                           if (p == (team->prev_ts.place_partition_off
508                                     + team->prev_ts.place_partition_len))
509                             p = team->prev_ts.place_partition_off;
510                           k = 1;
511                           if (i == nthreads - rest)
512                             s = 1;
513                         }
514                       else
515                         ++k;
516                       place_partition_off = p;
517                       place_partition_len = 1;
518                     }
519                   break;
520                 }
521               if (affinity_thr != NULL
522                   || (bind != omp_proc_bind_true
523                       && pool->threads[i]->place != p + 1)
524                   || pool->threads[i]->place <= place_partition_off
525                   || pool->threads[i]->place > (place_partition_off
526                                                 + place_partition_len))
527                 {
528                   unsigned int l;
529                   if (affinity_thr == NULL)
530                     {
531                       unsigned int j;
532
533                       if (team->prev_ts.place_partition_len > 64)
534                         affinity_thr
535                           = gomp_malloc (team->prev_ts.place_partition_len
536                                          * sizeof (struct gomp_thread *));
537                       else
538                         affinity_thr
539                           = gomp_alloca (team->prev_ts.place_partition_len
540                                          * sizeof (struct gomp_thread *));
541                       memset (affinity_thr, '\0',
542                               team->prev_ts.place_partition_len
543                               * sizeof (struct gomp_thread *));
544                       for (j = i; j < old_threads_used; j++)
545                         {
546                           if (pool->threads[j]->place
547                               > team->prev_ts.place_partition_off
548                               && (pool->threads[j]->place
549                                   <= (team->prev_ts.place_partition_off
550                                       + team->prev_ts.place_partition_len)))
551                             {
552                               l = pool->threads[j]->place - 1
553                                   - team->prev_ts.place_partition_off;
554                               pool->threads[j]->data = affinity_thr[l];
555                               affinity_thr[l] = pool->threads[j];
556                             }
557                           pool->threads[j] = NULL;
558                         }
559                       if (nthreads > old_threads_used)
560                         memset (&pool->threads[old_threads_used],
561                                 '\0', ((nthreads - old_threads_used)
562                                        * sizeof (struct gomp_thread *)));
563                       n = nthreads;
564                       affinity_count = old_threads_used - i;
565                     }
566                   if (affinity_count == 0)
567                     break;
568                   l = p;
569                   if (affinity_thr[l - team->prev_ts.place_partition_off]
570                       == NULL)
571                     {
572                       if (bind != omp_proc_bind_true)
573                         continue;
574                       for (l = place_partition_off;
575                            l < place_partition_off + place_partition_len;
576                            l++)
577                         if (affinity_thr[l - team->prev_ts.place_partition_off]
578                             != NULL)
579                           break;
580                       if (l == place_partition_off + place_partition_len)
581                         continue;
582                     }
583                   nthr = affinity_thr[l - team->prev_ts.place_partition_off];
584                   affinity_thr[l - team->prev_ts.place_partition_off]
585                     = (struct gomp_thread *) nthr->data;
586                   affinity_count--;
587                   pool->threads[i] = nthr;
588                 }
589               else
590                 nthr = pool->threads[i];
591               place = p + 1;
592             }
593           else
594             nthr = pool->threads[i];
595           nthr->ts.team = team;
596           nthr->ts.work_share = &team->work_shares[0];
597           nthr->ts.last_work_share = NULL;
598           nthr->ts.team_id = i;
599           nthr->ts.level = team->prev_ts.level + 1;
600           nthr->ts.active_level = thr->ts.active_level;
601           nthr->ts.place_partition_off = place_partition_off;
602           nthr->ts.place_partition_len = place_partition_len;
603 #ifdef HAVE_SYNC_BUILTINS
604           nthr->ts.single_count = 0;
605 #endif
606           nthr->ts.static_trip = 0;
607           nthr->task = &team->implicit_task[i];
608           nthr->place = place;
609           gomp_init_task (nthr->task, task, icv);
610           team->implicit_task[i].icv.nthreads_var = nthreads_var;
611           team->implicit_task[i].icv.bind_var = bind_var;
612           nthr->fn = fn;
613           nthr->data = data;
614           team->ordered_release[i] = &nthr->release;
615         }
616
617       if (__builtin_expect (affinity_thr != NULL, 0))
618         {
619           /* If AFFINITY_THR is non-NULL just because we had to
620              permute some threads in the pool, but we've managed
621              to find exactly as many old threads as we'd find
622              without affinity, we don't need to handle this
623              specially anymore.  */
624           if (nthreads <= old_threads_used
625               ? (affinity_count == old_threads_used - nthreads)
626               : (i == old_threads_used))
627             {
628               if (team->prev_ts.place_partition_len > 64)
629                 free (affinity_thr);
630               affinity_thr = NULL;
631               affinity_count = 0;
632             }
633           else
634             {
635               i = 1;
636               /* We are going to compute the places/subpartitions
637                  again from the beginning.  So, we need to reinitialize
638                  vars modified by the switch (bind) above inside
639                  the loop, to the state they had after the initial
640                  switch (bind).  */
641               switch (bind)
642                 {
643                 case omp_proc_bind_true:
644                 case omp_proc_bind_close:
645                   if (nthreads > thr->ts.place_partition_len)
646                     /* T > P.  S has been changed, so needs
647                        to be recomputed.  */
648                     s = nthreads / thr->ts.place_partition_len;
649                   k = 1;
650                   p = thr->place - 1;
651                   break;
652                 case omp_proc_bind_master:
653                   /* No vars have been changed.  */
654                   break;
655                 case omp_proc_bind_spread:
656                   p = thr->ts.place_partition_off;
657                   if (k != 0)
658                     {
659                       /* T > P.  */
660                       s = nthreads / team->prev_ts.place_partition_len;
661                       k = 1;
662                     }
663                   break;
664                 }
665
666               /* Increase the barrier threshold to make sure all new
667                  threads and all the threads we're going to let die
668                  arrive before the team is released.  */
669               if (affinity_count)
670                 gomp_barrier_reinit (&pool->threads_dock,
671                                      nthreads + affinity_count);
672             }
673         }
674
675       if (i == nthreads)
676         goto do_release;
677
678     }
679
680   if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
681     {
682       long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
683
684       if (old_threads_used == 0)
685         --diff;
686
687 #ifdef HAVE_SYNC_BUILTINS
688       __sync_fetch_and_add (&gomp_managed_threads, diff);
689 #else
690       gomp_mutex_lock (&gomp_managed_threads_lock);
691       gomp_managed_threads += diff;
692       gomp_mutex_unlock (&gomp_managed_threads_lock);
693 #endif
694     }
695
696   attr = &gomp_thread_attr;
697   if (__builtin_expect (gomp_places_list != NULL, 0))
698     {
699       size_t stacksize;
700       pthread_attr_init (&thread_attr);
701       pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
702       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
703         pthread_attr_setstacksize (&thread_attr, stacksize);
704       attr = &thread_attr;
705     }
706
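/* gomp_adjust_thread_attr, called just before pthread_create below, is
   declared in pool.h (not shown here) and appears to be the hook behind the
   "thread attribute customization" in this change's title.  On a plain
   pthreads host a no-op variant could plausibly be as small as the following
   sketch (an assumption, not the actual header):

     static inline pthread_attr_t *
     gomp_adjust_thread_attr (pthread_attr_t *attr,
                              pthread_attr_t *mutable_attr)
     {
       return attr;   // nothing to customize on this target
     }

   An implementation that needs, say, a different stack size would copy *attr
   into *mutable_attr, adjust it there and return mutable_attr; that is why
   the cleanup after the launch loop destroys thread_attr whenever attr ends
   up pointing at it.  */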
707   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
708                             * (nthreads-i));
709
710   /* Launch new threads.  */
711   for (; i < nthreads; ++i)
712     {
713       pthread_t pt;
714       int err;
715
716       start_data->ts.place_partition_off = thr->ts.place_partition_off;
717       start_data->ts.place_partition_len = thr->ts.place_partition_len;
718       start_data->place = 0;
719       if (__builtin_expect (gomp_places_list != NULL, 0))
720         {
721           switch (bind)
722             {
723             case omp_proc_bind_true:
724             case omp_proc_bind_close:
725               if (k == s)
726                 {
727                   ++p;
728                   if (p == (team->prev_ts.place_partition_off
729                             + team->prev_ts.place_partition_len))
730                     p = team->prev_ts.place_partition_off;
731                   k = 1;
732                   if (i == nthreads - rest)
733                     s = 1;
734                 }
735               else
736                 ++k;
737               break;
738             case omp_proc_bind_master:
739               break;
740             case omp_proc_bind_spread:
741               if (k == 0)
742                 {
743                   /* T <= P.  */
744                   if (p < rest)
745                     p += s + 1;
746                   else
747                     p += s;
748                   if (p == (team->prev_ts.place_partition_off
749                             + team->prev_ts.place_partition_len))
750                     p = team->prev_ts.place_partition_off;
751                   start_data->ts.place_partition_off = p;
752                   if (p < rest)
753                     start_data->ts.place_partition_len = s + 1;
754                   else
755                     start_data->ts.place_partition_len = s;
756                 }
757               else
758                 {
759                   /* T > P.  */
760                   if (k == s)
761                     {
762                       ++p;
763                       if (p == (team->prev_ts.place_partition_off
764                                 + team->prev_ts.place_partition_len))
765                         p = team->prev_ts.place_partition_off;
766                       k = 1;
767                       if (i == nthreads - rest)
768                         s = 1;
769                     }
770                   else
771                     ++k;
772                   start_data->ts.place_partition_off = p;
773                   start_data->ts.place_partition_len = 1;
774                 }
775               break;
776             }
777           start_data->place = p + 1;
778           if (affinity_thr != NULL && pool->threads[i] != NULL)
779             continue;
780           gomp_init_thread_affinity (attr, p);
781         }
782
783       start_data->fn = fn;
784       start_data->fn_data = data;
785       start_data->ts.team = team;
786       start_data->ts.work_share = &team->work_shares[0];
787       start_data->ts.last_work_share = NULL;
788       start_data->ts.team_id = i;
789       start_data->ts.level = team->prev_ts.level + 1;
790       start_data->ts.active_level = thr->ts.active_level;
791 #ifdef HAVE_SYNC_BUILTINS
792       start_data->ts.single_count = 0;
793 #endif
794       start_data->ts.static_trip = 0;
795       start_data->task = &team->implicit_task[i];
796       gomp_init_task (start_data->task, task, icv);
797       team->implicit_task[i].icv.nthreads_var = nthreads_var;
798       team->implicit_task[i].icv.bind_var = bind_var;
799       start_data->thread_pool = pool;
800       start_data->nested = nested;
801
802       attr = gomp_adjust_thread_attr (attr, &thread_attr);
803       err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
804       if (err != 0)
805         gomp_fatal ("Thread creation failed: %s", strerror (err));
806     }
807
808   if (__builtin_expect (attr == &thread_attr, 0))
809     pthread_attr_destroy (&thread_attr);
810
811  do_release:
812   gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
813
814   /* Decrease the barrier threshold to match the number of threads
815      that should arrive back at the end of this team.  The extra
816      threads should be exiting.  Note that we arrange for this test
817      to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
818      the barrier as well as gomp_managed_threads was temporarily
819      set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
820      AFFINITY_COUNT, if non-zero, will always be at least
821      OLD_THREADS_USED - NTHREADS.  */
822   if (__builtin_expect (nthreads < old_threads_used, 0)
823       || __builtin_expect (affinity_count, 0))
824     {
825       long diff = (long) nthreads - (long) old_threads_used;
826
827       if (affinity_count)
828         diff = -affinity_count;
829
830       gomp_barrier_reinit (&pool->threads_dock, nthreads);
831
832 #ifdef HAVE_SYNC_BUILTINS
833       __sync_fetch_and_add (&gomp_managed_threads, diff);
834 #else
835       gomp_mutex_lock (&gomp_managed_threads_lock);
836       gomp_managed_threads += diff;
837       gomp_mutex_unlock (&gomp_managed_threads_lock);
838 #endif
839     }
840   if (__builtin_expect (affinity_thr != NULL, 0)
841       && team->prev_ts.place_partition_len > 64)
842     free (affinity_thr);
843 }
844
845
846 /* Terminate the current team.  This is only to be called by the master
847    thread.  We assume that we must wait for the other threads.  */
848
849 void
850 gomp_team_end (void)
851 {
852   struct gomp_thread *thr = gomp_thread ();
853   struct gomp_team *team = thr->ts.team;
854
855   /* This barrier handles all pending explicit threads.
856      As #pragma omp cancel parallel might leave the awaited count in
857      team->barrier in an inconsistent state, we need to use a different
858      counter here.  */
859   gomp_team_barrier_wait_final (&team->barrier);
860   if (__builtin_expect (team->team_cancelled, 0))
861     {
862       struct gomp_work_share *ws = team->work_shares_to_free;
863       do
864         {
865           struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
866           if (next_ws == NULL)
867             gomp_ptrlock_set (&ws->next_ws, ws);
868           gomp_fini_work_share (ws);
869           ws = next_ws;
870         }
871       while (ws != NULL);
872     }
873   else
874     gomp_fini_work_share (thr->ts.work_share);
875
876   gomp_end_task ();
877   thr->ts = team->prev_ts;
878
879   if (__builtin_expect (thr->ts.team != NULL, 0))
880     {
881 #ifdef HAVE_SYNC_BUILTINS
882       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
883 #else
884       gomp_mutex_lock (&gomp_managed_threads_lock);
885       gomp_managed_threads -= team->nthreads - 1L;
886       gomp_mutex_unlock (&gomp_managed_threads_lock);
887 #endif
888       /* This barrier has gomp_barrier_wait_last counterparts
889          and ensures the team can be safely destroyed.  */
890       gomp_barrier_wait (&team->barrier);
891     }
892
893   if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
894     {
895       struct gomp_work_share *ws = team->work_shares[0].next_alloc;
896       do
897         {
898           struct gomp_work_share *next_ws = ws->next_alloc;
899           free (ws);
900           ws = next_ws;
901         }
902       while (ws != NULL);
903     }
904   gomp_sem_destroy (&team->master_release);
905
906   if (__builtin_expect (thr->ts.team != NULL, 0)
907       || __builtin_expect (team->nthreads == 1, 0))
908     free_team (team);
909   else
910     {
911       struct gomp_thread_pool *pool = thr->thread_pool;
912       if (pool->last_team)
913         free_team (pool->last_team);
914       pool->last_team = team;
915       gomp_release_thread_pool (pool);
916     }
917 }
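/* For context: the team lifecycle above is driven from the compiler-visible
   entry points in parallel.c.  A "#pragma omp parallel" region boils down to
   roughly the following simplified sketch of GOMP_parallel (the real function
   also handles the if/num_threads clause details):

     void
     GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
                    unsigned flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
                        gomp_new_team (num_threads));
       fn (data);               // the master runs the region body itself
       GOMP_parallel_end ();    // ends up in gomp_team_end ()
     }
   */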
918
919
920 /* Constructors for this file.  */
921
922 static void __attribute__((constructor))
923 initialize_team (void)
924 {
925 #if !defined HAVE_TLS && !defined USE_EMUTLS
926   static struct gomp_thread initial_thread_tls_data;
927
928   pthread_key_create (&gomp_tls_key, NULL);
929   pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
930 #endif
931
932   if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
933     gomp_fatal ("could not create thread pool destructor.");
934 }
935
936 static void __attribute__((destructor))
937 team_destructor (void)
938 {
939   /* Without this, dlclose on libgomp could lead to subsequent
940      crashes.  */
941   pthread_key_delete (gomp_thread_destructor);
942 }
943
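/* gomp_new_icv gives a thread that was not created by libgomp (e.g. a plain
   pthread that calls an OpenMP API routine) its own ICV set and arranges for
   gomp_free_thread to run at its exit.  Assuming libgomp.h keeps its usual
   gomp_icv helper, the caller side looks roughly like:

     static inline struct gomp_task_icv *
     gomp_icv (bool write)
     {
       struct gomp_task *task = gomp_thread ()->task;
       if (task)
         return &task->icv;
       else if (write)
         return gomp_new_icv ();
       else
         return &gomp_global_icv;
     }
   */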
944 struct gomp_task_icv *
945 gomp_new_icv (void)
946 {
947   struct gomp_thread *thr = gomp_thread ();
948   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
949   gomp_init_task (task, NULL, &gomp_global_icv);
950   thr->task = task;
951   pthread_setspecific (gomp_thread_destructor, thr);
952   return &task->icv;
953 }