Import pre-release gcc-5.0 to new vendor branch
[dragonfly.git] / contrib / gcc-5.0 / libgomp / team.c
/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
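      /* Threads of a nested team are not pooled: wait until the master
         releases the team, run the parallel region, then help finish any
         remaining tasks and synchronize once more so the master can
         safely tear the team down.  */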
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

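      /* Pooled threads dock on POOL->THREADS_DOCK between teams.  After
         running the region they were handed, they re-dock, pick up the
         next FN/DATA pair stored by gomp_team_start (or by
         gomp_free_thread when the pool is torn down), and leave the loop
         once FN is NULL.  */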
      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

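  /* The team, its implicit_task array and the ordered_release pointer
     array are carved out of a single allocation; ordered_release is
     placed right after implicit_task[nthreads] further below.  */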
  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool. */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof(struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

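/* Helper run by each pooled thread during pool teardown: synchronize
   with gomp_free_thread, release per-thread resources and exit.  */
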
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
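          /* Hand every docked worker gomp_free_pool_helper as its next
             function, so that once the dock is released below each of
             them cleans up and exits.  */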
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread_data *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
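              /* If this pooled thread is not already on an acceptable
                 place for the chosen binding, look for a replacement
                 among the remaining old threads, grouped by place in
                 AFFINITY_THR.  */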
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));
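  /* START_DATA holds one slot per thread still to be created.  Each new
     thread copies its slot in gomp_thread_start before reaching the
     barrier released at do_release below, so the stack allocation stays
     live long enough.  */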

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
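  /* Releasing this barrier hands every team member its work: pooled
     threads leave the dock with THR->FN/DATA already set, while threads
     of a nested team are released from the team barrier.  */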
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

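/* Create and install an initial implicit task (using the global ICVs)
   for a thread that does not have one yet, and register the thread
   destructor so the task is released at thread exit.  */
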
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}