1 /*
2  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by John Birrell.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD: src/lib/libc_r/uthread/uthread_kern.c,v 1.28.2.13 2002/10/22 14:44:03 fjoe Exp $
33  * $DragonFly: src/lib/libc_r/uthread/uthread_kern.c,v 1.7 2007/12/14 20:07:59 dillon Exp $
34  *
35  */
36 #include <errno.h>
37 #include <poll.h>
38 #include <stdlib.h>
39 #include <stdarg.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <setjmp.h>
#include <limits.h>	/* UINT_MAX, used below for tick rollover accounting */
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/signalvar.h>
46 #include <sys/stat.h>
47 #include <sys/time.h>
48 #include <sys/socket.h>
49 #include <sys/uio.h>
50 #include <sys/syscall.h>
51 #include <fcntl.h>
52 #include <pthread.h>
53 #include "pthread_private.h"
54
55 /* #define DEBUG_THREAD_KERN */
56 #ifdef DEBUG_THREAD_KERN
57 #define DBG_MSG         stdout_debug
58 #else
59 #define DBG_MSG(x...)
60 #endif
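/*
 * For illustration: with DEBUG_THREAD_KERN defined, a call such as
 *
 *	DBG_MSG("Entering scheduler due to signal\n");
 *
 * goes through stdout_debug() (the debug printer declared in
 * pthread_private.h); with it undefined the macro expands to nothing
 * and the call disappears entirely.
 */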
61
62 /* Static function prototype definitions: */
63 static void
64 thread_kern_poll(int wait_reqd);
65
66 static void
67 dequeue_signals(void);
68
69 static inline void
70 thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);
71
72 /* Static variables: */
73 static int      last_tick = 0;
74 static int      called_from_handler = 0;
75
76 /*
77  * This is called when a signal handler finishes and wants to
78  * return to a previous frame.
79  */
80 void
81 _thread_kern_sched_frame(struct pthread_signal_frame *psf)
82 {
83         struct pthread  *curthread = _get_curthread();
84
85         /*
86          * Flag the pthread kernel as executing scheduler code
87          * to prevent a signal from interrupting this execution and
88          * corrupting the (soon-to-be) current frame.
89          */
90         _thread_kern_in_sched = 1;
91
92         /* Restore the signal frame: */
93         _thread_sigframe_restore(curthread, psf);
94
95         /* The signal mask was restored; check for any pending signals: */
96         curthread->check_pending = 1;
97
98         /* Switch to the thread scheduler: */
99         ___longjmp(_thread_kern_sched_jb, 1);
100 }
101
102
103 void
104 _thread_kern_sched(ucontext_t *ucp)
105 {
106         struct pthread  *curthread = _get_curthread();
107
108         /*
109          * Flag the pthread kernel as executing scheduler code
110          * to prevent a scheduler signal from interrupting this
111          * execution and calling the scheduler again.
112          */
113         _thread_kern_in_sched = 1;
114
115         /* Check if this function was called from the signal handler: */
116         if (ucp != NULL) {
117                 /* FP registers now saved and restored by kernel */
118                 called_from_handler = 1;
119                 DBG_MSG("Entering scheduler due to signal\n");
120         }
121
122         /* Save the state of the current thread: */
123         if (_setjmp(curthread->ctx.jb) != 0) {
124                 DBG_MSG("Returned from ___longjmp, thread %p\n",
125                     curthread);
126                 /*
127                  * This point is reached when a longjmp() is called
128                  * to restore the state of a thread.
129                  *
130                  * This is the normal way out of the scheduler.
131                  */
132                 _thread_kern_in_sched = 0;
133
134                 if (curthread->sig_defer_count == 0) {
135                         if (((curthread->cancelflags &
136                             PTHREAD_AT_CANCEL_POINT) == 0) &&
137                             ((curthread->cancelflags &
138                             PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
139                                 /*
140                                  * Cancellations override signals.
141                                  *
142                                  * Stick a cancellation point at the
143                                  * start of each async-cancellable
144                                  * thread's resumption.
145                                  *
146                                  * We allow threads woken at cancel
147                                  * points to do their own checks.
148                                  */
149                                 pthread_testcancel();
150                 }
151
152                 if (_sched_switch_hook != NULL) {
153                         /* Run the installed switch hook: */
154                         thread_run_switch_hook(_last_user_thread, curthread);
155                 }
156                 if (ucp == NULL)
157                         return;
158                 else {
159                         /* NOTE: FP registers now saved and restored by kernel*/
160
161                         /*
162                          * Set the process signal mask in the context; it
163                          * could have changed by the handler.
164                          */
165                         ucp->uc_sigmask = _process_sigmask;
166
167                         /* Resume the interrupted thread: */
168                         sigreturn(ucp);
169                 }
170         }
171         /* Switch to the thread scheduler: */
172         ___longjmp(_thread_kern_sched_jb, 1);
173 }
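/*
 * A minimal sketch of the context hand-off implemented above (not
 * compiled code): the running thread saves its registers with _setjmp()
 * and jumps onto the scheduler's context; when the scheduler later
 * picks this thread again it does ___longjmp(curthread->ctx.jb, 1), so
 * the original _setjmp() returns non-zero and execution resumes in the
 * branch that clears _thread_kern_in_sched.
 *
 *	if (_setjmp(curthread->ctx.jb) == 0)
 *		___longjmp(_thread_kern_sched_jb, 1);	enter the scheduler
 *	else
 *		... thread resumes here on the next ___longjmp ...
 */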
174
175 void
176 _thread_kern_sched_sig(void)
177 {
178         struct pthread  *curthread = _get_curthread();
179
180         curthread->check_pending = 1;
181         _thread_kern_sched(NULL);
182 }
183
184
185 void
186 _thread_kern_scheduler(void)
187 {
188         struct timespec ts;
189         struct timeval  tv;
190         struct pthread  *curthread = _get_curthread();
191         pthread_t       pthread, pthread_h;
192         unsigned int    current_tick;
193         int             add_to_prioq;
194
195         /* If the currently running thread is a user thread, save it: */
196         if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
197                 _last_user_thread = curthread;
198
199         if (called_from_handler != 0) {
200                 called_from_handler = 0;
201
202                 /*
203                  * We were called from a signal handler; restore the process
204                  * signal mask.
205                  */
206                 if (__sys_sigprocmask(SIG_SETMASK,
207                     &_process_sigmask, NULL) != 0)
208                         PANIC("Unable to restore process mask after signal");
209         }
210
211         /*
212          * Enter a scheduling loop that finds the next thread that is
213          * ready to run. This loop completes when there are no more threads
214          * in the global list or when a thread has its state restored by
215          * either a sigreturn (if the state was saved as a sigcontext) or a
216          * longjmp (if the state was saved by a setjmp).
217          */
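        /*
         * In outline, each pass of the loop below:
         *   1. protects the scheduling queues from the signal handler,
         *   2. requeues the previously running thread according to its
         *      new state,
         *   3. polls file descriptors when a scheduling tick has passed
         *      or nothing is runnable,
         *   4. moves threads whose wakeup time has passed back to the
         *      priority (ready) queue,
         *   5. picks the highest priority ready thread and longjmps to
         *      it, or blocks in thread_kern_poll() on the library's
         *      kernel thread when nothing is ready.
         */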
218         while (!(TAILQ_EMPTY(&_thread_list))) {
219                 /* Get the current time of day: */
220                 GET_CURRENT_TOD(tv);
221                 TIMEVAL_TO_TIMESPEC(&tv, &ts);
222                 current_tick = _sched_ticks;
223
224                 /*
225                  * Protect the scheduling queues from access by the signal
226                  * handler.
227                  */
228                 _queue_signals = 1;
229                 add_to_prioq = 0;
230
231                 if (curthread != &_thread_kern_thread) {
232                         /*
233                          * This thread no longer needs to yield the CPU.
234                          */
235                         curthread->yield_on_sig_undefer = 0;
236         
237                         if (curthread->state != PS_RUNNING) {
238                                 /*
239                                  * Save the current time as the time that the
240                                  * thread became inactive:
241                                  */
242                                 curthread->last_inactive = (long)current_tick;
243                                 if (curthread->last_inactive <
244                                     curthread->last_active) {
245                                         /* Account for a rollover: */
246                                         curthread->last_inactive +=
247                                             UINT_MAX + 1;
248                                 }
249                         }
250
251                         /*
252                          * Place the currently running thread into the
253                          * appropriate queue(s).
254                          */
255                         switch (curthread->state) {
256                         case PS_DEAD:
257                         case PS_STATE_MAX: /* to silence -Wall */
258                         case PS_SUSPENDED:
259                                 /*
260                                  * Dead and suspended threads are not placed
261                                  * in any queue:
262                                  */
263                                 break;
264
265                         case PS_RUNNING:
266                                 /*
267                                  * Runnable threads can't be placed in the
268                                  * priority queue until after waiting threads
269                                  * are polled (to preserve round-robin
270                                  * scheduling).
271                                  */
272                                 add_to_prioq = 1;
273                                 break;
274
275                         /*
276                          * States which do not depend on file descriptor I/O
277                          * operations or timeouts:
278                          */
279                         case PS_DEADLOCK:
280                         case PS_FDLR_WAIT:
281                         case PS_FDLW_WAIT:
282                         case PS_JOIN:
283                         case PS_MUTEX_WAIT:
284                         case PS_SIGSUSPEND:
285                         case PS_SIGTHREAD:
286                         case PS_SIGWAIT:
287                         case PS_WAIT_WAIT:
288                                 /* No timeouts for these states: */
289                                 curthread->wakeup_time.tv_sec = -1;
290                                 curthread->wakeup_time.tv_nsec = -1;
291
292                                 /* Restart the time slice: */
293                                 curthread->slice_usec = -1;
294
295                                 /* Insert into the waiting queue: */
296                                 PTHREAD_WAITQ_INSERT(curthread);
297                                 break;
298
299                         /* States which can timeout: */
300                         case PS_COND_WAIT:
301                         case PS_SLEEP_WAIT:
302                                 /* Restart the time slice: */
303                                 curthread->slice_usec = -1;
304
305                                 /* Insert into the waiting queue: */
306                                 PTHREAD_WAITQ_INSERT(curthread);
307                                 break;
308         
309                         /* States that require periodic work: */
310                         case PS_SPINBLOCK:
311                                 /* No timeouts for this state: */
312                                 curthread->wakeup_time.tv_sec = -1;
313                                 curthread->wakeup_time.tv_nsec = -1;
314
315                                 /* Increment spinblock count: */
316                                 _spinblock_count++;
317
318                                 /* FALLTHROUGH */
319                         case PS_FDR_WAIT:
320                         case PS_FDW_WAIT:
321                         case PS_POLL_WAIT:
322                         case PS_SELECT_WAIT:
323                                 /* Restart the time slice: */
324                                 curthread->slice_usec = -1;
325         
326                                 /* Insert into the waiting queue: */
327                                 PTHREAD_WAITQ_INSERT(curthread);
328         
329                                 /* Insert into the work queue: */
330                                 PTHREAD_WORKQ_INSERT(curthread);
331                                 break;
332                         }
333
334                         /*
335                          * Are there pending signals for this thread?
336                          *
337                          * This check has to be performed after the thread
338                          * has been placed in the queue(s) appropriate for
339                          * its state.  The process of adding pending signals
340                          * can change a thread's state, which in turn will
341                          * attempt to add or remove the thread from any
342                          * scheduling queue to which it belongs.
343                          */
344                         if (curthread->check_pending != 0) {
345                                 curthread->check_pending = 0;
346                                 _thread_sig_check_pending(curthread);
347                         }
348                 }
349
350                 /*
351                  * Avoid polling file descriptors if there are none
352                  * waiting:
353                  */
354                 if (TAILQ_EMPTY(&_workq) != 0) {
355                 }
356                 /*
357                  * Poll file descriptors only if a new scheduling signal
358                  * has occurred or if we have no more runnable threads.
359                  */
360                 else if (((current_tick = _sched_ticks) != last_tick) ||
361                     ((curthread->state != PS_RUNNING) &&
362                     (PTHREAD_PRIOQ_FIRST() == NULL))) {
363                         /* Unprotect the scheduling queues: */
364                         _queue_signals = 0;
365
366                         /*
367                          * Poll file descriptors to update the state of threads
368                          * waiting on file I/O where data may be available:
369                          */
370                         thread_kern_poll(0);
371
372                         /* Protect the scheduling queues: */
373                         _queue_signals = 1;
374                 }
375                 last_tick = current_tick;
376
377                 /*
378                  * Wake up threads that have timed out.  This has to be
379                  * done after polling in case a thread does a poll or
380                  * select with zero time.
381                  */
382                 PTHREAD_WAITQ_SETACTIVE();
383                 while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
384                     (pthread->wakeup_time.tv_sec != -1) &&
385                     (((pthread->wakeup_time.tv_sec == 0) &&
386                     (pthread->wakeup_time.tv_nsec == 0)) ||
387                     (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
388                     ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
389                     (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
390                         switch (pthread->state) {
391                         case PS_POLL_WAIT:
392                         case PS_SELECT_WAIT:
393                                 /* Return zero file descriptors ready: */
394                                 pthread->data.poll_data->nfds = 0;
395                                 /* FALLTHROUGH */
396                         default:
397                                 /*
398                                  * Remove this thread from the waiting queue
399                                  * (and work queue if necessary) and place it
400                                  * in the ready queue.
401                                  */
402                                 PTHREAD_WAITQ_CLEARACTIVE();
403                                 if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
404                                         PTHREAD_WORKQ_REMOVE(pthread);
405                                 PTHREAD_NEW_STATE(pthread, PS_RUNNING);
406                                 PTHREAD_WAITQ_SETACTIVE();
407                                 break;
408                         }
409                         /*
410                          * Flag the timeout in the thread structure:
411                          */
412                         pthread->timeout = 1;
413                 }
414                 PTHREAD_WAITQ_CLEARACTIVE();
415
416                 /*
417                  * Check to see if the current thread needs to be added
418                  * to the priority queue:
419                  */
420                 if (add_to_prioq != 0) {
421                         /*
422                          * Save the current time as the time that the
423                          * thread became inactive:
424                          */
425                         current_tick = _sched_ticks;
426                         curthread->last_inactive = (long)current_tick;
427                         if (curthread->last_inactive <
428                             curthread->last_active) {
429                                 /* Account for a rollover: */
430                                 curthread->last_inactive += UINT_MAX + 1;
431                         }
432
433                         if ((curthread->slice_usec != -1) &&
434                            (curthread->attr.sched_policy != SCHED_FIFO)) {
435                                 /*
436                                  * Accumulate the number of microseconds for
437                                  * which the current thread has run:
438                                  */
439                                 curthread->slice_usec +=
440                                     (curthread->last_inactive -
441                                     curthread->last_active) *
442                                     (long)_clock_res_usec;
443                                 /* Check for time quantum exceeded: */
444                                 if (curthread->slice_usec > TIMESLICE_USEC)
445                                         curthread->slice_usec = -1;
446                         }
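                        /*
                         * Worked example of the accounting above, with
                         * illustrative values only (the real numbers come
                         * from _clock_res_usec and TIMESLICE_USEC): if the
                         * clock resolution is 10000 usec and the quantum is
                         * 100000 usec, a thread that stayed active for 12
                         * ticks accumulates 12 * 10000 = 120000 usec,
                         * exceeds the quantum, has slice_usec reset to -1,
                         * and is therefore queued at the tail of its
                         * priority list below.
                         */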
447
448                         if (curthread->slice_usec == -1) {
449                                 /*
450                                  * The thread exceeded its time
451                                  * quantum or it yielded the CPU;
452                                  * place it at the tail of the
453                                  * queue for its priority.
454                                  */
455                                 PTHREAD_PRIOQ_INSERT_TAIL(curthread);
456                         } else {
457                                 /*
458                                  * The thread hasn't exceeded its
459                                  * interval.  Place it at the head
460                                  * of the queue for its priority.
461                                  */
462                                 PTHREAD_PRIOQ_INSERT_HEAD(curthread);
463                         }
464                 }
465
466                 /*
467                  * Get the highest priority thread in the ready queue.
468                  */
469                 pthread_h = PTHREAD_PRIOQ_FIRST();
470
471                 /* Check if there are no threads ready to run: */
472                 if (pthread_h == NULL) {
473                         /*
474                          * Lock the pthread kernel by changing the pointer to
475                          * the running thread to point to the global kernel
476                          * thread structure:
477                          */
478                         _set_curthread(&_thread_kern_thread);
479                         curthread = &_thread_kern_thread;
480
481                         DBG_MSG("No runnable threads, using kernel thread %p\n",
482                             curthread);
483
484                         /* Unprotect the scheduling queues: */
485                         _queue_signals = 0;
486
487                         /*
488                          * There are no threads ready to run, so wait until
489                          * something happens that changes this condition:
490                          */
491                         thread_kern_poll(1);
492
493                         /*
494                          * This process' usage will likely be very small
495                          * while waiting in a poll.  Since the scheduling
496                          * clock is based on the profiling timer, it is
497                          * unlikely that the profiling timer will fire
498                          * and update the time of day.  To account for this,
499                          * get the time of day after polling with a timeout.
500                          */
501                         gettimeofday((struct timeval *) &_sched_tod, NULL);
502                         
503                         /* Check once more for a runnable thread: */
504                         _queue_signals = 1;
505                         pthread_h = PTHREAD_PRIOQ_FIRST();
506                         _queue_signals = 0;
507                 }
508
509                 if (pthread_h != NULL) {
510                         /* Remove the thread from the ready queue: */
511                         PTHREAD_PRIOQ_REMOVE(pthread_h);
512
513                         /* Unprotect the scheduling queues: */
514                         _queue_signals = 0;
515
516                         /*
517                          * Check for signals queued while the scheduling
518                          * queues were protected:
519                          */
520                         while (_sigq_check_reqd != 0) {
521                                 /* Clear before handling queued signals: */
522                                 _sigq_check_reqd = 0;
523
524                                 /* Protect the scheduling queues again: */
525                                 _queue_signals = 1;
526
527                                 dequeue_signals();
528
529                                 /*
530                                  * Check for a higher priority thread that
531                                  * became runnable due to signal handling.
532                                  */
533                                 if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
534                                     (pthread->active_priority > pthread_h->active_priority)) {
535                                         /* Remove the thread from the ready queue: */
536                                         PTHREAD_PRIOQ_REMOVE(pthread);
537
538                                         /*
539                                          * Insert the lower priority thread
540                                          * at the head of its priority list:
541                                          */
542                                         PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
543
544                                         /* There's a new thread in town: */
545                                         pthread_h = pthread;
546                                 }
547
548                                 /* Unprotect the scheduling queues: */
549                                 _queue_signals = 0;
550                         }
551
552                         /* Make the selected thread the current thread: */
553                         _set_curthread(pthread_h);
554                         curthread = pthread_h;
555
556                         /*
557                          * Save the current time as the time that the thread
558                          * became active:
559                          */
560                         current_tick = _sched_ticks;
561                         curthread->last_active = (long) current_tick;
562
563                         /*
564                          * Check if this thread is running for the first time
565                          * or running again after using its full time slice
566                          * allocation:
567                          */
568                         if (curthread->slice_usec == -1) {
569                                 /* Reset the accumulated time slice period: */
570                                 curthread->slice_usec = 0;
571                         }
572
573                         /*
574                          * If we had a context switch, run any
575                          * installed switch hooks.
576                          */
577                         if ((_sched_switch_hook != NULL) &&
578                             (_last_user_thread != curthread)) {
579                                 thread_run_switch_hook(_last_user_thread,
580                                     curthread);
581                         }
582                         /*
583                          * Continue the thread at its current frame:
584                          */
585 #if NOT_YET
586                         _setcontext(&curthread->ctx.uc);
587 #else
588                         ___longjmp(curthread->ctx.jb, 1);
589 #endif
590                         /* This point should not be reached. */
591                         PANIC("Thread has returned from sigreturn or longjmp");
592                 }
593         }
594
595         /* There are no more threads, so exit this process: */
596         exit(0);
597 }
598
599 void
600 _thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
601 {
602         struct pthread  *curthread = _get_curthread();
603
604         /*
605          * Flag the pthread kernel as executing scheduler code
606          * to prevent a scheduler signal from interrupting this
607          * execution and calling the scheduler again.
608          */
609         _thread_kern_in_sched = 1;
610
611         /*
612          * Prevent the signal handler from fiddling with this thread
613          * before its state is set and it is placed into the proper queue.
614          */
615         _queue_signals = 1;
616
617         /* Change the state of the current thread: */
618         curthread->state = state;
619         curthread->fname = fname;
620         curthread->lineno = lineno;
621
622         /* Schedule the next thread that is ready: */
623         _thread_kern_sched(NULL);
624 }
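/*
 * A sketch (not copied from any particular caller) of how a blocking
 * primitive typically uses this entry point: it arms a timeout and then
 * parks the thread in the matching state, e.g.
 *
 *	_thread_kern_set_timeout(&ts);
 *	_thread_kern_sched_state(PS_SLEEP_WAIT, __FILE__, __LINE__);
 *
 * The fname/lineno pair is only recorded in the thread structure for
 * debugging purposes.
 */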
625
626 void
627 _thread_kern_sched_state_unlock(enum pthread_state state,
628     spinlock_t *lock, char *fname, int lineno)
629 {
630         struct pthread  *curthread = _get_curthread();
631
632         /*
633          * Flag the pthread kernel as executing scheduler code
634          * to prevent a scheduler signal from interrupting this
635          * execution and calling the scheduler again.
636          */
637         _thread_kern_in_sched = 1;
638
639         /*
640          * Prevent the signal handler from fiddling with this thread
641          * before its state is set and it is placed into the proper
642          * queue(s).
643          */
644         _queue_signals = 1;
645
646         /* Change the state of the current thread: */
647         curthread->state = state;
648         curthread->fname = fname;
649         curthread->lineno = lineno;
650
651         _SPINUNLOCK(lock);
652
653         /* Schedule the next thread that is ready: */
654         _thread_kern_sched(NULL);
655 }
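/*
 * A sketch of the intended use: a primitive that holds a low-level
 * spinlock can queue the thread, block, and release the lock in one
 * step, so a signal handler never sees the queue in a half-updated
 * state (names below are illustrative):
 *
 *	_SPINLOCK(&queue_lock);
 *	... enqueue curthread on the waiters list ...
 *	_thread_kern_sched_state_unlock(PS_COND_WAIT, &queue_lock,
 *	    __FILE__, __LINE__);
 */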
656
657 static void
658 thread_kern_poll(int wait_reqd)
659 {
660         int             count = 0;
661         int             i, found;
662         int             kern_pipe_added = 0;
663         int             nfds = 0;
664         int             timeout_ms = 0;
665         struct pthread  *pthread;
666         struct timespec ts;
667         struct timeval  tv;
668
669         /* Check if the caller wants to wait: */
670         if (wait_reqd == 0) {
671                 timeout_ms = 0;
672         }
673         else {
674                 /* Get the current time of day: */
675                 GET_CURRENT_TOD(tv);
676                 TIMEVAL_TO_TIMESPEC(&tv, &ts);
677
678                 _queue_signals = 1;
679                 pthread = TAILQ_FIRST(&_waitingq);
680                 _queue_signals = 0;
681
682                 if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
683                         /*
684                          * Either there are no threads in the waiting queue,
685                          * or there are no threads that can timeout.
686                          */
687                         timeout_ms = INFTIM;
688                 }
689                 else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
690                         /* Limit maximum timeout to prevent rollover. */
691                         timeout_ms = 60000;
692                 else {
693                         /*
694                          * Calculate the time left for the next thread to
695                          * timeout:
696                          */
697                         timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
698                             1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
699                             1000000);
700                         /*
701                          * Don't allow negative timeouts:
702                          */
703                         if (timeout_ms < 0)
704                                 timeout_ms = 0;
705                 }
706         }
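        /*
         * Example of the millisecond computation above: with a wakeup
         * time of 12.500000000 s and a current time of 12.200000000 s,
         * timeout_ms = (12 - 12) * 1000 + (500000000 - 200000000) /
         * 1000000 = 300.  A wakeup time already in the past produces a
         * negative value and is clamped to zero, so the poll returns
         * immediately.
         */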
707                         
708         /* Protect the scheduling queues: */
709         _queue_signals = 1;
710
711         /*
712          * Check to see if the signal queue needs to be walked to look
713          * for threads awoken by a signal while in the scheduler.
714          */
715         if (_sigq_check_reqd != 0) {
716                 /* Reset flag before handling queued signals: */
717                 _sigq_check_reqd = 0;
718
719                 dequeue_signals();
720         }
721
722         /*
723          * Check for a thread that became runnable due to a signal:
724          */
725         if (PTHREAD_PRIOQ_FIRST() != NULL) {
726                 /*
727                  * Since there is at least one runnable thread,
728                  * disable the wait.
729                  */
730                 timeout_ms = 0;
731         }
732
733         /*
734          * Form the poll table:
735          */
736         nfds = 0;
737         if (timeout_ms != 0) {
738                 /* Add the kernel pipe to the poll table: */
739                 _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
740                 _thread_pfd_table[nfds].events = POLLRDNORM;
741                 _thread_pfd_table[nfds].revents = 0;
742                 nfds++;
743                 kern_pipe_added = 1;
744         }
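        /*
         * The kernel pipe entry is what lets a signal interrupt a
         * blocking poll: the signal handling side writes a byte to
         * _thread_kern_pipe[1], the read end polled here becomes
         * readable, and dequeue_signals() below drains it again.  It is
         * only added when the poll may actually sleep (timeout_ms != 0).
         */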
745
746         PTHREAD_WAITQ_SETACTIVE();
747         TAILQ_FOREACH(pthread, &_workq, qe) {
748                 switch (pthread->state) {
749                 case PS_SPINBLOCK:
750                         /*
751                          * If the lock is available, let the thread run.
752                          */
753                         if (pthread->data.spinlock->access_lock == 0) {
754                                 PTHREAD_WAITQ_CLEARACTIVE();
755                                 PTHREAD_WORKQ_REMOVE(pthread);
756                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
757                                 PTHREAD_WAITQ_SETACTIVE();
758                                 /* One less thread in a spinblock state: */
759                                 _spinblock_count--;
760                                 /*
761                                  * Since there is at least one runnable
762                                  * thread, disable the wait.
763                                  */
764                                 timeout_ms = 0;
765                         }
766                         break;
767
768                 /* File descriptor read wait: */
769                 case PS_FDR_WAIT:
770                         /* Limit number of polled files to table size: */
771                         if (nfds < _thread_dtablesize) {
772                                 _thread_pfd_table[nfds].events = POLLRDNORM;
773                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
774                                 nfds++;
775                         }
776                         break;
777
778                 /* File descriptor write wait: */
779                 case PS_FDW_WAIT:
780                         /* Limit number of polled files to table size: */
781                         if (nfds < _thread_dtablesize) {
782                                 _thread_pfd_table[nfds].events = POLLWRNORM;
783                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
784                                 nfds++;
785                         }
786                         break;
787
788                 /* File descriptor poll or select wait: */
789                 case PS_POLL_WAIT:
790                 case PS_SELECT_WAIT:
791                         /* Limit number of polled files to table size: */
792                         if (pthread->data.poll_data->nfds + nfds <
793                             _thread_dtablesize) {
794                                 for (i = 0; i < pthread->data.poll_data->nfds; i++) {
795                                         _thread_pfd_table[nfds + i].fd =
796                                             pthread->data.poll_data->fds[i].fd;
797                                         _thread_pfd_table[nfds + i].events =
798                                             pthread->data.poll_data->fds[i].events;
799                                 }
800                                 nfds += pthread->data.poll_data->nfds;
801                         }
802                         break;
803
804                 /* Other states do not depend on file I/O. */
805                 default:
806                         break;
807                 }
808         }
809         PTHREAD_WAITQ_CLEARACTIVE();
810
811         /*
812          * Wait for a file descriptor to be ready for read, write, or
813          * an exception, or a timeout to occur:
814          */
815         count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);
816
817         if (kern_pipe_added != 0)
818                 /*
819                  * Remove the pthread kernel pipe file descriptor
820                  * from the pollfd table:
821                  */
822                 nfds = 1;
823         else
824                 nfds = 0;
825
826         /*
827          * Check if it is possible that there are bytes in the kernel
828          * read pipe waiting to be read:
829          */
830         if (count < 0 || ((kern_pipe_added != 0) &&
831             (_thread_pfd_table[0].revents & POLLRDNORM))) {
832                 /*
833                  * If the kernel read pipe was included in the
834                  * count:
835                  */
836                 if (count > 0) {
837                         /* Decrement the count of file descriptors: */
838                         count--;
839                 }
840
841                 if (_sigq_check_reqd != 0) {
842                         /* Reset flag before handling signals: */
843                         _sigq_check_reqd = 0;
844
845                         dequeue_signals();
846                 }
847         }
848
849         /*
850          * Check if any file descriptors are ready:
851          */
852         if (count > 0) {
853                 /*
854                  * Enter a loop to look for threads waiting on file
855                  * descriptors that are flagged as available by the
856                  * _poll syscall:
857                  */
858                 PTHREAD_WAITQ_SETACTIVE();
859                 TAILQ_FOREACH(pthread, &_workq, qe) {
860                         switch (pthread->state) {
861                         case PS_SPINBLOCK:
862                                 /*
863                                  * If the lock is available, let the thread run.
864                                  */
865                                 if (pthread->data.spinlock->access_lock == 0) {
866                                         PTHREAD_WAITQ_CLEARACTIVE();
867                                         PTHREAD_WORKQ_REMOVE(pthread);
868                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
869                                         PTHREAD_WAITQ_SETACTIVE();
870
871                                         /*
872                                          * One less thread in a spinblock state:
873                                          */
874                                         _spinblock_count--;
875                                 }
876                                 break;
877
878                         /* File descriptor read wait: */
879                         case PS_FDR_WAIT:
880                                 if ((nfds < _thread_dtablesize) &&
881                                     ((_thread_pfd_table[nfds].revents
882                                     & (POLLRDNORM | POLLHUP
883                                       | POLLERR | POLLNVAL)) != 0)) {
884                                         PTHREAD_WAITQ_CLEARACTIVE();
885                                         PTHREAD_WORKQ_REMOVE(pthread);
886                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
887                                         PTHREAD_WAITQ_SETACTIVE();
888                                 }
889                                 nfds++;
890                                 break;
891
892                         /* File descriptor write wait: */
893                         case PS_FDW_WAIT:
894                                 if ((nfds < _thread_dtablesize) &&
895                                     ((_thread_pfd_table[nfds].revents
896                                     & (POLLWRNORM | POLLHUP
897                                       | POLLERR | POLLNVAL)) != 0)) {
898                                         PTHREAD_WAITQ_CLEARACTIVE();
899                                         PTHREAD_WORKQ_REMOVE(pthread);
900                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
901                                         PTHREAD_WAITQ_SETACTIVE();
902                                 }
903                                 nfds++;
904                                 break;
905
906                         /* File descriptor poll or select wait: */
907                         case PS_POLL_WAIT:
908                         case PS_SELECT_WAIT:
909                                 if (pthread->data.poll_data->nfds + nfds <
910                                     _thread_dtablesize) {
911                                         /*
912                                          * Enter a loop looking for I/O
913                                          * readiness:
914                                          */
915                                         found = 0;
916                                         for (i = 0; i < pthread->data.poll_data->nfds; i++) {
917                                                 if (_thread_pfd_table[nfds + i].revents != 0) {
918                                                         pthread->data.poll_data->fds[i].revents =
919                                                             _thread_pfd_table[nfds + i].revents;
920                                                         found++;
921                                                 }
922                                         }
923
924                                         /* Increment before destroying: */
925                                         nfds += pthread->data.poll_data->nfds;
926
927                                         if (found != 0) {
928                                                 pthread->data.poll_data->nfds = found;
929                                                 PTHREAD_WAITQ_CLEARACTIVE();
930                                                 PTHREAD_WORKQ_REMOVE(pthread);
931                                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
932                                                 PTHREAD_WAITQ_SETACTIVE();
933                                         }
934                                 }
935                                 else
936                                         nfds += pthread->data.poll_data->nfds;
937                                 break;
938
939                         /* Other states do not depend on file I/O. */
940                         default:
941                                 break;
942                         }
943                 }
944                 PTHREAD_WAITQ_CLEARACTIVE();
945         }
946         else if (_spinblock_count != 0) {
947                 /*
948                  * Enter a loop to look for threads waiting on a spinlock
949                  * that is now available.
950                  */
951                 PTHREAD_WAITQ_SETACTIVE();
952                 TAILQ_FOREACH(pthread, &_workq, qe) {
953                         if (pthread->state == PS_SPINBLOCK) {
954                                 /*
955                                  * If the lock is available, let the thread run.
956                                  */
957                                 if (pthread->data.spinlock->access_lock == 0) {
958                                         PTHREAD_WAITQ_CLEARACTIVE();
959                                         PTHREAD_WORKQ_REMOVE(pthread);
960                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
961                                         PTHREAD_WAITQ_SETACTIVE();
962
963                                         /*
964                                          * One less thread in a spinblock state:
965                                          */
966                                         _spinblock_count--;
967                                 }
968                         }
969                 }
970                 PTHREAD_WAITQ_CLEARACTIVE();
971         }
972
973         /* Unprotect the scheduling queues: */
974         _queue_signals = 0;
975
976         while (_sigq_check_reqd != 0) {
977                 /* Handle queued signals: */
978                 _sigq_check_reqd = 0;
979
980                 /* Protect the scheduling queues: */
981                 _queue_signals = 1;
982
983                 dequeue_signals();
984
985                 /* Unprotect the scheduling queues: */
986                 _queue_signals = 0;
987         }
988 }
989
990 void
991 _thread_kern_set_timeout(const struct timespec * timeout)
992 {
993         struct pthread  *curthread = _get_curthread();
994         struct timespec current_time;
995         struct timeval  tv;
996
997         /* Reset the timeout flag for the running thread: */
998         curthread->timeout = 0;
999
1000         /* Check if the thread is to wait forever: */
1001         if (timeout == NULL) {
1002                 /*
1003                  * Set the wakeup time to something that can be recognised as
1004                  * different to an actual time of day:
1005                  */
1006                 curthread->wakeup_time.tv_sec = -1;
1007                 curthread->wakeup_time.tv_nsec = -1;
1008         }
1009         /* Check if no waiting is required: */
1010         else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
1011                 /* Set the wake up time to 'immediately': */
1012                 curthread->wakeup_time.tv_sec = 0;
1013                 curthread->wakeup_time.tv_nsec = 0;
1014         } else {
1015                 /* Get the current time: */
1016                 GET_CURRENT_TOD(tv);
1017                 TIMEVAL_TO_TIMESPEC(&tv, &current_time);
1018
1019                 /* Calculate the time for the current thread to wake up: */
1020                 curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
1021                 curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;
1022
1023                 /* Check if the nanosecond field needs to wrap: */
1024                 if (curthread->wakeup_time.tv_nsec >= 1000000000) {
1025                         /* Wrap the nanosecond field: */
1026                         curthread->wakeup_time.tv_sec += 1;
1027                         curthread->wakeup_time.tv_nsec -= 1000000000;
1028                 }
1029         }
1030 }
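/*
 * Example: with a current time of 10.800000000 s and a relative timeout
 * of { 0, 500000000 } (half a second), the wakeup time is first computed
 * as 10 s + 1300000000 ns and then normalised by the carry above to
 * 11.300000000 s.  A NULL timeout leaves the -1/-1 sentinel, meaning
 * "wait forever".
 */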
1031
1032 void
1033 _thread_kern_sig_defer(void)
1034 {
1035         struct pthread  *curthread = _get_curthread();
1036
1037         /* Allow signal deferral to be recursive. */
1038         curthread->sig_defer_count++;
1039 }
1040
1041 void
1042 _thread_kern_sig_undefer(void)
1043 {
1044         struct pthread  *curthread = _get_curthread();
1045
1046         /*
1047          * Perform checks to yield only if we are about to undefer
1048          * signals.
1049          */
1050         if (curthread->sig_defer_count > 1) {
1051                 /* Decrement the signal deferral count. */
1052                 curthread->sig_defer_count--;
1053         }
1054         else if (curthread->sig_defer_count == 1) {
1055                 /* Reenable signals: */
1056                 curthread->sig_defer_count = 0;
1057
1058                 /*
1059                  * Check if there are queued signals:
1060                  */
1061                 if (_sigq_check_reqd != 0)
1062                         _thread_kern_sched(NULL);
1063
1064                 /*
1065                  * Check for asynchronous cancellation before delivering any
1066                  * pending signals:
1067                  */
1068                 if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
1069                     ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
1070                         pthread_testcancel();
1071
1072                 /*
1073                  * If there are pending signals or this thread has
1074                  * to yield the CPU, call the kernel scheduler:
1075                  *
1076                  * XXX - Come back and revisit the pending signal problem
1077                  */
1078                 if ((curthread->yield_on_sig_undefer != 0) ||
1079                     SIGNOTEMPTY(curthread->sigpend)) {
1080                         curthread->yield_on_sig_undefer = 0;
1081                         _thread_kern_sched(NULL);
1082                 }
1083         }
1084 }
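/*
 * A sketch of the usual pattern built on the pair above: code that
 * manipulates scheduling queues or other shared thread state brackets
 * the critical section so an asynchronous signal cannot run the
 * scheduler halfway through:
 *
 *	_thread_kern_sig_defer();
 *	... modify queues / thread state ...
 *	_thread_kern_sig_undefer();
 *
 * The calls nest; only the outermost undefer re-enables signal
 * processing and, when required, yields to the scheduler.
 */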
1085
1086 static void
1087 dequeue_signals(void)
1088 {
1089         char    bufr[128];
1090         int     num;
1091
1092         /*
1093          * Enter a loop to clear the pthread kernel pipe:
1094          */
1095         while (((num = __sys_extpread(_thread_kern_pipe[0], bufr,
1096             sizeof(bufr), O_FNONBLOCKING, -1)) > 0) ||
1097             (num == -1 && errno == EINTR)) {
1098                 ;
1099         }
1100         if ((num < 0) && (errno != EAGAIN)) {
1101                 /*
1102                  * The only error we should expect is if there is
1103                  * no data to read.
1104                  */
1105                 PANIC("Unable to read from thread kernel pipe");
1106         }
1107         /* Handle any pending signals: */
1108         _thread_sig_handle_pending();
1109 }
1110
1111 static inline void
1112 thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
1113 {
1114         pthread_t tid_out = thread_out;
1115         pthread_t tid_in = thread_in;
1116
1117         if ((tid_out != NULL) &&
1118             (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1119                 tid_out = NULL;
1120         if ((tid_in != NULL) &&
1121             (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1122                 tid_in = NULL;
1123
1124         if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
1125                 /* Run the scheduler switch hook: */
1126                 _sched_switch_hook(tid_out, tid_in);
1127         }
1128 }
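/*
 * The hook itself is installed elsewhere in libc_r; assuming the
 * pthread_switch_add_np() interface, a user-supplied hook would look
 * roughly like this (sketch only):
 *
 *	static void
 *	my_switch_hook(pthread_t out, pthread_t in)
 *	{
 *		... out/in are passed as NULL here for the
 *		    library-private kernel thread, as arranged above ...
 *	}
 *	...
 *	pthread_switch_add_np(my_switch_hook);
 */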
1129
1130 struct pthread *
1131 _get_curthread(void)
1132 {
1133         if (_thread_initial == NULL)
1134                 _thread_init();
1135
1136         return (_thread_run);
1137 }
1138
1139 void
1140 _set_curthread(struct pthread *newthread)
1141 {
1142         _thread_run = newthread;
1143         tls_set_tcb(newthread->tcb);
1144 }