1 /*
2  * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by John Birrell.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD: src/lib/libc_r/uthread/uthread_kern.c,v 1.28.2.13 2002/10/22 14:44:03 fjoe Exp $
33  * $DragonFly: src/lib/libc_r/uthread/uthread_kern.c,v 1.6 2007/01/08 21:41:53 dillon Exp $
34  *
35  */
36 #include <errno.h>
37 #include <poll.h>
38 #include <stdlib.h>
39 #include <stdarg.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <setjmp.h>
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/signalvar.h>
46 #include <sys/stat.h>
47 #include <sys/time.h>
48 #include <sys/socket.h>
49 #include <sys/uio.h>
50 #include <sys/syscall.h>
51 #include <fcntl.h>
52 #include <pthread.h>
53 #include "pthread_private.h"
54
55 /* #define DEBUG_THREAD_KERN */
56 #ifdef DEBUG_THREAD_KERN
57 #define DBG_MSG         stdout_debug
58 #else
59 #define DBG_MSG(x...)
60 #endif
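/*
 * Illustrative use of the macro above: with DEBUG_THREAD_KERN defined,
 * a call such as
 *
 *	DBG_MSG("Picked thread %p at priority %d\n", pthread,
 *	    pthread->active_priority);
 *
 * is routed to stdout_debug(); without it, the varargs macro expands to
 * nothing and such calls compile away entirely.
 */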
61
62 /* Static function prototype definitions: */
63 static void
64 thread_kern_poll(int wait_reqd);
65
66 static void
67 dequeue_signals(void);
68
69 static inline void
70 thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);
71
72 /* Static variables: */
73 static int      last_tick = 0;
74 static int      called_from_handler = 0;
75
76 /*
77  * This is called when a signal handler finishes and wants to
78  * return to a previous frame.
79  */
80 void
81 _thread_kern_sched_frame(struct pthread_signal_frame *psf)
82 {
83         struct pthread  *curthread = _get_curthread();
84
85         /*
86          * Flag the pthread kernel as executing scheduler code
87          * to avoid a signal from interrupting this execution and
88          * corrupting the (soon-to-be) current frame.
89          */
90         _thread_kern_in_sched = 1;
91
92         /* Restore the signal frame: */
93         _thread_sigframe_restore(curthread, psf);
94
95         /* The signal mask was restored; check for any pending signals: */
96         curthread->check_pending = 1;
97
98         /* Switch to the thread scheduler: */
99         ___longjmp(_thread_kern_sched_jb, 1);
100 }
101
102
103 void
104 _thread_kern_sched(ucontext_t *ucp)
105 {
106         struct pthread  *curthread = _get_curthread();
107
108         /*
109          * Flag the pthread kernel as executing scheduler code
110          * to avoid a scheduler signal from interrupting this
111          * execution and calling the scheduler again.
112          */
113         _thread_kern_in_sched = 1;
114
115         /* Check if this function was called from the signal handler: */
116         if (ucp != NULL) {
117                 /* XXX - Save FP registers? */
118                 FP_SAVE_UC(ucp);
119                 called_from_handler = 1;
120                 DBG_MSG("Entering scheduler due to signal\n");
121         }
122
123         /* Save the state of the current thread: */
124         if (_setjmp(curthread->ctx.jb) != 0) {
125                 DBG_MSG("Returned from ___longjmp, thread %p\n",
126                     curthread);
127                 /*
128                  * This point is reached when a longjmp() is called
129                  * to restore the state of a thread.
130                  *
131                  * This is the normal way out of the scheduler.
132                  */
133                 _thread_kern_in_sched = 0;
134
135                 if (curthread->sig_defer_count == 0) {
136                         if (((curthread->cancelflags &
137                             PTHREAD_AT_CANCEL_POINT) == 0) &&
138                             ((curthread->cancelflags &
139                             PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
140                                 /*
141                                  * Cancellations override signals.
142                                  *
143                                  * Stick a cancellation point at the
144                                  * start of each async-cancellable
145                                  * thread's resumption.
146                                  *
147                                  * We allow threads woken at cancel
148                                  * points to do their own checks.
149                                  */
150                                 pthread_testcancel();
151                 }
152
153                 if (_sched_switch_hook != NULL) {
154                         /* Run the installed switch hook: */
155                         thread_run_switch_hook(_last_user_thread, curthread);
156                 }
157                 if (ucp == NULL)
158                         return;
159                 else {
160                         /* XXX - Restore FP registers? */
161                         FP_RESTORE_UC(ucp);
162
163                         /*
164                          * Set the process signal mask in the context; it
165                          * could have changed by the handler.
166                          * could have been changed by the handler.
167                         ucp->uc_sigmask = _process_sigmask;
168
169                         /* Resume the interrupted thread: */
170                         sigreturn(ucp);
171                 }
172         }
173         /* Switch to the thread scheduler: */
174         ___longjmp(_thread_kern_sched_jb, 1);
175 }
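/*
 * A rough sketch of the control transfer above: the _setjmp() records the
 * calling thread's context in ctx.jb and returns 0, after which control
 * jumps (via _thread_kern_sched_jb, set up elsewhere) into
 * _thread_kern_scheduler().  When the scheduler later selects this thread
 * again it executes ___longjmp(curthread->ctx.jb, 1), so _setjmp() appears
 * to return a second time with a non-zero value and the thread resumes
 * through the "normal way out of the scheduler" branch.
 */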
176
177 void
178 _thread_kern_sched_sig(void)
179 {
180         struct pthread  *curthread = _get_curthread();
181
182         curthread->check_pending = 1;
183         _thread_kern_sched(NULL);
184 }
185
186
187 void
188 _thread_kern_scheduler(void)
189 {
190         struct timespec ts;
191         struct timeval  tv;
192         struct pthread  *curthread = _get_curthread();
193         pthread_t       pthread, pthread_h;
194         unsigned int    current_tick;
195         int             add_to_prioq;
196
197         /* If the currently running thread is a user thread, save it: */
198         if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
199                 _last_user_thread = curthread;
200
201         if (called_from_handler != 0) {
202                 called_from_handler = 0;
203
204                 /*
205                  * We were called from a signal handler; restore the process
206                  * signal mask.
207                  */
208                 if (__sys_sigprocmask(SIG_SETMASK,
209                     &_process_sigmask, NULL) != 0)
210                         PANIC("Unable to restore process mask after signal");
211         }
212
213         /*
214          * Enter a scheduling loop that finds the next thread that is
215          * ready to run. This loop completes when there are no more threads
216          * in the global list or when a thread has its state restored by
217          * either a sigreturn (if the state was saved as a sigcontext) or a
218          * longjmp (if the state was saved by a setjmp).
219          */
220         while (!(TAILQ_EMPTY(&_thread_list))) {
221                 /* Get the current time of day: */
222                 GET_CURRENT_TOD(tv);
223                 TIMEVAL_TO_TIMESPEC(&tv, &ts);
224                 current_tick = _sched_ticks;
225
226                 /*
227                  * Protect the scheduling queues from access by the signal
228                  * handler.
229                  */
230                 _queue_signals = 1;
231                 add_to_prioq = 0;
232
233                 if (curthread != &_thread_kern_thread) {
234                         /*
235                          * This thread no longer needs to yield the CPU.
236                          */
237                         curthread->yield_on_sig_undefer = 0;
238         
239                         if (curthread->state != PS_RUNNING) {
240                                 /*
241                                  * Save the current time as the time that the
242                                  * thread became inactive:
243                                  */
244                                 curthread->last_inactive = (long)current_tick;
245                                 if (curthread->last_inactive <
246                                     curthread->last_active) {
247                                         /* Account for a rollover: */
248                                         curthread->last_inactive +=
249                                             UINT_MAX + 1;
250                                 }
251                         }
252
253                         /*
254                          * Place the currently running thread into the
255                          * appropriate queue(s).
256                          */
257                         switch (curthread->state) {
258                         case PS_DEAD:
259                         case PS_STATE_MAX: /* to silence -Wall */
260                         case PS_SUSPENDED:
261                                 /*
262                                  * Dead and suspended threads are not placed
263                                  * in any queue:
264                                  */
265                                 break;
266
267                         case PS_RUNNING:
268                                 /*
269                                  * Runnable threads can't be placed in the
270                                  * priority queue until after waiting threads
271                                  * are polled (to preserve round-robin
272                                  * scheduling).
273                                  */
274                                 add_to_prioq = 1;
275                                 break;
276
277                         /*
278                          * States which do not depend on file descriptor I/O
279                          * operations or timeouts:
280                          */
281                         case PS_DEADLOCK:
282                         case PS_FDLR_WAIT:
283                         case PS_FDLW_WAIT:
284                         case PS_JOIN:
285                         case PS_MUTEX_WAIT:
286                         case PS_SIGSUSPEND:
287                         case PS_SIGTHREAD:
288                         case PS_SIGWAIT:
289                         case PS_WAIT_WAIT:
290                                 /* No timeouts for these states: */
291                                 curthread->wakeup_time.tv_sec = -1;
292                                 curthread->wakeup_time.tv_nsec = -1;
293
294                                 /* Restart the time slice: */
295                                 curthread->slice_usec = -1;
296
297                                 /* Insert into the waiting queue: */
298                                 PTHREAD_WAITQ_INSERT(curthread);
299                                 break;
300
301                         /* States which can timeout: */
302                         case PS_COND_WAIT:
303                         case PS_SLEEP_WAIT:
304                                 /* Restart the time slice: */
305                                 curthread->slice_usec = -1;
306
307                                 /* Insert into the waiting queue: */
308                                 PTHREAD_WAITQ_INSERT(curthread);
309                                 break;
310         
311                         /* States that require periodic work: */
312                         case PS_SPINBLOCK:
313                                 /* No timeouts for this state: */
314                                 curthread->wakeup_time.tv_sec = -1;
315                                 curthread->wakeup_time.tv_nsec = -1;
316
317                                 /* Increment spinblock count: */
318                                 _spinblock_count++;
319
320                                 /* FALLTHROUGH */
321                         case PS_FDR_WAIT:
322                         case PS_FDW_WAIT:
323                         case PS_POLL_WAIT:
324                         case PS_SELECT_WAIT:
325                                 /* Restart the time slice: */
326                                 curthread->slice_usec = -1;
327         
328                                 /* Insert into the waiting queue: */
329                                 PTHREAD_WAITQ_INSERT(curthread);
330         
331                                 /* Insert into the work queue: */
332                                 PTHREAD_WORKQ_INSERT(curthread);
333                                 break;
334                         }
335
336                         /*
337                          * Are there pending signals for this thread?
338                          *
339                          * This check has to be performed after the thread
340                          * has been placed in the queue(s) appropriate for
341                          * its state.  The process of adding pending signals
342                          * can change a thread's state, which in turn will
343                          * attempt to add or remove the thread from any
344                          * scheduling queue to which it belongs.
345                          */
346                         if (curthread->check_pending != 0) {
347                                 curthread->check_pending = 0;
348                                 _thread_sig_check_pending(curthread);
349                         }
350                 }
351
352                 /*
353                  * Avoid polling file descriptors if there are none
354                  * waiting:
355                  */
356                 if (TAILQ_EMPTY(&_workq) != 0) {
357                 }
358                 /*
359                  * Poll file descriptors only if a new scheduling signal
360                  * has occurred or if we have no more runnable threads.
361                  */
362                 else if (((current_tick = _sched_ticks) != last_tick) ||
363                     ((curthread->state != PS_RUNNING) &&
364                     (PTHREAD_PRIOQ_FIRST() == NULL))) {
365                         /* Unprotect the scheduling queues: */
366                         _queue_signals = 0;
367
368                         /*
369                          * Poll file descriptors to update the state of threads
370                          * waiting on file I/O where data may be available:
371                          */
372                         thread_kern_poll(0);
373
374                         /* Protect the scheduling queues: */
375                         _queue_signals = 1;
376                 }
377                 last_tick = current_tick;
378
379                 /*
380                  * Wake up threads that have timed out.  This has to be
381                  * done after polling in case a thread does a poll or
382                  * select with zero time.
383                  */
384                 PTHREAD_WAITQ_SETACTIVE();
385                 while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
386                     (pthread->wakeup_time.tv_sec != -1) &&
387                     (((pthread->wakeup_time.tv_sec == 0) &&
388                     (pthread->wakeup_time.tv_nsec == 0)) ||
389                     (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
390                     ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
391                     (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
392                         switch (pthread->state) {
393                         case PS_POLL_WAIT:
394                         case PS_SELECT_WAIT:
395                                 /* Return zero file descriptors ready: */
396                                 pthread->data.poll_data->nfds = 0;
397                                 /* FALLTHROUGH */
398                         default:
399                                 /*
400                                  * Remove this thread from the waiting queue
401                                  * (and work queue if necessary) and place it
402                                  * in the ready queue.
403                                  */
404                                 PTHREAD_WAITQ_CLEARACTIVE();
405                                 if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
406                                         PTHREAD_WORKQ_REMOVE(pthread);
407                                 PTHREAD_NEW_STATE(pthread, PS_RUNNING);
408                                 PTHREAD_WAITQ_SETACTIVE();
409                                 break;
410                         }
411                         /*
412                          * Flag the timeout in the thread structure:
413                          */
414                         pthread->timeout = 1;
415                 }
416                 PTHREAD_WAITQ_CLEARACTIVE();
417
418                 /*
419                  * Check to see if the current thread needs to be added
420                  * to the priority queue:
421                  */
422                 if (add_to_prioq != 0) {
423                         /*
424                          * Save the current time as the time that the
425                          * thread became inactive:
426                          */
427                         current_tick = _sched_ticks;
428                         curthread->last_inactive = (long)current_tick;
429                         if (curthread->last_inactive <
430                             curthread->last_active) {
431                                 /* Account for a rollover: */
432                                 curthread->last_inactive += UINT_MAX + 1;
433                         }
434
435                         if ((curthread->slice_usec != -1) &&
436                            (curthread->attr.sched_policy != SCHED_FIFO)) {
437                                 /*
438                                  * Accumulate the number of microseconds for
439                                  * which the current thread has run:
440                                  */
441                                 curthread->slice_usec +=
442                                     (curthread->last_inactive -
443                                     curthread->last_active) *
444                                     (long)_clock_res_usec;
445                                 /* Check for time quantum exceeded: */
446                                 if (curthread->slice_usec > TIMESLICE_USEC)
447                                         curthread->slice_usec = -1;
448                         }
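                        /*
                         * Illustrative numbers: with a clock resolution of,
                         * say, 10000 usec, a thread that became active at
                         * tick 100 and inactive at tick 112 accumulates
                         * (112 - 100) * 10000 = 120000 usec.  Once the total
                         * exceeds TIMESLICE_USEC the slice is marked expired
                         * (-1) and the thread is queued at the tail of its
                         * priority list below instead of the head.
                         */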
449
450                         if (curthread->slice_usec == -1) {
451                                 /*
452                                  * The thread exceeded its time
453                                  * quantum or it yielded the CPU;
454                                  * place it at the tail of the
455                                  * queue for its priority.
456                                  */
457                                 PTHREAD_PRIOQ_INSERT_TAIL(curthread);
458                         } else {
459                                 /*
460                                  * The thread hasn't exceeded its
461                                  * interval.  Place it at the head
462                                  * of the queue for its priority.
463                                  */
464                                 PTHREAD_PRIOQ_INSERT_HEAD(curthread);
465                         }
466                 }
467
468                 /*
469                  * Get the highest priority thread in the ready queue.
470                  */
471                 pthread_h = PTHREAD_PRIOQ_FIRST();
472
473                 /* Check if there are no threads ready to run: */
474                 if (pthread_h == NULL) {
475                         /*
476                          * Lock the pthread kernel by changing the pointer to
477                          * the running thread to point to the global kernel
478                          * thread structure:
479                          */
480                         _set_curthread(&_thread_kern_thread);
481                         curthread = &_thread_kern_thread;
482
483                         DBG_MSG("No runnable threads, using kernel thread %p\n",
484                             curthread);
485
486                         /* Unprotect the scheduling queues: */
487                         _queue_signals = 0;
488
489                         /*
490                          * There are no threads ready to run, so wait until
491                          * something happens that changes this condition:
492                          */
493                         thread_kern_poll(1);
494
495                         /*
496                          * This process' usage will likely be very small
497                          * while waiting in a poll.  Since the scheduling
498                          * clock is based on the profiling timer, it is
499                          * unlikely that the profiling timer will fire
500                          * and update the time of day.  To account for this,
501                          * get the time of day after polling with a timeout.
502                          */
503                         gettimeofday((struct timeval *) &_sched_tod, NULL);
504                         
505                         /* Check once more for a runnable thread: */
506                         _queue_signals = 1;
507                         pthread_h = PTHREAD_PRIOQ_FIRST();
508                         _queue_signals = 0;
509                 }
510
511                 if (pthread_h != NULL) {
512                         /* Remove the thread from the ready queue: */
513                         PTHREAD_PRIOQ_REMOVE(pthread_h);
514
515                         /* Unprotect the scheduling queues: */
516                         _queue_signals = 0;
517
518                         /*
519                          * Check for signals queued while the scheduling
520                          * queues were protected:
521                          */
522                         while (_sigq_check_reqd != 0) {
523                                 /* Clear before handling queued signals: */
524                                 _sigq_check_reqd = 0;
525
526                                 /* Protect the scheduling queues again: */
527                                 _queue_signals = 1;
528
529                                 dequeue_signals();
530
531                                 /*
532                                  * Check for a higher priority thread that
533                                  * became runnable due to signal handling.
534                                  */
535                                 if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
536                                     (pthread->active_priority > pthread_h->active_priority)) {
537                                         /* Remove the thread from the ready queue: */
538                                         PTHREAD_PRIOQ_REMOVE(pthread);
539
540                                         /*
541                                          * Insert the lower priority thread
542                                          * at the head of its priority list:
543                                          */
544                                         PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
545
546                                         /* There's a new thread in town: */
547                                         pthread_h = pthread;
548                                 }
549
550                                 /* Unprotect the scheduling queues: */
551                                 _queue_signals = 0;
552                         }
553
554                         /* Make the selected thread the current thread: */
555                         _set_curthread(pthread_h);
556                         curthread = pthread_h;
557
558                         /*
559                          * Save the current time as the time that the thread
560                          * became active:
561                          */
562                         current_tick = _sched_ticks;
563                         curthread->last_active = (long) current_tick;
564
565                         /*
566                          * Check if this thread is running for the first time
567                          * or running again after using its full time slice
568                          * allocation:
569                          */
570                         if (curthread->slice_usec == -1) {
571                                 /* Reset the accumulated time slice period: */
572                                 curthread->slice_usec = 0;
573                         }
574
575                         /*
576                          * If we had a context switch, run any
577                          * installed switch hooks.
578                          */
579                         if ((_sched_switch_hook != NULL) &&
580                             (_last_user_thread != curthread)) {
581                                 thread_run_switch_hook(_last_user_thread,
582                                     curthread);
583                         }
584                         /*
585                          * Continue the thread at its current frame:
586                          */
587 #if NOT_YET
588                         _setcontext(&curthread->ctx.uc);
589 #else
590                         ___longjmp(curthread->ctx.jb, 1);
591 #endif
592                         /* This point should not be reached. */
593                         PANIC("Thread has returned from sigreturn or longjmp");
594                 }
595         }
596
597         /* There are no more threads, so exit this process: */
598         exit(0);
599 }
600
601 void
602 _thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
603 {
604         struct pthread  *curthread = _get_curthread();
605
606         /*
607          * Flag the pthread kernel as executing scheduler code
608          * to avoid a scheduler signal from interrupting this
609          * execution and calling the scheduler again.
610          */
611         _thread_kern_in_sched = 1;
612
613         /*
614          * Prevent the signal handler from fiddling with this thread
615          * before its state is set and is placed into the proper queue.
616          */
617         _queue_signals = 1;
618
619         /* Change the state of the current thread: */
620         curthread->state = state;
621         curthread->fname = fname;
622         curthread->lineno = lineno;
623
624         /* Schedule the next thread that is ready: */
625         _thread_kern_sched(NULL);
626 }
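/*
 * Typical (illustrative) call site for the function above: a blocking
 * primitive records where it went to sleep, e.g.
 *
 *	_thread_kern_sched_state(PS_SLEEP_WAIT, __FILE__, __LINE__);
 *
 * so that fname/lineno identify the blocking call when debugging.
 */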
627
628 void
629 _thread_kern_sched_state_unlock(enum pthread_state state,
630     spinlock_t *lock, char *fname, int lineno)
631 {
632         struct pthread  *curthread = _get_curthread();
633
634         /*
635          * Flag the pthread kernel as executing scheduler code
636          * to avoid a scheduler signal from interrupting this
637          * execution and calling the scheduler again.
638          */
639         _thread_kern_in_sched = 1;
640
641         /*
642          * Prevent the signal handler from fiddling with this thread
643          * before its state is set and it is placed into the proper
644          * queue(s).
645          */
646         _queue_signals = 1;
647
648         /* Change the state of the current thread: */
649         curthread->state = state;
650         curthread->fname = fname;
651         curthread->lineno = lineno;
652
653         _SPINUNLOCK(lock);
654
655         /* Schedule the next thread that is ready: */
656         _thread_kern_sched(NULL);
657 }
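/*
 * Note on the ordering above: the spinlock is released only after the
 * thread's state has been set and while signals are still being queued,
 * so a caller can atomically drop a lock protecting a wait queue and go
 * to sleep without opening a window in which a wakeup could be missed.
 */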
658
659 static void
660 thread_kern_poll(int wait_reqd)
661 {
662         int             count = 0;
663         int             i, found;
664         int             kern_pipe_added = 0;
665         int             nfds = 0;
666         int             timeout_ms = 0;
667         struct pthread  *pthread;
668         struct timespec ts;
669         struct timeval  tv;
670
671         /* Check if the caller wants to wait: */
672         if (wait_reqd == 0) {
673                 timeout_ms = 0;
674         }
675         else {
676                 /* Get the current time of day: */
677                 GET_CURRENT_TOD(tv);
678                 TIMEVAL_TO_TIMESPEC(&tv, &ts);
679
680                 _queue_signals = 1;
681                 pthread = TAILQ_FIRST(&_waitingq);
682                 _queue_signals = 0;
683
684                 if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
685                         /*
686                          * Either there are no threads in the waiting queue,
687                          * or there are no threads that can timeout.
688                          */
689                         timeout_ms = INFTIM;
690                 }
691                 else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
692                         /* Limit maximum timeout to prevent rollover. */
693                         timeout_ms = 60000;
694                 else {
695                         /*
696                          * Calculate the time left for the next thread to
697                          * timeout:
698                          */
699                         timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
700                             1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
701                             1000000);
702                         /*
703                          * Don't allow negative timeouts:
704                          */
705                         if (timeout_ms < 0)
706                                 timeout_ms = 0;
707                 }
708         }
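        /*
         * Worked example (illustrative): if the earliest waiter wakes at
         * 10.500000000 and the current time is 10.123456789, the code above
         * yields (0 * 1000) + (376543211 / 1000000) = 376 ms.  A wakeup time
         * that has already passed would produce a negative value, which is
         * clamped to 0 so the poll below does not block.
         */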
709                         
710         /* Protect the scheduling queues: */
711         _queue_signals = 1;
712
713         /*
714          * Check to see if the signal queue needs to be walked to look
715          * for threads awoken by a signal while in the scheduler.
716          */
717         if (_sigq_check_reqd != 0) {
718                 /* Reset flag before handling queued signals: */
719                 _sigq_check_reqd = 0;
720
721                 dequeue_signals();
722         }
723
724         /*
725          * Check for a thread that became runnable due to a signal:
726          */
727         if (PTHREAD_PRIOQ_FIRST() != NULL) {
728                 /*
729                  * Since there is at least one runnable thread,
730                  * disable the wait.
731                  */
732                 timeout_ms = 0;
733         }
734
735         /*
736          * Form the poll table:
737          */
738         nfds = 0;
739         if (timeout_ms != 0) {
740                 /* Add the kernel pipe to the poll table: */
741                 _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
742                 _thread_pfd_table[nfds].events = POLLRDNORM;
743                 _thread_pfd_table[nfds].revents = 0;
744                 nfds++;
745                 kern_pipe_added = 1;
746         }
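        /*
         * The read side of the pthread kernel pipe is polled whenever this
         * call may block, so that a byte written to the pipe (done elsewhere
         * when a signal must be delivered to the scheduler) terminates the
         * poll immediately; dequeue_signals() later drains those bytes.
         */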
747
748         PTHREAD_WAITQ_SETACTIVE();
749         TAILQ_FOREACH(pthread, &_workq, qe) {
750                 switch (pthread->state) {
751                 case PS_SPINBLOCK:
752                         /*
753                          * If the lock is available, let the thread run.
754                          */
755                         if (pthread->data.spinlock->access_lock == 0) {
756                                 PTHREAD_WAITQ_CLEARACTIVE();
757                                 PTHREAD_WORKQ_REMOVE(pthread);
758                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
759                                 PTHREAD_WAITQ_SETACTIVE();
760                                 /* One less thread in a spinblock state: */
761                                 _spinblock_count--;
762                                 /*
763                                  * Since there is at least one runnable
764                                  * thread, disable the wait.
765                                  */
766                                 timeout_ms = 0;
767                         }
768                         break;
769
770                 /* File descriptor read wait: */
771                 case PS_FDR_WAIT:
772                         /* Limit number of polled files to table size: */
773                         if (nfds < _thread_dtablesize) {
774                                 _thread_pfd_table[nfds].events = POLLRDNORM;
775                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
776                                 nfds++;
777                         }
778                         break;
779
780                 /* File descriptor write wait: */
781                 case PS_FDW_WAIT:
782                         /* Limit number of polled files to table size: */
783                         if (nfds < _thread_dtablesize) {
784                                 _thread_pfd_table[nfds].events = POLLWRNORM;
785                                 _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
786                                 nfds++;
787                         }
788                         break;
789
790                 /* File descriptor poll or select wait: */
791                 case PS_POLL_WAIT:
792                 case PS_SELECT_WAIT:
793                         /* Limit number of polled files to table size: */
794                         if (pthread->data.poll_data->nfds + nfds <
795                             _thread_dtablesize) {
796                                 for (i = 0; i < pthread->data.poll_data->nfds; i++) {
797                                         _thread_pfd_table[nfds + i].fd =
798                                             pthread->data.poll_data->fds[i].fd;
799                                         _thread_pfd_table[nfds + i].events =
800                                             pthread->data.poll_data->fds[i].events;
801                                 }
802                                 nfds += pthread->data.poll_data->nfds;
803                         }
804                         break;
805
806                 /* Other states do not depend on file I/O. */
807                 default:
808                         break;
809                 }
810         }
811         PTHREAD_WAITQ_CLEARACTIVE();
812
813         /*
814          * Wait for a file descriptor to be ready for read, write, or
815          * an exception, or a timeout to occur:
816          */
817         count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);
818
819         if (kern_pipe_added != 0)
820                 /*
821                  * Remove the pthread kernel pipe file descriptor
822                  * from the pollfd table:
823                  */
824                 nfds = 1;
825         else
826                 nfds = 0;
827
828         /*
829          * Check if it is possible that there are bytes in the kernel
830          * read pipe waiting to be read:
831          */
832         if (count < 0 || ((kern_pipe_added != 0) &&
833             (_thread_pfd_table[0].revents & POLLRDNORM))) {
834                 /*
835                  * If the kernel read pipe was included in the
836                  * count:
837                  */
838                 if (count > 0) {
839                         /* Decrement the count of file descriptors: */
840                         count--;
841                 }
842
843                 if (_sigq_check_reqd != 0) {
844                         /* Reset flag before handling signals: */
845                         _sigq_check_reqd = 0;
846
847                         dequeue_signals();
848                 }
849         }
850
851         /*
852          * Check if any file descriptors are ready:
853          */
854         if (count > 0) {
855                 /*
856                  * Enter a loop to look for threads waiting on file
857                  * descriptors that are flagged as available by the
858                  * _poll syscall:
859                  */
860                 PTHREAD_WAITQ_SETACTIVE();
861                 TAILQ_FOREACH(pthread, &_workq, qe) {
862                         switch (pthread->state) {
863                         case PS_SPINBLOCK:
864                                 /*
865                                  * If the lock is available, let the thread run.
866                                  */
867                                 if (pthread->data.spinlock->access_lock == 0) {
868                                         PTHREAD_WAITQ_CLEARACTIVE();
869                                         PTHREAD_WORKQ_REMOVE(pthread);
870                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
871                                         PTHREAD_WAITQ_SETACTIVE();
872
873                                         /*
874                                          * One less thread in a spinblock state:
875                                          */
876                                         _spinblock_count--;
877                                 }
878                                 break;
879
880                         /* File descriptor read wait: */
881                         case PS_FDR_WAIT:
882                                 if ((nfds < _thread_dtablesize) &&
883                                     ((_thread_pfd_table[nfds].revents
884                                     & (POLLRDNORM | POLLHUP
885                                       | POLLERR | POLLNVAL)) != 0)) {
886                                         PTHREAD_WAITQ_CLEARACTIVE();
887                                         PTHREAD_WORKQ_REMOVE(pthread);
888                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
889                                         PTHREAD_WAITQ_SETACTIVE();
890                                 }
891                                 nfds++;
892                                 break;
893
894                         /* File descriptor write wait: */
895                         case PS_FDW_WAIT:
896                                 if ((nfds < _thread_dtablesize) &&
897                                     ((_thread_pfd_table[nfds].revents
898                                     & (POLLWRNORM | POLLHUP
899                                       | POLLERR | POLLNVAL)) != 0)) {
900                                         PTHREAD_WAITQ_CLEARACTIVE();
901                                         PTHREAD_WORKQ_REMOVE(pthread);
902                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
903                                         PTHREAD_WAITQ_SETACTIVE();
904                                 }
905                                 nfds++;
906                                 break;
907
908                         /* File descriptor poll or select wait: */
909                         case PS_POLL_WAIT:
910                         case PS_SELECT_WAIT:
911                                 if (pthread->data.poll_data->nfds + nfds <
912                                     _thread_dtablesize) {
913                                         /*
914                                          * Enter a loop looking for I/O
915                                          * readiness:
916                                          */
917                                         found = 0;
918                                         for (i = 0; i < pthread->data.poll_data->nfds; i++) {
919                                                 if (_thread_pfd_table[nfds + i].revents != 0) {
920                                                         pthread->data.poll_data->fds[i].revents =
921                                                             _thread_pfd_table[nfds + i].revents;
922                                                         found++;
923                                                 }
924                                         }
925
926                                         /* Increment before destroying: */
927                                         nfds += pthread->data.poll_data->nfds;
928
929                                         if (found != 0) {
930                                                 pthread->data.poll_data->nfds = found;
931                                                 PTHREAD_WAITQ_CLEARACTIVE();
932                                                 PTHREAD_WORKQ_REMOVE(pthread);
933                                                 PTHREAD_NEW_STATE(pthread,PS_RUNNING);
934                                                 PTHREAD_WAITQ_SETACTIVE();
935                                         }
936                                 }
937                                 else
938                                         nfds += pthread->data.poll_data->nfds;
939                                 break;
940
941                         /* Other states do not depend on file I/O. */
942                         default:
943                                 break;
944                         }
945                 }
946                 PTHREAD_WAITQ_CLEARACTIVE();
947         }
948         else if (_spinblock_count != 0) {
949                 /*
950                  * Enter a loop to look for threads waiting on a spinlock
951                  * that is now available.
952                  */
953                 PTHREAD_WAITQ_SETACTIVE();
954                 TAILQ_FOREACH(pthread, &_workq, qe) {
955                         if (pthread->state == PS_SPINBLOCK) {
956                                 /*
957                                  * If the lock is available, let the thread run.
958                                  */
959                                 if (pthread->data.spinlock->access_lock == 0) {
960                                         PTHREAD_WAITQ_CLEARACTIVE();
961                                         PTHREAD_WORKQ_REMOVE(pthread);
962                                         PTHREAD_NEW_STATE(pthread,PS_RUNNING);
963                                         PTHREAD_WAITQ_SETACTIVE();
964
965                                         /*
966                                          * One less thread in a spinblock state:
967                                          */
968                                         _spinblock_count--;
969                                 }
970                         }
971                 }
972                 PTHREAD_WAITQ_CLEARACTIVE();
973         }
974
975         /* Unprotect the scheduling queues: */
976         _queue_signals = 0;
977
978         while (_sigq_check_reqd != 0) {
979                 /* Handle queued signals: */
980                 _sigq_check_reqd = 0;
981
982                 /* Protect the scheduling queues: */
983                 _queue_signals = 1;
984
985                 dequeue_signals();
986
987                 /* Unprotect the scheduling queues: */
988                 _queue_signals = 0;
989         }
990 }
991
992 void
993 _thread_kern_set_timeout(const struct timespec * timeout)
994 {
995         struct pthread  *curthread = _get_curthread();
996         struct timespec current_time;
997         struct timeval  tv;
998
999         /* Reset the timeout flag for the running thread: */
1000         curthread->timeout = 0;
1001
1002         /* Check if the thread is to wait forever: */
1003         if (timeout == NULL) {
1004                 /*
1005                  * Set the wakeup time to something that can be recognised as
1006                  * different to an actual time of day:
1007                  */
1008                 curthread->wakeup_time.tv_sec = -1;
1009                 curthread->wakeup_time.tv_nsec = -1;
1010         }
1011         /* Check if no waiting is required: */
1012         else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
1013                 /* Set the wake up time to 'immediately': */
1014                 curthread->wakeup_time.tv_sec = 0;
1015                 curthread->wakeup_time.tv_nsec = 0;
1016         } else {
1017                 /* Get the current time: */
1018                 GET_CURRENT_TOD(tv);
1019                 TIMEVAL_TO_TIMESPEC(&tv, &current_time);
1020
1021                 /* Calculate the time for the current thread to wake up: */
1022                 curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
1023                 curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;
1024
1025                 /* Check if the nanosecond field needs to wrap: */
1026                 if (curthread->wakeup_time.tv_nsec >= 1000000000) {
1027                         /* Wrap the nanosecond field: */
1028                         curthread->wakeup_time.tv_sec += 1;
1029                         curthread->wakeup_time.tv_nsec -= 1000000000;
1030                 }
1031         }
1032 }
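/*
 * Example (illustrative): a relative timeout of 1.5 seconds taken at a
 * current time of 100.700000000 first yields 101 sec / 1200000000 nsec,
 * which the wrap above normalizes to a wakeup time of 102.200000000.
 */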
1033
1034 void
1035 _thread_kern_sig_defer(void)
1036 {
1037         struct pthread  *curthread = _get_curthread();
1038
1039         /* Allow signal deferral to be recursive. */
1040         curthread->sig_defer_count++;
1041 }
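/*
 * Deferral is intended to be used in matched pairs (a sketch of the usual
 * pattern):
 *
 *	_thread_kern_sig_defer();
 *	... manipulate scheduling or synchronization queues ...
 *	_thread_kern_sig_undefer();
 *
 * Because the count is recursive, nested critical sections only yield or
 * deliver pending signals when the outermost undefer drops it back to zero.
 */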
1042
1043 void
1044 _thread_kern_sig_undefer(void)
1045 {
1046         struct pthread  *curthread = _get_curthread();
1047
1048         /*
1049          * Perform checks to yield only if we are about to undefer
1050          * signals.
1051          */
1052         if (curthread->sig_defer_count > 1) {
1053                 /* Decrement the signal deferral count. */
1054                 curthread->sig_defer_count--;
1055         }
1056         else if (curthread->sig_defer_count == 1) {
1057                 /* Reenable signals: */
1058                 curthread->sig_defer_count = 0;
1059
1060                 /*
1061                  * Check if there are queued signals:
1062                  */
1063                 if (_sigq_check_reqd != 0)
1064                         _thread_kern_sched(NULL);
1065
1066                 /*
1067                  * Check for asynchronous cancellation before delivering any
1068                  * pending signals:
1069                  */
1070                 if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
1071                     ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
1072                         pthread_testcancel();
1073
1074                 /*
1075                  * If there are pending signals or this thread has
1076                  * to yield the CPU, call the kernel scheduler:
1077                  *
1078                  * XXX - Come back and revisit the pending signal problem
1079                  */
1080                 if ((curthread->yield_on_sig_undefer != 0) ||
1081                     SIGNOTEMPTY(curthread->sigpend)) {
1082                         curthread->yield_on_sig_undefer = 0;
1083                         _thread_kern_sched(NULL);
1084                 }
1085         }
1086 }
1087
1088 static void
1089 dequeue_signals(void)
1090 {
1091         char    bufr[128];
1092         int     num;
1093
1094         /*
1095          * Enter a loop to clear the pthread kernel pipe:
1096          */
1097         while (((num = __sys_extpread(_thread_kern_pipe[0], bufr,
1098             sizeof(bufr), O_FNONBLOCKING, -1)) > 0) ||
1099             (num == -1 && errno == EINTR)) {
1100                 ;
1101         }
1102         if ((num < 0) && (errno != EAGAIN)) {
1103                 /*
1104                  * The only error we should expect is if there is
1105                  * no data to read.
1106                  */
1107                 PANIC("Unable to read from thread kernel pipe");
1108         }
1109         /* Handle any pending signals: */
1110         _thread_sig_handle_pending();
1111 }
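/*
 * Note: extpread() is a DragonFly extension; passing O_FNONBLOCKING as the
 * flags argument and -1 as the offset requests a non-blocking read at the
 * current file position, which is why EAGAIN is the only errno treated as
 * benign above.
 */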
1112
1113 static inline void
1114 thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
1115 {
1116         pthread_t tid_out = thread_out;
1117         pthread_t tid_in = thread_in;
1118
1119         if ((tid_out != NULL) &&
1120             (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1121                 tid_out = NULL;
1122         if ((tid_in != NULL) &&
1123             (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1124                 tid_in = NULL;
1125
1126         if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
1127                 /* Run the scheduler switch hook: */
1128                 _sched_switch_hook(tid_out, tid_in);
1129         }
1130 }
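/*
 * The hook itself is installed elsewhere (presumably through the
 * non-portable pthread_switch_add_np() interface); private (scheduler)
 * threads are reported to it as NULL so the hook only ever sees user
 * threads.
 */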
1131
1132 struct pthread *
1133 _get_curthread(void)
1134 {
1135         if (_thread_initial == NULL)
1136                 _thread_init();
1137
1138         return (_thread_run);
1139 }
1140
1141 void
1142 _set_curthread(struct pthread *newthread)
1143 {
1144         _thread_run = newthread;
1145         tls_set_tcb(newthread->tcb);
1146 }