The syncer is not a process any more, deal with it as a thread.
[dragonfly.git] / sys / kern / lwkt_thread.c
1/*
2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * Each cpu in a system has its own self-contained light weight kernel
27 * thread scheduler, which means that generally speaking we only need
28 * to use a critical section to avoid problems. Foreign thread
29 * scheduling is queued via (async) IPIs.
30 *
31 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.17 2003/07/08 09:57:13 dillon Exp $
32 */
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/proc.h>
38#include <sys/rtprio.h>
39#include <sys/queue.h>
40#include <sys/thread2.h>
41#include <sys/sysctl.h>
42#include <sys/kthread.h>
43#include <machine/cpu.h>
44#include <sys/lock.h>
45
46#include <vm/vm.h>
47#include <vm/vm_param.h>
48#include <vm/vm_kern.h>
49#include <vm/vm_object.h>
50#include <vm/vm_page.h>
51#include <vm/vm_map.h>
52#include <vm/vm_pager.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_zone.h>
55
56#include <machine/stdarg.h>
57#include <machine/ipl.h>
58#ifdef SMP
59#include <machine/smp.h>
60#endif
61
62static int untimely_switch = 0;
63SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
64#ifdef INVARIANTS
65static int token_debug = 0;
66SYSCTL_INT(_lwkt, OID_AUTO, token_debug, CTLFLAG_RW, &token_debug, 0, "");
67#endif
68static quad_t switch_count = 0;
69SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
70static quad_t preempt_hit = 0;
71SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
72static quad_t preempt_miss = 0;
73SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
74static quad_t preempt_weird = 0;
75SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
76static quad_t ipiq_count = 0;
77SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
78static quad_t ipiq_fifofull = 0;
79SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
80
81/*
82 * These helper procedures handle the runq; they can only be called from
83 * within a critical section.
84 */
85static __inline
86void
87_lwkt_dequeue(thread_t td)
88{
89 if (td->td_flags & TDF_RUNQ) {
90 int nq = td->td_pri & TDPRI_MASK;
91 struct globaldata *gd = mycpu;
92
93 td->td_flags &= ~TDF_RUNQ;
94 TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
95 /* runqmask is passively cleaned up by the switcher */
96 }
97}
98
99static __inline
100void
101_lwkt_enqueue(thread_t td)
102{
103 if ((td->td_flags & TDF_RUNQ) == 0) {
104 int nq = td->td_pri & TDPRI_MASK;
105 struct globaldata *gd = mycpu;
106
107 td->td_flags |= TDF_RUNQ;
108 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
109 gd->gd_runqmask |= 1 << nq;
110#if 0
111 /*
112 * YYY needs cli/sti protection? gd_reqpri set by interrupt
113 * when made pending. need better mechanism.
114 */
115 if (gd->gd_reqpri < (td->td_pri & TDPRI_MASK))
116 gd->gd_reqpri = (td->td_pri & TDPRI_MASK);
117#endif
118 }
119}
120
121static __inline
122int
123_lwkt_wantresched(thread_t ntd, thread_t cur)
124{
125 return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK));
126}
127
128/*
129 * LWKTs operate on a per-cpu basis
130 *
131 * WARNING! Called from early boot, 'mycpu' may not work yet.
132 */
133void
134lwkt_gdinit(struct globaldata *gd)
135{
136 int i;
137
138 for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
139 TAILQ_INIT(&gd->gd_tdrunq[i]);
140 gd->gd_runqmask = 0;
141 TAILQ_INIT(&gd->gd_tdallq);
142}
143
144/*
145 * Initialize a thread wait structure prior to first use.
146 *
147 * NOTE! called from low level boot code, we cannot do anything fancy!
148 */
149void
150lwkt_init_wait(lwkt_wait_t w)
151{
152 TAILQ_INIT(&w->wa_waitq);
153}
154
155/*
156 * Create a new thread. The thread must be associated with a process context
157 * or LWKT start address before it can be scheduled.
158 *
159 * If you intend to create a thread without a process context this function
160 * does everything except load the startup and switcher function.
161 */
162thread_t
163lwkt_alloc_thread(struct thread *td)
164{
165 void *stack;
166 int flags = 0;
167
168 if (td == NULL) {
169 crit_enter();
170 if (mycpu->gd_tdfreecount > 0) {
171 --mycpu->gd_tdfreecount;
172 td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
173 KASSERT(td != NULL && (td->td_flags & TDF_EXITED),
174 ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
175 TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
176 crit_exit();
177 stack = td->td_kstack;
178 flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
179 } else {
180 crit_exit();
181 td = zalloc(thread_zone);
182 td->td_kstack = NULL;
183 flags |= TDF_ALLOCATED_THREAD;
184 }
185 }
186 if ((stack = td->td_kstack) == NULL) {
187 stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
188 flags |= TDF_ALLOCATED_STACK;
189 }
190 lwkt_init_thread(td, stack, flags, mycpu);
191 return(td);
192}
193
194/*
195 * Initialize a preexisting thread structure. This function is used by
196 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
197 *
198 * NOTE! called from low level boot code, we cannot do anything fancy!
199 */
200void
201lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
202{
203 bzero(td, sizeof(struct thread));
204 td->td_kstack = stack;
205 td->td_flags |= flags;
206 td->td_gd = gd;
207 td->td_pri = TDPRI_CRIT;
208 td->td_cpu = gd->gd_cpuid; /* YYY don't need this if have td_gd */
209 pmap_init_thread(td);
210 crit_enter();
211 TAILQ_INSERT_TAIL(&mycpu->gd_tdallq, td, td_allq);
212 crit_exit();
213}
214
215void
216lwkt_set_comm(thread_t td, const char *ctl, ...)
217{
218 va_list va;
219
220 va_start(va, ctl);
221 vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
222 va_end(va);
223}
224
225void
226lwkt_hold(thread_t td)
227{
228 ++td->td_refs;
229}
230
231void
232lwkt_rele(thread_t td)
233{
234 KKASSERT(td->td_refs > 0);
235 --td->td_refs;
236}
237
238void
239lwkt_wait_free(thread_t td)
240{
241 while (td->td_refs)
242 tsleep(td, PWAIT, "tdreap", hz);
243}
244
245void
246lwkt_free_thread(thread_t td)
247{
248 struct globaldata *gd = mycpu;
249
250 KASSERT(td->td_flags & TDF_EXITED,
251 ("lwkt_free_thread: did not exit! %p", td));
252
253 crit_enter();
254 TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
255 if (gd->gd_tdfreecount < CACHE_NTHREADS &&
256 (td->td_flags & TDF_ALLOCATED_THREAD)
257 ) {
258 ++gd->gd_tdfreecount;
259 TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
260 crit_exit();
261 } else {
262 crit_exit();
263 if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
264 kmem_free(kernel_map,
265 (vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
266 /* gd invalid */
267 td->td_kstack = NULL;
268 }
269 if (td->td_flags & TDF_ALLOCATED_THREAD)
270 zfree(thread_zone, td);
271 }
272}
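/*
 * A minimal usage sketch for the reference counting and reaping helpers
 * above, kept under #if 0 so it does not affect the build.  The example_*
 * names are hypothetical; lwkt_free_thread() additionally requires that the
 * target thread has already set TDF_EXITED.
 */
#if 0
static void
example_borrow(thread_t td)
{
    lwkt_hold(td);		/* pin the thread structure */
    /* ... hand td to some other context ... */
    lwkt_rele(td);		/* drop the reference; lwkt_wait_free() polls */
}

static void
example_reap(thread_t td)
{
    lwkt_wait_free(td);		/* sleep until all lwkt_hold() refs are gone */
    lwkt_free_thread(td);	/* cache or free the stack and thread */
}
#endif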
273
274
275/*
276 * Switch to the next runnable lwkt. If no LWKTs are runnable then
277 * switch to the idlethread. Switching must occur within a critical
278 * section to avoid races with the scheduling queue.
279 *
280 * We always have full control over our cpu's run queue. Other cpus
281 * that wish to manipulate our queue must use the cpu_*msg() calls to
282 * talk to our cpu, so a critical section is all that is needed and
283 * the result is very, very fast thread switching.
284 *
285 * The LWKT scheduler uses a fixed priority model and round-robins at
286 * each priority level. User process scheduling is a totally
287 * different beast and LWKT priorities should not be confused with
288 * user process priorities.
289 *
290 * The MP lock may be out of sync with the thread's td_mpcount. lwkt_switch()
291 * cleans it up. Note that the td_switch() function cannot do anything that
292 * requires the MP lock since the MP lock will have already been setup for
293 * the target thread (not the current thread).
294 */
295
296void
297lwkt_switch(void)
298{
299 struct globaldata *gd;
300 thread_t td = curthread;
301 thread_t ntd;
302#ifdef SMP
303 int mpheld;
304#endif
305
306 if (mycpu->gd_intr_nesting_level &&
307 td->td_preempted == NULL && panicstr == NULL
308 ) {
309 panic("lwkt_switch: cannot switch from within an interrupt, yet\n");
310 }
311
312 crit_enter();
313 ++switch_count;
314
315#ifdef SMP
316 /*
317 * td_mpcount cannot be used to determine if we currently hold the
318 * MP lock because get_mplock() will increment it prior to attempting
319 * to get the lock, and switch out if it can't. Look at the actual lock.
320 */
321 mpheld = MP_LOCK_HELD();
322#endif
323 if ((ntd = td->td_preempted) != NULL) {
324 /*
325 * We had preempted another thread on this cpu, resume the preempted
326 * thread. This occurs transparently, whether the preempted thread
327 * was scheduled or not (it may have been preempted after descheduling
328 * itself).
329 *
330 * We have to setup the MP lock for the original thread after backing
331 * out the adjustment that was made to curthread when the original
332 * was preempted.
333 */
334 KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
335#ifdef SMP
336 if (ntd->td_mpcount && mpheld == 0) {
337 panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n",
338 td, ntd, td->td_mpcount, ntd->td_mpcount);
339 }
340 if (ntd->td_mpcount) {
341 td->td_mpcount -= ntd->td_mpcount;
342 KKASSERT(td->td_mpcount >= 0);
343 }
344#endif
345 ntd->td_flags |= TDF_PREEMPT_DONE;
346 /* YYY release mp lock on switchback if original doesn't need it */
347 } else {
348 /*
349 * Priority queue / round-robin at each priority. Note that user
350 * processes run at a fixed, low priority and the user process
351 * scheduler deals with interactions between user processes
352 * by scheduling and descheduling them from the LWKT queue as
353 * necessary.
354 *
355 * We have to adjust the MP lock for the target thread. If we
356 * need the MP lock and cannot obtain it we try to locate a
357 * thread that does not need the MP lock.
358 */
359 gd = mycpu;
360again:
361 if (gd->gd_runqmask) {
362 int nq = bsrl(gd->gd_runqmask);
363 if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
364 gd->gd_runqmask &= ~(1 << nq);
365 goto again;
366 }
367#ifdef SMP
368 if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
369 /*
370 * Target needs MP lock and we couldn't get it, try
371 * to locate a thread which does not need the MP lock
372 * to run.
373 */
374 u_int32_t rqmask = gd->gd_runqmask;
375 while (rqmask) {
376 TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
377 if (ntd->td_mpcount == 0)
378 break;
379 }
380 if (ntd)
381 break;
382 rqmask &= ~(1 << nq);
383 nq = bsrl(rqmask);
384 }
385 if (ntd == NULL) {
386 ntd = gd->gd_idletd;
387 } else {
388 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
389 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
390 }
391 } else {
392 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
393 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
394 }
395#else
396 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
397 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
398#endif
399 } else {
400 ntd = gd->gd_idletd;
401 }
402 }
403 KASSERT(ntd->td_pri >= TDPRI_CRIT,
404 ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
405
406 /*
407 * Do the actual switch. If the new target does not need the MP lock
408 * and we are holding it, release the MP lock. If the new target requires
409 * the MP lock we have already acquired it for the target.
410 */
411#ifdef SMP
412 if (ntd->td_mpcount == 0 ) {
413 if (MP_LOCK_HELD())
414 cpu_rel_mplock();
415 } else {
416 ASSERT_MP_LOCK_HELD();
417 }
418#endif
419 if (td != ntd) {
420 td->td_switch(ntd);
421 }
422
423 crit_exit();
424}
425
426/*
427 * Request that the target thread preempt the current thread. Preemption
428 * only works under a specific set of conditions:
429 *
430 * - We are not preempting ourselves
431 * - The target thread is owned by the current cpu
432 * - We are not currently being preempted
433 * - The target is not currently being preempted
434 * - We are able to satisfy the target's MP lock requirements (if any).
435 *
436 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION. Typically
437 * this is called via lwkt_schedule() through the td_preemptable callback.
438 * critpri is the managed critical priority that we should ignore in order
439 * to determine whether preemption is possible (aka usually just the crit
440 * priority of lwkt_schedule() itself).
441 *
442 * XXX at the moment we run the target thread in a critical section during
443 * the preemption in order to prevent the target from taking interrupts
444 * that *WE* can't. Preemption is strictly limited to interrupt threads
445 * and interrupt-like threads, outside of a critical section, and the
446 * preempted source thread will be resumed the instant the target blocks
447 * whether or not the source is scheduled (i.e. preemption is supposed to
448 * be as transparent as possible).
449 *
450 * The target thread inherits our MP count (added to its own) for the
451 * duration of the preemption in order to preserve the atomicity of the
452 * MP lock during the preemption. Therefore, any preempting targets must be
453 * careful in regards to MP assertions. Note that the MP count may be
454 * out of sync with the physical mp_lock. If we preempt we have to preserve
455 * the expected situation.
456 */
457void
458lwkt_preempt(thread_t ntd, int critpri)
459{
460 thread_t td = curthread;
461#ifdef SMP
462 int mpheld;
463 int savecnt;
464#endif
465
466 /*
467 * The caller has put us in a critical section. We can only preempt
468 * if the caller of the caller was not in a critical section (basically
469 * a local interrupt), as determined by the 'critpri' parameter. If we
470 * are unable to preempt we count it as a miss and at most set need_resched().
471 *
472 * YYY The target thread must be in a critical section (else it must
473 * inherit our critical section? I dunno yet).
474 */
475 KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
476
477 if (!_lwkt_wantresched(ntd, td)) {
478 ++preempt_miss;
479 return;
480 }
481 if ((td->td_pri & ~TDPRI_MASK) > critpri) {
482 ++preempt_miss;
483 need_resched();
484 return;
485 }
486#ifdef SMP
487 if (ntd->td_cpu != mycpu->gd_cpuid) {
488 ++preempt_miss;
489 return;
490 }
491#endif
492 if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
493 ++preempt_weird;
494 need_resched();
495 return;
496 }
497 if (ntd->td_preempted) {
498 ++preempt_hit;
499 need_resched();
500 return;
501 }
502#ifdef SMP
503 mpheld = MP_LOCK_HELD();
504 if (mpheld && td->td_mpcount == 0)
505 panic("lwkt_preempt(): held and no count");
506 savecnt = td->td_mpcount;
507 ntd->td_mpcount += td->td_mpcount;
508 if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
509 ntd->td_mpcount -= td->td_mpcount;
510 ++preempt_miss;
511 need_resched();
512 return;
513 }
514#endif
515
516 ++preempt_hit;
517 ntd->td_preempted = td;
518 td->td_flags |= TDF_PREEMPT_LOCK;
519 td->td_switch(ntd);
520 KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
521#ifdef SMP
522 KKASSERT(savecnt == td->td_mpcount);
523 if (mpheld == 0 && MP_LOCK_HELD())
524 cpu_rel_mplock();
525 else if (mpheld && !MP_LOCK_HELD())
526 panic("lwkt_preempt(): MP lock was not held through");
527#endif
528 ntd->td_preempted = NULL;
529 td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
530}
531
532/*
533 * Yield our thread while higher priority threads are pending. This is
534 * typically called when we leave a critical section but it can be safely
535 * called while we are in a critical section.
536 *
537 * This function will not generally yield to equal priority threads but it
538 * can occur as a side effect. Note that lwkt_switch() is called from
539 * inside the critical section to prevent its own crit_exit() from reentering
540 * lwkt_yield_quick().
541 *
542 * gd_reqpri indicates that *something* changed, e.g. an interrupt or softint
543 * came along but was blocked and made pending.
544 *
545 * (self contained on a per cpu basis)
546 */
547void
548lwkt_yield_quick(void)
549{
550 thread_t td = curthread;
551
552 if ((td->td_pri & TDPRI_MASK) < mycpu->gd_reqpri) {
553 mycpu->gd_reqpri = 0;
554 splz();
555 }
556
557 /*
558 * YYY enabling will cause wakeup() to task-switch, which really
559 * confused the old 4.x code. This is a good way to simulate
560 * preemption and MP without actually doing preemption or MP, because a
561 * lot of code assumes that wakeup() does not block.
562 */
563 if (untimely_switch && mycpu->gd_intr_nesting_level == 0) {
564 crit_enter();
565 /*
566 * YYY temporary hacks until we disassociate the userland scheduler
567 * from the LWKT scheduler.
568 */
569 if (td->td_flags & TDF_RUNQ) {
570 lwkt_switch(); /* will not reenter yield function */
571 } else {
572 lwkt_schedule_self(); /* make sure we are scheduled */
573 lwkt_switch(); /* will not reenter yield function */
574 lwkt_deschedule_self(); /* make sure we are descheduled */
575 }
576 crit_exit_noyield();
577 }
578}
579
580/*
581 * This implements a normal yield which, unlike _quick, will yield to equal
582 * priority threads as well. Note that gd_reqpri tests will be handled by
583 * the crit_exit() call in lwkt_switch().
584 *
585 * (self contained on a per cpu basis)
586 */
587void
588lwkt_yield(void)
589{
590 lwkt_schedule_self();
591 lwkt_switch();
592}
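/*
 * A minimal sketch of a long-running kernel thread using lwkt_yield() to
 * avoid monopolizing its cpu, kept under #if 0 so it does not affect the
 * build.  The example_* names are hypothetical.
 */
#if 0
static void
example_worker(void *dummy)
{
    for (;;) {
	example_do_some_work();		/* a bounded chunk of work */
	lwkt_yield();			/* lets equal priority threads run too */
    }
}
#endif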
593
594/*
595 * Schedule a thread to run. As the current thread we can always safely
596 * schedule ourselves, and a shortcut procedure is provided for that
597 * purpose.
598 *
599 * (non-blocking, self contained on a per cpu basis)
600 */
601void
602lwkt_schedule_self(void)
603{
604 thread_t td = curthread;
605
606 crit_enter();
607 KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
608 _lwkt_enqueue(td);
609 if (td->td_proc && td->td_proc->p_stat == SSLEEP)
610 panic("SCHED SELF PANIC");
611 crit_exit();
612}
613
614/*
615 * Generic schedule. Possibly schedule threads belonging to other cpus and
616 * deal with threads that might be blocked on a wait queue.
617 *
618 * YYY this is one of the best places to implement load balancing code.
619 * Load balancing can be accomplished by requesting other sorts of actions
620 * for the thread in question.
621 */
622void
623lwkt_schedule(thread_t td)
624{
625#ifdef INVARIANTS
626 if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
627 && td->td_proc->p_stat == SSLEEP
628 ) {
629 printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
630 curthread,
631 curthread->td_proc ? curthread->td_proc->p_pid : -1,
632 curthread->td_proc ? curthread->td_proc->p_stat : -1,
633 td,
634 td->td_proc ? td->td_proc->p_pid : -1,
635 td->td_proc ? td->td_proc->p_stat : -1
636 );
637 panic("SCHED PANIC");
638 }
639#endif
640 crit_enter();
641 if (td == curthread) {
642 _lwkt_enqueue(td);
643 } else {
644 lwkt_wait_t w;
645
646 /*
647 * If the thread is on a wait list we have to send our scheduling
648 * request to the owner of the wait structure. Otherwise we send
649 * the scheduling request to the cpu owning the thread. Races
650 * are ok, the target will forward the message as necessary (the
651 * message may chase the thread around before it finally gets
652 * acted upon).
653 *
654 * (remember, wait structures use stable storage)
655 */
656 if ((w = td->td_wait) != NULL) {
657 if (lwkt_trytoken(&w->wa_token)) {
658 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
659 --w->wa_count;
660 td->td_wait = NULL;
661 if (td->td_cpu == mycpu->gd_cpuid) {
662 _lwkt_enqueue(td);
663 if (td->td_preemptable) {
664 td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */
665 } else if (_lwkt_wantresched(td, curthread)) {
666 need_resched();
667 }
668 } else {
669 lwkt_send_ipiq(td->td_cpu, (ipifunc_t)lwkt_schedule, td);
670 }
671 lwkt_reltoken(&w->wa_token);
672 } else {
673 lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
674 }
675 } else {
676 /*
677 * If the wait structure is NULL and we own the thread, there
678 * is no race (since we are in a critical section). If we
679 * do not own the thread there might be a race but the
680 * target cpu will deal with it.
681 */
682 if (td->td_cpu == mycpu->gd_cpuid) {
683 _lwkt_enqueue(td);
684 if (td->td_preemptable) {
685 td->td_preemptable(td, TDPRI_CRIT);
686 } else if (_lwkt_wantresched(td, curthread)) {
687 need_resched();
688 }
689 } else {
690 lwkt_send_ipiq(td->td_cpu, (ipifunc_t)lwkt_schedule, td);
691 }
692 }
693 }
694 crit_exit();
695}
696
697/*
698 * Deschedule a thread.
699 *
700 * (non-blocking, self contained on a per cpu basis)
701 */
702void
703lwkt_deschedule_self(void)
704{
705 thread_t td = curthread;
706
707 crit_enter();
708 KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
709 _lwkt_dequeue(td);
710 crit_exit();
711}
712
713/*
714 * Generic deschedule. Descheduling threads other than your own should be
715 * done only in carefully controlled circumstances. Descheduling is
716 * asynchronous.
717 *
718 * This function may block if the cpu has run out of messages.
719 */
720void
721lwkt_deschedule(thread_t td)
722{
723 crit_enter();
724 if (td == curthread) {
725 _lwkt_dequeue(td);
726 } else {
727 if (td->td_cpu == mycpu->gd_cpuid) {
728 _lwkt_dequeue(td);
729 } else {
730 lwkt_send_ipiq(td->td_cpu, (ipifunc_t)lwkt_deschedule, td);
731 }
732 }
733 crit_exit();
734}
735
736/*
737 * Set the target thread's priority. This routine does not automatically
738 * switch to a higher priority thread, LWKT threads are not designed for
739 * continuous priority changes. Yield if you want to switch.
740 *
741 * We have to retain the critical section count which uses the high bits
742 * of the td_pri field. The specified priority may also indicate zero or
743 * more critical sections by adding TDPRI_CRIT*N.
744 */
745void
746lwkt_setpri(thread_t td, int pri)
747{
748 KKASSERT(pri >= 0);
749 KKASSERT(td->td_cpu == mycpu->gd_cpuid);
750 crit_enter();
751 if (td->td_flags & TDF_RUNQ) {
752 _lwkt_dequeue(td);
753 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
754 _lwkt_enqueue(td);
755 } else {
756 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
757 }
758 crit_exit();
759}
760
761void
762lwkt_setpri_self(int pri)
763{
764 thread_t td = curthread;
765
766 KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
767 crit_enter();
768 if (td->td_flags & TDF_RUNQ) {
769 _lwkt_dequeue(td);
770 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
771 _lwkt_enqueue(td);
772 } else {
773 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
774 }
775 crit_exit();
776}
777
778struct proc *
779lwkt_preempted_proc(void)
780{
781 thread_t td = curthread;
782 while (td->td_preempted)
783 td = td->td_preempted;
784 return(td->td_proc);
785}
786
787
788/*
789 * This function deschedules the current thread and blocks on the specified
790 * wait queue. We obtain ownership of the wait queue in order to block
791 * on it. A generation number is used to interlock the wait queue in case
792 * it gets signalled while we are blocked waiting on the token.
793 *
794 * Note: alternatively we could dequeue our thread and then message the
795 * target cpu owning the wait queue. YYY implement as sysctl.
796 *
797 * Note: wait queue signals normally ping-pong the cpu as an optimization.
798 */
799typedef struct lwkt_gettoken_req {
800 lwkt_token_t tok;
801 int cpu;
802} lwkt_gettoken_req;
803
804void
805lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
806{
807 thread_t td = curthread;
808
809 lwkt_gettoken(&w->wa_token);
810 if (w->wa_gen == *gen) {
811 _lwkt_dequeue(td);
812 TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
813 ++w->wa_count;
814 td->td_wait = w;
815 td->td_wmesg = wmesg;
816 lwkt_switch();
817 }
818 /* token might be lost, doesn't matter for gen update */
819 *gen = w->wa_gen;
820 lwkt_reltoken(&w->wa_token);
821}
822
823/*
824 * Signal a wait queue. We gain ownership of the wait queue in order to
825 * signal it. Once a thread is removed from the wait queue we have to
826 * deal with the cpu owning the thread.
827 *
828 * Note: alternatively we could message the target cpu owning the wait
829 * queue. YYY implement as sysctl.
830 */
831void
832lwkt_signal(lwkt_wait_t w)
833{
834 thread_t td;
835 int count;
836
837 lwkt_gettoken(&w->wa_token);
838 ++w->wa_gen;
839 count = w->wa_count;
840 while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
841 --count;
842 --w->wa_count;
843 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
844 td->td_wait = NULL;
845 td->td_wmesg = NULL;
846 if (td->td_cpu == mycpu->gd_cpuid) {
847 _lwkt_enqueue(td);
848 } else {
849 lwkt_send_ipiq(td->td_cpu, (ipifunc_t)lwkt_schedule, td);
850 }
851 lwkt_regettoken(&w->wa_token);
852 }
853 lwkt_reltoken(&w->wa_token);
854}
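/*
 * A minimal usage sketch for the wait queue API above, kept under #if 0 so
 * it does not affect the build.  The wait structure, the
 * example_work_ready() predicate and the other example_* names are
 * hypothetical.  The generation number closes the window between testing
 * the predicate and blocking: if a signal bumps wa_gen in that window,
 * lwkt_block() returns immediately and the predicate is simply re-tested.
 */
#if 0
static struct lwkt_wait example_w;	/* lwkt_init_wait(&example_w) at init */

static void
example_consumer(void)
{
    int gen = example_w.wa_gen;		/* sample before testing the predicate */

    while (example_work_ready() == 0)
	lwkt_block(&example_w, "exwait", &gen);
}

static void
example_producer(void)
{
    example_queue_work();		/* make the predicate true first */
    lwkt_signal(&example_w);		/* bumps wa_gen and wakes the waiters */
}
#endif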
855
856/*
857 * Acquire ownership of a token
858 *
859 * Acquire ownership of a token. The token may have spl and/or critical
860 * section side effects, depending on its purpose. These side effects
861 * guarantee that you will maintain ownership of the token as long as you
862 * do not block. If you block you may lose access to the token (but you
863 * must still release it even if you lose your access to it).
864 *
865 * YYY for now we use a critical section to prevent IPIs from taking away
866 * a token, but we really only need to disable IPIs ?
867 *
868 * YYY certain tokens could be made to act like mutexes when performance
869 * would be better (e.g. t_cpu == -1). This is not yet implemented.
870 *
871 * If the token is owned by another cpu we may have to send an IPI to
872 * it and then block. The IPI causes the token to be given away to the
873 * requesting cpu, unless it has already changed hands. Since only the
874 * current cpu can give away a token it owns we do not need a memory barrier.
875 */
876
877#ifdef SMP
878
879static
880void
881lwkt_gettoken_remote(void *arg)
882{
883 lwkt_gettoken_req *req = arg;
884 if (req->tok->t_cpu == mycpu->gd_cpuid) {
885 req->tok->t_cpu = req->cpu;
886 }
887}
888
889#endif
890
891int
892lwkt_gettoken(lwkt_token_t tok)
893{
894 /*
895 * Prevent preemption so the token can't be taken away from us once
896 * we gain ownership of it. Use a synchronous request which might
897 * block. The request will be forwarded as necessary playing catchup
898 * to the token.
899 */
900
901 crit_enter();
902#ifdef INVARIANTS
903 if (token_debug) {
904 printf("gettoken %p %d/%d\n", ((int **)&tok)[-1], (curthread->td_proc?curthread->td_proc->p_pid:-1), curthread->td_pri);
905 if (curthread->td_pri > 2000) {
906 curthread->td_pri = 1000;
907 panic("too HIGH!");
908 }
909 }
910#endif
911#ifdef SMP
912 while (tok->t_cpu != mycpu->gd_cpuid) {
913 struct lwkt_gettoken_req req;
914 int seq;
915 int dcpu;
916
917 req.cpu = mycpu->gd_cpuid;
918 req.tok = tok;
919 dcpu = (volatile int)tok->t_cpu;
920 seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
921 lwkt_wait_ipiq(dcpu, seq);
922 }
923#endif
924 /*
925 * leave us in a critical section on return. This will be undone
926 * by lwkt_reltoken(). Bump the generation number.
927 */
928 return(++tok->t_gen);
929}
930
931/*
932 * Attempt to acquire ownership of a token. Returns 1 on success, 0 on
933 * failure.
934 */
935int
936lwkt_trytoken(lwkt_token_t tok)
937{
938 crit_enter();
939#ifdef SMP
940 if (tok->t_cpu != mycpu->gd_cpuid) {
941 crit_exit(); /* balance the crit_enter() above on the failure path */
 return(0);
942 }
943#endif
944 /* leave us in the critical section */
945 ++tok->t_gen;
946 return(1);
947}
948
949/*
950 * Release your ownership of a token. Releases must occur in reverse
951 * order to acquisitions, eventually, so priorities can be unwound properly
952 * like SPLs. At the moment the actual implementation doesn't care.
953 *
954 * We can safely hand a token that we own to another cpu without notifying
955 * it, but once we do we can't get it back without requesting it (unless
956 * the other cpu hands it back to us before we check).
957 *
958 * We might have lost the token, so check that.
959 */
960void
961lwkt_reltoken(lwkt_token_t tok)
962{
963 if (tok->t_cpu == mycpu->gd_cpuid) {
964 tok->t_cpu = tok->t_reqcpu;
965 }
966 crit_exit();
967}
968
969/*
970 * Reacquire a token that might have been lost and compare and update the
971 * generation number. 0 is returned if the generation has not changed
972 * (nobody else obtained the token while we were blocked, on this cpu or
973 * any other cpu).
974 *
975 * This function returns with the token re-held whether the generation
976 * number changed or not.
977 */
978int
979lwkt_gentoken(lwkt_token_t tok, int *gen)
980{
981 if (lwkt_regettoken(tok) == *gen) {
982 return(0);
983 } else {
984 *gen = tok->t_gen;
985 return(-1);
986 }
987}
988
989
990/*
991 * Re-acquire a token that might have been lost. Returns the generation
992 * number of the token.
993 */
994int
995lwkt_regettoken(lwkt_token_t tok)
996{
997 /* assert we are in a critical section */
998 if (tok->t_cpu != mycpu->gd_cpuid) {
999#ifdef SMP
1000 while (tok->t_cpu != mycpu->gd_cpuid) {
1001 struct lwkt_gettoken_req req;
1002 int seq;
1003 int dcpu;
1004
1005 req.cpu = mycpu->gd_cpuid;
1006 req.tok = tok;
1007 dcpu = (volatile int)tok->t_cpu;
1008 seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
1009 lwkt_wait_ipiq(dcpu, seq);
1010 }
1011#endif
1012 ++tok->t_gen;
1013 }
1014 return(tok->t_gen);
1015}
1016
1017void
1018lwkt_inittoken(lwkt_token_t tok)
1019{
1020 /*
1021 * Zero structure and set cpu owner and reqcpu to cpu 0.
1022 */
1023 bzero(tok, sizeof(*tok));
1024}
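/*
 * One plausible usage pattern for the token functions above, sketched under
 * #if 0 so it does not affect the build.  The example_* names are
 * hypothetical.  Because blocking may silently cost us the token, the
 * generation number returned by lwkt_gettoken() is revalidated with
 * lwkt_gentoken() and the update is retried if another holder got in while
 * we were blocked.
 */
#if 0
static struct lwkt_token example_tok;	/* lwkt_inittoken(&example_tok) at init */

static void
example_update(void)
{
    int gen;

    gen = lwkt_gettoken(&example_tok);	/* also enters a critical section */
    do {
	example_modify_shared_state();	/* may block and lose the token */
    } while (lwkt_gentoken(&example_tok, &gen) != 0);
    lwkt_reltoken(&example_tok);	/* undoes the critical section */
}
#endif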
1025
1026/*
1027 * Create a kernel process/thread/whatever. It shares its address space
1028 * with proc0 - ie: kernel only.
1029 *
1030 * XXX should be renamed to lwkt_create()
1031 *
1032 * The thread will be entered with the MP lock held.
1033 */
1034int
1035lwkt_create(void (*func)(void *), void *arg,
1036 struct thread **tdp, thread_t template, int tdflags,
1037 const char *fmt, ...)
1038{
1039 thread_t td;
1040 va_list ap;
1041
1042 td = *tdp = lwkt_alloc_thread(template);
1043 cpu_set_thread_handler(td, kthread_exit, func, arg);
1044 td->td_flags |= TDF_VERBOSE | tdflags;
1045#ifdef SMP
1046 td->td_mpcount = 1;
1047#endif
1048
1049 /*
1050 * Set up arg0 for 'ps' etc
1051 */
1052 va_start(ap, fmt);
1053 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
1054 va_end(ap);
1055
1056 /*
1057 * Schedule the thread to run
1058 */
1059 if ((td->td_flags & TDF_STOPREQ) == 0)
1060 lwkt_schedule(td);
1061 else
1062 td->td_flags &= ~TDF_STOPREQ;
1063 return 0;
1064}
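/*
 * A minimal sketch of creating a kernel thread with lwkt_create(), kept
 * under #if 0 so it does not affect the build.  The example_* names are
 * hypothetical; passing a NULL template simply makes lwkt_alloc_thread()
 * allocate a fresh thread and stack, and omitting TDF_STOPREQ means the
 * thread is scheduled immediately.
 */
#if 0
static struct thread *example_td;

static void
example_main(void *arg)
{
    for (;;) {
	/* ... periodic work ... */
	tsleep(&example_td, PWAIT, "exidle", hz);
    }
}

static void
example_start(void)
{
    lwkt_create(example_main, NULL, &example_td, NULL, 0, "example");
}
#endif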
1065
1066/*
1067 * Destroy an LWKT thread. Warning! This function is not called when
1068 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
1069 * uses a different reaping mechanism.
1070 */
1071void
1072lwkt_exit(void)
1073{
1074 thread_t td = curthread;
1075
1076 if (td->td_flags & TDF_VERBOSE)
1077 printf("kthread %p %s has exited\n", td, td->td_comm);
1078 crit_enter();
1079 lwkt_deschedule_self();
1080 ++mycpu->gd_tdfreecount;
1081 TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
1082 cpu_thread_exit();
1083}
1084
1085/*
1086 * Create a kernel process/thread/whatever. It shares its address space
1087 * with proc0 - ie: kernel only. 5.x compatible.
1088 */
1089int
1090kthread_create(void (*func)(void *), void *arg,
1091 struct thread **tdp, const char *fmt, ...)
1092{
1093 thread_t td;
1094 va_list ap;
1095
1096 td = *tdp = lwkt_alloc_thread(NULL);
1097 cpu_set_thread_handler(td, kthread_exit, func, arg);
1098 td->td_flags |= TDF_VERBOSE;
1099#ifdef SMP
1100 td->td_mpcount = 1;
1101#endif
1102
1103 /*
1104 * Set up arg0 for 'ps' etc
1105 */
1106 va_start(ap, fmt);
1107 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
1108 va_end(ap);
1109
1110 /*
1111 * Schedule the thread to run
1112 */
1113 lwkt_schedule(td);
1114 return 0;
1115}
1116
1117void
1118crit_panic(void)
1119{
1120 thread_t td = curthread;
1121 int lpri = td->td_pri;
1122
1123 td->td_pri = 0;
1124 panic("td_pri is/would-go negative! %p %d", td, lpri);
1125}
1126
1127/*
1128 * Destroy an LWKT thread. Warning! This function is not called when
1129 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
1130 * uses a different reaping mechanism.
1131 *
1132 * XXX duplicates lwkt_exit()
1133 */
1134void
1135kthread_exit(void)
1136{
1137 lwkt_exit();
1138}
1139
1140#ifdef SMP
1141
1142/*
1143 * Send a function execution request to another cpu. The request is queued
1144 * on the cpu<->cpu ipiq matrix. Each cpu owns a unique ipiq FIFO for every
1145 * possible target cpu. Only the owning (source) cpu ever writes to a FIFO.
1146 *
1147 * YYY If the FIFO fills up we have to enable interrupts and process the
1148 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
1149 * Create a CPU_*() function to do this!
1150 *
1151 * Must be called from a critical section.
1152 */
1153int
1154lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
1155{
1156 lwkt_ipiq_t ip;
1157 int windex;
1158
1159 if (dcpu == mycpu->gd_cpuid) {
1160 func(arg);
1161 return(0);
1162 }
1163 KKASSERT(curthread->td_pri >= TDPRI_CRIT);
1164 KKASSERT(dcpu >= 0 && dcpu < ncpus);
1165 ++ipiq_count;
1166 ip = &mycpu->gd_ipiq[dcpu];
1167 if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
1168 unsigned int eflags = read_eflags();
1169 cpu_enable_intr();
1170 ++ipiq_fifofull;
1171 while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
1172 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
1173 lwkt_process_ipiq();
1174 }
1175 write_eflags(eflags);
1176 }
1177 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
1178 windex = ip->ip_windex & MAXCPUFIFO_MASK;
1179 ip->ip_func[windex] = func;
1180 ip->ip_arg[windex] = arg;
1181 /* YYY memory barrier */
1182 ++ip->ip_windex;
1183 cpu_send_ipiq(dcpu); /* issues memory barrier if appropriate */
1184 return(ip->ip_windex);
1185}
1186
1187/*
1188 * Wait for the remote cpu to finish processing a function.
1189 *
1190 * YYY we have to enable interrupts and process the IPIQ while waiting
1191 * for it to empty or we may deadlock with another cpu. Create a CPU_*()
1192 * function to do this! YYY we really should 'block' here.
1193 *
1194 * Must be called from a critical section. This routine may be called
1195 * from an interrupt (for example, if an interrupt wakes a foreign thread
1196 * up).
1197 */
1198void
1199lwkt_wait_ipiq(int dcpu, int seq)
1200{
1201 lwkt_ipiq_t ip;
1202
1203 if (dcpu != mycpu->gd_cpuid) {
1204 KKASSERT(dcpu >= 0 && dcpu < ncpus);
1205 ip = &mycpu->gd_ipiq[dcpu];
1206 if ((int)(ip->ip_rindex - seq) < 0) {
1207 unsigned int eflags = read_eflags();
1208 cpu_enable_intr();
1209 while ((int)(ip->ip_rindex - seq) < 0) {
1210 lwkt_process_ipiq();
1211#if 0
1212 lwkt_switch(); /* YYY fixme */
1213#endif
1214 }
1215 write_eflags(eflags);
1216 }
1217 }
1218}
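/*
 * A minimal sketch of a synchronous cross-cpu call built from the two
 * primitives above, kept under #if 0 so it does not affect the build.  The
 * example_* names are hypothetical.  The caller must already be in a
 * critical section; lwkt_wait_ipiq() services our own IPIQ while it spins,
 * which is what keeps two cpus calling each other from deadlocking.
 */
#if 0
static void
example_remote_func(void *arg)
{
    ++*(int *)arg;			/* runs on the target cpu */
}

static void
example_sync_call(int dcpu, int *counter)
{
    int seq;

    seq = lwkt_send_ipiq(dcpu, example_remote_func, counter);
    lwkt_wait_ipiq(dcpu, seq);		/* returns once the target consumed it */
}
#endif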
1219
1220/*
1221 * Called from IPI interrupt (like a fast interrupt), which has placed
1222 * us in a critical section. The MP lock may or may not be held.
1223 * May also be called from doreti or splz.
1224 */
1225void
1226lwkt_process_ipiq(void)
1227{
1228 int n;
1229 int cpuid = mycpu->gd_cpuid;
1230
1231 for (n = 0; n < ncpus; ++n) {
1232 lwkt_ipiq_t ip;
1233 int ri;
1234
1235 if (n == cpuid)
1236 continue;
1237 ip = globaldata_find(n)->gd_ipiq;
1238 if (ip == NULL)
1239 continue;
1240 ip = &ip[cpuid];
1241 while (ip->ip_rindex != ip->ip_windex) {
1242 ri = ip->ip_rindex & MAXCPUFIFO_MASK;
1243 ip->ip_func[ri](ip->ip_arg[ri]);
1244 ++ip->ip_rindex;
1245 }
1246 }
1247}
1248
1249#else
1250
1251int
1252lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
1253{
1254 panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg);
1255 return(0); /* NOT REACHED */
1256}
1257
1258void
1259lwkt_wait_ipiq(int dcpu, int seq)
1260{
1261 panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq);
1262}
1263
1264#endif