MP Implementation 1/2: Get the APIC code working again, sweetly integrate the
[dragonfly.git] / sys / kern / lwkt_thread.c
CommitLineData
8ad65e08
MD
1/*
2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
f1d1c3fa
MD
26 * Each cpu in a system has its own self-contained light weight kernel
27 * thread scheduler, which means that generally speaking we only need
28 * to use a critical section to prevent hiccups.
29 *
8a8d5d85 30 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.15 2003/07/06 21:23:51 dillon Exp $
8ad65e08
MD
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/proc.h>
37#include <sys/rtprio.h>
38#include <sys/queue.h>
f1d1c3fa 39#include <sys/thread2.h>
7d0bac62 40#include <sys/sysctl.h>
99df837e 41#include <sys/kthread.h>
f1d1c3fa 42#include <machine/cpu.h>
99df837e 43#include <sys/lock.h>
f1d1c3fa 44
7d0bac62
MD
45#include <vm/vm.h>
46#include <vm/vm_param.h>
47#include <vm/vm_kern.h>
48#include <vm/vm_object.h>
49#include <vm/vm_page.h>
50#include <vm/vm_map.h>
51#include <vm/vm_pager.h>
52#include <vm/vm_extern.h>
53#include <vm/vm_zone.h>
54
99df837e
MD
55#include <machine/stdarg.h>
56
7d0bac62 57static int untimely_switch = 0;
4b5f931b
MD
58SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
59static quad_t switch_count = 0;
60SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
61static quad_t preempt_hit = 0;
62SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
63static quad_t preempt_miss = 0;
64SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
26a0694b
MD
65static quad_t preempt_weird = 0;
66SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
7d0bac62 67
4b5f931b
MD
68/*
69 * These helper procedures handle the runq, they can only be called from
70 * within a critical section.
71 */
f1d1c3fa
MD
72static __inline
73void
74_lwkt_dequeue(thread_t td)
75{
76 if (td->td_flags & TDF_RUNQ) {
4b5f931b
MD
77 int nq = td->td_pri & TDPRI_MASK;
78 struct globaldata *gd = mycpu;
79
f1d1c3fa 80 td->td_flags &= ~TDF_RUNQ;
4b5f931b
MD
81 TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
82 /* runqmask is passively cleaned up by the switcher */
f1d1c3fa
MD
83 }
84}
85
86static __inline
87void
88_lwkt_enqueue(thread_t td)
89{
90 if ((td->td_flags & TDF_RUNQ) == 0) {
4b5f931b
MD
91 int nq = td->td_pri & TDPRI_MASK;
92 struct globaldata *gd = mycpu;
93
f1d1c3fa 94 td->td_flags |= TDF_RUNQ;
4b5f931b
MD
95 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
96 gd->gd_runqmask |= 1 << nq;
26a0694b
MD
97#if 0
98 /*
99 * YYY needs cli/sti protection? gd_reqpri set by interrupt
100 * when made pending. need better mechanism.
101 */
102 if (gd->gd_reqpri < (td->td_pri & TDPRI_MASK))
103 gd->gd_reqpri = (td->td_pri & TDPRI_MASK);
104#endif
f1d1c3fa
MD
105 }
106}
8ad65e08
MD
107
108/*
109 * LWKTs operate on a per-cpu basis
110 *
73e4f7b9 111 * WARNING! Called from early boot, 'mycpu' may not work yet.
8ad65e08
MD
112 */
113void
114lwkt_gdinit(struct globaldata *gd)
115{
4b5f931b
MD
116 int i;
117
118 for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
119 TAILQ_INIT(&gd->gd_tdrunq[i]);
120 gd->gd_runqmask = 0;
73e4f7b9 121 TAILQ_INIT(&gd->gd_tdallq);
8ad65e08
MD
122}
123
7d0bac62
MD
124/*
125 * Initialize a thread wait structure prior to first use.
126 *
127 * NOTE! called from low level boot code, we cannot do anything fancy!
128 */
129void
130lwkt_init_wait(lwkt_wait_t w)
131{
132 TAILQ_INIT(&w->wa_waitq);
133}
134
135/*
136 * Create a new thread. The thread must be associated with a process context
137 * or LWKT start address before it can be scheduled.
0cfcada1
MD
138 *
139 * If you intend to create a thread without a process context this function
140 * does everything except load the startup and switcher function.
7d0bac62
MD
141 */
142thread_t
ef0fdad1 143lwkt_alloc_thread(struct thread *td)
7d0bac62 144{
99df837e 145 void *stack;
ef0fdad1 146 int flags = 0;
7d0bac62 147
ef0fdad1 148 if (td == NULL) {
26a0694b 149 crit_enter();
ef0fdad1
MD
150 if (mycpu->gd_tdfreecount > 0) {
151 --mycpu->gd_tdfreecount;
152 td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
153 KASSERT(td != NULL && (td->td_flags & TDF_EXITED),
154 ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
155 TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
156 crit_exit();
157 stack = td->td_kstack;
158 flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
159 } else {
160 crit_exit();
161 td = zalloc(thread_zone);
162 td->td_kstack = NULL;
163 flags |= TDF_ALLOCATED_THREAD;
164 }
165 }
166 if ((stack = td->td_kstack) == NULL) {
99df837e 167 stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
ef0fdad1 168 flags |= TDF_ALLOCATED_STACK;
99df837e 169 }
26a0694b 170 lwkt_init_thread(td, stack, flags, mycpu);
99df837e 171 return(td);
7d0bac62
MD
172}
173
174/*
175 * Initialize a preexisting thread structure. This function is used by
176 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
177 *
178 * NOTE! called from low level boot code, we cannot do anything fancy!
179 */
180void
26a0694b 181lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
7d0bac62 182{
99df837e
MD
183 bzero(td, sizeof(struct thread));
184 td->td_kstack = stack;
185 td->td_flags |= flags;
26a0694b
MD
186 td->td_gd = gd;
187 td->td_pri = TDPRI_CRIT;
8a8d5d85 188 td->td_cpu = gd->gd_cpuid; /* YYY don't need this if have td_gd */
99df837e 189 pmap_init_thread(td);
73e4f7b9
MD
190 crit_enter();
191 TAILQ_INSERT_TAIL(&mycpu->gd_tdallq, td, td_allq);
192 crit_exit();
193}
194
195void
196lwkt_set_comm(thread_t td, const char *ctl, ...)
197{
198 va_list va;
199
200 va_start(va, ctl);
201 vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
202 va_end(va);
7d0bac62
MD
203}
204
99df837e 205void
73e4f7b9 206lwkt_hold(thread_t td)
99df837e 207{
73e4f7b9
MD
208 ++td->td_refs;
209}
210
211void
212lwkt_rele(thread_t td)
213{
214 KKASSERT(td->td_refs > 0);
215 --td->td_refs;
216}
217
218void
219lwkt_wait_free(thread_t td)
220{
221 while (td->td_refs)
222 tsleep(td, PWAIT, "tdreap", hz);
223}
224
225void
226lwkt_free_thread(thread_t td)
227{
228 struct globaldata *gd = mycpu;
229
99df837e
MD
230 KASSERT(td->td_flags & TDF_EXITED,
231 ("lwkt_free_thread: did not exit! %p", td));
232
233 crit_enter();
73e4f7b9
MD
234 TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
235 if (gd->gd_tdfreecount < CACHE_NTHREADS &&
99df837e
MD
236 (td->td_flags & TDF_ALLOCATED_THREAD)
237 ) {
73e4f7b9
MD
238 ++gd->gd_tdfreecount;
239 TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
99df837e
MD
240 crit_exit();
241 } else {
242 crit_exit();
243 if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
244 kmem_free(kernel_map,
245 (vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
73e4f7b9 246 /* gd invalid */
99df837e
MD
247 td->td_kstack = NULL;
248 }
249 if (td->td_flags & TDF_ALLOCATED_THREAD)
250 zfree(thread_zone, td);
251 }
252}
253
254
8ad65e08
MD
255/*
256 * Switch to the next runnable lwkt. If no LWKTs are runnable then
f1d1c3fa
MD
257 * switch to the idlethread. Switching must occur within a critical
258 * section to avoid races with the scheduling queue.
259 *
260 * We always have full control over our cpu's run queue. Other cpus
261 * that wish to manipulate our queue must use the cpu_*msg() calls to
262 * talk to our cpu, so a critical section is all that is needed and
263 * the result is very, very fast thread switching.
264 *
265 * We always 'own' our own thread and the threads on our run queue,l
266 * due to TDF_RUNNING or TDF_RUNQ being set. We can safely clear
267 * TDF_RUNNING while in a critical section.
268 *
269 * The td_switch() function must be called while in the critical section.
270 * This function saves as much state as is appropriate for the type of
271 * thread.
272 *
273 * (self contained on a per cpu basis)
8ad65e08
MD
274 */
275void
276lwkt_switch(void)
277{
4b5f931b 278 struct globaldata *gd;
f1d1c3fa 279 thread_t td = curthread;
8ad65e08 280 thread_t ntd;
8a8d5d85
MD
281#ifdef SMP
282 int mpheld;
283#endif
8ad65e08 284
b68b7282 285 if (mycpu->gd_intr_nesting_level && td->td_preempted == NULL)
26a0694b 286 panic("lwkt_switch: cannot switch from within an interrupt, yet\n");
ef0fdad1 287
f1d1c3fa 288 crit_enter();
4b5f931b 289 ++switch_count;
8a8d5d85
MD
290
291#ifdef SMP
292 /*
293 * td_mpcount cannot be used to determine if we currently hold the
294 * MP lock because get_mplock() will increment it prior to attempting
295 * to get the lock, and switch out if it can't. Look at the actual lock.
296 */
297 mpheld = MP_LOCK_HELD();
298#endif
99df837e
MD
299 if ((ntd = td->td_preempted) != NULL) {
300 /*
301 * We had preempted another thread on this cpu, resume the preempted
26a0694b
MD
302 * thread. This occurs transparently, whether the preempted thread
303 * was scheduled or not (it may have been preempted after descheduling
8a8d5d85
MD
304 * itself).
305 *
306 * We have to setup the MP lock for the original thread after backing
307 * out the adjustment that was made to curthread when the original
308 * was preempted.
99df837e 309 */
26a0694b 310 KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
8a8d5d85
MD
311#ifdef SMP
312 if (ntd->td_mpcount) {
313 td->td_mpcount -= ntd->td_mpcount;
314 KKASSERT(td->td_mpcount >= 0);
315 }
316#endif
26a0694b 317 ntd->td_flags |= TDF_PREEMPT_DONE;
8a8d5d85 318 /* YYY release mp lock on switchback if original doesn't need it */
8ad65e08 319 } else {
4b5f931b
MD
320 /*
321 * Priority queue / round-robin at each priority. Note that user
322 * processes run at a fixed, low priority and the user process
323 * scheduler deals with interactions between user processes
324 * by scheduling and descheduling them from the LWKT queue as
325 * necessary.
8a8d5d85
MD
326 *
327 * We have to adjust the MP lock for the target thread. If we
328 * need the MP lock and cannot obtain it we try to locate a
329 * thread that does not need the MP lock.
4b5f931b
MD
330 */
331 gd = mycpu;
4b5f931b
MD
332again:
333 if (gd->gd_runqmask) {
334 int nq = bsrl(gd->gd_runqmask);
335 if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
336 gd->gd_runqmask &= ~(1 << nq);
337 goto again;
338 }
8a8d5d85
MD
339#ifdef SMP
340 if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
341 /*
342 * Target needs MP lock and we couldn't get it.
343 */
344 u_int32_t rqmask = gd->gd_runqmask;
345 while (rqmask) {
346 TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
347 if (ntd->td_mpcount == 0)
348 break;
349 }
350 if (ntd)
351 break;
352 rqmask &= ~(1 << nq);
353 nq = bsrl(rqmask);
354 }
355 if (ntd == NULL) {
356 ntd = gd->gd_idletd;
357 } else {
358 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
359 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
360 }
361 } else {
362 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
363 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
364 }
365#else
4b5f931b
MD
366 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
367 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
8a8d5d85 368#endif
4b5f931b
MD
369 } else {
370 ntd = gd->gd_idletd;
371 }
f1d1c3fa 372 }
26a0694b
MD
373 KASSERT(ntd->td_pri >= TDPRI_CRIT,
374 ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
8a8d5d85
MD
375
376 /*
377 * Do the actual switch. If the new target does not need the MP lock
378 * and we are holding it, release the MP lock. If the new target requires
379 * the MP lock we have already acquired it for the target.
380 */
381#ifdef SMP
382 if (ntd->td_mpcount == 0 ) {
383 if (MP_LOCK_HELD())
384 cpu_rel_mplock();
385 } else {
386 ASSERT_MP_LOCK_HELD();
387 }
388#endif
389
390 if (td != ntd) {
f1d1c3fa 391 td->td_switch(ntd);
8a8d5d85 392 }
f1d1c3fa 393 crit_exit();
8ad65e08
MD
394}
395
b68b7282 396/*
26a0694b
MD
397 * Request that the target thread preempt the current thread. This only
398 * works if:
b68b7282 399 *
26a0694b
MD
400 * + We aren't trying to preempt ourselves (it can happen!)
401 * + We are not currently being preempted
8a8d5d85
MD
402 * + The target is not currently being preempted
403 * + The target either does not need the MP lock or we can get it
404 * for the target immediately.
b68b7282 405 *
26a0694b
MD
406 * XXX at the moment we run the target thread in a critical section during
407 * the preemption in order to prevent the target from taking interrupts
408 * that *WE* can't. Preemption is strictly limited to interrupt threads
409 * and interrupt-like threads, outside of a critical section, and the
410 * preempted source thread will be resumed the instant the target blocks
411 * whether or not the source is scheduled (i.e. preemption is supposed to
412 * be as transparent as possible).
4b5f931b
MD
413 *
414 * This call is typically made from an interrupt handler like sched_ithd()
415 * which will only run if the current thread is not in a critical section,
416 * so we optimize the priority check a bit.
26a0694b
MD
417 *
418 * CAREFUL! either we or the target thread may get interrupted during the
419 * switch.
8a8d5d85
MD
420 *
421 * The target thread inherits our MP count (added to its own) for the
422 * duration of the preemption in order to preserve the atomicy of the
423 * preemption.
b68b7282
MD
424 */
425void
73e4f7b9 426lwkt_preempt(thread_t ntd, int id)
b68b7282 427{
73e4f7b9 428 thread_t td = curthread;
8a8d5d85
MD
429#ifdef SMP
430 int mpheld;
431#endif
b68b7282 432
26a0694b
MD
433 /*
434 * The caller has put us in a critical section, and in order to have
435 * gotten here in the first place the thread the caller interrupted
436 * cannot have been in a critical section before.
437 */
438 KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
439 KASSERT((td->td_pri & ~TDPRI_MASK) == TDPRI_CRIT, ("BADPRI %d", td->td_pri));
440
441 if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
442 ++preempt_weird;
443 return;
444 }
445 if (ntd->td_preempted) {
4b5f931b 446 ++preempt_hit;
26a0694b
MD
447 return;
448 }
449 if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
4b5f931b 450 ++preempt_miss;
26a0694b 451 return;
b68b7282 452 }
8a8d5d85
MD
453#ifdef SMP
454 mpheld = MP_LOCK_HELD();
455 ntd->td_mpcount += td->td_mpcount;
456 if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
457 ntd->td_mpcount -= td->td_mpcount;
458 ++preempt_miss;
459 return;
460 }
461#endif
26a0694b
MD
462
463 ++preempt_hit;
464 ntd->td_preempted = td;
465 td->td_flags |= TDF_PREEMPT_LOCK;
466 td->td_switch(ntd);
467 KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
468 ntd->td_preempted = NULL;
469 td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
b68b7282
MD
470}
471
f1d1c3fa
MD
472/*
473 * Yield our thread while higher priority threads are pending. This is
474 * typically called when we leave a critical section but it can be safely
475 * called while we are in a critical section.
476 *
477 * This function will not generally yield to equal priority threads but it
478 * can occur as a side effect. Note that lwkt_switch() is called from
479 * inside the critical section to pervent its own crit_exit() from reentering
480 * lwkt_yield_quick().
481 *
ef0fdad1
MD
482 * gd_reqpri indicates that *something* changed, e.g. an interrupt or softint
483 * came along but was blocked and made pending.
484 *
f1d1c3fa
MD
485 * (self contained on a per cpu basis)
486 */
487void
488lwkt_yield_quick(void)
489{
490 thread_t td = curthread;
ef0fdad1
MD
491
492 if ((td->td_pri & TDPRI_MASK) < mycpu->gd_reqpri) {
493 mycpu->gd_reqpri = 0;
f1d1c3fa
MD
494 splz();
495 }
496
497 /*
498 * YYY enabling will cause wakeup() to task-switch, which really
499 * confused the old 4.x code. This is a good way to simulate
7d0bac62
MD
500 * preemption and MP without actually doing preemption or MP, because a
501 * lot of code assumes that wakeup() does not block.
f1d1c3fa 502 */
ef0fdad1 503 if (untimely_switch && mycpu->gd_intr_nesting_level == 0) {
f1d1c3fa
MD
504 crit_enter();
505 /*
506 * YYY temporary hacks until we disassociate the userland scheduler
507 * from the LWKT scheduler.
508 */
509 if (td->td_flags & TDF_RUNQ) {
510 lwkt_switch(); /* will not reenter yield function */
511 } else {
512 lwkt_schedule_self(); /* make sure we are scheduled */
513 lwkt_switch(); /* will not reenter yield function */
514 lwkt_deschedule_self(); /* make sure we are descheduled */
515 }
516 crit_exit_noyield();
517 }
f1d1c3fa
MD
518}
519
/*
 * Normal yield: put ourselves on the run queue and switch.  Unlike
 * lwkt_yield_quick() this yields to equal priority threads as well.
 * Note that gd_reqpri tests will be handled by the crit_exit() call in
 * lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
    lwkt_schedule_self();	/* stay runnable */
    lwkt_switch();		/* let someone else go first */
}
533
534/*
535 * Schedule a thread to run. As the current thread we can always safely
536 * schedule ourselves, and a shortcut procedure is provided for that
537 * function.
538 *
539 * (non-blocking, self contained on a per cpu basis)
540 */
541void
542lwkt_schedule_self(void)
543{
544 thread_t td = curthread;
545
546 crit_enter();
547 KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
f1d1c3fa 548 _lwkt_enqueue(td);
26a0694b
MD
549 if (td->td_proc && td->td_proc->p_stat == SSLEEP)
550 panic("SCHED SELF PANIC");
f1d1c3fa 551 crit_exit();
8ad65e08 552}
8ad65e08
MD
553
554/*
f1d1c3fa
MD
555 * Generic schedule. Possibly schedule threads belonging to other cpus and
556 * deal with threads that might be blocked on a wait queue.
557 *
558 * This function will queue requests asynchronously when possible, but may
559 * block if no request structures are available. Upon return the caller
560 * should note that the scheduling request may not yet have been processed
561 * by the target cpu.
562 *
563 * YYY this is one of the best places to implement any load balancing code.
564 * Load balancing can be accomplished by requesting other sorts of actions
565 * for the thread in question.
8ad65e08
MD
566 */
567void
568lwkt_schedule(thread_t td)
569{
26a0694b
MD
570 if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
571 && td->td_proc->p_stat == SSLEEP
572 ) {
573 printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
574 curthread,
575 curthread->td_proc ? curthread->td_proc->p_pid : -1,
576 curthread->td_proc ? curthread->td_proc->p_stat : -1,
577 td,
578 td->td_proc ? curthread->td_proc->p_pid : -1,
579 td->td_proc ? curthread->td_proc->p_stat : -1
580 );
581 panic("SCHED PANIC");
582 }
f1d1c3fa
MD
583 crit_enter();
584 if (td == curthread) {
585 _lwkt_enqueue(td);
586 } else {
587 lwkt_wait_t w;
588
589 /*
590 * If the thread is on a wait list we have to send our scheduling
591 * request to the owner of the wait structure. Otherwise we send
592 * the scheduling request to the cpu owning the thread. Races
593 * are ok, the target will forward the message as necessary (the
594 * message may chase the thread around before it finally gets
595 * acted upon).
596 *
597 * (remember, wait structures use stable storage)
598 */
599 if ((w = td->td_wait) != NULL) {
600 if (lwkt_havetoken(&w->wa_token)) {
601 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
602 --w->wa_count;
603 td->td_wait = NULL;
d0e06f83 604 if (td->td_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
605 _lwkt_enqueue(td);
606 } else {
607 panic("lwkt_schedule: cpu mismatch1");
8ad65e08 608#if 0
f1d1c3fa
MD
609 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
610 initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
611 cpu_sendnormsg(&msg.mu_Msg);
8ad65e08 612#endif
f1d1c3fa
MD
613 }
614 } else {
615 panic("lwkt_schedule: cpu mismatch2");
616#if 0
617 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
618 initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
619 cpu_sendnormsg(&msg.mu_Msg);
620#endif
621 }
622 } else {
623 /*
624 * If the wait structure is NULL and we own the thread, there
625 * is no race (since we are in a critical section). If we
626 * do not own the thread there might be a race but the
627 * target cpu will deal with it.
628 */
d0e06f83 629 if (td->td_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
630 _lwkt_enqueue(td);
631 } else {
632 panic("lwkt_schedule: cpu mismatch3");
633#if 0
634 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
635 initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
636 cpu_sendnormsg(&msg.mu_Msg);
637#endif
638 }
639 }
8ad65e08 640 }
f1d1c3fa 641 crit_exit();
8ad65e08
MD
642}
643
644/*
f1d1c3fa
MD
645 * Deschedule a thread.
646 *
647 * (non-blocking, self contained on a per cpu basis)
648 */
649void
650lwkt_deschedule_self(void)
651{
652 thread_t td = curthread;
653
654 crit_enter();
655 KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
f1d1c3fa
MD
656 _lwkt_dequeue(td);
657 crit_exit();
658}
659
660/*
661 * Generic deschedule. Descheduling threads other then your own should be
662 * done only in carefully controlled circumstances. Descheduling is
663 * asynchronous.
664 *
665 * This function may block if the cpu has run out of messages.
8ad65e08
MD
666 */
667void
668lwkt_deschedule(thread_t td)
669{
f1d1c3fa
MD
670 crit_enter();
671 if (td == curthread) {
672 _lwkt_dequeue(td);
673 } else {
d0e06f83 674 if (td->td_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
675 _lwkt_dequeue(td);
676 } else {
677 panic("lwkt_deschedule: cpu mismatch");
678#if 0
679 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
680 initDescheduleReqMsg_Thread(&msg.mu_DeschedReq, td);
681 cpu_sendnormsg(&msg.mu_Msg);
682#endif
683 }
684 }
685 crit_exit();
686}
687
4b5f931b
MD
688/*
689 * Set the target thread's priority. This routine does not automatically
690 * switch to a higher priority thread, LWKT threads are not designed for
691 * continuous priority changes. Yield if you want to switch.
692 *
693 * We have to retain the critical section count which uses the high bits
26a0694b
MD
694 * of the td_pri field. The specified priority may also indicate zero or
695 * more critical sections by adding TDPRI_CRIT*N.
4b5f931b
MD
696 */
697void
698lwkt_setpri(thread_t td, int pri)
699{
26a0694b
MD
700 KKASSERT(pri >= 0);
701 crit_enter();
702 if (td->td_flags & TDF_RUNQ) {
703 _lwkt_dequeue(td);
704 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
705 _lwkt_enqueue(td);
706 } else {
707 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
708 }
709 crit_exit();
710}
711
712void
713lwkt_setpri_self(int pri)
714{
715 thread_t td = curthread;
716
4b5f931b
MD
717 KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
718 crit_enter();
719 if (td->td_flags & TDF_RUNQ) {
720 _lwkt_dequeue(td);
721 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
722 _lwkt_enqueue(td);
723 } else {
724 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
725 }
726 crit_exit();
727}
728
729struct proc *
730lwkt_preempted_proc(void)
731{
73e4f7b9 732 thread_t td = curthread;
4b5f931b
MD
733 while (td->td_preempted)
734 td = td->td_preempted;
735 return(td->td_proc);
736}
737
738
f1d1c3fa
MD
739/*
740 * This function deschedules the current thread and blocks on the specified
741 * wait queue. We obtain ownership of the wait queue in order to block
742 * on it. A generation number is used to interlock the wait queue in case
743 * it gets signalled while we are blocked waiting on the token.
744 *
745 * Note: alternatively we could dequeue our thread and then message the
746 * target cpu owning the wait queue. YYY implement as sysctl.
747 *
748 * Note: wait queue signals normally ping-pong the cpu as an optimization.
749 */
750void
ae8050a4 751lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
f1d1c3fa
MD
752{
753 thread_t td = curthread;
f1d1c3fa 754
f1d1c3fa 755 lwkt_gettoken(&w->wa_token);
ae8050a4 756 if (w->wa_gen == *gen) {
f1d1c3fa
MD
757 _lwkt_dequeue(td);
758 TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
759 ++w->wa_count;
760 td->td_wait = w;
ae8050a4 761 td->td_wmesg = wmesg;
f1d1c3fa 762 lwkt_switch();
8ad65e08 763 }
ae8050a4
MD
764 /* token might be lost, doesn't matter for gen update */
765 *gen = w->wa_gen;
f1d1c3fa
MD
766 lwkt_reltoken(&w->wa_token);
767}
768
769/*
770 * Signal a wait queue. We gain ownership of the wait queue in order to
771 * signal it. Once a thread is removed from the wait queue we have to
772 * deal with the cpu owning the thread.
773 *
774 * Note: alternatively we could message the target cpu owning the wait
775 * queue. YYY implement as sysctl.
776 */
777void
778lwkt_signal(lwkt_wait_t w)
779{
780 thread_t td;
781 int count;
782
783 lwkt_gettoken(&w->wa_token);
784 ++w->wa_gen;
785 count = w->wa_count;
786 while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
787 --count;
788 --w->wa_count;
789 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
790 td->td_wait = NULL;
ae8050a4 791 td->td_wmesg = NULL;
d0e06f83 792 if (td->td_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
793 _lwkt_enqueue(td);
794 } else {
795#if 0
796 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
797 initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
798 cpu_sendnormsg(&msg.mu_Msg);
799#endif
800 panic("lwkt_signal: cpu mismatch");
801 }
802 lwkt_regettoken(&w->wa_token);
803 }
804 lwkt_reltoken(&w->wa_token);
805}
806
807/*
808 * Aquire ownership of a token
809 *
810 * Aquire ownership of a token. The token may have spl and/or critical
811 * section side effects, depending on its purpose. These side effects
812 * guarentee that you will maintain ownership of the token as long as you
813 * do not block. If you block you may lose access to the token (but you
814 * must still release it even if you lose your access to it).
815 *
816 * Note that the spl and critical section characteristics of a token
817 * may not be changed once the token has been initialized.
818 */
8a8d5d85 819int
f1d1c3fa
MD
820lwkt_gettoken(lwkt_token_t tok)
821{
822 /*
823 * Prevent preemption so the token can't be taken away from us once
824 * we gain ownership of it. Use a synchronous request which might
825 * block. The request will be forwarded as necessary playing catchup
826 * to the token.
827 */
828 crit_enter();
829#if 0
d0e06f83 830 while (tok->t_cpu != mycpu->gd_cpuid) {
f1d1c3fa
MD
831 lwkt_cpu_msg_union msg;
832 initTokenReqMsg(&msg.mu_TokenReq);
833 cpu_domsg(&msg);
834 }
835#endif
836 /*
837 * leave us in a critical section on return. This will be undone
8a8d5d85 838 * by lwkt_reltoken(). Bump the generation number.
f1d1c3fa 839 */
8a8d5d85 840 return(++tok->t_gen);
f1d1c3fa
MD
841}
842
843/*
844 * Release your ownership of a token. Releases must occur in reverse
845 * order to aquisitions, eventually so priorities can be unwound properly
846 * like SPLs. At the moment the actual implemention doesn't care.
847 *
848 * We can safely hand a token that we own to another cpu without notifying
849 * it, but once we do we can't get it back without requesting it (unless
850 * the other cpu hands it back to us before we check).
851 *
852 * We might have lost the token, so check that.
853 */
854void
855lwkt_reltoken(lwkt_token_t tok)
856{
d0e06f83 857 if (tok->t_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
858 tok->t_cpu = tok->t_reqcpu;
859 }
860 crit_exit();
861}
862
863/*
8a8d5d85
MD
864 * Reacquire a token that might have been lost and compare and update the
865 * generation number. 0 is returned if the generation has not changed
866 * (nobody else obtained the token while we were blocked, on this cpu or
867 * any other cpu).
868 *
869 * This function returns with the token re-held whether the generation
870 * number changed or not.
871 */
872int
873lwkt_gentoken(lwkt_token_t tok, int *gen)
874{
875 if (lwkt_regettoken(tok) == *gen) {
876 return(0);
877 } else {
878 *gen = tok->t_gen;
879 return(-1);
880 }
881}
882
883
884/*
885 * Reacquire a token that might have been lost. Returns the generation
886 * number of the token.
f1d1c3fa
MD
887 */
888int
889lwkt_regettoken(lwkt_token_t tok)
890{
891#if 0
d0e06f83
MD
892 if (tok->t_cpu != mycpu->gd_cpuid) {
893 while (tok->t_cpu != mycpu->gd_cpuid) {
f1d1c3fa
MD
894 lwkt_cpu_msg_union msg;
895 initTokenReqMsg(&msg.mu_TokenReq);
896 cpu_domsg(&msg);
897 }
f1d1c3fa
MD
898 }
899#endif
8a8d5d85 900 return(tok->t_gen);
8ad65e08
MD
901}
902
72740893
MD
903void
904lwkt_inittoken(lwkt_token_t tok)
905{
906 /*
907 * Zero structure and set cpu owner and reqcpu to cpu 0.
908 */
909 bzero(tok, sizeof(*tok));
910}
911
99df837e
MD
912/*
913 * Create a kernel process/thread/whatever. It shares it's address space
914 * with proc0 - ie: kernel only.
915 *
916 * XXX should be renamed to lwkt_create()
8a8d5d85
MD
917 *
918 * The thread will be entered with the MP lock held.
99df837e
MD
919 */
920int
921lwkt_create(void (*func)(void *), void *arg,
73e4f7b9 922 struct thread **tdp, thread_t template, int tdflags,
ef0fdad1 923 const char *fmt, ...)
99df837e 924{
73e4f7b9 925 thread_t td;
99df837e
MD
926 va_list ap;
927
ef0fdad1 928 td = *tdp = lwkt_alloc_thread(template);
99df837e 929 cpu_set_thread_handler(td, kthread_exit, func, arg);
ef0fdad1 930 td->td_flags |= TDF_VERBOSE | tdflags;
8a8d5d85
MD
931#ifdef SMP
932 td->td_mpcount = 1;
933#endif
99df837e
MD
934
935 /*
936 * Set up arg0 for 'ps' etc
937 */
938 va_start(ap, fmt);
939 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
940 va_end(ap);
941
942 /*
943 * Schedule the thread to run
944 */
ef0fdad1
MD
945 if ((td->td_flags & TDF_STOPREQ) == 0)
946 lwkt_schedule(td);
947 else
948 td->td_flags &= ~TDF_STOPREQ;
99df837e
MD
949 return 0;
950}
951
952/*
953 * Destroy an LWKT thread. Warning! This function is not called when
954 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
955 * uses a different reaping mechanism.
956 */
957void
958lwkt_exit(void)
959{
960 thread_t td = curthread;
961
962 if (td->td_flags & TDF_VERBOSE)
963 printf("kthread %p %s has exited\n", td, td->td_comm);
964 crit_enter();
965 lwkt_deschedule_self();
966 ++mycpu->gd_tdfreecount;
967 TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
968 cpu_thread_exit();
969}
970
971/*
972 * Create a kernel process/thread/whatever. It shares it's address space
ef0fdad1 973 * with proc0 - ie: kernel only. 5.x compatible.
99df837e
MD
974 */
975int
976kthread_create(void (*func)(void *), void *arg,
977 struct thread **tdp, const char *fmt, ...)
978{
73e4f7b9 979 thread_t td;
99df837e
MD
980 va_list ap;
981
ef0fdad1 982 td = *tdp = lwkt_alloc_thread(NULL);
99df837e
MD
983 cpu_set_thread_handler(td, kthread_exit, func, arg);
984 td->td_flags |= TDF_VERBOSE;
8a8d5d85
MD
985#ifdef SMP
986 td->td_mpcount = 1;
987#endif
99df837e
MD
988
989 /*
990 * Set up arg0 for 'ps' etc
991 */
992 va_start(ap, fmt);
993 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
994 va_end(ap);
995
996 /*
997 * Schedule the thread to run
998 */
999 lwkt_schedule(td);
1000 return 0;
1001}
1002
26a0694b
MD
1003void
1004crit_panic(void)
1005{
73e4f7b9 1006 thread_t td = curthread;
26a0694b
MD
1007 int lpri = td->td_pri;
1008
1009 td->td_pri = 0;
1010 panic("td_pri is/would-go negative! %p %d", td, lpri);
1011}
1012
99df837e
MD
/*
 * Destroy an LWKT thread.   Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 *
 * XXX duplicates lwkt_exit(); this is a plain forwarder to it.
 */
void
kthread_exit(void)
{
    lwkt_exit();
}
1025