Remove thread->td_cpu. thread->td_gd (which points to the globaldata
[dragonfly.git] / sys / kern / lwkt_thread.c
8ad65e08
MD
1/*
2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
f1d1c3fa
MD
26 * Each cpu in a system has its own self-contained light weight kernel
27 * thread scheduler, which means that generally speaking we only need
96728c05
MD
28 * to use a critical section to avoid problems. Foreign thread
29 * scheduling is queued via (async) IPIs.
f1d1c3fa 30 *
a72187e9 31 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.27 2003/07/25 05:26:50 dillon Exp $
8ad65e08
MD
32 */
33
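/*
 * Usage sketch (illustrative only, not compiled): the pattern the rest of
 * this file follows.  Local scheduler state is only touched from inside a
 * critical section; a thread owned by another cpu is handed to that cpu
 * via an async IPI rather than being manipulated directly.  The function
 * name below is a placeholder.
 */
#if 0
static void
example_make_runnable(thread_t td)
{
    crit_enter();
    if (td->td_gd == mycpu) {
        _lwkt_enqueue(td);      /* our cpu owns td, touch the runq directly */
    } else {
        /* the owning cpu does the enqueue itself */
        lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
    }
    crit_exit();
}
#endif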
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/proc.h>
38#include <sys/rtprio.h>
39#include <sys/queue.h>
f1d1c3fa 40#include <sys/thread2.h>
7d0bac62 41#include <sys/sysctl.h>
99df837e 42#include <sys/kthread.h>
f1d1c3fa 43#include <machine/cpu.h>
99df837e 44#include <sys/lock.h>
f1d1c3fa 45
7d0bac62
MD
46#include <vm/vm.h>
47#include <vm/vm_param.h>
48#include <vm/vm_kern.h>
49#include <vm/vm_object.h>
50#include <vm/vm_page.h>
51#include <vm/vm_map.h>
52#include <vm/vm_pager.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_zone.h>
55
99df837e 56#include <machine/stdarg.h>
57c254db 57#include <machine/ipl.h>
96728c05
MD
58#ifdef SMP
59#include <machine/smp.h>
60#endif
99df837e 61
7d0bac62 62static int untimely_switch = 0;
4b5f931b 63SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
57c254db
MD
64#ifdef INVARIANTS
65static int token_debug = 0;
66SYSCTL_INT(_lwkt, OID_AUTO, token_debug, CTLFLAG_RW, &token_debug, 0, "");
67#endif
4b5f931b
MD
68static quad_t switch_count = 0;
69SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
70static quad_t preempt_hit = 0;
71SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
72static quad_t preempt_miss = 0;
73SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
26a0694b
MD
74static quad_t preempt_weird = 0;
75SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
96728c05
MD
76static quad_t ipiq_count = 0;
77SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
78static quad_t ipiq_fifofull = 0;
79SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
7d0bac62 80
4b5f931b
MD
81/*
82 * These helper procedures handle the runq, they can only be called from
83 * within a critical section.
84 */
f1d1c3fa
MD
85static __inline
86void
87_lwkt_dequeue(thread_t td)
88{
89 if (td->td_flags & TDF_RUNQ) {
4b5f931b
MD
90 int nq = td->td_pri & TDPRI_MASK;
91 struct globaldata *gd = mycpu;
92
f1d1c3fa 93 td->td_flags &= ~TDF_RUNQ;
4b5f931b
MD
94 TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
95 /* runqmask is passively cleaned up by the switcher */
f1d1c3fa
MD
96 }
97}
98
99static __inline
100void
101_lwkt_enqueue(thread_t td)
102{
103 if ((td->td_flags & TDF_RUNQ) == 0) {
4b5f931b
MD
104 int nq = td->td_pri & TDPRI_MASK;
105 struct globaldata *gd = mycpu;
106
f1d1c3fa 107 td->td_flags |= TDF_RUNQ;
4b5f931b
MD
108 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
109 gd->gd_runqmask |= 1 << nq;
f1d1c3fa
MD
110 }
111}
8ad65e08 112
57c254db
MD
113static __inline
114int
115_lwkt_wantresched(thread_t ntd, thread_t cur)
116{
117 return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK));
118}
119
8ad65e08
MD
120/*
121 * LWKTs operate on a per-cpu basis
122 *
73e4f7b9 123 * WARNING! Called from early boot, 'mycpu' may not work yet.
8ad65e08
MD
124 */
125void
126lwkt_gdinit(struct globaldata *gd)
127{
4b5f931b
MD
128 int i;
129
130 for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
131 TAILQ_INIT(&gd->gd_tdrunq[i]);
132 gd->gd_runqmask = 0;
73e4f7b9 133 TAILQ_INIT(&gd->gd_tdallq);
8ad65e08
MD
134}
135
7d0bac62
MD
136/*
137 * Initialize a thread wait structure prior to first use.
138 *
139 * NOTE! called from low level boot code, we cannot do anything fancy!
140 */
141void
142lwkt_init_wait(lwkt_wait_t w)
143{
144 TAILQ_INIT(&w->wa_waitq);
145}
146
147/*
148 * Create a new thread. The thread must be associated with a process context
149 * or LWKT start address before it can be scheduled.
0cfcada1
MD
150 *
151 * If you intend to create a thread without a process context this function
152 * does everything except load the startup and switcher function.
7d0bac62
MD
153 */
154thread_t
ef0fdad1 155lwkt_alloc_thread(struct thread *td)
7d0bac62 156{
99df837e 157 void *stack;
ef0fdad1 158 int flags = 0;
7d0bac62 159
ef0fdad1 160 if (td == NULL) {
26a0694b 161 crit_enter();
ef0fdad1
MD
162 if (mycpu->gd_tdfreecount > 0) {
163 --mycpu->gd_tdfreecount;
164 td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
d9eea1a5 165 KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
ef0fdad1
MD
166 ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
167 TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
168 crit_exit();
169 stack = td->td_kstack;
170 flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
171 } else {
172 crit_exit();
173 td = zalloc(thread_zone);
174 td->td_kstack = NULL;
175 flags |= TDF_ALLOCATED_THREAD;
176 }
177 }
178 if ((stack = td->td_kstack) == NULL) {
99df837e 179 stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
ef0fdad1 180 flags |= TDF_ALLOCATED_STACK;
99df837e 181 }
26a0694b 182 lwkt_init_thread(td, stack, flags, mycpu);
99df837e 183 return(td);
7d0bac62
MD
184}
185
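/*
 * Usage sketch (illustrative only, not compiled): creating a kernel-only
 * thread by hand.  lwkt_alloc_thread() leaves everything initialized
 * except the startup function, which must be loaded before the first
 * schedule; lwkt_create() below wraps this exact sequence.  'my_service'
 * is a placeholder.
 */
#if 0
static void my_service(void *arg);

static void
example_start_thread(void)
{
    thread_t td = lwkt_alloc_thread(NULL);

    cpu_set_thread_handler(td, kthread_exit, my_service, NULL);
    lwkt_schedule(td);
}
#endif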
186/*
187 * Initialize a preexisting thread structure. This function is used by
188 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
189 *
190 * NOTE! called from low level boot code, we cannot do anything fancy!
a72187e9 191 * Only the low level boot code will call this function with gd != mycpu.
7d0bac62
MD
192 */
193void
26a0694b 194lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
7d0bac62 195{
99df837e
MD
196 bzero(td, sizeof(struct thread));
197 td->td_kstack = stack;
198 td->td_flags |= flags;
26a0694b
MD
199 td->td_gd = gd;
200 td->td_pri = TDPRI_CRIT;
ece04fd0 201 lwkt_init_port(&td->td_msgport, td);
99df837e 202 pmap_init_thread(td);
73e4f7b9 203 crit_enter();
a72187e9 204 TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
73e4f7b9
MD
205 crit_exit();
206}
207
208void
209lwkt_set_comm(thread_t td, const char *ctl, ...)
210{
211 va_list va;
212
213 va_start(va, ctl);
214 vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
215 va_end(va);
7d0bac62
MD
216}
217
99df837e 218void
73e4f7b9 219lwkt_hold(thread_t td)
99df837e 220{
73e4f7b9
MD
221 ++td->td_refs;
222}
223
224void
225lwkt_rele(thread_t td)
226{
227 KKASSERT(td->td_refs > 0);
228 --td->td_refs;
229}
230
231void
232lwkt_wait_free(thread_t td)
233{
234 while (td->td_refs)
377d4740 235 tsleep(td, 0, "tdreap", hz);
73e4f7b9
MD
236}
237
238void
239lwkt_free_thread(thread_t td)
240{
241 struct globaldata *gd = mycpu;
242
d9eea1a5 243 KASSERT((td->td_flags & TDF_RUNNING) == 0,
99df837e
MD
244 ("lwkt_free_thread: did not exit! %p", td));
245
246 crit_enter();
73e4f7b9
MD
247 TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
248 if (gd->gd_tdfreecount < CACHE_NTHREADS &&
99df837e
MD
249 (td->td_flags & TDF_ALLOCATED_THREAD)
250 ) {
73e4f7b9
MD
251 ++gd->gd_tdfreecount;
252 TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
99df837e
MD
253 crit_exit();
254 } else {
255 crit_exit();
256 if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
257 kmem_free(kernel_map,
258 (vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
73e4f7b9 259 /* gd invalid */
99df837e
MD
260 td->td_kstack = NULL;
261 }
262 if (td->td_flags & TDF_ALLOCATED_THREAD)
263 zfree(thread_zone, td);
264 }
265}
266
267
8ad65e08
MD
268/*
269 * Switch to the next runnable lwkt. If no LWKTs are runnable then
f1d1c3fa
MD
270 * switch to the idlethread. Switching must occur within a critical
271 * section to avoid races with the scheduling queue.
272 *
273 * We always have full control over our cpu's run queue. Other cpus
274 * that wish to manipulate our queue must use the cpu_*msg() calls to
275 * talk to our cpu, so a critical section is all that is needed and
276 * the result is very, very fast thread switching.
277 *
96728c05
MD
278 * The LWKT scheduler uses a fixed priority model and round-robins at
279 * each priority level. User process scheduling is a totally
280 * different beast and LWKT priorities should not be confused with
281 * user process priorities.
f1d1c3fa 282 *
96728c05
MD
283 * The MP lock may be out of sync with the thread's td_mpcount. lwkt_switch()
284 * cleans it up. Note that the td_switch() function cannot do anything that
285 * requires the MP lock since the MP lock will have already been set up for
286 * the target thread (not the current thread).
8ad65e08 287 */
96728c05 288
8ad65e08
MD
289void
290lwkt_switch(void)
291{
4b5f931b 292 struct globaldata *gd;
f1d1c3fa 293 thread_t td = curthread;
8ad65e08 294 thread_t ntd;
8a8d5d85
MD
295#ifdef SMP
296 int mpheld;
297#endif
8ad65e08 298
96728c05
MD
299 if (mycpu->gd_intr_nesting_level &&
300 td->td_preempted == NULL && panicstr == NULL
301 ) {
26a0694b 302 panic("lwkt_switch: cannot switch from within an interrupt, yet\n");
96728c05 303 }
ef0fdad1 304
cb973d15
MD
305 /*
306 * Passive release (used to transition from user to kernel mode
307 * when we block or switch rather than when we enter the kernel).
308 * This function is NOT called if we are switching into a preemption
309 * or returning from a preemption. Typically this causes us to lose
310 * our P_CURPROC designation (if we have one) and become a true LWKT
311 * thread, and may also hand P_CURPROC to another process and schedule
312 * its thread.
313 */
314 if (td->td_release)
315 td->td_release(td);
316
f1d1c3fa 317 crit_enter();
4b5f931b 318 ++switch_count;
8a8d5d85
MD
319
320#ifdef SMP
321 /*
322 * td_mpcount cannot be used to determine if we currently hold the
323 * MP lock because get_mplock() will increment it prior to attempting
324 * to get the lock, and switch out if it can't. Look at the actual lock.
325 */
326 mpheld = MP_LOCK_HELD();
327#endif
99df837e
MD
328 if ((ntd = td->td_preempted) != NULL) {
329 /*
330 * We had preempted another thread on this cpu, resume the preempted
26a0694b
MD
331 * thread. This occurs transparently, whether the preempted thread
332 * was scheduled or not (it may have been preempted after descheduling
8a8d5d85
MD
333 * itself).
334 *
335 * We have to setup the MP lock for the original thread after backing
336 * out the adjustment that was made to curthread when the original
337 * was preempted.
99df837e 338 */
26a0694b 339 KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
8a8d5d85 340#ifdef SMP
96728c05
MD
341 if (ntd->td_mpcount && mpheld == 0) {
342 panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n",
343 td, ntd, td->td_mpcount, ntd->td_mpcount);
344 }
8a8d5d85
MD
345 if (ntd->td_mpcount) {
346 td->td_mpcount -= ntd->td_mpcount;
347 KKASSERT(td->td_mpcount >= 0);
348 }
349#endif
26a0694b 350 ntd->td_flags |= TDF_PREEMPT_DONE;
8a8d5d85 351 /* YYY release mp lock on switchback if original doesn't need it */
8ad65e08 352 } else {
4b5f931b
MD
353 /*
354 * Priority queue / round-robin at each priority. Note that user
355 * processes run at a fixed, low priority and the user process
356 * scheduler deals with interactions between user processes
357 * by scheduling and descheduling them from the LWKT queue as
358 * necessary.
8a8d5d85
MD
359 *
360 * We have to adjust the MP lock for the target thread. If we
361 * need the MP lock and cannot obtain it we try to locate a
362 * thread that does not need the MP lock.
4b5f931b
MD
363 */
364 gd = mycpu;
4b5f931b
MD
365again:
366 if (gd->gd_runqmask) {
367 int nq = bsrl(gd->gd_runqmask);
368 if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
369 gd->gd_runqmask &= ~(1 << nq);
370 goto again;
371 }
8a8d5d85
MD
372#ifdef SMP
373 if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
374 /*
96728c05
MD
375 * Target needs MP lock and we couldn't get it, try
376 * to locate a thread which does not need the MP lock
3c23a41a 377 * to run. If we cannot locate such a thread, spin in the idle thread.
8a8d5d85
MD
378 */
379 u_int32_t rqmask = gd->gd_runqmask;
380 while (rqmask) {
381 TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
382 if (ntd->td_mpcount == 0)
383 break;
384 }
385 if (ntd)
386 break;
387 rqmask &= ~(1 << nq);
388 nq = bsrl(rqmask);
389 }
390 if (ntd == NULL) {
a2a5ad0d
MD
391 ntd = &gd->gd_idlethread;
392 ntd->td_flags |= TDF_IDLE_NOHLT;
8a8d5d85
MD
393 } else {
394 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
395 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
396 }
397 } else {
398 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
399 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
400 }
401#else
4b5f931b
MD
402 TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
403 TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
8a8d5d85 404#endif
4b5f931b 405 } else {
3c23a41a
MD
406 /*
407 * Nothing to run but we may still need the BGL to deal with
408 * pending interrupts, spin in idle if so.
409 */
a2a5ad0d 410 ntd = &gd->gd_idlethread;
235957ed 411 if (gd->gd_reqflags)
3c23a41a 412 ntd->td_flags |= TDF_IDLE_NOHLT;
4b5f931b 413 }
f1d1c3fa 414 }
26a0694b
MD
415 KASSERT(ntd->td_pri >= TDPRI_CRIT,
416 ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
8a8d5d85
MD
417
418 /*
419 * Do the actual switch. If the new target does not need the MP lock
420 * and we are holding it, release the MP lock. If the new target requires
421 * the MP lock we have already acquired it for the target.
422 */
423#ifdef SMP
424 if (ntd->td_mpcount == 0 ) {
425 if (MP_LOCK_HELD())
426 cpu_rel_mplock();
427 } else {
428 ASSERT_MP_LOCK_HELD();
429 }
430#endif
8a8d5d85 431 if (td != ntd) {
f1d1c3fa 432 td->td_switch(ntd);
8a8d5d85 433 }
96728c05 434
f1d1c3fa 435 crit_exit();
8ad65e08
MD
436}
437
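/*
 * Usage sketch (illustrative only, not compiled): how a thread blocks
 * under this scheduler.  It deschedules itself and calls lwkt_switch()
 * from inside a critical section, and resumes when some other agent
 * (possibly on another cpu, via IPI) lwkt_schedule()s it again.  The
 * wait queue code later in this file follows the same pattern.
 */
#if 0
static void
example_block_self(void)
{
    crit_enter();
    lwkt_deschedule_self();
    /* publish ourselves somewhere a waker can find us, then ... */
    lwkt_switch();              /* returns once we are rescheduled */
    crit_exit();
}
#endif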
cb973d15
MD
438/*
439 * Switch if another thread has a higher priority. Do not switch to other
440 * threads at the same priority.
441 */
442void
443lwkt_maybe_switch()
444{
445 struct globaldata *gd = mycpu;
446 struct thread *td = gd->gd_curthread;
447
448 if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) {
449 lwkt_switch();
450 }
451}
452
b68b7282 453/*
96728c05
MD
454 * Request that the target thread preempt the current thread. Preemption
455 * only works under a specific set of conditions:
b68b7282 456 *
96728c05
MD
457 * - We are not preempting ourselves
458 * - The target thread is owned by the current cpu
459 * - We are not currently being preempted
460 * - The target is not currently being preempted
461 * - We are able to satisfy the target's MP lock requirements (if any).
462 *
463 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION. Typically
464 * this is called via lwkt_schedule() through the td_preemptable callback.
465 * critpri is the managed critical priority that we should ignore in order
466 * to determine whether preemption is possible (aka usually just the crit
467 * priority of lwkt_schedule() itself).
b68b7282 468 *
26a0694b
MD
469 * XXX at the moment we run the target thread in a critical section during
470 * the preemption in order to prevent the target from taking interrupts
471 * that *WE* can't. Preemption is strictly limited to interrupt threads
472 * and interrupt-like threads, outside of a critical section, and the
473 * preempted source thread will be resumed the instant the target blocks
474 * whether or not the source is scheduled (i.e. preemption is supposed to
475 * be as transparent as possible).
4b5f931b 476 *
8a8d5d85
MD
477 * The target thread inherits our MP count (added to its own) for the
478 * duration of the preemption in order to preserve the atomicity of the
96728c05
MD
479 * MP lock during the preemption. Therefore, any preempting targets must be
480 * careful in regards to MP assertions. Note that the MP count may be
481 * out of sync with the physical mp_lock. If we preempt we have to preserve
482 * the expected situation.
b68b7282
MD
483 */
484void
96728c05 485lwkt_preempt(thread_t ntd, int critpri)
b68b7282 486{
73e4f7b9 487 thread_t td = curthread;
8a8d5d85
MD
488#ifdef SMP
489 int mpheld;
57c254db 490 int savecnt;
8a8d5d85 491#endif
b68b7282 492
26a0694b 493 /*
96728c05
MD
494 * The caller has put us in a critical section. We can only preempt
495 * if the caller of the caller was not in a critical section (basically
57c254db
MD
496 * a local interrupt), as determined by the 'critpri' parameter. If
497 * we are unable to preempt we simply count the miss and return.
96728c05
MD
498 *
499 * YYY The target thread must be in a critical section (else it must
500 * inherit our critical section? I dunno yet).
26a0694b
MD
501 */
502 KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
26a0694b 503
cb973d15 504 need_resched();
57c254db
MD
505 if (!_lwkt_wantresched(ntd, td)) {
506 ++preempt_miss;
507 return;
508 }
96728c05
MD
509 if ((td->td_pri & ~TDPRI_MASK) > critpri) {
510 ++preempt_miss;
511 return;
512 }
513#ifdef SMP
a72187e9 514 if (ntd->td_gd != mycpu) {
96728c05
MD
515 ++preempt_miss;
516 return;
517 }
518#endif
26a0694b
MD
519 if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
520 ++preempt_weird;
521 return;
522 }
523 if (ntd->td_preempted) {
4b5f931b 524 ++preempt_hit;
26a0694b 525 return;
b68b7282 526 }
8a8d5d85 527#ifdef SMP
a2a5ad0d
MD
528 /*
529 * note: an interrupt might have occurred just as we were transitioning
a5934754
MD
530 * to the MP lock. In this case td_mpcount will be pre-disposed but
531 * not actually synchronized with the actual state of the lock. We
532 * can use it to imply an MP lock requirement for the preemption but
533 * we cannot use it to test whether we hold the MP lock or not.
a2a5ad0d 534 */
a5934754 535 mpheld = MP_LOCK_HELD();
96728c05
MD
536 if (mpheld && td->td_mpcount == 0)
537 panic("lwkt_preempt(): held and no count");
538 savecnt = td->td_mpcount;
8a8d5d85
MD
539 ntd->td_mpcount += td->td_mpcount;
540 if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
541 ntd->td_mpcount -= td->td_mpcount;
542 ++preempt_miss;
543 return;
544 }
545#endif
26a0694b
MD
546
547 ++preempt_hit;
548 ntd->td_preempted = td;
549 td->td_flags |= TDF_PREEMPT_LOCK;
550 td->td_switch(ntd);
551 KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
96728c05
MD
552#ifdef SMP
553 KKASSERT(savecnt == td->td_mpcount);
554 if (mpheld == 0 && MP_LOCK_HELD())
555 cpu_rel_mplock();
556 else if (mpheld && !MP_LOCK_HELD())
557 panic("lwkt_preempt(): MP lock was not held through");
558#endif
26a0694b
MD
559 ntd->td_preempted = NULL;
560 td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
b68b7282
MD
561}
562
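/*
 * Usage sketch (illustrative only, not compiled): how lwkt_preempt() is
 * normally reached.  lwkt_schedule() enqueues the target and, still in
 * its own critical section, invokes the target's td_preemptable callback
 * when one is installed (e.g. for interrupt threads); that callback is
 * what winds up calling lwkt_preempt() with the caller's crit priority.
 */
#if 0
static void
example_schedule_with_preempt(thread_t ntd)
{
    crit_enter();
    _lwkt_enqueue(ntd);
    if (ntd->td_preemptable)
        ntd->td_preemptable(ntd, TDPRI_CRIT);   /* may preempt curthread */
    crit_exit();
}
#endif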
f1d1c3fa
MD
563/*
564 * Yield our thread while higher priority threads are pending. This is
565 * typically called when we leave a critical section but it can be safely
566 * called while we are in a critical section.
567 *
568 * This function will not generally yield to equal priority threads but it
569 * can occur as a side effect. Note that lwkt_switch() is called from
570 * inside the critical section to prevent its own crit_exit() from reentering
571 * lwkt_yield_quick().
572 *
235957ed 573 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
ef0fdad1
MD
574 * came along but was blocked and made pending.
575 *
f1d1c3fa
MD
576 * (self contained on a per cpu basis)
577 */
578void
579lwkt_yield_quick(void)
580{
581 thread_t td = curthread;
ef0fdad1 582
a2a5ad0d 583 /*
235957ed 584 * gd_reqflags is cleared in splz if the cpl is 0. If we were to clear
a2a5ad0d
MD
585 * it with a non-zero cpl then we might not wind up calling splz after
586 * a task switch when the critical section is exited even though the
587 * new task could accept the interrupt. YYY alternative is to have
588 * lwkt_switch() just call splz unconditionally.
589 *
590 * XXX from crit_exit() only called after last crit section is released.
591 * If called directly will run splz() even if in a critical section.
592 */
235957ed 593 if (mycpu->gd_reqflags)
f1d1c3fa 594 splz();
f1d1c3fa
MD
595
596 /*
597 * YYY enabling will cause wakeup() to task-switch, which really
598 * confused the old 4.x code. This is a good way to simulate
7d0bac62
MD
599 * preemption and MP without actually doing preemption or MP, because a
600 * lot of code assumes that wakeup() does not block.
f1d1c3fa 601 */
ef0fdad1 602 if (untimely_switch && mycpu->gd_intr_nesting_level == 0) {
f1d1c3fa
MD
603 crit_enter();
604 /*
605 * YYY temporary hacks until we disassociate the userland scheduler
606 * from the LWKT scheduler.
607 */
608 if (td->td_flags & TDF_RUNQ) {
609 lwkt_switch(); /* will not reenter yield function */
610 } else {
611 lwkt_schedule_self(); /* make sure we are scheduled */
612 lwkt_switch(); /* will not reenter yield function */
613 lwkt_deschedule_self(); /* make sure we are descheduled */
614 }
615 crit_exit_noyield();
616 }
f1d1c3fa
MD
617}
618
8ad65e08 619/*
f1d1c3fa 620 * This implements a normal yield which, unlike _quick, will yield to equal
235957ed 621 * priority threads as well. Note that gd_reqflags tests will be handled by
f1d1c3fa
MD
622 * the crit_exit() call in lwkt_switch().
623 *
624 * (self contained on a per cpu basis)
8ad65e08
MD
625 */
626void
f1d1c3fa 627lwkt_yield(void)
8ad65e08 628{
f1d1c3fa
MD
629 lwkt_schedule_self();
630 lwkt_switch();
631}
632
633/*
634 * Schedule a thread to run. As the current thread we can always safely
635 * schedule ourselves, and a shortcut procedure is provided for that
636 * function.
637 *
638 * (non-blocking, self contained on a per cpu basis)
639 */
640void
641lwkt_schedule_self(void)
642{
643 thread_t td = curthread;
644
645 crit_enter();
646 KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
f1d1c3fa 647 _lwkt_enqueue(td);
26a0694b
MD
648 if (td->td_proc && td->td_proc->p_stat == SSLEEP)
649 panic("SCHED SELF PANIC");
f1d1c3fa 650 crit_exit();
8ad65e08 651}
8ad65e08
MD
652
653/*
f1d1c3fa
MD
654 * Generic schedule. Possibly schedule threads belonging to other cpus and
655 * deal with threads that might be blocked on a wait queue.
656 *
96728c05 657 * YYY this is one of the best places to implement load balancing code.
f1d1c3fa
MD
658 * Load balancing can be accomplished by requesting other sorts of actions
659 * for the thread in question.
8ad65e08
MD
660 */
661void
662lwkt_schedule(thread_t td)
663{
96728c05 664#ifdef INVARIANTS
26a0694b
MD
665 if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
666 && td->td_proc->p_stat == SSLEEP
667 ) {
668 printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
669 curthread,
670 curthread->td_proc ? curthread->td_proc->p_pid : -1,
671 curthread->td_proc ? curthread->td_proc->p_stat : -1,
672 td,
673 td->td_proc ? td->td_proc->p_pid : -1,
674 td->td_proc ? td->td_proc->p_stat : -1
675 );
676 panic("SCHED PANIC");
677 }
96728c05 678#endif
f1d1c3fa
MD
679 crit_enter();
680 if (td == curthread) {
681 _lwkt_enqueue(td);
682 } else {
683 lwkt_wait_t w;
684
685 /*
686 * If the thread is on a wait list we have to send our scheduling
687 * request to the owner of the wait structure. Otherwise we send
688 * the scheduling request to the cpu owning the thread. Races
689 * are ok, the target will forward the message as necessary (the
690 * message may chase the thread around before it finally gets
691 * acted upon).
692 *
693 * (remember, wait structures use stable storage)
694 */
695 if ((w = td->td_wait) != NULL) {
96728c05 696 if (lwkt_trytoken(&w->wa_token)) {
f1d1c3fa
MD
697 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
698 --w->wa_count;
699 td->td_wait = NULL;
a72187e9 700 if (td->td_gd == mycpu) {
f1d1c3fa 701 _lwkt_enqueue(td);
57c254db 702 if (td->td_preemptable) {
96728c05 703 td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */
57c254db
MD
704 } else if (_lwkt_wantresched(td, curthread)) {
705 need_resched();
706 }
f1d1c3fa 707 } else {
a72187e9 708 lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
f1d1c3fa 709 }
96728c05 710 lwkt_reltoken(&w->wa_token);
f1d1c3fa 711 } else {
96728c05 712 lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
f1d1c3fa
MD
713 }
714 } else {
715 /*
716 * If the wait structure is NULL and we own the thread, there
717 * is no race (since we are in a critical section). If we
718 * do not own the thread there might be a race but the
719 * target cpu will deal with it.
720 */
a72187e9 721 if (td->td_gd == mycpu) {
f1d1c3fa 722 _lwkt_enqueue(td);
57c254db 723 if (td->td_preemptable) {
96728c05 724 td->td_preemptable(td, TDPRI_CRIT);
57c254db
MD
725 } else if (_lwkt_wantresched(td, curthread)) {
726 need_resched();
727 }
f1d1c3fa 728 } else {
a72187e9 729 lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
f1d1c3fa
MD
730 }
731 }
8ad65e08 732 }
f1d1c3fa 733 crit_exit();
8ad65e08
MD
734}
735
d9eea1a5
MD
736/*
737 * Managed acquisition. This code assumes that the MP lock is held for
738 * the tdallq operation and that the thread has been descheduled from its
739 * original cpu. We also have to wait for the thread to be entirely switched
740 * out on its original cpu (this is usually fast enough that we never loop)
741 * since the LWKT system does not have to hold the MP lock while switching
742 * and the target may have released it before switching.
743 */
a2a5ad0d
MD
744void
745lwkt_acquire(thread_t td)
746{
747 struct globaldata *gd;
748
749 gd = td->td_gd;
750 KKASSERT((td->td_flags & TDF_RUNQ) == 0);
d9eea1a5
MD
751 while (td->td_flags & TDF_RUNNING) /* XXX spin */
752 ;
a2a5ad0d
MD
753 if (gd != mycpu) {
754 crit_enter();
755 TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq); /* protected by BGL */
756 gd = mycpu;
757 td->td_gd = gd;
a2a5ad0d
MD
758 TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); /* protected by BGL */
759 crit_exit();
760 }
761}
762
8ad65e08 763/*
f1d1c3fa
MD
764 * Deschedule a thread.
765 *
766 * (non-blocking, self contained on a per cpu basis)
767 */
768void
769lwkt_deschedule_self(void)
770{
771 thread_t td = curthread;
772
773 crit_enter();
774 KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
f1d1c3fa
MD
775 _lwkt_dequeue(td);
776 crit_exit();
777}
778
779/*
780 * Generic deschedule. Descheduling threads other than your own should be
781 * done only in carefully controlled circumstances. Descheduling is
782 * asynchronous.
783 *
784 * This function may block if the cpu has run out of messages.
8ad65e08
MD
785 */
786void
787lwkt_deschedule(thread_t td)
788{
f1d1c3fa
MD
789 crit_enter();
790 if (td == curthread) {
791 _lwkt_dequeue(td);
792 } else {
a72187e9 793 if (td->td_gd == mycpu) {
f1d1c3fa
MD
794 _lwkt_dequeue(td);
795 } else {
a72187e9 796 lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_deschedule, td);
f1d1c3fa
MD
797 }
798 }
799 crit_exit();
800}
801
4b5f931b
MD
802/*
803 * Set the target thread's priority. This routine does not automatically
804 * switch to a higher priority thread, LWKT threads are not designed for
805 * continuous priority changes. Yield if you want to switch.
806 *
807 * We have to retain the critical section count which uses the high bits
26a0694b
MD
808 * of the td_pri field. The specified priority may also indicate zero or
809 * more critical sections by adding TDPRI_CRIT*N.
4b5f931b
MD
810 */
811void
812lwkt_setpri(thread_t td, int pri)
813{
26a0694b 814 KKASSERT(pri >= 0);
a72187e9 815 KKASSERT(td->td_gd == mycpu);
26a0694b
MD
816 crit_enter();
817 if (td->td_flags & TDF_RUNQ) {
818 _lwkt_dequeue(td);
819 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
820 _lwkt_enqueue(td);
821 } else {
822 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
823 }
824 crit_exit();
825}
826
827void
828lwkt_setpri_self(int pri)
829{
830 thread_t td = curthread;
831
4b5f931b
MD
832 KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
833 crit_enter();
834 if (td->td_flags & TDF_RUNQ) {
835 _lwkt_dequeue(td);
836 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
837 _lwkt_enqueue(td);
838 } else {
839 td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
840 }
841 crit_exit();
842}
843
844struct proc *
845lwkt_preempted_proc(void)
846{
73e4f7b9 847 thread_t td = curthread;
4b5f931b
MD
848 while (td->td_preempted)
849 td = td->td_preempted;
850 return(td->td_proc);
851}
852
ece04fd0
MD
853typedef struct lwkt_gettoken_req {
854 lwkt_token_t tok;
855 int cpu;
856} lwkt_gettoken_req;
857
858#if 0
4b5f931b 859
f1d1c3fa
MD
860/*
861 * This function deschedules the current thread and blocks on the specified
862 * wait queue. We obtain ownership of the wait queue in order to block
863 * on it. A generation number is used to interlock the wait queue in case
864 * it gets signalled while we are blocked waiting on the token.
865 *
866 * Note: alternatively we could dequeue our thread and then message the
867 * target cpu owning the wait queue. YYY implement as sysctl.
868 *
869 * Note: wait queue signals normally ping-pong the cpu as an optimization.
870 */
96728c05 871
f1d1c3fa 872void
ae8050a4 873lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
f1d1c3fa
MD
874{
875 thread_t td = curthread;
f1d1c3fa 876
f1d1c3fa 877 lwkt_gettoken(&w->wa_token);
ae8050a4 878 if (w->wa_gen == *gen) {
f1d1c3fa
MD
879 _lwkt_dequeue(td);
880 TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
881 ++w->wa_count;
882 td->td_wait = w;
ae8050a4 883 td->td_wmesg = wmesg;
ece04fd0 884again:
f1d1c3fa 885 lwkt_switch();
ece04fd0
MD
886 lwkt_regettoken(&w->wa_token);
887 if (td->td_wmesg != NULL) {
888 _lwkt_dequeue(td);
889 goto again;
890 }
8ad65e08 891 }
ae8050a4
MD
892 /* token might be lost, doesn't matter for gen update */
893 *gen = w->wa_gen;
f1d1c3fa
MD
894 lwkt_reltoken(&w->wa_token);
895}
896
897/*
898 * Signal a wait queue. We gain ownership of the wait queue in order to
899 * signal it. Once a thread is removed from the wait queue we have to
900 * deal with the cpu owning the thread.
901 *
902 * Note: alternatively we could message the target cpu owning the wait
903 * queue. YYY implement as sysctl.
904 */
905void
ece04fd0 906lwkt_signal(lwkt_wait_t w, int count)
f1d1c3fa
MD
907{
908 thread_t td;
909
910
911 lwkt_gettoken(&w->wa_token);
912 ++w->wa_gen;
ece04fd0
MD
913 if (count < 0)
914 count = w->wa_count;
f1d1c3fa
MD
915 while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
916 --count;
917 --w->wa_count;
918 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
919 td->td_wait = NULL;
ae8050a4 920 td->td_wmesg = NULL;
a72187e9 921 if (td->td_gd == mycpu) {
f1d1c3fa
MD
922 _lwkt_enqueue(td);
923 } else {
a72187e9 924 lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
f1d1c3fa
MD
925 }
926 lwkt_regettoken(&w->wa_token);
927 }
928 lwkt_reltoken(&w->wa_token);
929}
930
ece04fd0
MD
931#endif
932
f1d1c3fa 933/*
96728c05 934 * Acquire ownership of a token
f1d1c3fa 935 *
96728c05 936 * Acquire ownership of a token. The token may have spl and/or critical
f1d1c3fa
MD
937 * section side effects, depending on its purpose. These side effects
938 * guarantee that you will maintain ownership of the token as long as you
939 * do not block. If you block you may lose access to the token (but you
940 * must still release it even if you lose your access to it).
941 *
96728c05 942 * YYY for now we use a critical section to prevent IPIs from taking away
a2a5ad0d 943 * a token, but do we really only need to disable IPIs ?
96728c05
MD
944 *
945 * YYY certain tokens could be made to act like mutexes when performance
946 * would be better (e.g. t_cpu == -1). This is not yet implemented.
947 *
a2a5ad0d
MD
948 * YYY the tokens replace 4.x's simplelocks for the most part, but this
949 * means that 4.x does not expect a switch so for now we cannot switch
950 * when waiting for an IPI to be returned.
951 *
952 * YYY If the token is owned by another cpu we may have to send an IPI to
96728c05
MD
953 * it and then block. The IPI causes the token to be given away to the
954 * requesting cpu, unless it has already changed hands. Since only the
955 * current cpu can give away a token it owns we do not need a memory barrier.
a2a5ad0d 956 * This needs serious optimization.
f1d1c3fa 957 */
57c254db
MD
958
959#ifdef SMP
960
96728c05
MD
961static
962void
963lwkt_gettoken_remote(void *arg)
964{
965 lwkt_gettoken_req *req = arg;
966 if (req->tok->t_cpu == mycpu->gd_cpuid) {
634081ff 967#ifdef INVARIANTS
a2a5ad0d
MD
968 if (token_debug)
969 printf("GT(%d,%d) ", req->tok->t_cpu, req->cpu);
634081ff 970#endif
96728c05 971 req->tok->t_cpu = req->cpu;
a2a5ad0d
MD
972 req->tok->t_reqcpu = req->cpu; /* YYY leave owned by target cpu */
973 /* else set reqcpu to point to current cpu for release */
96728c05
MD
974 }
975}
976
57c254db
MD
977#endif
978
8a8d5d85 979int
f1d1c3fa
MD
980lwkt_gettoken(lwkt_token_t tok)
981{
982 /*
983 * Prevent preemption so the token can't be taken away from us once
984 * we gain ownership of it. Use a synchronous request which might
985 * block. The request will be forwarded as necessary playing catchup
986 * to the token.
987 */
96728c05 988
f1d1c3fa 989 crit_enter();
57c254db 990#ifdef INVARIANTS
a2a5ad0d
MD
991 if (curthread->td_pri > 2000) {
992 curthread->td_pri = 1000;
993 panic("too HIGH!");
57c254db
MD
994 }
995#endif
96728c05 996#ifdef SMP
d0e06f83 997 while (tok->t_cpu != mycpu->gd_cpuid) {
57c254db
MD
998 struct lwkt_gettoken_req req;
999 int seq;
96728c05
MD
1000 int dcpu;
1001
1002 req.cpu = mycpu->gd_cpuid;
1003 req.tok = tok;
1004 dcpu = (volatile int)tok->t_cpu;
a2a5ad0d 1005 KKASSERT(dcpu >= 0 && dcpu < ncpus);
634081ff 1006#ifdef INVARIANTS
a2a5ad0d
MD
1007 if (token_debug)
1008 printf("REQT%d ", dcpu);
634081ff 1009#endif
96728c05
MD
1010 seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
1011 lwkt_wait_ipiq(dcpu, seq);
634081ff 1012#ifdef INVARIANTS
a2a5ad0d
MD
1013 if (token_debug)
1014 printf("REQR%d ", tok->t_cpu);
634081ff 1015#endif
f1d1c3fa
MD
1016 }
1017#endif
1018 /*
1019 * leave us in a critical section on return. This will be undone
8a8d5d85 1020 * by lwkt_reltoken(). Bump the generation number.
f1d1c3fa 1021 */
8a8d5d85 1022 return(++tok->t_gen);
f1d1c3fa
MD
1023}
1024
96728c05
MD
1025/*
1026 * Attempt to acquire ownership of a token. Returns 1 on success, 0 on
1027 * failure.
1028 */
1029int
1030lwkt_trytoken(lwkt_token_t tok)
1031{
1032 crit_enter();
1033#ifdef SMP
1034 if (tok->t_cpu != mycpu->gd_cpuid) {
1035 crit_exit(); /* failed: don't leave the crit section held */
1036 return(0);
1036 }
1037#endif
1038 /* leave us in the critical section */
1039 ++tok->t_gen;
1040 return(1);
1041}
1042
f1d1c3fa
MD
1043/*
1044 * Release your ownership of a token. Releases must occur in reverse
1045 * order to acquisitions, eventually so priorities can be unwound properly
1046 * like SPLs. At the moment the actual implementation doesn't care.
1047 *
1048 * We can safely hand a token that we own to another cpu without notifying
1049 * it, but once we do we can't get it back without requesting it (unless
1050 * the other cpu hands it back to us before we check).
1051 *
1052 * We might have lost the token, so check that.
1053 */
1054void
1055lwkt_reltoken(lwkt_token_t tok)
1056{
d0e06f83 1057 if (tok->t_cpu == mycpu->gd_cpuid) {
f1d1c3fa
MD
1058 tok->t_cpu = tok->t_reqcpu;
1059 }
1060 crit_exit();
1061}
1062
1063/*
8a8d5d85
MD
1064 * Reacquire a token that might have been lost and compare and update the
1065 * generation number. 0 is returned if the generation has not changed
1066 * (nobody else obtained the token while we were blocked, on this cpu or
1067 * any other cpu).
1068 *
1069 * This function returns with the token re-held whether the generation
1070 * number changed or not.
1071 */
1072int
1073lwkt_gentoken(lwkt_token_t tok, int *gen)
1074{
1075 if (lwkt_regettoken(tok) == *gen) {
1076 return(0);
1077 } else {
1078 *gen = tok->t_gen;
1079 return(-1);
1080 }
1081}
1082
1083
1084/*
96728c05 1085 * Re-acquire a token that might have been lost. Returns the generation
8a8d5d85 1086 * number of the token.
f1d1c3fa
MD
1087 */
1088int
1089lwkt_regettoken(lwkt_token_t tok)
1090{
96728c05 1091 /* assert we are in a critical section */
d0e06f83 1092 if (tok->t_cpu != mycpu->gd_cpuid) {
96728c05 1093#ifdef SMP
d0e06f83 1094 while (tok->t_cpu != mycpu->gd_cpuid) {
57c254db
MD
1095 struct lwkt_gettoken_req req;
1096 int seq;
96728c05 1097 int dcpu;
57c254db 1098
96728c05
MD
1099 req.cpu = mycpu->gd_cpuid;
1100 req.tok = tok;
1101 dcpu = (volatile int)tok->t_cpu;
a2a5ad0d 1102 KKASSERT(dcpu >= 0 && dcpu < ncpus);
634081ff 1103#ifdef INVARIANTS
cb973d15
MD
1104 if (token_debug)
1105 printf("REQT%d ", dcpu);
634081ff 1106#endif
96728c05
MD
1107 seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
1108 lwkt_wait_ipiq(dcpu, seq);
634081ff 1109#ifdef INVARIANTS
cb973d15
MD
1110 if (token_debug)
1111 printf("REQR%d ", tok->t_cpu);
634081ff 1112#endif
f1d1c3fa 1113 }
f1d1c3fa 1114#endif
96728c05
MD
1115 ++tok->t_gen;
1116 }
8a8d5d85 1117 return(tok->t_gen);
8ad65e08
MD
1118}
1119
72740893
MD
1120void
1121lwkt_inittoken(lwkt_token_t tok)
1122{
1123 /*
1124 * Zero structure and set cpu owner and reqcpu to cpu 0.
1125 */
1126 bzero(tok, sizeof(*tok));
1127}
1128
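/*
 * Usage sketch (illustrative only, not compiled): typical token use.  The
 * holder remembers the generation number returned by lwkt_gettoken() and,
 * after any operation that may have blocked, uses lwkt_gentoken() to find
 * out whether another holder got in while the token was lost, retrying
 * its state checks if so.
 */
#if 0
static void
example_token_user(lwkt_token_t tok)
{
    int gen;

    gen = lwkt_gettoken(tok);           /* returns in a critical section */
    for (;;) {
        /* ... work that may tsleep() and thus lose the token ... */
        if (lwkt_gentoken(tok, &gen) == 0)
            break;                      /* nobody else held it meanwhile */
        /* generation changed: re-validate state and retry */
    }
    lwkt_reltoken(tok);
}
#endif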
99df837e
MD
1129/*
1130 * Create a kernel process/thread/whatever. It shares its address space
1131 * with proc0 - ie: kernel only.
1132 *
1133 * XXX should be renamed to lwkt_create()
8a8d5d85
MD
1134 *
1135 * The thread will be entered with the MP lock held.
99df837e
MD
1136 */
1137int
1138lwkt_create(void (*func)(void *), void *arg,
73e4f7b9 1139 struct thread **tdp, thread_t template, int tdflags,
ef0fdad1 1140 const char *fmt, ...)
99df837e 1141{
73e4f7b9 1142 thread_t td;
99df837e
MD
1143 va_list ap;
1144
a2a5ad0d
MD
1145 td = lwkt_alloc_thread(template);
1146 if (tdp)
1147 *tdp = td;
99df837e 1148 cpu_set_thread_handler(td, kthread_exit, func, arg);
ef0fdad1 1149 td->td_flags |= TDF_VERBOSE | tdflags;
8a8d5d85
MD
1150#ifdef SMP
1151 td->td_mpcount = 1;
1152#endif
99df837e
MD
1153
1154 /*
1155 * Set up arg0 for 'ps' etc
1156 */
1157 va_start(ap, fmt);
1158 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
1159 va_end(ap);
1160
1161 /*
1162 * Schedule the thread to run
1163 */
ef0fdad1
MD
1164 if ((td->td_flags & TDF_STOPREQ) == 0)
1165 lwkt_schedule(td);
1166 else
1167 td->td_flags &= ~TDF_STOPREQ;
99df837e
MD
1168 return 0;
1169}
1170
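/*
 * Usage sketch (illustrative only, not compiled): starting a kernel-only
 * LWKT with lwkt_create().  'my_worker' is a placeholder.  A NULL
 * template and 0 tdflags give a plain thread that is scheduled
 * immediately; passing TDF_STOPREQ in tdflags creates it stopped, as the
 * code above shows.
 */
#if 0
static void
my_worker(void *arg)
{
    for (;;) {
        /* ... do work, block as needed ... */
    }
}

static void
example_spawn_worker(void)
{
    thread_t td;

    lwkt_create(my_worker, NULL, &td, NULL, 0, "worker%d", 0);
}
#endif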
1171/*
1172 * Destroy an LWKT thread. Warning! This function is not called when
1173 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
1174 * uses a different reaping mechanism.
1175 */
1176void
1177lwkt_exit(void)
1178{
1179 thread_t td = curthread;
1180
1181 if (td->td_flags & TDF_VERBOSE)
1182 printf("kthread %p %s has exited\n", td, td->td_comm);
1183 crit_enter();
1184 lwkt_deschedule_self();
1185 ++mycpu->gd_tdfreecount;
1186 TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
1187 cpu_thread_exit();
1188}
1189
1190/*
1191 * Create a kernel process/thread/whatever. It shares its address space
ef0fdad1 1192 * with proc0 - ie: kernel only. 5.x compatible.
99df837e
MD
1193 */
1194int
1195kthread_create(void (*func)(void *), void *arg,
1196 struct thread **tdp, const char *fmt, ...)
1197{
73e4f7b9 1198 thread_t td;
99df837e
MD
1199 va_list ap;
1200
a2a5ad0d
MD
1201 td = lwkt_alloc_thread(NULL);
1202 if (tdp)
1203 *tdp = td;
99df837e
MD
1204 cpu_set_thread_handler(td, kthread_exit, func, arg);
1205 td->td_flags |= TDF_VERBOSE;
8a8d5d85
MD
1206#ifdef SMP
1207 td->td_mpcount = 1;
1208#endif
99df837e
MD
1209
1210 /*
1211 * Set up arg0 for 'ps' etc
1212 */
1213 va_start(ap, fmt);
1214 vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
1215 va_end(ap);
1216
1217 /*
1218 * Schedule the thread to run
1219 */
1220 lwkt_schedule(td);
1221 return 0;
1222}
1223
26a0694b
MD
1224void
1225crit_panic(void)
1226{
73e4f7b9 1227 thread_t td = curthread;
26a0694b
MD
1228 int lpri = td->td_pri;
1229
1230 td->td_pri = 0;
1231 panic("td_pri is/would-go negative! %p %d", td, lpri);
1232}
1233
99df837e
MD
1234/*
1235 * Destroy an LWKT thread. Warning! This function is not called when
1236 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
1237 * uses a different reaping mechanism.
1238 *
1239 * XXX duplicates lwkt_exit()
1240 */
1241void
1242kthread_exit(void)
1243{
1244 lwkt_exit();
1245}
1246
96728c05
MD
1247#ifdef SMP
1248
1249/*
1250 * Send a function execution request to another cpu. The request is queued
1251 * on the cpu<->cpu ipiq matrix. Each cpu owns a unique ipiq FIFO for every
1252 * possible target cpu. The FIFO can be written.
1253 *
1254 * YYY If the FIFO fills up we have to enable interrupts and process the
1255 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
1256 * Create a CPU_*() function to do this!
1257 *
1258 * Must be called from a critical section.
1259 */
1260int
1261lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
1262{
1263 lwkt_ipiq_t ip;
1264 int windex;
a2a5ad0d 1265 struct globaldata *gd = mycpu;
96728c05 1266
a2a5ad0d 1267 if (dcpu == gd->gd_cpuid) {
96728c05
MD
1268 func(arg);
1269 return(0);
1270 }
cb973d15 1271 crit_enter();
a2a5ad0d
MD
1272 ++gd->gd_intr_nesting_level;
1273#ifdef INVARIANTS
1274 if (gd->gd_intr_nesting_level > 20)
1275 panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
1276#endif
96728c05
MD
1277 KKASSERT(curthread->td_pri >= TDPRI_CRIT);
1278 KKASSERT(dcpu >= 0 && dcpu < ncpus);
1279 ++ipiq_count;
a2a5ad0d 1280 ip = &gd->gd_ipiq[dcpu];
cb973d15
MD
1281
1282 /*
1283 * We always drain before the FIFO becomes full so it should never
1284 * become full. We need to leave enough entries to deal with
1285 * reentrancy.
1286 */
1287 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
1288 windex = ip->ip_windex & MAXCPUFIFO_MASK;
1289 ip->ip_func[windex] = func;
1290 ip->ip_arg[windex] = arg;
1291 /* YYY memory barrier */
1292 ++ip->ip_windex;
96728c05
MD
1293 if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
1294 unsigned int eflags = read_eflags();
1295 cpu_enable_intr();
1296 ++ipiq_fifofull;
cb973d15 1297 while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
96728c05
MD
1298 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
1299 lwkt_process_ipiq();
1300 }
1301 write_eflags(eflags);
1302 }
a2a5ad0d 1303 --gd->gd_intr_nesting_level;
96728c05 1304 cpu_send_ipiq(dcpu); /* issues memory barrier if appropriate */
cb973d15 1305 crit_exit();
96728c05
MD
1306 return(ip->ip_windex);
1307}
1308
cb973d15
MD
1309/*
1310 * Send a message to several target cpus. Typically used for scheduling.
1311 */
1312void
1313lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
1314{
1315 int cpuid;
1316
1317 while (mask) {
1318 cpuid = bsfl(mask);
1319 lwkt_send_ipiq(cpuid, func, arg);
1320 mask &= ~(1 << cpuid);
1321 }
1322}
1323
96728c05
MD
1324/*
1325 * Wait for the remote cpu to finish processing a function.
1326 *
1327 * YYY we have to enable interrupts and process the IPIQ while waiting
1328 * for it to empty or we may deadlock with another cpu. Create a CPU_*()
1329 * function to do this! YYY we really should 'block' here.
1330 *
1331 * Must be called from a critical section. This routine may be called
1332 * from an interrupt (for example, if an interrupt wakes a foreign thread
1333 * up).
1334 */
1335void
1336lwkt_wait_ipiq(int dcpu, int seq)
1337{
1338 lwkt_ipiq_t ip;
a2a5ad0d 1339 int maxc = 100000000;
96728c05
MD
1340
1341 if (dcpu != mycpu->gd_cpuid) {
1342 KKASSERT(dcpu >= 0 && dcpu < ncpus);
1343 ip = &mycpu->gd_ipiq[dcpu];
cb973d15 1344 if ((int)(ip->ip_xindex - seq) < 0) {
96728c05
MD
1345 unsigned int eflags = read_eflags();
1346 cpu_enable_intr();
cb973d15 1347 while ((int)(ip->ip_xindex - seq) < 0) {
96728c05 1348 lwkt_process_ipiq();
a2a5ad0d 1349 if (--maxc == 0)
cb973d15 1350 printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, dcpu, ip->ip_xindex - seq);
a2a5ad0d
MD
1351 if (maxc < -1000000)
1352 panic("LWKT_WAIT_IPIQ");
96728c05
MD
1353 }
1354 write_eflags(eflags);
1355 }
1356 }
1357}
1358
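/*
 * Usage sketch (illustrative only, not compiled): a synchronous cross-cpu
 * call built from the two primitives above, the same way lwkt_gettoken()
 * forwards token requests.  The caller must be in a critical section and
 * the argument must remain valid until lwkt_wait_ipiq() returns.
 * 'example_remote_func' is a placeholder.
 */
#if 0
static void example_remote_func(void *arg);     /* runs on the target cpu */

static void
example_remote_call(int dcpu, void *arg)
{
    int seq;

    crit_enter();
    seq = lwkt_send_ipiq(dcpu, example_remote_func, arg);
    lwkt_wait_ipiq(dcpu, seq);  /* processes our ipiq until the target ran it */
    crit_exit();
}
#endif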
1359/*
1360 * Called from IPI interrupt (like a fast interrupt), which has placed
1361 * us in a critical section. The MP lock may or may not be held.
cb973d15
MD
1362 * May also be called from doreti or splz, or be reentrantly called
1363 * indirectly through the ip_func[] we run.
96728c05
MD
1364 */
1365void
1366lwkt_process_ipiq(void)
1367{
1368 int n;
1369 int cpuid = mycpu->gd_cpuid;
1370
1371 for (n = 0; n < ncpus; ++n) {
1372 lwkt_ipiq_t ip;
1373 int ri;
1374
1375 if (n == cpuid)
1376 continue;
1377 ip = globaldata_find(n)->gd_ipiq;
1378 if (ip == NULL)
1379 continue;
1380 ip = &ip[cpuid];
cb973d15
MD
1381
1382 /*
1383 * Note: xindex is only updated after we are sure the function has
1384 * finished execution. Beware lwkt_process_ipiq() reentrancy! The
1385 * function may send an IPI which may block/drain.
1386 */
96728c05
MD
1387 while (ip->ip_rindex != ip->ip_windex) {
1388 ri = ip->ip_rindex & MAXCPUFIFO_MASK;
96728c05 1389 ++ip->ip_rindex;
cb973d15
MD
1390 ip->ip_func[ri](ip->ip_arg[ri]);
1391 /* YYY memory barrier */
1392 ip->ip_xindex = ip->ip_rindex;
96728c05
MD
1393 }
1394 }
1395}
1396
1397#else
1398
1399int
1400lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
1401{
1402 panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg);
1403 return(0); /* NOT REACHED */
1404}
1405
1406void
1407lwkt_wait_ipiq(int dcpu, int seq)
1408{
1409 panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq);
1410}
1411
1412#endif