/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
 * $FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.6 2002/10/13 07:29:53 kbyanc Exp $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/uio.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/xwait.h>
#include <sys/ktr.h>
#include <sys/serialize.h>

#include <sys/signal2.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mutex2.h>

#include <machine/cpu.h>
#include <machine/smp.h>

TAILQ_HEAD(tslpque, thread);

static void sched_setup (void *dummy);
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)

int     hogticks;
int     lbolt;
void    *lbolt_syncer;
int     sched_quantum;          /* Roundrobin scheduling quantum in ticks. */
int     ncpus;
int     ncpus2, ncpus2_shift, ncpus2_mask;      /* note: mask not cpumask_t */
int     ncpus_fit, ncpus_fit_mask;              /* note: mask not cpumask_t */
int     safepri;
int     tsleep_now_works;
int     tsleep_crypto_dump = 0;

static struct callout loadav_callout;
static struct callout schedcpu_callout;
MALLOC_DEFINE(M_TSLEEP, "tslpque", "tsleep queues");

#define __DEALL(ident)  __DEQUALIFY(void *, ident)

#if !defined(KTR_TSLEEP)
#define KTR_TSLEEP      KTR_ALL
#endif
KTR_INFO_MASTER(tsleep);
KTR_INFO(KTR_TSLEEP, tsleep, tsleep_beg, 0, "tsleep enter %p", const volatile void *ident);
KTR_INFO(KTR_TSLEEP, tsleep, tsleep_end, 1, "tsleep exit");
KTR_INFO(KTR_TSLEEP, tsleep, wakeup_beg, 2, "wakeup enter %p", const volatile void *ident);
KTR_INFO(KTR_TSLEEP, tsleep, wakeup_end, 3, "wakeup exit");
KTR_INFO(KTR_TSLEEP, tsleep, ilockfail, 4, "interlock failed %p", const volatile void *ident);

#define logtsleep1(name)        KTR_LOG(tsleep_ ## name)
#define logtsleep2(name, val)   KTR_LOG(tsleep_ ## name, val)

struct loadavg averunnable =
        { {0, 0, 0}, FSCALE };  /* load average, of runnable procs */
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
static fixpt_t cexp[3] = {
        0.9200444146293232 * FSCALE,    /* exp(-1/12) */
        0.9834714538216174 * FSCALE,    /* exp(-1/60) */
        0.9944598480048967 * FSCALE,    /* exp(-1/180) */
};

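/*
 * Each loadav() pass below folds the count of runnable lwps into these
 * averages using the standard fixed point exponential moving average:
 *
 *      ldavg = (cexp * ldavg + nrun * FSCALE * (FSCALE - cexp)) >> FSHIFT
 *
 * i.e. ldavg' = ldavg * exp(-5/T) + nrun * (1 - exp(-5/T)) for time
 * constants T of 60, 300 and 900 seconds.
 */
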
static void endtsleep (void *);
static void loadav (void *arg);
static void schedcpu (void *arg);

/*
 * Adjust the scheduler quantum.  The quantum is specified in microseconds.
 * Note that 'tick' is in microseconds per tick.
 */
static int
sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
{
        int error, new_val;

        new_val = sched_quantum * ustick;
        error = sysctl_handle_int(oidp, &new_val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (new_val < ustick)
                return (EINVAL);
        sched_quantum = new_val / ustick;
        hogticks = 2 * sched_quantum;
        return (0);
}

SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW,
        0, sizeof sched_quantum, sysctl_kern_quantum, "I", "");

static int pctcpu_decay = 10;
SYSCTL_INT(_kern, OID_AUTO, pctcpu_decay, CTLFLAG_RW, &pctcpu_decay, 0, "");

/*
 * kernel uses `FSCALE', userland (SHOULD) use kern.fscale
 */
int fscale __unused = FSCALE;   /* exported to systat */
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");

/*
 * Recompute process priorities, once a second.
 *
 * Since the userland schedulers are typically event oriented, if the
 * estcpu calculation at wakeup() time is not sufficient to make a
 * process runnable relative to other processes in the system we have
 * a 1-second recalc to help out.
 *
 * This code also allows us to store sysclock_t data in the process structure
 * without fear of an overrun, since sysclock_t is guaranteed to hold
 * several seconds' worth of count.
 *
 * WARNING!  callouts can preempt normal threads.  However, they will not
 * preempt a thread holding a spinlock so we *can* safely use spinlocks.
 */
static int schedcpu_stats(struct proc *p, void *data __unused);
static int schedcpu_resource(struct proc *p, void *data __unused);

static void
schedcpu(void *arg)
{
        allproc_scan(schedcpu_stats, NULL);
        allproc_scan(schedcpu_resource, NULL);
        wakeup((caddr_t)&lbolt);
        wakeup(lbolt_syncer);
        callout_reset(&schedcpu_callout, hz, schedcpu, NULL);
}

/*
 * General process statistics once a second
 */
static int
schedcpu_stats(struct proc *p, void *data __unused)
{
        struct lwp *lp;

        /*
         * Threads may not be completely set up if the process is in the
         * SIDL state.
         */
        if (p->p_stat == SIDL)
                return(0);

        PHOLD(p);
        if (lwkt_trytoken(&p->p_token) == FALSE) {
                PRELE(p);
                return(0);
        }

        p->p_swtime++;
        FOREACH_LWP_IN_PROC(lp, p) {
                if (lp->lwp_stat == LSSLEEP) {
                        ++lp->lwp_slptime;
                        if (lp->lwp_slptime == 1)
                                p->p_usched->uload_update(lp);
                }

                /*
                 * Only recalculate processes that are active or have slept
                 * less than 2 seconds.  The schedulers understand this.
                 * Otherwise decay lwp_pctcpu at the rate controlled by
                 * pctcpu_decay (about 10% per second by default).
                 */
                if (lp->lwp_slptime <= 1) {
                        p->p_usched->recalculate(lp);
                } else {
                        int decay;

                        decay = pctcpu_decay;
                        cpu_ccfence();
                        if (decay <= 1)
                                decay = 1;
                        if (decay > 100)
                                decay = 100;
                        lp->lwp_pctcpu = (lp->lwp_pctcpu * (decay - 1)) / decay;
                }
        }
        lwkt_reltoken(&p->p_token);
        lwkt_yield();
        PRELE(p);
        return(0);
}

/*
 * Resource checks.  XXX break out since ksignal/killproc can block,
 * limiting us to one process killed per second.  There is probably
 * a better way.
 */
static int
schedcpu_resource(struct proc *p, void *data __unused)
{
        u_int64_t ttime;
        struct lwp *lp;

        if (p->p_stat == SIDL)
                return(0);

        PHOLD(p);
        if (lwkt_trytoken(&p->p_token) == FALSE) {
                PRELE(p);
                return(0);
        }

        if (p->p_stat == SZOMB || p->p_limit == NULL) {
                lwkt_reltoken(&p->p_token);
                PRELE(p);
                return(0);
        }

        ttime = 0;
        FOREACH_LWP_IN_PROC(lp, p) {
                /*
                 * We may have caught an lp in the middle of being
                 * created, lwp_thread can be NULL.
                 */
                if (lp->lwp_thread) {
                        ttime += lp->lwp_thread->td_sticks;
                        ttime += lp->lwp_thread->td_uticks;
                }
        }

        switch(plimit_testcpulimit(p->p_limit, ttime)) {
        case PLIMIT_TESTCPU_KILL:
                killproc(p, "exceeded maximum CPU limit");
                break;
        case PLIMIT_TESTCPU_XCPU:
                if ((p->p_flags & P_XCPU) == 0) {
                        p->p_flags |= P_XCPU;
                        ksignal(p, SIGXCPU);
                }
                break;
        default:
                break;
        }
        lwkt_reltoken(&p->p_token);
        lwkt_yield();
        PRELE(p);
        return(0);
}

/*
 * This is only used by ps.  Generate a cpu usage percentage over
 * a period of one second.
 */
void
updatepcpu(struct lwp *lp, int cpticks, int ttlticks)
{
        fixpt_t acc;
        int remticks;

        acc = (cpticks << FSHIFT) / ttlticks;
        if (ttlticks >= ESTCPUFREQ) {
                lp->lwp_pctcpu = acc;
        } else {
                remticks = ESTCPUFREQ - ttlticks;
                lp->lwp_pctcpu = (acc * ttlticks + lp->lwp_pctcpu * remticks) /
                                 ESTCPUFREQ;
        }
}

/*
 * tsleep/wakeup hash table parameters.  Try to find the sweet spot for
 * like addresses being slept on.
 */
#define TABLESIZE       4001
#define LOOKUP(x)       (((u_int)(uintptr_t)(x)) % TABLESIZE)

static cpumask_t slpque_cpumasks[TABLESIZE];

/*
 * General scheduler initialization.  We force a reschedule 25 times
 * a second by default.  Note that cpu0 is initialized in early boot and
 * cannot make any high level calls.
 *
 * Each cpu has its own sleep queue.
 */
void
sleep_gdinit(globaldata_t gd)
{
        static struct tslpque slpque_cpu0[TABLESIZE];
        int i;

        if (gd->gd_cpuid == 0) {
                sched_quantum = (hz + 24) / 25;
                hogticks = 2 * sched_quantum;

                gd->gd_tsleep_hash = slpque_cpu0;
        } else {
                gd->gd_tsleep_hash = kmalloc(sizeof(slpque_cpu0),
                                             M_TSLEEP, M_WAITOK | M_ZERO);
        }
        for (i = 0; i < TABLESIZE; ++i)
                TAILQ_INIT(&gd->gd_tsleep_hash[i]);
}

/*
 * This is a dandy function that allows us to interlock tsleep/wakeup
 * operations with unspecified upper level locks, such as lockmgr locks,
 * simply by holding a critical section.  The sequence is:
 *
 *      (acquire upper level lock)
 *      tsleep_interlock(blah)
 *      (release upper level lock)
 *      tsleep(blah, ...)
 *
 * Basically this function queues us on the tsleep queue without actually
 * descheduling us.  When tsleep() is later called with PINTERLOCKED it
 * assumes the thread was already queued, otherwise it queues it there.
 *
 * Thus it is possible to receive the wakeup prior to going to sleep and
 * the race conditions are covered.
 */
static __inline void
_tsleep_interlock(globaldata_t gd, const volatile void *ident, int flags)
{
        thread_t td = gd->gd_curthread;
        int id;

        crit_enter_quick(td);
        if (td->td_flags & TDF_TSLEEPQ) {
                id = LOOKUP(td->td_wchan);
                TAILQ_REMOVE(&gd->gd_tsleep_hash[id], td, td_sleepq);
                if (TAILQ_FIRST(&gd->gd_tsleep_hash[id]) == NULL) {
                        atomic_clear_cpumask(&slpque_cpumasks[id],
                                             gd->gd_cpumask);
                }
        } else {
                td->td_flags |= TDF_TSLEEPQ;
        }
        id = LOOKUP(ident);
        TAILQ_INSERT_TAIL(&gd->gd_tsleep_hash[id], td, td_sleepq);
        atomic_set_cpumask(&slpque_cpumasks[id], gd->gd_cpumask);
        td->td_wchan = ident;
        td->td_wdomain = flags & PDOMAIN_MASK;
        crit_exit_quick(td);
}

void
tsleep_interlock(const volatile void *ident, int flags)
{
        _tsleep_interlock(mycpu, ident, flags);
}
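
/*
 * Illustrative sketch (not part of the original file): the interlock
 * pattern described above, assuming a hypothetical structure "foo" whose
 * "ready" flag is protected by a spinlock.  Because tsleep_interlock()
 * queues the thread before the spinlock is dropped, a wakeup() issued by
 * another cpu in that window is not lost:
 *
 *      spin_lock(&foo->spin);
 *      while (foo->ready == 0) {
 *              tsleep_interlock(&foo->ready, 0);
 *              spin_unlock(&foo->spin);
 *              tsleep(&foo->ready, PINTERLOCKED, "foordy", 0);
 *              spin_lock(&foo->spin);
 *      }
 *      spin_unlock(&foo->spin);
 *
 * ssleep() further below wraps exactly this release/sleep/reacquire
 * sequence for spinlocks.
 */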

/*
 * Remove thread from sleepq.  Must be called with a critical section held.
 * The thread must not be migrating.
 */
static __inline void
_tsleep_remove(thread_t td)
{
        globaldata_t gd = mycpu;
        int id;

        KKASSERT(td->td_gd == gd && IN_CRITICAL_SECT(td));
        KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
        if (td->td_flags & TDF_TSLEEPQ) {
                td->td_flags &= ~TDF_TSLEEPQ;
                id = LOOKUP(td->td_wchan);
                TAILQ_REMOVE(&gd->gd_tsleep_hash[id], td, td_sleepq);
                if (TAILQ_FIRST(&gd->gd_tsleep_hash[id]) == NULL)
                        atomic_clear_cpumask(&slpque_cpumasks[id], gd->gd_cpumask);
                td->td_wchan = NULL;
                td->td_wdomain = 0;
        }
}

void
tsleep_remove(thread_t td)
{
        _tsleep_remove(td);
}

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If flags includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal
 * (return EINTR).
 *
 * Note that if we are a process, we release_curproc() before messing with
 * the LWKT scheduler.
 *
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 *
 * WARNING!  This code can't block (short of switching away), or bad things
 * will happen.  No getting tokens, no blocking locks, etc.
 */
int
tsleep(const volatile void *ident, int flags, const char *wmesg, int timo)
{
        struct thread *td = curthread;
        struct lwp *lp = td->td_lwp;
        struct proc *p = td->td_proc;           /* may be NULL */
        globaldata_t gd;
        int sig;
        int catch;
        int error;
        int oldpri;
        struct callout thandle;

        /*
         * Currently a severe hack.  Make sure any delayed wakeups
         * are flushed before we sleep or we might deadlock on whatever
         * event we are sleeping on.
         */
        if (td->td_flags & TDF_DELAYED_WAKEUP)
                wakeup_end_delayed();

        /*
         * NOTE: removed KTRPOINT, it could cause races due to blocking
         * even in stable.  Just scrap it for now.
         */
        if (!tsleep_crypto_dump && (tsleep_now_works == 0 || panicstr)) {
                /*
                 * After a panic, or before we actually have an operational
                 * softclock, just give interrupts a chance, then return;
                 *
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splz();
                oldpri = td->td_pri;
                lwkt_setpri_self(safepri);
                lwkt_switch();
                lwkt_setpri_self(oldpri);
                return (0);
        }
        logtsleep2(tsleep_beg, ident);
        gd = td->td_gd;
        KKASSERT(td != &gd->gd_idlethread);     /* you must be kidding! */
        td->td_wakefromcpu = -1;                /* overwritten by _wakeup */

        /*
         * NOTE: all of this occurs on the current cpu, including any
         * callout-based wakeups, so a critical section is a sufficient
         * interlock.
         *
         * The entire sequence through to where we actually sleep must
         * run without breaking the critical section.
         */
        catch = flags & PCATCH;
        error = 0;
        sig = 0;

        crit_enter_quick(td);

        KASSERT(ident != NULL, ("tsleep: no ident"));
        KASSERT(lp == NULL ||
                lp->lwp_stat == LSRUN ||        /* Obvious */
                lp->lwp_stat == LSSTOP,         /* Set in tstop */
                ("tsleep %p %s %d",
                 ident, wmesg, lp->lwp_stat));

        /*
         * We interlock the sleep queue if the caller has not already done
         * it for us.  This must be done before we potentially acquire any
         * tokens or we can lose the wakeup.
         */
        if ((flags & PINTERLOCKED) == 0) {
                _tsleep_interlock(gd, ident, flags);
        }

        /*
         * Setup for the current process (if this is a process).  We must
         * interlock with lwp_token to avoid remote wakeup races via
         * setrunnable()
         */
        if (lp) {
                lwkt_gettoken(&lp->lwp_token);
                if (catch) {
                        /*
                         * Early termination if PCATCH was set and a
                         * signal is pending, interlocked with the
                         * critical section.
                         *
                         * Early termination only occurs when tsleep() is
                         * entered while in a normal LSRUN state.
                         */
                        if ((sig = CURSIG(lp)) != 0)
                                goto resume;

                        /*
                         * Causes ksignal to wake us up if a signal is
                         * received (interlocked with p->p_token).
                         */
                        lp->lwp_flags |= LWP_SINTR;
                }
        } else {
                KKASSERT(p == NULL);
        }

        /*
         * Make sure the current process has been untangled from
         * the userland scheduler and initialize slptime to start
         * counting.
         *
         * NOTE: td->td_wakefromcpu is pre-set by the release function
         *       for the dfly scheduler, and then adjusted by _wakeup()
         */
        if (lp) {
                p->p_usched->release_curproc(lp);
                lp->lwp_slptime = 0;
        }

        /*
         * If the interlocked flag is set but our cpu bit in the slpqueue
         * is no longer set, then a wakeup was processed in between the
         * tsleep_interlock() (ours or the caller's) and here.  This can
         * occur under numerous circumstances including when we release the
         * current process.
         *
         * Extreme loads can cause the sending of an IPI (e.g. wakeup()'s)
         * to process incoming IPIs, thus draining incoming wakeups.
         */
        if ((td->td_flags & TDF_TSLEEPQ) == 0) {
                logtsleep2(ilockfail, ident);
                goto resume;
        }

        /*
         * Scheduling is blocked while in a critical section.  Coincide
         * the descheduled-by-tsleep flag with the descheduling of the
         * lwkt.
         *
         * The timer callout is localized on our cpu and interlocked by
         * our critical section.
         */
        lwkt_deschedule_self(td);
        td->td_flags |= TDF_TSLEEP_DESCHEDULED;
        td->td_wmesg = wmesg;

        /*
         * Setup the timeout, if any.  The timeout is only operable while
         * the thread is flagged descheduled.
         */
        KKASSERT((td->td_flags & TDF_TIMEOUT) == 0);
        if (timo) {
                callout_init_mp(&thandle);
                callout_reset(&thandle, timo, endtsleep, td);
        }

        /*
         * Beddy bye bye.
         */
        if (lp) {
                /*
                 * Ok, we are sleeping.  Place us in the LSSLEEP state.
                 */
                KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);

                /*
                 * tstop() sets LSSTOP, so don't fiddle with that.
                 */
                if (lp->lwp_stat != LSSTOP)
                        lp->lwp_stat = LSSLEEP;
                lp->lwp_ru.ru_nvcsw++;
                p->p_usched->uload_update(lp);
                lwkt_switch();

                /*
                 * And when we are woken up, put us back in LSRUN.  If we
                 * slept for over a second, recalculate our estcpu.
                 */
                lp->lwp_stat = LSRUN;
                if (lp->lwp_slptime) {
                        p->p_usched->uload_update(lp);
                        p->p_usched->recalculate(lp);
                }
                lp->lwp_slptime = 0;
        } else {
                lwkt_switch();
        }

        /*
         * Make sure we haven't switched cpus while we were asleep.  It's
         * not supposed to happen.  Clean up our temporary flags.
         */
        KKASSERT(gd == td->td_gd);

        /*
         * Clean up the timeout.  If the timeout has already occurred,
         * thandle has already been stopped, otherwise stop thandle.  If the
         * timeout is running (the callout thread must be blocked trying to
         * get lwp_token) then wait for us to get scheduled.
         */
        if (timo) {
                while (td->td_flags & TDF_TIMEOUT_RUNNING) {
                        lwkt_deschedule_self(td);
                        td->td_wmesg = "tsrace";
                        lwkt_switch();
                        kprintf("td %p %s: timeout race\n", td, td->td_comm);
                }
                if (td->td_flags & TDF_TIMEOUT) {
                        td->td_flags &= ~TDF_TIMEOUT;
                        error = EWOULDBLOCK;
                } else {
                        /* does not block when on same cpu */
                        callout_stop(&thandle);
                }
        }
        td->td_flags &= ~TDF_TSLEEP_DESCHEDULED;

        /*
         * Make sure we have been removed from the sleepq.  In most
         * cases this will have been done for us already but it is
         * possible for a scheduling IPI to be in-flight from a
         * previous tsleep/tsleep_interlock() or due to a straight-out
         * call to lwkt_schedule() (in the case of an interrupt thread),
         * causing a spurious wakeup.
         */
        _tsleep_remove(td);
        td->td_wmesg = NULL;

        /*
         * Figure out the correct error return.  If interrupted by a
         * signal we want to return EINTR or ERESTART.
         */
resume:
        if (lp) {
                if (catch && error == 0) {
                        if (sig != 0 || (sig = CURSIG(lp))) {
                                if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
                                        error = EINTR;
                                else
                                        error = ERESTART;
                        }
                }
                lp->lwp_flags &= ~LWP_SINTR;
                lwkt_reltoken(&lp->lwp_token);
        }
        logtsleep1(tsleep_end);
        crit_exit_quick(td);
        return (error);
}

/*
 * Interlocked spinlock sleep.  An exclusively held spinlock must
 * be passed to ssleep().  The function will atomically release the
 * spinlock and tsleep on the ident, then reacquire the spinlock and
 * return.
 *
 * This routine is fairly important along the critical path, so optimize it
 * heavily.
 */
int
ssleep(const volatile void *ident, struct spinlock *spin, int flags,
       const char *wmesg, int timo)
{
        globaldata_t gd = mycpu;
        int error;

        _tsleep_interlock(gd, ident, flags);
        spin_unlock_quick(gd, spin);
        error = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
        spin_lock_quick(gd, spin);

        return (error);
}

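/*
 * Interlocked lockmgr sleep.  An exclusively held lock must be passed to
 * lksleep().  The function will atomically release the lock and tsleep on
 * the ident, then reacquire the lock exclusively and return.
 */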
int
lksleep(const volatile void *ident, struct lock *lock, int flags,
        const char *wmesg, int timo)
{
        globaldata_t gd = mycpu;
        int error;

        _tsleep_interlock(gd, ident, flags);
        lockmgr(lock, LK_RELEASE);
        error = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
        lockmgr(lock, LK_EXCLUSIVE);

        return (error);
}

/*
 * Interlocked mutex sleep.  An exclusively held mutex must be passed
 * to mtxsleep().  The function will atomically release the mutex
 * and tsleep on the ident, then reacquire the mutex and return.
 */
int
mtxsleep(const volatile void *ident, struct mtx *mtx, int flags,
         const char *wmesg, int timo)
{
        globaldata_t gd = mycpu;
        int error;

        _tsleep_interlock(gd, ident, flags);
        mtx_unlock(mtx);
        error = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
        mtx_lock_ex_quick(mtx, wmesg);

        return (error);
}

/*
 * Interlocked serializer sleep.  An exclusively held serializer must
 * be passed to zsleep().  The function will atomically release
 * the serializer and tsleep on the ident, then reacquire the serializer
 * and return.
 */
int
zsleep(const volatile void *ident, struct lwkt_serialize *slz, int flags,
       const char *wmesg, int timo)
{
        globaldata_t gd = mycpu;
        int ret;

        ASSERT_SERIALIZED(slz);

        _tsleep_interlock(gd, ident, flags);
        lwkt_serialize_exit(slz);
        ret = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
        lwkt_serialize_enter(slz);

        return ret;
}

/*
 * Directly block on the LWKT thread by descheduling it.  This
 * is much faster than tsleep(), but the only legal way to wake
 * us up is to directly schedule the thread.
 *
 * Setting TDF_SINTR will cause new signals to directly schedule us.
 *
 * This routine must be called while in a critical section.
 */
int
lwkt_sleep(const char *wmesg, int flags)
{
        thread_t td = curthread;
        int sig;

        if ((flags & PCATCH) == 0 || td->td_lwp == NULL) {
                td->td_flags |= TDF_BLOCKED;
                td->td_wmesg = wmesg;
                lwkt_deschedule_self(td);
                lwkt_switch();
                td->td_wmesg = NULL;
                td->td_flags &= ~TDF_BLOCKED;
                return(0);
        }
        if ((sig = CURSIG(td->td_lwp)) != 0) {
                if (SIGISMEMBER(td->td_proc->p_sigacts->ps_sigintr, sig))
                        return(EINTR);
                else
                        return(ERESTART);
        }
        td->td_flags |= TDF_BLOCKED | TDF_SINTR;
        td->td_wmesg = wmesg;
        lwkt_deschedule_self(td);
        lwkt_switch();
        td->td_flags &= ~(TDF_BLOCKED | TDF_SINTR);
        td->td_wmesg = NULL;
        return(0);
}

/*
 * Implement the timeout for tsleep.
 *
 * This type of callout timeout is scheduled on the same cpu the process
 * is sleeping on.  Also, at the moment, the MP lock is held.
 */
static void
endtsleep(void *arg)
{
        thread_t td = arg;
        struct lwp *lp;

        /*
         * We are going to have to get the lwp_token, which means we might
         * block.  This can race a tsleep getting woken up by other means
         * so set TDF_TIMEOUT_RUNNING to force the tsleep to wait for our
         * processing to complete (sorry tsleep!).
         *
         * We can safely set td_flags because td MUST be on the same cpu
         * as we are.
         */
        KKASSERT(td->td_gd == mycpu);
        crit_enter();
        td->td_flags |= TDF_TIMEOUT_RUNNING | TDF_TIMEOUT;

        /*
         * This can block but TDF_TIMEOUT_RUNNING will prevent the thread
         * from exiting the tsleep on us.  The flag is interlocked by virtue
         * of lp being on the same cpu as we are.
         */
        if ((lp = td->td_lwp) != NULL)
                lwkt_gettoken(&lp->lwp_token);

        KKASSERT(td->td_flags & TDF_TSLEEP_DESCHEDULED);

        if (lp) {
                if (lp->lwp_proc->p_stat != SSTOP)
                        setrunnable(lp);
                lwkt_reltoken(&lp->lwp_token);
        } else {
                _tsleep_remove(td);
                lwkt_schedule(td);
        }
        KKASSERT(td->td_gd == mycpu);
        td->td_flags &= ~TDF_TIMEOUT_RUNNING;
        crit_exit();
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 * count may be zero or one only.
 *
 * The domain encodes the sleep/wakeup domain, flags, plus the originating
 * cpu.
 *
 * This call may run without the MP lock held.  We can only manipulate thread
 * state on the cpu owning the thread.  We CANNOT manipulate process state
 * at all.
 *
 * _wakeup() can be passed to an IPI so we can't use (const volatile
 * void *ident).
 */
static void
_wakeup(void *ident, int domain)
{
        struct tslpque *qp;
        struct thread *td;
        struct thread *ntd;
        globaldata_t gd;
        cpumask_t mask;
        int id;

        crit_enter();
        logtsleep2(wakeup_beg, ident);
        gd = mycpu;
        id = LOOKUP(ident);
        qp = &gd->gd_tsleep_hash[id];
restart:
        for (td = TAILQ_FIRST(qp); td != NULL; td = ntd) {
                ntd = TAILQ_NEXT(td, td_sleepq);
                if (td->td_wchan == ident &&
                    td->td_wdomain == (domain & PDOMAIN_MASK)
                ) {
                        KKASSERT(td->td_gd == gd);
                        _tsleep_remove(td);
                        td->td_wakefromcpu = PWAKEUP_DECODE(domain);
                        if (td->td_flags & TDF_TSLEEP_DESCHEDULED) {
                                lwkt_schedule(td);
                                if (domain & PWAKEUP_ONE)
                                        goto done;
                        }
                        goto restart;
                }
        }

        /*
         * We finished checking the current cpu but there still may be
         * more work to do.  Either wakeup_one was requested and no matching
         * thread was found, or a normal wakeup was requested and we have
         * to continue checking cpus.
         *
         * It should be noted that this scheme is actually less expensive
         * than the old scheme when waking up multiple threads, since we send
         * only one IPI message per target candidate which may then schedule
         * multiple threads.  Before we could have wound up sending an IPI
         * message for each thread on the target cpu (!= current cpu) that
         * needed to be woken up.
         *
         * NOTE: Wakeups occurring on remote cpus are asynchronous.  This
         * should be ok since we are passing idents in the IPI rather than
         * thread pointers.
         */
        if ((domain & PWAKEUP_MYCPU) == 0 &&
            (mask = slpque_cpumasks[id] & gd->gd_other_cpus) != 0) {
                lwkt_send_ipiq2_mask(mask, _wakeup, ident,
                                     domain | PWAKEUP_MYCPU);
        }
done:
        logtsleep1(wakeup_end);
        crit_exit();
}

/*
 * Wakeup all threads tsleep()ing on the specified ident, on all cpus
 */
void
wakeup(const volatile void *ident)
{
        globaldata_t gd = mycpu;
        thread_t td = gd->gd_curthread;

        if (td && (td->td_flags & TDF_DELAYED_WAKEUP)) {
                if (!atomic_cmpset_ptr(&gd->gd_delayed_wakeup[0], NULL, ident)) {
                        if (!atomic_cmpset_ptr(&gd->gd_delayed_wakeup[1], NULL, ident))
                                _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, gd->gd_cpuid));
                }
                return;
        }
        _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, gd->gd_cpuid));
}

/*
 * Wakeup one thread tsleep()ing on the specified ident, on any cpu.
 */
void
wakeup_one(const volatile void *ident)
{
        /* XXX potentially round-robin the first responding cpu */
        _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, mycpu->gd_cpuid) |
                                PWAKEUP_ONE);
}

/*
 * Wakeup threads tsleep()ing on the specified ident on the current cpu
 * only.
 */
void
wakeup_mycpu(const volatile void *ident)
{
        _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, mycpu->gd_cpuid) |
                                PWAKEUP_MYCPU);
}

/*
 * Wakeup one thread tsleep()ing on the specified ident on the current cpu
 * only.
 */
void
wakeup_mycpu_one(const volatile void *ident)
{
        /* XXX potentially round-robin the first responding cpu */
        _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, mycpu->gd_cpuid) |
                                PWAKEUP_MYCPU | PWAKEUP_ONE);
}

/*
 * Wakeup all threads tsleep()ing on the specified ident on the specified cpu
 * only.
 */
void
wakeup_oncpu(globaldata_t gd, const volatile void *ident)
{
        globaldata_t mygd = mycpu;
        if (gd == mycpu) {
                _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, mygd->gd_cpuid) |
                                        PWAKEUP_MYCPU);
        } else {
                lwkt_send_ipiq2(gd, _wakeup, __DEALL(ident),
                                PWAKEUP_ENCODE(0, mygd->gd_cpuid) |
                                PWAKEUP_MYCPU);
        }
}

/*
 * Wakeup one thread tsleep()ing on the specified ident on the specified cpu
 * only.
 */
void
wakeup_oncpu_one(globaldata_t gd, const volatile void *ident)
{
        globaldata_t mygd = mycpu;
        if (gd == mygd) {
                _wakeup(__DEALL(ident), PWAKEUP_ENCODE(0, mygd->gd_cpuid) |
                                        PWAKEUP_MYCPU | PWAKEUP_ONE);
        } else {
                lwkt_send_ipiq2(gd, _wakeup, __DEALL(ident),
                                PWAKEUP_ENCODE(0, mygd->gd_cpuid) |
                                PWAKEUP_MYCPU | PWAKEUP_ONE);
        }
}

/*
 * Wakeup all threads waiting on the specified ident that slept using
 * the specified domain, on all cpus.
 */
void
wakeup_domain(const volatile void *ident, int domain)
{
        _wakeup(__DEALL(ident), PWAKEUP_ENCODE(domain, mycpu->gd_cpuid));
}

/*
 * Wakeup one thread waiting on the specified ident that slept using
 * the specified domain, on any cpu.
 */
void
wakeup_domain_one(const volatile void *ident, int domain)
{
        /* XXX potentially round-robin the first responding cpu */
        _wakeup(__DEALL(ident),
                PWAKEUP_ENCODE(domain, mycpu->gd_cpuid) | PWAKEUP_ONE);
}

void
wakeup_start_delayed(void)
{
        globaldata_t gd = mycpu;

        crit_enter();
        gd->gd_curthread->td_flags |= TDF_DELAYED_WAKEUP;
        crit_exit();
}

void
wakeup_end_delayed(void)
{
        globaldata_t gd = mycpu;

        if (gd->gd_curthread->td_flags & TDF_DELAYED_WAKEUP) {
                crit_enter();
                gd->gd_curthread->td_flags &= ~TDF_DELAYED_WAKEUP;
                if (gd->gd_delayed_wakeup[0] || gd->gd_delayed_wakeup[1]) {
                        if (gd->gd_delayed_wakeup[0]) {
                                wakeup(gd->gd_delayed_wakeup[0]);
                                gd->gd_delayed_wakeup[0] = NULL;
                        }
                        if (gd->gd_delayed_wakeup[1]) {
                                wakeup(gd->gd_delayed_wakeup[1]);
                                gd->gd_delayed_wakeup[1] = NULL;
                        }
                }
                crit_exit();
        }
}

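/*
 * Illustrative sketch (not part of the original file): a thread that issues
 * a burst of wakeups can defer their delivery, assuming hypothetical idents
 * ident1/ident2:
 *
 *      wakeup_start_delayed();
 *      ... update state, wakeup(ident1), wakeup(ident2) ...
 *      wakeup_end_delayed();
 *
 * While TDF_DELAYED_WAKEUP is set, wakeup() parks up to two idents in
 * gd_delayed_wakeup[]; any further wakeup is delivered immediately.  The
 * parked wakeups are issued by wakeup_end_delayed(), or flushed by tsleep()
 * before the thread itself goes to sleep.
 */
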
/*
 * setrunnable()
 *
 * Make a process runnable.  lp->lwp_token must be held on call and this
 * function must be called from the cpu owning lp.
 *
 * This only has an effect if we are in LSSTOP or LSSLEEP.
 */
void
setrunnable(struct lwp *lp)
{
        thread_t td = lp->lwp_thread;

        ASSERT_LWKT_TOKEN_HELD(&lp->lwp_token);
        KKASSERT(td->td_gd == mycpu);
        crit_enter();
        if (lp->lwp_stat == LSSTOP)
                lp->lwp_stat = LSSLEEP;
        if (lp->lwp_stat == LSSLEEP) {
                _tsleep_remove(td);
                lwkt_schedule(td);
        } else if (td->td_flags & TDF_SINTR) {
                lwkt_schedule(td);
        }
        crit_exit();
}

/*
 * The process is stopped due to some condition, usually because p_stat is
 * set to SSTOP, but also possibly due to being traced.
 *
 * Caller must hold p->p_token
 *
 * NOTE!  If the caller sets SSTOP, the caller must also clear P_WAITED
 * because the parent may check the child's status before the child actually
 * gets to this routine.
 *
 * This routine is called with the current lwp only, typically just
 * before returning to userland if the process state is detected as
 * possibly being in a stopped state.
 */
void
tstop(void)
{
        struct lwp *lp = curthread->td_lwp;
        struct proc *p = lp->lwp_proc;
        struct proc *q;

        lwkt_gettoken(&lp->lwp_token);
        crit_enter();

        /*
         * If LWP_MP_WSTOP is set, we were sleeping
         * while our process was stopped.  At this point
         * we were already counted as stopped.
         */
        if ((lp->lwp_mpflags & LWP_MP_WSTOP) == 0) {
                /*
                 * If we're the last thread to stop, signal
                 * our parent.
                 */
                p->p_nstopped++;
                atomic_set_int(&lp->lwp_mpflags, LWP_MP_WSTOP);
                wakeup(&p->p_nstopped);
                if (p->p_nstopped == p->p_nthreads) {
                        /*
                         * Token required to interlock kern_wait()
                         */
                        q = p->p_pptr;
                        PHOLD(q);
                        lwkt_gettoken(&q->p_token);
                        p->p_flags &= ~P_WAITED;
                        wakeup(p->p_pptr);
                        if ((q->p_sigacts->ps_flag & PS_NOCLDSTOP) == 0)
                                ksignal(q, SIGCHLD);
                        lwkt_reltoken(&q->p_token);
                        PRELE(q);
                }
        }
        while (p->p_stat == SSTOP) {
                lp->lwp_stat = LSSTOP;
                tsleep(p, 0, "stop", 0);
        }
        p->p_nstopped--;
        atomic_clear_int(&lp->lwp_mpflags, LWP_MP_WSTOP);
        crit_exit();
        lwkt_reltoken(&lp->lwp_token);
}

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 */
static int loadav_count_runnable(struct lwp *p, void *data);

static void
loadav(void *arg)
{
        struct loadavg *avg;
        int i, nrun;

        nrun = 0;
        alllwp_scan(loadav_count_runnable, &nrun);
        avg = &averunnable;
        for (i = 0; i < 3; i++) {
                avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
                        nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
        }

        /*
         * Schedule the next update to occur after 5 seconds, but add a
         * random variation to avoid synchronisation with processes that
         * run at regular intervals.
         */
        callout_reset(&loadav_callout, hz * 4 + (int)(krandom() % (hz * 2 + 1)),
                      loadav, NULL);
}

static int
loadav_count_runnable(struct lwp *lp, void *data)
{
        int *nrunp = data;
        thread_t td;

        switch (lp->lwp_stat) {
        case LSRUN:
                if ((td = lp->lwp_thread) == NULL)
                        break;
                if (td->td_flags & TDF_BLOCKED)
                        break;
                ++*nrunp;
                break;
        default:
                break;
        }
        lwkt_yield();
        return(0);
}

/* ARGSUSED */
static void
sched_setup(void *dummy)
{
        callout_init_mp(&loadav_callout);
        callout_init_mp(&schedcpu_callout);

        /* Kick off timeout driven events by calling first time. */
        schedcpu(NULL);
        loadav(NULL);
}