| Commit | Line | Data |
|---|---|---|
| 984263bc | 1 | /* |
| 99ad9bc4 MD |
2 | * (MPSAFE) |
| 3 | * | |
| 984263bc MD |
4 | * Copyright (c) 1991, 1993 |
| 5 | * The Regents of the University of California. All rights reserved. | |
| 6 | * | |
| 7 | * This code is derived from software contributed to Berkeley by | |
| 8 | * The Mach Operating System project at Carnegie-Mellon University. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 | |
| 39 | * | |
| 40 | * | |
| 41 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
| 42 | * All rights reserved. | |
| 43 | * | |
| 44 | * Permission to use, copy, modify and distribute this software and | |
| 45 | * its documentation is hereby granted, provided that both the copyright | |
| 46 | * notice and this permission notice appear in all copies of the | |
| 47 | * software, derivative works or modified versions, and any portions | |
| 48 | * thereof, and that both notices appear in supporting documentation. | |
| 49 | * | |
| 50 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
| 51 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
| 52 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
| 53 | * | |
| 54 | * Carnegie Mellon requests users of this software to return to | |
| 55 | * | |
| 56 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
| 57 | * School of Computer Science | |
| 58 | * Carnegie Mellon University | |
| 59 | * Pittsburgh PA 15213-3890 | |
| 60 | * | |
| 61 | * any improvements or extensions that they make and grant Carnegie the | |
| 62 | * rights to redistribute these changes. | |
| 63 | * | |
| 64 | * $FreeBSD: src/sys/vm/vm_glue.c,v 1.94.2.4 2003/01/13 22:51:17 dillon Exp $ | |
| 4ecf7cc9 | 65 | * $DragonFly: src/sys/vm/vm_glue.c,v 1.56 2008/07/01 02:02:56 dillon Exp $ |
| 984263bc MD |
66 | */ |
| 67 | ||
| 68 | #include "opt_vm.h" | |
| 69 | ||
| 70 | #include <sys/param.h> | |
| 71 | #include <sys/systm.h> | |
| 72 | #include <sys/proc.h> | |
| 73 | #include <sys/resourcevar.h> | |
| 74 | #include <sys/buf.h> | |
| 75 | #include <sys/shm.h> | |
| 76 | #include <sys/vmmeter.h> | |
| 77 | #include <sys/sysctl.h> | |
| 78 | ||
| 79 | #include <sys/kernel.h> | |
| 80 | #include <sys/unistd.h> | |
| 81 | ||
| 82 | #include <machine/limits.h> | |
| 83 | ||
| 84 | #include <vm/vm.h> | |
| 85 | #include <vm/vm_param.h> | |
| 86 | #include <sys/lock.h> | |
| 87 | #include <vm/pmap.h> | |
| 88 | #include <vm/vm_map.h> | |
| 89 | #include <vm/vm_page.h> | |
| 90 | #include <vm/vm_pageout.h> | |
| 91 | #include <vm/vm_kern.h> | |
| 92 | #include <vm/vm_extern.h> | |
| 93 | ||
| 94 | #include <sys/user.h> | |
| 12e4aaff | 95 | #include <vm/vm_page2.h> |
| cdd46d2e | 96 | #include <sys/thread2.h> |
| e3161323 | 97 | #include <sys/sysref2.h> |
| 984263bc MD |
98 | |
/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
/* Forward declaration of the swap-in loop that SYSINIT registers below. */
static void scheduler (void *);
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)

#ifdef INVARIANTS

/*
 * vm.swap_debug sysctl (read/write).  When non-zero, faultin() and
 * swapout() kprintf() the pid and command name of the process they act
 * on.  Only compiled into INVARIANTS kernels.
 */
static int swap_debug = 0;
SYSCTL_INT(_vm, OID_AUTO, swap_debug,
	CTLFLAG_RW, &swap_debug, 0, "");

#endif

/*
 * Swap-in request flag.  swapin_request() sets it to 1 and issues a
 * wakeup on its address; scheduler() clears it at the top of every pass
 * and sleeps on its address when it finds nothing to swap in.
 */
static int scheduler_notify;

/* Forward declaration; the definition is inside the !NO_SWAPPING section. */
static void swapout (struct proc *);
| 984263bc | 118 | |
| 99ad9bc4 MD |
119 | /* |
| 120 | * No requirements. | |
| 121 | */ | |
| 984263bc | 122 | int |
| 6cebf0fc | 123 | kernacc(c_caddr_t addr, int len, int rw) |
| 984263bc MD |
124 | { |
| 125 | boolean_t rv; | |
| 126 | vm_offset_t saddr, eaddr; | |
| 127 | vm_prot_t prot; | |
| 128 | ||
| 129 | KASSERT((rw & (~VM_PROT_ALL)) == 0, | |
| 130 | ("illegal ``rw'' argument to kernacc (%x)\n", rw)); | |
| ed468fd2 | 131 | |
| 03debc4a MD |
132 | /* |
| 133 | * The globaldata space is not part of the kernel_map proper, | |
| 134 | * check access separately. | |
| 135 | */ | |
| 136 | if (is_globaldata_space((vm_offset_t)addr, (vm_offset_t)(addr + len))) | |
| 137 | return (TRUE); | |
| 138 | ||
| 139 | /* | |
| 140 | * Nominal kernel memory access - check access via kernel_map. | |
| 141 | */ | |
| e4846942 | 142 | if ((vm_offset_t)addr + len > kernel_map.max_offset || |
| ed468fd2 MD |
143 | (vm_offset_t)addr + len < (vm_offset_t)addr) { |
| 144 | return (FALSE); | |
| 145 | } | |
| 984263bc MD |
146 | prot = rw; |
| 147 | saddr = trunc_page((vm_offset_t)addr); | |
| 148 | eaddr = round_page((vm_offset_t)addr + len); | |
| 46754a20 MD |
149 | rv = vm_map_check_protection(&kernel_map, saddr, eaddr, prot, FALSE); |
| 150 | ||
| 984263bc MD |
151 | return (rv == TRUE); |
| 152 | } | |
| 153 | ||
| 99ad9bc4 MD |
154 | /* |
| 155 | * No requirements. | |
| 156 | */ | |
| 984263bc | 157 | int |
| 6cebf0fc | 158 | useracc(c_caddr_t addr, int len, int rw) |
| 984263bc MD |
159 | { |
| 160 | boolean_t rv; | |
| 161 | vm_prot_t prot; | |
| 162 | vm_map_t map; | |
| 163 | vm_map_entry_t save_hint; | |
| b0c15cdf | 164 | vm_offset_t wrap; |
| 984263bc MD |
165 | |
| 166 | KASSERT((rw & (~VM_PROT_ALL)) == 0, | |
| 167 | ("illegal ``rw'' argument to useracc (%x)\n", rw)); | |
| 168 | prot = rw; | |
| 169 | /* | |
| 170 | * XXX - check separately to disallow access to user area and user | |
| 171 | * page tables - they are in the map. | |
| 984263bc | 172 | */ |
| b0c15cdf MD |
173 | wrap = (vm_offset_t)addr + len; |
| 174 | if (wrap > VM_MAX_USER_ADDRESS || wrap < (vm_offset_t)addr) { | |
| 984263bc MD |
175 | return (FALSE); |
| 176 | } | |
| 177 | map = &curproc->p_vmspace->vm_map; | |
| 178 | vm_map_lock_read(map); | |
| 179 | /* | |
| 180 | * We save the map hint, and restore it. Useracc appears to distort | |
| 181 | * the map hint unnecessarily. | |
| 182 | */ | |
| 183 | save_hint = map->hint; | |
| 46754a20 | 184 | rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), |
| b0c15cdf | 185 | round_page(wrap), prot, TRUE); |
| 984263bc MD |
186 | map->hint = save_hint; |
| 187 | vm_map_unlock_read(map); | |
| 188 | ||
| 189 | return (rv == TRUE); | |
| 190 | } | |
| 191 | ||
| 99ad9bc4 MD |
192 | /* |
| 193 | * No requirements. | |
| 194 | */ | |
| 984263bc | 195 | void |
| 57e43348 | 196 | vslock(caddr_t addr, u_int len) |
| 984263bc | 197 | { |
| fc00aa2b MD |
198 | if (len) { |
| 199 | vm_map_wire(&curproc->p_vmspace->vm_map, | |
| 200 | trunc_page((vm_offset_t)addr), | |
| 201 | round_page((vm_offset_t)addr + len), 0); | |
| 202 | } | |
| 984263bc MD |
203 | } |
| 204 | ||
| 99ad9bc4 MD |
205 | /* |
| 206 | * No requirements. | |
| 207 | */ | |
| 984263bc | 208 | void |
| 57e43348 | 209 | vsunlock(caddr_t addr, u_int len) |
| 984263bc | 210 | { |
| fc00aa2b MD |
211 | if (len) { |
| 212 | vm_map_wire(&curproc->p_vmspace->vm_map, | |
| 213 | trunc_page((vm_offset_t)addr), | |
| 214 | round_page((vm_offset_t)addr + len), | |
| 215 | KM_PAGEABLE); | |
| 216 | } | |
| 984263bc MD |
217 | } |
| 218 | ||
| 219 | /* | |
| 220 | * Implement fork's actions on an address space. | |
| 221 | * Here we arrange for the address space to be copied or referenced, | |
| 222 | * allocate a user struct (pcb and kernel stack), then call the | |
| 223 | * machine-dependent layer to fill those in and make the new process | |
| 224 | * ready to run. The new process is set up so that it returns directly | |
| 225 | * to user mode to avoid stack copying and relocation problems. | |
| 99ad9bc4 MD |
226 | * |
| 227 | * No requirements. | |
| 984263bc MD |
228 | */ |
| 229 | void | |
| 13d13d89 | 230 | vm_fork(struct proc *p1, struct proc *p2, int flags) |
| 984263bc | 231 | { |
| 984263bc MD |
232 | if ((flags & RFPROC) == 0) { |
| 233 | /* | |
| 234 | * Divorce the memory, if it is shared, essentially | |
| 235 | * this changes shared memory amongst threads, into | |
| 236 | * COW locally. | |
| 237 | */ | |
| 238 | if ((flags & RFMEM) == 0) { | |
| e3161323 | 239 | if (p1->p_vmspace->vm_sysref.refcnt > 1) { |
| 984263bc MD |
240 | vmspace_unshare(p1); |
| 241 | } | |
| 242 | } | |
| 13d13d89 | 243 | cpu_fork(ONLY_LWP_IN_PROC(p1), NULL, flags); |
| 984263bc MD |
244 | return; |
| 245 | } | |
| 246 | ||
| 247 | if (flags & RFMEM) { | |
| 248 | p2->p_vmspace = p1->p_vmspace; | |
| e3161323 | 249 | sysref_get(&p1->p_vmspace->vm_sysref); |
| 984263bc MD |
250 | } |
| 251 | ||
| 252 | while (vm_page_count_severe()) { | |
| 4ecf7cc9 | 253 | vm_wait(0); |
| 984263bc MD |
254 | } |
| 255 | ||
| 256 | if ((flags & RFMEM) == 0) { | |
| 257 | p2->p_vmspace = vmspace_fork(p1->p_vmspace); | |
| 258 | ||
| 259 | pmap_pinit2(vmspace_pmap(p2->p_vmspace)); | |
| 260 | ||
| 261 | if (p1->p_vmspace->vm_shm) | |
| 262 | shmfork(p1, p2); | |
| 263 | } | |
| 264 | ||
| 13d13d89 | 265 | pmap_init_proc(p2); |
| 984263bc MD |
266 | } |
| 267 | ||
| 268 | /* | |
| ba39e2e0 | 269 | * Set default limits for VM system. Call during proc0's initialization. |
| 99ad9bc4 MD |
270 | * |
| 271 | * Called from the low level boot code only. | |
| 984263bc | 272 | */ |
| ba39e2e0 MD |
273 | void |
| 274 | vm_init_limits(struct proc *p) | |
| 984263bc | 275 | { |
| 984263bc MD |
276 | int rss_limit; |
| 277 | ||
| 278 | /* | |
| 279 | * Set up the initial limits on process VM. Set the maximum resident | |
| 280 | * set size to be half of (reasonably) available memory. Since this | |
| 281 | * is a soft limit, it comes into effect only when the system is out | |
| 282 | * of memory - half of main memory helps to favor smaller processes, | |
| 283 | * and reduces thrashing of the object cache. | |
| 284 | */ | |
| 285 | p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; | |
| 286 | p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz; | |
| 287 | p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; | |
| 288 | p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; | |
| 289 | /* limit the limit to no less than 2MB */ | |
| 12e4aaff | 290 | rss_limit = max(vmstats.v_free_count, 512); |
| 984263bc MD |
291 | p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); |
| 292 | p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; | |
| 293 | } | |
| 294 | ||
| 344ad853 MD |
295 | /* |
| 296 | * Faultin the specified process. Note that the process can be in any | |
| 297 | * state. Just clear P_SWAPPEDOUT and call wakeup in case the process is | |
| 298 | * sleeping. | |
| 99ad9bc4 MD |
299 | * |
| 300 | * No requirements. | |
| 344ad853 | 301 | */ |
| 984263bc | 302 | void |
| 57e43348 | 303 | faultin(struct proc *p) |
| 984263bc | 304 | { |
| 4643740a | 305 | if (p->p_flags & P_SWAPPEDOUT) { |
| 8ec60c3f | 306 | /* |
| 344ad853 MD |
307 | * The process is waiting in the kernel to return to user |
| 308 | * mode but cannot until P_SWAPPEDOUT gets cleared. | |
| 8ec60c3f | 309 | */ |
| a5fc46c9 | 310 | lwkt_gettoken(&p->p_token); |
| 4643740a | 311 | p->p_flags &= ~(P_SWAPPEDOUT | P_SWAPWAIT); |
| 344ad853 MD |
312 | #ifdef INVARIANTS |
| 313 | if (swap_debug) | |
| 086c1d7e | 314 | kprintf("swapping in %d (%s)\n", p->p_pid, p->p_comm); |
| 344ad853 MD |
315 | #endif |
| 316 | wakeup(p); | |
| a5fc46c9 | 317 | lwkt_reltoken(&p->p_token); |
| 984263bc MD |
318 | } |
| 319 | } | |
| 320 | ||
| 321 | /* | |
| f8c3996b MD |
322 | * Kernel initialization eventually falls through to this function, |
| 323 | * which is process 0. | |
| 324 | * | |
| 984263bc MD |
325 | * This swapin algorithm attempts to swap-in processes only if there |
| 326 | * is enough space for them. Of course, if a process waits for a long | |
| 327 | * time, it will be swapped in anyway. | |
| 328 | */ | |
/*
 * Accumulator handed to scheduler_callback() through allproc_scan().
 */
struct scheduler_info {
	struct proc *pp;	/* best candidate so far (PHOLD'd), or NULL */
	int ppri;		/* score computed for pp */
};

/* Per-process scan callback; data points at a struct scheduler_info. */
static int scheduler_callback(struct proc *p, void *data);
| 335 | ||
| 984263bc | 336 | static void |
| 57e43348 | 337 | scheduler(void *dummy) |
| 984263bc | 338 | { |
| 8fa76237 | 339 | struct scheduler_info info; |
| 5f910b2f | 340 | struct proc *p; |
| 984263bc | 341 | |
| f8c3996b | 342 | KKASSERT(!IN_CRITICAL_SECT(curthread)); |
| 984263bc | 343 | loop: |
| 344ad853 MD |
344 | scheduler_notify = 0; |
| 345 | /* | |
| 346 | * Don't try to swap anything in if we are low on memory. | |
| 347 | */ | |
| 20479584 | 348 | if (vm_page_count_severe()) { |
| 4ecf7cc9 | 349 | vm_wait(0); |
| 984263bc MD |
350 | goto loop; |
| 351 | } | |
| 352 | ||
| 344ad853 MD |
353 | /* |
| 354 | * Look for a good candidate to wake up | |
| 355 | */ | |
| 8fa76237 MD |
356 | info.pp = NULL; |
| 357 | info.ppri = INT_MIN; | |
| 358 | allproc_scan(scheduler_callback, &info); | |
| 984263bc MD |
359 | |
| 360 | /* | |
| 344ad853 MD |
361 | * Nothing to do, back to sleep for at least 1/10 of a second. If |
| 362 | * we are woken up, immediately process the next request. If | |
| 363 | * multiple requests have built up the first is processed | |
| 364 | * immediately and the rest are staggered. | |
| 984263bc | 365 | */ |
| 8fa76237 | 366 | if ((p = info.pp) == NULL) { |
| 344ad853 MD |
367 | tsleep(&proc0, 0, "nowork", hz / 10); |
| 368 | if (scheduler_notify == 0) | |
| 369 | tsleep(&scheduler_notify, 0, "nowork", 0); | |
| 984263bc MD |
370 | goto loop; |
| 371 | } | |
| 984263bc MD |
372 | |
| 373 | /* | |
| 344ad853 MD |
374 | * Fault the selected process in, then wait for a short period of |
| 375 | * time and loop up. | |
| 376 | * | |
| 377 | * XXX we need a heuristic to get a measure of system stress and | |
| 378 | * then adjust our stagger wakeup delay accordingly. | |
| 984263bc | 379 | */ |
| 99ad9bc4 | 380 | lwkt_gettoken(&proc_token); |
| 984263bc MD |
381 | faultin(p); |
| 382 | p->p_swtime = 0; | |
| 8fa76237 | 383 | PRELE(p); |
| 99ad9bc4 | 384 | lwkt_reltoken(&proc_token); |
| 344ad853 | 385 | tsleep(&proc0, 0, "swapin", hz / 10); |
| 984263bc MD |
386 | goto loop; |
| 387 | } | |
| 388 | ||
| 99ad9bc4 MD |
389 | /* |
| 390 | * The caller must hold proc_token. | |
| 391 | */ | |
| 8fa76237 MD |
392 | static int |
| 393 | scheduler_callback(struct proc *p, void *data) | |
| 394 | { | |
| 395 | struct scheduler_info *info = data; | |
| 08f2f1bb | 396 | struct lwp *lp; |
| 8fa76237 MD |
397 | segsz_t pgs; |
| 398 | int pri; | |
| 399 | ||
| 4643740a | 400 | if (p->p_flags & P_SWAPWAIT) { |
| c7e98b2f SS |
401 | pri = 0; |
| 402 | FOREACH_LWP_IN_PROC(lp, p) { | |
| 403 | /* XXX lwp might need a different metric */ | |
| 404 | pri += lp->lwp_slptime; | |
| 405 | } | |
| 406 | pri += p->p_swtime - p->p_nice * 8; | |
| 8fa76237 MD |
407 | |
| 408 | /* | |
| 409 | * The more pages paged out while we were swapped, | |
| 410 | * the more work we have to do to get up and running | |
| 411 | * again and the lower our wakeup priority. | |
| 412 | * | |
| 413 | * Each second of sleep time is worth ~1MB | |
| 414 | */ | |
| b12defdc | 415 | lwkt_gettoken(&p->p_vmspace->vm_map.token); |
| 8fa76237 MD |
416 | pgs = vmspace_resident_count(p->p_vmspace); |
| 417 | if (pgs < p->p_vmspace->vm_swrss) { | |
| 418 | pri -= (p->p_vmspace->vm_swrss - pgs) / | |
| 419 | (1024 * 1024 / PAGE_SIZE); | |
| 420 | } | |
| b12defdc | 421 | lwkt_reltoken(&p->p_vmspace->vm_map.token); |
| 8fa76237 MD |
422 | |
| 423 | /* | |
| 424 | * If this process is higher priority and there is | |
| 425 | * enough space, then select this process instead of | |
| 426 | * the previous selection. | |
| 427 | */ | |
| 428 | if (pri > info->ppri) { | |
| 429 | if (info->pp) | |
| 430 | PRELE(info->pp); | |
| 431 | PHOLD(p); | |
| 432 | info->pp = p; | |
| 433 | info->ppri = pri; | |
| 434 | } | |
| 435 | } | |
| 436 | return(0); | |
| 437 | } | |
| 438 | ||
| 99ad9bc4 MD |
439 | /* |
| 440 | * SMP races ok. | |
| 441 | * No requirements. | |
| 442 | */ | |
| 344ad853 MD |
443 | void |
| 444 | swapin_request(void) | |
| 445 | { | |
| 446 | if (scheduler_notify == 0) { | |
| 447 | scheduler_notify = 1; | |
| 448 | wakeup(&scheduler_notify); | |
| 449 | } | |
| 450 | } | |
| 451 | ||
| 984263bc MD |
452 | #ifndef NO_SWAPPING |
| 453 | ||
/*
 * Evaluates to true when (p)->p_lock is zero and none of P_TRACED,
 * P_SYSTEM, P_SWAPPEDOUT or P_WEXIT is set in (p)->p_flags.
 */
#define swappable(p) \
	(((p)->p_lock == 0) && \
	((p)->p_flags & (P_TRACED|P_SYSTEM|P_SWAPPEDOUT|P_WEXIT)) == 0)


/*
 * Swap_idle_threshold1 is the guaranteed swapped in time for a process.
 * Exposed read/write as the vm.swap_idle_threshold1 sysctl; default 15.
 */
static int swap_idle_threshold1 = 15;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1,
	CTLFLAG_RW, &swap_idle_threshold1, 0, "Guaranteed process resident time (sec)");

/*
 * Swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.  Default is
 * one minute.  Exposed read/write as the vm.swap_idle_threshold2 sysctl.
 */
static int swap_idle_threshold2 = 60;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
	CTLFLAG_RW, &swap_idle_threshold2, 0, "Time (sec) a process can idle before being swapped");
| 984263bc MD |
474 | |
| 475 | /* | |
| 476 | * Swapout is driven by the pageout daemon. Very simple, we find eligible | |
| 344ad853 MD |
477 | * procs and mark them as being swapped out. This will cause the kernel |
| 478 | * to prefer to pageout those proc's pages first and the procs in question | |
| 479 | * will not return to user mode until the swapper tells them they can. | |
| 480 | * | |
| 984263bc MD |
481 | * If any procs have been sleeping/stopped for at least maxslp seconds, |
| 482 | * they are swapped. Else, we swap the longest-sleeping or stopped process, | |
| 483 | * if any, otherwise the longest-resident process. | |
| 484 | */ | |
| 8fa76237 MD |
485 | |
/* Per-process worker for swapout_procs(); data points at the action int. */
static int swapout_procs_callback(struct proc *p, void *data);

/*
 * Run swapout_procs_callback() over every process via allproc_scan().
 *
 * action - VM_SWAP_* request bits, handed to the callback by address
 *
 * No requirements.
 */
void
swapout_procs(int action)
{
	int request = action;

	allproc_scan(swapout_procs_callback, &request);
}
| 984263bc | 496 | |
| 99ad9bc4 | 497 | /* |
| b12defdc | 498 | * The caller must hold proc_token |
| 99ad9bc4 | 499 | */ |
| 8fa76237 MD |
500 | static int |
| 501 | swapout_procs_callback(struct proc *p, void *data) | |
| 502 | { | |
| 503 | struct vmspace *vm; | |
| 08f2f1bb | 504 | struct lwp *lp; |
| 8fa76237 | 505 | int action = *(int *)data; |
| c7e98b2f | 506 | int minslp = -1; |
| 8fa76237 MD |
507 | |
| 508 | if (!swappable(p)) | |
| 509 | return(0); | |
| 510 | ||
| a5fc46c9 | 511 | lwkt_gettoken(&p->p_token); |
| 8fa76237 MD |
512 | vm = p->p_vmspace; |
| 513 | ||
| c7e98b2f SS |
514 | /* |
| 515 | * We only consider active processes. | |
| 516 | */ | |
| a5fc46c9 MD |
517 | if (p->p_stat != SACTIVE && p->p_stat != SSTOP) { |
| 518 | lwkt_reltoken(&p->p_token); | |
| c7e98b2f | 519 | return(0); |
| a5fc46c9 | 520 | } |
| 164b8401 | 521 | |
| c7e98b2f | 522 | FOREACH_LWP_IN_PROC(lp, p) { |
| 8fa76237 MD |
523 | /* |
| 524 | * do not swap out a realtime process | |
| 525 | */ | |
| a5fc46c9 MD |
526 | if (RTP_PRIO_IS_REALTIME(lp->lwp_rtprio.type)) { |
| 527 | lwkt_reltoken(&p->p_token); | |
| 8fa76237 | 528 | return(0); |
| a5fc46c9 | 529 | } |
| 984263bc | 530 | |
| 8fa76237 MD |
531 | /* |
| 532 | * Guarentee swap_idle_threshold time in memory | |
| 533 | */ | |
| a5fc46c9 MD |
534 | if (lp->lwp_slptime < swap_idle_threshold1) { |
| 535 | lwkt_reltoken(&p->p_token); | |
| 8fa76237 | 536 | return(0); |
| a5fc46c9 | 537 | } |
| 8fa76237 MD |
538 | |
| 539 | /* | |
| 540 | * If the system is under memory stress, or if we | |
| 541 | * are swapping idle processes >= swap_idle_threshold2, | |
| 542 | * then swap the process out. | |
| 543 | */ | |
| 544 | if (((action & VM_SWAP_NORMAL) == 0) && | |
| 545 | (((action & VM_SWAP_IDLE) == 0) || | |
| 08f2f1bb | 546 | (lp->lwp_slptime < swap_idle_threshold2))) { |
| a5fc46c9 | 547 | lwkt_reltoken(&p->p_token); |
| 8fa76237 | 548 | return(0); |
| 984263bc | 549 | } |
| 8fa76237 | 550 | |
| c7e98b2f SS |
551 | if (minslp == -1 || lp->lwp_slptime < minslp) |
| 552 | minslp = lp->lwp_slptime; | |
| 553 | } | |
| 8fa76237 | 554 | |
| c7e98b2f SS |
555 | /* |
| 556 | * If the process has been asleep for awhile, swap | |
| 557 | * it out. | |
| 558 | */ | |
| 559 | if ((action & VM_SWAP_NORMAL) || | |
| 560 | ((action & VM_SWAP_IDLE) && | |
| 561 | (minslp > swap_idle_threshold2))) { | |
| 562 | swapout(p); | |
| 984263bc | 563 | } |
| c7e98b2f SS |
564 | |
| 565 | /* | |
| 566 | * cleanup our reference | |
| 567 | */ | |
| a5fc46c9 | 568 | lwkt_reltoken(&p->p_token); |
| c7e98b2f | 569 | |
| 8fa76237 | 570 | return(0); |
| 984263bc MD |
571 | } |
| 572 | ||
| 99ad9bc4 | 573 | /* |
| b12defdc | 574 | * The caller must hold proc_token and p->p_token |
| 99ad9bc4 | 575 | */ |
| 984263bc | 576 | static void |
| 57e43348 | 577 | swapout(struct proc *p) |
| 984263bc | 578 | { |
| 344ad853 MD |
579 | #ifdef INVARIANTS |
| 580 | if (swap_debug) | |
| 086c1d7e | 581 | kprintf("swapping out %d (%s)\n", p->p_pid, p->p_comm); |
| 984263bc | 582 | #endif |
| fde7ac71 | 583 | ++p->p_ru.ru_nswap; |
| b0c15cdf | 584 | |
| 984263bc MD |
585 | /* |
| 586 | * remember the process resident count | |
| 587 | */ | |
| 588 | p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); | |
| 4643740a | 589 | p->p_flags |= P_SWAPPEDOUT; |
| 984263bc MD |
590 | p->p_swtime = 0; |
| 591 | } | |
| 344ad853 | 592 | |
| 984263bc | 593 | #endif /* !NO_SWAPPING */ |
| 344ad853 | 594 |