MP Implementation 3/4: MAJOR progress on SMP, full userland MP is now working!
[dragonfly.git] / sys/vm/vm_glue.c
/*
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_glue.c     8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_glue.c,v 1.94.2.4 2003/01/13 22:51:17 dillon Exp $
 * $DragonFly: src/sys/vm/vm_glue.c,v 1.11 2003/07/10 04:47:55 dillon Exp $
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/unistd.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <sys/user.h>
#include <vm/vm_page2.h>

/*
 * System initialization
 *
 * Note: proc0 from proc.h
 */

static void vm_init_limits __P((void *));
SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)

/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
static void scheduler __P((void *));
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)


static void swapout __P((struct proc *));

int
kernacc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_offset_t saddr, eaddr;
        vm_prot_t prot;

        KASSERT((rw & (~VM_PROT_ALL)) == 0,
            ("illegal ``rw'' argument to kernacc (%x)\n", rw));
        prot = rw;
        saddr = trunc_page((vm_offset_t)addr);
        eaddr = round_page((vm_offset_t)addr + len);
        vm_map_lock_read(kernel_map);
        rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
        vm_map_unlock_read(kernel_map);
        return (rv == TRUE);
}

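/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * consumer might use kernacc() to sanity-check a kernel-space buffer
 * before touching it.  check_kernel_buffer() is an invented name, not
 * a real kernel API.
 */
#if 0
static int
check_kernel_buffer(caddr_t buf, int len)
{
        /* kernacc() returns non-zero if [buf, buf + len) is accessible */
        if (!kernacc(buf, len, VM_PROT_READ | VM_PROT_WRITE))
                return (EFAULT);
        return (0);
}
#endif
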
int
useracc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_prot_t prot;
        vm_map_t map;
        vm_map_entry_t save_hint;

        KASSERT((rw & (~VM_PROT_ALL)) == 0,
            ("illegal ``rw'' argument to useracc (%x)\n", rw));
        prot = rw;
        /*
         * XXX - check separately to disallow access to user area and user
         * page tables - they are in the map.
         *
         * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  It was once
         * only used (as an end address) in trap.c.  Use it as an end address
         * here too.  This bogusness has spread.  I just fixed where it was
         * used as a max in vm_mmap.c.
         */
        if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS
            || (vm_offset_t) addr + len < (vm_offset_t) addr) {
                return (FALSE);
        }
        map = &curproc->p_vmspace->vm_map;
        vm_map_lock_read(map);
        /*
         * We save the map hint, and restore it.  Useracc appears to distort
         * the map hint unnecessarily.
         */
        save_hint = map->hint;
        rv = vm_map_check_protection(map,
            trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot);
        map->hint = save_hint;
        vm_map_unlock_read(map);

        return (rv == TRUE);
}

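/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * syscall path might use useracc() to pre-validate a user buffer before
 * a copyin().  The names uaddr, size and kbuf are invented locals.  The
 * check is advisory only - the mapping can change after the check, so
 * copyin()'s own fault handling is still required.
 */
#if 0
        if (!useracc((caddr_t)uaddr, size, VM_PROT_READ))
                return (EFAULT);
        error = copyin((caddr_t)uaddr, &kbuf, size);
#endif
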
void
vslock(addr, len)
        caddr_t addr;
        u_int len;
{
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), FALSE);
}

void
vsunlock(addr, len)
        caddr_t addr;
        u_int len;
{
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), TRUE);
}

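/*
 * Illustrative sketch (not part of the original file): vslock() and
 * vsunlock() are meant to bracket an operation that needs the user
 * pages resident, e.g. while a device transfers directly into user
 * memory.  udata and ulen are invented names for the example.
 */
#if 0
        vslock(udata, ulen);            /* wire the pages in */
        /* ... perform the transfer into udata ... */
        vsunlock(udata, ulen);          /* unwire them again */
#endif
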
/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 */
void
vm_fork(p1, p2, flags)
        register struct proc *p1, *p2;
        int flags;
{
        register struct user *up;
        struct thread *td2;

        if ((flags & RFPROC) == 0) {
                /*
                 * Divorce the memory: if it is shared, this essentially
                 * changes the shared memory amongst threads into COW
                 * locally.
                 */
                if ((flags & RFMEM) == 0) {
                        if (p1->p_vmspace->vm_refcnt > 1) {
                                vmspace_unshare(p1);
                        }
                }
                cpu_fork(p1, p2, flags);
                return;
        }

        if (flags & RFMEM) {
                p2->p_vmspace = p1->p_vmspace;
                p1->p_vmspace->vm_refcnt++;
        }

        while (vm_page_count_severe()) {
                VM_WAIT;
        }

        if ((flags & RFMEM) == 0) {
                p2->p_vmspace = vmspace_fork(p1->p_vmspace);

                pmap_pinit2(vmspace_pmap(p2->p_vmspace));

                if (p1->p_vmspace->vm_shm)
                        shmfork(p1, p2);
        }

        td2 = lwkt_alloc_thread(NULL);
        pmap_init_proc(p2, td2);
        lwkt_setpri(td2, TDPRI_KERN_USER);

        up = p2->p_addr;

        /*
         * p_stats currently points at fields in the user struct,
         * reached via p_addr rather than &u.  Copy parts of
         * p_stats; zero the rest of p_stats (statistics).
         *
         * If procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we don't need
         * to share sigacts, so we use the up->u_sigacts.
         */
        p2->p_stats = &up->u_stats;
        if (p2->p_sigacts == NULL) {
                if (p2->p_procsig->ps_refcnt != 1)
                        printf ("PID:%d NULL sigacts with refcnt not 1!\n", p2->p_pid);
                p2->p_sigacts = &up->u_sigacts;
                up->u_sigacts = *p1->p_sigacts;
        }

        bzero(&up->u_stats.pstat_startzero,
            (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
                (caddr_t) &up->u_stats.pstat_startzero));
        bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
            ((caddr_t) &up->u_stats.pstat_endcopy -
                (caddr_t) &up->u_stats.pstat_startcopy));


        /*
         * cpu_fork will copy and update the pcb, set up the kernel stack,
         * and make the child ready to run.
         */
        cpu_fork(p1, p2, flags);
}

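/*
 * Illustrative sketch (not part of the original file): how a caller
 * such as fork1() might select address-space semantics via the RF*
 * flags handled above.
 */
#if 0
        vm_fork(p1, p2, RFPROC);                /* classic fork: COW copy of the vmspace */
        vm_fork(p1, p2, RFPROC | RFMEM);        /* vfork/rfork style: share the vmspace */
#endif
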
/*
 * Called after process has been wait(2)'ed upon and is being reaped.
 * The idea is to reclaim resources that we could not reclaim while
 * the process was still executing.
 */
void
vm_waitproc(struct proc *p)
{
        cpu_proc_wait(p);
        vmspace_exitfree(p);    /* and clean-out the vmspace */
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 *
 * XXX should probably act directly on proc0.
 */
static void
vm_init_limits(udata)
        void *udata;
{
        register struct proc *p = udata;
        int rss_limit;

        /*
         * Set up the initial limits on process VM.  Set the maximum resident
         * set size to be half of (reasonably) available memory.  Since this
         * is a soft limit, it comes into effect only when the system is out
         * of memory - half of main memory helps to favor smaller processes,
         * and reduces thrashing of the object cache.
         */
        p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
        p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
        p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
        p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
        /* floor the limit at 2MB (512 pages at 4KB per page) */
        rss_limit = max(vmstats.v_free_count, 512);
        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
        p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}

void
faultin(p)
        struct proc *p;
{
        int s;

        if ((p->p_flag & P_INMEM) == 0) {

                ++p->p_lock;

                pmap_swapin_proc(p);

                s = splhigh();

                if (p->p_stat == SRUN)
                        setrunqueue(p);

                p->p_flag |= P_INMEM;

                /* undo our hold on the process (p_lock) acquired above */
                --p->p_lock;
                splx(s);

        }
}

/*
 * This swapin algorithm attempts to swap-in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 */
/* ARGSUSED*/
static void
scheduler(dummy)
        void *dummy;
{
        register struct proc *p;
        register int pri;
        struct proc *pp;
        int ppri;

loop:
        if (vm_page_count_min()) {
                VM_WAIT;
                goto loop;
        }

        pp = NULL;
        ppri = INT_MIN;
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                if (p->p_stat == SRUN &&
                    (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) {

                        pri = p->p_swtime + p->p_slptime;
                        if ((p->p_flag & P_SWAPINREQ) == 0) {
                                pri -= p->p_nice * 8;
                        }

                        /*
                         * If this process is higher priority and there is
                         * enough space, then select this process instead of
                         * the previous selection.
                         */
                        if (pri > ppri) {
                                pp = p;
                                ppri = pri;
                        }
                }
        }

        /*
         * Nothing to do, back to sleep.
         */
        if ((p = pp) == NULL) {
                tsleep(&proc0, PVM, "sched", 0);
                goto loop;
        }
        p->p_flag &= ~P_SWAPINREQ;

        /*
         * We would like to bring someone in (only if there is space).
         */
        faultin(p);
        p->p_swtime = 0;
        goto loop;
}

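/*
 * Worked example (added for illustration, not in the original): a
 * candidate with p_swtime = 10, p_slptime = 5 and p_nice = -5, and no
 * P_SWAPINREQ pending, scores pri = 10 + 5 - (-5 * 8) = 55.  The
 * candidate with the largest score is faulted in first.
 */
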
#ifndef NO_SWAPPING

#define swappable(p) \
        (((p)->p_lock == 0) && \
            ((p)->p_flag & (P_TRACED|P_SYSTEM|P_INMEM|P_WEXIT|P_SWAPPING)) == P_INMEM)


/*
 * swap_idle_threshold1 is the guaranteed swapped-in time for a process:
 * it must have been idle at least this many seconds before it becomes
 * a swapout candidate at all.
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1,
        CTLFLAG_RW, &swap_idle_threshold1, 0, "");

/*
 * swap_idle_threshold2 is the time (in seconds) that a process can be
 * idle before it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
        CTLFLAG_RW, &swap_idle_threshold2, 0, "");

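/*
 * Illustrative usage (not part of the original file): both knobs are
 * CTLFLAG_RW, so they can be tuned from userland at runtime, e.g.:
 *
 *      sysctl -w vm.swap_idle_threshold1=5
 *      sysctl -w vm.swap_idle_threshold2=30
 */
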
/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_procs(action)
int action;
{
        register struct proc *p;
        struct proc *outp, *outp2;
        int outpri, outpri2;
        int didswap = 0;

        outp = outp2 = NULL;
        outpri = outpri2 = INT_MIN;
retry:
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                struct vmspace *vm;
                if (!swappable(p))
                        continue;

                vm = p->p_vmspace;

                switch (p->p_stat) {
                default:
                        continue;

                case SSLEEP:
                case SSTOP:
                        /*
                         * do not swapout a realtime process
                         */
                        if (RTP_PRIO_IS_REALTIME(p->p_rtprio.type))
                                continue;

                        /*
                         * YYY do not swapout a proc waiting on a critical
                         * event.
                         *
                         * Guarantee swap_idle_threshold1 time in memory
                         */
                        if (p->p_slptime < swap_idle_threshold1)
                                continue;

                        /*
                         * If the system is under memory stress, or if we
                         * are swapping idle processes >= swap_idle_threshold2,
                         * then swap the process out.
                         */
                        if (((action & VM_SWAP_NORMAL) == 0) &&
                            (((action & VM_SWAP_IDLE) == 0) ||
                            (p->p_slptime < swap_idle_threshold2)))
                                continue;

                        ++vm->vm_refcnt;
                        /*
                         * Do not swapout a process that is waiting for VM
                         * data structures; there is a possible deadlock.
                         */
                        if (lockmgr(&vm->vm_map.lock,
                                        LK_EXCLUSIVE | LK_NOWAIT,
                                        (void *)0, curthread)) {
                                vmspace_free(vm);
                                continue;
                        }
                        vm_map_unlock(&vm->vm_map);
                        /*
                         * If the process has been asleep for a while and had
                         * most of its pages taken away already, swap it out.
                         */
                        if ((action & VM_SWAP_NORMAL) ||
                            ((action & VM_SWAP_IDLE) &&
                            (p->p_slptime > swap_idle_threshold2))) {
                                swapout(p);
                                vmspace_free(vm);
                                didswap++;
                                goto retry;
                        }

                        /*
                         * cleanup our reference
                         */
                        vmspace_free(vm);
                }
        }
        /*
         * If we swapped something out, and another process needed memory,
         * then wakeup the sched process.
         */
        if (didswap)
                wakeup(&proc0);
}

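/*
 * Illustrative sketch (assumed caller, not shown in this file): the
 * pageout daemon drives this routine, e.g. under memory pressure or
 * when idle swapping is enabled:
 *
 *      swapout_procs(VM_SWAP_NORMAL);
 *      swapout_procs(VM_SWAP_IDLE);
 */
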
static void
swapout(p)
        register struct proc *p;
{

#if defined(SWAP_DEBUG)
        printf("swapping out %d\n", p->p_pid);
#endif
        ++p->p_stats->p_ru.ru_nswap;
        /*
         * remember the process resident count
         */
        p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

        (void) splhigh();
        p->p_flag &= ~P_INMEM;
        p->p_flag |= P_SWAPPING;
        if (p->p_flag & P_ONRUNQ)
                remrunqueue(p);
        (void) spl0();

        pmap_swapout_proc(p);

        p->p_flag &= ~P_SWAPPING;
        p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */