From 99ad9bc46e5e89d85f4ccde397f743ce992265f4 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 12 Jun 2010 14:01:05 -0700 Subject: [PATCH] kernel - MPSAFE work - tokenize more vm stuff --- sys/kern/kern_proc.c | 243 ++++++++++++++++++++++++++++------------- sys/kern/sys_mqueue.c | 7 +- sys/vm/default_pager.c | 2 +- sys/vm/device_pager.c | 2 +- sys/vm/vm_contig.c | 39 ++++++- sys/vm/vm_glue.c | 46 +++++++- sys/vm/vm_kern.c | 93 ++++++++-------- sys/vm/vm_meter.c | 38 ++++++- sys/vm/vm_pageout.c | 67 +++++++++++- sys/vm/vm_pageout.h | 1 - sys/vm/vm_pager.c | 37 +++++-- sys/vm/vm_swap.c | 35 ++++-- 12 files changed, 451 insertions(+), 159 deletions(-) diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index cce2b3fc5b..64b5e978a5 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -85,7 +87,6 @@ struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; -struct spinlock allproc_spin; /* * Random component to nextpid generation. We mix in a random factor to make @@ -97,6 +98,9 @@ struct spinlock allproc_spin; */ static int randompid = 0; +/* + * No requirements. + */ static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { @@ -121,13 +125,14 @@ SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, /* * Initialize global process hashing structures. + * + * Called from the low level boot code only. */ void procinit(void) { LIST_INIT(&allproc); LIST_INIT(&zombproc); - spin_init(&allproc_spin); lwkt_init(); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); @@ -136,58 +141,87 @@ procinit(void) /* * Is p an inferior of the current process? + * + * No requirements. + * The caller must hold proc_token if the caller wishes a stable result. */ int inferior(struct proc *p) { - for (; p != curproc; p = p->p_pptr) - if (p->p_pid == 0) + lwkt_gettoken(&proc_token); + while (p != curproc) { + if (p->p_pid == 0) { + lwkt_reltoken(&proc_token); return (0); + } + p = p->p_pptr; + } + lwkt_reltoken(&proc_token); return (1); } /* * Locate a process by number + * + * XXX TODO - change API to PHOLD() the returned process ? + * + * No requirements. + * The caller must hold proc_token if the caller wishes a stable result. */ struct proc * pfind(pid_t pid) { struct proc *p; + lwkt_gettoken(&proc_token); LIST_FOREACH(p, PIDHASH(pid), p_hash) { - if (p->p_pid == pid) + if (p->p_pid == pid) { + lwkt_reltoken(&proc_token); return (p); + } } + lwkt_reltoken(&proc_token); return (NULL); } /* * Locate a process group by number + * + * No requirements. + * The caller must hold proc_token if the caller wishes a stable result. */ struct pgrp * pgfind(pid_t pgid) { struct pgrp *pgrp; + lwkt_gettoken(&proc_token); LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { if (pgrp->pg_id == pgid) return (pgrp); } + lwkt_reltoken(&proc_token); return (NULL); } /* * Move p to a new or existing process group (and session) + * + * No requirements. 
*/ int enterpgrp(struct proc *p, pid_t pgid, int mksess) { - struct pgrp *pgrp = pgfind(pgid); + struct pgrp *pgrp; + int error; + + lwkt_gettoken(&proc_token); + pgrp = pgfind(pgid); KASSERT(pgrp == NULL || !mksess, - ("enterpgrp: setsid into non-empty pgrp")); + ("enterpgrp: setsid into non-empty pgrp")); KASSERT(!SESS_LEADER(p), - ("enterpgrp: session leader attempted setpgrp")); + ("enterpgrp: session leader attempted setpgrp")); if (pgrp == NULL) { pid_t savepid = p->p_pid; @@ -196,11 +230,13 @@ enterpgrp(struct proc *p, pid_t pgid, int mksess) * new process group */ KASSERT(p->p_pid == pgid, - ("enterpgrp: new pgrp and pid != pgid")); - if ((np = pfind(savepid)) == NULL || np != p) - return (ESRCH); - MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, - M_WAITOK); + ("enterpgrp: new pgrp and pid != pgid")); + if ((np = pfind(savepid)) == NULL || np != p) { + error = ESRCH; + goto fatal; + } + MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), + M_PGRP, M_WAITOK); if (mksess) { struct session *sess; @@ -208,18 +244,18 @@ enterpgrp(struct proc *p, pid_t pgid, int mksess) * new session */ MALLOC(sess, struct session *, sizeof(struct session), - M_SESSION, M_WAITOK); + M_SESSION, M_WAITOK); sess->s_leader = p; sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, - sizeof(sess->s_login)); + sizeof(sess->s_login)); p->p_flag &= ~P_CONTROLT; pgrp->pg_session = sess; KASSERT(p == curproc, - ("enterpgrp: mksession and p != curproc")); + ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; sess_hold(pgrp->pg_session); @@ -230,8 +266,9 @@ enterpgrp(struct proc *p, pid_t pgid, int mksess) pgrp->pg_jobc = 0; SLIST_INIT(&pgrp->pg_sigiolst); lockinit(&pgrp->pg_lock, "pgwt", 0, 0); - } else if (pgrp == p->p_pgrp) - return (0); + } else if (pgrp == p->p_pgrp) { + goto done; + } /* * Adjust eligibility of affected pgrps to participate in job control. @@ -246,30 +283,38 @@ enterpgrp(struct proc *p, pid_t pgid, int mksess) pgdelete(p->p_pgrp); p->p_pgrp = pgrp; LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); - return (0); +done: + error = 0; +fatal: + lwkt_reltoken(&proc_token); + return (error); } /* - * remove process from process group + * Remove process from process group + * + * No requirements. */ int leavepgrp(struct proc *p) { - + lwkt_gettoken(&proc_token); LIST_REMOVE(p, p_pglist); if (LIST_EMPTY(&p->p_pgrp->pg_members)) pgdelete(p->p_pgrp); - p->p_pgrp = 0; + p->p_pgrp = NULL; + lwkt_reltoken(&proc_token); return (0); } /* - * delete a process group + * Delete a process group + * + * The caller must hold proc_token. */ static void pgdelete(struct pgrp *pgrp) { - /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. @@ -288,17 +333,25 @@ pgdelete(struct pgrp *pgrp) * Adjust the ref count on a session structure. When the ref count falls to * zero the tty is disassociated from the session and the session structure * is freed. Note that tty assocation is not itself ref-counted. + * + * No requirements. */ void sess_hold(struct session *sp) { + lwkt_gettoken(&tty_token); ++sp->s_count; + lwkt_reltoken(&tty_token); } +/* + * No requirements. 
+ */ void sess_rele(struct session *sp) { KKASSERT(sp->s_count > 0); + lwkt_gettoken(&tty_token); if (--sp->s_count == 0) { if (sp->s_ttyp && sp->s_ttyp->t_session) { #ifdef TTY_DO_FULL_CLOSE @@ -313,6 +366,7 @@ sess_rele(struct session *sp) } kfree(sp, M_SESSION); } + lwkt_reltoken(&tty_token); } /* @@ -324,17 +378,21 @@ sess_rele(struct session *sp) * process group and that of its children. * entering == 0 => p is leaving specified group. * entering == 1 => p is entering specified group. + * + * No requirements. */ void fixjobc(struct proc *p, struct pgrp *pgrp, int entering) { struct pgrp *hispgrp; - struct session *mysession = pgrp->pg_session; + struct session *mysession; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ + lwkt_gettoken(&proc_token); + mysession = pgrp->pg_session; if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) { if (entering) @@ -348,7 +406,7 @@ fixjobc(struct proc *p, struct pgrp *pgrp, int entering) * their process groups; if so, adjust counts for children's * process groups. */ - LIST_FOREACH(p, &p->p_children, p_sibling) + LIST_FOREACH(p, &p->p_children, p_sibling) { if ((hispgrp = p->p_pgrp) != pgrp && hispgrp->pg_session == mysession && p->p_stat != SZOMB) { @@ -357,12 +415,16 @@ fixjobc(struct proc *p, struct pgrp *pgrp, int entering) else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); } + } + lwkt_reltoken(&proc_token); } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. + * + * The caller must hold proc_token. */ static void orphanpg(struct pgrp *pg) @@ -384,7 +446,7 @@ orphanpg(struct pgrp *pg) * Add a new process to the allproc list and the PID hash. This * also assigns a pid to the new process. * - * MPALMOSTSAFE - acquires mplock for karc4random() call + * No requirements. */ void proc_add_allproc(struct proc *p) @@ -397,11 +459,11 @@ proc_add_allproc(struct proc *p) rel_mplock(); } - spin_lock_wr(&allproc_spin); + lwkt_gettoken(&proc_token); p->p_pid = proc_getnewpid_locked(random_offset); LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash); - spin_unlock_wr(&allproc_spin); + lwkt_reltoken(&proc_token); } /* @@ -409,7 +471,7 @@ proc_add_allproc(struct proc *p) * proc_add_allproc() to guarentee that the new pid is not reused before * the new process can be added to the allproc list. * - * MPSAFE - must be called with allproc_spin held. + * The caller must hold proc_token. */ static pid_t @@ -477,22 +539,20 @@ again: * Called from exit1 to remove a process from the allproc * list and move it to the zombie list. * - * MPSAFE + * No requirements. */ void proc_move_allproc_zombie(struct proc *p) { - spin_lock_wr(&allproc_spin); + lwkt_gettoken(&proc_token); while (p->p_lock) { - spin_unlock_wr(&allproc_spin); tsleep(p, 0, "reap1", hz / 10); - spin_lock_wr(&allproc_spin); } LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); p->p_stat = SZOMB; - spin_unlock_wr(&allproc_spin); + lwkt_reltoken(&proc_token); dsched_exit_proc(p); } @@ -501,27 +561,26 @@ proc_move_allproc_zombie(struct proc *p) * from the zombie list and the sibling list. This routine will block * if someone has a lock on the proces (p_lock). * - * MPSAFE + * No requirements. 
*/ void proc_remove_zombie(struct proc *p) { - spin_lock_wr(&allproc_spin); + lwkt_gettoken(&proc_token); while (p->p_lock) { - spin_unlock_wr(&allproc_spin); tsleep(p, 0, "reap1", hz / 10); - spin_lock_wr(&allproc_spin); } LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); - spin_unlock_wr(&allproc_spin); + lwkt_reltoken(&proc_token); } /* * Scan all processes on the allproc list. The process is automatically * held for the callback. A return value of -1 terminates the loop. * - * MPSAFE + * No requirements. + * The callback is made with the process held and proc_token held. */ void allproc_scan(int (*callback)(struct proc *, void *), void *data) @@ -529,24 +588,23 @@ allproc_scan(int (*callback)(struct proc *, void *), void *data) struct proc *p; int r; - spin_lock_rd(&allproc_spin); + lwkt_gettoken(&proc_token); LIST_FOREACH(p, &allproc, p_list) { PHOLD(p); - spin_unlock_rd(&allproc_spin); r = callback(p, data); - spin_lock_rd(&allproc_spin); PRELE(p); if (r < 0) break; } - spin_unlock_rd(&allproc_spin); + lwkt_reltoken(&proc_token); } /* * Scan all lwps of processes on the allproc list. The lwp is automatically * held for the callback. A return value of -1 terminates the loop. * - * possibly not MPSAFE, needs to access foreingn proc structures + * No requirements. + * The callback is made with the proces and lwp both held, and proc_token held. */ void alllwp_scan(int (*callback)(struct lwp *, void *), void *data) @@ -555,28 +613,27 @@ alllwp_scan(int (*callback)(struct lwp *, void *), void *data) struct lwp *lp; int r = 0; - spin_lock_rd(&allproc_spin); + lwkt_gettoken(&proc_token); LIST_FOREACH(p, &allproc, p_list) { PHOLD(p); - spin_unlock_rd(&allproc_spin); FOREACH_LWP_IN_PROC(lp, p) { LWPHOLD(lp); r = callback(lp, data); LWPRELE(lp); } - spin_lock_rd(&allproc_spin); PRELE(p); if (r < 0) break; } - spin_unlock_rd(&allproc_spin); + lwkt_reltoken(&proc_token); } /* * Scan all processes on the zombproc list. The process is automatically * held for the callback. A return value of -1 terminates the loop. * - * MPSAFE + * No requirements. + * The callback is made with the proces held and proc_token held. */ void zombproc_scan(int (*callback)(struct proc *, void *), void *data) @@ -584,23 +641,24 @@ zombproc_scan(int (*callback)(struct proc *, void *), void *data) struct proc *p; int r; - spin_lock_rd(&allproc_spin); + lwkt_gettoken(&proc_token); LIST_FOREACH(p, &zombproc, p_list) { PHOLD(p); - spin_unlock_rd(&allproc_spin); r = callback(p, data); - spin_lock_rd(&allproc_spin); PRELE(p); if (r < 0) break; } - spin_unlock_rd(&allproc_spin); + lwkt_reltoken(&proc_token); } #include "opt_ddb.h" #ifdef DDB #include +/* + * Debugging only + */ DB_SHOW_COMMAND(pgrpdump, pgrpdump) { struct pgrp *pgrp; @@ -630,18 +688,27 @@ DB_SHOW_COMMAND(pgrpdump, pgrpdump) /* * Locate a process on the zombie list. Return a held process or NULL. + * + * The caller must hold proc_token if a stable result is desired. + * No other requirements. */ struct proc * zpfind(pid_t pid) { struct proc *p; - LIST_FOREACH(p, &zombproc, p_list) + lwkt_gettoken(&proc_token); + LIST_FOREACH(p, &zombproc, p_list) { if (p->p_pid == pid) return (p); + } + lwkt_reltoken(&proc_token); return (NULL); } +/* + * The caller must hold proc_token. + */ static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) { @@ -673,6 +740,9 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) return (error); } +/* + * The caller must hold proc_token. 
+ */ static int sysctl_out_proc_kthread(struct thread *td, struct sysctl_req *req, int flags) { @@ -686,6 +756,9 @@ sysctl_out_proc_kthread(struct thread *td, struct sysctl_req *req, int flags) return(0); } +/* + * No requirements. + */ static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { @@ -708,23 +781,24 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) (oid != KERN_PROC_ALL && namelen != 1)) return (EINVAL); + lwkt_gettoken(&proc_token); if (oid == KERN_PROC_PID) { p = pfind((pid_t)name[0]); - if (!p) - return (0); + if (p == NULL) + goto post_threads; if (!PRISON_CHECK(cr1, p->p_ucred)) - return (0); + goto post_threads; PHOLD(p); error = sysctl_out_proc(p, req, flags); PRELE(p); - return (error); + goto post_threads; } if (!req->oldptr) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) - return (error); + goto post_threads; } for (doingzomb = 0; doingzomb <= 1; doingzomb++) { if (doingzomb) @@ -782,7 +856,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) error = sysctl_out_proc(p, req, flags); PRELE(p); if (error) - return (error); + goto post_threads; } } @@ -796,6 +870,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) origcpu = mycpu->gd_cpuid; if (!ps_showallthreads || jailed(cr1)) goto post_threads; + for (n = 1; n <= ncpus; ++n) { globaldata_t rgd; int nid; @@ -822,11 +897,12 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) error = sysctl_out_proc_kthread(td, req, doingzomb); lwkt_rele(td); if (error) - return (error); + goto post_threads; } } post_threads: - return (0); + lwkt_reltoken(&proc_token); + return (error); } /* @@ -834,6 +910,8 @@ post_threads: * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. + * + * No requirements. 
*/ static int sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) @@ -848,30 +926,40 @@ sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) if (namelen != 1) return (EINVAL); + lwkt_gettoken(&proc_token); p = pfind((pid_t)name[0]); - if (!p) - return (0); + if (p == NULL) + goto done; if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred)) - return (0); + goto done; - if (req->newptr && curproc != p) - return (EPERM); + if (req->newptr && curproc != p) { + error = EPERM; + goto done; + } - if (req->oldptr && p->p_args != NULL) - error = SYSCTL_OUT(req, p->p_args->ar_args, p->p_args->ar_length); - if (req->newptr == NULL) - return (error); + PHOLD(p); + if (req->oldptr && p->p_args != NULL) { + error = SYSCTL_OUT(req, p->p_args->ar_args, + p->p_args->ar_length); + } + if (req->newptr == NULL) { + PRELE(p); + goto done; + } if (p->p_args && --p->p_args->ar_ref == 0) FREE(p->p_args, M_PARGS); p->p_args = NULL; - if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) - return (error); + if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) { + PRELE(p); + goto done; + } MALLOC(pa, struct pargs *, sizeof(struct pargs) + req->newlen, - M_PARGS, M_WAITOK); + M_PARGS, M_WAITOK); pa->ar_ref = 1; pa->ar_length = req->newlen; error = SYSCTL_IN(req, pa->ar_args, req->newlen); @@ -879,6 +967,9 @@ sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) p->p_args = pa; else FREE(pa, M_PARGS); + PRELE(p); +done: + lwkt_reltoken(&proc_token); return (error); } diff --git a/sys/kern/sys_mqueue.c b/sys/kern/sys_mqueue.c index 0901a7c5bd..14b02e2ef3 100644 --- a/sys/kern/sys_mqueue.c +++ b/sys/kern/sys_mqueue.c @@ -115,9 +115,6 @@ MALLOC_DEFINE(M_MQBUF, "mqueues", "Buffers to message queues"); struct objcache_malloc_args mqueue_malloc_args = { sizeof(struct mqueue), M_MQBUF }; -/* Spinlock around the process list */ -extern struct spinlock allproc_spin; - /* * Initialize POSIX message queue subsystem. */ @@ -872,10 +869,10 @@ error: mqueue_freemsg(msg, size); } else if (notify) { /* Send the notify, if needed */ - spin_lock_wr(&allproc_spin); + lwkt_gettoken(&proc_token); /*kpsignal(notify, &ksi, NULL);*/ ksignal(notify, mq->mq_sig_notify.sigev_signo); - spin_unlock_wr(&allproc_spin); + lwkt_reltoken(&proc_token); } return error; diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index 2f6fc60404..7d331b5f7a 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -1,5 +1,5 @@ /* - * (MPASFE) + * (MPSAFE) * * Copyright (c) 1995, David Greenman * All rights reserved. diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index f9997b8d2c..ba971e2e8c 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -1,5 +1,5 @@ /* - * (MPASFE) + * (MPSAFE) * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991, 1993 diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index 056dce9015..6a2e90e85d 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project @@ -136,7 +138,7 @@ * Otherwise if the object is of any other type, the generic * pageout (daemon) flush routine is invoked. * - * We must be in a critical section. + * The caller must hold vm_token. 
*/ static int vm_contig_pg_clean(int queue) @@ -144,6 +146,8 @@ vm_contig_pg_clean(int queue) vm_object_t object; vm_page_t m, m_tmp, next; + ASSERT_LWKT_TOKEN_HELD(&vm_token); + for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) { KASSERT(m->queue == queue, ("vm_contig_clean: page %p's queue is not %d", @@ -180,6 +184,8 @@ vm_contig_pg_clean(int queue) * Attempt to flush (count) pages from the given page queue. This may or * may not succeed. Take up to passes and delay 1/20 of a second * between each pass. + * + * The caller must hold vm_token. */ static void vm_contig_pg_flush(int queue, int count) @@ -199,10 +205,12 @@ vm_contig_pg_flush(int queue, int count) * * Malloc()'s data structures have been used for collection of * statistics and for allocations of less than a page. + * + * The caller must hold vm_token. */ static int vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high, - unsigned long alignment, unsigned long boundary, int mflags) + unsigned long alignment, unsigned long boundary, int mflags) { int i, start, pass; vm_offset_t phys; @@ -357,6 +365,9 @@ again: * Remove pages previously allocated by vm_contig_pg_alloc, and * assume all references to the pages have been removed, and that * it is OK to add them back to the free list. + * + * Caller must ensure no races on the page range in question. + * No other requirements. */ void vm_contig_pg_free(int start, u_long size) @@ -369,11 +380,13 @@ vm_contig_pg_free(int start, u_long size) if (size == 0) panic("vm_contig_pg_free: size must not be 0"); + lwkt_gettoken(&vm_token); for (i = start; i < (start + size / PAGE_SIZE); i++) { m = &pga[i]; vm_page_busy(m); vm_page_free(m); } + lwkt_reltoken(&vm_token); } /* @@ -382,6 +395,8 @@ vm_contig_pg_free(int start, u_long size) * Map previously allocated (vm_contig_pg_alloc) range of pages from * vm_page_array[] into the KVA. Once mapped, the pages are part of * the Kernel, and are to free'ed with kmem_free(&kernel_map, addr, size). + * + * No requirements. */ vm_offset_t vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) @@ -395,6 +410,7 @@ vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) panic("vm_contig_pg_kmap: size must not be 0"); crit_enter(); + lwkt_gettoken(&vm_token); /* * We've found a contiguous chunk that meets our requirements. @@ -412,6 +428,7 @@ vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) */ vm_map_unlock(map); vm_map_entry_release(count); + lwkt_reltoken(&vm_token); crit_exit(); return (0); } @@ -440,10 +457,14 @@ vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) } vm_map_wire(map, addr, addr + size, 0); + lwkt_reltoken(&vm_token); crit_exit(); return (addr); } +/* + * No requirements. + */ void * contigmalloc( unsigned long size, /* should be size_t here and for malloc() */ @@ -458,6 +479,9 @@ contigmalloc( boundary, &kernel_map); } +/* + * No requirements. 
+ */ void * contigmalloc_map( unsigned long size, /* should be size_t here and for malloc() */ @@ -472,28 +496,37 @@ contigmalloc_map( int index; void *rv; + lwkt_gettoken(&vm_token); index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags); if (index < 0) { kprintf("contigmalloc_map: failed size %lu low=%llx " "high=%llx align=%lu boundary=%lu flags=%08x\n", size, (long long)low, (long long)high, alignment, boundary, flags); + lwkt_reltoken(&vm_token); return NULL; } rv = (void *)vm_contig_pg_kmap(index, size, map, flags); - if (!rv) + if (rv == NULL) vm_contig_pg_free(index, size); + lwkt_reltoken(&vm_token); return rv; } +/* + * No requirements. + */ void contigfree(void *addr, unsigned long size, struct malloc_type *type) { kmem_free(&kernel_map, (vm_offset_t)addr, size); } +/* + * No requirements. + */ vm_offset_t vm_page_alloc_contig( vm_offset_t size, diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index ddc47b54bc..569adc0343 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -114,6 +116,9 @@ static int scheduler_notify; static void swapout (struct proc *); +/* + * No requirements. + */ int kernacc(c_caddr_t addr, int len, int rw) { @@ -146,6 +151,9 @@ kernacc(c_caddr_t addr, int len, int rw) return (rv == TRUE); } +/* + * No requirements. + */ int useracc(c_caddr_t addr, int len, int rw) { @@ -186,6 +194,9 @@ useracc(c_caddr_t addr, int len, int rw) return (rv == TRUE); } +/* + * No requirements. + */ void vslock(caddr_t addr, u_int len) { @@ -196,6 +207,9 @@ vslock(caddr_t addr, u_int len) } } +/* + * No requirements. + */ void vsunlock(caddr_t addr, u_int len) { @@ -214,6 +228,8 @@ vsunlock(caddr_t addr, u_int len) * machine-dependent layer to fill those in and make the new process * ready to run. The new process is set up so that it returns directly * to user mode to avoid stack copying and relocation problems. + * + * No requirements. */ void vm_fork(struct proc *p1, struct proc *p2, int flags) @@ -258,6 +274,8 @@ vm_fork(struct proc *p1, struct proc *p2, int flags) * Called after process has been wait(2)'ed apon and is being reaped. * The idea is to reclaim resources that we could not reclaim while * the process was still executing. + * + * No requirements. */ void vm_waitproc(struct proc *p) @@ -268,6 +286,8 @@ vm_waitproc(struct proc *p) /* * Set default limits for VM system. Call during proc0's initialization. + * + * Called from the low level boot code only. */ void vm_init_limits(struct proc *p) @@ -295,6 +315,8 @@ vm_init_limits(struct proc *p) * Faultin the specified process. Note that the process can be in any * state. Just clear P_SWAPPEDOUT and call wakeup in case the process is * sleeping. + * + * No requirements. */ void faultin(struct proc *p) @@ -305,13 +327,14 @@ faultin(struct proc *p) * mode but cannot until P_SWAPPEDOUT gets cleared. */ crit_enter(); + lwkt_gettoken(&proc_token); p->p_flag &= ~(P_SWAPPEDOUT | P_SWAPWAIT); #ifdef INVARIANTS if (swap_debug) kprintf("swapping in %d (%s)\n", p->p_pid, p->p_comm); #endif wakeup(p); - + lwkt_reltoken(&proc_token); crit_exit(); } } @@ -324,7 +347,6 @@ faultin(struct proc *p) * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. 
*/ - struct scheduler_info { struct proc *pp; int ppri; @@ -376,13 +398,18 @@ loop: * XXX we need a heuristic to get a measure of system stress and * then adjust our stagger wakeup delay accordingly. */ + lwkt_gettoken(&proc_token); faultin(p); p->p_swtime = 0; PRELE(p); + lwkt_reltoken(&proc_token); tsleep(&proc0, 0, "swapin", hz / 10); goto loop; } +/* + * The caller must hold proc_token. + */ static int scheduler_callback(struct proc *p, void *data) { @@ -428,6 +455,10 @@ scheduler_callback(struct proc *p, void *data) return(0); } +/* + * SMP races ok. + * No requirements. + */ void swapin_request(void) { @@ -473,12 +504,20 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, static int swapout_procs_callback(struct proc *p, void *data); +/* + * No requirements. + */ void swapout_procs(int action) { + lwkt_gettoken(&vmspace_token); allproc_scan(swapout_procs_callback, &action); + lwkt_reltoken(&vmspace_token); } +/* + * The caller must hold proc_token and vmspace_token. + */ static int swapout_procs_callback(struct proc *p, void *data) { @@ -546,6 +585,9 @@ swapout_procs_callback(struct proc *p, void *data) return(0); } +/* + * The caller must hold proc_token and vmspace_token. + */ static void swapout(struct proc *p) { diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 1c35838c49..ba3ff65e8d 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -92,10 +94,10 @@ struct vm_map clean_map; struct vm_map buffer_map; /* - * kmem_alloc_pageable: + * Allocate pageable memory to the kernel's address map. "map" must + * be kernel_map or a submap of kernel_map. * - * Allocate pageable memory to the kernel's address map. - * "map" must be kernel_map or a submap of kernel_map. + * No requirements. */ vm_offset_t kmem_alloc_pageable(vm_map_t map, vm_size_t size) @@ -117,9 +119,9 @@ kmem_alloc_pageable(vm_map_t map, vm_size_t size) } /* - * kmem_alloc_nofault: + * Same as kmem_alloc_pageable, except that it create a nofault entry. * - * Same as kmem_alloc_pageable, except that it create a nofault entry. + * No requirements. */ vm_offset_t kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align) @@ -141,8 +143,9 @@ kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align) } /* - * Allocate wired-down memory in the kernel's address map - * or a submap. + * Allocate wired-down memory in the kernel's address map or a submap. + * + * No requirements. */ vm_offset_t kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags) @@ -205,6 +208,7 @@ kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags) * race with page-out. vm_map_wire will wire the pages. */ + lwkt_gettoken(&vm_token); for (i = 0; i < size; i += PAGE_SIZE) { vm_page_t mem; @@ -216,29 +220,25 @@ kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags) vm_page_flag_clear(mem, PG_ZERO); vm_page_wakeup(mem); } + lwkt_reltoken(&vm_token); /* * And finally, mark the data as non-pageable. */ - - vm_map_wire(map, (vm_offset_t) addr, addr + size, kmflags); + vm_map_wire(map, (vm_offset_t)addr, addr + size, kmflags); return (addr); } /* - * kmem_free: - * - * Release a region of kernel virtual memory allocated - * with kmem_alloc, and return the physical pages - * associated with that region. + * Release a region of kernel virtual memory allocated with kmem_alloc, + * and return the physical pages associated with that region. * - * WARNING! 
If the caller entered pages into the region using - * pmap_kenter() it must remove the pages using pmap_kremove[_quick]() - * before freeing the underlying kmem, otherwise resident_count will - * be mistabulated. + * WARNING! If the caller entered pages into the region using pmap_kenter() + * it must remove the pages using pmap_kremove[_quick]() before freeing the + * underlying kmem, otherwise resident_count will be mistabulated. * - * This routine may not block on kernel maps. + * No requirements. */ void kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size) @@ -247,17 +247,17 @@ kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size) } /* - * kmem_suballoc: - * - * Used to break a system map into smaller maps, usually to reduce - * contention and to provide large KVA spaces for subsystems like the - * buffer cache. + * Used to break a system map into smaller maps, usually to reduce + * contention and to provide large KVA spaces for subsystems like the + * buffer cache. * * parent Map to take range from * result * size Size of range to find * min, max Returned endpoints of map * pageable Can the region be paged + * + * No requirements. */ void kmem_suballoc(vm_map_t parent, vm_map_t result, @@ -267,6 +267,7 @@ kmem_suballoc(vm_map_t parent, vm_map_t result, size = round_page(size); + lwkt_gettoken(&vm_token); *min = (vm_offset_t) vm_map_min(parent); ret = vm_map_find(parent, NULL, (vm_offset_t) 0, min, size, PAGE_SIZE, @@ -282,17 +283,15 @@ kmem_suballoc(vm_map_t parent, vm_map_t result, vm_map_init(result, *min, *max, vm_map_pmap(parent)); if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); + lwkt_reltoken(&vm_token); } /* - * kmem_alloc_wait: - * - * Allocates pageable memory from a sub-map of the kernel. If the submap - * has no room, the caller sleeps waiting for more memory in the submap. + * Allocates pageable memory from a sub-map of the kernel. If the submap + * has no room, the caller sleeps waiting for more memory in the submap. * - * This routine may block. + * No requirements. */ - vm_offset_t kmem_alloc_wait(vm_map_t map, vm_size_t size) { @@ -330,14 +329,15 @@ kmem_alloc_wait(vm_map_t map, vm_size_t size) 0); vm_map_unlock(map); vm_map_entry_release(count); + return (addr); } /* - * kmem_free_wakeup: + * Returns memory to a submap of the kernel, and wakes up any processes + * waiting for memory in that map. * - * Returns memory to a submap of the kernel, and wakes up any processes - * waiting for memory in that map. + * No requirements. */ void kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size) @@ -353,20 +353,19 @@ kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size) } /* - * kmem_init: + * Create the kernel_map and insert mappings to cover areas already + * allocated or reserved thus far. That is, the area (KvaStart,start) + * and (end,KvaEnd) must be marked as allocated. * - * Create the kernel_map and insert mappings to cover areas already - * allocated or reserved thus far. That is, the area (KvaStart,start) - * and (end,KvaEnd) must be marked as allocated. + * virtual2_start/end is a cutout Between KvaStart and start, + * for x86_64 due to the location of KERNBASE (at -2G). * - * virtual2_start/end is a cutout Between KvaStart and start, - * for x86_64 due to the location of KERNBASE (at -2G). 
+ * We could use a min_offset of 0 instead of KvaStart, but since the + * min_offset is not used for any calculations other then a bounds check + * it does not effect readability. KvaStart is more appropriate. * - * We could use a min_offset of 0 instead of KvaStart, but since the - * min_offset is not used for any calculations other then a bounds check - * it does not effect readability. KvaStart is more appropriate. - * - * Depend on the zalloc bootstrap cache to get our vm_map_entry_t. + * Depend on the zalloc bootstrap cache to get our vm_map_entry_t. + * Called from the low level boot code only. */ void kmem_init(vm_offset_t start, vm_offset_t end) @@ -411,6 +410,9 @@ kmem_init(vm_offset_t start, vm_offset_t end) vm_map_entry_release(count); } +/* + * No requirements. + */ static int kvm_size(SYSCTL_HANDLER_ARGS) { @@ -421,6 +423,9 @@ kvm_size(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); +/* + * No requirements. + */ static int kvm_free(SYSCTL_HANDLER_ARGS) { diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 3a436c4a79..71eacf7fbe 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -78,6 +80,9 @@ SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, static int do_vmtotal_callback(struct proc *p, void *data); +/* + * No requirements. + */ static int do_vmtotal(SYSCTL_HANDLER_ARGS) { @@ -107,6 +112,7 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) /* * Calculate object memory usage statistics. */ + lwkt_gettoken(&vm_token); for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { @@ -135,9 +141,13 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) } } totalp->t_free = vmstats.v_free_count + vmstats.v_cache_count; + lwkt_reltoken(&vm_token); return (sysctl_handle_opaque(oidp, totalp, sizeof total, req)); } +/* + * The caller must hold proc_token. + */ static int do_vmtotal_callback(struct proc *p, void *data) { @@ -179,13 +189,17 @@ do_vmtotal_callback(struct proc *p, void *data) return (0); } } + /* * Note active objects. */ paging = 0; + lwkt_gettoken(&vm_token); if (p->p_vmspace) { - for (map = &p->p_vmspace->vm_map, entry = map->header.next; - entry != &map->header; entry = entry->next) { + map = &p->p_vmspace->vm_map; + vm_map_lock_read(map); + for (entry = map->header.next; + entry != &map->header; entry = entry->next) { if (entry->maptype != VM_MAPTYPE_NORMAL && entry->maptype != VM_MAPTYPE_VPAGETABLE) { continue; @@ -195,13 +209,17 @@ do_vmtotal_callback(struct proc *p, void *data) vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); paging |= entry->object.vm_object->paging_in_progress; } + vm_map_unlock_read(map); } + lwkt_reltoken(&vm_token); if (paging) totalp->t_pw++; return(0); } - +/* + * No requirements. + */ static int do_vmstats(SYSCTL_HANDLER_ARGS) { @@ -209,6 +227,9 @@ do_vmstats(SYSCTL_HANDLER_ARGS) return (sysctl_handle_opaque(oidp, &vms, sizeof(vms), req)); } +/* + * No requirements. + */ static int do_vmmeter(SYSCTL_HANDLER_ARGS) { @@ -242,6 +263,8 @@ do_vmmeter(SYSCTL_HANDLER_ARGS) * structure. * * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) + * + * No requirements. */ static int vcnt(SYSCTL_HANDLER_ARGS) @@ -257,6 +280,9 @@ vcnt(SYSCTL_HANDLER_ARGS) return(SYSCTL_OUT(req, &count, sizeof(int))); } +/* + * No requirements. 
+ */ static int vcnt_intr(SYSCTL_HANDLER_ARGS) { @@ -402,6 +428,9 @@ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, SYSCTL_INT(_vm_stats_misc, OID_AUTO, zero_page_count, CTLFLAG_RD, &vm_page_zero_count, 0, ""); +/* + * No requirements. + */ static int do_vmmeter_pcpu(SYSCTL_HANDLER_ARGS) { @@ -420,6 +449,9 @@ do_vmmeter_pcpu(SYSCTL_HANDLER_ARGS) return (sysctl_handle_opaque(oidp, &vmm, sizeof(vmm), req)); } +/* + * Called from the low level boot code only. + */ static void vmmeter_init(void *dummy __unused) { diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index a32ecf76c8..32a57e1f61 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson @@ -221,6 +223,9 @@ static void vm_pageout_page_stats(void); /* * Update vm_load to slow down faulting processes. + * + * SMP races ok. + * No requirements. */ void vm_fault_ratecheck(void) @@ -243,8 +248,9 @@ vm_fault_ratecheck(void) * We set the busy bit to cause potential page faults on this page to * block. Note the careful timing, however, the busy bit isn't set till * late and we cannot do anything that will mess with the page. + * + * The caller must hold vm_token. */ - static int vm_pageout_clean(vm_page_t m) { @@ -380,6 +386,8 @@ more: * reference count all in here rather then in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. + * + * The caller must hold vm_token. */ int vm_pageout_flush(vm_page_t *mc, int count, int flags) @@ -389,6 +397,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) int numpagedout = 0; int i; + ASSERT_LWKT_TOKEN_HELD(&vm_token); + /* * Initiate I/O. Bump the vm_page_t->busy counter. */ @@ -485,13 +495,14 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) * deactivate all of the pages in the object and its * backing_objects. * - * The object and map must be locked. + * The map must be locked. + * The caller must hold vm_token. */ static int vm_pageout_object_deactivate_pages_callback(vm_page_t, void *); static void vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object, - vm_pindex_t desired, int map_remove_only) + vm_pindex_t desired, int map_remove_only) { struct rb_vm_page_scan_info info; int remove_mode; @@ -526,7 +537,10 @@ vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object, object = object->backing_object; } } - + +/* + * The caller must hold vm_token. + */ static int vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data) { @@ -584,8 +598,10 @@ vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data) } /* - * deactivate some number of pages in a map, try to do it fairly, but + * Deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. + * + * The caller must hold vm_token. */ static void vm_pageout_map_deactivate_pages(vm_map_t map, vm_pindex_t desired) @@ -664,8 +680,10 @@ vm_pageout_map_deactivate_pages(vm_map_t map, vm_pindex_t desired) * Don't try to be fancy - being fancy can lead to vnode deadlocks. We * only do it for OBJT_DEFAULT and OBJT_SWAP objects which we know can * be trivially freed. + * + * The caller must hold vm_token. */ -void +static void vm_pageout_page_free(vm_page_t m) { vm_object_t object = m->object; @@ -690,6 +708,9 @@ struct vm_pageout_scan_info { static int vm_pageout_scan_callback(struct proc *p, void *data); +/* + * The caller must hold vm_token. 
+ */ static int vm_pageout_scan(int pass) { @@ -1284,6 +1305,9 @@ rescan0: return(inactive_shortage); } +/* + * The caller must hold vm_token and proc_token. + */ static int vm_pageout_scan_callback(struct proc *p, void *data) { @@ -1334,6 +1358,8 @@ vm_pageout_scan_callback(struct proc *p, void *data) * so that during long periods of time where there is no paging, * that some statistic accumulation still occurs. This code * helps the situation where paging just starts to occur. + * + * The caller must hold vm_token. */ static void vm_pageout_page_stats(void) @@ -1423,6 +1449,9 @@ vm_pageout_page_stats(void) crit_exit(); } +/* + * The caller must hold vm_token. + */ static int vm_pageout_free_page_calc(vm_size_t count) { @@ -1449,6 +1478,8 @@ vm_pageout_free_page_calc(vm_size_t count) /* * vm_pageout is the high level pageout daemon. + * + * No requirements. */ static void vm_pageout(void) @@ -1456,6 +1487,11 @@ vm_pageout(void) int pass; int inactive_shortage; + /* + * Permanently hold vm_token. + */ + lwkt_gettoken(&vm_token); + /* * Initialize some paging parameters. */ @@ -1632,6 +1668,9 @@ vm_pageout(void) * * If the pagedaemon is already active bump vm_pages_needed as a hint * that there are even more requests pending. + * + * SMP races ok? + * No requirements. */ void pagedaemon_wakeup(void) @@ -1647,6 +1686,11 @@ pagedaemon_wakeup(void) } #if !defined(NO_SWAPPING) + +/* + * SMP races ok? + * No requirements. + */ static void vm_req_vmdaemon(void) { @@ -1660,9 +1704,17 @@ vm_req_vmdaemon(void) static int vm_daemon_callback(struct proc *p, void *data __unused); +/* + * No requirements. + */ static void vm_daemon(void) { + /* + * Permanently hold vm_token. + */ + lwkt_gettoken(&vm_token); + while (TRUE) { tsleep(&vm_daemon_needed, 0, "psleep", 0); if (vm_pageout_req_swapout) { @@ -1677,6 +1729,9 @@ vm_daemon(void) } } +/* + * Caller must hold vm_token and proc_token. + */ static int vm_daemon_callback(struct proc *p, void *data __unused) { diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 7caf8906de..cc662ef140 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -114,7 +114,6 @@ extern void vm_waitpfault (void); void vm_pageout_page (vm_page_t, vm_object_t); void vm_pageout_cluster (vm_page_t, vm_object_t); int vm_pageout_flush (vm_page_t *, int, int); -void vm_pageout_page_free (vm_page_t); #endif diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 889b848bd1..7021669436 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -105,12 +107,18 @@ static void dead_pager_putpages (vm_object_t, vm_page_t *, int, int, int *); static boolean_t dead_pager_haspage (vm_object_t, vm_pindex_t); static void dead_pager_dealloc (vm_object_t); +/* + * No requirements. + */ static int dead_pager_getpage(vm_object_t obj, vm_page_t *mpp, int seqaccess) { return VM_PAGER_FAIL; } +/* + * No requirements. + */ static void dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) @@ -122,12 +130,18 @@ dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, } } +/* + * No requirements. + */ static int dead_pager_haspage(vm_object_t object, vm_pindex_t pindex) { return FALSE; } +/* + * No requirements. 
+ */ static void dead_pager_dealloc(vm_object_t object) { @@ -171,16 +185,21 @@ static vm_offset_t swapbkva; /* swap buffers kva */ static TAILQ_HEAD(swqueue, buf) bswlist; static struct spinlock bswspin = SPINLOCK_INITIALIZER(&bswspin); +/* + * Initialize the swap buffer list. + * + * Called from the low level boot code only. + */ static void vm_pager_init(void *arg __unused) { - /* - * Initialize the swap buffer list. - */ TAILQ_INIT(&bswlist); } SYSINIT(vm_mem, SI_BOOT1_VM, SI_ORDER_SECOND, vm_pager_init, NULL) +/* + * Called from the low level boot code only. + */ void vm_pager_bufferinit(void) { @@ -212,6 +231,9 @@ vm_pager_bufferinit(void) cluster_pbuf_freecnt = nswbuf / 2; } +/* + * No requirements. + */ void vm_pager_deallocate(vm_object_t object) { @@ -248,6 +270,8 @@ vm_pager_sync(void) /* * Initialize a physical buffer. + * + * No requirements. */ static void initpbuf(struct buf *bp) @@ -279,7 +303,7 @@ initpbuf(struct buf *bp) * NOTE: pfreecnt can be NULL, but this 'feature' will be removed * relatively soon when the rest of the subsystems get smart about it. XXX * - * MPSAFE + * No requirements. */ struct buf * getpbuf(int *pfreecnt) @@ -318,7 +342,7 @@ getpbuf(int *pfreecnt) * Note that there is no NULL hack here - all subsystems using this * call understand how to use pfreecnt. * - * MPSAFE + * No requirements. */ struct buf * trypbuf(int *pfreecnt) @@ -347,7 +371,7 @@ trypbuf(int *pfreecnt) * NOTE: pfreecnt can be NULL, but this 'feature' will be removed * relatively soon when the rest of the subsystems get smart about it. XXX * - * MPSAFE + * No requirements. */ void relpbuf(struct buf *bp, int *pfreecnt) @@ -378,4 +402,3 @@ relpbuf(struct buf *bp, int *pfreecnt) if (wake_freecnt) wakeup(pfreecnt); } - diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index f92566cdec..6e3c532743 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -60,6 +62,7 @@ #include #include +#include /* * Indirect driver for multi-controller paging. @@ -71,6 +74,7 @@ static struct swdevt should_be_malloced[NSWAPDEV]; struct swdevt *swdevt = should_be_malloced; /* exported to pstat/systat */ static swblk_t nswap; /* first block after the interleaved devs */ +static struct mtx swap_mtx = MTX_INITIALIZER; int nswdev = NSWAPDEV; /* exported to pstat/systat */ int vm_swap_size; int vm_swap_max; @@ -79,16 +83,13 @@ static int swapdev_strategy (struct vop_strategy_args *ap); struct vnode *swapdev_vp; /* - * swapdev_strategy: - * - * vn_strategy() for swapdev_vp. - * Perform swap strategy interleave device selection. + * (struct vnode *a_vp, struct bio *b_bio) * - * The bp is expected to be locked and on call. + * vn_strategy() for swapdev_vp. Perform swap strategy interleave device + * selection. * - * (struct vnode *a_vp, struct bio *b_bio) + * No requirements. */ - static int swapdev_strategy(struct vop_strategy_args *ap) { @@ -174,7 +175,7 @@ VNODEOP_SET(swapdev_vnode_vops); * which must be in the swdevsw. Return EBUSY * if already swapping on this device. * - * MPALMOSTSAFE + * No requirements. 
*/ int sys_swapon(struct swapon_args *uap) @@ -192,6 +193,7 @@ sys_swapon(struct swapon_args *uap) if (error) return (error); + mtx_lock(&swap_mtx); get_mplock(); vp = NULL; error = nlookup_init(&nd, uap->name, UIO_USERSPACE, NLC_FOLLOW); @@ -202,6 +204,7 @@ sys_swapon(struct swapon_args *uap) nlookup_done(&nd); if (error) { rel_mplock(); + mtx_unlock(&swap_mtx); return (error); } @@ -218,6 +221,7 @@ sys_swapon(struct swapon_args *uap) if (error) vrele(vp); rel_mplock(); + mtx_unlock(&swap_mtx); return (error); } @@ -249,6 +253,8 @@ swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) cred = td->td_ucred; + mtx_lock(&swap_mtx); + if (!swapdev_vp) { error = getspecialvnode(VT_NON, NULL, &swapdev_vnode_vops_p, &swapdev_vp, 0, 0); @@ -259,19 +265,24 @@ swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) } for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) { - if (sp->sw_vp == vp) + if (sp->sw_vp == vp) { + mtx_unlock(&swap_mtx); return EBUSY; + } if (!sp->sw_vp) goto found; } + mtx_unlock(&swap_mtx); return EINVAL; found: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_OPEN(vp, FREAD | FWRITE, cred, NULL); vn_unlock(vp); - if (error) + if (error) { + mtx_unlock(&swap_mtx); return (error); + } /* * v_rdev is not valid until after the VOP_OPEN() call. dev_psize() @@ -286,12 +297,14 @@ swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) dpsize = dev_dpsize(dev); if (dpsize == -1) { VOP_CLOSE(vp, FREAD | FWRITE); + mtx_unlock(&swap_mtx); return (ENXIO); } nblks = (u_quad_t)dpsize; } if (nblks == 0) { VOP_CLOSE(vp, FREAD | FWRITE); + mtx_unlock(&swap_mtx); return (ENXIO); } @@ -317,6 +330,7 @@ swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) kprintf("exceeded maximum of %d blocks per swap unit\n", (int)BLIST_MAXBLKS / nswdev); VOP_CLOSE(vp, FREAD | FWRITE); + mtx_unlock(&swap_mtx); return (ENXIO); } @@ -350,5 +364,6 @@ swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) } swap_pager_newswap(); + mtx_unlock(&swap_mtx); return (0); } -- 2.41.0
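The change this patch applies throughout is mechanical: critical sections formerly guarded by the allproc_spin spinlock (or only by crit_enter/crit_exit) are re-guarded by LWKT tokens (proc_token, vm_token, vmspace_token, tty_token), and each function gains a header comment stating its locking requirement. Below is a minimal before/after sketch of that shape, modeled on the proc_move_allproc_zombie() and proc_remove_zombie() hunks above; the identifiers are taken from the patch itself, and the fragment is illustrative only, not a standalone compilable unit.

	/*
	 * Before: a spinlock may not be held across a blocking call,
	 * so it has to be dropped and retaken around tsleep().
	 */
	spin_lock_wr(&allproc_spin);
	while (p->p_lock) {
		spin_unlock_wr(&allproc_spin);
		tsleep(p, 0, "reap1", hz / 10);
		spin_lock_wr(&allproc_spin);
	}
	LIST_REMOVE(p, p_list);
	spin_unlock_wr(&allproc_spin);

	/*
	 * After: an LWKT token can remain formally held across tsleep();
	 * it is released while the holder sleeps and reacquired before the
	 * holder resumes, so the drop/retake loop simplifies away.
	 */
	lwkt_gettoken(&proc_token);
	while (p->p_lock)
		tsleep(p, 0, "reap1", hz / 10);
	LIST_REMOVE(p, p_list);
	lwkt_reltoken(&proc_token);

This is also why the new header comments distinguish "No requirements." (the routine acquires whatever token it needs, so any caller may invoke it) from "The caller must hold proc_token." or "The caller must hold vm_token." (helpers reached only while the token is already held), and why several lookup routines add the caveat that the caller must itself hold the token if it wants the returned pointer to remain stable, since the token does not stay atomically held once the caller blocks.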