From: Matthew Dillon Date: Thu, 25 May 2006 07:36:37 +0000 (+0000) Subject: Convert almost all of the remaining manual traversals of the allproc X-Git-Tag: v2.0.1~4880 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/8fa762378a6a238beeecb5a2225e1b593482db0b Convert almost all of the remaining manual traversals of the allproc list over to allproc_scan(). The allproc_scan() code is MPSAFE, and code which before just cached a proc pointer now PHOLD's it as well, but access to the various proc fields is *NOT* yet MPSAFE. Still, we are closer now. --- diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c index d01019196c..02da02980d 100644 --- a/sys/i386/i386/db_trace.c +++ b/sys/i386/i386/db_trace.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - * $DragonFly: src/sys/i386/i386/Attic/db_trace.c,v 1.12 2006/05/25 04:17:07 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/db_trace.c,v 1.13 2006/05/25 07:36:33 dillon Exp $ */ #include @@ -293,11 +293,7 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, (SP_REGS(&ddb_regs) - 4); callpc = PC_REGS(&ddb_regs); } else { - - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_pid == pid) - break; - } + p = pfind(pid); if (p == NULL) { db_printf("pid %d not found\n", pid); return; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 164894b268..80a2c3c592 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -40,7 +40,7 @@ * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/init_main.c,v 1.134.2.8 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/init_main.c,v 1.54 2006/05/23 20:35:10 dillon Exp $ + * $DragonFly: src/sys/kern/init_main.c,v 1.55 2006/05/25 07:36:34 dillon Exp $ */ #include "opt_init_path.h" @@ -363,20 +363,19 @@ proc0_init(void *dummy __unused) } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL) +static 
int proc0_post_callback(struct proc *p, void *data __unused); + /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct timespec ts; - struct proc *p; /* * Now we can look at the time, having had a chance to verify the * time from the file system. Pretend that proc0 started now. */ - FOREACH_PROC_IN_SYSTEM(p) { - microtime(&p->p_start); - } + allproc_scan(proc0_post_callback, NULL); /* * Give the ``random'' number generator a thump. @@ -385,6 +384,14 @@ proc0_post(void *dummy __unused) nanotime(&ts); srandom(ts.tv_sec ^ ts.tv_nsec); } + +static int +proc0_post_callback(struct proc *p, void *data __unused) +{ + microtime(&p->p_start); + return(0); +} + SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL) /* diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 93fdf8cebd..50aba9af49 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -70,7 +70,7 @@ * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.19 2004/02/28 00:43:31 tegge Exp $ - * $DragonFly: src/sys/kern/kern_descrip.c,v 1.62 2006/05/24 03:23:31 dillon Exp $ + * $DragonFly: src/sys/kern/kern_descrip.c,v 1.63 2006/05/25 07:36:34 dillon Exp $ */ #include "opt_compat.h" @@ -2313,17 +2313,19 @@ filedesc_to_leader_alloc(struct filedesc_to_leader *old, * * NOT MPSAFE - process list scan, SYSCTL_OUT (probably not mpsafe) */ + +struct sysctl_kern_file_info { + int count; + int error; + struct sysctl_req *req; +}; + +static int sysctl_kern_file_callback(struct proc *p, void *data); + static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) { - struct kinfo_file kf; - struct filedesc *fdp; - struct file *fp; - struct proc *p; - uid_t uid; - int count; - int error; - int n; + struct sysctl_kern_file_info info; /* * Note: because the number of file descriptors is calculated @@ -2345,37 +2347,10 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) * from the allproc list, we must skip it in that case to maintain * an unbroken chain 
through the allproc list. */ - count = 0; - error = 0; - LIST_FOREACH(p, &allproc, p_list) { - if (p->p_stat == SIDL || (p->p_flag & P_ZOMBIE)) - continue; - if (!PRISON_CHECK(req->td->td_proc->p_ucred, p->p_ucred) != 0) - continue; - if ((fdp = p->p_fd) == NULL) - continue; - PHOLD(p); - spin_lock_rd(&fdp->fd_spin); - for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_files[n].fp) == NULL) - continue; - if (req->oldptr == NULL) { - ++count; - } else { - uid = p->p_ucred ? p->p_ucred->cr_uid : -1; - kcore_make_file(&kf, fp, p->p_pid, uid, n); - spin_unlock_rd(&fdp->fd_spin); - error = SYSCTL_OUT(req, &kf, sizeof(kf)); - spin_lock_rd(&fdp->fd_spin); - if (error) - break; - } - } - spin_unlock_rd(&fdp->fd_spin); - PRELE(p); - if (error) - break; - } + info.count = 0; + info.error = 0; + info.req = req; + allproc_scan(sysctl_kern_file_callback, &info); /* * When just calculating the size, overestimate a bit to try to @@ -2383,10 +2358,49 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) * to fail later on. */ if (req->oldptr == NULL) { - count = (count + 16) + (count / 10); - error = SYSCTL_OUT(req, NULL, count * sizeof(kf)); + info.count = (info.count + 16) + (info.count / 10); + info.error = SYSCTL_OUT(req, NULL, + info.count * sizeof(struct kinfo_file)); } - return (error); + return (info.error); +} + +static int +sysctl_kern_file_callback(struct proc *p, void *data) +{ + struct sysctl_kern_file_info *info = data; + struct kinfo_file kf; + struct filedesc *fdp; + struct file *fp; + uid_t uid; + int n; + + if (p->p_stat == SIDL || (p->p_flag & P_ZOMBIE)) + return(0); + if (!PRISON_CHECK(info->req->td->td_proc->p_ucred, p->p_ucred) != 0) + return(0); + if ((fdp = p->p_fd) == NULL) + return(0); + spin_lock_rd(&fdp->fd_spin); + for (n = 0; n < fdp->fd_nfiles; ++n) { + if ((fp = fdp->fd_files[n].fp) == NULL) + continue; + if (info->req->oldptr == NULL) { + ++info->count; + } else { + uid = p->p_ucred ? 
p->p_ucred->cr_uid : -1; + kcore_make_file(&kf, fp, p->p_pid, uid, n); + spin_unlock_rd(&fdp->fd_spin); + info->error = SYSCTL_OUT(info->req, &kf, sizeof(kf)); + spin_lock_rd(&fdp->fd_spin); + if (info->error) + break; + } + } + spin_unlock_rd(&fdp->fd_spin); + if (info->error) + return(-1); + return(0); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 17078d0926..e0add34ef4 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -37,7 +37,7 @@ * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ - * $DragonFly: src/sys/kern/kern_resource.c,v 1.26 2006/05/23 20:35:10 dillon Exp $ + * $DragonFly: src/sys/kern/kern_resource.c,v 1.27 2006/05/25 07:36:34 dillon Exp $ */ #include "opt_compat.h" @@ -76,9 +76,17 @@ static struct uidinfo *uilookup (uid_t uid); * Resource controls and accounting. */ +struct getpriority_info { + int low; + int who; +}; + +static int getpriority_callback(struct proc *p, void *data); + int getpriority(struct getpriority_args *uap) { + struct getpriority_info info; struct proc *curp = curproc; struct proc *p; int low = PRIO_MAX + 1; @@ -113,11 +121,10 @@ getpriority(struct getpriority_args *uap) case PRIO_USER: if (uap->who == 0) uap->who = curp->p_ucred->cr_uid; - FOREACH_PROC_IN_SYSTEM(p) - if (PRISON_CHECK(curp->p_ucred, p->p_ucred) && - p->p_ucred->cr_uid == uap->who && - p->p_nice < low) - low = p->p_nice; + info.low = low; + info.who = uap->who; + allproc_scan(getpriority_callback, &info); + low = info.low; break; default: @@ -129,16 +136,42 @@ getpriority(struct getpriority_args *uap) return (0); } -/* ARGSUSED */ +/* + * Figure out the current lowest nice priority for processes owned + * by the specified user. 
+ */ +static +int +getpriority_callback(struct proc *p, void *data) +{ + struct getpriority_info *info = data; + + if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) && + p->p_ucred->cr_uid == info->who && + p->p_nice < info->low) { + info->low = p->p_nice; + } + return(0); +} + +struct setpriority_info { + int prio; + int who; + int error; + int found; +}; + +static int setpriority_callback(struct proc *p, void *data); + int setpriority(struct setpriority_args *uap) { + struct setpriority_info info; struct proc *curp = curproc; struct proc *p; int found = 0, error = 0; switch (uap->which) { - case PRIO_PROCESS: if (uap->who == 0) p = curp; @@ -171,12 +204,13 @@ setpriority(struct setpriority_args *uap) case PRIO_USER: if (uap->who == 0) uap->who = curp->p_ucred->cr_uid; - FOREACH_PROC_IN_SYSTEM(p) - if (p->p_ucred->cr_uid == uap->who && - PRISON_CHECK(curp->p_ucred, p->p_ucred)) { - error = donice(p, uap->prio); - found++; - } + info.prio = uap->prio; + info.who = uap->who; + info.error = 0; + info.found = 0; + allproc_scan(setpriority_callback, &info); + error = info.error; + found = info.found; break; default: @@ -187,6 +221,23 @@ setpriority(struct setpriority_args *uap) return (error); } +static +int +setpriority_callback(struct proc *p, void *data) +{ + struct setpriority_info *info = data; + int error; + + if (p->p_ucred->cr_uid == info->who && + PRISON_CHECK(curproc->p_ucred, p->p_ucred)) { + error = donice(p, info->prio); + if (error) + info->error = error; + ++info->found; + } + return(0); +} + static int donice(struct proc *chgp, int n) { diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 4df05cb76c..e41662b0f1 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -37,7 +37,7 @@ * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 * $FreeBSD: src/sys/kern/kern_sig.c,v 1.72.2.17 2003/05/16 16:34:34 obrien Exp $ - * $DragonFly: src/sys/kern/kern_sig.c,v 1.48 2006/05/17 18:30:20 dillon Exp $ + * $DragonFly: src/sys/kern/kern_sig.c,v 1.49 2006/05/25 
07:36:34 dillon Exp $ */ #include "opt_ktrace.h" @@ -596,26 +596,29 @@ sigaltstack(struct sigaltstack_args *uap) * Common code for kill process group/broadcast kill. * cp is calling process. */ +struct killpg_info { + int nfound; + int sig; +}; + +static int killpg_all_callback(struct proc *p, void *data); + static int killpg(int sig, int pgid, int all) { + struct killpg_info info; struct proc *cp = curproc; struct proc *p; struct pgrp *pgrp; - int nfound = 0; + + info.nfound = 0; + info.sig = sig; if (all) { /* * broadcast */ - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || - p == cp || !CANSIGNAL(p, sig)) - continue; - nfound++; - if (sig) - psignal(p, sig); - } + allproc_scan(killpg_all_callback, &info); } else { if (pgid == 0) { /* @@ -633,12 +636,27 @@ killpg(int sig, int pgid, int all) !CANSIGNAL(p, sig)) { continue; } - nfound++; + ++info.nfound; if (sig) psignal(p, sig); } } - return (nfound ? 0 : ESRCH); + return (info.nfound ? 0 : ESRCH); +} + +static int +killpg_all_callback(struct proc *p, void *data) +{ + struct killpg_info *info = data; + + if (p->p_pid <= 1 || (p->p_flag & P_SYSTEM) || + p == curproc || !CANSIGNAL(p, info->sig)) { + return (0); + } + ++info->nfound; + if (info->sig) + psignal(p, info->sig); + return(0); } int diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index a0671e9076..cd602e6fbc 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -37,7 +37,7 @@ * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 * $FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.6 2002/10/13 07:29:53 kbyanc Exp $ - * $DragonFly: src/sys/kern/kern_synch.c,v 1.60 2006/05/23 20:35:10 dillon Exp $ + * $DragonFly: src/sys/kern/kern_synch.c,v 1.61 2006/05/25 07:36:34 dillon Exp $ */ #include "opt_ktrace.h" @@ -166,71 +166,84 @@ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); * This code also allows us to store sysclock_t data in the process structure * without fear of an overrun, since sysclock_t are guarenteed 
to hold * several seconds worth of count. + * + * WARNING! callouts can preempt normal threads. However, they will not + * preempt a thread holding a spinlock so we *can* safely use spinlocks. */ -/* ARGSUSED */ +static int schedcpu_stats(struct proc *p, void *data __unused); +static int schedcpu_resource(struct proc *p, void *data __unused); + static void schedcpu(void *arg) { - struct proc *p; - u_int64_t ttime; + allproc_scan(schedcpu_stats, NULL); + allproc_scan(schedcpu_resource, NULL); + wakeup((caddr_t)&lbolt); + wakeup((caddr_t)&lbolt_syncer); + callout_reset(&schedcpu_callout, hz, schedcpu, NULL); +} + +/* + * General process statistics once a second + */ +static int +schedcpu_stats(struct proc *p, void *data __unused) +{ + crit_enter(); + p->p_swtime++; + if (p->p_stat == SSLEEP) + p->p_slptime++; /* - * General process statistics once a second + * Only recalculate processes that are active or have slept + * less then 2 seconds. The schedulers understand this. */ - FOREACH_PROC_IN_SYSTEM(p) { - crit_enter(); - p->p_swtime++; - if (p->p_stat == SSLEEP) - p->p_slptime++; + if (p->p_slptime <= 1) { + p->p_usched->recalculate(&p->p_lwp); + } else { + p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; + } + crit_exit(); + return(0); +} - /* - * Only recalculate processes that are active or have slept - * less then 2 seconds. The schedulers understand this. - */ - if (p->p_slptime <= 1) { - p->p_usched->recalculate(&p->p_lwp); - } else { - p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; - } +/* + * Resource checks. XXX break out since psignal/killproc can block, + * limiting us to one process killed per second. There is probably + * a better way. + */ +static int +schedcpu_resource(struct proc *p, void *data __unused) +{ + u_int64_t ttime; + + crit_enter(); + if (p->p_stat == SIDL || + (p->p_flag & P_ZOMBIE) || + p->p_limit == NULL || + p->p_thread == NULL + ) { crit_exit(); + return(0); } - /* - * Resource checks. 
XXX break out since psignal/killproc can block, - * limiting us to one process killed per second. There is probably - * a better way. - */ - FOREACH_PROC_IN_SYSTEM(p) { - crit_enter(); - if (p->p_stat == SIDL || - (p->p_flag & P_ZOMBIE) || - p->p_limit == NULL || - p->p_thread == NULL - ) { - crit_exit(); - continue; - } - ttime = p->p_thread->td_sticks + p->p_thread->td_uticks; - switch(plimit_testcpulimit(p->p_limit, ttime)) { - case PLIMIT_TESTCPU_KILL: - killproc(p, "exceeded maximum CPU limit"); - break; - case PLIMIT_TESTCPU_XCPU: - if ((p->p_flag & P_XCPU) == 0) { - p->p_flag |= P_XCPU; - psignal(p, SIGXCPU); - } - break; - default: - crit_exit(); - continue; + ttime = p->p_thread->td_sticks + p->p_thread->td_uticks; + + switch(plimit_testcpulimit(p->p_limit, ttime)) { + case PLIMIT_TESTCPU_KILL: + killproc(p, "exceeded maximum CPU limit"); + break; + case PLIMIT_TESTCPU_XCPU: + if ((p->p_flag & P_XCPU) == 0) { + p->p_flag |= P_XCPU; + psignal(p, SIGXCPU); } - crit_exit(); + break; + default: break; } - wakeup((caddr_t)&lbolt); - wakeup((caddr_t)&lbolt_syncer); - callout_reset(&schedcpu_callout, hz, schedcpu, NULL); + crit_exit(); + return(0); } /* @@ -892,34 +905,21 @@ uio_yield(void) * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. 
*/ +static int loadav_count_runnable(struct proc *p, void *data); + static void loadav(void *arg) { - int i, nrun; struct loadavg *avg; - struct proc *p; - thread_t td; + int i, nrun; - avg = &averunnable; nrun = 0; - FOREACH_PROC_IN_SYSTEM(p) { - switch (p->p_stat) { - case SRUN: - if ((td = p->p_thread) == NULL) - break; - if (td->td_flags & TDF_BLOCKED) - break; - /* fall through */ - case SIDL: - nrun++; - break; - default: - break; - } - } - for (i = 0; i < 3; i++) + allproc_scan(loadav_count_runnable, &nrun); + avg = &averunnable; + for (i = 0; i < 3; i++) { avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; + } /* * Schedule the next update to occur after 5 seconds, but add a @@ -927,7 +927,29 @@ loadav(void *arg) * run at regular intervals. */ callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)), - loadav, NULL); + loadav, NULL); +} + +static int +loadav_count_runnable(struct proc *p, void *data) +{ + int *nrunp = data; + thread_t td; + + switch (p->p_stat) { + case SRUN: + if ((td = p->p_thread) == NULL) + break; + if (td->td_flags & TDF_BLOCKED) + break; + /* fall through */ + case SIDL: + ++*nrunp; + break; + default: + break; + } + return(0); } /* ARGSUSED */ diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 81d6efef8e..85aefc65b1 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -37,7 +37,7 @@ * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.93 2006/05/24 03:23:31 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.94 2006/05/25 07:36:34 dillon Exp $ */ #include @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,7 @@ #include #include +#include static int checkvp_chdir (struct vnode *vn, struct thread *td); static void checkdirs (struct vnode *olddp, struct namecache *ncp); @@ -398,13 
+400,20 @@ update: * must be associated with the vnode representing the root of the * mount point. */ +struct checkdirs_info { + struct vnode *olddp; + struct vnode *newdp; + struct namecache *ncp; +}; + +static int checkdirs_callback(struct proc *p, void *data); + static void checkdirs(struct vnode *olddp, struct namecache *ncp) { - struct filedesc *fdp; + struct checkdirs_info info; struct vnode *newdp; struct mount *mp; - struct proc *p; if (olddp->v_usecount == 1) return; @@ -418,24 +427,66 @@ checkdirs(struct vnode *olddp, struct namecache *ncp) vfs_cache_setroot(newdp, cache_hold(ncp)); } - FOREACH_PROC_IN_SYSTEM(p) { - fdp = p->p_fd; - if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); - vref(newdp); - fdp->fd_cdir = newdp; - cache_drop(fdp->fd_ncdir); - fdp->fd_ncdir = cache_hold(ncp); + info.olddp = olddp; + info.newdp = newdp; + info.ncp = ncp; + allproc_scan(checkdirs_callback, &info); + vput(newdp); +} + +/* + * NOTE: callback is not MP safe because the scanned process's filedesc + * structure can be ripped out from under us, amoung other things. + */ +static int +checkdirs_callback(struct proc *p, void *data) +{ + struct checkdirs_info *info = data; + struct filedesc *fdp; + struct namecache *ncdrop1; + struct namecache *ncdrop2; + struct vnode *vprele1; + struct vnode *vprele2; + + if ((fdp = p->p_fd) != NULL) { + ncdrop1 = NULL; + ncdrop2 = NULL; + vprele1 = NULL; + vprele2 = NULL; + + /* + * MPUNSAFE - XXX fdp can be pulled out from under a + * foreign process. + * + * A shared filedesc is ok, we don't have to copy it + * because we are making this change globally. 
+ */ + spin_lock_wr(&fdp->fd_spin); + if (fdp->fd_cdir == info->olddp) { + vprele1 = fdp->fd_cdir; + vref(info->newdp); + fdp->fd_cdir = info->newdp; + ncdrop1 = fdp->fd_ncdir; + fdp->fd_ncdir = cache_hold(info->ncp); } - if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); - vref(newdp); - fdp->fd_rdir = newdp; - cache_drop(fdp->fd_nrdir); - fdp->fd_nrdir = cache_hold(ncp); + if (fdp->fd_rdir == info->olddp) { + vprele2 = fdp->fd_rdir; + vref(info->newdp); + fdp->fd_rdir = info->newdp; + ncdrop2 = fdp->fd_nrdir; + fdp->fd_nrdir = cache_hold(info->ncp); } + spin_unlock_wr(&fdp->fd_spin); + if (ncdrop1) + cache_drop(ncdrop1); + if (ncdrop2) + cache_drop(ncdrop2); + if (vprele1) + vrele(vprele1); + if (vprele2) + vrele(vprele2); } - vput(newdp); + return(0); } /* diff --git a/sys/platform/pc32/i386/db_trace.c b/sys/platform/pc32/i386/db_trace.c index e8f7860b01..9f42d2ce9c 100644 --- a/sys/platform/pc32/i386/db_trace.c +++ b/sys/platform/pc32/i386/db_trace.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. 
* * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/db_trace.c,v 1.12 2006/05/25 04:17:07 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/db_trace.c,v 1.13 2006/05/25 07:36:33 dillon Exp $ */ #include @@ -293,11 +293,7 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, (SP_REGS(&ddb_regs) - 4); callpc = PC_REGS(&ddb_regs); } else { - - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_pid == pid) - break; - } + p = pfind(pid); if (p == NULL) { db_printf("pid %d not found\n", pid); return; diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 639dd4fd10..28792113eb 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -37,7 +37,7 @@ * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD: src/sys/sys/proc.h,v 1.99.2.9 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/sys/proc.h,v 1.78 2006/05/24 18:59:50 dillon Exp $ + * $DragonFly: src/sys/sys/proc.h,v 1.79 2006/05/25 07:36:36 dillon Exp $ */ #ifndef _SYS_PROC_H_ @@ -376,11 +376,6 @@ MALLOC_DECLARE(M_PARGS); #define PRISON_CHECK(cr1, cr2) \ ((!(cr1)->cr_prison) || (cr1)->cr_prison == (cr2)->cr_prison) -/* - * Handy macro for LISTs. - */ -#define FOREACH_PROC_IN_SYSTEM(p) LIST_FOREACH((p), &allproc, p_list) - /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, * as it is used to represent "no process group". diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index c6bfebcb8d..37404e9ebd 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -60,7 +60,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_glue.c,v 1.94.2.4 2003/01/13 22:51:17 dillon Exp $ - * $DragonFly: src/sys/vm/vm_glue.c,v 1.40 2006/04/25 16:22:32 dillon Exp $ + * $DragonFly: src/sys/vm/vm_glue.c,v 1.41 2006/05/25 07:36:37 dillon Exp $ */ #include "opt_vm.h" @@ -356,15 +356,18 @@ faultin(struct proc *p) * time, it will be swapped in anyway. 
*/ -/* ARGSUSED*/ +struct scheduler_info { + struct proc *pp; + int ppri; +}; + +static int scheduler_callback(struct proc *p, void *data); + static void scheduler(void *dummy) { + struct scheduler_info info; struct proc *p; - struct proc *pp; - int pri; - int ppri; - segsz_t pgs; KKASSERT(!IN_CRITICAL_SECT(curthread)); loop: @@ -380,36 +383,9 @@ loop: /* * Look for a good candidate to wake up */ - pp = NULL; - ppri = INT_MIN; - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (p->p_flag & P_SWAPWAIT) { - pri = p->p_swtime + p->p_slptime - p->p_nice * 8; - - /* - * The more pages paged out while we were swapped, - * the more work we have to do to get up and running - * again and the lower our wakeup priority. - * - * Each second of sleep time is worth ~1MB - */ - pgs = vmspace_resident_count(p->p_vmspace); - if (pgs < p->p_vmspace->vm_swrss) { - pri -= (p->p_vmspace->vm_swrss - pgs) / - (1024 * 1024 / PAGE_SIZE); - } - - /* - * if this process is higher priority and there is - * enough space, then select this process instead of - * the previous selection. - */ - if (pri > ppri) { - pp = p; - ppri = pri; - } - } - } + info.pp = NULL; + info.ppri = INT_MIN; + allproc_scan(scheduler_callback, &info); /* * Nothing to do, back to sleep for at least 1/10 of a second. If @@ -417,7 +393,7 @@ loop: * multiple requests have built up the first is processed * immediately and the rest are staggered. 
*/ - if ((p = pp) == NULL) { + if ((p = info.pp) == NULL) { tsleep(&proc0, 0, "nowork", hz / 10); if (scheduler_notify == 0) tsleep(&scheduler_notify, 0, "nowork", 0); @@ -433,10 +409,50 @@ loop: */ faultin(p); p->p_swtime = 0; + PRELE(p); tsleep(&proc0, 0, "swapin", hz / 10); goto loop; } +static int +scheduler_callback(struct proc *p, void *data) +{ + struct scheduler_info *info = data; + segsz_t pgs; + int pri; + + if (p->p_flag & P_SWAPWAIT) { + pri = p->p_swtime + p->p_slptime - p->p_nice * 8; + + /* + * The more pages paged out while we were swapped, + * the more work we have to do to get up and running + * again and the lower our wakeup priority. + * + * Each second of sleep time is worth ~1MB + */ + pgs = vmspace_resident_count(p->p_vmspace); + if (pgs < p->p_vmspace->vm_swrss) { + pri -= (p->p_vmspace->vm_swrss - pgs) / + (1024 * 1024 / PAGE_SIZE); + } + + /* + * If this process is higher priority and there is + * enough space, then select this process instead of + * the previous selection. + */ + if (pri > info->ppri) { + if (info->pp) + PRELE(info->pp); + PHOLD(p); + info->pp = p; + info->ppri = pri; + } + } + return(0); +} + void swapin_request(void) { @@ -479,67 +495,68 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, * they are swapped. Else, we swap the longest-sleeping or stopped process, * if any, otherwise the longest-resident process. 
*/ + +static int swapout_procs_callback(struct proc *p, void *data); + void swapout_procs(int action) { - struct proc *p; - struct proc *outp, *outp2; - int outpri, outpri2; - - outp = outp2 = NULL; - outpri = outpri2 = INT_MIN; -retry: - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - struct vmspace *vm; - if (!swappable(p)) - continue; - - vm = p->p_vmspace; - - if (p->p_stat == SSLEEP || p->p_stat == SRUN) { - /* - * do not swap out a realtime process - */ - if (RTP_PRIO_IS_REALTIME(p->p_lwp.lwp_rtprio.type)) - continue; - - /* - * Guarentee swap_idle_threshold time in memory - */ - if (p->p_slptime < swap_idle_threshold1) - continue; - - /* - * If the system is under memory stress, or if we - * are swapping idle processes >= swap_idle_threshold2, - * then swap the process out. - */ - if (((action & VM_SWAP_NORMAL) == 0) && - (((action & VM_SWAP_IDLE) == 0) || - (p->p_slptime < swap_idle_threshold2))) { - continue; - } + allproc_scan(swapout_procs_callback, &action); +} - ++vm->vm_refcnt; - - /* - * If the process has been asleep for awhile, swap - * it out. - */ - if ((action & VM_SWAP_NORMAL) || - ((action & VM_SWAP_IDLE) && - (p->p_slptime > swap_idle_threshold2))) { - swapout(p); - vmspace_free(vm); - goto retry; - } +static int +swapout_procs_callback(struct proc *p, void *data) +{ + struct vmspace *vm; + int action = *(int *)data; + + if (!swappable(p)) + return(0); + + vm = p->p_vmspace; + + if (p->p_stat == SSLEEP || p->p_stat == SRUN) { + /* + * do not swap out a realtime process + */ + if (RTP_PRIO_IS_REALTIME(p->p_lwp.lwp_rtprio.type)) + return(0); - /* - * cleanup our reference - */ - vmspace_free(vm); + /* + * Guarentee swap_idle_threshold time in memory + */ + if (p->p_slptime < swap_idle_threshold1) + return(0); + + /* + * If the system is under memory stress, or if we + * are swapping idle processes >= swap_idle_threshold2, + * then swap the process out. 
+ */ + if (((action & VM_SWAP_NORMAL) == 0) && + (((action & VM_SWAP_IDLE) == 0) || + (p->p_slptime < swap_idle_threshold2))) { + return(0); } + + ++vm->vm_refcnt; + + /* + * If the process has been asleep for awhile, swap + * it out. + */ + if ((action & VM_SWAP_NORMAL) || + ((action & VM_SWAP_IDLE) && + (p->p_slptime > swap_idle_threshold2))) { + swapout(p); + } + + /* + * cleanup our reference + */ + vmspace_free(vm); } + return(0); } static void diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 07ce932c9c..d0f12fffe4 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -32,7 +32,7 @@ * * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 * $FreeBSD: src/sys/vm/vm_meter.c,v 1.34.2.7 2002/10/10 19:28:22 dillon Exp $ - * $DragonFly: src/sys/vm/vm_meter.c,v 1.8 2005/11/14 18:50:15 dillon Exp $ + * $DragonFly: src/sys/vm/vm_meter.c,v 1.9 2006/05/25 07:36:37 dillon Exp $ */ #include @@ -76,73 +76,32 @@ SYSCTL_UINT(_vm, OID_AUTO, v_free_severe, SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, &averunnable, loadavg, "Machine loadaverage history"); +static int do_vmtotal_callback(struct proc *p, void *data); + static int do_vmtotal(SYSCTL_HANDLER_ARGS) { - struct proc *p; - struct vmtotal total, *totalp; - vm_map_entry_t entry; + struct vmtotal total; + struct vmtotal *totalp; vm_object_t object; - vm_map_t map; - int paging; totalp = &total; bzero(totalp, sizeof *totalp); + /* * Mark all objects as inactive. */ for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = TAILQ_NEXT(object,object_list)) + object = TAILQ_NEXT(object,object_list)) { vm_object_clear_flag(object, OBJ_ACTIVE); + } + /* * Calculate process statistics. 
*/ - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if (p->p_flag & P_SYSTEM) - continue; - switch (p->p_stat) { - case 0: - continue; + allproc_scan(do_vmtotal_callback, totalp); - case SSLEEP: - if ((p->p_flag & P_SWAPPEDOUT) == 0) { - if ((p->p_flag & P_SINTR) == 0) - totalp->t_dw++; - else if (p->p_slptime < maxslp) - totalp->t_sl++; - } else if (p->p_slptime < maxslp) { - totalp->t_sw++; - } - if (p->p_slptime >= maxslp) - continue; - break; - - case SRUN: - case SIDL: - if (p->p_flag & P_SWAPPEDOUT) - totalp->t_sw++; - else - totalp->t_rq++; - if (p->p_stat == SIDL) - continue; - break; - } - /* - * Note active objects. - */ - paging = 0; - for (map = &p->p_vmspace->vm_map, entry = map->header.next; - entry != &map->header; entry = entry->next) { - if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || - entry->object.vm_object == NULL) - continue; - vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); - paging |= entry->object.vm_object->paging_in_progress; - } - if (paging) - totalp->t_pw++; - } /* * Calculate object memory usage statistics. */ @@ -174,6 +133,61 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) return (sysctl_handle_opaque(oidp, totalp, sizeof total, req)); } +static int +do_vmtotal_callback(struct proc *p, void *data) +{ + struct vmtotal *totalp = data; + vm_map_entry_t entry; + vm_map_t map; + int paging; + + if (p->p_flag & P_SYSTEM) + return(0); + + switch (p->p_stat) { + case 0: + return(0); + case SSLEEP: + if ((p->p_flag & P_SWAPPEDOUT) == 0) { + if ((p->p_flag & P_SINTR) == 0) + totalp->t_dw++; + else if (p->p_slptime < maxslp) + totalp->t_sl++; + } else if (p->p_slptime < maxslp) { + totalp->t_sw++; + } + if (p->p_slptime >= maxslp) + return(0); + break; + + case SRUN: + case SIDL: + if (p->p_flag & P_SWAPPEDOUT) + totalp->t_sw++; + else + totalp->t_rq++; + if (p->p_stat == SIDL) + return(0); + break; + } + /* + * Note active objects. 
+ */ + paging = 0; + for (map = &p->p_vmspace->vm_map, entry = map->header.next; + entry != &map->header; entry = entry->next) { + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || + entry->object.vm_object == NULL) + continue; + vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); + paging |= entry->object.vm_object->paging_in_progress; + } + if (paging) + totalp->t_pw++; + return(0); +} + + static int do_vmstats(SYSCTL_HANDLER_ARGS) { diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 57df6e7969..e14b7ba9ac 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $ - * $DragonFly: src/sys/vm/vm_object.c,v 1.24 2006/05/17 17:47:58 dillon Exp $ + * $DragonFly: src/sys/vm/vm_object.c,v 1.25 2006/05/25 07:36:37 dillon Exp $ */ /* @@ -1787,16 +1787,24 @@ _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) return 0; } +static int vm_object_in_map_callback(struct proc *p, void *data); + +struct vm_object_in_map_info { + vm_object_t object; + int rv; +}; + static int vm_object_in_map(vm_object_t object) { - struct proc *p; - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) - continue; - if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) - return 1; - } + struct vm_object_in_map_info info; + + info.rv = 0; + info.object = object; + + allproc_scan(vm_object_in_map_callback, &info); + if (info.rv) + return 1; if( _vm_object_in_map( kernel_map, object, 0)) return 1; if( _vm_object_in_map( pager_map, object, 0)) @@ -1806,6 +1814,20 @@ vm_object_in_map(vm_object_t object) return 0; } +static int +vm_object_in_map_callback(struct proc *p, void *data) +{ + struct vm_object_in_map_info *info = data; + + if (p->p_vmspace) { + if (_vm_object_in_map(&p->p_vmspace->vm_map, info->object, 0)) { + info->rv = 1; + return -1; + } + } + return (0); 
+} + DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index aa243eb0f1..91e77f022c 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -66,7 +66,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $ - * $DragonFly: src/sys/vm/vm_pageout.c,v 1.22 2006/05/23 01:21:48 dillon Exp $ + * $DragonFly: src/sys/vm/vm_pageout.c,v 1.23 2006/05/25 07:36:37 dillon Exp $ */ /* @@ -648,15 +648,22 @@ vm_pageout_page_free(vm_page_t m) { /* * vm_pageout_scan does the dirty work for the pageout daemon. */ + +struct vm_pageout_scan_info { + struct proc *bigproc; + vm_offset_t bigsize; +}; + +static int vm_pageout_scan_callback(struct proc *p, void *data); + static void vm_pageout_scan(int pass) { + struct vm_pageout_scan_info info; vm_page_t m, next; struct vm_page marker; int page_shortage, maxscan, pcount; int addl_page_shortage, addl_page_shortage_init; - struct proc *p, *bigproc; - vm_offset_t size, bigsize; vm_object_t object; int actcount; int vnodes_skipped = 0; @@ -1157,46 +1164,61 @@ rescan0: #if 0 if ((vm_swap_size < 64 || swap_pager_full) && vm_page_count_min()) { #endif - bigproc = NULL; - bigsize = 0; - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - /* - * if this is a system process, skip it - */ - if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || - ((p->p_pid < 48) && (vm_swap_size != 0))) { - continue; - } - /* - * if the process is in a non-running type state, - * don't touch it. - */ - if (p->p_stat != SRUN && p->p_stat != SSLEEP) { - continue; - } - /* - * get the process size - */ - size = vmspace_resident_count(p->p_vmspace) + - vmspace_swap_count(p->p_vmspace); - /* - * if the this process is bigger than the biggest one - * remember it. 
- */ - if (size > bigsize) { - bigproc = p; - bigsize = size; - } - } - if (bigproc != NULL) { - killproc(bigproc, "out of swap space"); - bigproc->p_nice = PRIO_MIN; - bigproc->p_usched->resetpriority(&bigproc->p_lwp); + info.bigproc = NULL; + info.bigsize = 0; + allproc_scan(vm_pageout_scan_callback, &info); + if (info.bigproc != NULL) { + killproc(info.bigproc, "out of swap space"); + info.bigproc->p_nice = PRIO_MIN; + info.bigproc->p_usched->resetpriority(&info.bigproc->p_lwp); wakeup(&vmstats.v_free_count); + PRELE(info.bigproc); } } } +static int +vm_pageout_scan_callback(struct proc *p, void *data) +{ + struct vm_pageout_scan_info *info = data; + vm_offset_t size; + + /* + * if this is a system process, skip it + */ + if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || + ((p->p_pid < 48) && (vm_swap_size != 0))) { + return (0); + } + + /* + * if the process is in a non-running type state, + * don't touch it. + */ + if (p->p_stat != SRUN && p->p_stat != SSLEEP) { + return (0); + } + + /* + * get the process size + */ + size = vmspace_resident_count(p->p_vmspace) + + vmspace_swap_count(p->p_vmspace); + + /* + * If this process is bigger than the biggest one + * remember it.
+ */ + if (size > info->bigsize) { + if (info->bigproc) + PRELE(info->bigproc); + PHOLD(p); + info->bigproc = p; + info->bigsize = size; + } + return(0); +} + /* * This routine tries to maintain the pseudo LRU active queue, * so that during long periods of time where there is no paging, @@ -1457,11 +1479,11 @@ vm_req_vmdaemon(void) } } +static int vm_daemon_callback(struct proc *p, void *data __unused); + static void vm_daemon(void) { - struct proc *p; - while (TRUE) { tsleep(&vm_daemon_needed, 0, "psleep", 0); if (vm_pageout_req_swapout) { @@ -1472,45 +1494,49 @@ vm_daemon(void) * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ + allproc_scan(vm_daemon_callback, NULL); + } +} - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - vm_pindex_t limit, size; +static int +vm_daemon_callback(struct proc *p, void *data __unused) +{ + vm_pindex_t limit, size; - /* - * if this is a system process or if we have already - * looked at this process, skip it. - */ - if (p->p_flag & (P_SYSTEM | P_WEXIT)) { - continue; - } - /* - * if the process is in a non-running type state, - * don't touch it. - */ - if (p->p_stat != SRUN && p->p_stat != SSLEEP) { - continue; - } - /* - * get a limit - */ - limit = OFF_TO_IDX( - qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur, - p->p_rlimit[RLIMIT_RSS].rlim_max)); + /* + * if this is a system process or if we have already + * looked at this process, skip it. + */ + if (p->p_flag & (P_SYSTEM | P_WEXIT)) + return (0); - /* - * let processes that are swapped out really be - * swapped out. Set the limit to nothing to get as - * many pages out to swap as possible. - */ - if (p->p_flag & P_SWAPPEDOUT) - limit = 0; + /* + * if the process is in a non-running type state, + * don't touch it. 
+ */ + if (p->p_stat != SRUN && p->p_stat != SSLEEP) + return (0); - size = vmspace_resident_count(p->p_vmspace); - if (limit >= 0 && size >= limit) { - vm_pageout_map_deactivate_pages( - &p->p_vmspace->vm_map, limit); - } - } + /* + * get a limit + */ + limit = OFF_TO_IDX(qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur, + p->p_rlimit[RLIMIT_RSS].rlim_max)); + + /* + * let processes that are swapped out really be + * swapped out. Set the limit to nothing to get as + * many pages out to swap as possible. + */ + if (p->p_flag & P_SWAPPEDOUT) + limit = 0; + + size = vmspace_resident_count(p->p_vmspace); + if (limit >= 0 && size >= limit) { + vm_pageout_map_deactivate_pages( + &p->p_vmspace->vm_map, limit); } + return (0); } + #endif