From: Matthew Dillon Date: Sat, 12 Feb 2011 22:20:09 +0000 (-0800) Subject: kernel - Make most of the fork and exit paths MPSAFE X-Git-Tag: v2.11.0~267^2~87 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/b5c4d81fee4abb297f1e3c6e6514c4bfc3c68bc0 kernel - Make most of the fork and exit paths MPSAFE * Remove the MP lock from numerous system calls (mainly socket calls) that no longer need it. * Use proc_token in a couple of places that still need work (instead of the MP lock). For example, the process group (pgrp) and several places which call pfind() still need to use the proc_token. * Use the per-process p->p_token in fork1(), exit1(), and lwp_exit(). The critical portions of these paths now have significant concurrency. * Use the per-process p->p_token when traversing p->p_children, primarily aiding the kern_wait() code, so the wait*() system calls should now have significant concurrency. * Change the fgetown() API to avoid certain races: it now takes a pointer to the sigio pointer (struct sigio **) and reads it while holding proc_token. * Add M_ZERO to the struct filedesc_to_leader allocation for safety purposes. 
--- diff --git a/sys/dev/drm/drm_drv.c b/sys/dev/drm/drm_drv.c index e407084add..8ddd75356d 100644 --- a/sys/dev/drm/drm_drv.c +++ b/sys/dev/drm/drm_drv.c @@ -732,7 +732,7 @@ int drm_ioctl(struct dev_ioctl_args *ap) return fsetown(*(int *)data, &dev->buf_sigio); case FIOGETOWN: - *(int *) data = fgetown(dev->buf_sigio); + *(int *) data = fgetown(&dev->buf_sigio); return 0; } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index ef81c41455..9c469a3778 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -104,7 +104,6 @@ #include #include #include -#include static void fsetfd_locked(struct filedesc *fdp, struct file *fp, int fd); static void fdreserve_locked (struct filedesc *fdp, int fd0, int incr); @@ -269,7 +268,6 @@ kern_fcntl(int fd, int cmd, union fcntl_dat *dat, struct ucred *cred) if ((fp = holdfp(p->p_fd, fd, -1)) == NULL) return (EBADF); - get_mplock(); switch (cmd) { case F_GETFL: dat->fc_flags = OFLAGS(fp->f_flag); @@ -387,7 +385,6 @@ kern_fcntl(int fd, int cmd, union fcntl_dat *dat, struct ucred *cred) error = EINVAL; break; } - rel_mplock(); fdrop(fp); return (error); @@ -644,15 +641,16 @@ retry: * If sigio is on the list associated with a process or process group, * disable signalling from the device, remove sigio from the list and * free sigio. + * + * MPSAFE */ void funsetown(struct sigio *sigio) { if (sigio == NULL) return; - crit_enter(); + lwkt_gettoken(&proc_token); *(sigio->sio_myref) = NULL; - crit_exit(); if (sigio->sio_pgid < 0) { SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, sigio, sio_pgsigio); @@ -660,18 +658,25 @@ funsetown(struct sigio *sigio) SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, sigio, sio_pgsigio); } + lwkt_reltoken(&proc_token); crfree(sigio->sio_ucred); kfree(sigio, M_SIGIO); } -/* Free a list of sigio structures. */ +/* + * Free a list of sigio structures. 
+ * + * MPSAFE + */ void funsetownlst(struct sigiolst *sigiolst) { struct sigio *sigio; + lwkt_gettoken(&proc_token); while ((sigio = SLIST_FIRST(sigiolst)) != NULL) funsetown(sigio); + lwkt_reltoken(&proc_token); } /* @@ -679,6 +684,8 @@ funsetownlst(struct sigiolst *sigiolst) * * After permission checking, add a sigio structure to the sigio list for * the process or process group. + * + * MPSAFE */ int fsetown(pid_t pgid, struct sigio **sigiop) @@ -686,15 +693,20 @@ fsetown(pid_t pgid, struct sigio **sigiop) struct proc *proc; struct pgrp *pgrp; struct sigio *sigio; + int error; if (pgid == 0) { funsetown(*sigiop); return (0); } + + lwkt_gettoken(&proc_token); if (pgid > 0) { proc = pfind(pgid); - if (proc == NULL) - return (ESRCH); + if (proc == NULL) { + error = ESRCH; + goto done; + } /* * Policy - Don't allow a process to FSETOWN a process @@ -704,14 +716,18 @@ fsetown(pid_t pgid, struct sigio **sigiop) * restrict FSETOWN to the current process or process * group for maximum safety. */ - if (proc->p_session != curproc->p_session) - return (EPERM); + if (proc->p_session != curproc->p_session) { + error = EPERM; + goto done; + } pgrp = NULL; } else /* if (pgid < 0) */ { pgrp = pgfind(-pgid); - if (pgrp == NULL) - return (ESRCH); + if (pgrp == NULL) { + error = ESRCH; + goto done; + } /* * Policy - Don't allow a process to FSETOWN a process @@ -721,8 +737,10 @@ fsetown(pid_t pgid, struct sigio **sigiop) * restrict FSETOWN to the current process or process * group for maximum safety. */ - if (pgrp->pg_session != curproc->p_session) - return (EPERM); + if (pgrp->pg_session != curproc->p_session) { + error = EPERM; + goto done; + } proc = NULL; } @@ -740,19 +758,30 @@ fsetown(pid_t pgid, struct sigio **sigiop) /* It would be convenient if p_ruid was in ucred. 
*/ sigio->sio_ruid = sigio->sio_ucred->cr_ruid; sigio->sio_myref = sigiop; - crit_enter(); *sigiop = sigio; - crit_exit(); - return (0); + error = 0; +done: + lwkt_reltoken(&proc_token); + return (error); } /* * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). + * + * MPSAFE */ pid_t -fgetown(struct sigio *sigio) +fgetown(struct sigio **sigiop) { - return (sigio != NULL ? sigio->sio_pgid : 0); + struct sigio *sigio; + pid_t own; + + lwkt_gettoken(&proc_token); + sigio = *sigiop; + own = (sigio != NULL ? sigio->sio_pgid : 0); + lwkt_reltoken(&proc_token); + + return (own); } /* @@ -898,9 +927,7 @@ sys_shutdown(struct shutdown_args *uap) { int error; - get_mplock(); error = kern_shutdown(uap->s, uap->how); - rel_mplock(); return (error); } @@ -974,9 +1001,7 @@ sys_fpathconf(struct fpathconf_args *uap) case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; - get_mplock(); error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); - rel_mplock(); break; default: error = EOPNOTSUPP; @@ -2317,7 +2342,6 @@ closef(struct file *fp, struct proc *p) if (p != NULL && fp->f_type == DTYPE_VNODE && (((struct vnode *)fp->f_data)->v_flag & VMAYHAVELOCKS) ) { - get_mplock(); if ((p->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -2329,6 +2353,7 @@ closef(struct file *fp, struct proc *p) } fdtol = p->p_fdtol; if (fdtol != NULL) { + lwkt_gettoken(&p->p_token); /* * Handle special case where file descriptor table * is shared between multiple process leaders. 
@@ -2355,8 +2380,8 @@ closef(struct file *fp, struct proc *p) wakeup(fdtol); } } + lwkt_reltoken(&p->p_token); } - rel_mplock(); } return (fdrop(fp)); } @@ -2398,7 +2423,6 @@ fdrop(struct file *fp) return (0); KKASSERT(SLIST_FIRST(&fp->f_klist) == NULL); - get_mplock(); /* * The last reference has gone away, we own the fp structure free @@ -2421,7 +2445,6 @@ fdrop(struct file *fp) else error = 0; ffree(fp); - rel_mplock(); return (error); } @@ -2444,7 +2467,6 @@ sys_flock(struct flock_args *uap) if ((fp = holdfp(p->p_fd, uap->fd, -1)) == NULL) return (EBADF); - get_mplock(); if (fp->f_type != DTYPE_VNODE) { error = EOPNOTSUPP; goto done; @@ -2473,7 +2495,6 @@ sys_flock(struct flock_args *uap) else error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_WAIT); done: - rel_mplock(); fdrop(fp); return (error); } @@ -2593,7 +2614,7 @@ filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc_to_leader *fdtol; fdtol = kmalloc(sizeof(struct filedesc_to_leader), - M_FILEDESC_TO_LEADER, M_WAITOK); + M_FILEDESC_TO_LEADER, M_WAITOK | M_ZERO); fdtol->fdl_refcount = 1; fdtol->fdl_holdcount = 0; fdtol->fdl_wakeup = 0; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index b41f36f8a0..c6f6125bd1 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -90,6 +90,8 @@ static void killlwps(struct lwp *lp); static MALLOC_DEFINE(M_ATEXIT, "atexit", "atexit callback"); static MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); +static struct lwkt_token deadlwp_token = LWKT_TOKEN_INITIALIZER(deadlwp_token); + /* * callout list for things to do at exit time */ @@ -112,8 +114,6 @@ struct lwplist deadlwp_list[MAXCPU]; * Death of process. 
* * SYS_EXIT_ARGS(int rval) - * - * MPALMOSTSAFE */ int sys_exit(struct exit_args *uap) @@ -131,6 +131,7 @@ sys_exit(struct exit_args *uap) int sys_extexit(struct extexit_args *uap) { + struct proc *p = curproc; int action, who; int error; @@ -158,7 +159,7 @@ sys_extexit(struct extexit_args *uap) return (EINVAL); } - get_mplock(); + lwkt_gettoken(&p->p_token); switch (who) { case EXTEXIT_LWP: @@ -168,20 +169,21 @@ sys_extexit(struct extexit_args *uap) * later, otherwise the proc will be an UNDEAD and not even a * SZOMB! */ - if (curproc->p_nthreads > 1) { - lwp_exit(0); + if (p->p_nthreads > 1) { + lwp_exit(0); /* called w/ p_token held */ /* NOT REACHED */ } /* else last lwp in proc: do the real thing */ /* FALLTHROUGH */ default: /* to help gcc */ case EXTEXIT_PROC: + lwkt_reltoken(&p->p_token); exit1(W_EXITCODE(uap->status, 0)); /* NOTREACHED */ } /* NOTREACHED */ - rel_mplock(); /* safety */ + lwkt_reltoken(&p->p_token); /* safety */ } /* @@ -276,14 +278,13 @@ exit1(int rv) struct exitlist *ep; int error; + lwkt_gettoken(&p->p_token); + if (p->p_pid == 1) { kprintf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } - - get_mplock(); - varsymset_clean(&p->p_varsymset); lockuninit(&p->p_varsymset.vx_lock); /* @@ -566,6 +567,8 @@ exit1(int rv) /* * Eventually called by every exiting LWP + * + * p->p_token must be held. mplock may be held and will be released. */ void lwp_exit(int masterexit) @@ -578,6 +581,7 @@ lwp_exit(int masterexit) * lwp_exit() may be called without setting LWP_WEXIT, so * make sure it is set here. 
*/ + ASSERT_LWKT_TOKEN_HELD(&p->p_token); lp->lwp_flag |= LWP_WEXIT; /* @@ -637,14 +641,22 @@ lwp_exit(int masterexit) --p->p_nthreads; if (p->p_nthreads <= 1) wakeup(&p->p_nthreads); + lwkt_gettoken(&deadlwp_token); LIST_INSERT_HEAD(&deadlwp_list[mycpuid], lp, u.lwp_reap_entry); taskqueue_enqueue(taskqueue_thread[mycpuid], deadlwp_task[mycpuid]); + lwkt_reltoken(&deadlwp_token); } else { --p->p_nthreads; if (p->p_nthreads <= 1) wakeup(&p->p_nthreads); } + + /* + * Release p_token. The mp_token may also be held and we depend on + * the lwkt_switch() code to clean it up. + */ + lwkt_reltoken(&p->p_token); cpu_lwp_exit(); } @@ -758,11 +770,12 @@ kern_wait(pid_t pid, int *status, int options, struct rusage *rusage, int *res) pid = -q->p_pgid; if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE)) return (EINVAL); - get_mplock(); + + lwkt_gettoken(&q->p_token); loop: /* - * Hack for backwards compatibility with badly written user code. - * Or perhaps we have to do this anyway, it is unclear. XXX + * All sorts of things can change due to blocking so we have to loop + * all the way back up here. * * The problem is that if a process group is stopped and the parent * is doing a wait*(..., WUNTRACED, ...), it will see the STOP @@ -773,17 +786,19 @@ loop: * * Previously the CONT would overwrite the STOP because the tstop * was handled within tsleep(), and the parent would only see - * the CONT when both are stopped and continued together. This litte + * the CONT when both are stopped and continued together. This little * two-line hack restores this effect. */ while (q->p_stat == SSTOP) tstop(); nfound = 0; + LIST_FOREACH(p, &q->p_children, p_sibling) { if (pid != WAIT_ANY && - p->p_pid != pid && p->p_pgid != -pid) + p->p_pid != pid && p->p_pgid != -pid) { continue; + } /* * This special case handles a kthread spawned by linux_clone @@ -806,6 +821,7 @@ loop: * the master thread, otherwise we may race reaping * non-master threads. 
*/ + lwkt_gettoken(&p->p_token); while (p->p_nthreads > 0) { tsleep(&p->p_nthreads, 0, "lwpzomb", hz); } @@ -824,6 +840,7 @@ loop: reaplwp(lp); } KKASSERT(p->p_nthreads == 0); + lwkt_reltoken(&p->p_token); /* * Don't do anything really bad until all references @@ -839,12 +856,6 @@ loop: while (p->p_lock) tsleep(p, 0, "reap3", hz); - /* scheduling hook for heuristic */ - /* XXX no lwp available, we need a different heuristic */ - /* - p->p_usched->heuristic_exiting(td->td_lwp, deadlp); - */ - /* Take care of our return values. */ *res = p->p_pid; if (status) @@ -940,25 +951,29 @@ loop: error = tsleep((caddr_t)q, PCATCH, "wait", 0); if (error) { done: - rel_mplock(); + lwkt_reltoken(&q->p_token); return (error); } goto loop; } /* - * make process 'parent' the new parent of process 'child'. + * Make process 'parent' the new parent of process 'child'. */ void proc_reparent(struct proc *child, struct proc *parent) { - if (child->p_pptr == parent) return; - + PHOLD(parent); + lwkt_gettoken(&child->p_token); + lwkt_gettoken(&parent->p_token); LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; + lwkt_reltoken(&parent->p_token); + lwkt_reltoken(&child->p_token); + PRELE(parent); } /* @@ -1018,12 +1033,12 @@ reaplwps(void *context, int dummy) struct lwplist *lwplist = context; struct lwp *lp; - get_mplock(); + lwkt_gettoken(&deadlwp_token); while ((lp = LIST_FIRST(lwplist))) { LIST_REMOVE(lp, u.lwp_reap_entry); reaplwp(lp); } - rel_mplock(); + lwkt_reltoken(&deadlwp_token); } static void diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 8e994757dd..89a8658da2 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -68,7 +68,6 @@ #include #include #include -#include #include @@ -118,14 +117,12 @@ sys_fork(struct fork_args *uap) struct proc *p2; int error; - get_mplock(); error = fork1(lp, RFFDG | RFPROC | RFPGLOCK, &p2); if (error == 0) { start_forked_proc(lp, p2); uap->sysmsg_fds[0] = 
p2->p_pid; uap->sysmsg_fds[1] = 0; } - rel_mplock(); return error; } @@ -139,14 +136,12 @@ sys_vfork(struct vfork_args *uap) struct proc *p2; int error; - get_mplock(); error = fork1(lp, RFFDG | RFPROC | RFPPWAIT | RFMEM | RFPGLOCK, &p2); if (error == 0) { start_forked_proc(lp, p2); uap->sysmsg_fds[0] = p2->p_pid; uap->sysmsg_fds[1] = 0; } - rel_mplock(); return error; } @@ -173,7 +168,6 @@ sys_rfork(struct rfork_args *uap) if ((uap->flags & RFKERNELONLY) != 0) return (EINVAL); - get_mplock(); error = fork1(lp, uap->flags | RFPGLOCK, &p2); if (error == 0) { if (p2) @@ -181,7 +175,6 @@ sys_rfork(struct rfork_args *uap) uap->sysmsg_fds[0] = p2 ? p2->p_pid : 0; uap->sysmsg_fds[1] = 0; } - rel_mplock(); return error; } @@ -200,7 +193,7 @@ sys_lwp_create(struct lwp_create_args *uap) if (error) goto fail2; - get_mplock(); + lwkt_gettoken(&p->p_token); plimit_lwp_fork(p); /* force exclusive access */ lp = lwp_fork(curthread->td_lwp, p, RFPROC); error = cpu_prepare_lwp(lp, ¶ms); @@ -219,7 +212,7 @@ sys_lwp_create(struct lwp_create_args *uap) lp->lwp_stat = LSRUN; p->p_usched->setrunqueue(lp); crit_exit(); - rel_mplock(); + lwkt_reltoken(&p->p_token); return (0); @@ -231,7 +224,7 @@ fail: lp->lwp_thread->td_flags |= TDF_EXITING; PHOLD(p); lwp_dispose(lp); - rel_mplock(); + lwkt_reltoken(&p->p_token); fail2: return (error); } @@ -254,6 +247,9 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); + lwkt_gettoken(&p1->p_token); + pgrp = NULL; + /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. 
@@ -263,8 +259,10 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) * This kind of stunt does not work anymore if * there are native threads (lwps) running */ - if (p1->p_nthreads != 1) - return (EINVAL); + if (p1->p_nthreads != 1) { + error = EINVAL; + goto done; + } vm_fork(p1, 0, flags); @@ -288,7 +286,8 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) } } *procp = NULL; - return (0); + error = 0; + goto done; } /* @@ -301,8 +300,6 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) * and cause the process group lock to be held indefinitely. If * a STOP occurs, the fork will be restarted after the CONT. */ - error = 0; - pgrp = NULL; if ((flags & RFPGLOCK) && (pgrp = p1->p_pgrp) != NULL) { lockmgr(&pgrp->pg_lock, LK_SHARED); if (CURSIG_NOBLOCK(lp1)) { @@ -439,10 +436,13 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) fdtol = NULL; } else { p2->p_fd = fdshare(p1); - if (p1->p_fdtol == NULL) + if (p1->p_fdtol == NULL) { + lwkt_gettoken(&p1->p_token); p1->p_fdtol = filedesc_to_leader_alloc(NULL, p1->p_leader); + lwkt_reltoken(&p1->p_token); + } if ((flags & RFTHREAD) != 0) { /* * Shared file descriptor table and @@ -497,8 +497,12 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) else pptr = p1; p2->p_pptr = pptr; - LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); LIST_INIT(&p2->p_children); + + lwkt_gettoken(&pptr->p_token); + LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); + lwkt_reltoken(&pptr->p_token); + varsymset_init(&p2->p_varsymset, &p1->p_varsymset); callout_init(&p2->p_ithandle); @@ -571,7 +575,9 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) * Return child proc pointer to parent. 
*/ *procp = p2; + error = 0; done: + lwkt_reltoken(&p1->p_token); if (pgrp) lockmgr(&pgrp->pg_lock, LK_RELEASE); return (error); diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 4fc5e5119f..d159143458 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -394,9 +394,7 @@ sys_utrace(struct utrace_args *uap) if (!copyin(uap->addr, cp, uap->len)) { kth->ktr_buf = cp; kth->ktr_len = uap->len; - get_mplock(); ktrwrite(td->td_lwp, kth, NULL); - rel_mplock(); } FREE(kth, M_KTRACE); FREE(cp, M_KTRACE); @@ -416,9 +414,7 @@ ktrdestroy(struct ktrace_node **tracenodep) if ((tracenode = *tracenodep) != NULL) { *tracenodep = NULL; KKASSERT(tracenode->kn_refs > 0); - /* XXX not MP safe yet */ - --tracenode->kn_refs; - if (tracenode->kn_refs == 0) { + if (atomic_fetchadd_int(&tracenode->kn_refs, -1) == 1) { vn_close(tracenode->kn_vp, FREAD|FWRITE); tracenode->kn_vp = NULL; FREE(tracenode, M_KTRACE); @@ -436,7 +432,7 @@ ktrinherit(ktrace_node_t tracenode) { if (tracenode) { KKASSERT(tracenode->kn_refs > 0); - ++tracenode->kn_refs; + atomic_add_int(&tracenode->kn_refs, 1); } return(tracenode); } @@ -475,29 +471,44 @@ ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, ktrace_node_t tracenode) { struct proc *p; + struct proc *np; int ret = 0; - p = top; - for (;;) { + np = top; + if (np) { + PHOLD(np); + } + + while ((p = np) != NULL) { + lwkt_gettoken(&p->p_token); ret |= ktrops(td, p, ops, facs, tracenode); + /* * If this process has children, descend to them next, * otherwise do any siblings, and if done with this level, * follow back up the tree (but not past top). 
*/ - if (!LIST_EMPTY(&p->p_children)) - p = LIST_FIRST(&p->p_children); - else for (;;) { - if (p == top) - return (ret); - if (LIST_NEXT(p, p_sibling)) { - p = LIST_NEXT(p, p_sibling); - break; + if (!LIST_EMPTY(&p->p_children)) { + np = LIST_FIRST(&p->p_children); + } else { + for (;;) { + if (p == top) { + np = NULL; + break; + } + if (LIST_NEXT(p, p_sibling)) { + np = LIST_NEXT(p, p_sibling); + break; + } + np = p->p_pptr; } - p = p->p_pptr; } + if (np) + PHOLD(np); + lwkt_reltoken(&p->p_token); + PRELE(p); } - /*NOTREACHED*/ + return (ret); } static void diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index bcd8034f1e..62bc4884a7 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -394,12 +394,15 @@ fixjobc(struct proc *p, struct pgrp *pgrp, int entering) { struct pgrp *hispgrp; struct session *mysession; + struct proc *np; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ lwkt_gettoken(&proc_token); + lwkt_gettoken(&p->p_token); /* p_children scan */ + mysession = pgrp->pg_session; if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) { @@ -414,16 +417,17 @@ fixjobc(struct proc *p, struct pgrp *pgrp, int entering) * their process groups; if so, adjust counts for children's * process groups. 
*/ - LIST_FOREACH(p, &p->p_children, p_sibling) { - if ((hispgrp = p->p_pgrp) != pgrp && + LIST_FOREACH(np, &p->p_children, p_sibling) { + if ((hispgrp = np->p_pgrp) != pgrp && hispgrp->pg_session == mysession && - p->p_stat != SZOMB) { + np->p_stat != SZOMB) { if (entering) hispgrp->pg_jobc++; else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); } } + lwkt_reltoken(&p->p_token); lwkt_reltoken(&proc_token); } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index e0adbe954e..61f21fe809 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -2034,6 +2034,7 @@ sigexit(struct lwp *lp, int sig) { struct proc *p = lp->lwp_proc; + lwkt_gettoken(&p->p_token); p->p_acflag |= AXSIG; if (sigprop(sig) & SA_CORE) { lp->lwp_sig = sig; @@ -2053,6 +2054,7 @@ sigexit(struct lwp *lp, int sig) sig &~ WCOREFLAG, sig & WCOREFLAG ? " (core dumped)" : ""); } + lwkt_reltoken(&p->p_token); exit1(W_EXITCODE(0, sig)); /* NOTREACHED */ } diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index f3b738241a..f8d73ade9a 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -257,7 +257,7 @@ logioctl(struct dev_ioctl_args *ap) return (fsetown(*(int *)ap->a_data, &logsoftc.sc_sigio)); case FIOGETOWN: - *(int *)ap->a_data = fgetown(logsoftc.sc_sigio); + *(int *)ap->a_data = fgetown(&logsoftc.sc_sigio); break; /* This is deprecated, FIOSETOWN should be used instead. 
*/ @@ -266,7 +266,7 @@ logioctl(struct dev_ioctl_args *ap) /* This is deprecated, FIOGETOWN should be used instead */ case TIOCGPGRP: - *(int *)ap->a_data = -fgetown(logsoftc.sc_sigio); + *(int *)ap->a_data = -fgetown(&logsoftc.sc_sigio); break; default: diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 49de412730..6e425dec6c 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -958,24 +958,20 @@ pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, error = 0; break; case FIOSETOWN: - lwkt_gettoken(&proc_token); error = fsetown(*(int *)data, &mpipe->pipe_sigio); - lwkt_reltoken(&proc_token); break; case FIOGETOWN: - *(int *)data = fgetown(mpipe->pipe_sigio); + *(int *)data = fgetown(&mpipe->pipe_sigio); error = 0; break; case TIOCSPGRP: /* This is deprecated, FIOSETOWN should be used instead. */ - lwkt_gettoken(&proc_token); error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); - lwkt_reltoken(&proc_token); break; case TIOCGPGRP: /* This is deprecated, FIOGETOWN should be used instead. 
*/ - *(int *)data = -fgetown(mpipe->pipe_sigio); + *(int *)data = -fgetown(&mpipe->pipe_sigio); error = 0; break; default: @@ -1022,9 +1018,7 @@ pipe_close(struct file *fp) cpipe = (struct pipe *)fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; - lwkt_gettoken(&proc_token); funsetown(cpipe->pipe_sigio); - lwkt_reltoken(&proc_token); pipeclose(cpipe); return (0); } diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 504234c5ce..dbda4b4e46 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -51,7 +51,6 @@ #include #include -#include #include #include @@ -68,7 +67,7 @@ struct fileops socketops = { }; /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ int soo_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) @@ -77,7 +76,6 @@ soo_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) int error; int msgflags; - get_mplock(); so = (struct socket *)fp->f_data; if (fflags & O_FBLOCKING) @@ -90,12 +88,11 @@ soo_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) msgflags = 0; error = so_pru_soreceive(so, NULL, uio, NULL, NULL, &msgflags); - rel_mplock(); return (error); } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ int soo_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) @@ -104,7 +101,6 @@ soo_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) int error; int msgflags; - get_mplock(); so = (struct socket *)fp->f_data; if (fflags & O_FBLOCKING) @@ -117,12 +113,11 @@ soo_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags) msgflags = 0; error = so_pru_sosend(so, NULL, uio, NULL, NULL, msgflags, uio->uio_td); - rel_mplock(); return (error); } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ int soo_ioctl(struct file *fp, u_long cmd, caddr_t data, @@ -131,7 +126,6 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct socket *so; int error; - get_mplock(); so = (struct socket *)fp->f_data; switch (cmd) { @@ 
-155,14 +149,14 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, error = fsetown(*(int *)data, &so->so_sigio); break; case FIOGETOWN: - *(int *)data = fgetown(so->so_sigio); + *(int *)data = fgetown(&so->so_sigio); error = 0; break; case SIOCSPGRP: error = fsetown(-(*(int *)data), &so->so_sigio); break; case SIOCGPGRP: - *(int *)data = -fgetown(so->so_sigio); + *(int *)data = -fgetown(&so->so_sigio); error = 0; break; case SIOCATMARK: @@ -184,12 +178,11 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, } break; } - rel_mplock(); return (error); } /* - * MPSAFE - acquires mplock + * MPSAFE */ int soo_stat(struct file *fp, struct stat *ub, struct ucred *cred) @@ -218,38 +211,34 @@ soo_stat(struct file *fp, struct stat *ub, struct ucred *cred) } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ int soo_close(struct file *fp) { int error; - get_mplock(); fp->f_ops = &badfileops; if (fp->f_data) error = soclose((struct socket *)fp->f_data, fp->f_flag); else error = 0; fp->f_data = NULL; - rel_mplock(); return (error); } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ int soo_shutdown(struct file *fp, int how) { int error; - get_mplock(); if (fp->f_data) error = soshutdown((struct socket *)fp->f_data, how); else error = 0; - rel_mplock(); return (error); } diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 348d770838..fd86301905 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -945,7 +945,7 @@ ttioctl(struct tty *tp, u_long cmd, void *data, int flag) lwkt_reltoken(&tty_token); return (ENOTTY); } - *(int *)data = fgetown(tp->t_sigio); + *(int *)data = fgetown(&tp->t_sigio); break; case TIOCEXCL: /* set exclusive use of tty */ diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index c2955a2db2..7b763286cd 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -78,7 +78,6 @@ #include #include #include -#include #include #ifdef SCTP @@ -131,10 +130,8 @@ sys_socket(struct socket_args *uap) { int error; - get_mplock(); error 
= kern_socket(uap->domain, uap->type, uap->protocol, &uap->sysmsg_iresult); - rel_mplock(); return (error); } @@ -170,9 +167,7 @@ sys_bind(struct bind_args *uap) error = getsockaddr(&sa, uap->name, uap->namelen); if (error) return (error); - get_mplock(); error = kern_bind(uap->s, sa); - rel_mplock(); FREE(sa, M_SONAME); return (error); @@ -205,9 +200,7 @@ sys_listen(struct listen_args *uap) { int error; - get_mplock(); error = kern_listen(uap->s, uap->backlog); - rel_mplock(); return (error); } @@ -327,7 +320,7 @@ kern_accept(int s, int fflags, struct sockaddr **name, int *namelen, int *res) KNOTE(&head->so_rcv.ssb_kq.ki_note, 0); if (head->so_sigio != NULL) - fsetown(fgetown(head->so_sigio), &so->so_sigio); + fsetown(fgetown(&head->so_sigio), &so->so_sigio); nfp->f_type = DTYPE_SOCKET; nfp->f_flag = fflag; @@ -394,10 +387,8 @@ sys_accept(struct accept_args *uap) if (error) return (error); - get_mplock(); error = kern_accept(uap->s, 0, &sa, &sa_len, &uap->sysmsg_iresult); - rel_mplock(); if (error == 0) error = copyout(sa, uap->name, sa_len); @@ -408,10 +399,8 @@ sys_accept(struct accept_args *uap) if (sa) FREE(sa, M_SONAME); } else { - get_mplock(); error = kern_accept(uap->s, 0, NULL, 0, &uap->sysmsg_iresult); - rel_mplock(); } return (error); } @@ -434,10 +423,8 @@ sys_extaccept(struct extaccept_args *uap) if (error) return (error); - get_mplock(); error = kern_accept(uap->s, fflags, &sa, &sa_len, &uap->sysmsg_iresult); - rel_mplock(); if (error == 0) error = copyout(sa, uap->name, sa_len); @@ -448,10 +435,8 @@ sys_extaccept(struct extaccept_args *uap) if (sa) FREE(sa, M_SONAME); } else { - get_mplock(); error = kern_accept(uap->s, fflags, NULL, 0, &uap->sysmsg_iresult); - rel_mplock(); } return (error); } @@ -548,9 +533,7 @@ sys_connect(struct connect_args *uap) error = getsockaddr(&sa, uap->name, uap->namelen); if (error) return (error); - get_mplock(); error = kern_connect(uap->s, 0, sa); - rel_mplock(); FREE(sa, M_SONAME); return (error); @@ -571,9 +554,7 
@@ sys_extconnect(struct extconnect_args *uap) error = getsockaddr(&sa, uap->name, uap->namelen); if (error) return (error); - get_mplock(); error = kern_connect(uap->s, fflags, sa); - rel_mplock(); FREE(sa, M_SONAME); return (error); @@ -647,9 +628,7 @@ sys_socketpair(struct socketpair_args *uap) { int error, sockv[2]; - get_mplock(); error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); - rel_mplock(); if (error == 0) error = copyout(sockv, uap->rsv, sizeof(sockv)); @@ -743,10 +722,8 @@ sys_sendto(struct sendto_args *uap) auio.uio_rw = UIO_WRITE; auio.uio_td = td; - get_mplock(); error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, &uap->sysmsg_szresult); - rel_mplock(); if (sa) FREE(sa, M_SONAME); @@ -819,10 +796,8 @@ sys_sendmsg(struct sendmsg_args *uap) } } - get_mplock(); error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, &uap->sysmsg_szresult); - rel_mplock(); cleanup: iovec_free(&iov, aiov); @@ -935,10 +910,8 @@ sys_recvfrom(struct recvfrom_args *uap) auio.uio_rw = UIO_READ; auio.uio_td = td; - get_mplock(); error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, &uap->flags, &uap->sysmsg_szresult); - rel_mplock(); if (error == 0 && uap->from) { /* note: sa may still be NULL */ @@ -1012,12 +985,10 @@ sys_recvmsg(struct recvmsg_args *uap) flags = uap->flags; - get_mplock(); error = kern_recvmsg(uap->s, (msg.msg_name ? &sa : NULL), &auio, (msg.msg_control ? &control : NULL), &flags, &uap->sysmsg_szresult); - rel_mplock(); /* * Conditionally copyout the name and populate the namelen field. 
@@ -1134,9 +1105,7 @@ sys_setsockopt(struct setsockopt_args *uap) goto out; } - get_mplock(); error = kern_setsockopt(uap->s, &sopt); - rel_mplock(); out: if (uap->val) kfree(sopt.sopt_val, M_TEMP); @@ -1207,9 +1176,7 @@ sys_getsockopt(struct getsockopt_args *uap) goto out; } - get_mplock(); error = kern_getsockopt(uap->s, &sopt); - rel_mplock(); if (error) goto out; valsize = sopt.sopt_valsize; @@ -1279,9 +1246,7 @@ sys_getsockname(struct getsockname_args *uap) if (error) return (error); - get_mplock(); error = kern_getsockname(uap->fdes, &sa, &sa_len); - rel_mplock(); if (error == 0) error = copyout(sa, uap->asa, sa_len); @@ -1351,9 +1316,7 @@ sys_getpeername(struct getpeername_args *uap) if (error) return (error); - get_mplock(); error = kern_getpeername(uap->fdes, &sa, &sa_len); - rel_mplock(); if (error == 0) error = copyout(sa, uap->asa, sa_len); @@ -1398,11 +1361,9 @@ getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) * Detach a mapped page and release resources back to the system. * We must release our wiring and if the object is ripped out * from under the vm_page we become responsible for freeing the - * page. These routines must be MPSAFE. + * page. * - * XXX HACK XXX TEMPORARY UNTIL WE IMPLEMENT EXT MBUF REFERENCE COUNTING - * - * XXX vm_page_*() routines are not MPSAFE yet, the MP lock is required. + * MPSAFE */ static void sf_buf_mfree(void *arg) @@ -1410,19 +1371,12 @@ sf_buf_mfree(void *arg) struct sf_buf *sf = arg; vm_page_t m; - /* - * XXX vm_page_*() and SFBUF routines not MPSAFE yet. 
- */ - get_mplock(); - crit_enter(); m = sf_buf_page(sf); if (sf_buf_free(sf) == 0) { vm_page_unwire(m, 0); if (m->wire_count == 0 && m->object == NULL) vm_page_try_to_free(m); } - crit_exit(); - rel_mplock(); } /* @@ -1473,7 +1427,6 @@ sys_sendfile(struct sendfile_args *uap) fdrop(fp); return (EINVAL); } - get_mplock(); vp = (struct vnode *)fp->f_data; vref(vp); fdrop(fp); @@ -1541,7 +1494,6 @@ sys_sendfile(struct sendfile_args *uap) done: if (vp) vrele(vp); - rel_mplock(); if (uap->sbytes != NULL) { sbytes += hdtr_size; copyout(&sbytes, uap->sbytes, sizeof(off_t)); @@ -1551,7 +1503,7 @@ done: int kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes, - struct mbuf *mheader, off_t *sbytes, int flags) + struct mbuf *mheader, off_t *sbytes, int flags) { struct thread *td = curthread; struct proc *p = td->td_proc; @@ -1851,7 +1803,6 @@ sys_sctp_peeloff(struct sctp_peeloff_args *uap) if (error) return (error); - get_mplock(); crit_enter(); head = (struct socket *)lfp->f_data; error = sctp_can_peel_off(head, assoc_id); @@ -1891,7 +1842,7 @@ sys_sctp_peeloff(struct sctp_peeloff_args *uap) soclrstate(so, SS_NOFDREF | SS_COMP); /* when clearing NOFDREF */ so->so_head = NULL; if (head->so_sigio != NULL) - fsetown(fgetown(head->so_sigio), &so->so_sigio); + fsetown(fgetown(&head->so_sigio), &so->so_sigio); nfp->f_type = DTYPE_SOCKET; nfp->f_flag = fflag; @@ -1912,7 +1863,6 @@ noconnection: * Release explicitly held references before returning. */ done: - rel_mplock(); if (nfp != NULL) fdrop(nfp); fdrop(lfp); diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 71e67a95cd..0924d57a5d 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -917,7 +917,7 @@ bpfioctl(struct dev_ioctl_args *ap) break; case FIOGETOWN: - *(int *)ap->a_data = fgetown(d->bd_sigio); + *(int *)ap->a_data = fgetown(&d->bd_sigio); break; /* This is deprecated, FIOSETOWN should be used instead. 
*/ @@ -927,7 +927,7 @@ bpfioctl(struct dev_ioctl_args *ap) /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)ap->a_data = -fgetown(d->bd_sigio); + *(int *)ap->a_data = -fgetown(&d->bd_sigio); break; case BIOCSRSIG: /* Set receive signal */ diff --git a/sys/net/tap/if_tap.c b/sys/net/tap/if_tap.c index bfb36d19dd..d2d09490e0 100644 --- a/sys/net/tap/if_tap.c +++ b/sys/net/tap/if_tap.c @@ -730,7 +730,7 @@ tapioctl(struct dev_ioctl_args *ap) break; case FIOGETOWN: - *(int *)data = fgetown(tp->tap_sigio); + *(int *)data = fgetown(&tp->tap_sigio); break; /* this is deprecated, FIOSETOWN should be used instead */ @@ -740,7 +740,7 @@ tapioctl(struct dev_ioctl_args *ap) /* this is deprecated, FIOGETOWN should be used instead */ case TIOCGPGRP: - *(int *)data = -fgetown(tp->tap_sigio); + *(int *)data = -fgetown(&tp->tap_sigio); break; /* VMware/VMnet port ioctl's */ diff --git a/sys/net/tun/if_tun.c b/sys/net/tun/if_tun.c index 7d57239c8c..5f4e259773 100644 --- a/sys/net/tun/if_tun.c +++ b/sys/net/tun/if_tun.c @@ -504,7 +504,7 @@ tunioctl(struct dev_ioctl_args *ap) return (fsetown(*(int *)ap->a_data, &tp->tun_sigio)); case FIOGETOWN: - *(int *)ap->a_data = fgetown(tp->tun_sigio); + *(int *)ap->a_data = fgetown(&tp->tun_sigio); return (0); /* This is deprecated, FIOSETOWN should be used instead. */ @@ -513,7 +513,7 @@ tunioctl(struct dev_ioctl_args *ap) /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)ap->a_data = -fgetown(tp->tun_sigio); + *(int *)ap->a_data = -fgetown(&tp->tun_sigio); return (0); default: diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index 1e51fbd4a3..96b8af2bd0 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -253,9 +253,9 @@ recheck: * If the jungle wants us dead, so be it. 
*/ if (lp->lwp_flag & LWP_WEXIT) { - get_mplock(); + lwkt_gettoken(&p->p_token); lwp_exit(0); - rel_mplock(); /* NOT REACHED */ + lwkt_reltoken(&p->p_token); /* NOT REACHED */ } /* diff --git a/sys/platform/pc64/x86_64/trap.c b/sys/platform/pc64/x86_64/trap.c index 6a4eca562b..caea116cec 100644 --- a/sys/platform/pc64/x86_64/trap.c +++ b/sys/platform/pc64/x86_64/trap.c @@ -215,9 +215,9 @@ recheck: * If the jungle wants us dead, so be it. */ if (lp->lwp_flag & LWP_WEXIT) { - get_mplock(); + lwkt_gettoken(&p->p_token); lwp_exit(0); - rel_mplock(); /* NOT REACHED */ + lwkt_reltoken(&p->p_token); /* NOT REACHED */ } /* diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c index 9100e55b72..ff69ae1d73 100644 --- a/sys/platform/vkernel/i386/trap.c +++ b/sys/platform/vkernel/i386/trap.c @@ -228,9 +228,9 @@ recheck: * If the jungle wants us dead, so be it. */ if (lp->lwp_flag & LWP_WEXIT) { - get_mplock(); + lwkt_gettoken(&p->p_token); lwp_exit(0); - rel_mplock(); /* NOT REACHED */ + lwkt_reltoken(&p->p_token); /* NOT REACHED */ } /* diff --git a/sys/platform/vkernel64/x86_64/trap.c b/sys/platform/vkernel64/x86_64/trap.c index a0a6d3abf5..874dfc553a 100644 --- a/sys/platform/vkernel64/x86_64/trap.c +++ b/sys/platform/vkernel64/x86_64/trap.c @@ -228,9 +228,9 @@ recheck: * If the jungle wants us dead, so be it. 
*/ if (lp->lwp_flag & LWP_WEXIT) { - get_mplock(); + lwkt_gettoken(&p->p_token); lwp_exit(0); - rel_mplock(); /* NOT REACHED */ + lwkt_reltoken(&p->p_token); /* NOT REACHED */ } /* diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 6c37fa715d..dd177afa9a 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -176,7 +176,7 @@ int holdsock (struct filedesc *fdp, int fdes, struct file **fpp); int holdvnode (struct filedesc *fdp, int fd, struct file **fpp); int fdissequential (struct file *); void fdsequential (struct file *, int); -pid_t fgetown (struct sigio *); +pid_t fgetown (struct sigio **); int fsetown (pid_t, struct sigio **); void funsetown (struct sigio *); void funsetownlst (struct sigiolst *);