From 0730ed66e3324415127af9bc0fe9dafa399f4e91 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 15 Aug 2012 18:11:11 -0700 Subject: [PATCH] kernel - Fix exit races which can lead to a corrupt p_children list * There are a few races when getting multiple tokens where a threaded process is wait*()ing for exiting children from multiple threads at once. Fix the problem by serializing the operation on a per-child basis, and by using PHOLD/PRELE prior to acquiring the child's p_token. Then re-check the conditions before accepting the child. * There is a small chance this will also reduce or fix VM on-exit races in i386, as this bug could result in an already-destroyed process being pulled off by the racing wait*(). Maybe 25% chance. --- sys/kern/kern_exit.c | 69 +++++++++++++++++++++++++++++++-- sys/kern/kern_proc.c | 92 +++++++++++++++++++++++++++++++++++++++++--- sys/sys/proc.h | 4 ++ 3 files changed, 156 insertions(+), 9 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 365cf5df8a..aae085c27a 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -500,12 +500,19 @@ exit1(int rv) q = LIST_FIRST(&p->p_children); if (q) { lwkt_gettoken(&initproc->p_token); - while (q) { - nq = LIST_NEXT(q, p_sibling); + while ((q = LIST_FIRST(&p->p_children)) != NULL) { + PHOLD(q); + lwkt_gettoken(&q->p_token); + if (q != LIST_FIRST(&p->p_children)) { + lwkt_reltoken(&q->p_token); + PRELE(q); + continue; + } LIST_REMOVE(q, p_sibling); LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling); q->p_pptr = initproc; q->p_sigparent = SIGCHLD; + /* * Traced processes are killed * since their existence means someone is screwing up. 
@@ -514,7 +521,8 @@ exit1(int rv) q->p_flags &= ~P_TRACED; ksignal(q, SIGKILL); } - q = nq; + lwkt_reltoken(&q->p_token); + PRELE(q); } lwkt_reltoken(&initproc->p_token); wakeup(initproc); @@ -878,8 +886,24 @@ loop: * We must wait for them to exit before we can reap * the master thread, otherwise we may race reaping * non-master threads. + * + * Only this routine can remove a process from + * the zombie list and destroy it, use PHOLDZOMB() + * to serialize us and loop if it blocks (interlocked + * by the parent's q->p_token). + * + * WARNING! (p) can be invalid when PHOLDZOMB(p) + * returns non-zero. Be sure not to + * mess with it. */ + if (PHOLDZOMB(p)) + goto loop; lwkt_gettoken(&p->p_token); + if (p->p_pptr != q) { + lwkt_reltoken(&p->p_token); + PRELEZOMB(p); + goto loop; + } while (p->p_nthreads > 0) { tsleep(&p->p_nthreads, 0, "lwpzomb", hz); } @@ -910,6 +934,7 @@ loop: * put a hold on the process for short periods of * time. */ + PRELE(p); PSTALL(p, "reap3", 0); /* Take care of our return values. */ @@ -925,6 +950,7 @@ loop: * we need to give it back to the old parent. */ if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { + PHOLD(p); p->p_oppid = 0; proc_reparent(p, t); ksignal(t, SIGCHLD); @@ -932,6 +958,7 @@ loop: error = 0; PRELE(t); lwkt_reltoken(&p->p_token); + PRELEZOMB(p); goto done; } @@ -988,6 +1015,13 @@ loop: vmspace_exitfree(p); PSTALL(p, "reap5", 0); + /* + * NOTE: We have to officially release ZOMB in order + * to ensure that a racing thread in kern_wait() + * which blocked on ZOMB is woken up. 
+ */ + PHOLD(p); + PRELEZOMB(p); kfree(p, M_PROC); atomic_add_int(&nprocs, -1); error = 0; @@ -995,7 +1029,22 @@ loop: } if (p->p_stat == SSTOP && (p->p_flags & P_WAITED) == 0 && ((p->p_flags & P_TRACED) || (options & WUNTRACED))) { + PHOLD(p); lwkt_gettoken(&p->p_token); + if (p->p_pptr != q) { + lwkt_reltoken(&p->p_token); + PRELE(p); + goto loop; + } + if (p->p_stat != SSTOP || + (p->p_flags & P_WAITED) != 0 || + ((p->p_flags & P_TRACED) == 0 && + (options & WUNTRACED) == 0)) { + lwkt_reltoken(&p->p_token); + PRELE(p); + goto loop; + } + p->p_flags |= P_WAITED; *res = p->p_pid; @@ -1007,10 +1056,23 @@ loop: bzero(rusage, sizeof(*rusage)); error = 0; lwkt_reltoken(&p->p_token); + PRELE(p); goto done; } if ((options & WCONTINUED) && (p->p_flags & P_CONTINUED)) { + PHOLD(p); lwkt_gettoken(&p->p_token); + if (p->p_pptr != q) { + lwkt_reltoken(&p->p_token); + PRELE(p); + goto loop; + } + if ((p->p_flags & P_CONTINUED) == 0) { + lwkt_reltoken(&p->p_token); + PRELE(p); + goto loop; + } + *res = p->p_pid; p->p_usched->heuristic_exiting(td->td_lwp, p); p->p_flags &= ~P_CONTINUED; @@ -1019,6 +1081,7 @@ loop: *status = SIGCONT; error = 0; lwkt_reltoken(&p->p_token); + PRELE(p); goto done; } } diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 728b9c5e5c..3af2cd30a6 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -159,8 +159,9 @@ procinit(void) * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at * various critical points in the fork/exec and exit paths before proceeding. */ +#define PLOCK_ZOMB 0x20000000 #define PLOCK_WAITING 0x40000000 -#define PLOCK_MASK 0x3FFFFFFF +#define PLOCK_MASK 0x1FFFFFFF void pstall(struct proc *p, const char *wmesg, int count) @@ -201,21 +202,98 @@ pstall(struct proc *p, const char *wmesg, int count) void phold(struct proc *p) +{ + atomic_add_int(&p->p_lock, 1); +} + +/* + * WARNING! On last release (p) can become instantly invalid due to + * MP races. 
+ */ +void +prele(struct proc *p) { int o; int n; + /* + * Fast path + */ + if (atomic_cmpset_int(&p->p_lock, 1, 0)) + return; + + /* + * Slow path + */ for (;;) { o = p->p_lock; + KKASSERT((o & PLOCK_MASK) > 0); cpu_ccfence(); - n = o + 1; - if (atomic_cmpset_int(&p->p_lock, o, n)) + n = (o - 1) & ~PLOCK_WAITING; + if (atomic_cmpset_int(&p->p_lock, o, n)) { + if (o & PLOCK_WAITING) + wakeup(&p->p_lock); break; + } } } +/* + * Hold and flag serialized for zombie reaping purposes. + * + * This function will fail if it has to block, returning non-zero with + * neither the flag set nor the hold count bumped. Note that we must block + * without holding a ref, meaning that the caller must ensure that (p) + * remains valid through some other interlock (typically on its parent + * process's p_token). + * + * Zero is returned on success. The hold count will be incremented and + * the serialization flag acquired. Note that serialization is only against + * other pholdzomb() calls, not against phold() calls. + */ +int +pholdzomb(struct proc *p) +{ + int o; + int n; + + /* + * Fast path + */ + if (atomic_cmpset_int(&p->p_lock, 0, PLOCK_ZOMB | 1)) + return(0); + + /* + * Slow path + */ + for (;;) { + o = p->p_lock; + cpu_ccfence(); + if ((o & PLOCK_ZOMB) == 0) { + n = (o + 1) | PLOCK_ZOMB; + if (atomic_cmpset_int(&p->p_lock, o, n)) + return(0); + } else { + KKASSERT((o & PLOCK_MASK) > 0); + n = o | PLOCK_WAITING; + tsleep_interlock(&p->p_lock, 0); + if (atomic_cmpset_int(&p->p_lock, o, n)) { + tsleep(&p->p_lock, PINTERLOCKED, "phldz", 0); + /* (p) can be ripped out at this point */ + return(1); + } + } + } +} + +/* + * Release PLOCK_ZOMB and the hold count, waking up any waiters. + * + * WARNING! On last release (p) can become instantly invalid due to + * MP races. 
+ */ void -prele(struct proc *p) +prelezomb(struct proc *p) { int o; int n; @@ -223,17 +301,18 @@ prele(struct proc *p) /* * Fast path */ - if (atomic_cmpset_int(&p->p_lock, 1, 0)) + if (atomic_cmpset_int(&p->p_lock, PLOCK_ZOMB | 1, 0)) return; /* * Slow path */ + KKASSERT(p->p_lock & PLOCK_ZOMB); for (;;) { o = p->p_lock; KKASSERT((o & PLOCK_MASK) > 0); cpu_ccfence(); - n = (o - 1) & ~PLOCK_WAITING; + n = (o - 1) & ~(PLOCK_ZOMB | PLOCK_WAITING); if (atomic_cmpset_int(&p->p_lock, o, n)) { if (o & PLOCK_WAITING) wakeup(&p->p_lock); @@ -768,6 +847,7 @@ proc_remove_zombie(struct proc *p) PSTALL(p, "reap2", 0); LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); + p->p_pptr = NULL; lwkt_reltoken(&proc_token); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 6ce64cf212..ef1857e867 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -453,6 +453,8 @@ extern void stopevent(struct proc*, unsigned int, unsigned int); */ #define PHOLD(p) phold((p)) #define PRELE(p) prele((p)) +#define PHOLDZOMB(p) pholdzomb((p)) +#define PRELEZOMB(p) prelezomb((p)) #define PSTALL(p, msg, n) \ do { if ((p)->p_lock > (n)) pstall((p), (msg), (n)); } while (0) @@ -556,6 +558,8 @@ void faultin (struct proc *p); void swapin_request (void); void phold (struct proc *); void prele (struct proc *); +int pholdzomb (struct proc *); +void prelezomb (struct proc *); void pstall (struct proc *, const char *, int); u_int32_t procrunnable (void); -- 2.41.0