kernel - Fix exit races which can lead to a corrupt p_children list
authorMatthew Dillon <dillon@apollo.backplane.com>
Thu, 16 Aug 2012 01:11:11 +0000 (18:11 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Tue, 21 Aug 2012 03:58:50 +0000 (20:58 -0700)
* There are a few races when getting multiple tokens where a threaded
  process is wait*()ing for exiting children from multiple threads
  at once.

  Fix the problem by serializing the operation on a per-child basis,
  and by using PHOLD/PRELE prior to acquiring the child's p_token.
  Then re-check the conditions before accepting the child.

* There is a small chance this will also reduce or fix VM on-exit races
  in i386, as this bug could result in an already-destroyed process
  being pulled off by the racing wait*().  Maybe 25% chance.

sys/kern/kern_exit.c
sys/kern/kern_proc.c
sys/sys/proc.h

index 7d2216a..3fdaace 100644 (file)
@@ -501,12 +501,19 @@ exit1(int rv)
        q = LIST_FIRST(&p->p_children);
        if (q) {
                lwkt_gettoken(&initproc->p_token);
        q = LIST_FIRST(&p->p_children);
        if (q) {
                lwkt_gettoken(&initproc->p_token);
-               while (q) {
-                       nq = LIST_NEXT(q, p_sibling);
+               while ((q = LIST_FIRST(&p->p_children)) != NULL) {
+                       PHOLD(q);
+                       lwkt_gettoken(&q->p_token);
+                       if (q != LIST_FIRST(&p->p_children)) {
+                               lwkt_reltoken(&q->p_token);
+                               PRELE(q);
+                               continue;
+                       }
                        LIST_REMOVE(q, p_sibling);
                        LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling);
                        q->p_pptr = initproc;
                        q->p_sigparent = SIGCHLD;
                        LIST_REMOVE(q, p_sibling);
                        LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling);
                        q->p_pptr = initproc;
                        q->p_sigparent = SIGCHLD;
+
                        /*
                         * Traced processes are killed
                         * since their existence means someone is screwing up.
                        /*
                         * Traced processes are killed
                         * since their existence means someone is screwing up.
@@ -515,7 +522,8 @@ exit1(int rv)
                                q->p_flags &= ~P_TRACED;
                                ksignal(q, SIGKILL);
                        }
                                q->p_flags &= ~P_TRACED;
                                ksignal(q, SIGKILL);
                        }
-                       q = nq;
+                       lwkt_reltoken(&q->p_token);
+                       PRELE(q);
                }
                lwkt_reltoken(&initproc->p_token);
                wakeup(initproc);
                }
                lwkt_reltoken(&initproc->p_token);
                wakeup(initproc);
@@ -880,8 +888,24 @@ loop:
                         * We must wait for them to exit before we can reap
                         * the master thread, otherwise we may race reaping
                         * non-master threads.
                         * We must wait for them to exit before we can reap
                         * the master thread, otherwise we may race reaping
                         * non-master threads.
+                        *
+                        * Only this routine can remove a process from
+                        * the zombie list and destroy it, use PACQUIREZOMB()
+                        * to serialize us and loop if it blocks (interlocked
+                        * by the parent's q->p_token).
+                        *
+                        * WARNING!  (p) can be invalid when PHOLDZOMB(p)
+                        *           returns non-zero.  Be sure not to
+                        *           mess with it.
                         */
                         */
+                       if (PHOLDZOMB(p))
+                               goto loop;
                        lwkt_gettoken(&p->p_token);
                        lwkt_gettoken(&p->p_token);
+                       if (p->p_pptr != q) {
+                               lwkt_reltoken(&p->p_token);
+                               PRELEZOMB(p);
+                               goto loop;
+                       }
                        while (p->p_nthreads > 0) {
                                tsleep(&p->p_nthreads, 0, "lwpzomb", hz);
                        }
                        while (p->p_nthreads > 0) {
                                tsleep(&p->p_nthreads, 0, "lwpzomb", hz);
                        }
@@ -912,6 +936,7 @@ loop:
                         * put a hold on the process for short periods of
                         * time.
                         */
                         * put a hold on the process for short periods of
                         * time.
                         */
+                       PRELE(p);
                        PSTALL(p, "reap3", 0);
 
                        /* Take care of our return values. */
                        PSTALL(p, "reap3", 0);
 
                        /* Take care of our return values. */
@@ -927,6 +952,7 @@ loop:
                         * we need to give it back to the old parent.
                         */
                        if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
                         * we need to give it back to the old parent.
                         */
                        if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
+                               PHOLD(p);
                                p->p_oppid = 0;
                                proc_reparent(p, t);
                                ksignal(t, SIGCHLD);
                                p->p_oppid = 0;
                                proc_reparent(p, t);
                                ksignal(t, SIGCHLD);
@@ -934,6 +960,7 @@ loop:
                                error = 0;
                                PRELE(t);
                                lwkt_reltoken(&p->p_token);
                                error = 0;
                                PRELE(t);
                                lwkt_reltoken(&p->p_token);
+                               PRELEZOMB(p);
                                goto done;
                        }
 
                                goto done;
                        }
 
@@ -990,6 +1017,13 @@ loop:
                        vmspace_exitfree(p);
                        PSTALL(p, "reap5", 0);
 
                        vmspace_exitfree(p);
                        PSTALL(p, "reap5", 0);
 
+                       /*
+                        * NOTE: We have to officially release ZOMB in order
+                        *       to ensure that a racing thread in kern_wait()
+                        *       which blocked on ZOMB is woken up.
+                        */
+                       PHOLD(p);
+                       PRELEZOMB(p);
                        kfree(p, M_PROC);
                        atomic_add_int(&nprocs, -1);
                        error = 0;
                        kfree(p, M_PROC);
                        atomic_add_int(&nprocs, -1);
                        error = 0;
@@ -997,7 +1031,22 @@ loop:
                }
                if (p->p_stat == SSTOP && (p->p_flags & P_WAITED) == 0 &&
                    ((p->p_flags & P_TRACED) || (options & WUNTRACED))) {
                }
                if (p->p_stat == SSTOP && (p->p_flags & P_WAITED) == 0 &&
                    ((p->p_flags & P_TRACED) || (options & WUNTRACED))) {
+                       PHOLD(p);
                        lwkt_gettoken(&p->p_token);
                        lwkt_gettoken(&p->p_token);
+                       if (p->p_pptr != q) {
+                               lwkt_reltoken(&p->p_token);
+                               PRELE(p);
+                               goto loop;
+                       }
+                       if (p->p_stat != SSTOP ||
+                           (p->p_flags & P_WAITED) != 0 ||
+                           ((p->p_flags & P_TRACED) == 0 &&
+                            (options & WUNTRACED) == 0)) {
+                               lwkt_reltoken(&p->p_token);
+                               PRELE(p);
+                               goto loop;
+                       }
+
                        p->p_flags |= P_WAITED;
 
                        *res = p->p_pid;
                        p->p_flags |= P_WAITED;
 
                        *res = p->p_pid;
@@ -1009,10 +1058,23 @@ loop:
                                bzero(rusage, sizeof(*rusage));
                        error = 0;
                        lwkt_reltoken(&p->p_token);
                                bzero(rusage, sizeof(*rusage));
                        error = 0;
                        lwkt_reltoken(&p->p_token);
+                       PRELE(p);
                        goto done;
                }
                if ((options & WCONTINUED) && (p->p_flags & P_CONTINUED)) {
                        goto done;
                }
                if ((options & WCONTINUED) && (p->p_flags & P_CONTINUED)) {
+                       PHOLD(p);
                        lwkt_gettoken(&p->p_token);
                        lwkt_gettoken(&p->p_token);
+                       if (p->p_pptr != q) {
+                               lwkt_reltoken(&p->p_token);
+                               PRELE(p);
+                               goto loop;
+                       }
+                       if ((p->p_flags & P_CONTINUED) == 0) {
+                               lwkt_reltoken(&p->p_token);
+                               PRELE(p);
+                               goto loop;
+                       }
+
                        *res = p->p_pid;
                        p->p_usched->heuristic_exiting(td->td_lwp, p);
                        p->p_flags &= ~P_CONTINUED;
                        *res = p->p_pid;
                        p->p_usched->heuristic_exiting(td->td_lwp, p);
                        p->p_flags &= ~P_CONTINUED;
@@ -1021,6 +1083,7 @@ loop:
                                *status = SIGCONT;
                        error = 0;
                        lwkt_reltoken(&p->p_token);
                                *status = SIGCONT;
                        error = 0;
                        lwkt_reltoken(&p->p_token);
+                       PRELE(p);
                        goto done;
                }
        }
                        goto done;
                }
        }
index 728b9c5..3af2cd3 100644 (file)
@@ -159,8 +159,9 @@ procinit(void)
  * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at
  * various critical points in the fork/exec and exit paths before proceeding.
  */
  * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at
  * various critical points in the fork/exec and exit paths before proceeding.
  */
+#define PLOCK_ZOMB     0x20000000
 #define PLOCK_WAITING  0x40000000
 #define PLOCK_WAITING  0x40000000
-#define PLOCK_MASK     0x3FFFFFFF
+#define PLOCK_MASK     0x1FFFFFFF
 
 void
 pstall(struct proc *p, const char *wmesg, int count)
 
 void
 pstall(struct proc *p, const char *wmesg, int count)
@@ -202,20 +203,97 @@ pstall(struct proc *p, const char *wmesg, int count)
 void
 phold(struct proc *p)
 {
 void
 phold(struct proc *p)
 {
+       atomic_add_int(&p->p_lock, 1);
+}
+
+/*
+ * WARNING!  On last release (p) can become instantly invalid due to
+ *          MP races.
+ */
+void
+prele(struct proc *p)
+{
        int o;
        int n;
 
        int o;
        int n;
 
+       /*
+        * Fast path
+        */
+       if (atomic_cmpset_int(&p->p_lock, 1, 0))
+               return;
+
+       /*
+        * Slow path
+        */
        for (;;) {
                o = p->p_lock;
        for (;;) {
                o = p->p_lock;
+               KKASSERT((o & PLOCK_MASK) > 0);
                cpu_ccfence();
                cpu_ccfence();
-               n = o + 1;
-               if (atomic_cmpset_int(&p->p_lock, o, n))
+               n = (o - 1) & ~PLOCK_WAITING;
+               if (atomic_cmpset_int(&p->p_lock, o, n)) {
+                       if (o & PLOCK_WAITING)
+                               wakeup(&p->p_lock);
                        break;
                        break;
+               }
        }
 }
 
        }
 }
 
+/*
+ * Hold and flag serialized for zombie reaping purposes.
+ *
+ * This function will fail if it has to block, returning non-zero with
+ * neither the flag set or the hold count bumped.  Note that we must block
+ * without holding a ref, meaning that the caller must ensure that (p)
+ * remains valid through some other interlock (typically on its parent
+ * process's p_token).
+ *
+ * Zero is returned on success.  The hold count will be incremented and
+ * the serialization flag acquired.  Note that serialization is only against
+ * other pholdzomb() calls, not against phold() calls.
+ */
+int
+pholdzomb(struct proc *p)
+{
+       int o;
+       int n;
+
+       /*
+        * Fast path
+        */
+       if (atomic_cmpset_int(&p->p_lock, 0, PLOCK_ZOMB | 1))
+               return(0);
+
+       /*
+        * Slow path
+        */
+       for (;;) {
+               o = p->p_lock;
+               cpu_ccfence();
+               if ((o & PLOCK_ZOMB) == 0) {
+                       n = (o + 1) | PLOCK_ZOMB;
+                       if (atomic_cmpset_int(&p->p_lock, o, n))
+                               return(0);
+               } else {
+                       KKASSERT((o & PLOCK_MASK) > 0);
+                       n = o | PLOCK_WAITING;
+                       tsleep_interlock(&p->p_lock, 0);
+                       if (atomic_cmpset_int(&p->p_lock, o, n)) {
+                               tsleep(&p->p_lock, PINTERLOCKED, "phldz", 0);
+                               /* (p) can be ripped out at this point */
+                               return(1);
+                       }
+               }
+       }
+}
+
+/*
+ * Release PLOCK_ZOMB and the hold count, waking up any waiters.
+ *
+ * WARNING!  On last release (p) can become instantly invalid due to
+ *          MP races.
+ */
 void
 void
-prele(struct proc *p)
+prelezomb(struct proc *p)
 {
        int o;
        int n;
 {
        int o;
        int n;
@@ -223,17 +301,18 @@ prele(struct proc *p)
        /*
         * Fast path
         */
        /*
         * Fast path
         */
-       if (atomic_cmpset_int(&p->p_lock, 1, 0))
+       if (atomic_cmpset_int(&p->p_lock, PLOCK_ZOMB | 1, 0))
                return;
 
        /*
         * Slow path
         */
                return;
 
        /*
         * Slow path
         */
+       KKASSERT(p->p_lock & PLOCK_ZOMB);
        for (;;) {
                o = p->p_lock;
                KKASSERT((o & PLOCK_MASK) > 0);
                cpu_ccfence();
        for (;;) {
                o = p->p_lock;
                KKASSERT((o & PLOCK_MASK) > 0);
                cpu_ccfence();
-               n = (o - 1) & ~PLOCK_WAITING;
+               n = (o - 1) & ~(PLOCK_ZOMB | PLOCK_WAITING);
                if (atomic_cmpset_int(&p->p_lock, o, n)) {
                        if (o & PLOCK_WAITING)
                                wakeup(&p->p_lock);
                if (atomic_cmpset_int(&p->p_lock, o, n)) {
                        if (o & PLOCK_WAITING)
                                wakeup(&p->p_lock);
@@ -768,6 +847,7 @@ proc_remove_zombie(struct proc *p)
        PSTALL(p, "reap2", 0);
        LIST_REMOVE(p, p_list); /* off zombproc */
        LIST_REMOVE(p, p_sibling);
        PSTALL(p, "reap2", 0);
        LIST_REMOVE(p, p_list); /* off zombproc */
        LIST_REMOVE(p, p_sibling);
+       p->p_pptr = NULL;
        lwkt_reltoken(&proc_token);
 }
 
        lwkt_reltoken(&proc_token);
 }
 
index 918d0ed..b181564 100644 (file)
@@ -453,6 +453,8 @@ extern void stopevent(struct proc*, unsigned int, unsigned int);
  */
 #define PHOLD(p)       phold((p))
 #define PRELE(p)       prele((p))
  */
 #define PHOLD(p)       phold((p))
 #define PRELE(p)       prele((p))
+#define PHOLDZOMB(p)   pholdzomb((p))
+#define PRELEZOMB(p)   prelezomb((p))
 #define PSTALL(p, msg, n) \
        do { if ((p)->p_lock > (n)) pstall((p), (msg), (n)); } while (0)
 
 #define PSTALL(p, msg, n) \
        do { if ((p)->p_lock > (n)) pstall((p), (msg), (n)); } while (0)
 
@@ -556,6 +558,8 @@ void        faultin (struct proc *p);
 void   swapin_request (void);
 void   phold (struct proc *);
 void   prele (struct proc *);
 void   swapin_request (void);
 void   phold (struct proc *);
 void   prele (struct proc *);
+int    pholdzomb (struct proc *);
+void   prelezomb (struct proc *);
 void   pstall (struct proc *, const char *, int);
 
 u_int32_t      procrunnable (void);
 void   pstall (struct proc *, const char *, int);
 
 u_int32_t      procrunnable (void);