From eb2adbf513fa1db03b9e8e7b61fcaf511babd367 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 17 Nov 2011 09:04:53 -0800 Subject: [PATCH] kernel - Fix ps/thread-exit and other related ps races * Adjust sysctl_kern_proc()'s kernel thread scanning code to use a marker instead of depending on td remaining on its proper list. Otherwise blocking conditions can rip td out from under us or move it to another cpu, potentially resulting in a crash or livelock. Index the scan backwards to avoid live-locking continuous adds to the list. * Fix a potential race in the zombie removal code vs a ps, p->p_token was being released too early. * Adjust lwkt_exit() to wait for the thread's hold count to drop to zero so lwkt_hold() works as advertised. --- sys/ddb/db_ps.c | 2 ++ sys/kern/kern_exit.c | 3 +- sys/kern/kern_proc.c | 76 ++++++++++++++++++++++++++++++++++++------ sys/kern/lwkt_thread.c | 11 ++++++ sys/sys/thread.h | 2 +- 5 files changed, 81 insertions(+), 13 deletions(-) diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index 16b31feb3a..8d93a15d72 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -160,6 +160,8 @@ db_ps(db_expr_t dummy1, boolean_t dummy2, db_expr_t dummy3, char *dummy4) return; db_printf(" tdq thread pid flags pri/cs/mp sp wmesg wchan comm\n"); TAILQ_FOREACH(td, &gd->gd_tdallq, td_allq) { + if (td->td_flags & TDF_MARKER) + continue; if (db_more(&nl) < 0) return; db_printf(" %3d %p %3d %08x %2d/%02d %p %8.8s %p %s\n", diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index f01755916e..d43daee564 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -869,7 +869,6 @@ loop: reaplwp(lp); } KKASSERT(p->p_nthreads == 0); - lwkt_reltoken(&p->p_token); /* * Don't do anything really bad until all references @@ -904,6 +903,7 @@ loop: wakeup((caddr_t)t); error = 0; PRELE(t); + lwkt_reltoken(&p->p_token); goto done; } /* @@ -914,6 +914,7 @@ * the zombie list. 
*/ proc_remove_zombie(p); + lwkt_reltoken(&p->p_token); leavepgrp(p); p->p_xstat = 0; diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 667b966b47..eea0ede937 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -634,7 +634,9 @@ again: * Called from exit1 to remove a process from the allproc * list and move it to the zombie list. * - * No requirements. + * Caller must hold p->p_token. We are required to wait until p_lock + * becomes zero before we can manipulate the list, allowing allproc + * scans to guarantee consistency during a list scan. */ void proc_move_allproc_zombie(struct proc *p) @@ -656,14 +658,16 @@ proc_move_allproc_zombie(struct proc *p) * from the zombie list and the sibling list. This routine will block * if someone has a lock on the proces (p_lock). * - * No requirements. + * Caller must hold p->p_token. We are required to wait until p_lock + * becomes zero before we can manipulate the list, allowing allproc + * scans to guarantee consistency during a list scan. */ void proc_remove_zombie(struct proc *p) { lwkt_gettoken(&proc_token); while (p->p_lock) { - tsleep(p, 0, "reap1", hz / 10); + tsleep(p, 0, "reap2", hz / 10); } LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); @@ -690,6 +694,11 @@ allproc_scan(int (*callback)(struct proc *, void *), void *data) int r; int limit = nprocs + ncpus; + /* + * proc_token protects the allproc list and PHOLD() prevents the + * process from being removed from the allproc list or the zombproc + * list. + */ lwkt_gettoken(&proc_token); LIST_FOREACH(p, &allproc, p_list) { PHOLD(p); @@ -707,8 +716,9 @@ allproc_scan(int (*callback)(struct proc *, void *), void *data) * Scan all lwps of processes on the allproc list. The lwp is automatically * held for the callback. A return value of -1 terminates the loop. * - * No requirements. * The callback is made with the proces and lwp both held, and proc_token held. + * + * No requirements. 
*/ void alllwp_scan(int (*callback)(struct lwp *, void *), void *data) @@ -717,6 +727,11 @@ alllwp_scan(int (*callback)(struct lwp *, void *), void *data) struct lwp *lp; int r = 0; + /* + * proc_token protects the allproc list and PHOLD() prevents the + * process from being removed from the allproc list or the zombproc + * list. + */ lwkt_gettoken(&proc_token); LIST_FOREACH(p, &allproc, p_list) { PHOLD(p); @@ -878,6 +893,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) struct proc *p; struct proclist *plist; struct thread *td; + struct thread *marker; int doingzomb, flags = 0; int error = 0; int n; @@ -888,9 +904,15 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) oid &= ~KERN_PROC_FLAGMASK; if ((oid == KERN_PROC_ALL && namelen != 0) || - (oid != KERN_PROC_ALL && namelen != 1)) + (oid != KERN_PROC_ALL && namelen != 1)) { return (EINVAL); + } + /* + * proc_token protects the allproc list and PHOLD() prevents the + * process from being removed from the allproc list or the zombproc + * list. + */ lwkt_gettoken(&proc_token); if (oid == KERN_PROC_PID) { p = pfindn((pid_t)name[0]); @@ -981,6 +1003,9 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) if (!ps_showallthreads || jailed(cr1)) goto post_threads; + marker = kmalloc(sizeof(struct thread), M_TEMP, M_WAITOK|M_ZERO); + error = 0; + for (n = 1; n <= ncpus; ++n) { globaldata_t rgd; int nid; @@ -991,25 +1016,54 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) rgd = globaldata_find(nid); lwkt_setcpu_self(rgd); - TAILQ_FOREACH(td, &mycpu->gd_tdallq, td_allq) { - if (td->td_proc) + crit_enter(); + TAILQ_INSERT_TAIL(&rgd->gd_tdallq, marker, td_allq); + crit_exit(); + + while ((td = TAILQ_PREV(marker, lwkt_queue, td_allq)) != NULL) { + crit_enter(); + if (td != TAILQ_PREV(marker, lwkt_queue, td_allq)) { + crit_exit(); continue; + } + TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq); + TAILQ_INSERT_BEFORE(td, marker, td_allq); + lwkt_hold(td); + crit_exit(); + + if (td->td_flags & TDF_MARKER) { + lwkt_rele(td); + continue; + } + if (td->td_proc) { + 
lwkt_rele(td); + continue; + } + switch (oid) { case KERN_PROC_PGRP: case KERN_PROC_TTY: case KERN_PROC_UID: case KERN_PROC_RUID: - continue; + break; default: + error = sysctl_out_proc_kthread(td, req, + doingzomb); break; } - lwkt_hold(td); - error = sysctl_out_proc_kthread(td, req, doingzomb); lwkt_rele(td); if (error) - goto post_threads; + break; } + crit_enter(); + TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq); + crit_exit(); + + if (error) + break; } + kfree(marker, M_TEMP); + post_threads: lwkt_reltoken(&proc_token); return (error); diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index d741f135d5..456323f7dc 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -517,6 +517,11 @@ lwkt_set_comm(thread_t td, const char *ctl, ...) KTR_LOG(ctxsw_newtd, td, &td->td_comm[0]); } +/* + * Prevent the thread from getting destroyed. Note that unlike PHOLD/PRELE + * this does not prevent the thread from migrating to another cpu so the + * gd_tdallq state is not protected by this. + */ void lwkt_hold(thread_t td) { @@ -1692,16 +1697,22 @@ lwkt_exit(void) */ gd = mycpu; crit_enter_quick(td); + lwkt_wait_free(td); while ((std = gd->gd_freetd) != NULL) { KKASSERT((std->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) == 0); gd->gd_freetd = NULL; objcache_put(thread_cache, std); + lwkt_wait_free(td); } /* * Remove thread resources from kernel lists and deschedule us for * the last time. We cannot block after this point or we may end * up with a stale td on the tsleepq. + * + * None of this may block, the critical section is the only thing + * protecting tdallq and the only thing preventing new lwkt_hold() + * thread refs now. 
*/ if (td->td_flags & TDF_TSLEEPQ) tsleep_remove(td); diff --git a/sys/sys/thread.h b/sys/sys/thread.h index 9c97487533..f380a98407 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -354,7 +354,7 @@ struct thread { #define TDF_ALLOCATED_STACK 0x00000400 /* objcache allocated stack */ #define TDF_VERBOSE 0x00000800 /* verbose on exit */ #define TDF_DEADLKTREAT 0x00001000 /* special lockmgr treatment */ -#define TDF_UNUSED2000 0x00002000 +#define TDF_MARKER 0x00002000 /* tdallq list scan marker */ #define TDF_TIMEOUT_RUNNING 0x00004000 /* tsleep timeout race */ #define TDF_TIMEOUT 0x00008000 /* tsleep timeout */ #define TDF_INTTHREAD 0x00010000 /* interrupt thread */ -- 2.41.0