From 3933a3ab606b81b57423112e261ca5426deac2e6 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 30 Aug 2010 12:29:06 -0700 Subject: [PATCH] kernel - Separate inherited mplocks from td_mplocks and fix a gettoken bug * Separate out td_mpcount into td_xpcount and td_mpcount. td_xpcount is an inherited mpcount. A preempting thread inherits the mpcount on the thread being preempted until it switches out to guarantee that the mplock remains atomic through the preemption (as expected by the poor thread that got preempted). * Fix a serious but hard to reproduce bug in lwkt_gettoken(). This function marks the token reference as being MPSAFE if td_mpcount is non-zero even when the token is not a MPSAFE token. However, until this patch td_mpcount also included inherited mpcounts when one thread preempts another and the inherited mpcounts could go away if the thread blocks or switches, leaving the token unprotected. * Fix a less serious bug where a new token reference was being populated prior to td_toks_stop being incremented, and where an existing token reference was being depopulated after td_toks_stop is decremented. Nothing can race us but switch around the index increment/decrement to protect the slot being operated upon. * Add a ton of assertions in the interrupt, trap, and syscall paths To assert that the mplock, number of tokens, and critcount remain unchanged across driver and other calls. --- sys/kern/kern_intr.c | 51 +++++++++++++++++++++++ sys/kern/kern_mplock.c | 56 ++++++++++++++----------- sys/kern/kern_shutdown.c | 20 +++++++-- sys/kern/lwkt_thread.c | 62 ++++++++++++++-------------- sys/kern/lwkt_token.c | 20 +++++---- sys/platform/pc32/i386/trap.c | 16 +++++-- sys/platform/pc64/x86_64/trap.c | 16 +++++-- sys/platform/vkernel/i386/trap.c | 20 +++++++-- sys/platform/vkernel64/x86_64/trap.c | 22 ++++++++-- sys/sys/mplock2.h | 17 +++++++- sys/sys/thread.h | 11 ++++- 11 files changed, 231 insertions(+), 80 deletions(-) diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 39160d2ef6..ac43f89676 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -80,6 +80,44 @@ int max_installed_soft_intr; #define EMERGENCY_INTR_POLLING_FREQ_MAX 20000 +#ifdef INVARIANTS + +#define TD_INVARIANTS_DECLARE \ + int mpcount; \ + int spincount; \ + lwkt_tokref_t curstop + +#define TD_INVARIANTS_GET(td) \ + do { \ + mpcount = (td)->td_mpcount; \ + spincount = (td)->td_gd->gd_spinlocks_wr; \ + curstop = (td)->td_toks_stop; \ + } while(0) + +#define TD_INVARIANTS_TEST(td, name) \ + do { \ + KASSERT(mpcount == (td)->td_mpcount, \ + ("mpcount mismatch after interrupt handler %s", \ + name)); \ + KASSERT(spincount == (td)->td_gd->gd_spinlocks_wr, \ + ("spincount mismatch after interrupt handler %s", \ + name)); \ + KASSERT(curstop == (td)->td_toks_stop, \ + ("token count mismatch after interrupt handler %s", \ + name)); \ + } while(0) + +#define TD_INVARIANTS_ADJMP(count) mpcount += (count) + +#else + +#define TD_INVARIANTS_DECLARE +#define TD_INVARIANTS_GET(td) +#define TD_INVARIANTS_TEST(td) +#define TD_INVARIANTS_ADJMP(count) + +#endif + static int sysctl_emergency_freq(SYSCTL_HANDLER_ARGS); static int sysctl_emergency_enable(SYSCTL_HANDLER_ARGS); static void emergency_intr_timer_callback(systimer_t, struct intrframe *); @@ -608,6 +646,7 @@ ithread_fast_handler(struct intrframe *frame) #ifdef SMP int got_mplock; #endif + TD_INVARIANTS_DECLARE; intrec_t rec, next_rec; globaldata_t gd; thread_t td; @@ -652,7 +691,9 @@ ithread_fast_handler(struct intrframe *frame) got_mplock = 0; #endif + 
TD_INVARIANTS_GET(td); list = &info->i_reclist; + for (rec = *list; rec; rec = next_rec) { next_rec = rec->next; /* rec may be invalid after call */ @@ -665,6 +706,7 @@ ithread_fast_handler(struct intrframe *frame) break; } got_mplock = 1; + TD_INVARIANTS_ADJMP(1); } #endif if (rec->serializer) { @@ -674,6 +716,7 @@ ithread_fast_handler(struct intrframe *frame) } else { rec->handler(rec->argument, frame); } + TD_INVARIANTS_TEST(td, rec->name); } } @@ -731,6 +774,7 @@ ithread_handler(void *arg) globaldata_t gd; struct systimer ill_timer; /* enforced freq. timer */ u_int ill_count; /* interrupt livelock counter */ + TD_INVARIANTS_DECLARE; ill_count = 0; intr = (int)(intptr_t)arg; @@ -766,6 +810,8 @@ ithread_handler(void *arg) } #endif + TD_INVARIANTS_GET(gd->gd_curthread); + /* * If an interrupt is pending, clear i_running and execute the * handlers. Note that certain types of interrupts can re-trigger @@ -789,6 +835,7 @@ ithread_handler(void *arg) } else { rec->handler(rec->argument, NULL); } + TD_INVARIANTS_TEST(gd->gd_curthread, rec->name); } } @@ -914,8 +961,11 @@ ithread_emergency(void *arg __unused) struct intr_info *info; intrec_t rec, nrec; int intr; + thread_t td __debugvar = curthread; + TD_INVARIANTS_DECLARE; get_mplock(); + TD_INVARIANTS_GET(td); for (;;) { for (intr = 0; intr < max_installed_hard_intr; ++intr) { @@ -928,6 +978,7 @@ ithread_emergency(void *arg __unused) } else { rec->handler(rec->argument, NULL); } + TD_INVARIANTS_TEST(td, rec->name); } nrec = rec->next; } diff --git a/sys/kern/kern_mplock.c b/sys/kern/kern_mplock.c index 57bed32f2a..018df7832f 100644 --- a/sys/kern/kern_mplock.c +++ b/sys/kern/kern_mplock.c @@ -101,7 +101,8 @@ cpu_get_initial_mplock(void) /* * This code is called from the get_mplock() inline when the mplock - * is not already held. + * is not already held. td_mpcount has already been predisposed + * (incremented). */ void _get_mplock_predisposed(const char *file, int line) @@ -154,8 +155,9 @@ _get_mplock_contested(const char *file, int line) } /* - * Called if td_mpcount went negative or if td_mpcount is 0 and we were - * unable to release the MP lock. Handles sanity checks and conflicts. + * Called if td_mpcount went negative or if td_mpcount + td_xpcount is 0 + * and we were unable to release the MP lock. Handles sanity checks + * and conflicts. * * It is possible for the inline release to have raced an interrupt which * get/rel'd the MP lock, causing the inline's cmpset to fail. If this @@ -166,15 +168,18 @@ void _rel_mplock_contested(void) { globaldata_t gd = mycpu; + thread_t td = gd->gd_curthread; int ov; - KKASSERT(gd->gd_curthread->td_mpcount >= 0); - for (;;) { - ov = mp_lock; - if (ov != gd->gd_cpuid) - break; - if (atomic_cmpset_int(&mp_lock, ov, -1)) - break; + KKASSERT(td->td_mpcount >= 0); + if (td->td_mpcount + td->td_xpcount == 0) { + for (;;) { + ov = mp_lock; + if (ov != gd->gd_cpuid) + break; + if (atomic_cmpset_int(&mp_lock, ov, -1)) + break; + } } } @@ -201,12 +206,14 @@ _try_mplock_contested(const char *file, int line) KKASSERT(td->td_mpcount >= 0); ++mplock_contention_count; - for (;;) { - ov = mp_lock; - if (ov != gd->gd_cpuid) - break; - if (atomic_cmpset_int(&mp_lock, ov, -1)) - break; + if (td->td_mpcount + td->td_xpcount == 0) { + for (;;) { + ov = mp_lock; + if (ov != gd->gd_cpuid) + break; + if (atomic_cmpset_int(&mp_lock, ov, -1)) + break; + } } } @@ -223,19 +230,22 @@ _cpu_try_mplock_contested(const char *file, int line) /* * Temporarily yield the MP lock. 
This is part of lwkt_user_yield() - * which is kinda hackish. + * which is kinda hackish. The MP lock cannot be yielded if inherited + * due to a preemption. */ void yield_mplock(thread_t td) { int savecnt; - savecnt = td->td_mpcount; - td->td_mpcount = 1; - rel_mplock(); - DELAY(bgl_yield); - get_mplock(); - td->td_mpcount = savecnt; + if (td->td_xpcount == 0) { + savecnt = td->td_mpcount; + td->td_mpcount = 1; + rel_mplock(); + DELAY(bgl_yield); + get_mplock(); + td->td_mpcount = savecnt; + } } #if 0 diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 05cdd91553..2ac0df30b2 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -149,6 +149,8 @@ int dumping; /* system is dumping */ static struct dumperinfo dumper; /* selected dumper */ globaldata_t panic_cpu_gd; /* which cpu took the panic */ +struct lwkt_tokref panic_tokens[LWKT_MAXTOKENS]; +int panic_tokens_count; int bootverbose = 0; /* note: assignment to force non-bss */ SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, @@ -666,6 +668,7 @@ panic(const char *fmt, ...) { int bootopt, newpanic; globaldata_t gd = mycpu; + thread_t td = gd->gd_curthread; __va_list ap; static char buf[256]; @@ -698,11 +701,11 @@ panic(const char *fmt, ...) ++mycpu->gd_trap_nesting_level; if (mycpu->gd_trap_nesting_level < 25) { kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n", - mycpu->gd_cpuid, curthread); + mycpu->gd_cpuid, td); } - curthread->td_release = NULL; /* be a grinch */ + td->td_release = NULL; /* be a grinch */ for (;;) { - lwkt_deschedule_self(curthread); + lwkt_deschedule_self(td); lwkt_switch(); } /* NOT REACHED */ @@ -725,7 +728,18 @@ panic(const char *fmt, ...) #else panic_cpu_gd = gd; #endif + /* + * Try to get the system into a working state. Save information + * we are about to destroy. + */ kvcreinitspin(); + if (panicstr == NULL) { + bcopy(td->td_toks_array, panic_tokens, sizeof(panic_tokens)); + panic_tokens_count = td->td_toks_stop - &td->td_toks_base; + } + lwkt_relalltokens(td); + td->td_toks_stop = &td->td_toks_base; + /* * Setup */ diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 41dd7a576c..83b2ad1802 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -481,9 +481,11 @@ lwkt_free_thread(thread_t td) * different beast and LWKT priorities should not be confused with * user process priorities. * - * The MP lock may be out of sync with the thread's td_mpcount. lwkt_switch() - * cleans it up. Note that the td_switch() function cannot do anything that - * requires the MP lock since the MP lock will have already been setup for + * The MP lock may be out of sync with the thread's td_mpcount + td_xpcount. + * lwkt_switch() cleans it up. + * + * Note that the td_switch() function cannot do anything that requires + * the MP lock since the MP lock will have already been setup for * the target thread (not the current thread). It's nice to have a scheduler * that does not need the MP lock to work because it allows us to do some * really cool high-performance MP lock optimizations. @@ -570,12 +572,12 @@ lwkt_switch(void) #ifdef SMP /* - * td_mpcount cannot be used to determine if we currently hold the - * MP lock because get_mplock() will increment it prior to attempting - * to get the lock, and switch out if it can't. Our ownership of - * the actual lock will remain stable while we are in a critical section - * (but, of course, another cpu may own or release the lock so the - * actual value of mp_lock is not stable). 
+ * td_mpcount + td_xpcount cannot be used to determine if we currently + * hold the MP lock because get_mplock() will increment it prior to + * attempting to get the lock, and switch out if it can't. Our + * ownership of the actual lock will remain stable while we are + * in a critical section, and once we actually acquire the underlying + * lock as long as the count is greater than 0. */ mpheld = MP_LOCK_HELD(gd); #ifdef INVARIANTS @@ -601,14 +603,11 @@ lwkt_switch(void) if ((ntd = td->td_preempted) != NULL) { KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK); #ifdef SMP - if (ntd->td_mpcount && mpheld == 0) { + if (ntd->td_mpcount + ntd->td_xpcount && mpheld == 0) { panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d", - td, ntd, td->td_mpcount, ntd->td_mpcount); - } - if (ntd->td_mpcount) { - td->td_mpcount -= ntd->td_mpcount; - KKASSERT(td->td_mpcount >= 0); + td, ntd, td->td_mpcount, ntd->td_mpcount + ntd->td_xpcount); } + td->td_xpcount = 0; #endif ntd->td_flags |= TDF_PREEMPT_DONE; @@ -653,6 +652,7 @@ lwkt_switch(void) if (gd->gd_reqflags & RQF_IDLECHECK_MASK) ntd->td_flags |= TDF_IDLE_NOHLT; #ifdef SMP + KKASSERT(ntd->td_xpcount == 0); if (ntd->td_mpcount) { if (gd->gd_trap_nesting_level == 0 && panicstr == NULL) panic("Idle thread %p was holding the BGL!", ntd); @@ -680,7 +680,8 @@ lwkt_switch(void) */ if (ntd->td_fairq_accum >= 0 && #ifdef SMP - (ntd->td_mpcount == 0 || mpheld || cpu_try_mplock()) && + (ntd->td_mpcount + ntd->td_xpcount == 0 || + mpheld || cpu_try_mplock()) && #endif (!TD_TOKS_HELD(ntd) || lwkt_getalltokens(ntd, &lmsg, &laddr)) ) { @@ -698,7 +699,7 @@ lwkt_switch(void) set_cpu_contention_mask(gd); /* Reload mpheld (it become stale after mplock/token ops) */ mpheld = MP_LOCK_HELD(gd); - if (ntd->td_mpcount && mpheld == 0) { + if (ntd->td_mpcount + ntd->td_xpcount && mpheld == 0) { lmsg = "mplock"; laddr = ntd->td_mplock_stallpc; } @@ -763,6 +764,7 @@ lwkt_switch(void) ntd = &gd->gd_idlethread; ntd->td_flags |= TDF_IDLE_NOHLT; #ifdef SMP + KKASSERT(ntd->td_xpcount == 0); if (ntd->td_mpcount) { mpheld = MP_LOCK_HELD(gd); if (gd->gd_trap_nesting_level == 0 && panicstr == NULL) @@ -800,7 +802,8 @@ lwkt_switch(void) if ((ntd->td_pri >= TDPRI_KERN_LPSCHED || nquserok || user_pri_sched) && ntd->td_fairq_accum >= 0 && #ifdef SMP - (ntd->td_mpcount == 0 || mpheld || cpu_try_mplock()) && + (ntd->td_mpcount + ntd->td_xpcount == 0 || + mpheld || cpu_try_mplock()) && #endif (!TD_TOKS_HELD(ntd) || lwkt_getalltokens(ntd, &lmsg, &laddr)) ) { @@ -816,7 +819,7 @@ lwkt_switch(void) * Reload mpheld (it become stale after mplock/token ops). */ mpheld = MP_LOCK_HELD(gd); - if (ntd->td_mpcount && mpheld == 0) { + if (ntd->td_mpcount + ntd->td_xpcount && mpheld == 0) { lmsg = "mplock"; laddr = ntd->td_mplock_stallpc; } @@ -881,7 +884,7 @@ haveidle: KASSERT(ntd->td_critcount, ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri)); #ifdef SMP - if (ntd->td_mpcount == 0 ) { + if (ntd->td_mpcount + ntd->td_xpcount == 0 ) { if (MP_LOCK_HELD(gd)) cpu_rel_mplock(gd->gd_cpuid); } else { @@ -1005,18 +1008,17 @@ lwkt_preempt(thread_t ntd, int critcount) } #ifdef SMP /* - * note: an interrupt might have occured just as we were transitioning + * NOTE: An interrupt might have occured just as we were transitioning * to or from the MP lock. In this case td_mpcount will be pre-disposed - * (non-zero) but not actually synchronized with the actual state of the - * lock. 
We can use it to imply an MP lock requirement for the - * preemption but we cannot use it to test whether we hold the MP lock - * or not. + * (non-zero) but not actually synchronized with the mp_lock itself. + * We can use it to imply an MP lock requirement for the preemption but + * we cannot use it to test whether we hold the MP lock or not. */ savecnt = td->td_mpcount; mpheld = MP_LOCK_HELD(gd); - ntd->td_mpcount += td->td_mpcount; - if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) { - ntd->td_mpcount -= td->td_mpcount; + ntd->td_xpcount = td->td_mpcount; + if (mpheld == 0 && ntd->td_mpcount + ntd->td_xpcount && !cpu_try_mplock()) { + ntd->td_xpcount = 0; ++preempt_miss; need_lwkt_resched(); return; @@ -1039,7 +1041,7 @@ lwkt_preempt(thread_t ntd, int critcount) mpheld = MP_LOCK_HELD(gd); if (mpheld && td->td_mpcount == 0) cpu_rel_mplock(gd->gd_cpuid); - else if (mpheld == 0 && td->td_mpcount) + else if (mpheld == 0 && td->td_mpcount + td->td_xpcount) panic("lwkt_preempt(): MP lock was not held through"); #endif ntd->td_preempted = NULL; @@ -1165,7 +1167,7 @@ lwkt_user_yield(void) * has a chaining effect since if the interrupt is blocked, so is * the event, so normal scheduling will not pick up on the problem. */ - if (cpu_contention_mask && td->td_mpcount) { + if (cpu_contention_mask && td->td_mpcount + td->td_xpcount) { yield_mplock(td); } #endif diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c index a3a4819610..080f4b190e 100644 --- a/sys/kern/lwkt_token.c +++ b/sys/kern/lwkt_token.c @@ -200,6 +200,10 @@ _lwkt_token_pool_lookup(void *ptr) * holding the MP lock. This bypasses unncessary calls to get_mplock() and * rel_mplock() on tokens which are not normally MPSAFE when the thread * is already holding the MP lock. + * + * WARNING: The inherited td_xpcount does not count here because a switch + * could schedule the preempted thread and blow away the inherited + * mplock. */ static __inline void @@ -390,16 +394,18 @@ int _lwkt_trytokref(lwkt_tokref_t ref, thread_t td) { if ((ref->tr_flags & LWKT_TOKEN_MPSAFE) == 0) { - if (try_mplock() == 0) + if (try_mplock() == 0) { + --td->td_toks_stop; return (FALSE); + } } if (_lwkt_trytokref2(ref, td, 0) == FALSE) { /* * Cleanup, deactivate the failed token. 
*/ - --td->td_toks_stop; if ((ref->tr_flags & LWKT_TOKEN_MPSAFE) == 0) rel_mplock(); + --td->td_toks_stop; return (FALSE); } return (TRUE); @@ -443,8 +449,8 @@ lwkt_gettoken(lwkt_token_t tok) ref = td->td_toks_stop; KKASSERT(ref < &td->td_toks_end); - _lwkt_tokref_init(ref, tok, td); ++td->td_toks_stop; + _lwkt_tokref_init(ref, tok, td); _lwkt_gettokref(ref, td, (const void **)&tok); } @@ -456,8 +462,8 @@ lwkt_gettoken_hard(lwkt_token_t tok) ref = td->td_toks_stop; KKASSERT(ref < &td->td_toks_end); - _lwkt_tokref_init(ref, tok, td); ++td->td_toks_stop; + _lwkt_tokref_init(ref, tok, td); _lwkt_gettokref(ref, td, (const void **)&tok); crit_enter_hard_gd(td->td_gd); } @@ -471,9 +477,9 @@ lwkt_getpooltoken(void *ptr) ref = td->td_toks_stop; KKASSERT(ref < &td->td_toks_end); + ++td->td_toks_stop; tok = _lwkt_token_pool_lookup(ptr); _lwkt_tokref_init(ref, tok, td); - ++td->td_toks_stop; _lwkt_gettokref(ref, td, (const void **)&ptr); return(tok); } @@ -486,8 +492,8 @@ lwkt_trytoken(lwkt_token_t tok) ref = td->td_toks_stop; KKASSERT(ref < &td->td_toks_end); - _lwkt_tokref_init(ref, tok, td); ++td->td_toks_stop; + _lwkt_tokref_init(ref, tok, td); return(_lwkt_trytokref(ref, td)); } @@ -509,13 +515,13 @@ lwkt_reltoken(lwkt_token_t tok) */ ref = td->td_toks_stop - 1; KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok); - td->td_toks_stop = ref; /* * If the token was not MPSAFE release the MP lock. */ if ((ref->tr_flags & LWKT_TOKEN_MPSAFE) == 0) rel_mplock(); + td->td_toks_stop = ref; /* * Make sure the compiler does not reorder the clearing of diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index a20964bcbe..762e587070 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -397,6 +397,7 @@ trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -901,8 +902,10 @@ kernel_trap: out: #ifdef SMP - if (ISPL(frame->tf_cs) == SEL_UPL) - KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_eip)); + if (ISPL(frame->tf_cs) == SEL_UPL) { + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount trap/end from %p", (void *)frame->tf_eip)); + } #endif userret(lp, frame, sticks); userexit(lp); @@ -915,8 +918,12 @@ out2: ; KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %zd/%zd", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -1386,6 +1393,9 @@ bad: KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(&td->td_toks_base == td->td_toks_stop, + ("syscall: extra tokens held after trap! 
%zd", + td->td_toks_stop - &td->td_toks_base)); #endif } diff --git a/sys/platform/pc64/x86_64/trap.c b/sys/platform/pc64/x86_64/trap.c index cb0f3ba8c6..07eb2c966b 100644 --- a/sys/platform/pc64/x86_64/trap.c +++ b/sys/platform/pc64/x86_64/trap.c @@ -357,6 +357,7 @@ trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -763,8 +764,10 @@ trap(struct trapframe *frame) out: #ifdef SMP - if (ISPL(frame->tf_cs) == SEL_UPL) - KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_rip)); + if (ISPL(frame->tf_cs) == SEL_UPL) { + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount trap/end from %p", (void *)frame->tf_rip)); + } #endif userret(lp, frame, sticks); userexit(lp); @@ -777,8 +780,12 @@ out2: ; KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %ld/%ld", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -1243,6 +1250,9 @@ bad: KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(&td->td_toks_base == td->td_toks_stop, + ("syscall: extra tokens held after trap! %ld", + td->td_toks_stop - &td->td_toks_base)); #endif } diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c index e46541a8cd..10477dae36 100644 --- a/sys/platform/vkernel/i386/trap.c +++ b/sys/platform/vkernel/i386/trap.c @@ -372,6 +372,7 @@ user_trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -630,7 +631,8 @@ restart: out: #ifdef SMP - KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_eip)); + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount trap/end from %p", (void *)frame->tf_eip)); #endif userret(lp, frame, sticks); userexit(lp); @@ -642,8 +644,12 @@ out2: ; KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %zd/%zd", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -660,6 +666,7 @@ kern_trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -844,8 +851,12 @@ out2: #endif #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %zd/%zd", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -1278,6 +1289,9 @@ bad: KASSERT(crit_count == td->td_critcount, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(&td->td_toks_base == td->td_toks_stop, + ("syscall: extra tokens held after trap! 
%zd", + td->td_toks_stop - &td->td_toks_base)); #endif } diff --git a/sys/platform/vkernel64/x86_64/trap.c b/sys/platform/vkernel64/x86_64/trap.c index e0aa5414a4..b14f9999d9 100644 --- a/sys/platform/vkernel64/x86_64/trap.c +++ b/sys/platform/vkernel64/x86_64/trap.c @@ -372,6 +372,7 @@ user_trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -606,7 +607,8 @@ restart: out: #ifdef SMP - KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_rip)); + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount trap/end from %p", (void *)frame->tf_rip)); #endif userret(lp, frame, sticks); userexit(lp); @@ -618,8 +620,12 @@ out2: ; KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid); #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %ld/%ld", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -636,6 +642,7 @@ kern_trap(struct trapframe *frame) #endif #ifdef INVARIANTS int crit_count = td->td_critcount; + lwkt_tokref_t curstop = td->td_toks_stop; #endif vm_offset_t eva; @@ -820,8 +827,12 @@ out2: #endif #ifdef INVARIANTS KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", + ("trap: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("trap: extra tokens held after trap! %ld/%ld", + curstop - &td->td_toks_base, + td->td_toks_stop - &td->td_toks_base)); #endif } @@ -1335,9 +1346,12 @@ bad: #endif KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error); #ifdef INVARIANTS - KASSERT(crit_count == td->td_critcount, + KASSERT(&td->td_toks_base == td->td_toks_stop, ("syscall: critical section count mismatch! %d/%d", crit_count, td->td_pri)); + KASSERT(curstop == td->td_toks_stop, + ("syscall: extra tokens held after trap! %ld", + td->td_toks_stop - &td->td_toks_base)); #endif } diff --git a/sys/sys/mplock2.h b/sys/sys/mplock2.h index 86d13acf85..fa47410d6d 100644 --- a/sys/sys/mplock2.h +++ b/sys/sys/mplock2.h @@ -44,6 +44,11 @@ extern int mp_lock_holder_line; * * The mplock must check a number of conditions and it is better to * leave it to a procedure if we cannot get it trivially. + * + * WARNING: The mp_lock and td_mpcount are not necessarily synchronized. + * We must synchronize them here. They can be unsynchronized + * for a variety of reasons including predisposition, td_xpcount, + * and so forth. */ static __inline void @@ -61,7 +66,8 @@ get_mplock_debug(const char *file, int line) * Release the MP lock * * In order to release the MP lock we must first pre-dispose td_mpcount - * for the release and then, if it is 0, release the actual lock. + * for the release and then, if it is 0 and td_xpcount is also zero, + * release the actual lock. * * The contested function is called only if we are unable to release the * Actual lock. This can occur if we raced an interrupt after decrementing @@ -69,6 +75,11 @@ get_mplock_debug(const char *file, int line) * * The function also catches the td_mpcount underflow case because the * lock will be in a released state and thus fail the subsequent release. + * + * WARNING: The mp_lock and td_mpcount are not necessarily synchronized. 
+ We must synchronize them here. They can be unsynchronized + for a variety of reasons including predisposition, td_xpcount, + and so forth. */ static __inline void @@ -79,8 +90,10 @@ rel_mplock(void) int n; n = --td->td_mpcount; - if (n <= 0 && atomic_cmpset_int(&mp_lock, gd->gd_cpuid, -1) == 0) + if (n < 0 || ((n + td->td_xpcount) == 0 && + atomic_cmpset_int(&mp_lock, gd->gd_cpuid, -1) == 0)) { _rel_mplock_contested(); + } } /* diff --git a/sys/sys/thread.h b/sys/sys/thread.h index b86163d415..e853167dd2 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -135,6 +135,8 @@ typedef struct lwkt_token { */ #define ASSERT_LWKT_TOKEN_HELD(tok) \ KKASSERT((tok)->t_ref && (tok)->t_ref->tr_owner == curthread) +#define ASSERT_NO_TOKENS_HELD(td) \ + KKASSERT((td)->td_toks_stop == &(td)->td_toks_array[0]) /* * Assert that a particular token is held and we are in a hard @@ -269,11 +271,16 @@ struct thread { int td_nest_count; /* prevent splz nesting */ #ifdef SMP int td_mpcount; /* MP lock held (count) */ + int td_xpcount; /* MP lock held inherited (count) */ int td_cscount; /* cpu synchronization master */ + int td_unused02[4]; /* for future fields */ #else int td_mpcount_unused; /* filler so size matches */ + int td_xpcount_unused; int td_cscount_unused; + int td_unused02[4]; #endif + int td_unused03[4]; /* for future fields */ struct iosched_data td_iosdata; /* Dynamic I/O scheduling data */ struct timeval td_start; /* start time for a thread/process */ char td_comm[MAXCOMLEN+1]; /* typ 16+1 bytes */ @@ -295,8 +302,8 @@ struct thread { struct md_thread td_mach; }; -#define td_toks_base td_toks_array[0] -#define td_toks_end td_toks_array[LWKT_MAXTOKENS] +#define td_toks_base td_toks_array[0] +#define td_toks_end td_toks_array[LWKT_MAXTOKENS] #define TD_TOKS_HELD(td) ((td)->td_toks_stop != &(td)->td_toks_base) #define TD_TOKS_NOT_HELD(td) ((td)->td_toks_stop == &(td)->td_toks_base) -- 2.41.0
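
A note on the model introduced by this change: a thread's effective hold on the MP lock is td_mpcount + td_xpcount, and td_xpcount is only ever non-zero while the thread is preempting another thread. The fragment below is a minimal stand-alone sketch of that rule using a toy struct (the two field names are borrowed from struct thread, everything else is invented for illustration); it is not DragonFly source. The inherited portion never folds into td_mpcount, so it cannot leak past the end of the preemption, and the underlying mp_lock may only be dropped once both counts reach zero.

#include <assert.h>

/* Toy stand-in for the two MP lock counts in struct thread. */
struct toy_thread {
	int td_mpcount;		/* holds this thread acquired itself */
	int td_xpcount;		/* holds inherited from a preempted thread */
};

/* The MP lock must stay owned while either count is non-zero. */
static int
mplock_still_held(const struct toy_thread *td)
{
	return (td->td_mpcount + td->td_xpcount != 0);
}

/*
 * Preemption start: the preempting thread inherits the victim's count
 * through td_xpcount instead of adding it to its own td_mpcount.
 */
static void
toy_preempt_start(struct toy_thread *ntd, const struct toy_thread *td)
{
	ntd->td_xpcount = td->td_mpcount;
}

/* Preemption end: the inherited holds simply vanish. */
static void
toy_preempt_done(struct toy_thread *ntd)
{
	ntd->td_xpcount = 0;
}

int
main(void)
{
	struct toy_thread victim = { .td_mpcount = 1, .td_xpcount = 0 };
	struct toy_thread intr   = { .td_mpcount = 0, .td_xpcount = 0 };

	toy_preempt_start(&intr, &victim);
	assert(mplock_still_held(&intr));	/* lock stays atomic across the preemption */
	toy_preempt_done(&intr);
	assert(!mplock_still_held(&intr));	/* nothing leaked into td_mpcount */
	return 0;
}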
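
The lwkt_gettoken() fix follows directly from that rule, echoing the WARNING added to lwkt_token.c above: a token that is not MPSAFE may treat itself as already covered by the MP lock only when td_mpcount itself is non-zero, because an inherited td_xpcount hold evaporates if the thread blocks or switches. Continuing the toy model, a hypothetical predicate (invented name; the real check lives in _lwkt_tokref_init()) would be:

/*
 * May a non-MPSAFE token skip get_mplock()/rel_mplock() because the
 * thread already owns the MP lock?  Only the thread's own count says
 * yes; td_xpcount deliberately does not count, since the inherited
 * hold disappears when the preemption ends and would leave the token
 * unprotected.
 */
static int
toy_nonmpsafe_token_covered(const struct toy_thread *td)
{
	return (td->td_mpcount != 0);
}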