From 4a28fe22d241d9ebd6072389b0d87650b332b579 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sat, 28 Aug 2010 21:07:08 -0700
Subject: [PATCH] kernel - Introduce hard code sections, simplify critical sections & mplocks

* Introduce hard code sections with crit_enter_hard(), crit_exit_hard(),
  lwkt_gettoken_hard() and lwkt_reltoken_hard().  These functions create
  a hard code section which, like an interrupt or ipi, does not allow any
  operation that might potentially block or switch threads.  While in a
  hard code section any such operation will assert and panic the system.

  For example, acquiring a token that is not already held is disallowed
  even if the acquisition could be accomplished without blocking, but
  acquiring a token which is already held is allowed.  The same applies
  to the mplock, lockmgr locks, etc.  (mtx's and serializers have not
  been dealt with yet).

* Introduce ASSERT_LWKT_TOKEN_HARD() and ASSERT_LWKT_TOKEN_CRIT().  These
  assert that a token is held and that a hard critical section (hard) or
  any critical section (crit) is in place.

* Rework the critical section macros and optimize the crit_exit*() code
  down to two conditionals which are almost always false, regardless of
  whether critcount is transitioning 1->0 or not.  Also declare
  crit_panic() __dead2, which may produce better code.

* Rework get_mplock() to reduce inline code generation.  The hard code
  section assertions would have made the inline too big.  We still
  optimize the case where the mplock is already held.
---
 sys/kern/kern_lock.c   |  39 ++++---------
 sys/kern/kern_mplock.c |  22 +++++++
 sys/kern/lwkt_thread.c |  44 ++++++++++----
 sys/kern/lwkt_token.c  |  47 ++++++++++++---
 sys/sys/globaldata.h   |   2 +-
 sys/sys/mplock2.h      |  16 ++----
 sys/sys/thread.h       |  38 +++++++++++-
 sys/sys/thread2.h      | 128 +++++++++++++++++++++--------------------
 sys/vm/vm_map.h        |   3 +
 9 files changed, 216 insertions(+), 123 deletions(-)

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 34ab77f998..9d755047d6 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -54,12 +54,6 @@
 #include
 #include
 
-/*
- * 0: no warnings, 1: warnings, 2: panic
- */
-static int lockmgr_from_int = 1;
-SYSCTL_INT(_debug, OID_AUTO, lockmgr_from_int, CTLFLAG_RW, &lockmgr_from_int, 0, "");
-
 /*
  * Locking primitives implementation.
  * Locks provide shared/exclusive sychronization.
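
For illustration only (this block is not part of the diff): a minimal sketch of how the hard-section API described in the commit message above is meant to be used from kernel code.  The function names, the static token, and the exact include set are hypothetical; only crit_enter_hard()/crit_exit_hard() and lwkt_gettoken_hard()/lwkt_reltoken_hard() come from this patch.

    /*
     * Illustrative sketch only.  The function names and the token are
     * hypothetical; the token is assumed to be initialized elsewhere.
     */
    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/thread.h>
    #include <sys/thread2.h>

    static struct lwkt_token example_token;     /* hypothetical */

    static void
    example_hard_section(void)
    {
            /*
             * A bare hard section.  Like an interrupt or ipi, nothing in
             * here may block or potentially block: no lockmgr, no tsleep,
             * and no acquisition of a token that is not already held.
             */
            crit_enter_hard();
            /* ... strictly non-blocking work ... */
            crit_exit_hard();
    }

    static void
    example_hard_token(void)
    {
            /*
             * lwkt_gettoken_hard() behaves like lwkt_gettoken() (it may
             * block here, before the hard section begins) and then enters
             * a hard section, so the code between the two calls must not
             * block or potentially block.
             */
            lwkt_gettoken_hard(&example_token);
            /* ... non-blocking work protected by the token ... */
            lwkt_reltoken_hard(&example_token);
    }
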
@@ -174,33 +168,20 @@ debuglockmgr(struct lock *lkp, u_int flags, error = 0; dowakeup = 0; - if (lockmgr_from_int && mycpu->gd_intr_nesting_level && + if (mycpu->gd_intr_nesting_level && (flags & LK_NOWAIT) == 0 && (flags & LK_TYPE_MASK) != LK_RELEASE && didpanic == 0) { + #ifndef DEBUG_LOCKS - if (lockmgr_from_int == 2) { - didpanic = 1; - panic( - "lockmgr %s from %p: called from interrupt", - lkp->lk_wmesg, ((int **)&lkp)[-1]); - didpanic = 0; - } else { - kprintf( - "lockmgr %s from %p: called from interrupt\n", - lkp->lk_wmesg, ((int **)&lkp)[-1]); - } + didpanic = 1; + panic("lockmgr %s from %p: called from interrupt, ipi, " + "or hard code section", + lkp->lk_wmesg, ((int **)&lkp)[-1]); #else - if (lockmgr_from_int == 2) { - didpanic = 1; - panic( - "lockmgr %s from %s:%d: called from interrupt", - lkp->lk_wmesg, file, line); - didpanic = 0; - } else { - kprintf( - "lockmgr %s from %s:%d: called from interrupt\n", - lkp->lk_wmesg, file, line); - } + didpanic = 1; + panic("lockmgr %s from %s:%d: called from interrupt, ipi, " + "or hard code section", + lkp->lk_wmesg, file, line); #endif } diff --git a/sys/kern/kern_mplock.c b/sys/kern/kern_mplock.c index ea5eac3693..57bed32f2a 100644 --- a/sys/kern/kern_mplock.c +++ b/sys/kern/kern_mplock.c @@ -99,6 +99,28 @@ cpu_get_initial_mplock(void) curthread->td_mpcount = 1; } +/* + * This code is called from the get_mplock() inline when the mplock + * is not already held. + */ +void +_get_mplock_predisposed(const char *file, int line) +{ + globaldata_t gd = mycpu; + + if (gd->gd_intr_nesting_level) { + panic("Attempt to acquire mplock not already held " + "in hard section, ipi or interrupt %s:%d", + file, line); + } + if (atomic_cmpset_int(&mp_lock, -1, gd->gd_cpuid) == 0) + _get_mplock_contested(file, line); +#ifdef INVARIANTS + mp_lock_holder_file = file; + mp_lock_holder_line = line; +#endif +} + /* * Called when the MP lock could not be trvially acquired. The caller * has already bumped td_mpcount. diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 964ddc20d5..1db5855575 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -520,8 +520,10 @@ lwkt_switch(void) int savegdtrap; if (gd->gd_trap_nesting_level == 0 && panicstr == NULL) { - panic("lwkt_switch: cannot switch from within " - "a fast interrupt, yet, td %p\n", td); + panic("lwkt_switch: Attempt to switch from a " + "a fast interrupt, ipi, or hard code section, " + "td %p\n", + td); } else { savegdnest = gd->gd_intr_nesting_level; savegdtrap = gd->gd_trap_nesting_level; @@ -529,7 +531,8 @@ lwkt_switch(void) gd->gd_trap_nesting_level = 0; if ((td->td_flags & TDF_PANICWARN) == 0) { td->td_flags |= TDF_PANICWARN; - kprintf("Warning: thread switch from interrupt or IPI, " + kprintf("Warning: thread switch from interrupt, IPI, " + "or hard code section.\n" "thread %p (%s)\n", td, td->td_comm); print_backtrace(-1); } @@ -1049,12 +1052,8 @@ lwkt_preempt(thread_t ntd, int critcount) /* * Conditionally call splz() if gd_reqflags indicates work is pending. - * - * td_nest_count prevents deep nesting via splz() or doreti() which - * might otherwise blow out the kernel stack. Note that except for - * this special case, we MUST call splz() here to handle any - * pending ints, particularly after we switch, or we might accidently - * halt the cpu with interrupts pending. + * This will work inside a critical section but not inside a hard code + * section. 
* * (self contained on a per cpu basis) */ @@ -1064,8 +1063,32 @@ splz_check(void) globaldata_t gd = mycpu; thread_t td = gd->gd_curthread; - if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2) + if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && + gd->gd_intr_nesting_level == 0 && + td->td_nest_count < 2) + { splz(); + } +} + +/* + * This version is integrated into crit_exit, reqflags has already + * been tested but td_critcount has not. + * + * We only want to execute the splz() on the 1->0 transition of + * critcount and not in a hard code section or if too deeply nested. + */ +void +lwkt_maybe_splz(thread_t td) +{ + globaldata_t gd = td->td_gd; + + if (td->td_critcount == 0 && + gd->gd_intr_nesting_level == 0 && + td->td_nest_count < 2) + { + splz(); + } } /* @@ -1660,6 +1683,7 @@ crit_panic(void) td->td_critcount = 0; panic("td_critcount is/would-go negative! %p %d", td, lcrit); + /* NOT REACHED */ } #ifdef SMP diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c index 1ec62699c7..be36331420 100644 --- a/sys/kern/lwkt_token.c +++ b/sys/kern/lwkt_token.c @@ -317,13 +317,11 @@ lwkt_relalltokens(thread_t td) */ static __inline int -_lwkt_trytokref2(lwkt_tokref_t nref, thread_t td) +_lwkt_trytokref2(lwkt_tokref_t nref, thread_t td, int blocking) { lwkt_token_t tok; lwkt_tokref_t ref; - KKASSERT(td->td_gd->gd_intr_nesting_level == 0); - /* * Make sure the compiler does not reorder prior instructions * beyond this demark. @@ -337,10 +335,15 @@ _lwkt_trytokref2(lwkt_tokref_t nref, thread_t td) for (;;) { /* * Try to acquire the token if we do not already have - * it. + * it. This is not allowed if we are in a hard code + * section (because it 'might' have blocked). */ ref = tok->t_ref; if (ref == NULL) { + KASSERT((blocking == 0 || + td->td_gd->gd_intr_nesting_level == 0), + ("Attempt to acquire token %p not already " + "held in hard code section", tok)); /* * NOTE: If atomic_cmpset_ptr() fails we have to * loop and try again. It just means we @@ -357,13 +360,22 @@ _lwkt_trytokref2(lwkt_tokref_t nref, thread_t td) * (it might belong to another thread and is thus * unstable), but we don't have to. We can simply * range-check it. + * + * It is ok to acquire a token that is already held + * by the current thread when in a hard code section. */ if (ref >= &td->td_toks_base && ref < td->td_toks_stop) return(TRUE); /* - * Otherwise we failed. + * Otherwise we failed, and it is not ok to attempt to + * acquire a token in a hard code section. */ + KASSERT((blocking == 0 || + td->td_gd->gd_intr_nesting_level == 0), + ("Attempt to acquire token %p not already " + "held in hard code section", tok)); + return(FALSE); } } @@ -379,7 +391,7 @@ _lwkt_trytokref(lwkt_tokref_t ref, thread_t td) if (try_mplock() == 0) return (FALSE); } - if (_lwkt_trytokref2(ref, td) == FALSE) { + if (_lwkt_trytokref2(ref, td, 0) == FALSE) { /* * Cleanup, deactivate the failed token. */ @@ -400,7 +412,7 @@ _lwkt_gettokref(lwkt_tokref_t ref, thread_t td, const void **stkframe) { if ((ref->tr_flags & LWKT_TOKEN_MPSAFE) == 0) get_mplock(); - if (_lwkt_trytokref2(ref, td) == FALSE) { + if (_lwkt_trytokref2(ref, td, 1) == FALSE) { /* * Give up running if we can't acquire the token right now. 
* @@ -434,6 +446,20 @@ lwkt_gettoken(lwkt_token_t tok) _lwkt_gettokref(ref, td, (const void **)&tok); } +void +lwkt_gettoken_hard(lwkt_token_t tok) +{ + thread_t td = curthread; + lwkt_tokref_t ref; + + ref = td->td_toks_stop; + KKASSERT(ref < &td->td_toks_end); + _lwkt_tokref_init(ref, tok, td); + ++td->td_toks_stop; + _lwkt_gettokref(ref, td, (const void **)&tok); + crit_enter_hard_gd(td->td_gd); +} + lwkt_token_t lwkt_getpooltoken(void *ptr) { @@ -503,6 +529,13 @@ lwkt_reltoken(lwkt_token_t tok) tok->t_ref = NULL; } +void +lwkt_reltoken_hard(lwkt_token_t tok) +{ + lwkt_reltoken(tok); + crit_exit_hard(); +} + /* * Pool tokens are used to provide a type-stable serializing token * pointer that does not race against disappearing data structures. diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h index ba58de030b..5a0fa33eff 100644 --- a/sys/sys/globaldata.h +++ b/sys/sys/globaldata.h @@ -134,7 +134,7 @@ struct globaldata { cpumask_t gd_cpumask; /* mask = 1<gd_curthread; ++td->td_mpcount; - if (mp_lock != gd->gd_cpuid) { - if (atomic_cmpset_int(&mp_lock, -1, gd->gd_cpuid) == 0) - _get_mplock_contested(file, line); -#ifdef INVARIANTS - mp_lock_holder_file = file; - mp_lock_holder_line = line; -#endif - } + if (mp_lock != gd->gd_cpuid) + _get_mplock_predisposed(file, line); } /* diff --git a/sys/sys/thread.h b/sys/sys/thread.h index d6730ae6cf..b86163d415 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -130,9 +130,40 @@ typedef struct lwkt_token { .t_desc = #name \ } -#define ASSERT_LWKT_TOKEN_HELD(tok) \ +/* + * Assert that a particular token is held + */ +#define ASSERT_LWKT_TOKEN_HELD(tok) \ KKASSERT((tok)->t_ref && (tok)->t_ref->tr_owner == curthread) +/* + * Assert that a particular token is held and we are in a hard + * code execution section (interrupt, ipi, or hard code section). + * Hard code sections are not allowed to block or potentially block. + * e.g. lwkt_gettoken() would only be ok if the token were already + * held. + */ +#define ASSERT_LWKT_TOKEN_HARD(tok) \ + do { \ + globaldata_t zgd __debugvar = mycpu; \ + KKASSERT((tok)->t_ref && \ + (tok)->t_ref->tr_owner == zgd->gd_curthread && \ + zgd->gd_intr_nesting_level > 0); \ + } while(0) + +/* + * Assert that a particular token is held and we are in a normal + * critical section. Critical sections will not be preempted but + * can explicitly block (tsleep, lwkt_gettoken, etc). 
+ */ +#define ASSERT_LWKT_TOKEN_CRIT(tok) \ + do { \ + globaldata_t zgd __debugvar = mycpu; \ + KKASSERT((tok)->t_ref && \ + (tok)->t_ref->tr_owner == zgd->gd_curthread && \ + zgd->gd_curthread->td_critcount > 0); \ + } while(0) + struct lwkt_tokref { lwkt_token_t tr_tok; /* token in question */ struct thread *tr_owner; /* me */ @@ -399,10 +430,13 @@ extern void lwkt_token_wait(void); extern void lwkt_hold(thread_t); extern void lwkt_rele(thread_t); extern void lwkt_passive_release(thread_t); +extern void lwkt_maybe_splz(thread_t); extern void lwkt_gettoken(lwkt_token_t); +extern void lwkt_gettoken_hard(lwkt_token_t); extern int lwkt_trytoken(lwkt_token_t); extern void lwkt_reltoken(lwkt_token_t); +extern void lwkt_reltoken_hard(lwkt_token_t); extern int lwkt_getalltokens(thread_t, const char **, const void **); extern void lwkt_relalltokens(thread_t); extern void lwkt_drain_token_requests(void); @@ -450,7 +484,7 @@ extern void lwkt_cpusync_start(cpumask_t, lwkt_cpusync_t); extern void lwkt_cpusync_add(cpumask_t, lwkt_cpusync_t); extern void lwkt_cpusync_finish(lwkt_cpusync_t); -extern void crit_panic(void); +extern void crit_panic(void) __dead2; extern struct lwp *lwkt_preempted_proc(void); extern int lwkt_create (void (*func)(void *), void *, struct thread **, diff --git a/sys/sys/thread2.h b/sys/sys/thread2.h index 710162fe75..63dcffe79f 100644 --- a/sys/sys/thread2.h +++ b/sys/sys/thread2.h @@ -42,30 +42,38 @@ #define __DEBUG_CRIT_PASS_ARG__ , id #define __DEBUG_CRIT_ENTER(td) _debug_crit_enter((td), id) #define __DEBUG_CRIT_EXIT(td) _debug_crit_exit((td), id) -#define crit_enter() _crit_enter(__FUNCTION__) -#define crit_enter_id(id) _crit_enter(id) +#define crit_enter() _crit_enter(mycpu, __FUNCTION__) +#define crit_enter_id(id) _crit_enter(mycpu, id) +#define crit_enter_gd(curgd) _crit_enter((curgd), __FUNCTION__) #define crit_enter_quick(curtd) _crit_enter_quick((curtd), __FUNCTION__) -#define crit_enter_gd(curgd) _crit_enter_gd(curgd, __FUNCTION__) -#define crit_exit() _crit_exit(__FUNCTION__) -#define crit_exit_id(id) _crit_exit(id) +#define crit_enter_hard() _crit_enter_hard(mycpu, __FUNCTION__) +#define crit_enter_hard_gd(curgd) _crit_enter_hard((curgd), __FUNCTION__) +#define crit_exit() _crit_exit(mycpu, __FUNCTION__) +#define crit_exit_id(id) _crit_exit(mycpu, id) +#define crit_exit_gd(curgd) _crit_exit((curgd), __FUNCTION__) #define crit_exit_quick(curtd) _crit_exit_quick((curtd), __FUNCTION__) +#define crit_exit_hard() _crit_exit_hard(mycpu, __FUNCTION__) +#define crit_exit_hard_gd(curgd) _crit_exit_hard((curgd), __FUNCTION__) #define crit_exit_noyield(curtd) _crit_exit_noyield((curtd),__FUNCTION__) -#define crit_exit_gd(curgd) _crit_exit_gd((curgd), __FUNCTION__) #else #define __DEBUG_CRIT_ARG__ void #define __DEBUG_CRIT_ADD_ARG__ #define __DEBUG_CRIT_PASS_ARG__ #define __DEBUG_CRIT_ENTER(td) #define __DEBUG_CRIT_EXIT(td) -#define crit_enter() _crit_enter() -#define crit_enter_id(id) _crit_enter() -#define crit_enter_quick(curtd) _crit_enter_quick(curtd) -#define crit_enter_gd(curgd) _crit_enter_gd(curgd) -#define crit_exit() _crit_exit() -#define crit_exit_id(id) _crit_exit() -#define crit_exit_quick(curtd) _crit_exit_quick(curtd) -#define crit_exit_noyield(curtd) _crit_exit_noyield(curtd) -#define crit_exit_gd(curgd) _crit_exit_gd(curgd) +#define crit_enter() _crit_enter(mycpu) +#define crit_enter_id(id) _crit_enter(mycpu) +#define crit_enter_gd(curgd) _crit_enter((curgd)) +#define crit_enter_quick(curtd) _crit_enter_quick((curtd)) +#define crit_enter_hard() 
_crit_enter_hard(mycpu) +#define crit_enter_hard_gd(curgd) _crit_enter_hard((curgd)) +#define crit_exit() _crit_exit(mycpu) +#define crit_exit_id(id) _crit_exit(mycpu) +#define crit_exit_gd(curgd) _crit_exit((curgd)) +#define crit_exit_quick(curtd) _crit_exit_quick((curtd)) +#define crit_exit_hard() _crit_exit_hard(mycpu) +#define crit_exit_hard_gd(curgd) _crit_exit_hard((curgd)) +#define crit_exit_noyield(curtd) _crit_exit_noyield((curtd)) #endif /* @@ -102,91 +110,85 @@ _debug_crit_exit(thread_t td, const char *id) #endif /* - * Critical sections prevent preemption by raising a thread's priority - * above the highest possible interrupting priority. Additionally, the - * current cpu will not be able to schedule a new thread but will instead - * place it on a pending list (with interrupts physically disabled) and - * set mycpu->gd_reqflags to indicate that work needs to be done, which - * splz_check() takes care of. + * Critical sections prevent preemption, but allowing explicit blocking + * and thread switching. Any interrupt occuring while in a critical + * section is made pending and returns immediately. Interrupts are not + * physically disabled. * - * Some of these routines take a struct thread pointer as an argument. This - * pointer MUST be curthread and is only passed as an optimization. + * Hard critical sections prevent preemption and disallow any blocking + * or thread switching, and in addition will assert on any blockable + * operation (acquire token not already held, lockmgr, mutex ops, or + * splz). Spinlocks can still be used in hard sections. * - * Synchronous switching and blocking is allowed while in a critical section. + * All critical section routines only operate on the current thread. + * Passed gd or td arguments are simply optimizations when mycpu or + * curthread is already available to the caller. */ +/* + * crit_enter + */ static __inline void -_crit_enter(__DEBUG_CRIT_ARG__) +_crit_enter_quick(thread_t td __DEBUG_CRIT_ADD_ARG__) { - struct thread *td = curthread; - -#ifdef INVARIANTS - if (td->td_critcount < 0) - crit_panic(); -#endif ++td->td_critcount; __DEBUG_CRIT_ENTER(td); cpu_ccfence(); } static __inline void -_crit_enter_quick(struct thread *curtd __DEBUG_CRIT_ADD_ARG__) +_crit_enter(globaldata_t gd __DEBUG_CRIT_ADD_ARG__) { - ++curtd->td_critcount; - __DEBUG_CRIT_ENTER(curtd); - cpu_ccfence(); + _crit_enter_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__); } static __inline void -_crit_enter_gd(globaldata_t mygd __DEBUG_CRIT_ADD_ARG__) +_crit_enter_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__) { - _crit_enter_quick(mygd->gd_curthread __DEBUG_CRIT_PASS_ARG__); + _crit_enter_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__); + ++gd->gd_intr_nesting_level; } -static __inline void -_crit_exit_noyield(struct thread *curtd __DEBUG_CRIT_ADD_ARG__) -{ - __DEBUG_CRIT_EXIT(curtd); - --curtd->td_critcount; -#ifdef INVARIANTS - if (curtd->td_critcount < 0) - crit_panic(); -#endif - cpu_ccfence(); /* prevent compiler reordering */ -} +/* + * crit_exit*() + * + * NOTE: Conditionalizing just gd_reqflags, a case which is virtually + * never true regardless of crit_count, should result in 100% + * optimal code execution. We don't check crit_count because + * it just bloats the inline and does not improve performance. 
+ */ static __inline void -_crit_exit(__DEBUG_CRIT_ARG__) +_crit_exit_noyield(thread_t td __DEBUG_CRIT_ADD_ARG__) { - thread_t td = curthread; - __DEBUG_CRIT_EXIT(td); --td->td_critcount; #ifdef INVARIANTS - if (td->td_critcount < 0) + if (__predict_false(td->td_critcount < 0)) crit_panic(); #endif cpu_ccfence(); /* prevent compiler reordering */ - if (td->td_gd->gd_reqflags && td->td_critcount == 0) - splz_check(); } static __inline void -_crit_exit_quick(struct thread *curtd __DEBUG_CRIT_ADD_ARG__) +_crit_exit_quick(thread_t td __DEBUG_CRIT_ADD_ARG__) { - globaldata_t gd = curtd->td_gd; + _crit_exit_noyield(td __DEBUG_CRIT_PASS_ARG__); + if (__predict_false(td->td_gd->gd_reqflags & RQF_IDLECHECK_MASK)) + lwkt_maybe_splz(td); +} - __DEBUG_CRIT_EXIT(curtd); - --curtd->td_critcount; - cpu_ccfence(); /* prevent compiler reordering */ - if (gd->gd_reqflags && curtd->td_critcount == 0) - splz_check(); +static __inline void +_crit_exit(globaldata_t gd __DEBUG_CRIT_ADD_ARG__) +{ + _crit_exit_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__); } static __inline void -_crit_exit_gd(globaldata_t mygd __DEBUG_CRIT_ADD_ARG__) +_crit_exit_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__) { - _crit_exit_quick(mygd->gd_curthread __DEBUG_CRIT_PASS_ARG__); + --gd->gd_intr_nesting_level; + _crit_exit_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__); } static __inline int diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 75b9749457..14cab11b67 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -385,6 +385,9 @@ vm_map_lock_upgrade(vm_map_t map) { #define vm_map_max(map) ((map)->max_offset) #define vm_map_pmap(map) ((map)->pmap) +/* + * Must not block + */ static __inline struct pmap * vmspace_pmap(struct vmspace *vmspace) { -- 2.41.0
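
For illustration only (not part of the diff): a hedged sketch of how the two assertion macros added to sys/sys/thread.h might be used by callees to document and enforce the locking environment they expect from their callers; the function names are hypothetical.

    /*
     * Illustrative sketch only; hypothetical callees.
     */
    static void
    example_needs_token_crit(lwkt_token_t tok)
    {
            /*
             * Caller must hold 'tok' and be inside some critical section.
             * Per the comment in thread.h, the caller's context may still
             * block explicitly (tsleep, lwkt_gettoken, etc).
             */
            ASSERT_LWKT_TOKEN_CRIT(tok);
            /* ... */
    }

    static void
    example_needs_token_hard(lwkt_token_t tok)
    {
            /*
             * Caller must hold 'tok' and be running in an interrupt, ipi,
             * or hard code section (gd_intr_nesting_level > 0), i.e. a
             * context that must not block or potentially block.
             */
            ASSERT_LWKT_TOKEN_HARD(tok);
            /* ... */
    }
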
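
Also for illustration (not part of the diff): a sketch of the rule enforced by the new KASSERTs in _lwkt_trytokref2() -- inside a hard section a thread may re-acquire a token it already holds, but attempting to acquire a token it does not hold panics.  The function name is hypothetical, and the token is assumed to be MPSAFE so the mplock does not come into play.

    /* Illustrative sketch only; 'tok' is assumed to be an MPSAFE token. */
    static void
    example_already_held(lwkt_token_t tok)
    {
            lwkt_gettoken(tok);             /* may block; not hard yet */
            crit_enter_hard();

            /*
             * Re-acquiring a token this thread already holds never blocks,
             * so it is allowed inside the hard section (a second tokref is
             * pushed and popped).  Acquiring a token we do NOT hold here
             * would trip the KASSERT in _lwkt_trytokref2() and panic.
             */
            lwkt_gettoken(tok);
            lwkt_reltoken(tok);

            crit_exit_hard();
            lwkt_reltoken(tok);
    }
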
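
Finally, a hedged sketch (not part of the diff) of the equivalent rule for the mplock after the get_mplock() rework: recursive acquisition by the owning cpu stays on the inline fast path and is permitted in a hard section, while a fresh acquisition would reach _get_mplock_predisposed() and panic.  The function name is hypothetical; get_mplock()/rel_mplock() are the existing mplock2.h interfaces.

    /* Illustrative sketch only. */
    static void
    example_mplock_rule(void)
    {
            get_mplock();                   /* may contend; not hard yet */
            crit_enter_hard();

            /*
             * mp_lock already equals gd_cpuid, so this recursive call only
             * bumps td_mpcount via the inline fast path and is allowed.
             * If this cpu did not already own the mplock, get_mplock()
             * would call _get_mplock_predisposed(), which panics when
             * gd_intr_nesting_level is non-zero.
             */
            get_mplock();
            rel_mplock();

            crit_exit_hard();
            rel_mplock();
    }
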