From a4d95680362b7eba0c4840b8d5598533900a2f17 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Mon, 2 Oct 2017 18:42:34 -0700
Subject: [PATCH] kernel - Fix GCC reordering problem with td_critcount

* Wrap all ++td->td_critcount and --td->td_critcount use cases with
  an inline which executes cpu_ccfence() before and after, to guarantee
  that GCC does not try to reorder the operation around critical memory
  changes.

* This fixes a race in lockmgr() and possibly a few other places too.
---
 sys/kern/kern_mutex.c                      | 40 ++++++++++++----------
 sys/kern/kern_spinlock.c                   |  3 +-
 sys/kern/lwkt_thread.c                     |  1 +
 sys/platform/vkernel64/platform/machintr.c |  2 ++
 sys/platform/vkernel64/x86_64/exception.c  | 20 ++++++-----
 sys/sys/mutex2.h                           | 12 +++----
 sys/sys/spinlock2.h                        | 20 +++++------
 sys/sys/thread2.h                          | 28 ++++++++++++---
 8 files changed, 78 insertions(+), 48 deletions(-)

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 527ab83291..b7b247f1be 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -136,9 +136,9 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 		}
 		td = curthread;
 		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
-		++td->td_critcount;
+		crit_enter_raw(td);
 		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
-			--td->td_critcount;
+			crit_exit_raw(td);
 			continue;
 		}
 
@@ -154,7 +154,7 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 			atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
 		}
-		--td->td_critcount;
+		crit_exit_raw(td);
 		link->state = MTX_LINK_IDLE;
 		error = ENOLCK;
 		break;
 	}
@@ -177,7 +177,7 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 		}
 		isasync = (link->callback != NULL);
 		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
-		--td->td_critcount;
+		crit_exit_raw(td);
 
 		/*
 		 * If asynchronous lock request return without
@@ -286,9 +286,9 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 		}
 		td = curthread;
 		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
-		++td->td_critcount;
+		crit_enter_raw(td);
 		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
-			--td->td_critcount;
+			crit_exit_raw(td);
 			continue;
 		}
 
@@ -306,7 +306,7 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 					 MTX_LINKSPIN | MTX_SHWANTED);
 		}
-		--td->td_critcount;
+		crit_exit_raw(td);
 		link->state = MTX_LINK_IDLE;
 		error = ENOLCK;
 		break;
 	}
@@ -329,7 +329,7 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
 		}
 		isasync = (link->callback != NULL);
 		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
-		--td->td_critcount;
+		crit_exit_raw(td);
 
 		/*
 		 * If asynchronous lock request return without
@@ -441,7 +441,7 @@ _mtx_spinlock_try(mtx_t *mtx)
 		} else {
 			--gd->gd_spinlocks;
 			cpu_ccfence();
-			--gd->gd_curthread->td_critcount;
+			crit_exit_raw(gd->gd_curthread);
 			res = EAGAIN;
 			break;
 		}
@@ -744,7 +744,7 @@ mtx_chain_link_ex(mtx_t *mtx, u_int olock)
 
 	olock &= ~MTX_LINKSPIN;
 	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;	/* upgrade if necc */
-	++td->td_critcount;
+	crit_enter_raw(td);
 	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
 		link = mtx->mtx_exlink;
 		KKASSERT(link != NULL);
@@ -778,11 +778,12 @@ mtx_chain_link_ex(mtx_t *mtx, u_int olock)
 				wakeup(link);
 		}
 		atomic_clear_int(&mtx->mtx_lock, nlock);
-		--td->td_critcount;
+		crit_exit_raw(td);
 		return 1;
 	}
 
 	/* retry */
-	--td->td_critcount;
+	crit_exit_raw(td);
+
 	return 0;
 }
@@ -805,7 +806,7 @@ mtx_chain_link_sh(mtx_t *mtx, u_int olock)
 	olock &= ~MTX_LINKSPIN;
 	nlock = olock | MTX_LINKSPIN;
 	nlock &= ~MTX_EXCLUSIVE;
-	++td->td_critcount;
+	crit_enter_raw(td);
 	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
 		/*
 		 * It should not be possible for SHWANTED to be set without
@@ -854,11 +855,12 @@ mtx_chain_link_sh(mtx_t *mtx, u_int olock)
 		}
 		atomic_clear_int(&mtx->mtx_lock,
 				 MTX_LINKSPIN | MTX_SHWANTED);
-		--td->td_critcount;
+		crit_exit_raw(td);
 		return 1;
 	}
 
 	/* retry */
-	--td->td_critcount;
+	crit_exit_raw(td);
+
 	return 0;
 }
@@ -880,7 +882,7 @@ mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
 	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
 	 * result in too much cpu cache traffic.
 	 */
-	++td->td_critcount;
+	crit_enter_raw(td);
 	for (;;) {
 		lock = mtx->mtx_lock;
 		if (lock & MTX_LINKSPIN) {
@@ -924,7 +926,7 @@ mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
 		break;
 	}
 	atomic_clear_int(&mtx->mtx_lock, nlock);
-	--td->td_critcount;
+	crit_exit_raw(td);
 }
 
 /*
@@ -1039,7 +1041,7 @@ mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
 	/*
 	 * Acquire MTX_LINKSPIN
 	 */
-	++td->td_critcount;
+	crit_enter_raw(td);
 	for (;;) {
 		lock = mtx->mtx_lock;
 		if (lock & MTX_LINKSPIN) {
@@ -1141,5 +1143,5 @@ mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
 		break;
 	}
 	atomic_clear_int(&mtx->mtx_lock, nlock);
-	--td->td_critcount;
+	crit_exit_raw(td);
 }
diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c
index 5056c2d6bd..c009ac0a4f 100644
--- a/sys/kern/kern_spinlock.c
+++ b/sys/kern/kern_spinlock.c
@@ -130,7 +130,8 @@ spin_trylock_contested(struct spinlock *spin)
 		return TRUE;
 	/*atomic_add_int(&spin->counta, -1);*/
 	--gd->gd_spinlocks;
-	--gd->gd_curthread->td_critcount;
+	crit_exit_raw(gd->gd_curthread);
+
 	return (FALSE);
 }
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index 9729e095fa..d3a26f5706 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -1727,6 +1727,7 @@ crit_panic(void)
 	int lcrit = td->td_critcount;
 
 	td->td_critcount = 0;
+	cpu_ccfence();
 	panic("td_critcount is/would-go negative! %p %d", td, lcrit);
 	/* NOT REACHED */
 }
diff --git a/sys/platform/vkernel64/platform/machintr.c b/sys/platform/vkernel64/platform/machintr.c
index 8dc7021a5e..ee9c59498a 100644
--- a/sys/platform/vkernel64/platform/machintr.c
+++ b/sys/platform/vkernel64/platform/machintr.c
@@ -177,8 +177,10 @@ signalintr(int intr)
 		umtx_wakeup(&gd->mi.gd_reqflags, 0);
 	} else {
 		++td->td_nest_count;
+		cpu_ccfence();
 		atomic_clear_int(&gd->gd_fpending, 1 << intr);
 		sched_ithd_hard_virtual(intr);
+		cpu_ccfence();
 		--td->td_nest_count;
 	}
 }
diff --git a/sys/platform/vkernel64/x86_64/exception.c b/sys/platform/vkernel64/x86_64/exception.c
index 02d6c17982..67e539c7ac 100644
--- a/sys/platform/vkernel64/x86_64/exception.c
+++ b/sys/platform/vkernel64/x86_64/exception.c
@@ -81,13 +81,13 @@ ipisig(int nada, siginfo_t *info, void *ctxp)
 
 	save = errno;
 	if (td->td_critcount == 0) {
-		++td->td_critcount;
+		crit_enter_raw(td);
 		++gd->gd_cnt.v_ipi;
 		++gd->gd_intr_nesting_level;
 		atomic_swap_int(&gd->gd_npoll, 0);
 		lwkt_process_ipiq();
 		--gd->gd_intr_nesting_level;
-		--td->td_critcount;
+		crit_exit_raw(td);
 	} else {
 		need_ipiq();
 	}
@@ -125,13 +125,13 @@ stopsig(int nada, siginfo_t *info, void *ctxp)
 	sigaddset(&ss, SIGTERM);
 	sigaddset(&ss, SIGWINCH);
 
-	++td->td_critcount;
+	crit_enter_raw(td);
 	++gd->gd_intr_nesting_level;
 	while (CPUMASK_TESTMASK(stopped_cpus, gd->gd_cpumask)) {
 		sigsuspend(&ss);
 	}
 	--gd->gd_intr_nesting_level;
-	--td->td_critcount;
+	crit_exit_raw(td);
 
 	errno = save;
 }
@@ -149,11 +149,13 @@ kqueuesig(int nada, siginfo_t *info, void *ctxp)
 
 	save = errno;
 	if (td->td_critcount == 0) {
-		++td->td_critcount;
+		crit_enter_raw(td);
 		++gd->gd_intr_nesting_level;
+		cpu_ccfence();
 		kqueue_intr(NULL);
+		cpu_ccfence();
 		--gd->gd_intr_nesting_level;
-		--td->td_critcount;
+		crit_exit_raw(td);
 	} else {
 		need_kqueue();
 	}
@@ -170,11 +172,13 @@ timersig(int nada, siginfo_t *info, void *ctxp)
 
 	save = errno;
 	if (td->td_critcount == 0) {
-		++td->td_critcount;
+		crit_enter_raw(td);
 		++gd->gd_intr_nesting_level;
+		cpu_ccfence();
 		vktimer_intr(NULL);
+		cpu_ccfence();
 		--gd->gd_intr_nesting_level;
-		--td->td_critcount;
+		crit_exit_raw(td);
 	} else {
 		need_timer();
 	}
diff --git a/sys/sys/mutex2.h b/sys/sys/mutex2.h
index 028adf5809..b03d1a4f8f 100644
--- a/sys/sys/mutex2.h
+++ b/sys/sys/mutex2.h
@@ -210,9 +210,9 @@ mtx_spinlock(mtx_t *mtx)
 	/*
 	 * Predispose a hard critical section
 	 */
-	++gd->gd_curthread->td_critcount;
-	cpu_ccfence();
+	crit_enter_raw(gd->gd_curthread);
 	++gd->gd_spinlocks;
+	cpu_ccfence();
 
 	/*
 	 * If we cannot get it trivially get it the hard way.
@@ -234,9 +234,9 @@ mtx_spinlock_try(mtx_t *mtx)
 	/*
 	 * Predispose a hard critical section
 	 */
-	++gd->gd_curthread->td_critcount;
-	cpu_ccfence();
+	crit_enter_raw(gd->gd_curthread);
 	++gd->gd_spinlocks;
+	cpu_ccfence();
 
 	/*
 	 * If we cannot get it trivially call _mtx_spinlock_try().  This
@@ -371,9 +371,9 @@ mtx_spinunlock(mtx_t *mtx)
 
 	mtx_unlock(mtx);
 
-	--gd->gd_spinlocks;
 	cpu_ccfence();
-	--gd->gd_curthread->td_critcount;
+	--gd->gd_spinlocks;
+	crit_exit_raw(gd->gd_curthread);
 }
 
 /*
diff --git a/sys/sys/spinlock2.h b/sys/sys/spinlock2.h
index 9769eb20e4..081e111d88 100644
--- a/sys/sys/spinlock2.h
+++ b/sys/sys/spinlock2.h
@@ -71,9 +71,9 @@ spin_trylock(struct spinlock *spin)
 {
 	globaldata_t gd = mycpu;
 
-	++gd->gd_curthread->td_critcount;
-	cpu_ccfence();
+	crit_enter_raw(gd->gd_curthread);
 	++gd->gd_spinlocks;
+	cpu_ccfence();
 	if (atomic_cmpset_int(&spin->counta, 0, 1) == 0)
 		return (spin_trylock_contested(spin));
 #ifdef DEBUG_LOCKS
@@ -110,9 +110,9 @@ _spin_lock_quick(globaldata_t gd, struct spinlock *spin, const char *ident)
 {
 	int count;
 
-	++gd->gd_curthread->td_critcount;
-	cpu_ccfence();
+	crit_enter_raw(gd->gd_curthread);
 	++gd->gd_spinlocks;
+	cpu_ccfence();
 
 	count = atomic_fetchadd_int(&spin->counta, 1);
 	if (__predict_false(count != 0)) {
@@ -171,9 +171,9 @@ spin_unlock_quick(globaldata_t gd, struct spinlock *spin)
 #ifdef DEBUG_LOCKS
 	KKASSERT(gd->gd_spinlocks > 0);
 #endif
-	--gd->gd_spinlocks;
 	cpu_ccfence();
-	--gd->gd_curthread->td_critcount;
+	--gd->gd_spinlocks;
+	crit_exit_raw(gd->gd_curthread);
 }
 
 static __inline void
@@ -198,9 +198,9 @@ _spin_lock_shared_quick(globaldata_t gd, struct spinlock *spin,
 {
 	int counta;
 
-	++gd->gd_curthread->td_critcount;
-	cpu_ccfence();
+	crit_enter_raw(gd->gd_curthread);
 	++gd->gd_spinlocks;
+	cpu_ccfence();
 
 	counta = atomic_fetchadd_int(&spin->counta, 1);
 	if (__predict_false((counta & SPINLOCK_SHARED) == 0)) {
@@ -258,9 +258,9 @@ spin_unlock_shared_quick(globaldata_t gd, struct spinlock *spin)
 #ifdef DEBUG_LOCKS
 	KKASSERT(gd->gd_spinlocks > 0);
 #endif
-	--gd->gd_spinlocks;
 	cpu_ccfence();
-	--gd->gd_curthread->td_critcount;
+	--gd->gd_spinlocks;
+	crit_exit_raw(gd->gd_curthread);
 }
 
 static __inline void
diff --git a/sys/sys/thread2.h b/sys/sys/thread2.h
index ccd93a0572..2b62fa5959 100644
--- a/sys/sys/thread2.h
+++ b/sys/sys/thread2.h
@@ -30,6 +30,26 @@
 #include
 #include
 
+/*
+ * Don't let GCC reorder critical section count adjustments, because it
+ * will BLOW US UP if it does.
+ */
+static __inline void
+crit_enter_raw(thread_t td)
+{
+	cpu_ccfence();
+	++td->td_critcount;
+	cpu_ccfence();
+}
+
+static __inline void
+crit_exit_raw(thread_t td)
+{
+	cpu_ccfence();
+	--td->td_critcount;
+	cpu_ccfence();
+}
+
 /*
  * Is a token held either by the specified thread or held shared?
  *
@@ -161,9 +181,8 @@ _debug_crit_exit(thread_t td, const char *id)
 static __inline void
 _crit_enter_quick(thread_t td __DEBUG_CRIT_ADD_ARG__)
 {
-	++td->td_critcount;
+	crit_enter_raw(td);
 	__DEBUG_CRIT_ENTER(td);
-	cpu_ccfence();
 }
 
 static __inline void
@@ -177,6 +196,7 @@ _crit_enter_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 {
 	_crit_enter_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__);
 	++gd->gd_intr_nesting_level;
+	cpu_ccfence();
 }
 
@@ -196,12 +216,11 @@ static __inline void
 _crit_exit_noyield(thread_t td __DEBUG_CRIT_ADD_ARG__)
 {
 	__DEBUG_CRIT_EXIT(td);
-	--td->td_critcount;
+	crit_exit_raw(td);
 #ifdef INVARIANTS
 	if (__predict_false(td->td_critcount < 0))
 		crit_panic();
 #endif
-	cpu_ccfence();	/* prevent compiler reordering */
 }
 
 static __inline void
@@ -221,6 +240,7 @@ _crit_exit(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 static __inline void
 _crit_exit_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 {
+	cpu_ccfence();
 	--gd->gd_intr_nesting_level;
 	_crit_exit_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__);
 }
-- 
2.41.0
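
Illustrative note (separate from the patch above): the sketch below is a
standalone, userland approximation of the pattern the change adopts.  The
thread structure is simplified and "ccfence" merely stands in for DragonFly's
cpu_ccfence() compiler barrier, assumed here to be an empty asm with a
"memory" clobber; this is not the kernel's actual header.

/*
 * Compiler barriers on both sides pin the plain ++/-- of the critical
 * section count in place.  Without them the compiler may move the
 * adjustment across neighbouring loads and stores, e.g. sink the
 * decrement below a lock-release store, which is the kind of reordering
 * this patch is guarding against.
 */
#define ccfence()	__asm__ __volatile__("" ::: "memory")

struct thread { int td_critcount; };

static inline void
crit_enter_raw(struct thread *td)
{
	ccfence();
	++td->td_critcount;	/* enter critical section */
	ccfence();
}

static inline void
crit_exit_raw(struct thread *td)
{
	ccfence();
	--td->td_critcount;	/* leave critical section */
	ccfence();
}

The barriers emit no instructions of their own; they only constrain compiler
scheduling, which is why the wrappers can stand in for the bare increments and
decrements throughout without changing the generated locking sequences beyond
their placement.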