kernel - Fix GCC reordering problem with td_critcount
author     Matthew Dillon <dillon@apollo.backplane.com>
           Tue, 3 Oct 2017 01:42:34 +0000 (18:42 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Tue, 3 Oct 2017 01:42:34 +0000 (18:42 -0700)
* Wrap all ++td->td_critcount and --td->td_critcount uses with
  inlines (crit_enter_raw() and crit_exit_raw()) which execute
  cpu_ccfence() before and after the adjustment, guaranteeing that
  GCC does not reorder the operation around critical memory changes
  (a sketch of the pattern follows below).

* This fixes a race in lockmgr() and possibly a few other places
  too.
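
* For reference, cpu_ccfence() is a compiler-only fence: it emits
  no instructions and merely prevents GCC from moving memory
  accesses across it.  A minimal sketch of the pattern, assuming
  the usual x86_64 definition of cpu_ccfence() as an empty __asm
  with a "memory" clobber:

	/* compiler fence: generates no code, blocks GCC reordering */
	static __inline void
	cpu_ccfence(void)
	{
		__asm __volatile("" : : : "memory");
	}

	/*
	 * Raw critical-section adjustment.  The fences pin the
	 * increment between the surrounding memory operations, so
	 * GCC cannot hoist protected accesses out of (or sink them
	 * into) the critical section.
	 */
	static __inline void
	crit_enter_raw(thread_t td)
	{
		cpu_ccfence();
		++td->td_critcount;
		cpu_ccfence();
	}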

sys/kern/kern_mutex.c
sys/kern/kern_spinlock.c
sys/kern/lwkt_thread.c
sys/platform/vkernel64/platform/machintr.c
sys/platform/vkernel64/x86_64/exception.c
sys/sys/mutex2.h
sys/sys/spinlock2.h
sys/sys/thread2.h

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 527ab83..b7b247f 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -136,9 +136,9 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                }
                td = curthread;
                nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
-               ++td->td_critcount;
+               crit_enter_raw(td);
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
-                       --td->td_critcount;
+                       crit_exit_raw(td);
                        continue;
                }
 
@@ -154,7 +154,7 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
-                       --td->td_critcount;
+                       crit_exit_raw(td);
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
@@ -177,7 +177,7 @@ __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
-               --td->td_critcount;
+               crit_exit_raw(td);
 
                /* 
                 * If asynchronous lock request return without
@@ -286,9 +286,9 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                }
                td = curthread;
                nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
-               ++td->td_critcount;
+               crit_enter_raw(td);
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
-                       --td->td_critcount;
+                       crit_exit_raw(td);
                        continue;
                }
 
@@ -306,7 +306,7 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                                                 MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                        }
-                       --td->td_critcount;
+                       crit_exit_raw(td);
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
@@ -329,7 +329,7 @@ __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
-               --td->td_critcount;
+               crit_exit_raw(td);
 
                /* 
                 * If asynchronous lock request return without
@@ -441,7 +441,7 @@ _mtx_spinlock_try(mtx_t *mtx)
                } else {
                        --gd->gd_spinlocks;
                        cpu_ccfence();
-                       --gd->gd_curthread->td_critcount;
+                       crit_exit_raw(gd->gd_curthread);
                        res = EAGAIN;
                        break;
                }
@@ -744,7 +744,7 @@ mtx_chain_link_ex(mtx_t *mtx, u_int olock)
 
        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;   /* upgrade if necc */
-       ++td->td_critcount;
+       crit_enter_raw(td);
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                link = mtx->mtx_exlink;
                KKASSERT(link != NULL);
@@ -778,11 +778,12 @@ mtx_chain_link_ex(mtx_t *mtx, u_int olock)
                        wakeup(link);
                }
                atomic_clear_int(&mtx->mtx_lock, nlock);
-               --td->td_critcount;
+               crit_exit_raw(td);
                return 1;
        }
        /* retry */
-       --td->td_critcount;
+       crit_exit_raw(td);
+
        return 0;
 }
 
@@ -805,7 +806,7 @@ mtx_chain_link_sh(mtx_t *mtx, u_int olock)
        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN;
        nlock &= ~MTX_EXCLUSIVE;
-       ++td->td_critcount;
+       crit_enter_raw(td);
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                /*
                 * It should not be possible for SHWANTED to be set without
@@ -854,11 +855,12 @@ mtx_chain_link_sh(mtx_t *mtx, u_int olock)
                }
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
                                                 MTX_SHWANTED);
-               --td->td_critcount;
+               crit_exit_raw(td);
                return 1;
        }
        /* retry */
-       --td->td_critcount;
+       crit_exit_raw(td);
+
        return 0;
 }
 
@@ -880,7 +882,7 @@ mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
         * Do not use cmpxchg to wait for LINKSPIN to clear as this might
         * result in too much cpu cache traffic.
         */
-       ++td->td_critcount;
+       crit_enter_raw(td);
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
@@ -924,7 +926,7 @@ mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
-       --td->td_critcount;
+       crit_exit_raw(td);
 }
 
 /*
@@ -1039,7 +1041,7 @@ mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
        /*
         * Acquire MTX_LINKSPIN
         */
-       ++td->td_critcount;
+       crit_enter_raw(td);
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
@@ -1141,5 +1143,5 @@ mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
-       --td->td_critcount;
+       crit_exit_raw(td);
 }
diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c
index 5056c2d..c009ac0 100644
--- a/sys/kern/kern_spinlock.c
+++ b/sys/kern/kern_spinlock.c
@@ -130,7 +130,8 @@ spin_trylock_contested(struct spinlock *spin)
                return TRUE;
        /*atomic_add_int(&spin->counta, -1);*/
        --gd->gd_spinlocks;
-       --gd->gd_curthread->td_critcount;
+       crit_exit_raw(gd->gd_curthread);
+
        return (FALSE);
 }
 
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index 9729e09..d3a26f5 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -1727,6 +1727,7 @@ crit_panic(void)
     int lcrit = td->td_critcount;
 
     td->td_critcount = 0;
+    cpu_ccfence();
     panic("td_critcount is/would-go negative! %p %d", td, lcrit);
     /* NOT REACHED */
 }
diff --git a/sys/platform/vkernel64/platform/machintr.c b/sys/platform/vkernel64/platform/machintr.c
index 8dc7021..ee9c594 100644
--- a/sys/platform/vkernel64/platform/machintr.c
+++ b/sys/platform/vkernel64/platform/machintr.c
@@ -177,8 +177,10 @@ signalintr(int intr)
                umtx_wakeup(&gd->mi.gd_reqflags, 0);
        } else {
                ++td->td_nest_count;
+               cpu_ccfence();
                atomic_clear_int(&gd->gd_fpending, 1 << intr);
                sched_ithd_hard_virtual(intr);
+               cpu_ccfence();
                --td->td_nest_count;
        }
 }
diff --git a/sys/platform/vkernel64/x86_64/exception.c b/sys/platform/vkernel64/x86_64/exception.c
index 02d6c17..67e539c 100644
--- a/sys/platform/vkernel64/x86_64/exception.c
+++ b/sys/platform/vkernel64/x86_64/exception.c
@@ -81,13 +81,13 @@ ipisig(int nada, siginfo_t *info, void *ctxp)
 
        save = errno;
        if (td->td_critcount == 0) {
-               ++td->td_critcount;
+               crit_enter_raw(td);
                ++gd->gd_cnt.v_ipi;
                ++gd->gd_intr_nesting_level;
                atomic_swap_int(&gd->gd_npoll, 0);
                lwkt_process_ipiq();
                --gd->gd_intr_nesting_level;
-               --td->td_critcount;
+               crit_exit_raw(td);
        } else {
                need_ipiq();
        }
@@ -125,13 +125,13 @@ stopsig(int nada, siginfo_t *info, void *ctxp)
        sigaddset(&ss, SIGTERM);
        sigaddset(&ss, SIGWINCH);
 
-       ++td->td_critcount;
+       crit_enter_raw(td);
        ++gd->gd_intr_nesting_level;
        while (CPUMASK_TESTMASK(stopped_cpus, gd->gd_cpumask)) {
                sigsuspend(&ss);
        }
        --gd->gd_intr_nesting_level;
-       --td->td_critcount;
+       crit_exit_raw(td);
 
        errno = save;
 }
@@ -149,11 +149,13 @@ kqueuesig(int nada, siginfo_t *info, void *ctxp)
 
        save = errno;
        if (td->td_critcount == 0) {
-               ++td->td_critcount;
+               crit_enter_raw(td);
                ++gd->gd_intr_nesting_level;
+               cpu_ccfence();
                kqueue_intr(NULL);
+               cpu_ccfence();
                --gd->gd_intr_nesting_level;
-               --td->td_critcount;
+               crit_exit_raw(td);
        } else {
                need_kqueue();
        }
@@ -170,11 +172,13 @@ timersig(int nada, siginfo_t *info, void *ctxp)
 
        save = errno;
        if (td->td_critcount == 0) {
-               ++td->td_critcount;
+               crit_enter_raw(td);
                ++gd->gd_intr_nesting_level;
+               cpu_ccfence();
                vktimer_intr(NULL);
+               cpu_ccfence();
                --gd->gd_intr_nesting_level;
-               --td->td_critcount;
+               crit_exit_raw(td);
        } else {
                need_timer();
        }
diff --git a/sys/sys/mutex2.h b/sys/sys/mutex2.h
index 028adf5..b03d1a4 100644
--- a/sys/sys/mutex2.h
+++ b/sys/sys/mutex2.h
@@ -210,9 +210,9 @@ mtx_spinlock(mtx_t *mtx)
        /*
         * Predispose a hard critical section
         */
-       ++gd->gd_curthread->td_critcount;
-       cpu_ccfence();
+       crit_enter_raw(gd->gd_curthread);
        ++gd->gd_spinlocks;
+       cpu_ccfence();
 
        /*
         * If we cannot get it trivially get it the hard way.
@@ -234,9 +234,9 @@ mtx_spinlock_try(mtx_t *mtx)
        /*
         * Predispose a hard critical section
         */
-       ++gd->gd_curthread->td_critcount;
-       cpu_ccfence();
+       crit_enter_raw(gd->gd_curthread);
        ++gd->gd_spinlocks;
+       cpu_ccfence();
 
        /*
         * If we cannot get it trivially call _mtx_spinlock_try().  This
@@ -371,9 +371,9 @@ mtx_spinunlock(mtx_t *mtx)
 
        mtx_unlock(mtx);
 
-       --gd->gd_spinlocks;
        cpu_ccfence();
-       --gd->gd_curthread->td_critcount;
+       --gd->gd_spinlocks;
+       crit_exit_raw(gd->gd_curthread);
 }
 
 /*
diff --git a/sys/sys/spinlock2.h b/sys/sys/spinlock2.h
index 9769eb2..081e111 100644
--- a/sys/sys/spinlock2.h
+++ b/sys/sys/spinlock2.h
@@ -71,9 +71,9 @@ spin_trylock(struct spinlock *spin)
 {
        globaldata_t gd = mycpu;
 
-       ++gd->gd_curthread->td_critcount;
-       cpu_ccfence();
+       crit_enter_raw(gd->gd_curthread);
        ++gd->gd_spinlocks;
+       cpu_ccfence();
        if (atomic_cmpset_int(&spin->counta, 0, 1) == 0)
                return (spin_trylock_contested(spin));
 #ifdef DEBUG_LOCKS
@@ -110,9 +110,9 @@ _spin_lock_quick(globaldata_t gd, struct spinlock *spin, const char *ident)
 {
        int count;
 
-       ++gd->gd_curthread->td_critcount;
-       cpu_ccfence();
+       crit_enter_raw(gd->gd_curthread);
        ++gd->gd_spinlocks;
+       cpu_ccfence();
 
        count = atomic_fetchadd_int(&spin->counta, 1);
        if (__predict_false(count != 0)) {
@@ -171,9 +171,9 @@ spin_unlock_quick(globaldata_t gd, struct spinlock *spin)
 #ifdef DEBUG_LOCKS
        KKASSERT(gd->gd_spinlocks > 0);
 #endif
-       --gd->gd_spinlocks;
        cpu_ccfence();
-       --gd->gd_curthread->td_critcount;
+       --gd->gd_spinlocks;
+       crit_exit_raw(gd->gd_curthread);
 }
 
 static __inline void
@@ -198,9 +198,9 @@ _spin_lock_shared_quick(globaldata_t gd, struct spinlock *spin,
 {
        int counta;
 
-       ++gd->gd_curthread->td_critcount;
-       cpu_ccfence();
+       crit_enter_raw(gd->gd_curthread);
        ++gd->gd_spinlocks;
+       cpu_ccfence();
 
        counta = atomic_fetchadd_int(&spin->counta, 1);
        if (__predict_false((counta & SPINLOCK_SHARED) == 0)) {
@@ -258,9 +258,9 @@ spin_unlock_shared_quick(globaldata_t gd, struct spinlock *spin)
 #ifdef DEBUG_LOCKS
        KKASSERT(gd->gd_spinlocks > 0);
 #endif
-       --gd->gd_spinlocks;
        cpu_ccfence();
-       --gd->gd_curthread->td_critcount;
+       --gd->gd_spinlocks;
+       crit_exit_raw(gd->gd_curthread);
 }
 
 static __inline void
diff --git a/sys/sys/thread2.h b/sys/sys/thread2.h
index ccd93a0..2b62fa5 100644
--- a/sys/sys/thread2.h
+++ b/sys/sys/thread2.h
 #include <machine/cpufunc.h>
 #include <machine/cpumask.h>
 
+/*
+ * Don't let GCC reorder critical section count adjustments, because it
+ * will BLOW US UP if it does.
+ */
+static __inline void
+crit_enter_raw(thread_t td)
+{
+       cpu_ccfence();
+       ++td->td_critcount;
+       cpu_ccfence();
+}
+
+static __inline void
+crit_exit_raw(thread_t td)
+{
+       cpu_ccfence();
+       --td->td_critcount;
+       cpu_ccfence();
+}
+
 /*
  * Is a token held either by the specified thread or held shared?
  *
@@ -161,9 +181,8 @@ _debug_crit_exit(thread_t td, const char *id)
 static __inline void
 _crit_enter_quick(thread_t td __DEBUG_CRIT_ADD_ARG__)
 {
-    ++td->td_critcount;
+    crit_enter_raw(td);
     __DEBUG_CRIT_ENTER(td);
-    cpu_ccfence();
 }
 
 static __inline void
@@ -177,6 +196,7 @@ _crit_enter_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 {
     _crit_enter_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__);
     ++gd->gd_intr_nesting_level;
+    cpu_ccfence();
 }
 
 
@@ -196,12 +216,11 @@ static __inline void
 _crit_exit_noyield(thread_t td __DEBUG_CRIT_ADD_ARG__)
 {
     __DEBUG_CRIT_EXIT(td);
-    --td->td_critcount;
+    crit_exit_raw(td);
 #ifdef INVARIANTS
     if (__predict_false(td->td_critcount < 0))
        crit_panic();
 #endif
-    cpu_ccfence();     /* prevent compiler reordering */
 }
 
 static __inline void
@@ -221,6 +240,7 @@ _crit_exit(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 static __inline void
 _crit_exit_hard(globaldata_t gd __DEBUG_CRIT_ADD_ARG__)
 {
+    cpu_ccfence();
     --gd->gd_intr_nesting_level;
     _crit_exit_quick(gd->gd_curthread __DEBUG_CRIT_PASS_ARG__);
 }