kernel - Fix callout race and panic master
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 21 Aug 2017 00:09:27 +0000 (17:09 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 21 Aug 2017 00:09:27 +0000 (17:09 -0700)
* Fix race (found by ivadasz) related to the IPI/WAIT code unblocking
  before the IPI is able to finish adjusting the knode and callout.
  The wait code was only waiting for the IPI counter to reach 0 via
  IPI_MASK; it also had to wait for the ARMED bit to get cleared.

* Avoid retesting c->c_flags to handle WAITING bit change races.  Instead,
  fully integrate the test-and-clear of the WAITING bit into
  callout_unpend_disarm().

* Fix an issue where callout_terminate() fails to IPI the remote cpu
  due to the function dispatch code clearing the ARMED bit.  No
  longer clear the ARMED bit.  This ensures that a termination or stop
  waits for the callout to return.

  This change means that synchronous callout operations to other cpus will
  be more expensive.  However, the kernel generally does not do cross-cpu
  callouts any more, so it's generally a non-problem.

* Remove the now unused callout_maybe_clear_armed() inline.

* Also clear kn->kn_hook for EVFILT_TIMER when removing a callout, as
  a safety.

Reported-by: ivadasz (Imre Vadasz)
sys/kern/kern_event.c
sys/kern/kern_timeout.c

index 5d24979..cdd4471 100644 (file)
@@ -504,6 +504,7 @@ filt_timerdetach(struct knote *kn)
 
        calloutp = (struct callout *)kn->kn_hook;
        callout_terminate(calloutp);
+       kn->kn_hook = NULL;
        kfree(calloutp, M_KQUEUE);
        atomic_subtract_int(&kq_ncallouts, 1);
 }
@@ -511,7 +512,6 @@ filt_timerdetach(struct knote *kn)
 static int
 filt_timer(struct knote *kn, long hint)
 {
-
        return (kn->kn_data != 0);
 }
 
index c3c0d04..b33aca9 100644 (file)
@@ -165,15 +165,17 @@ SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
        swi_softclock_setup, NULL);
 
 /*
- * Clear PENDING and, if possible, also clear ARMED and WAITING.  Returns
- * the flags prior to the clear, atomically (used to check for WAITING).
+ * Clear PENDING and WAITING.  Returns the flags prior to the clear,
+ * atomically (used to check for WAITING)
  *
- * Clearing the cpu association (ARMED) can significantly improve the
- * performance of the next callout_reset*() call.
+ * This function is used to clear PENDING after removal of the callout
+ * from the queue, but (usually) prior to the callback being issued.
+ * We cannot safely clear ARMED due to the callback still needing to
+ * be issued.
  */
 static __inline
 int
-callout_unpend_disarm(struct callout *c)
+callout_unpend(struct callout *c)
 {
        int flags;
        int nflags;
@@ -182,11 +184,8 @@ callout_unpend_disarm(struct callout *c)
                flags = c->c_flags;
                cpu_ccfence();
                nflags = flags & ~(CALLOUT_PENDING | CALLOUT_WAITING);
-               if ((flags & CALLOUT_IPI_MASK) == 0)
-                       nflags &= ~CALLOUT_ARMED;
-               if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
+               if (atomic_cmpset_int(&c->c_flags, flags, nflags))
                        break;
-               }
                cpu_pause();
                /* retry */
        }
@@ -194,13 +193,12 @@ callout_unpend_disarm(struct callout *c)
 }
 
 /*
- * Clear ARMED after finishing adjustments to the callout, potentially
- * allowing other cpus to take over.  We can only do this if the IPI mask
- * is 0.
- * Clear PENDING and, if possible, also clear ARMED and WAITING.  Returns
+ * the flags prior to the clear.
  */
 static __inline
 int
-callout_maybe_clear_armed(struct callout *c)
+callout_unpend_disarm(struct callout *c)
 {
        int flags;
        int nflags;
@@ -208,9 +206,9 @@ callout_maybe_clear_armed(struct callout *c)
        for (;;) {
                flags = c->c_flags;
                cpu_ccfence();
-               if (flags & (CALLOUT_PENDING | CALLOUT_IPI_MASK))
-                       break;
-               nflags = flags & ~CALLOUT_ARMED;
+               nflags = flags & ~CALLOUT_PENDING;
+               if ((flags & CALLOUT_IPI_MASK) == 0)
+                       nflags &= ~(CALLOUT_ARMED | CALLOUT_WAITING);
                if (atomic_cmpset_int(&c->c_flags, flags, nflags))
                        break;
                cpu_pause();
@@ -379,7 +377,7 @@ loop:
                                c_lk = c->c_lk;
                                c->c_func = NULL;
                                KKASSERT(c->c_flags & CALLOUT_DID_INIT);
-                               flags = callout_unpend_disarm(c);
+                               flags = callout_unpend(c);
                                error = lockmgr(c_lk, LK_EXCLUSIVE |
                                                      LK_CANCELABLE);
                                if (error == 0) {
@@ -399,7 +397,7 @@ loop:
                                c_arg = c->c_arg;
                                c->c_func = NULL;
                                KKASSERT(c->c_flags & CALLOUT_DID_INIT);
-                               flags = callout_unpend_disarm(c);
+                               flags = callout_unpend(c);
                                atomic_set_int(&c->c_flags, CALLOUT_EXECUTED);
                                crit_exit();
                                c_func(c_arg);
@@ -779,6 +777,7 @@ _callout_stop(struct callout *c, int issync)
                /* NOTE: IPI_MASK already tested */
                if ((flags & CALLOUT_PENDING) == 0)
                        nflags &= ~CALLOUT_ARMED;
+
                if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
                        /*
                         * Can only remove from callwheel if currently
@@ -799,12 +798,13 @@ _callout_stop(struct callout *c, int issync)
                                 *       physically removed (c) from the
                                 *       callwheel.
                                 *
-                                * NOTE: WAITING bit race exists when doing
-                                *       unconditional bit clears.
+                                * NOTE: This function also clears WAITING
+                                *       and returns oflags so we can test
+                                *       the bit.  However, we may have
+                                *       already cleared WAITING above, so
+                                *       we have to OR the flags.
                                 */
-                               callout_maybe_clear_armed(c);
-                               if (c->c_flags & CALLOUT_WAITING)
-                                       flags |= CALLOUT_WAITING;
+                               flags |= callout_unpend_disarm(c);
                        }
 
                        /*
@@ -835,8 +835,16 @@ _callout_stop(struct callout *c, int issync)
 
                flags = c->c_flags;
                cpu_ccfence();
-               if ((flags & CALLOUT_IPI_MASK) == 0)    /* fast path */
+
+               /*
+                * Fast path check
+                */
+               if ((flags & (CALLOUT_IPI_MASK | CALLOUT_ARMED)) == 0)
                        break;
+
+               /*
+                * Slow path
+                */
                nflags = flags | CALLOUT_WAITING;
                tsleep_interlock(c, 0);
                if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
@@ -935,9 +943,9 @@ callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
                nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
                nflags = nflags - 1;                    /* dec ipi count */
                if ((flags & (CALLOUT_IPI_MASK | CALLOUT_PENDING)) == 1)
-                       nflags &= ~CALLOUT_ARMED;
+                       nflags &= ~(CALLOUT_ARMED | CALLOUT_WAITING);
                if ((flags & CALLOUT_IPI_MASK) == 1)
-                       nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);
+                       nflags &= ~(CALLOUT_EXECUTED | CALLOUT_WAITING);
 
                if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
                        /*
@@ -959,12 +967,14 @@ callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
                                 *       physically removed (c) from the
                                 *       callwheel.
                                 *
-                                * NOTE: WAITING bit race exists when doing
-                                *       unconditional bit clears.
+                                * NOTE: This function also clears WAITING
+                                *       and returns oflags so we can test
+                                *       the bit.  However, we may have
+                                *       already cleared WAITING above, so
+                                *       we have to OR the flags.
                                 */
-                               callout_maybe_clear_armed(c);
-                               if (c->c_flags & CALLOUT_WAITING)
-                                       flags |= CALLOUT_WAITING;
+                               flags |= callout_unpend_disarm(c);
+                               /* (c) CAN BE DESTROYED AT ANY TIME NOW */
                        }
 
                        /*