tsleep() - Add PINTERLOCKED flag to catch edge case.
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 14 Jul 2009 10:14:33 +0000 (03:14 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 14 Jul 2009 10:14:33 +0000 (03:14 -0700)
When the tsleep_interlock() + UNLOCK + tsleep() combination is used it is
possible for an incoming wakeup IPI to be processed even if the combination
is used within a critical section, because operations in between the two
may send an IPI.  Under heavy loads sending an IPI can force incoming IPIs
to be processed synchronously to avoid deadlocks.

It is also possible for tsleep itself to create this condition when it
releases the user process scheduler prior to descheduling itself.

PINTERLOCKED causes tsleep to check whether the bit set by tsleep_interlock()
is still set.  If it is not set we simply return without sleeping.

25 files changed:
sys/bus/cam/cam_sim.c
sys/bus/cam/cam_xpt.c
sys/dev/disk/ahci/ahci_dragonfly.c
sys/dev/disk/aic7xxx/aic_osm_lib.c
sys/dev/disk/sili/sili_dragonfly.c
sys/dev/drm/drmP.h
sys/dev/drm/drm_drv.c
sys/dev/drm/drm_lock.c
sys/dev/drm/radeon_cp.c
sys/dev/netif/iwi/if_iwi.c
sys/dev/raid/aac/aac.c
sys/dev/sound/pcm/sound.c
sys/kern/kern_synch.c
sys/kern/kern_umtx.c
sys/kern/lwkt_ipiq.c
sys/kern/lwkt_serialize.c
sys/kern/subr_bus.c
sys/kern/sys_pipe.c
sys/kern/vfs_bio.c
sys/kern/vfs_vnops.c
sys/net/tap/if_tap.c
sys/netproto/smb/smb_subr.c
sys/sys/param.h
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_subs.c

index a4f51e1..6a9f021 100644 (file)
@@ -104,10 +104,11 @@ sim_lock_sleep(void *ident, int flags, const char *wmesg, int timo,
                crit_enter();
                tsleep_interlock(ident);
                lockmgr(lock, LK_RELEASE);
+               retval = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
+       } else {
+               retval = tsleep(ident, flags, wmesg, timo);
        }
 
-       retval = tsleep(ident, flags, wmesg, timo);
-
        if (lock != &sim_mplock) {
                lockmgr(lock, LK_EXCLUSIVE);
                crit_exit();
index 70ddaab..41e8824 100644 (file)
@@ -1419,7 +1419,7 @@ xpt_scanner_thread(void *dummy)
                crit_enter();
                tsleep_interlock(&xsoftc.ccb_scanq);
                xpt_unlock_buses();
-               tsleep(&xsoftc.ccb_scanq, 0, "ccb_scanq", 0);
+               tsleep(&xsoftc.ccb_scanq, PINTERLOCKED, "ccb_scanq", 0);
                crit_exit();
        }
 }
index c529993..5c54583 100644 (file)
@@ -296,7 +296,7 @@ ahci_port_thread(void *arg)
                crit_enter();
                tsleep_interlock(&ap->ap_thread);
                if (ap->ap_signal == 0)
-                       tsleep(&ap->ap_thread, 0, "ahport", 0);
+                       tsleep(&ap->ap_thread, PINTERLOCKED, "ahport", 0);
                crit_exit();
                mask = ap->ap_signal;
        }
index 3ba2031..d30736a 100644 (file)
@@ -105,7 +105,7 @@ aic_terminate_recovery_thread(struct aic_softc *aic)
        aic_lock(aic);
        tsleep_interlock(aic->platform_data);
        aic_unlock(aic);
-       tsleep(aic->platform_data, 0, "thtrm", 0);
+       tsleep(aic->platform_data, PINTERLOCKED, "thtrm", 0);
        crit_exit();
 }
 
@@ -123,7 +123,7 @@ aic_recovery_thread(void *arg)
                        crit_enter();
                        tsleep_interlock(aic);
                        aic_unlock(aic);
-                       tsleep(aic, 0, "idle", 0);
+                       tsleep(aic, PINTERLOCKED, "idle", 0);
                        aic_lock(aic);
                        crit_exit();
                }
index 53acc6b..6641094 100644 (file)
@@ -296,7 +296,7 @@ sili_port_thread(void *arg)
                crit_enter();
                tsleep_interlock(&ap->ap_thread);
                if (ap->ap_signal == 0)
-                       tsleep(&ap->ap_thread, 0, "ahport", 0);
+                       tsleep(&ap->ap_thread, PINTERLOCKED, "ahport", 0);
                crit_exit();
                mask = ap->ap_signal;
        }
index 92eb9c1..7cfce1a 100644 (file)
@@ -292,7 +292,7 @@ for ( ret = 0 ; !ret && !(condition) ; ) {                  \
             crit_enter();                                      \
             tsleep_interlock(&(queue));                                \
             lwkt_serialize_exit(&dev->irq_lock);               \
-            ret = -tsleep(&(queue), PCATCH,                    \
+            ret = -tsleep(&(queue), PCATCH | PINTERLOCKED,     \
                          "drmwtq", (timeout));                 \
             crit_exit();                                       \
        } else {                                                \
index 231637f..0cc834e 100644 (file)
@@ -664,7 +664,7 @@ int drm_close(struct dev_close_args *ap)
                        tsleep_interlock((void *)&dev->lock.lock_queue);
                        DRM_UNLOCK();
                        retcode = tsleep((void *)&dev->lock.lock_queue,
-                           PCATCH, "drmlk2", 0);
+                           PCATCH | PINTERLOCKED, "drmlk2", 0);
                        crit_exit();
                        DRM_LOCK();
                        if (retcode)
index 32c8419..3306398 100644 (file)
@@ -81,8 +81,8 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
                crit_enter();
                tsleep_interlock((void *)&dev->lock.lock_queue);
                DRM_UNLOCK();
-               ret = tsleep((void *)&dev->lock.lock_queue, PCATCH,
-                   "drmlk2", 0);
+               ret = tsleep((void *)&dev->lock.lock_queue,
+                            PCATCH | PINTERLOCKED, "drmlk2", 0);
                crit_exit();
                DRM_LOCK();
                if (ret != 0)
index b05e72b..fc9260e 100644 (file)
@@ -1689,10 +1689,11 @@ void radeon_do_release(struct drm_device * dev)
                                while ((ret = r600_do_cp_idle(dev_priv)) != 0) {
                                        DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
                                        crit_enter();
-                                       tsleep_interlock((void *)&dev->lock.lock_queue);
+                                       tsleep_interlock(&dev->lock.lock_queue);
                                        DRM_UNLOCK();
-                                       ret = tsleep((void *)&dev->lock.lock_queue, PCATCH,
-                                           "rdnrel", 0);
+                                       ret = tsleep(&dev->lock.lock_queue,
+                                                   PCATCH | PINTERLOCKED,
+                                                   "rdnrel", 0);
                                        crit_exit();
                                        DRM_LOCK();
                                }
@@ -1700,10 +1701,11 @@ void radeon_do_release(struct drm_device * dev)
                                while ((ret = radeon_do_cp_idle(dev_priv)) != 0) {
                                        DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
                                        crit_enter();
-                                       tsleep_interlock((void *)&dev->lock.lock_queue);
+                                       tsleep_interlock(&dev->lock.lock_queue);
                                        DRM_UNLOCK();
-                                       ret = tsleep((void *)&dev->lock.lock_queue, PCATCH,
-                                           "rdnrel", 0);
+                                       ret = tsleep(&dev->lock.lock_queue,
+                                                   PCATCH | PINTERLOCKED,
+                                                   "rdnrel", 0);
                                        crit_exit();
                                        DRM_LOCK();
                                }
index a0e02f5..1f33f68 100644 (file)
@@ -227,7 +227,7 @@ iwi_fw_monitor(void *arg)
                        tsleep_interlock(IWI_FW_WAKE_MONITOR(sc));
                        lwkt_serialize_exit(ifp->if_serializer);
                        error = tsleep(IWI_FW_WAKE_MONITOR(sc),
-                                      0, "iwifwm", 0);
+                                      PINTERLOCKED, "iwifwm", 0);
                        crit_exit();
                        lwkt_serialize_enter(ifp->if_serializer);
                }
@@ -264,7 +264,8 @@ iwi_fw_monitor(void *arg)
                                crit_enter();
                                tsleep_interlock(IWI_FW_CMD_ACKED(sc));
                                lwkt_serialize_exit(ifp->if_serializer);
-                               error = tsleep(IWI_FW_CMD_ACKED(sc), 0,
+                               error = tsleep(IWI_FW_CMD_ACKED(sc),
+                                              PINTERLOCKED,
                                               "iwirun", boff * hz);
                                crit_exit();
                                lwkt_serialize_enter(ifp->if_serializer);
@@ -572,7 +573,7 @@ iwi_detach(device_t dev)
                crit_enter();
                tsleep_interlock(IWI_FW_EXIT_MONITOR(sc));
                lwkt_serialize_exit(ifp->if_serializer);
-               tsleep(IWI_FW_EXIT_MONITOR(sc), 0, "iwiexi", 0);
+               tsleep(IWI_FW_EXIT_MONITOR(sc), PINTERLOCKED, "iwiexi", 0);
                crit_exit();
                /* No need to hold serializer again */
 
@@ -1628,7 +1629,7 @@ iwi_cmd(struct iwi_softc *sc, uint8_t type, void *data, uint8_t len, int async)
                crit_enter();
                tsleep_interlock(IWI_FW_CMD_ACKED(sc));
                lwkt_serialize_exit(ifp->if_serializer);
-               ret = tsleep(IWI_FW_CMD_ACKED(sc), 0, "iwicmd", hz);
+               ret = tsleep(IWI_FW_CMD_ACKED(sc), PINTERLOCKED, "iwicmd", hz);
                crit_exit();
                lwkt_serialize_enter(ifp->if_serializer);
        } else {
@@ -2293,7 +2294,7 @@ iwi_load_firmware(struct iwi_softc *sc, void *fw, int size)
        crit_enter();
        tsleep_interlock(IWI_FW_INITIALIZED(sc));
        lwkt_serialize_exit(ifp->if_serializer);
-       error = tsleep(IWI_FW_INITIALIZED(sc), 0, "iwiinit", hz);
+       error = tsleep(IWI_FW_INITIALIZED(sc), PINTERLOCKED, "iwiinit", hz);
        crit_exit();
        lwkt_serialize_enter(ifp->if_serializer);
        if (error != 0) {
index 4a57bbd..c869d89 100644 (file)
@@ -924,7 +924,7 @@ aac_command_thread(struct aac_softc *sc)
                        crit_enter();
                        tsleep_interlock(sc->aifthread);
                        AAC_LOCK_RELEASE(&sc->aac_io_lock);
-                       retval = tsleep(sc->aifthread, 0,
+                       retval = tsleep(sc->aifthread, PINTERLOCKED,
                                        "aifthd", AAC_PERIODIC_INTERVAL * hz);
                        AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
                        crit_exit();
@@ -1370,7 +1370,7 @@ aac_wait_command(struct aac_command *cm)
        crit_enter();
        tsleep_interlock(cm);
        AAC_LOCK_RELEASE(&sc->aac_io_lock);
-       error = tsleep(cm, 0, "aacwait", 0);
+       error = tsleep(cm, PINTERLOCKED, "aacwait", 0);
        AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
        crit_exit();
        return(error);
@@ -3138,7 +3138,7 @@ aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib)
                crit_enter();
                tsleep_interlock(&cm);
                AAC_LOCK_RELEASE(&sc->aac_io_lock);
-               tsleep(&cm, 0, "sendfib", 0);
+               tsleep(&cm, PINTERLOCKED, "sendfib", 0);
                AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
                crit_exit();
        }
index bc3c263..4acc95b 100644 (file)
@@ -131,7 +131,7 @@ snd_mtxsleep(void *addr, sndlock_t lock, int flags, const char *wmesg, int timo)
        crit_enter();
        tsleep_interlock(addr);
        snd_mtxunlock(lock);
-       r = tsleep(addr, flags, wmesg, timo);
+       r = tsleep(addr, flags | PINTERLOCKED, wmesg, timo);
        snd_mtxlock(lock);
        crit_exit();
        return(r);
index 1544b60..15e21fb 100644 (file)
@@ -93,6 +93,7 @@ KTR_INFO(KTR_TSLEEP, tsleep, tsleep_beg, 0, "tsleep enter %p", sizeof(void *));
 KTR_INFO(KTR_TSLEEP, tsleep, tsleep_end, 1, "tsleep exit", 0);
 KTR_INFO(KTR_TSLEEP, tsleep, wakeup_beg, 2, "wakeup enter %p", sizeof(void *));
 KTR_INFO(KTR_TSLEEP, tsleep, wakeup_end, 3, "wakeup exit", 0);
+KTR_INFO(KTR_TSLEEP, tsleep, ilockfail,  4, "interlock failed %p", sizeof(void *));
 
 #define logtsleep1(name)       KTR_LOG(tsleep_ ## name)
 #define logtsleep2(name, val)  KTR_LOG(tsleep_ ## name, val)
@@ -442,6 +443,21 @@ tsleep(void *ident, int flags, const char *wmesg, int timo)
        }
 
        /*
+        * If the interlocked flag is set but our cpu bit in the slpqueue
+        * is no longer set, then a wakeup was processed inbetween the
+        * tsleep_interlock() and here.  This can occur under extreme loads
+        * if the IPIQ fills up and gets processed synchronously by, say,
+        * a wakeup() or other IPI sent inbetween the interlock and here.
+        *
+        * Even the usched->release function just above can muff it up.
+        */
+       if ((flags & PINTERLOCKED) &&
+           (slpque_cpumasks[id] & gd->gd_cpumask) == 0) {
+               logtsleep2(ilockfail, ident);
+               goto resume;
+       }
+
+       /*
         * Move our thread to the correct queue and setup our wchan, etc.
         */
        lwkt_deschedule_self(td);
index 743f716..b765651 100644 (file)
@@ -136,7 +136,8 @@ sys_umtx_sleep(struct umtx_sleep_args *uap)
        if (*(int *)(sf_buf_kva(sf) + offset) == uap->value) {
            vm_page_init_action(&action, umtx_sleep_page_action_cow, waddr);
            vm_page_register_action(m, &action, VMEVENT_COW);
-           error = tsleep(waddr, PCATCH|PDOMAIN_UMTX, "umtxsl", timeout);
+           error = tsleep(waddr, PCATCH | PINTERLOCKED | PDOMAIN_UMTX,
+                          "umtxsl", timeout);
            vm_page_unregister_action(m, &action);
        } else {
            error = EBUSY;
index 18e6172..7a5c115 100644 (file)
@@ -604,7 +604,7 @@ lwkt_synchronize_ipiqs(const char *wmesg)
     while (other_cpumask != 0) {
        tsleep_interlock(&other_cpumask);
        if (other_cpumask != 0)
-           tsleep(&other_cpumask, 0, wmesg, 0);
+           tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
     }
     crit_exit();
 }
index 6bb7e14..a29fac6 100644 (file)
@@ -290,7 +290,7 @@ lwkt_serialize_sleep(void *info)
     tsleep_interlock(s);
     if (atomic_intr_cond_test(&s->interlock) != 0) {
        logslz(sleep_beg, s);
-       tsleep(s, 0, "slize", 0);
+       tsleep(s, PINTERLOCKED, "slize", 0);
        logslz(sleep_end, s);
     }
     crit_exit();
@@ -340,7 +340,7 @@ lwkt_serialize_adaptive_sleep(void *arg)
     tsleep_interlock(s);
     if (atomic_intr_cond_test(&s->interlock) != 0) {
        logslz(sleep_beg, s);
-       tsleep(s, 0, "slize", 0);
+       tsleep(s, PINTERLOCKED, "slize", 0);
        logslz(sleep_end, s);
     }
     crit_exit();
index 709f2b6..7d84e19 100644 (file)
@@ -226,7 +226,7 @@ devread(struct dev_read_args *ap)
                crit_enter();
                tsleep_interlock(&devsoftc);
                lockmgr(&devsoftc.lock, LK_RELEASE);
-               rv = tsleep(&devsoftc, PCATCH, "devctl", 0);
+               rv = tsleep(&devsoftc, PCATCH | PINTERLOCKED, "devctl", 0);
                crit_exit();
                lockmgr(&devsoftc.lock, LK_EXCLUSIVE);
                if (rv) {
index e9a96a5..613dfee 100644 (file)
@@ -608,7 +608,7 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
                crit_enter();
                tsleep_interlock(rpipe);
                lwkt_reltoken(&wlock);
-               error = tsleep(rpipe, PCATCH, "piperd", 0);
+               error = tsleep(rpipe, PCATCH | PINTERLOCKED, "piperd", 0);
                crit_exit();
                ++pipe_rblocked_count;
                if (error)
index cceaa54..6ce3743 100644 (file)
@@ -419,7 +419,7 @@ bd_wait(int totalspace)
                tsleep_interlock(&bd_wake_ary[i]);
                spin_unlock_wr(&needsbuffer_spin);
 
-               tsleep(&bd_wake_ary[i], 0, "flstik", hz);
+               tsleep(&bd_wake_ary[i], PINTERLOCKED, "flstik", hz);
                crit_exit();
 
                totalspace = runningbufspace + dirtybufspace - hidirtybufspace;
@@ -536,7 +536,8 @@ bio_track_wait(struct bio_track *track, int slp_flags, int slp_timo)
                tsleep_interlock(track);
                if (active == desired ||
                    atomic_cmpset_int(&track->bk_active, active, desired)) {
-                       error = tsleep(track, slp_flags, "iowait", slp_timo);
+                       error = tsleep(track, slp_flags | PINTERLOCKED,
+                                      "iowait", slp_timo);
                        if (error)
                                break;
                }
@@ -3262,9 +3263,9 @@ biowait(struct buf *bp)
                        if (bp->b_cmd == BUF_CMD_DONE)
                                break;
                        if (bp->b_cmd == BUF_CMD_READ)
-                               tsleep(bp, 0, "biord", 0);
+                               tsleep(bp, PINTERLOCKED, "biord", 0);
                        else
-                               tsleep(bp, 0, "biowr", 0);
+                               tsleep(bp, PINTERLOCKED, "biowr", 0);
                }
                crit_exit();
        }
index 8b8ac24..c460944 100644 (file)
@@ -499,7 +499,7 @@ vn_get_fpf_offset(struct file *fp)
                        crit_enter();
                        tsleep_interlock(&fp->f_flag);
                        if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
-                               tsleep(&fp->f_flag, 0, "fpoff", 0);
+                               tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0);
                        crit_exit();
                } else {
                        nflags = flags | FOFFSETLOCK;
index 4cb6fb2..af13274 100644 (file)
@@ -782,7 +782,7 @@ tapread(struct dev_read_args *ap)
                        crit_enter();
                        tsleep_interlock(tp);
                        ifnet_deserialize_all(ifp);
-                       error = tsleep(tp, PCATCH, "taprd", 0);
+                       error = tsleep(tp, PCATCH | PINTERLOCKED, "taprd", 0);
                        crit_exit();
                        if (error)
                                return (error);
index bdf041f..d5fc709 100644 (file)
@@ -422,7 +422,7 @@ smb_sleep(void *chan, struct smb_slock *sl, int slpflags, const char *wmesg, int
                crit_enter();
                tsleep_interlock(chan);
                smb_sl_unlock(sl);
-               error = tsleep(chan, slpflags, wmesg, timo);
+               error = tsleep(chan, slpflags | PINTERLOCKED, wmesg, timo);
                if ((slpflags & PDROP) == 0)
                        smb_sl_lock(sl);
                crit_exit();
index 97b09d8..7f4aae1 100644 (file)
 
 #define PCATCH         0x00000100      /* tsleep checks signals */
 #define PUSRFLAG1      0x00000200      /* Subsystem specific flag */
+#define PINTERLOCKED   0x00000400      /* Interlocked tsleep */
 #define PWAKEUP_CPUMASK        0x00003FFF      /* start cpu for chained wakeups */
 #define PWAKEUP_MYCPU  0x00004000      /* wakeup on current cpu only */
 #define PWAKEUP_ONE    0x00008000      /* argument to wakeup: only one */
index 62b13bc..b091525 100644 (file)
@@ -134,7 +134,7 @@ hammer_io_wait(hammer_io_t io)
                tsleep_interlock(io);
                io->waiting = 1;
                for (;;) {
-                       tsleep(io, 0, "hmrflw", 0);
+                       tsleep(io, PINTERLOCKED, "hmrflw", 0);
                        if (io->running == 0)
                                break;
                        tsleep_interlock(io);
@@ -1459,7 +1459,7 @@ hammer_io_flush_sync(hammer_mount_t hmp)
                        crit_enter();
                        tsleep_interlock(&bp->b_cmd);
                        if (bp->b_cmd != BUF_CMD_DONE)
-                               tsleep(&bp->b_cmd, 0, "hmrFLS", 0);
+                               tsleep(&bp->b_cmd, PINTERLOCKED, "hmrFLS", 0);
                        crit_exit();
                }
                bp->b_flags &= ~B_ASYNC;
index 5958188..49cb15e 100644 (file)
@@ -71,7 +71,7 @@ hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
                        crit_enter();
                        tsleep_interlock(lock);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
-                               tsleep(lock, 0, ident, 0);
+                               tsleep(lock, PINTERLOCKED, ident, 0);
                                if (hammer_debug_locks)
                                        kprintf("hammer_lock_ex: try again\n");
                        }
@@ -153,7 +153,7 @@ hammer_lock_sh(struct hammer_lock *lock)
                        crit_enter();
                        tsleep_interlock(lock);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
-                               tsleep(lock, 0, "hmrlck", 0);
+                               tsleep(lock, PINTERLOCKED, "hmrlck", 0);
                        }
                        crit_exit();
                }