kernel - Refactor lockmgr()
author     Matthew Dillon <dillon@apollo.backplane.com>
           Tue, 24 Oct 2017 01:39:16 +0000 (18:39 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Tue, 31 Oct 2017 17:49:47 +0000 (10:49 -0700)
* Seriously refactor lockmgr() so we can use atomic_fetchadd_*() for
  shared locks and reduce unnecessary atomic ops and atomic op loops.

  The main win here is being able to use atomic_fetchadd_*() when
  acquiring and releasing shared locks.  A simple fstat() loop (which
  takes an LK_SHARED lockmgr lock on the vnode) improves from 191ns
  to around 110ns per loop with 32 concurrent threads (on a 16-core/
  32-thread Xeon).
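
  For reference, a minimal sketch of the kind of test loop used (the
  actual benchmark harness is not part of this commit; the thread
  count, file path, and iteration count below are assumptions for
  illustration only):

    /*
     * Hypothetical benchmark sketch: each thread hammers fstat() on the
     * same descriptor, which takes the vnode's lockmgr lock LK_SHARED.
     */
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <time.h>

    #define NTHREADS 32
    #define NLOOPS   10000000

    static int fd;

    static void *
    worker(void *arg)
    {
            struct stat st;
            int i;

            for (i = 0; i < NLOOPS; ++i)
                    fstat(fd, &st);
            return NULL;
    }

    int
    main(void)
    {
            pthread_t td[NTHREADS];
            struct timespec t1, t2;
            double ns;
            int i;

            fd = open("/tmp/testfile", O_RDONLY);
            clock_gettime(CLOCK_MONOTONIC, &t1);
            for (i = 0; i < NTHREADS; ++i)
                    pthread_create(&td[i], NULL, worker, NULL);
            for (i = 0; i < NTHREADS; ++i)
                    pthread_join(td[i], NULL);
            clock_gettime(CLOCK_MONOTONIC, &t2);

            ns = (t2.tv_sec - t1.tv_sec) * 1e9 + (t2.tv_nsec - t1.tv_nsec);
            /* threads run concurrently, so wall time / NLOOPS ~= ns/loop */
            printf("%.1f ns per loop\n", ns / (double)NLOOPS);
            return 0;
    }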

* To accomplish this, the 32-bit lk_count field becomes 64 bits.  The
  shared count is moved into the high 32 bits, allowing it to be
  manipulated both for blocking shared requests and for the shared lock
  count itself.  The low bits hold the exclusive lock count.  Control
  bits are adjusted to manage lockmgr features.

  LKC_SHARED    Indicates the shared lock count is active, else the
                exclusive lock count is.  Can predispose the lock when
                the related count is 0 (does not have to be cleared,
                for example).

  LKC_UPREQ     Queued upgrade request.  Automatically granted by the
                releasing entity (UPREQ -> ~SHARED|1).

  LKC_EXREQ     Queued exclusive request (only when the lock is held
                shared).  Automatically granted by the releasing entity
                (EXREQ -> ~SHARED|1).

  LKC_EXREQ2    Aggregated exclusive request.  When EXREQ cannot be
                obtained due to the lock being held exclusively or
                EXREQ already being queued, EXREQ2 is flagged for
                wakeup/retries.

  LKC_CANCEL    Cancel API support.

  LKC_SMASK     Shared lock count mask (LKC_SCOUNT increments).

  LKC_XMASK     Exclusive lock count mask (+1 increments).

  The 'no lock' condition occurs when LKC_XMASK is 0 and LKC_SMASK is
  0, regardless of the state of LKC_SHARED.
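
  The exact bit definitions live in sys/sys/lock.h (not shown in this
  excerpt).  A hypothetical layout consistent with the description
  above, for illustration only (the real values may differ):

    #define LKC_XMASK   0x0000000000ffffffULL  /* exclusive count (+1)   */
    #define LKC_SHARED  0x0000000001000000ULL  /* shared mode is active  */
    #define LKC_UPREQ   0x0000000002000000ULL  /* queued upgrade request */
    #define LKC_EXREQ   0x0000000004000000ULL  /* queued excl request    */
    #define LKC_EXREQ2  0x0000000008000000ULL  /* aggregated excl req    */
    #define LKC_CANCEL  0x0000000010000000ULL  /* cancel API support     */
    #define LKC_SSHIFT  32                     /* shared count shift     */
    #define LKC_SCOUNT  0x0000000100000000ULL  /* shared count increment */
    #define LKC_SMASK   0xffffffff00000000ULL  /* shared count field     */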

* Lockmgr still supports exclusive priority over shared locks.  The
  semantics have changed slightly: the priority mechanism now applies
  only to the EXREQ holder.  Once an exclusive lock is obtained, any
  blocked shared or exclusive requests have equal priority until the
  exclusive lock is released.  Once released, shared locks can squeeze
  in, but then the next pending exclusive lock will assert its priority
  over any new shared locks when it wakes up and loops.

  This isn't quite what I wanted, but it seems to work quite well.  I
  had to make a trade-off in the EXREQ lock-grant mechanism to improve
  performance.

* In addition, we use atomic_fcmpset_long() instead of
  atomic_cmpset_long() to reduce cache-line flip-flopping at least a
  little: on failure, fcmpset hands back the value it observed, so the
  retry loop does not have to re-read the lock word.
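
  (A generic sketch of the retry pattern, not the actual lockmgr code;
  the helper name is made up.  On failure, atomic_fcmpset_64() writes
  the value it observed back into 'count', so the loop can retry
  without having to reload the lock word itself.)

    static void
    set_flag_example(struct lock *lkp, uint64_t flag)
    {
            uint64_t count;

            count = lkp->lk_count;          /* single initial read */
            cpu_ccfence();
            for (;;) {
                    if (atomic_fcmpset_64(&lkp->lk_count, &count,
                                          count | flag)) {
                            break;
                    }
                    /* 'count' was updated by the failed fcmpset; retry */
            }
    }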

* Remove lockcount() and lockcountnb(), which tried to count lock refs.
  Replace with lockinuse(), which simply tells the caller whether the
  lock is referenced or not.
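
  A minimal sketch of what lockinuse() amounts to under the new layout
  (assumption; the real definition in sys/sys/lock.h may differ in
  detail):

    static __inline int
    lockinuse(struct lock *lkp)
    {
            /* referenced if either count field is non-zero */
            return ((lkp->lk_count & (LKC_SMASK | LKC_XMASK)) != 0);
    }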

* Expand some of the copyright notices (years and authors) for major
  rewrites.  There are really a lot more that deserve it; I need to pay
  more attention to these adjustments.

14 files changed:
sys/dev/disk/dm/dm_table.c
sys/dev/drm/include/linux/mutex.h
sys/dev/drm/include/linux/spinlock.h
sys/kern/kern_lock.c
sys/kern/kern_shutdown.c
sys/kern/vfs_bio.c
sys/kern/vfs_lock.c
sys/kern/vfs_subr.c
sys/sys/buf2.h
sys/sys/lock.h
sys/vfs/nfs/nfs_subs.c
sys/vfs/nfs/nfs_vnops.c
sys/vfs/tmpfs/tmpfs_subr.c
sys/vfs/ufs/ffs_softdep.c

index 2009539..a10b4d3 100644
@@ -252,7 +252,7 @@ dm_table_head_init(dm_table_head_t *head)
 void
 dm_table_head_destroy(dm_table_head_t *head)
 {
-       KKASSERT(lockcount(&head->table_mtx) == 0);
+       KKASSERT(!lockinuse(&head->table_mtx));
 
        /* tables don't exist when I call this routine, therefore it
         * doesn't make sense to have io_cnt != 0 */
index 8bbb90d..e594bb2 100644
@@ -30,7 +30,7 @@
 #include <sys/lock.h>
 #include <linux/lockdep.h>
 
-#define mutex_is_locked(lock)  (lockcount(lock) != 0)
+#define mutex_is_locked(lock)  (lockinuse(lock))
 
 #define mutex_lock(lock)       lockmgr(lock, LK_EXCLUSIVE)
 #define mutex_unlock(lock)     lockmgr(lock, LK_RELEASE)
index c1a3b32..199baf8 100644
@@ -35,7 +35,7 @@
 
 #define spin_is_locked(x)      spin_held(x)
 
-#define assert_spin_locked(x)  KKASSERT(lockcountnb(x))
+#define assert_spin_locked(x)  KKASSERT(lockinuse(x))
 
 /*
  * The spin_lock_irq() family of functions stop hardware interrupts
index 5ddae54..da570a8 100644
@@ -3,13 +3,16 @@
  *     The Regents of the University of California.  All rights reserved.
  * Copyright (C) 1997
  *     John S. Dyson.  All rights reserved.
- * Copyright (C) 2013-2014
+ * Copyright (C) 2013-2017
  *     Matthew Dillon, All rights reserved.
  *
  * This code contains ideas from software contributed to Berkeley by
  * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
  * System project at Carnegie-Mellon University.
  *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>.  Extensively rewritten.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,7 +51,9 @@
 #include <sys/spinlock2.h>
 #include <sys/indefinite2.h>
 
-static void undo_upreq(struct lock *lkp);
+static void undo_shreq(struct lock *lkp);
+static int undo_upreq(struct lock *lkp);
+static int undo_exreq(struct lock *lkp);
 
 #ifdef DEBUG_CANCEL_LOCKS
 
@@ -79,8 +84,6 @@ SYSCTL_INT(_debug, OID_AUTO, lock_test_mode, CTLFLAG_RW,
 #define COUNT(td, x)
 #endif
 
-static int lockmgr_waitupgrade(struct lock *lkp, u_int flags);
-
 /*
  * Helper, assert basic conditions
  */
@@ -106,117 +109,172 @@ lockmgr_shared(struct lock *lkp, u_int flags)
 {
        uint32_t extflags;
        thread_t td;
-       int count;
+       uint64_t count;
        int error;
        int pflags;
-       int wflags;
        int timo;
+       int didloop;
 
        _lockmgr_assert(lkp, flags);
        extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
        td = curthread;
-       error = 0;
+
        count = lkp->lk_count;
+       cpu_ccfence();
 
-       for (;;) {
-               cpu_ccfence();
+       /*
+        * If the caller already holds the lock exclusively then
+        * we silently obtain another count on the exclusive lock.
+        * Avoid accessing lk_lockholder until testing exclusivity.
+        *
+        * WARNING!  The old FreeBSD behavior was to downgrade,
+        *           but this creates a problem when recursions
+        *           return to the caller and the caller expects
+        *           its original exclusive lock to remain exclusively
+        *           locked.
+        */
+       if ((count & LKC_XMASK) && lkp->lk_lockholder == td) {
+               KKASSERT(lkp->lk_count & LKC_XMASK);
+               if ((extflags & LK_CANRECURSE) == 0) {
+                       if (extflags & LK_NOWAIT)
+                               return EBUSY;
+                       panic("lockmgr: locking against myself");
+               }
+               atomic_add_64(&lkp->lk_count, 1);
+               COUNT(td, 1);
+               return 0;
+       }
+
+       /*
+        * Unless TDF_DEADLKTREAT is set, we cannot add LKC_SCOUNT while
+        * SHARED is set and either EXREQ or UPREQ are set.
+        *
+        * NOTE: In the race-to-0 case (see undo_shreq()), we could
+        *       theoretically work the SMASK == 0 case here.
+        */
+       if ((td->td_flags & TDF_DEADLKTREAT) == 0) {
+               while ((count & LKC_SHARED) &&
+                      (count & (LKC_EXREQ | LKC_UPREQ))) {
+                       /*
+                        * Immediate failure conditions
+                        */
+                       if (extflags & LK_CANCELABLE) {
+                               if (count & LKC_CANCEL)
+                                       return ENOLCK;
+                       }
+                       if (extflags & LK_NOWAIT)
+                               return EBUSY;
 
+                       /*
+                        * Interlocked tsleep
+                        */
+                       pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
+                       timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
+
+                       tsleep_interlock(lkp, pflags);
+                       count = atomic_fetchadd_long(&lkp->lk_count, 0);
+
+                       if ((count & LKC_SHARED) &&
+                           (count & (LKC_EXREQ | LKC_UPREQ))) {
+                               error = tsleep(lkp, pflags | PINTERLOCKED,
+                                              lkp->lk_wmesg, timo);
+                               if (error)
+                                       return error;
+                               count = lkp->lk_count;
+                               cpu_ccfence();
+                               continue;
+                       }
+                       break;
+               }
+       }
+
+       /*
+        * Bump the SCOUNT field.  The shared lock is granted only once
+        * the SHARED flag gets set.  If it is already set, we are done.
+        *
+        * (Racing an EXREQ or UPREQ operation is ok here, we already did
+        * our duty above).
+        */
+       count = atomic_fetchadd_64(&lkp->lk_count, LKC_SCOUNT) + LKC_SCOUNT;
+       error = 0;
+       didloop = 0;
+
+       for (;;) {
                /*
-                * Normal case
+                * We may be able to grant ourselves the bit trivially.
+                * We're done once the SHARED bit is granted.
                 */
-               if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) {
-                       if (atomic_fcmpset_int(&lkp->lk_count,
-                                              &count, count + 1)) {
-                               COUNT(td, 1);
+               if ((count & (LKC_XMASK | LKC_EXREQ |
+                             LKC_UPREQ | LKC_SHARED)) == 0) {
+                       if (atomic_fcmpset_64(&lkp->lk_count,
+                                             &count, count | LKC_SHARED)) {
+                               /* count |= LKC_SHARED; NOT USED */
                                break;
                        }
                        continue;
                }
-
-               /*
-                * If the caller already holds the lock exclusively then
-                * we silently obtain another count on the exclusive lock.
-                *
-                * WARNING!  The old FreeBSD behavior was to downgrade,
-                *           but this creates a problem when recursions
-                *           return to the caller and the caller expects
-                *           its original exclusive lock to remain exclusively
-                *           locked.
-                */
-               if (lkp->lk_lockholder == td) {
-                       KKASSERT(count & LKC_EXCL);
-                       if ((extflags & LK_CANRECURSE) == 0) {
-                               if (extflags & LK_NOWAIT) {
-                                       error = EBUSY;
-                                       break;
-                               }
-                               panic("lockmgr: locking against myself");
+               if ((td->td_flags & TDF_DEADLKTREAT) &&
+                   (count & (LKC_XMASK | LKC_SHARED)) == 0) {
+                       if (atomic_fcmpset_64(&lkp->lk_count,
+                                             &count, count | LKC_SHARED)) {
+                               /* count |= LKC_SHARED; NOT USED */
+                               break;
                        }
-                       atomic_add_int(&lkp->lk_count, 1);
-                       COUNT(td, 1);
-                       break;
+                       continue;
                }
+               if (count & LKC_SHARED)
+                       break;
 
                /*
                 * Slow path
                 */
                pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
                timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
-               wflags = (td->td_flags & TDF_DEADLKTREAT) ?
-                               LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ);
 
-               /*
-                * Block while the lock is held exclusively or, conditionally,
-                * if other threads are trying to obtain an exclusive lock or
-                * upgrade to one.
-                */
-               if (count & wflags) {
-                       if (extflags & LK_CANCELABLE) {
-                               if (count & LKC_CANCEL) {
-                                       error = ENOLCK;
-                                       break;
-                               }
-                       }
-                       if (extflags & LK_NOWAIT) {
-                               error = EBUSY;
-                               break;
-                       }
-
-                       if ((extflags & LK_NOCOLLSTATS) == 0) {
-                               indefinite_info_t info;
-
-                               flags |= LK_NOCOLLSTATS;
-                               indefinite_init(&info, lkp->lk_wmesg, 1, 'l');
-                               error = lockmgr_shared(lkp, flags);
-                               indefinite_done(&info);
+               if (extflags & LK_CANCELABLE) {
+                       if (count & LKC_CANCEL) {
+                               undo_shreq(lkp);
+                               error = ENOLCK;
                                break;
                        }
+               }
+               if (extflags & LK_NOWAIT) {
+                       undo_shreq(lkp);
+                       error = EBUSY;
+                       break;
+               }
 
-                       tsleep_interlock(lkp, pflags);
-                       if (!atomic_fcmpset_int(&lkp->lk_count, &count,
-                                               count | LKC_SHREQ)) {
-                               continue;
-                       }
+               /*
+                * Interlocked after the first loop.
+                */
+               if (didloop) {
                        error = tsleep(lkp, pflags | PINTERLOCKED,
                                       lkp->lk_wmesg, timo);
-                       if (error)
-                               break;
                        if (extflags & LK_SLEEPFAIL) {
+                               undo_shreq(lkp);
                                error = ENOLCK;
                                break;
                        }
-                       continue;
+                       if (error) {
+                               undo_shreq(lkp);
+                               break;
+                       }
                }
+               didloop = 1;
 
                /*
-                * Otherwise we can bump the count
+                * Reload, shortcut grant case, then loop interlock
+                * and loop.
                 */
-               if (atomic_fcmpset_int(&lkp->lk_count, &count, count + 1)) {
-                       COUNT(td, 1);
+               count = lkp->lk_count;
+               if (count & LKC_SHARED)
                        break;
-               }
-               /* retry */
+               tsleep_interlock(lkp, pflags);
+               count = atomic_fetchadd_64(&lkp->lk_count, 0);
        }
+       if (error == 0)
+               COUNT(td, 1);
+
        return error;
 }
 
@@ -226,9 +284,10 @@ lockmgr_shared(struct lock *lkp, u_int flags)
 int
 lockmgr_exclusive(struct lock *lkp, u_int flags)
 {
+       uint64_t count;
+       uint64_t ncount;
        uint32_t extflags;
        thread_t td;
-       int count;
        int error;
        int pflags;
        int timo;
@@ -239,108 +298,205 @@ lockmgr_exclusive(struct lock *lkp, u_int flags)
 
        error = 0;
        count = lkp->lk_count;
+       cpu_ccfence();
 
-       for (;;) {
-               cpu_ccfence();
+       /*
+        * Recursive lock if we already hold it exclusively.  Avoid testing
+        * lk_lockholder until after testing lk_count.
+        */
+       if ((count & LKC_XMASK) && lkp->lk_lockholder == td) {
+               if ((extflags & LK_CANRECURSE) == 0) {
+                       if (extflags & LK_NOWAIT)
+                               return EBUSY;
+                       panic("lockmgr: locking against myself");
+               }
+               count = atomic_fetchadd_64(&lkp->lk_count, 1) + 1;
+               KKASSERT((count & LKC_XMASK) > 1);
+               COUNT(td, 1);
+               return 0;
+       }
 
+       /*
+        * Trivially acquire the lock, or block until we can set EXREQ.
+        * Set EXREQ2 if EXREQ is already set or the lock is already
+        * held exclusively.  EXREQ2 is an aggregation bit to request
+        * a wakeup.
+        *
+        * WARNING! We cannot set EXREQ if the lock is already held
+        *          exclusively because it may race another EXREQ
+        *          being cleared and granted.  We use the exclusivity
+        *          to prevent both EXREQ and UPREQ from being set.
+        *
+        *          This means that both shared and exclusive requests
+        *          have equal priority against a current exclusive holder's
+        *          release.  Exclusive requests still have priority over
+        *          new shared requests when the lock is already held shared.
+        */
+       for (;;) {
                /*
-                * Exclusive lock critical path.
+                * Normal trivial case
                 */
-               if (count == 0) {
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              LKC_EXCL | (count + 1))) {
+               if ((count & (LKC_UPREQ | LKC_EXREQ |
+                             LKC_XMASK)) == 0 &&
+                   ((count & LKC_SHARED) == 0 ||
+                    (count & LKC_SMASK) == 0)) {
+                       ncount = (count + 1) & ~LKC_SHARED;
+                       if (atomic_fcmpset_64(&lkp->lk_count,
+                                             &count, ncount)) {
                                lkp->lk_lockholder = td;
                                COUNT(td, 1);
-                               break;
+                               return 0;
                        }
                        continue;
                }
 
+               if (extflags & LK_CANCELABLE) {
+                       if (count & LKC_CANCEL)
+                               return ENOLCK;
+               }
+               if (extflags & LK_NOWAIT)
+                       return EBUSY;
+
                /*
-                * Recursive lock if we already hold it exclusively.
+                * Interlock to set EXREQ or EXREQ2
                 */
-               if (lkp->lk_lockholder == td) {
-                       KKASSERT(count & LKC_EXCL);
-                       if ((extflags & LK_CANRECURSE) == 0) {
-                               if (extflags & LK_NOWAIT) {
-                                       error = EBUSY;
-                                       break;
-                               }
-                               panic("lockmgr: locking against myself");
+               pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
+               timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
+
+               if (count & (LKC_EXREQ | LKC_XMASK))
+                       ncount = count | LKC_EXREQ2;
+               else
+                       ncount = count | LKC_EXREQ;
+               tsleep_interlock(lkp, pflags);
+               if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                       /*
+                        * If we successfully transitioned to EXREQ we
+                        * can break out, otherwise we had set EXREQ2 and
+                        * we block.
+                        */
+                       if ((count & (LKC_EXREQ | LKC_XMASK)) == 0) {
+                               count = ncount;
+                               break;
                        }
-                       atomic_add_int(&lkp->lk_count, 1);
-                       COUNT(td, 1);
-                       break;
+
+                       error = tsleep(lkp, pflags | PINTERLOCKED,
+                                      lkp->lk_wmesg, timo);
+                       count = lkp->lk_count;  /* reload */
+                       cpu_ccfence();
                }
+#ifdef INVARIANTS
+               if (lock_test_mode > 0) {
+                       --lock_test_mode;
+                       print_backtrace(8);
+               }
+#endif
+               if (error)
+                       return error;
+               if (extflags & LK_SLEEPFAIL)
+                       return ENOLCK;
+       }
 
+       /*
+        * Once EXREQ has been set, wait for it to be granted
+        * We enter the loop with tsleep_interlock() already called.
+        */
+       for (;;) {
                /*
-                * We will block, handle LK_NOWAIT
+                * Waiting for EXREQ to be granted to us.
+                *
+                * NOTE! If we try to trivially get the exclusive lock
+                *       (basically by racing undo_shreq()) and succeed,
+                *       we must still wakeup(lkp) for another exclusive
+                *       lock trying to acquire EXREQ.  Easier to simply
+                *       wait for our own wakeup.
                 */
-               if (extflags & LK_NOWAIT) {
-                       error = EBUSY;
+               if ((count & LKC_EXREQ) == 0) {
+                       KKASSERT(count & LKC_XMASK);
+                       lkp->lk_lockholder = td;
+                       COUNT(td, 1);
                        break;
                }
+
+               /*
+                * Block waiting for our exreq to be granted.
+                * Check cancelation.  NOWAIT was already dealt with.
+                */
                if (extflags & LK_CANCELABLE) {
                        if (count & LKC_CANCEL) {
+                               if (undo_exreq(lkp) == 0) {
+                                       lkp->lk_lockholder = LK_KERNTHREAD;
+                                       lockmgr_release(lkp, 0);
+                               }
                                error = ENOLCK;
                                break;
                        }
                }
 
-               if ((extflags & LK_NOCOLLSTATS) == 0) {
-                       indefinite_info_t info;
-
-                       flags |= LK_NOCOLLSTATS;
-                       indefinite_init(&info, lkp->lk_wmesg, 1, 'L');
-                       error = lockmgr_exclusive(lkp, flags);
-                       indefinite_done(&info);
-                       break;
-               }
-
-               /*
-                * Wait until we can obtain the exclusive lock.  EXREQ is
-                * automatically cleared when all current holders release
-                * so if we abort the operation we can safely leave it set.
-                * There might be other exclusive requesters.
-                */
                pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
                timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
 
-               tsleep_interlock(lkp, pflags);
-               if (!atomic_fcmpset_int(&lkp->lk_count, &count,
-                                       count | LKC_EXREQ)) {
-                       continue;
-               }
-
-               error = tsleep(lkp, pflags | PINTERLOCKED,
-                              lkp->lk_wmesg, timo);
+               error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo);
 #ifdef INVARIANTS
                if (lock_test_mode > 0) {
                        --lock_test_mode;
                        print_backtrace(8);
                }
 #endif
-               if (error)
-                       break;
+               /*
+                * A tsleep error is uncommon.  If it occurs we have to
+                * undo our EXREQ.  If we are granted the exclusive lock
+                * as we try to undo we have to deal with it.
+                */
                if (extflags & LK_SLEEPFAIL) {
-                       error = ENOLCK;
+                       if (undo_exreq(lkp) == 0) {
+                               lkp->lk_lockholder = LK_KERNTHREAD;
+                               lockmgr_release(lkp, 0);
+                       }
+                       if (error == 0)
+                               error = ENOLCK;
                        break;
                }
-               /* retry */
+               if (error) {
+                       if (undo_exreq(lkp))
+                               break;
+                       lkp->lk_lockholder = td;
+                       COUNT(td, 1);
+                       error = 0;
+                       break;
+               }
+
+               /*
+                * Reload after sleep, shortcut grant case.
+                * Then set the interlock and loop.
+                */
+               count = lkp->lk_count;
+               cpu_ccfence();
+               if ((count & LKC_EXREQ) == 0) {
+                       KKASSERT(count & LKC_XMASK);
+                       lkp->lk_lockholder = td;
+                       COUNT(td, 1);
+                       break;
+               }
+               tsleep_interlock(lkp, pflags);
+               count = atomic_fetchadd_64(&lkp->lk_count, 0);
        }
        return error;
 }
 
 /*
- * Downgrade an exclusive lock to shared
+ * Downgrade an exclusive lock to shared.
+ *
+ * This function always succeeds as long as the caller owns a legal
+ * exclusive lock with one reference.  UPREQ and EXREQ are ignored.
  */
 int
 lockmgr_downgrade(struct lock *lkp, u_int flags)
 {
+       uint64_t count;
+       uint64_t ncount;
        uint32_t extflags;
        thread_t otd;
        thread_t td;
-       int count;
 
        extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
        td = curthread;
@@ -353,22 +509,37 @@ lockmgr_downgrade(struct lock *lkp, u_int flags)
                 * Downgrade an exclusive lock into a shared lock.  All
                 * counts on a recursive exclusive lock become shared.
                 *
-                * This function always succeeds.
+                * NOTE: Currently to reduce confusion we only allow
+                *       there to be one exclusive lock count, and panic
+                *       if there are more.
                 */
-               if (lkp->lk_lockholder != td ||
-                   (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) {
-                       panic("lockmgr: not holding exclusive lock");
+               if (lkp->lk_lockholder != td || (count & LKC_XMASK) != 1) {
+                       panic("lockmgr: not holding exclusive lock: "
+                             "%p/%p %016jx", lkp->lk_lockholder, td, count);
                }
 
                /*
-                * NOTE! Must NULL-out lockholder before releasing LKC_EXCL.
+                * NOTE! Must NULL-out lockholder before releasing the
+                *       exclusive lock.
+                *
+                * NOTE! There might be pending shared requests, check
+                *       and wake them up.
                 */
                otd = lkp->lk_lockholder;
                lkp->lk_lockholder = NULL;
-               if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                      count & ~(LKC_EXCL|LKC_SHREQ))) {
-                       if (count & LKC_SHREQ)
+               ncount = (count & ~(LKC_XMASK | LKC_EXREQ2)) +
+                        ((count & LKC_XMASK) << LKC_SSHIFT);
+               ncount |= LKC_SHARED;
+
+               if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                       /*
+                        * Wakeup any shared waiters (prior SMASK), or
+                        * any exclusive requests that couldn't set EXREQ
+                        * because the lock had been held exclusively.
+                        */
+                       if (count & (LKC_SMASK | LKC_EXREQ2))
                                wakeup(lkp);
+                       /* count = ncount; NOT USED */
                        break;
                }
                lkp->lk_lockholder = otd;
@@ -380,17 +551,23 @@ lockmgr_downgrade(struct lock *lkp, u_int flags)
 /*
  * Upgrade a shared lock to exclusive.  If LK_EXCLUPGRADE then guarantee
  * that no other exclusive requester can get in front of us and fail
- * immediately if another upgrade is pending.
+ * immediately if another upgrade is pending.  If we fail, the shared
+ * lock is released.
+ *
+ * If LK_EXCLUPGRADE is not set and we cannot upgrade because someone
+ * else is in front of us, we release the shared lock and acquire the
+ * exclusive lock normally.  If a failure occurs, the shared lock is
+ * released.
  */
 int
 lockmgr_upgrade(struct lock *lkp, u_int flags)
 {
+       uint64_t count;
+       uint64_t ncount;
        uint32_t extflags;
        thread_t td;
-       int count;
        int error;
        int pflags;
-       int wflags;
        int timo;
 
        _lockmgr_assert(lkp, flags);
@@ -398,219 +575,139 @@ lockmgr_upgrade(struct lock *lkp, u_int flags)
        td = curthread;
        error = 0;
        count = lkp->lk_count;
+       cpu_ccfence();
 
-       for (;;) {
-               cpu_ccfence();
+       /*
+        * If we already hold the lock exclusively this operation
+        * succeeds and is a NOP.
+        */
+       if (count & LKC_XMASK) {
+               if (lkp->lk_lockholder == td)
+                       return 0;
+               panic("lockmgr: upgrade unowned lock");
+       }
+       if ((count & LKC_SMASK) == 0)
+               panic("lockmgr: upgrade unowned lock");
 
+       /*
+        * Loop to acquire LKC_UPREQ
+        */
+       for (;;) {
                /*
-                * Upgrade from a single shared lock to an exclusive lock.
+                * If UPREQ is already pending, release the shared lock
+                * and acquire an exclusive lock normally.
                 *
-                * If another process is ahead of us to get an upgrade,
-                * then we want to fail rather than have an intervening
-                * exclusive access.  The shared lock is released on
-                * failure.
+                * If NOWAIT or EXCLUPGRADE the operation must be atomic,
+                * and this isn't, so we fail.
                 */
-               if ((flags & LK_TYPE_MASK) == LK_EXCLUPGRADE) {
-                       if (count & LKC_UPREQ) {
-                               lockmgr_release(lkp, LK_RELEASE);
+               if (count & LKC_UPREQ) {
+                       lockmgr_release(lkp, 0);
+                       if ((flags & LK_TYPE_MASK) == LK_EXCLUPGRADE)
                                error = EBUSY;
-                               break;
-                       }
+                       else if (extflags & LK_NOWAIT)
+                               error = EBUSY;
+                       else
+                               error = lockmgr_exclusive(lkp, flags);
+                       return error;
                }
-               /* fall through into normal upgrade */
 
                /*
-                * Upgrade a shared lock to an exclusive one.  This can cause
-                * the lock to be temporarily released and stolen by other
-                * threads.  LK_SLEEPFAIL or LK_NOWAIT may be used to detect
-                * this case, or use LK_EXCLUPGRADE.
-                *
-                * If the lock is already exclusively owned by us, this
-                * operation is a NOP.
-                *
-                * If we return an error (even NOWAIT), the current lock will
-                * be released.
-                *
-                * Start with the critical path.
+                * Try to immediately grant the upgrade, handle NOWAIT,
+                * or release the shared lock and simultaneously set UPREQ.
                 */
-               if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) {
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count | LKC_EXCL)) {
+               if ((count & LKC_SMASK) == LKC_SCOUNT) {
+                       /*
+                        * Immediate grant
+                        */
+                       ncount = (count - LKC_SCOUNT + 1) & ~LKC_SHARED;
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
                                lkp->lk_lockholder = td;
+                               return 0;
+                       }
+               } else if (extflags & LK_NOWAIT) {
+                       /*
+                        * Early EBUSY if an immediate grant is impossible
+                        */
+                       lockmgr_release(lkp, 0);
+                       return EBUSY;
+               } else {
+                       /*
+                        * Multiple shared locks present, request the
+                        * upgrade and break to the next loop.
+                        */
+                       pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
+                       tsleep_interlock(lkp, pflags);
+                       ncount = (count - LKC_SCOUNT) | LKC_UPREQ;
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               count = ncount;
                                break;
                        }
-                       continue;
                }
+               /* retry */
+       }
 
-               /*
-                * We own a lock coming into this, so there cannot be an
-                * UPGRANT already flagged.
-                */
-               KKASSERT((count & LKC_UPGRANT) == 0);
-
-               /*
-                * If we already hold the lock exclusively this operation
-                * succeeds and is a NOP.
-                */
-               if (count & LKC_EXCL) {
-                       if (lkp->lk_lockholder == td)
-                               break;
-                       panic("lockmgr: upgrade unowned lock");
-               }
-               if ((count & LKC_MASK) == 0)
-                       panic("lockmgr: upgrade unowned lock");
+       /*
+        * We have acquired LKC_UPREQ, wait until the upgrade is granted
+        * or the tsleep fails.
+        *
+        * NOWAIT and EXCLUPGRADE have already been handled.  The first
+        * tsleep_interlock() has already been associated.
+        */
+       for (;;) {
+               cpu_ccfence();
 
                /*
-                * We cannot upgrade without blocking at this point.
+                * We were granted our upgrade.  No other UPREQ can be
+                * made pending because we are now exclusive.
                 */
-               if (extflags & LK_NOWAIT) {
-                       lockmgr_release(lkp, LK_RELEASE);
-                       error = EBUSY;
+               if ((count & LKC_UPREQ) == 0) {
+                       KKASSERT((count & LKC_XMASK) == 1);
+                       lkp->lk_lockholder = td;
                        break;
                }
+
                if (extflags & LK_CANCELABLE) {
                        if (count & LKC_CANCEL) {
+                               if (undo_upreq(lkp) == 0) {
+                                       lkp->lk_lockholder = LK_KERNTHREAD;
+                                       lockmgr_release(lkp, 0);
+                               }
                                error = ENOLCK;
                                break;
                        }
                }
 
-               if ((extflags & LK_NOCOLLSTATS) == 0) {
-                       indefinite_info_t info;
-
-                       flags |= LK_NOCOLLSTATS;
-                       indefinite_init(&info, lkp->lk_wmesg, 1, 'U');
-                       error = lockmgr_upgrade(lkp, flags);
-                       indefinite_done(&info);
-                       break;
-               }
-
-               /*
-                * Release the shared lock and request the upgrade.
-                */
                pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
                timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
-               tsleep_interlock(lkp, pflags);
-               wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ;
 
-               /*
-                * If someone else owns UPREQ and this transition would
-                * allow it to be granted, we have to grant it.  Our
-                * lock count is transfered (we effectively release).
-                * We will then request a normal exclusive lock.
-                *
-                * Otherwise we release the shared lock and either do
-                * an UPREQ or an EXREQ.  The count is always > 1 in
-                * this case since we handle all other count == 1
-                * situations here and above.
-                */
-               if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) {
-                       wflags |= LKC_EXCL | LKC_UPGRANT;
-                       wflags |= count;
-                       wflags &= ~LKC_UPREQ;   /* was set from count */
-               } else {
-                       wflags |= (count - 1);
-               }
-
-               if (atomic_fcmpset_int(&lkp->lk_count, &count, wflags)) {
-                       COUNT(td, -1);
-
-                       /*
-                        * Must wakeup the thread granted the upgrade.
-                        */
-                       if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1))
-                               wakeup(lkp);
-
-                       error = tsleep(lkp, pflags | PINTERLOCKED,
-                                      lkp->lk_wmesg, timo);
-                       if (error) {
-                               if ((count & LKC_UPREQ) == 0)
-                                       undo_upreq(lkp);
-                               break;
+               error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo);
+               if (extflags & LK_SLEEPFAIL) {
+                       if (undo_upreq(lkp) == 0) {
+                               lkp->lk_lockholder = LK_KERNTHREAD;
+                               lockmgr_release(lkp, 0);
                        }
-                       if (extflags & LK_SLEEPFAIL) {
-                               if ((count & LKC_UPREQ) == 0)
-                                       undo_upreq(lkp);
+                       if (error == 0)
                                error = ENOLCK;
-                               break;
-                       }
-
-                       /*
-                        * Refactor to either LK_EXCLUSIVE or LK_WAITUPGRADE,
-                        * depending on whether we were able to acquire the
-                        * LKC_UPREQ bit.
-                        */
-                       if (count & LKC_UPREQ)
-                               error = lockmgr_exclusive(lkp, flags);
-                       else
-                               error = lockmgr_waitupgrade(lkp, flags);
                        break;
                }
-               /* retry */
-       }
-       return error;
-}
-
-/*
- * (internal helper)
- */
-static int
-lockmgr_waitupgrade(struct lock *lkp, u_int flags)
-{
-       uint32_t extflags;
-       thread_t td;
-       int count;
-       int error;
-       int pflags;
-       int timo;
-
-       extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
-       td = curthread;
-       error = 0;
-       count = lkp->lk_count;
-
-       for (;;) {
-               cpu_ccfence();
+               if (error) {
+                       if (undo_upreq(lkp))
+                               break;
+                       error = 0;
+               }
 
                /*
-                * We own the LKC_UPREQ bit, wait until we are granted the
-                * exclusive lock (LKC_UPGRANT is set).
-                *
-                * IF THE OPERATION FAILS (tsleep error tsleep+LK_SLEEPFAIL),
-                * we have to undo the upgrade request and clean up any lock
-                * that might have been granted via a race.
+                * Reload the lock, short-cut the UPGRANT code before
+                * taking the time to interlock and loop.
                 */
-               if (count & LKC_UPGRANT) {
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count & ~LKC_UPGRANT)) {
-                               lkp->lk_lockholder = td;
-                               KKASSERT(count & LKC_EXCL);
-                               break;
-                       }
-                       /* retry */
-               } else if ((count & LKC_CANCEL) && (extflags & LK_CANCELABLE)) {
-                       undo_upreq(lkp);
-                       error = ENOLCK;
+               count = lkp->lk_count;
+               if ((count & LKC_UPREQ) == 0) {
+                       KKASSERT((count & LKC_XMASK) == 1);
+                       lkp->lk_lockholder = td;
                        break;
-               } else {
-                       pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
-                       timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
-                       tsleep_interlock(lkp, pflags);
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count, count)) {
-                               error = tsleep(lkp, pflags | PINTERLOCKED,
-                                              lkp->lk_wmesg, timo);
-                               if (error) {
-                                       undo_upreq(lkp);
-                                       break;
-                               }
-                               if (extflags & LK_SLEEPFAIL) {
-                                       error = ENOLCK;
-                                       undo_upreq(lkp);
-                                       break;
-                               }
-                       }
-                       /* retry */
                }
+               tsleep_interlock(lkp, pflags);
+               count = atomic_fetchadd_64(&lkp->lk_count, 0);
                /* retry */
        }
        return error;
@@ -618,148 +715,227 @@ lockmgr_waitupgrade(struct lock *lkp, u_int flags)
 
 /*
  * Release a held lock
+ *
+ * NOTE: When releasing to an unlocked state, we set the SHARED bit
+ *      to optimize shared lock requests.
  */
 int
 lockmgr_release(struct lock *lkp, u_int flags)
 {
+       uint64_t count;
+       uint64_t ncount;
        uint32_t extflags;
        thread_t otd;
        thread_t td;
-       int count;
 
        extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
        td = curthread;
+
        count = lkp->lk_count;
+       cpu_ccfence();
 
        for (;;) {
-               cpu_ccfence();
-
                /*
-                * Release the currently held lock.  If releasing the current
-                * lock as part of an error return, error will ALREADY be
-                * non-zero.
+                * Release the currently held lock, grant all requests
+                * possible.
                 *
-                * When releasing the last lock we automatically transition
-                * LKC_UPREQ to LKC_EXCL|1.
-                *
-                * WARNING! We cannot detect when there are multiple exclusive
-                *          requests pending.  We clear EXREQ unconditionally
-                *          on the 1->0 transition so it is possible for
-                *          shared requests to race the next exclusive
-                *          request.
-                *
-                * WAERNING! lksleep() assumes that LK_RELEASE does not
+                * WARNING! lksleep() assumes that LK_RELEASE does not
                 *          block.
                 *
                 * Always succeeds.
+                * Never blocks.
                 */
-               if ((count & LKC_MASK) == 0)
+               if ((count & (LKC_SMASK | LKC_XMASK)) == 0)
                        panic("lockmgr: LK_RELEASE: no lock held");
 
-               if (count & LKC_EXCL) {
+               if (count & LKC_XMASK) {
+                       /*
+                        * Release exclusively held lock
+                        */
                        if (lkp->lk_lockholder != LK_KERNTHREAD &&
                            lkp->lk_lockholder != td) {
-                               panic("lockmgr: pid %d, not exlusive "
+                               panic("lockmgr: pid %d, not exclusive "
                                      "lock holder thr %p/%p unlocking",
                                    (td->td_proc ? td->td_proc->p_pid : -1),
                                    td, lkp->lk_lockholder);
                        }
-                       if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
+                       if ((count & (LKC_UPREQ | LKC_EXREQ |
+                                     LKC_XMASK)) == 1) {
                                /*
                                 * Last exclusive count is being released
+                                * with no UPREQ or EXREQ.  The SHARED
+                                * bit can be set or not without messing
+                                * anything up, so precondition it to
+                                * SHARED (which is the most cpu-optimal).
+                                *
+                                * Wakeup any EXREQ2.  EXREQ cannot be
+                                * set while an exclusive count is present
+                                * so we have to wakeup any EXREQ2 we find.
+                                *
+                                * We could hint the EXREQ2 by leaving
+                                * SHARED unset, but atm I don't see any
+                                * usefulness.
                                 */
                                otd = lkp->lk_lockholder;
                                lkp->lk_lockholder = NULL;
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                             (count - 1) &
-                                          ~(LKC_EXCL | LKC_EXREQ |
-                                            LKC_SHREQ| LKC_CANCEL))) {
-                                       lkp->lk_lockholder = otd;
-                                       continue;
+                               ncount = (count - 1);
+                               ncount &= ~(LKC_CANCEL | LKC_EXREQ2);
+                               ncount |= LKC_SHARED;
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       if (count & (LKC_SMASK | LKC_EXREQ2))
+                                               wakeup(lkp);
+                                       if (otd != LK_KERNTHREAD)
+                                               COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
                                }
-                               if (count & (LKC_EXREQ|LKC_SHREQ))
-                                       wakeup(lkp);
-                               /* success */
-                       } else if ((count & (LKC_UPREQ|LKC_MASK)) ==
+                               lkp->lk_lockholder = otd;
+                               /* retry */
+                       } else if ((count & (LKC_UPREQ | LKC_XMASK)) ==
                                   (LKC_UPREQ | 1)) {
                                /*
                                 * Last exclusive count is being released but
                                 * an upgrade request is present, automatically
                                 * grant an exclusive state to the owner of
-                                * the upgrade request.
+                                * the upgrade request.  Transfer count to
+                                * grant.
+                                *
+                                * EXREQ cannot be set while an exclusive
+                                * holder exists, so do not clear EXREQ2.
                                 */
                                otd = lkp->lk_lockholder;
                                lkp->lk_lockholder = NULL;
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                               (count & ~LKC_UPREQ) |
-                                               LKC_UPGRANT)) {
-                                       lkp->lk_lockholder = otd;
-                                       continue;
+                               ncount = count & ~LKC_UPREQ;
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       wakeup(lkp);
+                                       if (otd != LK_KERNTHREAD)
+                                               COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
                                }
-                               wakeup(lkp);
-                               /* success */
-                       } else {
+                               lkp->lk_lockholder = otd;
+                               /* retry */
+                       } else if ((count & (LKC_EXREQ | LKC_XMASK)) ==
+                                  (LKC_EXREQ | 1)) {
+                               /*
+                                * Last exclusive count is being released but
+                                * an exclusive request is present.  We
+                                * automatically grant an exclusive state to
+                                * the owner of the exclusive request,
+                                * transferring our count.
+                                *
+                                * This case virtually never occurs because
+                                * EXREQ is not set while exclusive holders
+                                * exist.  However, it might be set if an
+                                * exclusive request is pending and a
+                                * shared holder upgrades.
+                                *
+                                * Don't bother clearing EXREQ2.  A thread
+                                * waiting to set EXREQ can't do it while
+                                * an exclusive lock is present.
+                                */
                                otd = lkp->lk_lockholder;
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                                       count - 1)) {
-                                       continue;
+                               lkp->lk_lockholder = NULL;
+                               ncount = count & ~LKC_EXREQ;
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       wakeup(lkp);
+                                       if (otd != LK_KERNTHREAD)
+                                               COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
                                }
-                               /* success */
+                               lkp->lk_lockholder = otd;
+                               /* retry */
+                       } else {
+                               /*
+                                * Multiple exclusive counts, drop by 1.
+                                * Since we are the holder and there is more
+                                * than one count, we can just decrement it.
+                                */
+                               count =
+                                   atomic_fetchadd_long(&lkp->lk_count, -1);
+                               /* count = count - 1  NOT NEEDED */
+                               if (lkp->lk_lockholder != LK_KERNTHREAD)
+                                       COUNT(td, -1);
+                               break;
                        }
-                       /* success */
-                       if (otd != LK_KERNTHREAD)
-                               COUNT(td, -1);
+                       /* retry */
                } else {
-                       if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
+                       /*
+                        * Release shared lock
+                        */
+                       KKASSERT((count & LKC_SHARED) && (count & LKC_SMASK));
+                       if ((count & (LKC_EXREQ | LKC_UPREQ | LKC_SMASK)) ==
+                           LKC_SCOUNT) {
                                /*
                                 * Last shared count is being released,
-                                * no upgrade request present.
+                                * no exclusive or upgrade request present.
+                                * Generally leave the shared bit set.
+                                * Clear the CANCEL bit.
                                 */
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                             (count - 1) &
-                                              ~(LKC_EXREQ | LKC_SHREQ |
-                                                LKC_CANCEL))) {
-                                       continue;
+                               ncount = (count - LKC_SCOUNT) & ~LKC_CANCEL;
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
                                }
-                               if (count & (LKC_EXREQ|LKC_SHREQ))
-                                       wakeup(lkp);
-                               /* success */
-                       } else if ((count & (LKC_UPREQ|LKC_MASK)) ==
-                                  (LKC_UPREQ | 1)) {
+                               /* retry */
+                       } else if ((count & (LKC_UPREQ | LKC_SMASK)) ==
+                                  (LKC_UPREQ | LKC_SCOUNT)) {
                                /*
                                 * Last shared count is being released but
                                 * an upgrade request is present, automatically
                                 * grant an exclusive state to the owner of
-                                * the upgrade request.  Masked count
-                                * remains 1.
+                                * the upgrade request and transfer the count.
+                                */
+                               ncount = (count - LKC_SCOUNT + 1) &
+                                        ~(LKC_UPREQ | LKC_CANCEL | LKC_SHARED);
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       wakeup(lkp);
+                                       COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
+                               }
+                               /* retry */
+                       } else if ((count & (LKC_EXREQ | LKC_SMASK)) ==
+                                  (LKC_EXREQ | LKC_SCOUNT)) {
+                               /*
+                                * Last shared count is being released but
+                                * an exclusive request is present, we
+                                * automatically grant an exclusive state to
+                                * the owner of the request and transfer
+                                * the count.
                                 */
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                             (count & ~(LKC_UPREQ |
-                                                        LKC_CANCEL)) |
-                                             LKC_EXCL | LKC_UPGRANT)) {
-                                       continue;
+                               ncount = (count - LKC_SCOUNT + 1) &
+                                        ~(LKC_EXREQ | LKC_EXREQ2 |
+                                          LKC_CANCEL | LKC_SHARED);
+                               if (atomic_fcmpset_64(&lkp->lk_count,
+                                                     &count, ncount)) {
+                                       wakeup(lkp);
+                                       COUNT(td, -1);
+                                       /* count = ncount; NOT USED */
+                                       break;
                                }
-                               wakeup(lkp);
+                               /* retry */
                        } else {
                                /*
-                                * Shared count is greater than 1, just
-                                * decrement it by one.
+                                * Shared count is greater than 1.  We can
+                                * just use undo_shreq() to clean things up.
+                                * undo_shreq() will also handle races to 0
+                                * after the fact.
                                 */
-                               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                                       &count,
-                                                       count - 1)) {
-                                       continue;
-                               }
+                               undo_shreq(lkp);
+                               COUNT(td, -1);
+                               break;
                        }
-                       /* success */
-                       COUNT(td, -1);
+                       /* retry */
                }
-               break;
+               /* retry */
        }
        return 0;
 }
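
The grant-on-release cases above are single 64-bit transforms of lk_count.  As an
illustration only (not part of the patch), the UPREQ case can be reproduced in a
tiny standalone program; the constants mirror the LKC_* values added to
sys/sys/lock.h further down in this diff:

	#include <stdint.h>
	#include <stdio.h>

	#define LKC_SHARED	0x0000000020000000ULL	/* mirrors sys/sys/lock.h */
	#define LKC_UPREQ	0x0000000010000000ULL
	#define LKC_CANCEL	0x0000000004000000ULL
	#define LKC_SCOUNT	0x0000000100000000ULL

	int
	main(void)
	{
		/* one shared ref (SCOUNT), shared mode, upgrade queued */
		uint64_t count = LKC_SCOUNT | LKC_SHARED | LKC_UPREQ;

		/* the same expression the UPREQ grant path above uses */
		uint64_t ncount = (count - LKC_SCOUNT + 1) &
				  ~(LKC_UPREQ | LKC_CANCEL | LKC_SHARED);

		/*
		 * Prints 0000000130000000 -> 0000000000000001: the shared
		 * count drains away and the upgrader is left holding one
		 * exclusive count with all control bits clear.
		 */
		printf("%016jx -> %016jx\n", (uintmax_t)count, (uintmax_t)ncount);
		return 0;
	}
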
@@ -776,19 +952,30 @@ lockmgr_release(struct lock *lkp, u_int flags)
 int
 lockmgr_cancel_beg(struct lock *lkp, u_int flags)
 {
-       int count;
+       uint64_t count;
 
        count = lkp->lk_count;
        for (;;) {
                cpu_ccfence();
 
                KKASSERT((count & LKC_CANCEL) == 0);    /* disallowed case */
-               KKASSERT((count & LKC_MASK) != 0);      /* issue w/lock held */
-               if (!atomic_fcmpset_int(&lkp->lk_count,
-                                       &count, count | LKC_CANCEL)) {
+
+               /* issue w/lock held */
+               KKASSERT((count & (LKC_XMASK | LKC_SMASK)) != 0);
+
+               if (!atomic_fcmpset_64(&lkp->lk_count,
+                                      &count, count | LKC_CANCEL)) {
                        continue;
                }
-               if (count & (LKC_EXREQ|LKC_SHREQ|LKC_UPREQ)) {
+               /* count |= LKC_CANCEL; NOT USED */
+
+               /*
+                * Wakeup any waiters.
+                *
+                * NOTE: EXREQ2 only matters when EXREQ is set, so don't
+                *       bother checking EXREQ2.
+                */
+               if (count & (LKC_EXREQ | LKC_SMASK | LKC_UPREQ)) {
                        wakeup(lkp);
                }
                break;
@@ -803,59 +990,214 @@ lockmgr_cancel_beg(struct lock *lkp, u_int flags)
 int
 lockmgr_cancel_end(struct lock *lkp, u_int flags)
 {
-       atomic_clear_int(&lkp->lk_count, LKC_CANCEL);
+       atomic_clear_long(&lkp->lk_count, LKC_CANCEL);
 
        return 0;
 }
 
 /*
- * Undo an upgrade request
+ * Back out SCOUNT from a failed shared lock attempt and handle any race
+ * to 0.  This function is also used by the release code for the less
+ * optimal race to 0 case.
+ *
+ * Always succeeds
+ * Must not block
+ */
+static void
+undo_shreq(struct lock *lkp)
+{
+       uint64_t count;
+       uint64_t ncount;
+
+       count = atomic_fetchadd_64(&lkp->lk_count, -LKC_SCOUNT) - LKC_SCOUNT;
+       while ((count & (LKC_EXREQ | LKC_UPREQ | LKC_CANCEL)) &&
+              (count & (LKC_SMASK | LKC_XMASK)) == 0) {
+               /*
+                * Note that UPREQ must have priority over EXREQ, and EXREQ
+                * over CANCEL, so if the atomic op fails we have to loop up.
+                */
+               if (count & LKC_UPREQ) {
+                       ncount = (count + 1) & ~(LKC_UPREQ | LKC_CANCEL |
+                                                LKC_SHARED);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               wakeup(lkp);
+                               /* count = ncount; NOT USED */
+                               break;
+                       }
+                       continue;
+               }
+               if (count & LKC_EXREQ) {
+                       ncount = (count + 1) & ~(LKC_EXREQ | LKC_EXREQ2 |
+                                                LKC_CANCEL | LKC_SHARED);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               wakeup(lkp);
+                               /* count = ncount; NOT USED */
+                               break;
+                       }
+                       continue;
+               }
+               if (count & LKC_CANCEL) {
+                       ncount = count & ~LKC_CANCEL;
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               /* count = ncount; NOT USED */
+                               break;
+                       }
+               }
+               /* retry */
+       }
+}
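
To show why this backout exists, here is a minimal sketch of the optimistic
shared-acquire shape it supports.  The sketch is hypothetical (shared_try_sketch
is an invented name, and the patch's real acquire path also manages LKC_SHARED,
LKC_CANCEL and the sleep/retry logic, which is omitted here); only undo_shreq()
and the LKC_* names come from the patch:

	static int
	shared_try_sketch(struct lock *lkp)
	{
		uint64_t count;

		/* optimistically bump the shared count first */
		count = atomic_fetchadd_64(&lkp->lk_count, LKC_SCOUNT) + LKC_SCOUNT;

		/* no exclusive holder, no queued upgrade or exclusive request */
		if ((count & (LKC_XMASK | LKC_UPREQ | LKC_EXREQ)) == 0)
			return 0;

		/* disallowed: back the bump out, resolving any race to 0 */
		undo_shreq(lkp);
		return EBUSY;
	}
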
+
+/*
+ * Undo an exclusive request.  Returns EBUSY if we were able to undo the
+ * request, and 0 if the request was granted before we could undo it.
+ * When 0 is returned, the lock state has not been modified.  The caller
+ * is responsible for setting the lockholder to curthread.
  */
 static
-void
-undo_upreq(struct lock *lkp)
+int
+undo_exreq(struct lock *lkp)
 {
-       int count;
+       uint64_t count;
+       uint64_t ncount;
+       int error;
 
        count = lkp->lk_count;
+       error = 0;
+
        for (;;) {
                cpu_ccfence();
 
-               if (count & LKC_UPGRANT) {
+               if ((count & LKC_EXREQ) == 0) {
+                       /*
+                        * EXREQ was granted.  We own the exclusive lock.
+                        */
+                       break;
+               }
+               if (count & LKC_XMASK) {
+                       /*
+                        * Clear the EXREQ we still own.  Only wakeup on
+                        * EXREQ2 if no UPREQ.  There are still exclusive
+                        * holders so do not wake up any shared locks or
+                        * any UPREQ.
+                        *
+                        * If there is an UPREQ it will issue a wakeup()
+                        * for any EXREQ wait loops, so we can clear EXREQ2
+                        * now.
+                        */
+                       ncount = count & ~(LKC_EXREQ | LKC_EXREQ2);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               if ((count & (LKC_EXREQ2 | LKC_UPREQ)) ==
+                                   LKC_EXREQ2) {
+                                       wakeup(lkp);
+                               }
+                               error = EBUSY;
+                               /* count = ncount; NOT USED */
+                               break;
+                       }
+                       /* retry */
+               } else if (count & LKC_UPREQ) {
+                       /*
+                        * Clear the EXREQ we still own.  We cannot wakeup any
+                        * shared or exclusive waiters because there is an
+                        * upgrade request pending (which we do not handle here).
+                        *
+                        * If there is an UPREQ it will issue a wakeup()
+                        * for any EXREQ wait loops, so we can clear EXREQ2
+                        * now.
+                        */
+                       ncount = count & ~(LKC_EXREQ | LKC_EXREQ2);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               error = EBUSY;
+                               break;
+                       }
+                       /* retry */
+               } else if ((count & LKC_SHARED) && (count & LKC_SMASK)) {
                        /*
-                        * UPREQ was shifted to UPGRANT.  We own UPGRANT now,
-                        * another thread might own UPREQ.  Clear UPGRANT
-                        * and release the granted lock.
+                        * No UPREQ, lock not held exclusively, but the lock
+                        * is held shared.  Clear EXREQ, wakeup anyone trying
+                        * to get the EXREQ bit (they have to set it
+                        * themselves, EXREQ2 is an aggregation).
                         */
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count & ~LKC_UPGRANT)) {
-                               lkp->lk_lockholder = curthread;
-                               lockmgr(lkp, LK_RELEASE);
+                       ncount = count & ~(LKC_EXREQ | LKC_EXREQ2);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               if (count & LKC_EXREQ2)
+                                       wakeup(lkp);
+                               error = EBUSY;
+                               /* count = ncount; NOT USED */
                                break;
                        }
-               } else if (count & LKC_EXCL) {
+                       /* retry */
+               } else {
+                       /*
+                        * No UPREQ, lock not held exclusively or shared.
+                        * Grant the EXREQ and wakeup anyone waiting on
+                        * EXREQ2.
+                        */
+                       ncount = (count + 1) & ~(LKC_EXREQ | LKC_EXREQ2);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               if (count & LKC_EXREQ2)
+                                       wakeup(lkp);
+                               /* count = ncount; NOT USED */
+                               /* we are granting, error == 0 */
+                               break;
+                       }
+                       /* retry */
+               }
+               /* retry */
+       }
+       return error;
+}
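
The EBUSY/0 contract implies a caller shape along the following lines.  This is
a hypothetical sketch (exreq_timeout_sketch and sleep_error are invented, not
code from the patch), and undo_upreq() below uses the same contract for upgrade
requests:

	static int
	exreq_timeout_sketch(struct lock *lkp, int sleep_error)
	{
		if (sleep_error == 0)
			return 0;	/* normal wakeup; caller retries its loop */

		if (undo_exreq(lkp) == 0) {
			/*
			 * The request was granted before we could undo it.
			 * Per the contract we own the exclusive lock, so
			 * claim it and dispose of it.
			 */
			lkp->lk_lockholder = curthread;
			lockmgr(lkp, LK_RELEASE);
		}
		return sleep_error;	/* undone (EBUSY): report the failure */
	}
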
+
+/*
+ * Undo an upgrade request.  Returns EBUSY if we were able to undo the
+ * request, and 0 if the request was granted before we could undo it.
+ * When 0 is returned, the lock state has not been modified.  The caller
+ * is responsible for setting the lockholder to curthread.
+ */
+static
+int
+undo_upreq(struct lock *lkp)
+{
+       uint64_t count;
+       uint64_t ncount;
+       int error;
+
+       count = lkp->lk_count;
+       error = 0;
+
+       for (;;) {
+               cpu_ccfence();
+
+               if ((count & LKC_UPREQ) == 0) {
+                       /*
+                        * UPREQ was granted
+                        */
+                       break;
+               }
+               if (count & LKC_XMASK) {
                        /*
                         * Clear the UPREQ we still own.  Nobody to wakeup
                         * here because there is an existing exclusive
                         * holder.
                         */
-                       KKASSERT(count & LKC_UPREQ);
-                       KKASSERT((count & LKC_MASK) > 0);
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count & ~LKC_UPREQ)) {
-                               wakeup(lkp);
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count,
+                                             count & ~LKC_UPREQ)) {
+                               error = EBUSY;
+                               /* count &= ~LKC_UPREQ; NOT USED */
                                break;
                        }
                } else if (count & LKC_EXREQ) {
                        /*
-                        * Clear the UPREQ we still own.  We cannot wakeup any
-                        * shared waiters because there is an exclusive
-                        * request pending.
+                        * Clear the UPREQ we still own.  Grant the exclusive
+                        * request and wake it up.
                         */
-                       KKASSERT(count & LKC_UPREQ);
-                       KKASSERT((count & LKC_MASK) > 0);
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count & ~LKC_UPREQ)) {
+                       ncount = (count + 1);
+                       ncount &= ~(LKC_EXREQ | LKC_EXREQ2 | LKC_UPREQ);
+
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               wakeup(lkp);
+                               error = EBUSY;
+                               /* count = ncount; NOT USED */
                                break;
                        }
                } else {
@@ -863,18 +1205,23 @@ undo_upreq(struct lock *lkp)
                         * Clear the UPREQ we still own.  Wakeup any shared
                         * waiters.
                         */
-                       KKASSERT(count & LKC_UPREQ);
-                       KKASSERT((count & LKC_MASK) > 0);
-                       if (atomic_fcmpset_int(&lkp->lk_count, &count,
-                                              count &
-                                              ~(LKC_UPREQ | LKC_SHREQ))) {
-                               if (count & LKC_SHREQ)
+                       ncount = count & ~LKC_UPREQ;
+                       if (count & LKC_SMASK)
+                               ncount |= LKC_SHARED;
+
+                       if (atomic_fcmpset_64(&lkp->lk_count, &count, ncount)) {
+                               if ((count & LKC_SHARED) == 0 &&
+                                   (ncount & LKC_SHARED)) {
                                        wakeup(lkp);
+                               }
+                               error = EBUSY;
+                               /* count = ncount; NOT USED */
                                break;
                        }
                }
                /* retry */
        }
+       return error;
 }
 
 void
@@ -901,7 +1248,7 @@ lockinit(struct lock *lkp, const char *wmesg, int timo, int flags)
        lkp->lk_count = 0;
        lkp->lk_wmesg = wmesg;
        lkp->lk_timo = timo;
-       lkp->lk_lockholder = LK_NOTHREAD;
+       lkp->lk_lockholder = NULL;
 }
 
 /*
@@ -922,7 +1269,12 @@ lockreinit(struct lock *lkp, const char *wmesg, int timo, int flags)
 void
 lockuninit(struct lock *lkp)
 {
-       KKASSERT((lkp->lk_count & (LKC_EXREQ|LKC_SHREQ|LKC_UPREQ)) == 0);
+       uint64_t count __unused;
+
+       count = lkp->lk_count;
+       cpu_ccfence();
+       KKASSERT((count & (LKC_EXREQ | LKC_UPREQ)) == 0 &&
+                ((count & LKC_SHARED) || (count & LKC_SMASK) == 0));
 }
 
 /*
@@ -932,18 +1284,20 @@ int
 lockstatus(struct lock *lkp, struct thread *td)
 {
        int lock_type = 0;
-       int count;
+       uint64_t count;
 
        count = lkp->lk_count;
        cpu_ccfence();
 
-       if (count & LKC_EXCL) {
-               if (td == NULL || lkp->lk_lockholder == td)
-                       lock_type = LK_EXCLUSIVE;
-               else
-                       lock_type = LK_EXCLOTHER;
-       } else if (count & LKC_MASK) {
-               lock_type = LK_SHARED;
+       if (count & (LKC_XMASK | LKC_SMASK | LKC_EXREQ | LKC_UPREQ)) {
+               if (count & LKC_XMASK) {
+                       if (td == NULL || lkp->lk_lockholder == td)
+                               lock_type = LK_EXCLUSIVE;
+                       else
+                               lock_type = LK_EXCLOTHER;
+               } else if ((count & LKC_SMASK) && (count & LKC_SHARED)) {
+                       lock_type = LK_SHARED;
+               }
        }
        return (lock_type);
 }
@@ -956,33 +1310,37 @@ int
 lockowned(struct lock *lkp)
 {
        thread_t td = curthread;
-       int count;
+       uint64_t count;
 
        count = lkp->lk_count;
        cpu_ccfence();
 
-       if (count & LKC_EXCL)
+       if (count & LKC_XMASK)
                return(lkp->lk_lockholder == td);
        else
-               return((count & LKC_MASK) != 0);
+               return((count & LKC_SMASK) != 0);
 }
 
+#if 0
 /*
  * Determine the number of holders of a lock.
  *
- * The non-blocking version can usually be used for assertions.
+ * REMOVED - Cannot be used due to our use of atomic_fetchadd_64()
+ *          for shared locks.  Callers can only test whether the lock
+ *          is referenced at all, using lockinuse(lk) (sys/lock.h).
  */
 int
 lockcount(struct lock *lkp)
 {
-       return(lkp->lk_count & LKC_MASK);
+       panic("lockcount cannot be used");
 }
 
 int
 lockcountnb(struct lock *lkp)
 {
-       return(lkp->lk_count & LKC_MASK);
+       panic("lockcount cannot be used");
 }
+#endif
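
Because a failed shared attempt transiently bumps SCOUNT before undo_shreq()
backs it out, an instantaneous count can never be trusted as a holder count.
The assertion conversions later in this diff therefore all reduce to a boolean
test; a representative before/after pair, shown here only for illustration:

	KKASSERT(lockcountnb(&vp->v_lock) == 1);	/* old: exact ref count */
	KKASSERT(lockinuse(&vp->v_lock));		/* new: held or not held */
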
 
 /*
  * Print out information about state of a lock. Used by VOP_PRINT
@@ -993,27 +1351,28 @@ lockmgr_printinfo(struct lock *lkp)
 {
        struct thread *td = lkp->lk_lockholder;
        struct proc *p;
-       int count;
+       uint64_t count;
 
        count = lkp->lk_count;
        cpu_ccfence();
 
-       if (td && td != LK_KERNTHREAD && td != LK_NOTHREAD)
+       if (td && td != LK_KERNTHREAD)
                p = td->td_proc;
        else
                p = NULL;
 
-       if (count & LKC_EXCL) {
-               kprintf(" lock type %s: EXCLUS (count %08x) by td %p pid %d",
-                   lkp->lk_wmesg, count, td,
+       if (count & LKC_XMASK) {
+               kprintf(" lock type %s: EXCLUS (count %016jx) by td %p pid %d",
+                   lkp->lk_wmesg, (intmax_t)count, td,
                    p ? p->p_pid : -99);
-       } else if (count & LKC_MASK) {
-               kprintf(" lock type %s: SHARED (count %08x)",
-                   lkp->lk_wmesg, count);
+       } else if ((count & LKC_SMASK) && (count & LKC_SHARED)) {
+               kprintf(" lock type %s: SHARED (count %016jx)",
+                   lkp->lk_wmesg, (intmax_t)count);
        } else {
                kprintf(" lock type %s: NOTHELD", lkp->lk_wmesg);
        }
-       if (count & (LKC_EXREQ|LKC_SHREQ))
+       if ((count & (LKC_EXREQ | LKC_UPREQ)) ||
+           ((count & LKC_XMASK) && (count & LKC_SMASK)))
                kprintf(" with waiters\n");
        else
                kprintf("\n");
index 6004be4..df663bf 100644 (file)
@@ -397,7 +397,7 @@ shutdown_busycount1(struct buf *bp, void *info)
 
        if ((vp = bp->b_vp) != NULL && vp->v_tag == VT_TMPFS)
                return (0);
-       if ((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp) > 0)
+       if ((bp->b_flags & B_INVAL) == 0 && BUF_LOCKINUSE(bp))
                return(1);
        if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)
                return (1);
@@ -431,7 +431,7 @@ shutdown_busycount2(struct buf *bp, void *info)
        /*
         * Only count buffers stuck on I/O, ignore everything else
         */
-       if (((bp->b_flags & B_INVAL) == 0 && BUF_REFCNT(bp)) ||
+       if (((bp->b_flags & B_INVAL) == 0 && BUF_LOCKINUSE(bp)) ||
            ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
                /*
                 * Only count buffers undergoing write I/O
index d807c63..e37f3e8 100644 (file)
@@ -835,12 +835,11 @@ _bremfree(struct buf *bp)
        struct bufpcpu *pcpu = &bufpcpu[bp->b_qcpu];
 
        if (bp->b_qindex != BQUEUE_NONE) {
-               KASSERT(BUF_REFCNTNB(bp) == 1, 
-                       ("bremfree: bp %p not locked",bp));
+               KASSERT(BUF_LOCKINUSE(bp), ("bremfree: bp %p not locked", bp));
                TAILQ_REMOVE(&pcpu->bufqueues[bp->b_qindex], bp, b_freelist);
                bp->b_qindex = BQUEUE_NONE;
        } else {
-               if (BUF_REFCNTNB(bp) <= 1)
+               if (!BUF_LOCKINUSE(bp))
                        panic("bremfree: removing a buffer not on a queue");
        }
 }
@@ -989,7 +988,7 @@ bwrite(struct buf *bp)
                brelse(bp);
                return (0);
        }
-       if (BUF_REFCNTNB(bp) == 0)
+       if (BUF_LOCKINUSE(bp) == 0)
                panic("bwrite: buffer is not busy???");
 
        /*
@@ -1037,7 +1036,7 @@ bawrite(struct buf *bp)
                brelse(bp);
                return;
        }
-       if (BUF_REFCNTNB(bp) == 0)
+       if (BUF_LOCKINUSE(bp) == 0)
                panic("bawrite: buffer is not busy???");
 
        /*
@@ -1078,7 +1077,7 @@ bawrite(struct buf *bp)
 void
 bdwrite(struct buf *bp)
 {
-       if (BUF_REFCNTNB(bp) == 0)
+       if (BUF_LOCKINUSE(bp) == 0)
                panic("bdwrite: buffer is not busy");
 
        if (bp->b_flags & B_INVAL) {
@@ -1538,13 +1537,6 @@ brelse(struct buf *bp)
                        
        if (bp->b_qindex != BQUEUE_NONE)
                panic("brelse: free buffer onto another queue???");
-       if (BUF_REFCNTNB(bp) > 1) {
-               /* Temporary panic to verify exclusive locking */
-               /* This panic goes away when we allow shared refs */
-               panic("brelse: multiple refs");
-               /* NOT REACHED */
-               return;
-       }
 
        /*
         * Figure out the correct queue to place the cleaned up buffer on.
@@ -1676,11 +1668,6 @@ bqrelse(struct buf *bp)
 
        if (bp->b_qindex != BQUEUE_NONE)
                panic("bqrelse: free buffer onto another queue???");
-       if (BUF_REFCNTNB(bp) > 1) {
-               /* do not release to free list */
-               panic("bqrelse: multiple refs");
-               return;
-       }
 
        buf_act_advance(bp);
 
@@ -3059,7 +3046,7 @@ allocbuf(struct buf *bp, int size)
        int desiredpages;
        int i;
 
-       if (BUF_REFCNT(bp) == 0)
+       if (BUF_LOCKINUSE(bp) == 0)
                panic("allocbuf: buffer not busy");
 
        if (bp->b_kvasize < size)
@@ -3540,8 +3527,7 @@ bpdone(struct buf *bp, int elseit)
 {
        buf_cmd_t cmd;
 
-       KASSERT(BUF_REFCNTNB(bp) > 0, 
-               ("bpdone: bp %p not busy %d", bp, BUF_REFCNTNB(bp)));
+       KASSERT(BUF_LOCKINUSE(bp), ("bpdone: bp %p not busy", bp));
        KASSERT(bp->b_cmd != BUF_CMD_DONE, 
                ("bpdone: bp %p already done!", bp));
 
index 10df380..45bff7b 100644 (file)
@@ -624,7 +624,7 @@ vx_get_nonblock(struct vnode *vp)
 {
        int error;
 
-       if (lockcountnb(&vp->v_lock))
+       if (lockinuse(&vp->v_lock))
                return(EBUSY);
        error = lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT);
        if (error == 0) {
index 994809c..f81a0e7 100644 (file)
@@ -1254,7 +1254,6 @@ vclean_vxlocked(struct vnode *vp, int flags)
         * object, if it has one. 
         */
        vinvalbuf(vp, V_SAVE, 0, 0);
-       KKASSERT(lockcountnb(&vp->v_lock) == 1);
 
        /*
         * If purging an active vnode (typically during a forced unmount
@@ -1294,7 +1293,6 @@ vclean_vxlocked(struct vnode *vp, int flags)
                        VOP_INACTIVE(vp);
                vinvalbuf(vp, V_SAVE, 0, 0);
        }
-       KKASSERT(lockcountnb(&vp->v_lock) == 1);
 
        /*
         * If the vnode has an object, destroy it.
@@ -1479,7 +1477,7 @@ vgone_vxlocked(struct vnode *vp)
         * assert that the VX lock is held.  This is an absolute requirement
         * now for vgone_vxlocked() to be called.
         */
-       KKASSERT(lockcountnb(&vp->v_lock) == 1);
+       KKASSERT(lockinuse(&vp->v_lock));
 
        /*
         * Clean out the filesystem specific data and set the VRECLAIMED
index b40d882..283a4f9 100644 (file)
@@ -125,22 +125,16 @@ BUF_KERNPROC(struct buf *bp)
  * where the buffer is expected to be owned or otherwise data stable.
  */
 static __inline int
-BUF_REFCNT(struct buf *bp)
+BUF_LOCKINUSE(struct buf *bp)
 {
-       return (lockcount(&(bp)->b_lock));
-}
-
-static __inline int
-BUF_REFCNTNB(struct buf *bp)
-{
-       return (lockcountnb(&(bp)->b_lock));
+       return (lockinuse(&(bp)->b_lock));
 }
 
 /*
  * Free a buffer lock.
  */
 #define BUF_LOCKFREE(bp)                       \
-       if (BUF_REFCNTNB(bp) > 0)               \
+       if (BUF_LOCKINUSE(bp))                  \
                panic("free locked buf")
 
 static __inline void
index d5fe5e9..299e5c5 100644 (file)
@@ -1,13 +1,16 @@
 /* 
  * Copyright (c) 1995
  *     The Regents of the University of California.  All rights reserved.
- * Copyright (c) 2013
+ * Copyright (c) 2013-2017
  *     The DragonFly Project.  All rights reserved.
  *
  * This code contains ideas from software contributed to Berkeley by
  * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
  * System project at Carnegie-Mellon University.
  *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * The general lock structure.  Provides for multiple shared locks,
  * upgrading from shared to exclusive, and sleeping until the lock
  * can be gained.
+ *
+ * NOTE: We don't __cachealign struct lock, it's too much bloat.  Users
+ *      of struct lock may be able to arrange it within greater structures
+ *      in more SMP-friendly ways.
  */
 struct thread;
 
 struct lock {
        u_int   lk_flags;               /* see below */
-       int     lk_count;               /* -shared, +exclusive */
        int     lk_timo;                /* maximum sleep time (for tsleep) */
+       uint64_t lk_count;              /* see LKC_* bits */
        const char *lk_wmesg;           /* resource sleeping (for tsleep) */
        struct thread *lk_lockholder;   /* thread of excl lock holder */
 };
@@ -144,13 +151,16 @@ struct lock {
  * Positive count is exclusive, negative count is shared.  The count field
  * must be large enough to accomodate all possible threads.
  */
-#define LKC_EXREQ      0x80000000      /* waiting for exclusive lock */
-#define LKC_SHREQ      0x40000000      /* waiting for shared lock */
-#define LKC_UPREQ      0x20000000      /* waiting for upgrade */
-#define LKC_EXCL       0x10000000      /* exclusive (else shr or unlocked) */
-#define LKC_UPGRANT    0x08000000      /* upgrade granted */
-#define LKC_CANCEL     0x04000000      /* cancel in effect */
-#define LKC_MASK       0x03FFFFFF
+#define LKC_RESERVED8  0x0000000080000000LU    /* (DNU, insn optimization) */
+#define LKC_EXREQ      0x0000000040000000LU    /* waiting for excl lock */
+#define LKC_SHARED     0x0000000020000000LU    /* shared lock(s) granted */
+#define LKC_UPREQ      0x0000000010000000LU    /* waiting for upgrade */
+#define LKC_EXREQ2     0x0000000008000000LU    /* multi-wait for EXREQ */
+#define LKC_CANCEL     0x0000000004000000LU    /* cancel in effect */
+#define LKC_XMASK      0x0000000003FFFFFFLU
+#define LKC_SMASK      0xFFFFFFFF00000000LU
+#define LKC_SCOUNT     0x0000000100000000LU
+#define LKC_SSHIFT     32
 
 /*
  * External lock flags.
@@ -198,7 +208,6 @@ struct lock {
  * Indicator that no process holds exclusive lock
  */
 #define LK_KERNTHREAD ((struct thread *)-2)
-#define LK_NOTHREAD ((struct thread *)-1)
 
 #ifdef _KERNEL
 
@@ -214,10 +223,10 @@ struct lock_args {
 #define LOCK_INITIALIZER(wmesg, timo, flags)   \
 {                                              \
        .lk_flags = ((flags) & LK_EXTFLG_MASK), \
+       .lk_timo = (timo),                      \
        .lk_count = 0,                          \
        .lk_wmesg = wmesg,                      \
-       .lk_timo = (timo),                      \
-       .lk_lockholder = LK_NOTHREAD            \
+       .lk_lockholder = NULL                   \
 }
 
 void   lockinit (struct lock *, const char *wmesg, int timo, int flags);
@@ -235,8 +244,6 @@ void        lockmgr_kernproc (struct lock *);
 void   lockmgr_printinfo (struct lock *);
 int    lockstatus (struct lock *, struct thread *);
 int    lockowned (struct lock *);
-int    lockcount (struct lock *);
-int    lockcountnb (struct lock *);
 
 #define        LOCK_SYSINIT(name, lock, desc, flags)                           \
        static struct lock_args name##_args = {                         \
@@ -280,6 +287,18 @@ lockmgr(struct lock *lkp, u_int flags)
        }
 }
 
+/*
+ * Returns non-zero if the lock is in use.  Cannot be used to count
+ * refs on a lock (refs cannot be safely counted due to the use of
+ * atomic_fetchadd_64() for shared locks).
+ */
+static __inline
+int
+lockinuse(struct lock *lkp)
+{
+       return ((lkp->lk_count & (LKC_SMASK | LKC_XMASK)) != 0);
+}
+
 #endif /* _KERNEL */
 #endif /* _KERNEL || _KERNEL_STRUCTURES */
 #endif /* _SYS_LOCK_H_ */
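
For reference, the lk_count layout implied by the LKC_* values above, drawn as
a sketch (not text from the patch):

	  63                           32 31 30 29 28 27 26 25             0
	 +-------------------------------+--+--+--+--+--+--+----------------+
	 | shared count                  |R8|EX|SH|UP|E2|CA| exclusive count|
	 | (LKC_SMASK, LKC_SCOUNT steps) |  |RQ|RD|RQ|  |  | (LKC_XMASK)    |
	 +-------------------------------+--+--+--+--+--+--+----------------+

	 R8 = LKC_RESERVED8, EX = LKC_EXREQ, SH = LKC_SHARED, UP = LKC_UPREQ,
	 E2 = LKC_EXREQ2, CA = LKC_CANCEL.
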
index e7964cd..31416c5 100644 (file)
@@ -1448,7 +1448,7 @@ nfs_clearcommit_callback(struct mount *mp, struct vnode *vp,
 static int
 nfs_clearcommit_bp(struct buf *bp, void *data __unused)
 {
-       if (BUF_REFCNT(bp) == 0 &&
+       if (BUF_LOCKINUSE(bp) == 0 &&
            (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
             == (B_DELWRI | B_NEEDCOMMIT)) {
                bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
index 9f83ded..b594c1b 100644 (file)
@@ -3190,7 +3190,7 @@ nfs_strategy(struct vop_strategy_args *ap)
 
        KASSERT(bp->b_cmd != BUF_CMD_DONE,
                ("nfs_strategy: buffer %p unexpectedly marked done", bp));
-       KASSERT(BUF_REFCNT(bp) > 0,
+       KASSERT(BUF_LOCKINUSE(bp),
                ("nfs_strategy: buffer %p not locked", bp));
 
        if (bio->bio_flags & BIO_SYNC)
index 924aae1..e6c8225 100644 (file)
@@ -503,7 +503,7 @@ tmpfs_free_vp(struct vnode *vp)
        node = VP_TO_TMPFS_NODE(vp);
 
        TMPFS_NODE_LOCK(node);
-       KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0);
+       KKASSERT(lockinuse(TMPFS_NODE_MTX(node)));
        node->tn_vnode = NULL;
        vp->v_data = NULL;
        TMPFS_NODE_UNLOCK(node);
index 6377ecd..add00ee 100644 (file)
@@ -248,7 +248,7 @@ free_lock(struct lock *lkp)
 static int
 lock_held(struct lock *lkp) 
 {
-       return lockcountnb(lkp);
+       return lockinuse(lkp);
 }
 #endif
 
@@ -360,7 +360,7 @@ static      void workitem_free(struct worklist *, int);
 static void
 worklist_insert(struct workhead *head, struct worklist *item)
 {
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        if (item->wk_state & ONWORKLIST) {
                panic("worklist_insert: already on list");
@@ -602,7 +602,7 @@ process_worklist_item(struct mount *matchmnt, int flags)
        struct vnode *vp;
        int matchcnt = 0;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        matchfs = NULL;
        if (matchmnt != NULL)
@@ -839,7 +839,7 @@ pagedep_lookup(struct inode *ip, ufs_lbn_t lbn, int flags,
        struct mount *mp;
        int i;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
        
        mp = ITOV(ip)->v_mount;
        pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn);
@@ -916,7 +916,7 @@ inodedep_lookup(struct fs *fs, ino_t inum, int flags,
        struct inodedep *inodedep;
        struct inodedep_hashhead *inodedephd;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        inodedephd = INODEDEP_HASH(fs, inum);
 top:
@@ -1208,7 +1208,7 @@ bmsafemap_lookup(struct buf *bp)
        struct bmsafemap *bmsafemap;
        struct worklist *wk;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        LIST_FOREACH(wk, &bp->b_dep, wk_list) {
                if (wk->wk_type == D_BMSAFEMAP)
@@ -1383,7 +1383,7 @@ allocdirect_merge(struct allocdirectlst *adphead,
 {
        struct freefrag *freefrag;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
            newadp->ad_oldsize != oldadp->ad_newsize ||
@@ -1990,7 +1990,7 @@ static void
 free_allocdirect(struct allocdirectlst *adphead,
                 struct allocdirect *adp, int delay)
 {
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        if ((adp->ad_state & DEPCOMPLETE) == 0)
                LIST_REMOVE(adp, ad_deps);
@@ -2274,7 +2274,7 @@ free_allocindir(struct allocindir *aip, struct inodedep *inodedep)
 {
        struct freefrag *freefrag;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        if ((aip->ai_state & DEPCOMPLETE) == 0)
                LIST_REMOVE(aip, ai_deps);
@@ -2478,7 +2478,7 @@ free_diradd(struct diradd *dap)
        struct inodedep *inodedep;
        struct mkdir *mkdir, *nextmd;
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        WORKLIST_REMOVE(&dap->da_list);
        LIST_REMOVE(dap, da_pdlist);
@@ -4708,7 +4708,7 @@ request_cleanup(int resource)
 {
        struct thread *td = curthread;          /* XXX */
 
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
 
        /*
         * We never hold up the filesystem syncer process.
@@ -5010,7 +5010,7 @@ getdirtybuf(struct buf **bpp, int waitfor)
        /*
         * Try to obtain the buffer lock without deadlocking on &lk.
         */
-       KKASSERT(lock_held(&lk) > 0);
+       KKASSERT(lock_held(&lk));
        error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT);
        if (error == 0) {
                /*