/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * code.
 */
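/*
 * Illustrative usage sketch (added commentary, not part of the original
 * source): how a caller might mix blocking exclusive and shared
 * acquisitions through the sys/mutex2.h inline wrappers.  The wrapper
 * names (mtx_init, mtx_lock_ex, mtx_lock_sh, mtx_unlock) are assumed
 * from mutex2.h; "my_mtx" and the "mywait" ident are hypothetical.
 *
 *	struct mtx my_mtx;
 *
 *	mtx_init(&my_mtx);
 *	mtx_lock_ex(&my_mtx, "mywait", 0, 0);	/* may block, may recurse *\/
 *	/* ...modify shared state... *\/
 *	mtx_unlock(&my_mtx);
 *
 *	mtx_lock_sh(&my_mtx, "mywait", 0, 0);	/* shared, may block *\/
 *	/* ...read shared state... *\/
 *	mtx_unlock(&my_mtx);
 *
 * Unlike LWKT tokens, these locks remain held if the thread blocks.
 */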
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/mutex.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>
static __int64_t mtx_contention_count;
static __int64_t mtx_collision_count;
static __int64_t mtx_wakeup_count;

SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
	    &mtx_contention_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
	    &mtx_collision_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
	    &mtx_wakeup_count, 0, "");
static void mtx_chain_link(mtx_t mtx);
static void mtx_delete_link(mtx_t mtx, mtx_link_t link);
/*
 * Exclusive-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			/*
			 * Clearing MTX_EXLINK in lock causes us to loop until
			 * MTX_EXLINK is available.  However, to avoid
			 * unnecessary cpu cache traffic we poll instead.
			 *
			 * Setting MTX_EXLINK in nlock causes us to loop until
			 * we can acquire MTX_EXLINK.
			 *
			 * Also set MTX_EXWANTED coincident with EXLINK, if
			 * not already set.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock = lock | MTX_EXWANTED | MTX_EXLINK;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				/*
				 * Check for early abort
				 */
				if (link->state == MTX_LINK_ABORTED) {
					atomic_clear_int(&mtx->mtx_lock,
							 MTX_EXLINK);
					error = ENOLCK;
					if (mtx->mtx_link == NULL) {
						atomic_clear_int(&mtx->mtx_lock,
								 MTX_EXWANTED);
					}
					break;
				}

				/*
				 * Success.  Link in our structure then
				 * release EXLINK and sleep.
				 */
				link->owner = curthread;
				link->state = MTX_LINK_LINKED;
				if (mtx->mtx_link) {
					link->next = mtx->mtx_link;
					link->prev = link->next->prev;
					link->next->prev = link;
					link->prev->next = link;
				} else {
					link->next = link;
					link->prev = link;
					mtx->mtx_link = link;
				}
				tsleep_interlock(link, 0);
				atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);

				error = tsleep(link, flags, ident, to);
				++mtx_contention_count;

				/*
				 * Normal unlink, we should own the exclusive
				 * lock now.
				 */
				if (link->state == MTX_LINK_LINKED)
					mtx_delete_link(mtx, link);
				if (link->state == MTX_LINK_ACQUIRED) {
					KKASSERT(mtx->mtx_owner == link->owner);
					error = 0;
					break;
				}

				/*
				 * Aborted lock (mtx_abort_ex called).
				 */
				if (link->state == MTX_LINK_ABORTED) {
					error = ENOLCK;
					break;
				}

				/*
				 * tsleep error, else retry.
				 */
				if (error)
					break;
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
int
_mtx_lock_ex_link(mtx_t mtx, mtx_link_t link,
		  const char *ident, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, ident, flags, to));
}
int
_mtx_lock_ex(mtx_t mtx, const char *ident, int flags, int to)
{
	struct mtx_link link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, flags, to));
}
int
_mtx_lock_ex_quick(mtx_t mtx, const char *ident)
{
	struct mtx_link link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, 0, 0));
}
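/*
 * Illustrative sketch (added commentary, not part of the original source):
 * the blocking exclusive-lock path returns a tsleep() code only when the
 * caller asks for one.  PCATCH and the tick-count timeout are the standard
 * tsleep() parameters; the mtx_lock_ex() wrapper signature is assumed from
 * sys/mutex2.h and "my_mtx" is hypothetical.
 *
 *	int error;
 *
 *	/* Interruptible, with a one second timeout (hz ticks). *\/
 *	error = mtx_lock_ex(&my_mtx, "mywait", PCATCH, hz);
 *	if (error == 0) {
 *		/* lock held exclusively, do work *\/
 *		mtx_unlock(&my_mtx);
 *	} else {
 *		/* EINTR/ERESTART (signal) or EWOULDBLOCK (timeout) *\/
 *	}
 */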
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			nlock = lock | MTX_SHWANTED;
			tsleep_interlock(mtx, 0);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = tsleep(mtx, flags, ident, to);
				if (error)
					break;
				++mtx_contention_count;
				/* retry */
			} else {
				tsleep_remove(curthread);
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
int
_mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
{
	return (__mtx_lock_sh(mtx, ident, flags, to));
}
int
_mtx_lock_sh_quick(mtx_t mtx, const char *ident)
{
	return (__mtx_lock_sh(mtx, ident, 0, 0));
}
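/*
 * Design note with a small sketch (added commentary, not original):
 * shared waiters sleep on the mutex address itself and are mass-woken,
 * while exclusive waiters sleep on their private mtx_link and are woken
 * one at a time by the chaining code.  The lost-wakeup-free pattern used
 * above is the standard tsleep_interlock()/tsleep() pairing:
 *
 *	tsleep_interlock(chan, 0);		/* register before commit *\/
 *	if (atomic_cmpset_int(&word, lock, nlock))
 *		error = tsleep(chan, flags, ident, to);
 *	else
 *		tsleep_remove(curthread);	/* back out the interlock *\/
 */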
void
_mtx_spinlock_ex(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		++mtx_collision_count;
	}
}
void
_mtx_spinlock_sh(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		++mtx_collision_count;
	}
}
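/*
 * Added commentary, not original: the spinlock paths back off by spinning
 * a delay loop whose bound (bb) grows by one on each failed pass, capped
 * at 1000, so a contended spinner burns progressively more time between
 * probes instead of hammering the cache line.  Roughly, with try_lock()
 * standing in for the cmpset attempts above (hypothetical name):
 *
 *	bb = 1;
 *	while (!try_lock()) {
 *		if (bb < 1000)
 *			++bb;
 *		cpu_pause();
 *		for (bo = 0; bo < bb; ++bo)
 *			;		/* delay before re-probing *\/
 *	}
 */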
int
_mtx_lock_ex_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		++mtx_collision_count;
	}
	return (error);
}
int
_mtx_lock_sh_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		++mtx_collision_count;
	}
	return (error);
}
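/*
 * Illustrative sketch (added commentary, not original): the try variants
 * never block, returning 0 on success or EAGAIN when the lock cannot be
 * taken immediately.  The mtx_lock_ex_try() wrapper name is assumed from
 * sys/mutex2.h; "my_mtx" is hypothetical.
 *
 *	if (mtx_lock_ex_try(&my_mtx) == 0) {
 *		/* fast path, lock held exclusively *\/
 *		mtx_unlock(&my_mtx);
 *	} else {
 *		/* fall back, e.g. queue the work or block explicitly *\/
 *	}
 */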
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}
		KKASSERT(mtx->mtx_owner == curthread);
		nlock = lock & ~(MTX_EXCLUSIVE | MTX_SHWANTED);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
			if (lock & MTX_SHWANTED) {
				wakeup(mtx);
				++mtx_wakeup_count;
			}
			break;
		}
		++mtx_collision_count;
	}
}
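/*
 * Illustrative sketch (added commentary, not original): a writer can
 * publish its update and keep read access without any window where the
 * lock is free.  "my_mtx" is hypothetical.
 *
 *	mtx_lock_ex(&my_mtx, "mywait", 0, 0);
 *	/* ...modify the structure... *\/
 *	mtx_downgrade(&my_mtx);	/* exclusive count becomes a shared
 *				 * count, shared waiters are woken *\/
 *	/* ...continue reading under the shared lock... *\/
 *	mtx_unlock(&my_mtx);
 */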
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		++mtx_collision_count;
	}
	return (error);
}
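/*
 * Illustrative sketch (added commentary, not original): since the upgrade
 * fails with EDEADLK whenever another shared holder exists, callers
 * typically fall back by dropping the shared lock and taking the
 * exclusive lock outright.  "my_mtx" is hypothetical.
 *
 *	mtx_lock_sh(&my_mtx, "mywait", 0, 0);
 *	if (mtx_upgrade_try(&my_mtx) == EDEADLK) {
 *		mtx_unlock(&my_mtx);
 *		mtx_lock_ex(&my_mtx, "mywait", 0, 0);
 *		/* NOTE: state observed under the shared lock may have
 *		 * changed; revalidate before relying on it. *\/
 *	}
 */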
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * Any release which makes the lock available when others want an exclusive
 * lock causes us to chain the owner to the next exclusive lock instead of
 * releasing the lock.
 */
void
_mtx_unlock(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		nlock = lock & ~(MTX_SHWANTED | MTX_EXLINK);

		if (nlock == 1) {
			/*
			 * Last release, shared lock, no exclusive waiters.
			 */
			nlock = lock & MTX_EXLINK;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else if (nlock == (MTX_EXCLUSIVE | 1)) {
			/*
			 * Last release, exclusive lock, no exclusive waiters.
			 * Wake up any shared waiters.
			 */
			mtx->mtx_owner = NULL;
			nlock = lock & MTX_EXLINK;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				if (lock & MTX_SHWANTED) {
					wakeup(mtx);
					++mtx_wakeup_count;
				}
				break;
			}
		} else if (nlock == (MTX_EXWANTED | 1)) {
			/*
			 * Last release, shared lock, with exclusive
			 * waiters.
			 *
			 * Wait for EXLINK to clear, then acquire it.
			 * We could use the cmpset for this but polling
			 * is better on the cpu caches.
			 *
			 * Acquire an exclusive lock leaving the lockcount
			 * set to 1, and get EXLINK for access to mtx_link.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock |= MTX_EXLINK | MTX_EXCLUSIVE;
			nlock |= (lock & MTX_SHWANTED);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx_chain_link(mtx);
				break;
			}
		} else if (nlock == (MTX_EXCLUSIVE | MTX_EXWANTED | 1)) {
			/*
			 * Last release, exclusive lock, with exclusive
			 * waiters.
			 *
			 * Leave the exclusive lock intact and the lockcount
			 * set to 1, and get EXLINK for access to mtx_link.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock |= MTX_EXLINK;
			nlock |= (lock & MTX_SHWANTED);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx_chain_link(mtx);
				break;
			}
		} else {
			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		}
		++mtx_collision_count;
	}
}
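/*
 * Added commentary, not original: the unlock cases above key off the lock
 * word with SHWANTED and EXLINK masked away.  Assuming the mutex2.h bit
 * layout (count in MTX_MASK, flag bits above it), the dispatch is:
 *
 *	1			last shared count, nobody wants exclusive
 *	MTX_EXCLUSIVE | 1	last exclusive count, no exclusive waiters
 *	MTX_EXWANTED | 1	last shared count, exclusive waiter queued:
 *				convert in place to an exclusive lock and
 *				hand it to the waiter via mtx_chain_link()
 *	MTX_EXCLUSIVE | MTX_EXWANTED | 1
 *				last exclusive count, exclusive waiter
 *				queued: keep the lock held and chain the
 *				ownership to the waiter
 *	anything else		not the last count, simply decrement
 */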
/*
 * mtx_chain_link() is called with the lock held exclusively with a
 * single ref count, and also with MTX_EXLINK held.
 */
static void
mtx_chain_link(mtx_t mtx)
{
	mtx_link_t link;
	u_int	lock;
	u_int	nlock;
	u_int	clock;	/* bits we own and want to clear */

	/*
	 * Chain the exclusive lock to the next link.  The caller cleared
	 * SHWANTED so if there is no link we have to wake up any shared
	 * waiters.
	 */
	clock = MTX_EXLINK;
	if ((link = mtx->mtx_link) != NULL) {
		KKASSERT(link->state == MTX_LINK_LINKED);
		if (link->next == link) {
			mtx->mtx_link = NULL;
			clock |= MTX_EXWANTED;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_ACQUIRED;
		mtx->mtx_owner = link->owner;
	} else {
		/*
		 * Chain was empty, release the exclusive lock's last count
		 * as well as the bits shown.
		 */
		clock |= MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1;
	}

	/*
	 * We have to use cmpset here to deal with MTX_SHWANTED.  If
	 * we just clear the bits we can miss a wakeup or, worse,
	 * leave mtx_lock unlocked with MTX_SHWANTED still set.
	 */
	for (;;) {
		lock = mtx->mtx_lock;
		nlock = lock & ~clock;

		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
			if (link) {
				/*
				 * Wakeup new exclusive holder.  Leave
				 * SHWANTED intact.
				 */
				wakeup(link);
			} else if (lock & MTX_SHWANTED) {
				/*
				 * Signal any shared waiters (and we also
				 * clear SHWANTED).
				 */
				mtx->mtx_owner = NULL;
				wakeup(mtx);
				++mtx_wakeup_count;
			}
			break;
		}
		++mtx_collision_count;
	}
}
/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static void
mtx_delete_link(mtx_t mtx, mtx_link_t link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_EXLINK.
	 *
	 * Do not use cmpxchg to wait for EXLINK to clear as this might
	 * result in too much cpu cache traffic.
	 */
	crit_enter_quick(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_EXLINK) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		/* lock &= ~MTX_EXLINK; */
		nlock = lock | MTX_EXLINK;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		++mtx_collision_count;
	}

	/*
	 * Delete the link and release EXLINK.
	 */
	if (link->state == MTX_LINK_LINKED) {
		if (link->next == link) {
			mtx->mtx_link = NULL;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_IDLE;
	}
	atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
	crit_exit_quick(td);
}
/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the
 * mtx_link is initialized, including both before and after the call
 * to mtx_lock_ex_link().
 */
void
mtx_abort_ex_link(mtx_t mtx, mtx_link_t link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_EXLINK
	 */
	crit_enter_quick(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_EXLINK) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		/* lock &= ~MTX_EXLINK; */
		nlock = lock | MTX_EXLINK;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		++mtx_collision_count;
	}

	/*
	 * Do the abort
	 */
	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED:
		/*
		 * de-link, mark aborted, and wakeup the thread.
		 */
		if (link->next == link) {
			mtx->mtx_link = NULL;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_ABORTED;
		wakeup(link);
		break;
	case MTX_LINK_ACQUIRED:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
	crit_exit_quick(td);
}
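/*
 * Illustrative sketch (added commentary, not original): the link-based
 * API lets one context cancel another context's pending exclusive lock
 * request.  The waiter and the aborter share the mtx_link; the
 * mtx_lock_ex_link() wrapper is assumed from sys/mutex2.h, and "my_mtx"
 * and "my_link" are hypothetical.
 *
 *	/* Waiting side: *\/
 *	struct mtx_link my_link;
 *
 *	mtx_link_init(&my_link);
 *	error = mtx_lock_ex_link(&my_mtx, &my_link, "mywait", 0, 0);
 *	if (error == ENOLCK) {
 *		/* request was aborted, lock not held *\/
 *	}
 *
 *	/* Aborting side (any time after mtx_link_init()): *\/
 *	mtx_abort_ex_link(&my_mtx, &my_link);
 *
 * If the abort races the grant and loses (MTX_LINK_ACQUIRED), the lock
 * is acquired normally and must be released by the waiter.
 */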