/*
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * lwkt_token - Implement soft token locks.
 *
 * Tokens are locks which serialize a thread only while the thread is
 * running.  If the thread blocks all tokens are released, then reacquired
 * when the thread resumes.
 *
 * This implementation requires no critical sections or spin locks, but
 * does use atomic_cmpset_long().
 *
 * Tokens may be recursively acquired by the same thread.  However the
 * caller must be sure to release such tokens in reverse order.
 */
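
/*
 * Example usage (an illustrative sketch only; foo_token, foo_list and
 * struct foo are hypothetical).  Because all tokens are released when a
 * thread blocks and reacquired when it resumes, token-protected state
 * must be revalidated after any call that can block:
 *
 *	static struct lwkt_token foo_token =
 *		LWKT_TOKEN_INITIALIZER(foo_token);
 *
 *	void
 *	foo_insert(struct foo *fp)
 *	{
 *		lwkt_gettoken(&foo_token);
 *		TAILQ_INSERT_TAIL(&foo_list, fp, f_entry);
 *		lwkt_reltoken(&foo_token);
 *	}
 */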
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>
#include <sys/spinlock.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>

extern int lwkt_sched_debug;

#ifndef LWKT_NUM_POOL_TOKENS
#define LWKT_NUM_POOL_TOKENS	4001	/* prime number */
#endif

static lwkt_token	pool_tokens[LWKT_NUM_POOL_TOKENS];

#define TOKEN_STRING		"REF=%p TOK=%p TD=%p"
#define CONTENDED_STRING	"REF=%p TOK=%p TD=%p (contention started)"
#define UNCONTENDED_STRING	"REF=%p TOK=%p TD=%p (contention stopped)"
#if !defined(KTR_TOKENS)
#define KTR_TOKENS	KTR_ALL
#endif

KTR_INFO_MASTER(tokens);
KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, sizeof(void *) * 3);
#if 0
KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, sizeof(void *) * 3);
KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, sizeof(void *) * 3);
#endif

#define logtoken(name, ref)						\
	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)

/*
 * Global tokens.  These replace the MP lock for major subsystem locking.
 * These tokens are initially used to lockup both global and individual
 * operations.
 *
 * Once individual structures get their own locks these tokens are used
 * only to protect global lists & other variables and to interlock
 * allocations and teardowns and such.
 *
 * The UP initializer causes token acquisition to also acquire the MP lock
 * for maximum compatibility.  The feature may be enabled and disabled at
 * any time, the MP state is copied to the tokref when the token is acquired
 * and will not race against sysctl changes.
 */
struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
struct lwkt_token proc_token = LWKT_TOKEN_INITIALIZER(proc_token);
struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
struct lwkt_token vmobj_token = LWKT_TOKEN_INITIALIZER(vmobj_token);
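
/*
 * For tokens embedded in dynamically allocated structures the static
 * LWKT_TOKEN_INITIALIZER() form above is replaced by explicit calls (a
 * minimal sketch; sc and bar_token are hypothetical):
 *
 *	lwkt_token_init(&sc->bar_token, "bar");
 *	...
 *	lwkt_token_uninit(&sc->bar_token);
 */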

static int lwkt_token_spin = 5;
SYSCTL_INT(_lwkt, OID_AUTO, token_spin, CTLFLAG_RW,
    &lwkt_token_spin, 0, "Decontention spin loops");
static int lwkt_token_delay = 0;
SYSCTL_INT(_lwkt, OID_AUTO, token_delay, CTLFLAG_RW,
    &lwkt_token_delay, 0, "Decontention spin delay in ns");

/*
 * The collision count is bumped every time the LWKT scheduler fails
 * to acquire needed tokens in addition to a normal lwkt_gettoken()
 * stall.
 */
SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
    &mp_token.t_collisions, 0, "Collision counter of mp_token");
SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
    &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
    &dev_token.t_collisions, 0, "Collision counter of dev_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
    &vm_token.t_collisions, 0, "Collision counter of vm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
    &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
    &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, proc_collisions, CTLFLAG_RW,
    &proc_token.t_collisions, 0, "Collision counter of proc_token");
SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
    &tty_token.t_collisions, 0, "Collision counter of tty_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
    &vnode_token.t_collisions, 0, "Collision counter of vnode_token");

#ifdef DEBUG_LOCKS_LATENCY

static long tokens_add_latency;
SYSCTL_LONG(_debug, OID_AUTO, tokens_add_latency, CTLFLAG_RW,
	    &tokens_add_latency, 0,
	    "Add spinlock latency");

#endif

static int _lwkt_getalltokens_sorted(thread_t td);

/*
 * Acquire the initial mplock
 *
 * (low level boot only)
 */
void
cpu_get_initial_mplock(void)
{
	KKASSERT(mp_token.t_ref == NULL);
	if (lwkt_trytoken(&mp_token) == FALSE)
		panic("cpu_get_initial_mplock");
}

/*
 * Return a pool token given an address.  Use a prime number to reduce
 * overlaps.
 */
static __inline
lwkt_token_t
_lwkt_token_pool_lookup(void *ptr)
{
	u_int i;

	i = (u_int)(uintptr_t)ptr % LWKT_NUM_POOL_TOKENS;
	return(&pool_tokens[i]);
}

/*
 * Initialize a tokref_t prior to making it visible in the thread's
 * token array.
 */
static __inline
void
_lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
{
	ref->tr_tok = tok;
	ref->tr_count = excl;
	ref->tr_owner = td;
}

/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared
 * acquisitions).
 */
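
/*
 * For reference, the mode combinations passed by the callers in this file:
 *
 *	TOK_EXCLUSIVE|TOK_EXCLREQ	lwkt_gettoken()        (blocking excl)
 *	TOK_EXCLUSIVE			lwkt_trytoken()        (non-blocking excl)
 *	TOK_EXCLREQ			lwkt_gettoken_shared() (blocking shared)
 *	scan->tr_count			lwkt_getalltokens()    (whatever the
 *					original acquisition requested)
 */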
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		"held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       (count & ~TOK_EXCLREQ) |
						       TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit, we treat this tokref as a shared
				 * token (sorta) to make the token release
				 * code easier.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
				/* XXX EXCLREQ should work */
				/*
				 * It is possible to get the token shared.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       count + TOK_INCR)) {
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}

static __inline
int
_lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
{
	int spin;

	if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
		long j;
		for (j = tokens_add_latency; j > 0; --j)
			cpu_ccfence();
#endif
		return TRUE;
	}
	for (spin = lwkt_token_spin; spin > 0; --spin) {
		if (lwkt_token_delay)
			tsc_delay(lwkt_token_delay);
		else
			cpu_pause();
		if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
			long j;
			for (j = tokens_add_latency; j > 0; --j)
				cpu_ccfence();
#endif
			return TRUE;
		}
	}
	return FALSE;
}

/*
 * Release a token that we hold.
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
	lwkt_token_t tok;
	long count;

	tok = ref->tr_tok;
	for (;;) {
		count = tok->t_count;
		cpu_ccfence();
		if (tok->t_ref == ref) {
			/*
			 * We are an exclusive holder.  We must clear tr_ref
			 * before we clear the TOK_EXCLUSIVE bit.  If we are
			 * unable to clear the bit we must restore
			 * tok->t_ref.
			 */
			KKASSERT(count & TOK_EXCLUSIVE);
			tok->t_ref = NULL;
			if (atomic_cmpset_long(&tok->t_count, count,
					       count & ~TOK_EXCLUSIVE)) {
				return;
			}
			tok->t_ref = ref;
			/* retry */
		} else {
			/*
			 * We are a shared holder
			 */
			KKASSERT(count & TOK_COUNTMASK);
			if (atomic_cmpset_long(&tok->t_count, count,
					       count - TOK_INCR)) {
				return;
			}
			/* retry */
		}
	}
}

/*
 * Obtain all the tokens required by the specified thread on the current
 * cpu, return 0 on failure and non-zero on success.  If a failure occurs
 * any partially acquired tokens will be released prior to return.
 *
 * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 * tokens that the thread had to release when it switched away.
 *
 * If spinning is non-zero this function acquires the tokens in a particular
 * order to deal with potential deadlocks.  We simply use address order for
 * the case.
 *
 * Called from a critical section.
 */
int
lwkt_getalltokens(thread_t td, int spinning)
{
	lwkt_tokref_t scan;
	lwkt_token_t tok;

	if (spinning)
		return(_lwkt_getalltokens_sorted(td));

	/*
	 * Acquire tokens in forward order, assign or validate tok->t_ref.
	 */
	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
		tok = scan->tr_tok;
		for (;;) {
			/*
			 * Only try really hard on the last token
			 */
			if (scan == td->td_toks_stop - 1) {
				if (_lwkt_trytokref_spin(scan, td,
							 scan->tr_count))
					break;
			} else {
				if (_lwkt_trytokref(scan, td, scan->tr_count))
					break;
			}

			/*
			 * Otherwise we failed to acquire all the tokens.
			 * Release whatever we did get.
			 */
			if (lwkt_sched_debug > 0) {
				--lwkt_sched_debug;
				kprintf("toka %p %s %s\n",
					tok, tok->t_desc, td->td_comm);
			}
			td->td_wmesg = tok->t_desc;
			++tok->t_collisions;
			while (--scan >= &td->td_toks_base)
				_lwkt_reltokref(scan, td);
			return(FALSE);
		}
	}
	return(TRUE);
}

/*
 * Release all tokens owned by the specified thread on the current cpu.
 *
 * This code is really simple.  Even in cases where we own all the tokens
 * note that t_ref may not match the scan for recursively held tokens which
 * are held deeper in the stack, or for the case where a lwkt_getalltokens()
 * failed.
 *
 * Tokens are released in reverse order to reduce chasing race failures.
 *
 * Called from a critical section.
 */
void
lwkt_relalltokens(thread_t td)
{
	lwkt_tokref_t scan;

	/*
	 * Weird order is to try to avoid a panic loop
	 */
	if (td->td_toks_have) {
		scan = td->td_toks_have;
		td->td_toks_have = NULL;
	} else {
		scan = td->td_toks_stop;
	}
	while (--scan >= &td->td_toks_base)
		_lwkt_reltokref(scan, td);
}

/*
 * This is the decontention version of lwkt_getalltokens().  The tokens are
 * acquired in address-sorted order to deal with any deadlocks.  Ultimately
 * token failures will spin into the scheduler and get here.
 *
 * Called from critical section
 */
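
/*
 * For example (illustrative): if thread A holds token X and needs Y while
 * thread B holds Y and needs X, both fail, spin into the scheduler, and
 * wind up here.  Each thread's held tokens were released when it switched
 * away, and both now retry in address order, so both contend on the
 * lower-addressed token first and one of them can always acquire the
 * whole set and make progress.
 */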
static
int
_lwkt_getalltokens_sorted(thread_t td)
{
	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
	lwkt_tokref_t scan;
	lwkt_token_t tok;
	int i;
	int j;
	int n;

	/*
	 * Sort the token array.  Yah yah, I know this isn't fun.
	 *
	 * NOTE: Recursively acquired tokens are ordered the same as in the
	 *	 td_toks_array so we can always get the earliest one first.
	 */
	i = 0;
	scan = &td->td_toks_base;
	while (scan < td->td_toks_stop) {
		for (j = 0; j < i; ++j) {
			if (scan->tr_tok < sort_array[j]->tr_tok)
				break;
		}
		if (j != i) {
			bcopy(sort_array + j, sort_array + j + 1,
			      (i - j) * sizeof(lwkt_tokref_t));
		}
		sort_array[j] = scan;
		++scan;
		++i;
	}
	n = i;

	/*
	 * Acquire tokens in forward order, assign or validate tok->t_ref.
	 */
	for (i = 0; i < n; ++i) {
		scan = sort_array[i];
		tok = scan->tr_tok;
		for (;;) {
			/*
			 * Only try really hard on the last token
			 */
			if (scan == td->td_toks_stop - 1) {
				if (_lwkt_trytokref_spin(scan, td,
							 scan->tr_count))
					break;
			} else {
				if (_lwkt_trytokref(scan, td, scan->tr_count))
					break;
			}

			/*
			 * Otherwise we failed to acquire all the tokens.
			 * Release whatever we did get.
			 */
			if (lwkt_sched_debug > 0) {
				--lwkt_sched_debug;
				kprintf("tokb %p %s %s\n",
					tok, tok->t_desc, td->td_comm);
			}
			td->td_wmesg = tok->t_desc;
			++tok->t_collisions;
			while (--i >= 0) {
				scan = sort_array[i];
				_lwkt_reltokref(scan, td);
			}
			return(FALSE);
		}
	}

	/*
	 * We were successful, there is no need for another core to signal
	 * us.
	 */
	return(TRUE);
}

/*
 * Get a serializing token.  This routine can block.
 */
void
lwkt_gettoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking an exclusive token after holding it shared will
	 * livelock.  Scan for that case and assert.
	 */
	lwkt_tokref_t tk;
	int found = 0;

	for (tk = &td->td_toks_base; tk < ref; tk++) {
		if (tk->tr_tok != tok)
			continue;

		found++;
		if (tk->tr_count & TOK_EXCLUSIVE)
			goto good;
	}
	/* We found only shared instances of this token if found >0 here */
	KASSERT((found == 0), ("Token %p s/x livelock", tok));
good:
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;
	lwkt_switch();
	logtoken(succ, ref);
	KKASSERT(tok->t_ref == ref);
}

/*
 * Similar to gettoken but we acquire a shared token instead of an exclusive
 * token.
 */
void
lwkt_gettoken_shared(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking a pool token in shared mode is a bad idea; other
	 * addresses deeper in the call stack may hash to the same pool
	 * token and you may end up with an exclusive-shared livelock.
	 * Warn in this condition.
	 */
	if ((tok >= &pool_tokens[0]) &&
	    (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS]))
		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;
	lwkt_switch();
	logtoken(succ, ref);
}
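
/*
 * Typical shared vs exclusive usage (an illustrative sketch; foo_token,
 * foo_lookup() and foo_insert() are hypothetical):
 *
 *	lwkt_gettoken_shared(&foo_token);	(many readers may hold it)
 *	fp = foo_lookup(id);
 *	lwkt_reltoken(&foo_token);
 *
 *	lwkt_gettoken(&foo_token);		(exclusive for modification)
 *	foo_insert(fp);
 *	lwkt_reltoken(&foo_token);
 *
 * Note the s/x livelock assertion above: do not request a token
 * exclusively while already holding it shared.
 */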

/*
 * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 *
 * We setup the tokref in case we actually get the token (if we switch later
 * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 * TOK_EXCLREQ in case we fail.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
		return TRUE;

	/*
	 * Failed, unpend the request
	 */
	cpu_ccfence();
	--td->td_toks_stop;
	++tok->t_collisions;
	return FALSE;
}
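
/*
 * A common lwkt_trytoken() pattern (an illustrative sketch; the names are
 * hypothetical): take the fast path opportunistically and fall back if
 * the token is busy:
 *
 *	if (lwkt_trytoken(&foo_token)) {
 *		foo_fast_path();
 *		lwkt_reltoken(&foo_token);
 *	} else {
 *		foo_defer();
 *	}
 */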

void
lwkt_gettoken_hard(lwkt_token_t tok)
{
	lwkt_gettoken(tok);
	crit_enter_hard();
}

lwkt_token_t
lwkt_getpooltoken(void *ptr)
{
	lwkt_token_t tok;

	tok = _lwkt_token_pool_lookup(ptr);
	lwkt_gettoken(tok);
	return(tok);
}
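
/*
 * Pool token usage sketch (illustrative; fp is a hypothetical structure
 * pointer).  The token is hashed from the address, so the structure does
 * not need to embed its own token:
 *
 *	lwkt_token_t tok;
 *
 *	tok = lwkt_getpooltoken(fp);
 *	fp->flags |= FOO_BUSY;
 *	lwkt_reltoken(tok);
 *
 * lwkt_relpooltoken(fp) may be used in place of lwkt_reltoken(tok).
 */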

/*
 * Release a serializing token.
 *
 * WARNING!  All tokens must be released in reverse order.  This will be
 *	     asserted.
 */
void
lwkt_reltoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	/*
	 * Remove ref from thread token list and assert that it matches
	 * the token passed in.  Tokens must be released in reverse order.
	 */
	ref = td->td_toks_stop - 1;
	KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
	_lwkt_reltokref(ref, td);
	cpu_sfence();
	td->td_toks_stop = ref;
}

void
lwkt_reltoken_hard(lwkt_token_t tok)
{
	lwkt_reltoken(tok);
	crit_exit_hard();
}

/*
 * It is faster for users of lwkt_getpooltoken() to use the returned
 * token and just call lwkt_reltoken(), but for convenience we provide
 * this function which looks the token up based on the ident.
 */
void
lwkt_relpooltoken(void *ptr)
{
	lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);

	lwkt_reltoken(tok);
}

/*
 * Return a count of the number of token refs the thread has to the
 * specified token, whether it currently owns the token or not.
 */
int
lwkt_cnttoken(lwkt_token_t tok, thread_t td)
{
	lwkt_tokref_t scan;
	int count = 0;

	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
		if (scan->tr_tok == tok)
			++count;
	}
	return(count);
}

/*
 * Pool tokens are used to provide a type-stable serializing token
 * pointer that does not race against disappearing data structures.
 *
 * This routine is called in early boot just after we setup the BSP's
 * globaldata structure.
 */
void
lwkt_token_pool_init(void)
{
	int i;

	for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
		lwkt_token_init(&pool_tokens[i], "pool");
}

lwkt_token_t
lwkt_token_pool_lookup(void *ptr)
{
	return(_lwkt_token_pool_lookup(ptr));
}

/*
 * Initialize a token.
 */
void
lwkt_token_init(lwkt_token_t tok, const char *desc)
{
	tok->t_count = 0;
	tok->t_ref = NULL;
	tok->t_collisions = 0;
	tok->t_desc = desc;
}

void
lwkt_token_uninit(lwkt_token_t tok)
{
	/* empty */
}

/*
 * Exchange the two most recent tokens on the tokref stack.  This allows
 * you to release a token out of order.
 *
 * We have to be careful about the case where the top two tokens are
 * the same token.  In this case tok->t_ref will point to the deeper
 * ref and must remain pointing to the deeper ref.  If we were to swap
 * it the first release would clear the token even though a second
 * ref is still present.
 *
 * Only exclusively held tokens contain a reference to the tokref which
 * has to be flipped along with the swap.
 */
void
lwkt_token_swap(void)
{
	lwkt_tokref_t ref1, ref2;
	lwkt_token_t tok1, tok2;
	long count1, count2;
	thread_t td = curthread;

	crit_enter();

	ref1 = td->td_toks_stop - 1;
	ref2 = td->td_toks_stop - 2;
	KKASSERT(ref1 >= &td->td_toks_base);
	KKASSERT(ref2 >= &td->td_toks_base);

	tok1 = ref1->tr_tok;
	tok2 = ref2->tr_tok;
	count1 = ref1->tr_count;
	count2 = ref2->tr_count;

	if (tok1 != tok2) {
		ref1->tr_tok = tok2;
		ref1->tr_count = count2;
		ref2->tr_tok = tok1;
		ref2->tr_count = count1;
		if (tok1->t_ref == ref1)
			tok1->t_ref = ref2;
		if (tok2->t_ref == ref2)
			tok2->t_ref = ref1;
	}

	crit_exit();
}
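
/*
 * Out-of-order release sketch (illustrative; tokA and tokB are
 * hypothetical).  To release tokA while retaining the more recently
 * acquired tokB, swap the top two refs first:
 *
 *	lwkt_gettoken(&tokA);
 *	lwkt_gettoken(&tokB);
 *	...
 *	lwkt_token_swap();		(tokA is now on top of the stack)
 *	lwkt_reltoken(&tokA);
 *	...
 *	lwkt_reltoken(&tokB);
 */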