2 * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * lwkt_token - Implement soft token locks.
38 * Tokens are locks which serialize a thread only while the thread is
39 * running. If the thread blocks all tokens are released, then reacquired
40 * when the thread resumes.
42 * This implementation requires no critical sections or spin locks, but
43 * does use atomic_cmpset_ptr().
45 * Tokens may be recursively acquired by the same thread. However the
46 * caller must be sure to release such tokens in reverse order.
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
52 #include <sys/rtprio.h>
53 #include <sys/queue.h>
54 #include <sys/sysctl.h>
56 #include <sys/kthread.h>
57 #include <machine/cpu.h>
60 #include <sys/spinlock.h>
62 #include <sys/thread2.h>
63 #include <sys/spinlock2.h>
64 #include <sys/mplock2.h>
67 #include <vm/vm_param.h>
68 #include <vm/vm_kern.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_pager.h>
73 #include <vm/vm_extern.h>
74 #include <vm/vm_zone.h>
76 #include <machine/stdarg.h>
77 #include <machine/smp.h>
79 #ifndef LWKT_NUM_POOL_TOKENS
80 #define LWKT_NUM_POOL_TOKENS 1024 /* power of 2 */
82 #define LWKT_MASK_POOL_TOKENS (LWKT_NUM_POOL_TOKENS - 1)
84 static lwkt_token pool_tokens[LWKT_NUM_POOL_TOKENS];
86 #define TOKEN_STRING "REF=%p TOK=%p TD=%p"
87 #define CONTENDED_STRING "REF=%p TOK=%p TD=%p (contention started)"
88 #define UNCONTENDED_STRING "REF=%p TOK=%p TD=%p (contention stopped)"
89 #if !defined(KTR_TOKENS)
90 #define KTR_TOKENS KTR_ALL
93 KTR_INFO_MASTER(tokens);
94 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, sizeof(void *) * 3);
95 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, sizeof(void *) * 3);
97 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, sizeof(void *) * 3);
98 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, sizeof(void *) * 3);
99 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, sizeof(void *) * 3);
100 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, sizeof(void *) * 3);
101 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, sizeof(void *) * 3);
102 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, sizeof(void *) * 3);
103 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, sizeof(void *) * 3);
106 #define logtoken(name, ref) \
107 KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
110 * Global tokens. These replace the MP lock for major subsystem locking.
111 * These tokens are initially used to lock up both global and individual
114 * Once individual structures get their own locks these tokens are used
115 * only to protect global lists & other variables and to interlock
116 * allocations and teardowns and such.
118 * The UP initializer causes token acquisition to also acquire the MP lock
119 * for maximum compatibility. The feature may be enabled and disabled at
120 * any time, the MP state is copied to the tokref when the token is acquired
121 * and will not race against sysctl changes.
123 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
124 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
125 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
126 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
127 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
128 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
129 struct lwkt_token proc_token = LWKT_TOKEN_INITIALIZER(proc_token);
130 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
131 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
132 struct lwkt_token vmobj_token = LWKT_TOKEN_INITIALIZER(vmobj_token);
134 static int lwkt_token_ipi_dispatch = 4;
135 SYSCTL_INT(_lwkt, OID_AUTO, token_ipi_dispatch, CTLFLAG_RW,
136 &lwkt_token_ipi_dispatch, 0, "Number of IPIs to dispatch on token release");
139 * The collision count is bumped every time the LWKT scheduler fails
140 * to acquire needed tokens in addition to a normal lwkt_gettoken()
143 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
144 &mp_token.t_collisions, 0, "Collision counter of mp_token");
145 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
146 &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
147 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
148 &dev_token.t_collisions, 0, "Collision counter of dev_token");
149 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
150 &vm_token.t_collisions, 0, "Collision counter of vm_token");
151 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
152 &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
153 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
154 &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
155 SYSCTL_LONG(_lwkt, OID_AUTO, proc_collisions, CTLFLAG_RW,
156 &proc_token.t_collisions, 0, "Collision counter of proc_token");
157 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
158 &tty_token.t_collisions, 0, "Collision counter of tty_token");
159 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
160 &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
164 * Acquire the initial mplock
166 * (low level boot only)
169 cpu_get_initial_mplock(void)
171 KKASSERT(mp_token.t_ref == NULL);
172 if (lwkt_trytoken(&mp_token) == FALSE)
173 panic("cpu_get_initial_mplock");
178 * Return a pool token given an address
182 _lwkt_token_pool_lookup(void *ptr)
186 i = ((int)(intptr_t)ptr >> 2) ^ ((int)(intptr_t)ptr >> 12);
187 return(&pool_tokens[i & LWKT_MASK_POOL_TOKENS]);
191 * Initialize a tokref_t prior to making it visible in the thread's
196 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td)
204 * Force a LWKT reschedule on the target cpu when a requested token
209 lwkt_reltoken_mask_remote(void *arg, int arg2, struct intrframe *frame)
216 * This bit of code sends a LWKT reschedule request to whatever other cpus
217 * had contended on the token being released. We could wake up all the cpus
218 * but generally speaking if there is a lot of contention we really only want
219 * to wake up a subset of cpus to avoid aggregating O(N^2) IPIs. The current
220 * cpuid is used as a basis to select which other cpus to wake up.
222 * For the selected cpus we can avoid issuing the actual IPI if the target
223 * cpu's RQF_WAKEUP is already set. In this case simply setting the
224 * reschedule flag RQF_AST_LWKT_RESCHED will be sufficient.
226 * lwkt.token_ipi_dispatch specifies the maximum number of IPIs to dispatch
227 * on a token release.
231 _lwkt_reltoken_mask(lwkt_token_t tok)
237 cpumask_t wumask; /* wakeup mask */
238 cpumask_t remask; /* clear mask */
239 int wucount; /* wakeup count */
244 * Mask of contending cpus we want to wake up.
246 mask = tok->t_collmask;
252 * Degenerate case - IPI to all contending cpus
254 wucount = lwkt_token_ipi_dispatch;
255 if (wucount <= 0 || wucount >= ncpus) {
265 * Calculate which cpus to IPI. These cpus are potentially in a
266 * HLT state waiting for token contention to go away.
268 * Ask the cpu LWKT scheduler to reschedule by setting
269 * RQF_AST_LWKT_RESCHEDULE. Signal the cpu if RQF_WAKEUP is not
270 * set (otherwise it has already been signalled or will check the
271 * flag very soon anyway). Both bits must be adjusted atomically
272 * all in one go to avoid races.
274 * The collision mask is cleared for all cpus we set the resched
275 * flag for, but we only IPI the ones that need signalling.
277 while (wucount && mask) {
278 tmpmask = mask & ~(CPUMASK(mycpu->gd_cpuid) - 1);
280 cpuid = BSFCPUMASK(tmpmask);
282 cpuid = BSFCPUMASK(mask);
283 ngd = globaldata_find(cpuid);
285 reqflags = ngd->gd_reqflags;
286 if (atomic_cmpset_int(&ngd->gd_reqflags, reqflags,
289 RQF_AST_LWKT_RESCHED))) {
293 if ((reqflags & RQF_WAKEUP) == 0) {
294 wumask |= CPUMASK(cpuid);
297 remask |= CPUMASK(cpuid);
298 mask &= ~CPUMASK(cpuid);
301 atomic_clear_cpumask(&tok->t_collmask, remask);
302 lwkt_send_ipiq3_mask(wumask, lwkt_reltoken_mask_remote,
309 * Obtain all the tokens required by the specified thread on the current
310 * cpu, return 0 on failure and non-zero on success. If a failure occurs
311 * any partially acquired tokens will be released prior to return.
313 * lwkt_getalltokens is called by the LWKT scheduler to acquire all
314 * tokens that the thread had acquired prior to going to sleep.
316 * We always clear the collision mask on token acquisition.
318 * Called from a critical section.
321 lwkt_getalltokens(thread_t td)
328 * Acquire tokens in forward order, assign or validate tok->t_ref.
330 for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
334 * Try to acquire the token if we do not already have
337 * NOTE: If atomic_cmpset_ptr() fails we have to
338 * loop and try again. It just means we
343 if (atomic_cmpset_ptr(&tok->t_ref, NULL, scan))
345 if (tok->t_collmask & td->td_gd->gd_cpumask) {
346 atomic_clear_cpumask(&tok->t_collmask,
347 td->td_gd->gd_cpumask);
355 * Someone holds the token.
357 * Test if ref is already recursively held by this
358 * thread. We cannot safely dereference tok->t_ref
359 * (it might belong to another thread and is thus
360 * unstable), but we don't have to. We can simply
363 if (ref >= &td->td_toks_base && ref < td->td_toks_stop)
368 * Otherwise we failed to acquire all the tokens.
369 * Undo and return. We have to try once more after
370 * setting cpumask to cover possible races against
371 * the checking of t_collmask.
373 atomic_set_cpumask(&tok->t_collmask,
374 td->td_gd->gd_cpumask);
375 if (atomic_cmpset_ptr(&tok->t_ref, NULL, scan)) {
376 if (tok->t_collmask & td->td_gd->gd_cpumask) {
377 atomic_clear_cpumask(&tok->t_collmask,
378 td->td_gd->gd_cpumask);
383 td->td_wmesg = tok->t_desc;
384 atomic_add_long(&tok->t_collisions, 1);
385 lwkt_relalltokens(td);
393 * Release all tokens owned by the specified thread on the current cpu.
395 * This code is really simple. Even in cases where we own all the tokens
396 * note that t_ref may not match the scan for recursively held tokens,
397 * or for the case where a lwkt_getalltokens() failed.
399 * The scheduler is responsible for maintaining the MP lock count, so
400 * we don't need to deal with tr_flags here.
402 * Called from a critical section.
405 lwkt_relalltokens(thread_t td)
410 for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
412 if (tok->t_ref == scan) {
414 _lwkt_reltoken_mask(tok);
420 * Token acquisition helper function. The caller must have already
421 * made nref visible by adjusting td_toks_stop and will be responsible
422 * for the disposition of nref on either success or failure.
424 * When acquiring tokens recursively we want tok->t_ref to point to
425 * the outer (first) acquisition so it gets cleared only on the last
430 _lwkt_trytokref2(lwkt_tokref_t nref, thread_t td, int blocking)
436 * Make sure the compiler does not reorder prior instructions
437 * beyond this demarcation point.
442 * Attempt to gain ownership
447 * Try to acquire the token if we do not already have
448 * it. This is not allowed if we are in a hard code
449 * section (because it 'might' have blocked).
453 KASSERT((blocking == 0 ||
454 td->td_gd->gd_intr_nesting_level == 0 ||
455 panic_cpu_gd == mycpu),
456 ("Attempt to acquire token %p not already "
457 "held in hard code section", tok));
460 * NOTE: If atomic_cmpset_ptr() fails we have to
461 * loop and try again. It just means we
464 if (atomic_cmpset_ptr(&tok->t_ref, NULL, nref))
470 * Test if ref is already recursively held by this
471 * thread. We cannot safely dereference tok->t_ref
472 * (it might belong to another thread and is thus
473 * unstable), but we don't have to. We can simply
476 * It is ok to acquire a token that is already held
477 * by the current thread when in a hard code section.
479 if (ref >= &td->td_toks_base && ref < td->td_toks_stop)
483 * Otherwise we failed, and it is not ok to attempt to
484 * acquire a token in a hard code section.
486 KASSERT((blocking == 0 ||
487 td->td_gd->gd_intr_nesting_level == 0),
488 ("Attempt to acquire token %p not already "
489 "held in hard code section", tok));
496 * Get a serializing token. This routine can block.
499 lwkt_gettoken(lwkt_token_t tok)
501 thread_t td = curthread;
504 ref = td->td_toks_stop;
505 KKASSERT(ref < &td->td_toks_end);
508 _lwkt_tokref_init(ref, tok, td);
510 if (_lwkt_trytokref2(ref, td, 1) == FALSE) {
512 * Give up running if we can't acquire the token right now.
514 * Since the tokref is already active the scheduler now
515 * takes care of acquisition, so we need only call
518 * Since we failed this was not a recursive token so upon
519 * return tr_tok->t_ref should be assigned to this specific
525 * (DISABLED ATM) - Do not set t_collmask on a token
526 * acquisition failure, the scheduler will spin at least
527 * once and deal with hlt/spin semantics.
529 atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
530 if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
531 atomic_clear_cpumask(&tok->t_collmask,
532 td->td_gd->gd_cpumask);
537 td->td_wmesg = tok->t_desc;
538 atomic_add_long(&tok->t_collisions, 1);
542 KKASSERT(tok->t_ref == ref);
547 lwkt_gettoken_hard(lwkt_token_t tok)
549 thread_t td = curthread;
552 ref = td->td_toks_stop;
553 KKASSERT(ref < &td->td_toks_end);
556 _lwkt_tokref_init(ref, tok, td);
558 if (_lwkt_trytokref2(ref, td, 1) == FALSE) {
560 * Give up running if we can't acquire the token right now.
562 * Since the tokref is already active the scheduler now
563 * takes care of acquisition, so we need only call
566 * Since we failed this was not a recursive token so upon
567 * return tr_tok->t_ref should be assigned to this specific
573 * (DISABLED ATM) - Do not set t_collmask on a token
574 * acquisition failure, the scheduler will spin at least
575 * once and deal with hlt/spin semantics.
577 atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
578 if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
579 atomic_clear_cpumask(&tok->t_collmask,
580 td->td_gd->gd_cpumask);
585 td->td_wmesg = tok->t_desc;
586 atomic_add_long(&tok->t_collisions, 1);
590 KKASSERT(tok->t_ref == ref);
597 crit_enter_hard_gd(td->td_gd);
601 lwkt_getpooltoken(void *ptr)
603 thread_t td = curthread;
607 tok = _lwkt_token_pool_lookup(ptr);
608 ref = td->td_toks_stop;
609 KKASSERT(ref < &td->td_toks_end);
612 _lwkt_tokref_init(ref, tok, td);
614 if (_lwkt_trytokref2(ref, td, 1) == FALSE) {
616 * Give up running if we can't acquire the token right now.
618 * Since the tokref is already active the scheduler now
619 * takes care of acquisition, so we need only call
622 * Since we failed this was not a recursive token so upon
623 * return tr_tok->t_ref should be assigned to this specific
629 * (DISABLED ATM) - Do not set t_collmask on a token
630 * acquisition failure, the scheduler will spin at least
631 * once and deal with hlt/spin semantics.
633 atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
634 if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
635 atomic_clear_cpumask(&tok->t_collmask,
636 td->td_gd->gd_cpumask);
641 td->td_wmesg = tok->t_desc;
642 atomic_add_long(&tok->t_collisions, 1);
646 KKASSERT(tok->t_ref == ref);
657 * Attempt to acquire a token, return TRUE on success, FALSE on failure.
660 lwkt_trytoken(lwkt_token_t tok)
662 thread_t td = curthread;
665 ref = td->td_toks_stop;
666 KKASSERT(ref < &td->td_toks_end);
669 _lwkt_tokref_init(ref, tok, td);
671 if (_lwkt_trytokref2(ref, td, 0) == FALSE) {
673 * Cleanup, deactivate the failed token.
683 * Release a serializing token.
685 * WARNING! All tokens must be released in reverse order. This will be
689 lwkt_reltoken(lwkt_token_t tok)
691 thread_t td = curthread;
695 * Remove ref from thread token list and assert that it matches
696 * the token passed in. Tokens must be released in reverse order.
698 ref = td->td_toks_stop - 1;
699 KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
702 * Only clear the token if it matches ref. If ref was a recursively
703 * acquired token it may not match. Then adjust td_toks_stop.
705 * Some comparisons must be run prior to adjusting td_toks_stop
706 * to avoid racing against a fast interrupt/IPI which tries to
709 * We must also be absolutely sure that the compiler does not
710 * reorder the clearing of t_ref and the adjustment of td_toks_stop,
711 * or reorder the adjustment of td_toks_stop against the conditional.
713 * NOTE: The mplock is a token also so sequencing is a bit complex.
715 if (tok->t_ref == ref) {
717 _lwkt_reltoken_mask(tok);
721 td->td_toks_stop = ref;
723 KKASSERT(tok->t_ref != ref);
727 lwkt_reltoken_hard(lwkt_token_t tok)
734 * It is faster for users of lwkt_getpooltoken() to use the returned
735 * token and just call lwkt_reltoken(), but for convenience we provide
736 * this function which looks the token up based on the ident.
739 lwkt_relpooltoken(void *ptr)
741 lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
746 * Return a count of the number of token refs the thread has to the
747 * specified token, whether it currently owns the token or not.
750 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
755 for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
756 if (scan->tr_tok == tok)
764 * Pool tokens are used to provide a type-stable serializing token
765 * pointer that does not race against disappearing data structures.
767 * This routine is called in early boot just after we setup the BSP's
768 * globaldata structure.
771 lwkt_token_pool_init(void)
775 for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
776 lwkt_token_init(&pool_tokens[i], "pool");
780 lwkt_token_pool_lookup(void *ptr)
782 return (_lwkt_token_pool_lookup(ptr));
786 * Initialize a token.
789 lwkt_token_init(lwkt_token_t tok, const char *desc)
792 tok->t_collisions = 0;
798 lwkt_token_uninit(lwkt_token_t tok)
804 * Exchange the two most recent tokens on the tokref stack. This allows
805 * you to release a token out of order.
807 * We have to be careful about the case where the top two tokens are
808 * the same token. In this case tok->t_ref will point to the deeper
809 * ref and must remain pointing to the deeper ref. If we were to swap
810 * it the first release would clear the token even though a second
811 * ref is still present.
814 lwkt_token_swap(void)
816 lwkt_tokref_t ref1, ref2;
817 lwkt_token_t tok1, tok2;
818 thread_t td = curthread;
822 ref1 = td->td_toks_stop - 1;
823 ref2 = td->td_toks_stop - 2;
824 KKASSERT(ref1 > &td->td_toks_base);
825 KKASSERT(ref2 > &td->td_toks_base);
832 if (tok1->t_ref == ref1)
834 if (tok2->t_ref == ref2)
843 lwkt_token_is_stale(lwkt_tokref_t ref)
845 lwkt_token_t tok = ref->tr_tok;
847 KKASSERT(tok->t_owner == curthread && ref->tr_state == 1 &&
850 /* Token is not stale */
851 if (tok->t_lastowner == tok->t_owner)
855 * The token is stale. Reset to not stale so that the next call to
856 * lwkt_token_is_stale will return "not stale" unless the token
857 * was acquired in-between by another thread.
859 tok->t_lastowner = tok->t_owner;