/* sys/kern/lwkt_token.c */
/*
 * Copyright (c) 2003-2006,2009-2019 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * lwkt_token - Implement soft token locks.
 *
 * Tokens are locks which serialize a thread only while the thread is
 * running.  If the thread blocks all tokens are released, then reacquired
 * when the thread resumes.
 *
 * This implementation requires no critical sections or spin locks, but
 * does use atomic_cmpset_ptr().
 *
 * Tokens may be recursively acquired by the same thread.  However the
 * caller must be sure to release such tokens in reverse order.
 */
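
/*
 * A minimal usage sketch ("my_token" is a hypothetical token; the APIs
 * referenced here are all defined later in this file):
 *
 *	struct lwkt_token my_token = LWKT_TOKEN_INITIALIZER(my_token);
 *
 *	lwkt_gettoken(&my_token);	(serializes; may block)
 *	...				(token held while the thread runs)
 *	lwkt_reltoken(&my_token);	(release in reverse order)
 */
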
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/spinlock.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#endif

extern int lwkt_sched_debug;

#define LWKT_POOL_TOKENS	16384		/* must be power of 2 */
#define LWKT_POOL_MASK		(LWKT_POOL_TOKENS - 1)

struct lwkt_pool_token {
        struct lwkt_token	token;
} __cachealign;

static struct lwkt_pool_token	pool_tokens[LWKT_POOL_TOKENS];
static struct spinlock		tok_debug_spin =
        SPINLOCK_INITIALIZER(&tok_debug_spin, "tok_debug_spin");

#define TOKEN_STRING		"REF=%p TOK=%p TD=%p"
#define TOKEN_ARGS		lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
#define CONTENDED_STRING	TOKEN_STRING " (contention started)"
#define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
#if !defined(KTR_TOKENS)
#define KTR_TOKENS	KTR_ALL
#endif

KTR_INFO_MASTER(tokens);
KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
#if 0
KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
#endif

#define logtoken(name, ref)						\
        KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)

/*
 * Global tokens.  These replace the MP lock for major subsystem locking.
 * These tokens are initially used to lock both global and individual
 * operations.
 *
 * Once individual structures get their own locks these tokens are used
 * only to protect global lists & other variables and to interlock
 * allocations and teardowns and such.
 *
 * The UP initializer causes token acquisition to also acquire the MP lock
 * for maximum compatibility.  The feature may be enabled and disabled at
 * any time, the MP state is copied to the tokref when the token is acquired
 * and will not race against sysctl changes.
 */
struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
struct lwkt_token vga_token = LWKT_TOKEN_INITIALIZER(vga_token);
struct lwkt_token kbd_token = LWKT_TOKEN_INITIALIZER(kbd_token);

/*
 * Exponential backoff (exclusive tokens) and TSC windowing (shared tokens)
 * parameters.  Remember that tokens back off to the scheduler.  This is a
 * bit of a trade-off.  Smaller values like 128 work better in some
 * situations, but under extreme loads larger values like 4096 seem to
 * provide the most determinism.
 */
static int token_backoff_max __cachealign = 4096;
SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
    &token_backoff_max, 0, "Tokens exponential backoff");
static int token_window_shift __cachealign = 8;
SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
    &token_window_shift, 0, "Tokens TSC windowing shift");

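/*
 * For example, both knobs can be tuned at runtime via sysctl (a sketch;
 * the values shown are illustrative only, not recommendations):
 *
 *	sysctl lwkt.token_backoff_max=128
 *	sysctl lwkt.token_window_shift=10
 */
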
/*
 * The collision count is bumped every time the LWKT scheduler fails
 * to acquire needed tokens in addition to a normal lwkt_gettoken()
 * stall.
 */
SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
    &mp_token.t_collisions, 0, "Collision counter of mp_token");
SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
    &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
    &dev_token.t_collisions, 0, "Collision counter of dev_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
    &vm_token.t_collisions, 0, "Collision counter of vm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
    &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
    &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
    &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
    &tty_token.t_collisions, 0, "Collision counter of tty_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
    &vnode_token.t_collisions, 0, "Collision counter of vnode_token");

static int tokens_debug_output;
SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
    &tokens_debug_output, 0, "Generate stack trace N times");

static int _lwkt_getalltokens_sorted(thread_t td);

/*
 * Acquire the initial mplock
 *
 * (low level boot only)
 */
void
cpu_get_initial_mplock(void)
{
        KKASSERT(mp_token.t_ref == NULL);
        if (lwkt_trytoken(&mp_token) == FALSE)
                panic("cpu_get_initial_mplock");
}

/*
 * Return a pool token given an address.  Use a prime number to reduce
 * overlaps.
 */
#define POOL_HASH_PRIME1	66555444443333333ULL
#define POOL_HASH_PRIME2	989042931893ULL

static __inline
lwkt_token_t
_lwkt_token_pool_lookup(void *ptr)
{
        uintptr_t hash1;
        uintptr_t hash2;

        hash1 = (uintptr_t)ptr + ((uintptr_t)ptr >> 18);
        hash1 %= POOL_HASH_PRIME1;
        hash2 = ((uintptr_t)ptr >> 8) + ((uintptr_t)ptr >> 24);
        hash2 %= POOL_HASH_PRIME2;
        return (&pool_tokens[(hash1 ^ hash2) & LWKT_POOL_MASK].token);
}

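/*
 * Usage sketch for pool tokens ("obj" is a hypothetical structure
 * pointer).  The same address always hashes to the same token, so this
 * serializes access to the object without embedding a token in it:
 *
 *	lwkt_token_t tok = lwkt_getpooltoken(obj);
 *	...
 *	lwkt_reltoken(tok);		(or lwkt_relpooltoken(obj))
 */
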
/*
 * Initialize a tokref_t prior to making it visible in the thread's
 * token array.
 */
static __inline
void
_lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
{
        ref->tr_tok = tok;
        ref->tr_count = excl;
        ref->tr_owner = td;
}

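/*
 * Reader's note (a summary inferred from the code below; the macro
 * definitions themselves live in the LWKT headers): tok->t_count packs
 * the TOK_EXCLUSIVE flag (an exclusive holder exists), the TOK_EXCLREQ
 * flag (an exclusive request is pending), and a shared-hold count
 * maintained in units of TOK_INCR and masked with TOK_COUNTMASK.
 */
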
/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared
 * acquisitions).
 */
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
        lwkt_token_t tok;
        lwkt_tokref_t oref;
        long count;

        tok = ref->tr_tok;
        KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
                td->td_gd->gd_intr_nesting_level == 0 ||
                panic_cpu_gd == mycpu),
                ("Attempt to acquire token %p not already "
                 "held in hard code section", tok));

        if (mode & TOK_EXCLUSIVE) {
                /*
                 * Attempt to get an exclusive token
                 */
                count = tok->t_count;

                for (;;) {
                        oref = tok->t_ref;	/* can be NULL */
                        cpu_ccfence();
                        if ((count & ~TOK_EXCLREQ) == 0) {
                                /*
                                 * It is possible to get the exclusive bit.
                                 * We must clear TOK_EXCLREQ on successful
                                 * acquisition.
                                 */
                                if (atomic_fcmpset_long(&tok->t_count, &count,
                                                        (count & ~TOK_EXCLREQ) |
                                                        TOK_EXCLUSIVE)) {
                                        KKASSERT(tok->t_ref == NULL);
                                        tok->t_ref = ref;
                                        return TRUE;
                                }
                                /* retry */
                        } else if ((count & TOK_EXCLUSIVE) &&
                                   oref >= &td->td_toks_base &&
                                   oref < td->td_toks_stop) {
                                /*
                                 * Our thread already holds the exclusive
                                 * bit, we treat this tokref as a shared
                                 * token (sorta) to make the token release
                                 * code easier.  Treating this as a shared
                                 * token allows us to simply increment the
                                 * count field.
                                 *
                                 * NOTE: oref cannot race above if it
                                 *	 happens to be ours, so we're good.
                                 *	 But we must still have a stable
                                 *	 variable for both parts of the
                                 *	 comparison.
                                 *
                                 * NOTE: Since we already have an exclusive
                                 *	 lock and don't need to check EXCLREQ
                                 *	 we can just use an atomic_add here
                                 */
                                atomic_add_long(&tok->t_count, TOK_INCR);
                                ref->tr_count &= ~TOK_EXCLUSIVE;
                                return TRUE;
                        } else if ((mode & TOK_EXCLREQ) &&
                                   (count & TOK_EXCLREQ) == 0) {
                                /*
                                 * Unable to get the exclusive bit but being
                                 * asked to set the exclusive-request bit.
                                 * Since we are going to retry anyway just
                                 * set the bit unconditionally.
                                 */
                                atomic_set_long(&tok->t_count, TOK_EXCLREQ);
                                return FALSE;
                        } else {
                                /*
                                 * Unable to get the exclusive bit and not
                                 * being asked to set the exclusive-request
                                 * (aka lwkt_trytoken()), or EXCLREQ was
                                 * already set.
                                 */
                                cpu_pause();
                                return FALSE;
                        }
                        /* retry */
                }
        } else {
                /*
                 * Attempt to get a shared token.  Note that TOK_EXCLREQ
                 * for shared tokens simply means the caller intends to
                 * block.  We never actually set the bit in tok->t_count.
                 *
                 * Due to the token's no-deadlock guarantee, and complications
                 * created by the sorted reacquisition code, we can only
                 * give exclusive requests priority over shared requests
                 * in situations where the thread holds only one token.
                 */
                count = tok->t_count;

                for (;;) {
                        oref = tok->t_ref;	/* can be NULL */
                        cpu_ccfence();
                        if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
                            ((count & TOK_EXCLUSIVE) == 0 &&
                            td->td_toks_stop != &td->td_toks_base + 1)
                        ) {
                                /*
                                 * It may be possible to get the token shared.
                                 */
                                if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
                                        return TRUE;
                                }
                                count = atomic_fetchadd_long(&tok->t_count,
                                                             -TOK_INCR);
                                count -= TOK_INCR;
                                /* retry */
                        } else if ((count & TOK_EXCLUSIVE) &&
                                   oref >= &td->td_toks_base &&
                                   oref < td->td_toks_stop) {
                                /*
                                 * We own the exclusive bit on the token so
                                 * we can in fact also get it shared.
                                 */
                                atomic_add_long(&tok->t_count, TOK_INCR);
                                return TRUE;
                        } else {
                                /*
                                 * We failed to get the token shared
                                 */
                                return FALSE;
                        }
                        /* retry */
                }
        }
}

static __inline
int
_lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
{
        if (_lwkt_trytokref(ref, td, mode))
                return TRUE;

        if (mode & TOK_EXCLUSIVE) {
                /*
                 * Contested exclusive token, use exponential backoff
                 * algorithm.
                 */
                long expbackoff;
                long loop;

                expbackoff = 0;
                while (expbackoff < 6 + token_backoff_max) {
                        expbackoff = (expbackoff + 1) * 3 / 2;
                        if ((rdtsc() >> token_window_shift) % ncpus != mycpuid) {
                                for (loop = expbackoff; loop; --loop)
                                        cpu_pause();
                        }
                        if (_lwkt_trytokref(ref, td, mode))
                                return TRUE;
                }
        } else {
                /*
                 * Contested shared token, use TSC windowing.  Note that
                 * exclusive tokens have priority over shared tokens only
                 * for the first token.
                 */
                if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
                        if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
                                return TRUE;
                } else {
                        if (_lwkt_trytokref(ref, td, mode))
                                return TRUE;
                }
        }
        ++mycpu->gd_cnt.v_lock_colls;

        return FALSE;
}

/*
 * Release a token that we hold.
 *
 * Since tokens are polled, we don't have to deal with wakeups and releasing
 * is really easy.
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
        lwkt_token_t tok;
        long count;

        tok = ref->tr_tok;
        if (tok->t_ref == ref) {
                /*
                 * We are an exclusive holder.  We must clear tok->t_ref
                 * before we clear the TOK_EXCLUSIVE bit.  If we are
                 * unable to clear the bit we must restore
                 * tok->t_ref.
                 */
#if 0
                KKASSERT(count & TOK_EXCLUSIVE);
#endif
                tok->t_ref = NULL;
                atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
        } else {
                /*
                 * We are a shared holder
                 */
                count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
                KKASSERT(count & TOK_COUNTMASK);	/* count prior */
        }
}

/*
 * Obtain all the tokens required by the specified thread on the current
 * cpu, return 0 on failure and non-zero on success.  If a failure occurs
 * any partially acquired tokens will be released prior to return.
 *
 * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 * tokens that the thread had to release when it switched away.
 *
 * If spinning is non-zero this function acquires the tokens in a particular
 * order to deal with potential deadlocks.  We simply use address order in
 * that case.
 *
 * Called from a critical section.
 */
int
lwkt_getalltokens(thread_t td, int spinning)
{
        lwkt_tokref_t scan;
        lwkt_token_t tok;

        if (spinning)
                return(_lwkt_getalltokens_sorted(td));

        /*
         * Acquire tokens in forward order, assign or validate tok->t_ref.
         */
        for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
                tok = scan->tr_tok;
                for (;;) {
                        /*
                         * Only try really hard on the last token
                         */
                        if (scan == td->td_toks_stop - 1) {
                                if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
                                        break;
                        } else {
                                if (_lwkt_trytokref(scan, td, scan->tr_count))
                                        break;
                        }

                        /*
                         * Otherwise we failed to acquire all the tokens.
                         * Release whatever we did get.
                         */
                        KASSERT(tok->t_desc,
                                ("token %p is not initialized", tok));
                        td->td_gd->gd_cnt.v_lock_addr = tok;
                        td->td_gd->gd_cnt.v_lock_name[0] = 't';
                        strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
                                tok->t_desc,
                                sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
                        if (lwkt_sched_debug > 0) {
                                --lwkt_sched_debug;
                                kprintf("toka %p %s %s\n",
                                        tok, tok->t_desc, td->td_comm);
                        }
                        td->td_wmesg = tok->t_desc;
                        ++tok->t_collisions;
                        while (--scan >= &td->td_toks_base)
                                _lwkt_reltokref(scan, td);
                        return(FALSE);
                }
        }
        return (TRUE);
}

/*
 * Release all tokens owned by the specified thread on the current cpu.
 *
 * This code is really simple.  Even in cases where we own all the tokens
 * note that t_ref may not match the scan for recursively held tokens which
 * are held deeper in the stack, or for the case where a lwkt_getalltokens()
 * failed.
 *
 * Tokens are released in reverse order to reduce chasing race failures.
 *
 * Called from a critical section.
 */
void
lwkt_relalltokens(thread_t td)
{
        lwkt_tokref_t scan;

        /*
         * Weird order is to try to avoid a panic loop
         */
        if (td->td_toks_have) {
                scan = td->td_toks_have;
                td->td_toks_have = NULL;
        } else {
                scan = td->td_toks_stop;
        }
        while (--scan >= &td->td_toks_base)
                _lwkt_reltokref(scan, td);
}

/*
 * This is the decontention version of lwkt_getalltokens().  The tokens are
 * acquired in address-sorted order to deal with any deadlocks.  Ultimately
 * token failures will spin into the scheduler and get here.
 *
 * Called from critical section
 */
static
int
_lwkt_getalltokens_sorted(thread_t td)
{
        lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
        lwkt_tokref_t scan;
        lwkt_token_t tok;
        int i;
        int j;
        int n;

        /*
         * Sort the token array.  Yah yah, I know this isn't fun.
         *
         * NOTE: Recursively acquired tokens are ordered the same as in the
         *	 td_toks_array so we can always get the earliest one first.
         *	 This is particularly important when a token is acquired
         *	 exclusively multiple times, as only the first acquisition
         *	 is treated as an exclusive token.
         */
        i = 0;
        scan = &td->td_toks_base;
        while (scan < td->td_toks_stop) {
                for (j = 0; j < i; ++j) {
                        if (scan->tr_tok < sort_array[j]->tr_tok)
                                break;
                }
                if (j != i) {
                        bcopy(sort_array + j, sort_array + j + 1,
                              (i - j) * sizeof(lwkt_tokref_t));
                }
                sort_array[j] = scan;
                ++scan;
                ++i;
        }
        n = i;

        /*
         * Acquire tokens in forward order, assign or validate tok->t_ref.
         */
        for (i = 0; i < n; ++i) {
                scan = sort_array[i];
                tok = scan->tr_tok;
                for (;;) {
                        /*
                         * Only try really hard on the last token
                         */
                        if (scan == td->td_toks_stop - 1) {
                                if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
                                        break;
                        } else {
                                if (_lwkt_trytokref(scan, td, scan->tr_count))
                                        break;
                        }

                        /*
                         * Otherwise we failed to acquire all the tokens.
                         * Release whatever we did get.
                         */
                        td->td_gd->gd_cnt.v_lock_addr = tok;
                        td->td_gd->gd_cnt.v_lock_name[0] = 't';
                        strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
                                tok->t_desc,
                                sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
                        if (lwkt_sched_debug > 0) {
                                --lwkt_sched_debug;
                                kprintf("tokb %p %s %s\n",
                                        tok, tok->t_desc, td->td_comm);
                        }
                        td->td_wmesg = tok->t_desc;
                        ++tok->t_collisions;
                        while (--i >= 0) {
                                scan = sort_array[i];
                                _lwkt_reltokref(scan, td);
                        }
                        return(FALSE);
                }
        }

        /*
         * We were successful, there is no need for another core to signal
         * us.
         */
        return (TRUE);
}

/*
 * Get a serializing token.  This routine can block.
 */
void
lwkt_gettoken(lwkt_token_t tok)
{
        thread_t td = curthread;
        lwkt_tokref_t ref;

        ref = td->td_toks_stop;
        KKASSERT(ref < &td->td_toks_end);
        ++td->td_toks_stop;
        cpu_ccfence();
        _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
        /*
         * Taking an exclusive token after holding it shared will
         * livelock.  Scan for that case and assert.
         */
        lwkt_tokref_t tk;
        int found = 0;
        for (tk = &td->td_toks_base; tk < ref; tk++) {
                if (tk->tr_tok != tok)
                        continue;

                found++;
                if (tk->tr_count & TOK_EXCLUSIVE)
                        goto good;
        }
        /* We found only shared instances of this token if found >0 here */
        KASSERT((found == 0), ("Token %p s/x livelock", tok));
good:
#endif

        if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
                return;

        /*
         * Give up running if we can't acquire the token right now.
         *
         * Since the tokref is already active the scheduler now
         * takes care of acquisition, so we need only call
         * lwkt_switch().
         *
         * Since we failed this was not a recursive token so upon
         * return tr_tok->t_ref should be assigned to this specific
         * ref.
         */
        td->td_wmesg = tok->t_desc;
        ++tok->t_collisions;
        logtoken(fail, ref);
        td->td_toks_have = td->td_toks_stop - 1;

        if (tokens_debug_output > 0) {
                --tokens_debug_output;
                spin_lock(&tok_debug_spin);
                kprintf("Excl Token %p thread %p %s %s\n",
                        tok, td, tok->t_desc, td->td_comm);
                print_backtrace(6);
                kprintf("\n");
                spin_unlock(&tok_debug_spin);
        }

        atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
        lwkt_switch();
        logtoken(succ, ref);
        KKASSERT(tok->t_ref == ref);
}

/*
 * Similar to gettoken but we acquire a shared token instead of an exclusive
 * token.
 */
void
lwkt_gettoken_shared(lwkt_token_t tok)
{
        thread_t td = curthread;
        lwkt_tokref_t ref;

        ref = td->td_toks_stop;
        KKASSERT(ref < &td->td_toks_end);
        ++td->td_toks_stop;
        cpu_ccfence();
        _lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
        /*
         * Taking a pool token in shared mode is a bad idea; other
         * addresses deeper in the call stack may hash to the same pool
         * token and you may end up with an exclusive-shared livelock.
         * Warn in this condition.
         */
        if ((tok >= &pool_tokens[0].token) &&
            (tok < &pool_tokens[LWKT_POOL_TOKENS].token))
                kprintf("Warning! Taking pool token %p in shared mode\n", tok);
#endif

        if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
                return;

        /*
         * Give up running if we can't acquire the token right now.
         *
         * Since the tokref is already active the scheduler now
         * takes care of acquisition, so we need only call
         * lwkt_switch().
         *
         * Since we failed this was not a recursive token so upon
         * return tr_tok->t_ref should be assigned to this specific
         * ref.
         */
        td->td_wmesg = tok->t_desc;
        ++tok->t_collisions;
        logtoken(fail, ref);
        td->td_toks_have = td->td_toks_stop - 1;

        if (tokens_debug_output > 0) {
                --tokens_debug_output;
                spin_lock(&tok_debug_spin);
                kprintf("Shar Token %p thread %p %s %s\n",
                        tok, td, tok->t_desc, td->td_comm);
                print_backtrace(6);
                kprintf("\n");
                spin_unlock(&tok_debug_spin);
        }

        atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
        lwkt_switch();
        logtoken(succ, ref);
}

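/*
 * Shared-mode usage sketch ("my_token" is hypothetical).  Any number of
 * threads may hold the token shared at once; release goes through the
 * same lwkt_reltoken():
 *
 *	lwkt_gettoken_shared(&my_token);
 *	...				(read-mostly work)
 *	lwkt_reltoken(&my_token);
 */
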
/*
 * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 *
 * We set up the tokref in case we actually get the token (if we switch later
 * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 * TOK_EXCLREQ in case we fail.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
        thread_t td = curthread;
        lwkt_tokref_t ref;

        ref = td->td_toks_stop;
        KKASSERT(ref < &td->td_toks_end);
        ++td->td_toks_stop;
        cpu_ccfence();
        _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

        if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
                return TRUE;

        /*
         * Failed, unpend the request
         */
        cpu_ccfence();
        --td->td_toks_stop;
        ++tok->t_collisions;
        return FALSE;
}

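/*
 * Non-blocking usage sketch ("my_token" is hypothetical).  On success the
 * token is held and must still be released with lwkt_reltoken():
 *
 *	if (lwkt_trytoken(&my_token)) {
 *		...			(token held)
 *		lwkt_reltoken(&my_token);
 *	} else {
 *		...			(fall back; e.g. requeue the work)
 *	}
 */
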
lwkt_token_t
lwkt_getpooltoken(void *ptr)
{
        lwkt_token_t tok;

        tok = _lwkt_token_pool_lookup(ptr);
        lwkt_gettoken(tok);
        return (tok);
}

/*
 * Release a serializing token.
 *
 * WARNING! All tokens must be released in reverse order.  This will be
 *	    asserted.
 */
void
lwkt_reltoken(lwkt_token_t tok)
{
        thread_t td = curthread;
        lwkt_tokref_t ref;

        /*
         * Remove ref from thread token list and assert that it matches
         * the token passed in.  Tokens must be released in reverse order.
         */
        ref = td->td_toks_stop - 1;
        if (__predict_false(ref < &td->td_toks_base || ref->tr_tok != tok)) {
                kprintf("LWKT_RELTOKEN ASSERTION td %p tok %p ref %p/%p\n",
                        td, tok, &td->td_toks_base, ref);
                kprintf("REF CONTENT: tok=%p count=%016lx owner=%p\n",
                        ref->tr_tok, ref->tr_count, ref->tr_owner);
                if (ref < &td->td_toks_base) {
                        kprintf("lwkt_reltoken: no tokens to release\n");
                } else {
                        kprintf("lwkt_reltoken: release wants %s and got %s\n",
                                tok->t_desc, ref->tr_tok->t_desc);
                }
                panic("lwkt_reltoken: illegal release");
        }
        _lwkt_reltokref(ref, td);
        cpu_sfence();
        td->td_toks_stop = ref;
}

/*
 * It is faster for users of lwkt_getpooltoken() to use the returned
 * token and just call lwkt_reltoken(), but for convenience we provide
 * this function which looks the token up based on the ident.
 */
void
lwkt_relpooltoken(void *ptr)
{
        lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
        lwkt_reltoken(tok);
}

/*
 * Return a count of the number of token refs the thread has to the
 * specified token, whether it currently owns the token or not.
 */
int
lwkt_cnttoken(lwkt_token_t tok, thread_t td)
{
        lwkt_tokref_t scan;
        int count = 0;

        for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
                if (scan->tr_tok == tok)
                        ++count;
        }
        return(count);
}

/*
 * Pool tokens are used to provide a type-stable serializing token
 * pointer that does not race against disappearing data structures.
 *
 * This routine is called in early boot just after we set up the BSP's
 * globaldata structure.
 */
void
lwkt_token_pool_init(void)
{
        int i;

        for (i = 0; i < LWKT_POOL_TOKENS; ++i)
                lwkt_token_init(&pool_tokens[i].token, "pool");
}

lwkt_token_t
lwkt_token_pool_lookup(void *ptr)
{
        return (_lwkt_token_pool_lookup(ptr));
}

/*
 * Initialize a token.
 */
void
lwkt_token_init(lwkt_token_t tok, const char *desc)
{
        tok->t_count = 0;
        tok->t_ref = NULL;
        tok->t_collisions = 0;
        tok->t_desc = desc;
}

void
lwkt_token_uninit(lwkt_token_t tok)
{
        /* empty */
}

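/*
 * Runtime initialization sketch for a token embedded in a dynamically
 * allocated structure ("sc" and "sc_token" are hypothetical):
 *
 *	lwkt_token_init(&sc->sc_token, "mydev");
 *	...
 *	lwkt_token_uninit(&sc->sc_token);	(before freeing "sc")
 */
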
/*
 * Exchange the two most recent tokens on the tokref stack.  This allows
 * you to release a token out of order.
 *
 * We have to be careful about the case where the top two tokens are
 * the same token.  In this case tok->t_ref will point to the deeper
 * ref and must remain pointing to the deeper ref.  If we were to swap
 * it the first release would clear the token even though a second
 * ref is still present.
 *
 * Only exclusively held tokens contain a reference to the tokref which
 * has to be flipped along with the swap.
 */
void
lwkt_token_swap(void)
{
        lwkt_tokref_t ref1, ref2;
        lwkt_token_t tok1, tok2;
        long count1, count2;
        thread_t td = curthread;

        crit_enter();

        ref1 = td->td_toks_stop - 1;
        ref2 = td->td_toks_stop - 2;
        KKASSERT(ref1 >= &td->td_toks_base);
        KKASSERT(ref2 >= &td->td_toks_base);

        tok1 = ref1->tr_tok;
        tok2 = ref2->tr_tok;
        count1 = ref1->tr_count;
        count2 = ref2->tr_count;

        if (tok1 != tok2) {
                ref1->tr_tok = tok2;
                ref1->tr_count = count2;
                ref2->tr_tok = tok1;
                ref2->tr_count = count1;
                if (tok1->t_ref == ref1)
                        tok1->t_ref = ref2;
                if (tok2->t_ref == ref2)
                        tok2->t_ref = ref1;
        }

        crit_exit();
}

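/*
 * Out-of-order release sketch ("tokA" and "tokB" are hypothetical).  The
 * swap moves the older acquisition to the top of the tokref stack so it
 * can be released first:
 *
 *	lwkt_gettoken(&tokA);
 *	lwkt_gettoken(&tokB);
 *	lwkt_token_swap();		(tokA is now the most recent ref)
 *	lwkt_reltoken(&tokA);		(legal; tokB remains held)
 */
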
#ifdef DDB
DB_SHOW_COMMAND(tokens, db_tok_all)
{
        struct lwkt_token *tok, **ptr;
        struct lwkt_token *toklist[16] = {
                &mp_token,
                &pmap_token,
                &dev_token,
                &vm_token,
                &vmspace_token,
                &kvm_token,
                &sigio_token,
                &tty_token,
                &vnode_token,
                NULL
        };

        ptr = toklist;
        for (tok = *ptr; tok; tok = *(++ptr)) {
                db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n", tok,
                    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
                    tok->t_collisions, tok->t_desc);
        }
}
#endif /* DDB */