sys/kern/lwkt_token.c

   1 /*
   2  * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*
  36  * lwkt_token - Implement soft token locks.
  37  *
  38  * Tokens are locks which serialize a thread only while the thread is
  39  * running.  If the thread blocks all tokens are released, then reacquired
  40  * when the thread resumes.
  41  *
  42  * This implementation requires no critical sections or spin locks, but
  43  * does use atomic operations such as atomic_fcmpset_long().
  44  *
  45  * Tokens may be recursively acquired by the same thread.  However the
  46  * caller must be sure to release such tokens in reverse order.
  47  */
  48 #include <sys/param.h>
  49 #include <sys/systm.h>
  50 #include <sys/kernel.h>
  51 #include <sys/proc.h>
  52 #include <sys/rtprio.h>
  53 #include <sys/queue.h>
  54 #include <sys/sysctl.h>
  55 #include <sys/ktr.h>
  56 #include <sys/kthread.h>
  57 #include <machine/cpu.h>
  58 #include <sys/lock.h>
  59 #include <sys/spinlock.h>
  60
  61 #include <sys/thread2.h>
  62 #include <sys/spinlock2.h>
  63 #include <sys/mplock2.h>
  64
  65 #include <vm/vm.h>
  66 #include <vm/vm_param.h>
  67 #include <vm/vm_kern.h>
  68 #include <vm/vm_object.h>
  69 #include <vm/vm_page.h>
  70 #include <vm/vm_map.h>
  71 #include <vm/vm_pager.h>
  72 #include <vm/vm_extern.h>
  73 #include <vm/vm_zone.h>
  74
  75 #include <machine/stdarg.h>
  76 #include <machine/smp.h>
  77
  78 #include "opt_ddb.h"
  79 #ifdef DDB
  80 #include <ddb/ddb.h>
  81 #endif
  82
  83 extern int lwkt_sched_debug;
  84
  85 #ifndef LWKT_NUM_POOL_TOKENS     /* pool size may be overridden at build time */
     /*
      * Number of tokens in the address-hashed pool.  The default is prime
      * because _lwkt_token_pool_lookup() reduces the address modulo this
      * value.
      */
  86 #define LWKT_NUM_POOL_TOKENS    4001    /* prime number */
  87 #endif
  88
     /*
      * Element type of pool_tokens[]: a token wrapped so that __cachealign
      * applies to each array element (presumably one token per cache line --
      * confirm against the __cachealign definition).
      */
  89 struct lwkt_pool_token {
  90         struct lwkt_token       token;
  91 } __cachealign;
  92
     /* Backing store for lwkt_getpooltoken() / lwkt_relpooltoken(). */
  93 static struct lwkt_pool_token   pool_tokens[LWKT_NUM_POOL_TOKENS];
     /*
      * Held around the kprintf()/print_backtrace() debug output emitted by
      * lwkt_gettoken() and lwkt_gettoken_shared().
      */
  94 struct spinlock         tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin, "tok_debug_spin");
  95
  96 #define TOKEN_STRING    "REF=%p TOK=%p TD=%p"   /* format shared by all token trace points */
  97 #define TOKEN_ARGS      lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
  98 #define CONTENDED_STRING        TOKEN_STRING " (contention started)"
  99 #define UNCONTENDED_STRING      TOKEN_STRING " (contention stopped)"
     /* Default the token trace mask to KTR_ALL unless it is already defined. */
 100 #if !defined(KTR_TOKENS)
 101 #define KTR_TOKENS      KTR_ALL
 102 #endif
 103
     /*
      * Trace point definitions.  Only the "fail" and "succ" events are
      * compiled in; the remaining ones are disabled by the #if 0 block.
      */
 104 KTR_INFO_MASTER(tokens);
 105 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
 106 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
 107 #if 0
 108 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
 109 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
 110 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
 111 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
 112 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
 113 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
 114 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
 115 #endif
 116
     /*
      * Log a token event.  Expands to KTR_LOG(tokens_<name>, ...) passing the
      * tokref, the token it refers to and curthread, in TOKEN_ARGS order.
      */
 117 #define logtoken(name, ref)                                             \
 118         KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
 119
 120 /*
 121  * Global tokens.  These replace the MP lock for major subsystem locking.
 122  * These tokens are initially used to lock up both global and individual
 123  * operations.
 124  *
 125  * Once individual structures get their own locks these tokens are used
 126  * only to protect global lists & other variables and to interlock
 127  * allocations and teardowns and such.
 128  *
 129  * The UP initializer causes token acquisition to also acquire the MP lock
 130  * for maximum compatibility.  The feature may be enabled and disabled at
 131  * any time, the MP state is copied to the tokref when the token is acquired
 132  * and will not race against sysctl changes.
      *
      * NOTE(review): every token below is defined with LWKT_TOKEN_INITIALIZER;
      * no separate UP initializer is used in this file, so the paragraph above
      * may be stale -- confirm.
 133  */
 134 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
 135 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
 136 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
 137 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
 138 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
 139 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
 140 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
 141 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
 142 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
 143
 144 static int lwkt_token_spin = 5;  /* extra attempts made by _lwkt_trytokref_spin() */
 145 SYSCTL_INT(_lwkt, OID_AUTO, token_spin, CTLFLAG_RW,
 146     &lwkt_token_spin, 0, "Decontention spin loops");
     /*
      * When non-zero this value is passed to tsc_delay() between attempts in
      * _lwkt_trytokref_spin(); when zero cpu_pause() is used instead.
      */
 147 static int lwkt_token_delay = 0;
 148 SYSCTL_INT(_lwkt, OID_AUTO, token_delay, CTLFLAG_RW,
 149     &lwkt_token_delay, 0, "Decontention spin delay in ns");
 150
 151 /*
 152  * The collision count is bumped every time the LWKT scheduler fails
 153  * to acquire needed tokens in addition to a normal lwkt_gettoken()
 154  * stall.
      *
      * Each global token's t_collisions counter is exported read/write
      * below.
 155  */
 156 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
 157     &mp_token.t_collisions, 0, "Collision counter of mp_token");
 158 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
 159     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
 160 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
 161     &dev_token.t_collisions, 0, "Collision counter of dev_token");
 162 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
 163     &vm_token.t_collisions, 0, "Collision counter of vm_token");
 164 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
 165     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
 166 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
 167     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
 168 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
 169     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
 170 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
 171     &tty_token.t_collisions, 0, "Collision counter of tty_token");
 172 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
 173     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
 174
     /*
      * While positive, each lwkt_gettoken() / lwkt_gettoken_shared() call
      * that fails its spin attempt decrements this and prints the thread,
      * the token description and a backtrace.
      */
 175 int tokens_debug_output;
 176 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
 177     &tokens_debug_output, 0, "Generate stack trace N times");
 178
 179
 180 #ifdef DEBUG_LOCKS_LATENCY
 181
     /*
      * Iteration count of the cpu_ccfence() loop that _lwkt_trytokref_spin()
      * runs after every successful acquisition.
      */
 182 static long tokens_add_latency;
 183 SYSCTL_LONG(_debug, OID_AUTO, tokens_add_latency, CTLFLAG_RW,
 184             &tokens_add_latency, 0,
 185             "Add spinlock latency");
 186
 187 #endif
 188
 189
     /* Forward declaration; lwkt_getalltokens() calls it before its definition. */
 190 static int _lwkt_getalltokens_sorted(thread_t td);
 191
 192 /*
 193  * Acquire the initial mplock
 194  *
 195  * (low level boot only)
 196  */
 197 void
 198 cpu_get_initial_mplock(void)
 199 {
 200         KKASSERT(mp_token.t_ref == NULL);
 201         if (lwkt_trytoken(&mp_token) == FALSE)
 202                 panic("cpu_get_initial_mplock");
 203 }
 204
 205 /*
 206  * Return a pool token given an address.  Use a prime number to reduce
 207  * overlaps.
 208  */
 209 static __inline
 210 lwkt_token_t
 211 _lwkt_token_pool_lookup(void *ptr)
 212 {
 213         u_int i;
 214
 215         i = (u_int)(uintptr_t)ptr % LWKT_NUM_POOL_TOKENS;
 216         return (&pool_tokens[i].token);
 217 }
 218
 219 /*
 220  * Initialize a tokref_t prior to making it visible in the thread's
 221  * token array.
 222  */
 223 static __inline
 224 void
 225 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
 226 {
 227         ref->tr_tok = tok;
 228         ref->tr_count = excl;
 229         ref->tr_owner = td;
 230 }
 231
 232 /*
 233  * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 234  * FALSE on failure.
 235  *
 236  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 237  * token, otherwise are attempting to get a shared token.
 238  *
 239  * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 240  * it is a non-blocking operation (for both exclusive or shared acquisitions).
      *
      * ref  - tokref being acquired; ref->tr_tok names the token.  When the
      *        calling thread already holds the token exclusively, an
      *        exclusive request is satisfied by bumping the count and
      *        TOK_EXCLUSIVE is cleared from ref->tr_count.
      * td   - acquiring thread.  The token counts as already held by td when
      *        TOK_EXCLUSIVE is set and tok->t_ref points into
      *        [&td->td_toks_base, td->td_toks_stop).
      * mode - TOK_EXCLUSIVE and/or TOK_EXCLREQ as described above.
      *
      * This function never blocks or spins itself; it either succeeds or
      * returns FALSE.  A failed exclusive attempt made with TOK_EXCLREQ
      * makes sure TOK_EXCLREQ has been set in tok->t_count.  A failed shared
      * attempt makes no lasting change to tok->t_count.
 241  */
 242 static __inline
 243 int
 244 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
 245 {
 246         lwkt_token_t tok;
 247         lwkt_tokref_t oref;
 248         long count;
 249
 250         tok = ref->tr_tok;
             /*
              * A blocking request is only permitted when not nested inside
              * an interrupt, or when running on the panicking cpu.
              */
 251         KASSERT(((mode & TOK_EXCLREQ) == 0 ||   /* non blocking */
 252                 td->td_gd->gd_intr_nesting_level == 0 ||
 253                 panic_cpu_gd == mycpu),
 254                 ("Attempt to acquire token %p not already "
 255                 "held in hard code section", tok));
 256
 257         if (mode & TOK_EXCLUSIVE) {
 258                 /*
 259                  * Attempt to get an exclusive token
 260                  */
                     /*
                      * Snapshot of t_count; presumably refreshed through
                      * &count by a failed atomic_fcmpset_long() -- see
                      * atomic(9).
                      */
 261                 count = tok->t_count;
 262
 263                 for (;;) {
 264                         oref = tok->t_ref;      /* can be NULL */
 265                         cpu_ccfence();
 266                         if ((count & ~TOK_EXCLREQ) == 0) {
 267                                 /*
 268                                  * It is possible to get the exclusive bit.
 269                                  * We must clear TOK_EXCLREQ on successful
 270                                  * acquisition.
 271                                  */
 272                                 if (atomic_fcmpset_long(&tok->t_count, &count,
 273                                                         (count & ~TOK_EXCLREQ) |
 274                                                         TOK_EXCLUSIVE)) {
 275                                         KKASSERT(tok->t_ref == NULL);
 276                                         tok->t_ref = ref;
 277                                         return TRUE;
 278                                 }
 279                                 /* retry */
 280                         } else if ((count & TOK_EXCLUSIVE) &&
 281                                    oref >= &td->td_toks_base &&
 282                                    oref < td->td_toks_stop) {
 283                                 /*
 284                                  * Our thread already holds the exclusive
 285                                  * bit, we treat this tokref as a shared
 286                                  * token (sorta) to make the token release
 287                                  * code easier.
 288                                  *
 289                                  * NOTE: oref cannot race above if it
 290                                  *       happens to be ours, so we're good.
 291                                  *       But we must still have a stable
 292                                  *       variable for both parts of the
 293                                  *       comparison.
 294                                  *
 295                                  * NOTE: Since we already have an exclusive
 296                                  *       lock and don't need to check EXCLREQ
 297                                  *       we can just use an atomic_add here
 298                                  */
 299                                 atomic_add_long(&tok->t_count, TOK_INCR);
 300                                 ref->tr_count &= ~TOK_EXCLUSIVE;
 301                                 return TRUE;
 302                         } else if ((mode & TOK_EXCLREQ) &&
 303                                    (count & TOK_EXCLREQ) == 0) {
 304                                 /*
 305                                  * Unable to get the exclusive bit but being
 306                                  * asked to set the exclusive-request bit.
 307                                  * Since we are going to retry anyway just
 308                                  * set the bit unconditionally.
 309                                  */
 310                                 atomic_set_long(&tok->t_count, TOK_EXCLREQ);
 311                                 return FALSE;
 312                         } else {
 313                                 /*
 314                                  * Unable to get the exclusive bit and not
 315                                  * being asked to set the exclusive-request
 316                                  * (aka lwkt_trytoken()), or EXCLREQ was
 317                                  * already set.
 318                                  */
 319                                 cpu_pause();
 320                                 return FALSE;
 321                         }
 322                         /* retry */
 323                 }
 324         } else {
 325                 /*
 326                  * Attempt to get a shared token.  Note that TOK_EXCLREQ
 327                  * for shared tokens simply means the caller intends to
 328                  * block.  We never actually set the bit in tok->t_count.
 329                  */
 330                 count = tok->t_count;
 331
 332                 for (;;) {
 333                         oref = tok->t_ref;      /* can be NULL */
 334                         cpu_ccfence();
 335                         if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
 336                                 /*
 337                                  * It may be possible to get the token shared.
                                      *
                                      * The count is bumped optimistically.  If
                                      * the value returned by the fetchadd had
                                      * TOK_EXCLUSIVE set, the bump is undone and
                                      * count is recomputed from the value the
                                      * undo returned.
 338                                  */
 339                                 if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
 340                                         return TRUE;
 341                                 }
 342                                 count = atomic_fetchadd_long(&tok->t_count,
 343                                                              -TOK_INCR);
 344                                 count -= TOK_INCR;
 345                                 /* retry */
 346                         } else if ((count & TOK_EXCLUSIVE) &&
 347                                    oref >= &td->td_toks_base &&
 348                                    oref < td->td_toks_stop) {
 349                                 /*
 350                                  * We own the exclusive bit on the token so
 351                                  * we can in fact also get it shared.
 352                                  */
 353                                 atomic_add_long(&tok->t_count, TOK_INCR);
 354                                 return TRUE;
 355                         } else {
 356                                 /*
 357                                  * We failed to get the token shared
 358                                  */
 359                                 return FALSE;
 360                         }
 361                         /* retry */
 362                 }
 363         }
 364 }
 365
 366 static __inline
 367 int
 368 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
 369 {
 370         int spin;
 371
 372         if (_lwkt_trytokref(ref, td, mode)) {
 373 #ifdef DEBUG_LOCKS_LATENCY
 374                 long j;
 375                 for (j = tokens_add_latency; j > 0; --j)
 376                         cpu_ccfence();
 377 #endif
 378                 return TRUE;
 379         }
 380         for (spin = lwkt_token_spin; spin > 0; --spin) {
 381                 if (lwkt_token_delay)
 382                         tsc_delay(lwkt_token_delay);
 383                 else
 384                         cpu_pause();
 385                 if (_lwkt_trytokref(ref, td, mode)) {
 386 #ifdef DEBUG_LOCKS_LATENCY
 387                         long j;
 388                         for (j = tokens_add_latency; j > 0; --j)
 389                                 cpu_ccfence();
 390 #endif
 391                         return TRUE;
 392                 }
 393         }
 394         return FALSE;
 395 }
 396
 397 /*
 398  * Release a token that we hold.
      *
      * ref - tokref being released.  It is treated as the exclusive holder
      *       when tok->t_ref == ref and as a shared holder otherwise.
      * td  - thread releasing the token; not referenced by this function.
      *
      * The release loops until its atomic_fcmpset_long() on tok->t_count
      * succeeds.  An exclusive release clears only TOK_EXCLUSIVE and a shared
      * release subtracts TOK_INCR; the other bits of t_count are written back
      * as they were read.
 399  */
 400 static __inline
 401 void
 402 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
 403 {
 404         lwkt_token_t tok;
 405         long count;
 406
 407         tok = ref->tr_tok;
 408         count = tok->t_count;
 409
 410         for (;;) {
 411                 cpu_ccfence();
 412                 if (tok->t_ref == ref) {
 413                         /*
 414                          * We are an exclusive holder.  We must clear
 415                          * tok->t_ref before we clear the TOK_EXCLUSIVE bit.
 416                          * If we are unable to clear the bit we must restore
 417                          * tok->t_ref.
 418                          */
 419                         KKASSERT(count & TOK_EXCLUSIVE);
 420                         tok->t_ref = NULL;
 421                         if (atomic_fcmpset_long(&tok->t_count, &count,
 422                                                 count & ~TOK_EXCLUSIVE)) {
 423                                 return;
 424                         }
 425                         tok->t_ref = ref;
 426                         /* retry */
 427                 } else {
 428                         /*
 429                          * We are a shared holder
 430                          */
 431                         KKASSERT(count & TOK_COUNTMASK);
 432                         if (atomic_fcmpset_long(&tok->t_count, &count,
 433                                                 count - TOK_INCR)) {
 434                                 return;
 435                         }
 436                         /* retry */
 437                 }
 438                 /* retry */
 439         }
 440 }
 441
 442 /*
 443  * Obtain all the tokens required by the specified thread on the current
 444  * cpu, return 0 on failure and non-zero on success.  If a failure occurs
 445  * any partially acquired tokens will be released prior to return.
 446  *
 447  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 448  * tokens that the thread had to release when it switched away.
 449  *
 450  * If spinning is non-zero this function acquires the tokens in a particular
 451  * order to deal with potential deadlocks.  We simply use address order for
 452  * the case.
 453  *
 454  * Called from a critical section.
 455  */
 456 int
 457 lwkt_getalltokens(thread_t td, int spinning)
 458 {
 459         lwkt_tokref_t scan;
 460         lwkt_token_t tok;
 461
 462         if (spinning)
 463                 return(_lwkt_getalltokens_sorted(td));
 464
 465         /*
 466          * Acquire tokens in forward order, assign or validate tok->t_ref.
 467          */
 468         for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
 469                 tok = scan->tr_tok;
 470                 for (;;) {
 471                         /*
 472                          * Only try really hard on the last token
 473                          */
 474                         if (scan == td->td_toks_stop - 1) {
 475                             if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
 476                                     break;
 477                         } else {
 478                             if (_lwkt_trytokref(scan, td, scan->tr_count))
 479                                     break;
 480                         }
 481
 482                         /*
 483                          * Otherwise we failed to acquire all the tokens.
 484                          * Release whatever we did get.
 485                          */
 486                         KASSERT(tok->t_desc,
 487                                 ("token %p is not initialized", tok));
 488                         td->td_gd->gd_cnt.v_lock_name[0] = 't';
 489                         strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
 490                                 tok->t_desc,
 491                                 sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
 492                         if (lwkt_sched_debug > 0) {
 493                                 --lwkt_sched_debug;
 494                                 kprintf("toka %p %s %s\n",
 495                                         tok, tok->t_desc, td->td_comm);
 496                         }
 497                         td->td_wmesg = tok->t_desc;
 498                         ++tok->t_collisions;
 499                         while (--scan >= &td->td_toks_base)
 500                                 _lwkt_reltokref(scan, td);
 501                         return(FALSE);
 502                 }
 503         }
 504         return (TRUE);
 505 }
 506
 507 /*
 508  * Release all tokens owned by the specified thread on the current cpu.
 509  *
 510  * This code is really simple.  Even in cases where we own all the tokens
 511  * note that t_ref may not match the scan for recursively held tokens which
 512  * are held deeper in the stack, or for the case where a lwkt_getalltokens()
 513  * failed.
 514  *
 515  * Tokens are released in reverse order to reduce chasing race failures.
 516  *
 517  * Called from a critical section.
 518  */
 519 void
 520 lwkt_relalltokens(thread_t td)
 521 {
 522         lwkt_tokref_t scan;
 523
 524         /*
 525          * Weird order is to try to avoid a panic loop
 526          */
 527         if (td->td_toks_have) {
 528                 scan = td->td_toks_have;
 529                 td->td_toks_have = NULL;
 530         } else {
 531                 scan = td->td_toks_stop;
 532         }
 533         while (--scan >= &td->td_toks_base)
 534                 _lwkt_reltokref(scan, td);
 535 }
 536
 537 /*
 538  * This is the decontention version of lwkt_getalltokens().  The tokens are
 539  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
 540  * token failures will spin into the scheduler and get here.
 541  *
 542  * Called from critical section
 543  */
 544 static
 545 int
 546 _lwkt_getalltokens_sorted(thread_t td)
 547 {
 548         lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
 549         lwkt_tokref_t scan;
 550         lwkt_token_t tok;
 551         int i;
 552         int j;
 553         int n;
 554
 555         /*
 556          * Sort the token array.  Yah yah, I know this isn't fun.
 557          *
 558          * NOTE: Recursively acquired tokens are ordered the same as in the
 559          *       td_toks_array so we can always get the earliest one first.
 560          */
 561         i = 0;
 562         scan = &td->td_toks_base;
 563         while (scan < td->td_toks_stop) {
 564                 for (j = 0; j < i; ++j) {
 565                         if (scan->tr_tok < sort_array[j]->tr_tok)
 566                                 break;
 567                 }
 568                 if (j != i) {
 569                         bcopy(sort_array + j, sort_array + j + 1,
 570                               (i - j) * sizeof(lwkt_tokref_t));
 571                 }
 572                 sort_array[j] = scan;
 573                 ++scan;
 574                 ++i;
 575         }
 576         n = i;
 577
 578         /*
 579          * Acquire tokens in forward order, assign or validate tok->t_ref.
 580          */
 581         for (i = 0; i < n; ++i) {
 582                 scan = sort_array[i];
 583                 tok = scan->tr_tok;
 584                 for (;;) {
 585                         /*
 586                          * Only try really hard on the last token
 587                          */
 588                         if (scan == td->td_toks_stop - 1) {
 589                             if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
 590                                     break;
 591                         } else {
 592                             if (_lwkt_trytokref(scan, td, scan->tr_count))
 593                                     break;
 594                         }
 595
 596                         /*
 597                          * Otherwise we failed to acquire all the tokens.
 598                          * Release whatever we did get.
 599                          */
 600                         td->td_gd->gd_cnt.v_lock_name[0] = 't';
 601                         strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
 602                                 tok->t_desc,
 603                                 sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
 604                         if (lwkt_sched_debug > 0) {
 605                                 --lwkt_sched_debug;
 606                                 kprintf("tokb %p %s %s\n",
 607                                         tok, tok->t_desc, td->td_comm);
 608                         }
 609                         td->td_wmesg = tok->t_desc;
 610                         ++tok->t_collisions;
 611                         while (--i >= 0) {
 612                                 scan = sort_array[i];
 613                                 _lwkt_reltokref(scan, td);
 614                         }
 615                         return(FALSE);
 616                 }
 617         }
 618
 619         /*
 620          * We were successful, there is no need for another core to signal
 621          * us.
 622          */
 623         return (TRUE);
 624 }
 625
 626 /*
 627  * Get a serializing token.  This routine can block.
 628  */
 629 void
 630 lwkt_gettoken(lwkt_token_t tok)
 631 {
 632         thread_t td = curthread;
 633         lwkt_tokref_t ref;
 634
 635         ref = td->td_toks_stop;
 636         KKASSERT(ref < &td->td_toks_end);
 637         ++td->td_toks_stop;
 638         cpu_ccfence();
 639         _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
 640
 641 #ifdef DEBUG_LOCKS
 642         /*
 643          * Taking an exclusive token after holding it shared will
 644          * livelock. Scan for that case and assert.
 645          */
 646         lwkt_tokref_t tk;
 647         int found = 0;
 648         for (tk = &td->td_toks_base; tk < ref; tk++) {
 649                 if (tk->tr_tok != tok)
 650                         continue;
 651
 652                 found++;
 653                 if (tk->tr_count & TOK_EXCLUSIVE)
 654                         goto good;
 655         }
 656         /* We found only shared instances of this token if found >0 here */
 657         KASSERT((found == 0), ("Token %p s/x livelock", tok));
 658 good:
 659 #endif
 660
 661         if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
 662                 return;
 663
 664         /*
 665          * Give up running if we can't acquire the token right now.
 666          *
 667          * Since the tokref is already active the scheduler now
 668          * takes care of acquisition, so we need only call
 669          * lwkt_switch().
 670          *
 671          * Since we failed this was not a recursive token so upon
 672          * return tr_tok->t_ref should be assigned to this specific
 673          * ref.
 674          */
 675         td->td_wmesg = tok->t_desc;
 676         ++tok->t_collisions;
 677         logtoken(fail, ref);
 678         td->td_toks_have = td->td_toks_stop - 1;
 679
 680         if (tokens_debug_output > 0) {
 681                 --tokens_debug_output;
 682                 spin_lock(&tok_debug_spin);
 683                 kprintf("Excl Token thread %p %s %s\n",
 684                         td, tok->t_desc, td->td_comm);
 685                 print_backtrace(6);
 686                 kprintf("\n");
 687                 spin_unlock(&tok_debug_spin);
 688         }
 689
 690         lwkt_switch();
 691         logtoken(succ, ref);
 692         KKASSERT(tok->t_ref == ref);
 693 }
 694
 695 /*
 696  * Similar to gettoken but we acquire a shared token instead of an exclusive
 697  * token.
 698  */
 699 void
 700 lwkt_gettoken_shared(lwkt_token_t tok)
 701 {
 702         thread_t td = curthread;
 703         lwkt_tokref_t ref;
 704
 705         ref = td->td_toks_stop;
 706         KKASSERT(ref < &td->td_toks_end);
 707         ++td->td_toks_stop;
 708         cpu_ccfence();
 709         _lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
 710
 711 #ifdef DEBUG_LOCKS
 712         /*
 713          * Taking a pool token in shared mode is a bad idea; other
 714          * addresses deeper in the call stack may hash to the same pool
 715          * token and you may end up with an exclusive-shared livelock.
 716          * Warn in this condition.
 717          */
 718         if ((tok >= &pool_tokens[0].token) &&
 719             (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
 720                 kprintf("Warning! Taking pool token %p in shared mode\n", tok);
 721 #endif
 722
 723
 724         if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
 725                 return;
 726
 727         /*
 728          * Give up running if we can't acquire the token right now.
 729          *
 730          * Since the tokref is already active the scheduler now
 731          * takes care of acquisition, so we need only call
 732          * lwkt_switch().
 733          *
 734          * Since we failed this was not a recursive token so upon
 735          * return tr_tok->t_ref should be assigned to this specific
 736          * ref.
 737          */
 738         td->td_wmesg = tok->t_desc;
 739         ++tok->t_collisions;
 740         logtoken(fail, ref);
 741         td->td_toks_have = td->td_toks_stop - 1;
 742
 743         if (tokens_debug_output > 0) {
 744                 --tokens_debug_output;
 745                 spin_lock(&tok_debug_spin);
 746                 kprintf("Shar Token thread %p %s %s\n",
 747                         td, tok->t_desc, td->td_comm);
 748                 print_backtrace(6);
 749                 kprintf("\n");
 750                 spin_unlock(&tok_debug_spin);
 751         }
 752
 753         lwkt_switch();
 754         logtoken(succ, ref);
 755 }
 756
 757 /*
 758  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 759  *
 760  * We setup the tokref in case we actually get the token (if we switch later
 761  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 762  * TOK_EXCLREQ in case we fail.
 763  */
 764 int
 765 lwkt_trytoken(lwkt_token_t tok)
 766 {
 767         thread_t td = curthread;
 768         lwkt_tokref_t ref;
 769
 770         ref = td->td_toks_stop;
 771         KKASSERT(ref < &td->td_toks_end);
 772         ++td->td_toks_stop;
 773         cpu_ccfence();
 774         _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
 775
 776         if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
 777                 return TRUE;
 778
 779         /*
 780          * Failed, unpend the request
 781          */
 782         cpu_ccfence();
 783         --td->td_toks_stop;
 784         ++tok->t_collisions;
 785         return FALSE;
 786 }
 787
 788 lwkt_token_t
 789 lwkt_getpooltoken(void *ptr)
 790 {
 791         lwkt_token_t tok;
 792
 793         tok = _lwkt_token_pool_lookup(ptr);
 794         lwkt_gettoken(tok);
 795         return (tok);
 796 }
 797
 798 /*
 799  * Release a serializing token.
 800  *
 801  * WARNING!  All tokens must be released in reverse order.  This will be
 802  *           asserted.
 803  */
 804 void
 805 lwkt_reltoken(lwkt_token_t tok)
 806 {
 807         thread_t td = curthread;
 808         lwkt_tokref_t ref;
 809
 810         /*
 811          * Remove ref from thread token list and assert that it matches
 812          * the token passed in.  Tokens must be released in reverse order.
 813          */
 814         ref = td->td_toks_stop - 1;
 815         KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
 816         _lwkt_reltokref(ref, td);
 817         cpu_sfence();
 818         td->td_toks_stop = ref;
 819 }
 820
 821 /*
 822  * It is faster for users of lwkt_getpooltoken() to use the returned
 823  * token and just call lwkt_reltoken(), but for convenience we provide
 824  * this function which looks the token up based on the ident.
 825  */
 826 void
 827 lwkt_relpooltoken(void *ptr)
 828 {
 829         lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
 830         lwkt_reltoken(tok);
 831 }
 832
 833 /*
 834  * Return a count of the number of token refs the thread has to the
 835  * specified token, whether it currently owns the token or not.
 836  */
 837 int
 838 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
 839 {
 840         lwkt_tokref_t scan;
 841         int count = 0;
 842
 843         for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
 844                 if (scan->tr_tok == tok)
 845                         ++count;
 846         }
 847         return(count);
 848 }
 849
 850 /*
 851  * Pool tokens are used to provide a type-stable serializing token
 852  * pointer that does not race against disappearing data structures.
 853  *
 854  * This routine is called in early boot just after we setup the BSP's
 855  * globaldata structure.
 856  */
 857 void
 858 lwkt_token_pool_init(void)
 859 {
 860         int i;
 861
 862         for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
 863                 lwkt_token_init(&pool_tokens[i].token, "pool");
 864 }
 865
 866 lwkt_token_t
 867 lwkt_token_pool_lookup(void *ptr)
 868 {
 869         return (_lwkt_token_pool_lookup(ptr));
 870 }
 871
 872 /*
 873  * Initialize a token.
 874  */
 875 void
 876 lwkt_token_init(lwkt_token_t tok, const char *desc)
 877 {
 878         tok->t_count = 0;
 879         tok->t_ref = NULL;
 880         tok->t_collisions = 0;
 881         tok->t_desc = desc;
 882 }
 883
 884 void
 885 lwkt_token_uninit(lwkt_token_t tok)
 886 {
 887         /* empty */
 888 }
 889
 890 /*
 891  * Exchange the two most recent tokens on the tokref stack.  This allows
 892  * you to release a token out of order.
 893  *
 894  * We have to be careful about the case where the top two tokens are
 895  * the same token.  In this case tok->t_ref will point to the deeper
 896  * ref and must remain pointing to the deeper ref.  If we were to swap
 897  * it the first release would clear the token even though a second
 898  * ref is still present.
 899  *
 900  * Only exclusively held tokens contain a reference to the tokref which
 901  * has to be flipped along with the swap.
 902  */
 903 void
 904 lwkt_token_swap(void)
 905 {
 906         lwkt_tokref_t ref1, ref2;
 907         lwkt_token_t tok1, tok2;
 908         long count1, count2;
 909         thread_t td = curthread;
 910
 911         crit_enter();
 912
 913         ref1 = td->td_toks_stop - 1;
 914         ref2 = td->td_toks_stop - 2;
 915         KKASSERT(ref1 >= &td->td_toks_base);
 916         KKASSERT(ref2 >= &td->td_toks_base);
 917
 918         tok1 = ref1->tr_tok;
 919         tok2 = ref2->tr_tok;
 920         count1 = ref1->tr_count;
 921         count2 = ref2->tr_count;
 922
 923         if (tok1 != tok2) {
 924                 ref1->tr_tok = tok2;
 925                 ref1->tr_count = count2;
 926                 ref2->tr_tok = tok1;
 927                 ref2->tr_count = count1;
 928                 if (tok1->t_ref == ref1)
 929                         tok1->t_ref = ref2;
 930                 if (tok2->t_ref == ref2)
 931                         tok2->t_ref = ref1;
 932         }
 933
 934         crit_exit();
 935 }
 936
 937 #ifdef DDB
 938 DB_SHOW_COMMAND(tokens, db_tok_all)
 939 {
 940         struct lwkt_token *tok, **ptr;
 941         struct lwkt_token *toklist[16] = {
 942                 &mp_token,
 943                 &pmap_token,
 944                 &dev_token,
 945                 &vm_token,
 946                 &vmspace_token,
 947                 &kvm_token,
 948                 &sigio_token,
 949                 &tty_token,
 950                 &vnode_token,
 951                 NULL
 952         };
 953
 954         ptr = toklist;
 955         for (tok = *ptr; tok; tok = *(++ptr)) {
 956                 db_printf("tok=%p tr_owner=%p t_colissions=%ld t_desc=%s\n", tok,
 957                     (tok->t_ref ? tok->t_ref->tr_owner : NULL),
 958                     tok->t_collisions, tok->t_desc);
 959         }
 960 }
 961 #endif /* DDB */