sys/kern/lwkt_token.c

   1 /*
   2  * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*
  36  * lwkt_token - Implement soft token locks.
  37  *
  38  * Tokens are locks which serialize a thread only while the thread is
  39  * running.  If the thread blocks all tokens are released, then reacquired
  40  * when the thread resumes.
  41  *
  42  * This implementation requires no critical sections or spin locks, but
  43  * does use atomic_cmpset_ptr().
  44  *
  45  * Tokens may be recursively acquired by the same thread.  However the
  46  * caller must be sure to release such tokens in reverse order.
  47  */
  48 #include <sys/param.h>
  49 #include <sys/systm.h>
  50 #include <sys/kernel.h>
  51 #include <sys/proc.h>
  52 #include <sys/rtprio.h>
  53 #include <sys/queue.h>
  54 #include <sys/sysctl.h>
  55 #include <sys/ktr.h>
  56 #include <sys/kthread.h>
  57 #include <machine/cpu.h>
  58 #include <sys/lock.h>
  59 #include <sys/spinlock.h>
  60
  61 #include <sys/thread2.h>
  62 #include <sys/spinlock2.h>
  63 #include <sys/mplock2.h>
  64
  65 #include <vm/vm.h>
  66 #include <vm/vm_param.h>
  67 #include <vm/vm_kern.h>
  68 #include <vm/vm_object.h>
  69 #include <vm/vm_page.h>
  70 #include <vm/vm_map.h>
  71 #include <vm/vm_pager.h>
  72 #include <vm/vm_extern.h>
  73 #include <vm/vm_zone.h>
  74
  75 #include <machine/stdarg.h>
  76 #include <machine/smp.h>
  77
  78 #include "opt_ddb.h"
  79 #ifdef DDB
  80 #include <ddb/ddb.h>
  81 #endif
  82
  83 extern int lwkt_sched_debug;  /* while > 0, getalltokens failures are kprintf'd */
  84
  85 #ifndef LWKT_NUM_POOL_TOKENS
  86 #define LWKT_NUM_POOL_TOKENS    16661   /* modulus in _lwkt_token_pool_lookup() */
  87 #endif
  88 /* One pool slot: a single token, with the struct declared __cachealign. */
  89 struct lwkt_pool_token {
  90         struct lwkt_token       token;
  91 } __cachealign;
  92 /* The pool itself, plus the spinlock held around token debug output. */
  93 static struct lwkt_pool_token   pool_tokens[LWKT_NUM_POOL_TOKENS];
  94 struct spinlock tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin,
  95                                                       "tok_debug_spin");
  96
  97 #define TOKEN_STRING    "REF=%p TOK=%p TD=%p"
  98 #define TOKEN_ARGS      lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
  99 #define CONTENDED_STRING        TOKEN_STRING " (contention started)"
 100 #define UNCONTENDED_STRING      TOKEN_STRING " (contention stopped)"
 101 #if !defined(KTR_TOKENS)
 102 #define KTR_TOKENS      KTR_ALL
 103 #endif
 104 /* KTR trace points; only "fail" and "succ" are compiled in (rest is #if 0). */
 105 KTR_INFO_MASTER(tokens);
 106 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
 107 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
 108 #if 0
 109 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
 110 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
 111 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
 112 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
 113 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
 114 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
 115 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
 116 #endif
 117 /* Log trace point tokens_<name> with ref, its token and curthread. */
 118 #define logtoken(name, ref)                                             \
 119         KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
 120
 121 /*
 122  * Global tokens.  These replace the MP lock for major subsystem locking.
 123  * These tokens are initially used to lock up both global and individual
 124  * operations.
 125  *
 126  * Once individual structures get their own locks these tokens are used
 127  * only to protect global lists and other variables, and to interlock
 128  * allocations, teardowns and the like.
 129  *
 130  * The UP initializer causes token acquisition to also acquire the MP lock
 131  * for maximum compatibility; the MP state is copied to the tokref at
 132  * acquisition time so it does not race sysctl changes.  NOTE(review): all
 133  * tokens below use LWKT_TOKEN_INITIALIZER -- this may be stale, confirm.
 134  */
 135 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
 136 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
 137 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
 138 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
 139 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
 140 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
 141 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
 142 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
 143 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
 144
 145 /*
 146  * Spin tunables used by _lwkt_trytokref_spin(): the exponential backoff
 147  * bound and the TSC window shift.  Remember that tokens back off to the
 148  * scheduler, so large values are not recommended.
 149  */
 150 static int token_backoff_max __cachealign = 4096;
 151 SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
 152     &token_backoff_max, 0, "Tokens exponential backoff");
 153 static int token_window_shift __cachealign = 8;
 154 SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
 155     &token_window_shift, 0, "Tokens TSC windowing shift");
 156
 157 /*
 158  * Per-token collision counters for the global tokens, exported read/write.
 159  * t_collisions is bumped when lwkt_getalltokens() fails on a token, when
 160  * lwkt_gettoken*() has to switch away, and when lwkt_trytoken() fails.
 161  */
 162 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
 163     &mp_token.t_collisions, 0, "Collision counter of mp_token");
 164 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
 165     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
 166 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
 167     &dev_token.t_collisions, 0, "Collision counter of dev_token");
 168 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
 169     &vm_token.t_collisions, 0, "Collision counter of vm_token");
 170 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
 171     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
 172 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
 173     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
 174 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
 175     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
 176 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
 177     &tty_token.t_collisions, 0, "Collision counter of tty_token");
 178 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
 179     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
 180
 181 int tokens_debug_output;        /* contention backtraces still to print */
 182 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
 183     &tokens_debug_output, 0, "Generate stack trace N times");
 184 /* Forward declaration; lwkt_getalltokens() calls this when spinning. */
 185 static int _lwkt_getalltokens_sorted(thread_t td);
 186
 187 /*
 188  * Take mp_token for the very first time (low level boot only).  No
 189  * exclusive holder may be recorded yet, and failing the try is fatal.
 190  */
 191 void
 192 cpu_get_initial_mplock(void)
 193 {
 194         KKASSERT(mp_token.t_ref == NULL);
 195         if (!lwkt_trytoken(&mp_token)) {
 196                 panic("cpu_get_initial_mplock");
 197         }
 198 }
 199
 200 /*
 201  * Map an address onto a pool token: the low 32 bits of the pointer are
 202  * reduced modulo LWKT_NUM_POOL_TOKENS (meant to be prime, fewer overlaps).
 203  */
 204 static __inline
 205 lwkt_token_t
 206 _lwkt_token_pool_lookup(void *ptr)
 207 {
 208         uint32_t slot = (uint32_t)(uintptr_t)ptr;
 209
 210         slot %= LWKT_NUM_POOL_TOKENS;
 211         return (&pool_tokens[slot].token);
 212 }
 213
 214 /*
 215  * Initialize a tokref (token, mode bits in tr_count, owning thread).  NOTE:
 216  * callers in this file bump td_toks_stop before calling -- confirm intent.
 217  */
 218 static __inline
 219 void
 220 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
 221 {
 222         ref->tr_tok = tok;
 223         ref->tr_count = excl;   /* mode bits; getalltokens passes them back as mode */
 224         ref->tr_owner = td;
 225 }
 226
 227 /*
 228  * Make a single attempt to acquire ref->tr_tok on behalf of thread td,
 229  * shared or exclusive.  Returns TRUE on success, FALSE on failure.
 230  *
 231  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 232  * token, otherwise we are attempting to get a shared token.
 233  *
 234  * If TOK_EXCLREQ is set in mode the caller intends to block on failure,
 235  * otherwise it does not (for both exclusive and shared acquisitions).
 236  */
 237 static __inline
 238 int
 239 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
 240 {
 241         lwkt_token_t tok;
 242         lwkt_tokref_t oref;
 243         long count;
 244
 245         tok = ref->tr_tok;
 246         KASSERT(((mode & TOK_EXCLREQ) == 0 ||   /* non blocking */
 247                 td->td_gd->gd_intr_nesting_level == 0 ||
 248                 panic_cpu_gd == mycpu),
 249                 ("Attempt to acquire token %p not already "
 250                 "held in hard code section", tok));
 251
 252         if (mode & TOK_EXCLUSIVE) {
 253                 /*
 254                  * Attempt to get an exclusive token.  count is a snapshot.
 255                  */
 256                 count = tok->t_count;
 257
 258                 for (;;) {
 259                         oref = tok->t_ref;      /* can be NULL */
 260                         cpu_ccfence();
 261                         if ((count & ~TOK_EXCLREQ) == 0) {
 262                                 /*
 263                                  * It is possible to get the exclusive bit.
 264                                  * We must clear TOK_EXCLREQ on successful
 265                                  * acquisition.
 266                                  */
 267                                 if (atomic_fcmpset_long(&tok->t_count, &count,
 268                                                         (count & ~TOK_EXCLREQ) |
 269                                                         TOK_EXCLUSIVE)) {
 270                                         KKASSERT(tok->t_ref == NULL);
 271                                         tok->t_ref = ref;
 272                                         return TRUE;
 273                                 }
 274                                 /* retry (presumably fcmpset refreshed count) */
 275                         } else if ((count & TOK_EXCLUSIVE) &&
 276                                    oref >= &td->td_toks_base &&
 277                                    oref < td->td_toks_stop) {
 278                                 /*
 279                                  * Our thread already holds the exclusive
 280                                  * bit, we treat this tokref as a shared
 281                                  * token (sorta) to make the token release
 282                                  * code easier.  Treating this as a shared
 283                                  * token allows us to simply increment the
 284                                  * count field.
 285                                  *
 286                                  * NOTE: oref cannot race above if it
 287                                  *       happens to be ours, so we're good.
 288                                  *       But we must still have a stable
 289                                  *       variable for both parts of the
 290                                  *       comparison.
 291                                  *
 292                                  * NOTE: Since we already have an exclusive
 293                                  *       lock and don't need to check EXCLREQ
 294                                  *       we can just use an atomic_add here
 295                                  */
 296                                 atomic_add_long(&tok->t_count, TOK_INCR);
 297                                 ref->tr_count &= ~TOK_EXCLUSIVE;
 298                                 return TRUE;
 299                         } else if ((mode & TOK_EXCLREQ) &&
 300                                    (count & TOK_EXCLREQ) == 0) {
 301                                 /*
 302                                  * Unable to get the exclusive bit but being
 303                                  * asked to set the exclusive-request bit.
 304                                  * Since we are going to retry anyway just
 305                                  * set the bit unconditionally.
 306                                  */
 307                                 atomic_set_long(&tok->t_count, TOK_EXCLREQ);
 308                                 return FALSE;
 309                         } else {
 310                                 /*
 311                                  * Unable to get the exclusive bit and not
 312                                  * being asked to set the exclusive-request
 313                                  * (aka lwkt_trytoken()), or EXCLREQ was
 314                                  * already set.
 315                                  */
 316                                 cpu_pause();
 317                                 return FALSE;
 318                         }
 319                         /* retry */
 320                 }
 321         } else {
 322                 /*
 323                  * Attempt to get a shared token.  Note that TOK_EXCLREQ
 324                  * for shared tokens simply means the caller intends to
 325                  * block.  We never actually set the bit in tok->t_count.
 326                  *
 327                  * Due to the token's no-deadlock guarantee, and complications
 328                  * created by the sorted reacquisition code, we can only
 329                  * give exclusive requests priority over shared requests
 330                  * in situations where the thread holds only one token.
 331                  */
 332                 count = tok->t_count;
 333
 334                 for (;;) {
 335                         oref = tok->t_ref;      /* can be NULL */
 336                         cpu_ccfence();
 337                         if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
 338                             ((count & TOK_EXCLUSIVE) == 0 &&
 339                             td->td_toks_stop != &td->td_toks_base + 1)
 340                         ) {
 341                                 /*
 342                                  * Optimistically bump the share count; back out if exclusive.
 343                                  */
 344                                 if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
 345                                         return TRUE;
 346                                 }
 347                                 count = atomic_fetchadd_long(&tok->t_count,
 348                                                              -TOK_INCR);
 349                                 count -= TOK_INCR;
 350                                 /* retry with the post-undo count */
 351                         } else if ((count & TOK_EXCLUSIVE) &&
 352                                    oref >= &td->td_toks_base &&
 353                                    oref < td->td_toks_stop) {
 354                                 /*
 355                                  * We own the exclusive bit on the token so
 356                                  * we can in fact also get it shared.
 357                                  */
 358                                 atomic_add_long(&tok->t_count, TOK_INCR);
 359                                 return TRUE;
 360                         } else {
 361                                 /*
 362                                  * We failed to get the token shared
 363                                  */
 364                                 return FALSE;
 365                         }
 366                         /* retry */
 367                 }
 368         }
 369 }
 370 /* Try ref once; if contended, retry using backoff or TSC windowing. */
 371 static __inline
 372 int
 373 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
 374 {
 375         if (_lwkt_trytokref(ref, td, mode))
 376                 return TRUE;
 377
 378         if (mode & TOK_EXCLUSIVE) {
 379                 /*
 380                  * Contested exclusive token, use exponential backoff.  The
 381                  * pause is skipped while the TSC window selects this cpu.
 382                  */
 383                 long expbackoff;
 384                 long loop;
 385
 386                 expbackoff = 0;
 387                 while (expbackoff < 6 + token_backoff_max) {
 388                         expbackoff = (expbackoff + 1) * 3 / 2;
 389                         if ((rdtsc() >> token_window_shift) % ncpus != mycpuid)  {
 390                                 for (loop = expbackoff; loop; --loop)
 391                                         cpu_pause();
 392                         }
 393                         if (_lwkt_trytokref(ref, td, mode))
 394                                 return TRUE;
 395                 }
 396         } else {
 397                 /*
 398                  * Contested shared token, use TSC windowing.  In our
 399                  * window we retry without TOK_EXCLREQ, which ignores
 400                  * pending exclusive requests; otherwise retry as-is.
 401                  */
 402                 if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
 403                         if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
 404                                 return TRUE;
 405                 } else {
 406                         if (_lwkt_trytokref(ref, td, mode))
 407                                 return TRUE;
 408                 }
 409
 410         }
 411         ++mycpu->gd_cnt.v_lock_colls;   /* every attempt failed */
 412
 413         return FALSE;
 414 }
 415
 416 /*
 417  * Release a token that we hold (td is not used here).
 418  *
 419  * Since tokens are polled, we don't have to deal with wakeups and releasing
 420  * is really easy: clear the exclusive bit or drop one share count.
 421  */
 422 static __inline
 423 void
 424 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
 425 {
 426         lwkt_token_t tok;
 427         long count;
 428
 429         tok = ref->tr_tok;
 430         if (tok->t_ref == ref) {
 431                 /*
 432                  * We are the exclusive holder.  We must clear t_ref
 433                  * before we clear the TOK_EXCLUSIVE bit.  The clear
 434                  * is unconditional, so t_ref never needs restoring.
 435                  * (The disabled assert below would read count unset.)
 436                  */
 437 #if 0
 438                 KKASSERT(count & TOK_EXCLUSIVE);
 439 #endif
 440                 tok->t_ref = NULL;
 441                 atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
 442         } else {
 443                 /*
 444                  * We are a shared holder (or a recursive exclusive one)
 445                  */
 446                 count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
 447                 KKASSERT(count & TOK_COUNTMASK);        /* count prior */
 448         }
 449 }
 450
 451 /*
 452  * Obtain all the tokens required by the specified thread on the current
 453  * cpu.  Returns TRUE on success and FALSE on failure.  If a failure occurs
 454  * any partially acquired tokens are released prior to return, and the
 455  * contended token is recorded (v_lock_name, td_wmesg, t_collisions).
 456  *
 457  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 458  * tokens that the thread had to release when it switched away.
 459  *
 460  * If spinning is non-zero the tokens are instead acquired in address order
 461  * by _lwkt_getalltokens_sorted() to deal with potential deadlocks.
 462  *
 463  * Called from a critical section.
 464  */
 465 int
 466 lwkt_getalltokens(thread_t td, int spinning)
 467 {
 468         lwkt_tokref_t scan;
 469         lwkt_token_t tok;
 470
 471         if (spinning)
 472                 return(_lwkt_getalltokens_sorted(td));
 473
 474         /*
 475          * Acquire tokens in forward order, assign or validate tok->t_ref.
 476          */
 477         for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
 478                 tok = scan->tr_tok;
 479                 for (;;) {
 480                         /*
 481                          * Spin only on the last token (this body runs once)
 482                          */
 483                         if (scan == td->td_toks_stop - 1) {
 484                             if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
 485                                     break;
 486                         } else {
 487                             if (_lwkt_trytokref(scan, td, scan->tr_count))
 488                                     break;
 489                         }
 490
 491                         /*
 492                          * Failed: record the contended token and undo.  NOTE:
 493                          * strncpy leaves v_lock_name's last byte as-is (confirm NUL).
 494                          */
 495                         KASSERT(tok->t_desc,
 496                                 ("token %p is not initialized", tok));
 497                         td->td_gd->gd_cnt.v_lock_name[0] = 't';
 498                         strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
 499                                 tok->t_desc,
 500                                 sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
 501                         if (lwkt_sched_debug > 0) {
 502                                 --lwkt_sched_debug;
 503                                 kprintf("toka %p %s %s\n",
 504                                         tok, tok->t_desc, td->td_comm);
 505                         }
 506                         td->td_wmesg = tok->t_desc;
 507                         ++tok->t_collisions;
 508                         while (--scan >= &td->td_toks_base)
 509                                 _lwkt_reltokref(scan, td);
 510                         return(FALSE);
 511                 }
 512         }
 513         return (TRUE);
 514 }
 515
 516 /*
 517  * Release all tokens owned by the specified thread on the current cpu.
 518  *
 519  * If td_toks_have is set only the tokrefs below it are released and the
 520  * field is cleared; lwkt_gettoken*() point it at a tokref they failed to
 521  * acquire.  t_ref may not match the scan for recursively held tokens;
 522  * _lwkt_reltokref() then drops a share count instead.
 523  *
 524  * Tokens are released in reverse order to reduce chasing race failures.
 525  *
 526  * Called from a critical section.
 527  */
 528 void
 529 lwkt_relalltokens(thread_t td)
 530 {
 531         lwkt_tokref_t scan;
 532
 533         /*
 534          * Weird order is to try to avoid a panic loop
 535          */
 536         if (td->td_toks_have) {
 537                 scan = td->td_toks_have;
 538                 td->td_toks_have = NULL;
 539         } else {
 540                 scan = td->td_toks_stop;
 541         }
 542         while (--scan >= &td->td_toks_base)
 543                 _lwkt_reltokref(scan, td);
 544 }
 545
 546 /*
 547  * This is the decontention version of lwkt_getalltokens().  The tokens are
 548  * acquired in address-sorted order to deal with any deadlocks.  Returns
 549  * TRUE if all were acquired, else releases the partial set, returns FALSE.
 550  * Ultimately token failures will spin into the scheduler and get here.
 551  * Called from a critical section.
 552  */
 553 static
 554 int
 555 _lwkt_getalltokens_sorted(thread_t td)
 556 {
 557         lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
 558         lwkt_tokref_t scan;
 559         lwkt_token_t tok;
 560         int i;
 561         int j;
 562         int n;
 563
 564         /*
 565          * Insertion-sort the tokrefs by token address into sort_array.
 566          * The strict '<' below keeps equal tokens in td_toks order.
 567          * NOTE: Recursively acquired tokens are ordered the same as in the
 568          *       td_toks_array so we can always get the earliest one first.
 569          *       This is particularly important when a token is acquired
 570          *       exclusively multiple times, as only the first acquisition
 571          *       is treated as an exclusive token.
 572          */
 573         i = 0;
 574         scan = &td->td_toks_base;
 575         while (scan < td->td_toks_stop) {
 576                 for (j = 0; j < i; ++j) {
 577                         if (scan->tr_tok < sort_array[j]->tr_tok)
 578                                 break;
 579                 }
 580                 if (j != i) {   /* open a slot at j; the regions overlap */
 581                         bcopy(sort_array + j, sort_array + j + 1,
 582                               (i - j) * sizeof(lwkt_tokref_t));
 583                 }
 584                 sort_array[j] = scan;
 585                 ++scan;
 586                 ++i;
 587         }
 588         n = i;
 589
 590         /*
 591          * Acquire tokens in sorted order, assign or validate tok->t_ref.
 592          */
 593         for (i = 0; i < n; ++i) {
 594                 scan = sort_array[i];
 595                 tok = scan->tr_tok;
 596                 for (;;) {
 597                         /*
 598                          * Spin only on the thread's newest tokref (not the last sorted)
 599                          */
 600                         if (scan == td->td_toks_stop - 1) {
 601                             if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
 602                                     break;
 603                         } else {
 604                             if (_lwkt_trytokref(scan, td, scan->tr_count))
 605                                     break;
 606                         }
 607
 608                         /*
 609                          * Failed: record the contended token and undo.  NOTE(review):
 610                          * no KASSERT(tok->t_desc) here, unlike lwkt_getalltokens().
 611                          */
 612                         td->td_gd->gd_cnt.v_lock_name[0] = 't';
 613                         strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
 614                                 tok->t_desc,
 615                                 sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
 616                         if (lwkt_sched_debug > 0) {
 617                                 --lwkt_sched_debug;
 618                                 kprintf("tokb %p %s %s\n",
 619                                         tok, tok->t_desc, td->td_comm);
 620                         }
 621                         td->td_wmesg = tok->t_desc;
 622                         ++tok->t_collisions;
 623                         while (--i >= 0) {
 624                                 scan = sort_array[i];
 625                                 _lwkt_reltokref(scan, td);
 626                         }
 627                         return(FALSE);
 628                 }
 629         }
 630
 631         /*
 632          * We were successful, there is no need for another core to signal
 633          * us.
 634          */
 635         return (TRUE);
 636 }
 637
 638 /*
 639  * Get tok as an exclusive (serializing) token.  This routine can block.
 640  */
 641 void
 642 lwkt_gettoken(lwkt_token_t tok)
 643 {
 644         thread_t td = curthread;
 645         lwkt_tokref_t ref;
 646
 647         ref = td->td_toks_stop;         /* next free tokref slot */
 648         KKASSERT(ref < &td->td_toks_end);
 649         ++td->td_toks_stop;
 650         cpu_ccfence();
 651         _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
 652
 653 #ifdef DEBUG_LOCKS
 654         /*
 655          * Taking an exclusive token after holding it shared will
 656          * livelock. Scan for that case and assert.
 657          */
 658         lwkt_tokref_t tk;
 659         int found = 0;
 660         for (tk = &td->td_toks_base; tk < ref; tk++) {
 661                 if (tk->tr_tok != tok)
 662                         continue;
 663
 664                 found++;
 665                 if (tk->tr_count & TOK_EXCLUSIVE)
 666                         goto good;
 667         }
 668         /* We found only shared instances of this token if found >0 here */
 669         KASSERT((found == 0), ("Token %p s/x livelock", tok));
 670 good:
 671 #endif
 672
 673         if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
 674                 return;
 675
 676         /*
 677          * Give up running if we can't acquire the token right now.
 678          *
 679          * Since the tokref is already active the scheduler now
 680          * takes care of acquisition, so we need only call
 681          * lwkt_switch().
 682          *
 683          * Since we failed this was not a recursive token so upon
 684          * return tr_tok->t_ref should be assigned to this specific
 685          * ref.
 686          */
 687         td->td_wmesg = tok->t_desc;
 688         ++tok->t_collisions;
 689         logtoken(fail, ref);
 690         td->td_toks_have = td->td_toks_stop - 1;        /* == ref, not held */
 691
 692         if (tokens_debug_output > 0) {
 693                 --tokens_debug_output;
 694                 spin_lock(&tok_debug_spin);
 695                 kprintf("Excl Token thread %p %s %s\n",
 696                         td, tok->t_desc, td->td_comm);
 697                 print_backtrace(6);
 698                 kprintf("\n");
 699                 spin_unlock(&tok_debug_spin);
 700         }
 701
 702         atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
 703         lwkt_switch();
 704         logtoken(succ, ref);
 705         KKASSERT(tok->t_ref == ref);
 706 }
 707
 708 /*
 709  * Similar to lwkt_gettoken() but we acquire a shared token instead of an
 710  * exclusive token.  This routine can also switch away on contention.
 711  */
 712 void
 713 lwkt_gettoken_shared(lwkt_token_t tok)
 714 {
 715         thread_t td = curthread;
 716         lwkt_tokref_t ref;
 717
 718         ref = td->td_toks_stop;         /* next free tokref slot */
 719         KKASSERT(ref < &td->td_toks_end);
 720         ++td->td_toks_stop;
 721         cpu_ccfence();
 722         _lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
 723
 724 #ifdef DEBUG_LOCKS
 725         /*
 726          * Taking a pool token in shared mode is a bad idea; other
 727          * addresses deeper in the call stack may hash to the same pool
 728          * token and you may end up with an exclusive-shared livelock.
 729          * Warn in this condition.
 730          */
 731         if ((tok >= &pool_tokens[0].token) &&
 732             (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
 733                 kprintf("Warning! Taking pool token %p in shared mode\n", tok);
 734 #endif
 735
 736
 737         if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
 738                 return;
 739
 740         /*
 741          * Give up running if we can't acquire the token right now.
 742          *
 743          * Since the tokref is already active the scheduler now
 744          * takes care of acquisition, so we need only call
 745          * lwkt_switch().
 746          *
 747          * NOTE(review): unlike lwkt_gettoken() nothing here asserts on
 748          * t_ref afterwards; the shared path of _lwkt_trytokref() never
 749          * assigns it.
 750          */
 751         td->td_wmesg = tok->t_desc;
 752         ++tok->t_collisions;
 753         logtoken(fail, ref);
 754         td->td_toks_have = td->td_toks_stop - 1;        /* == ref, not held */
 755
 756         if (tokens_debug_output > 0) {
 757                 --tokens_debug_output;
 758                 spin_lock(&tok_debug_spin);
 759                 kprintf("Shar Token thread %p %s %s\n",
 760                         td, tok->t_desc, td->td_comm);
 761                 print_backtrace(6);
 762                 kprintf("\n");
 763                 spin_unlock(&tok_debug_spin);
 764         }
 765
 766         atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
 767         lwkt_switch();
 768         logtoken(succ, ref);
 769 }
 770
 771 /*
 772  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 773  *
 774  * We setup the tokref in case we actually get the token (if we switch later
 775  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 776  * TOK_EXCLREQ in case we fail.
 777  */
 778 int
 779 lwkt_trytoken(lwkt_token_t tok)
 780 {
 781         thread_t td = curthread;
 782         lwkt_tokref_t ref;
 783
 784         ref = td->td_toks_stop;
 785         KKASSERT(ref < &td->td_toks_end);
 786         ++td->td_toks_stop;
 787         cpu_ccfence();
 788         _lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
 789
 790         if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
 791                 return TRUE;
 792
 793         /*
 794          * Failed, unpend the request
 795          */
 796         cpu_ccfence();
 797         --td->td_toks_stop;
 798         ++tok->t_collisions;
 799         return FALSE;
 800 }
 801
 802 lwkt_token_t
 803 lwkt_getpooltoken(void *ptr)
 804 {
 805         lwkt_token_t tok;
 806
 807         tok = _lwkt_token_pool_lookup(ptr);
 808         lwkt_gettoken(tok);
 809         return (tok);
 810 }
 811
 812 /*
 813  * Release a serializing token.
 814  *
 815  * WARNING!  All tokens must be released in reverse order.  This will be
 816  *           asserted.
 817  */
 818 void
 819 lwkt_reltoken(lwkt_token_t tok)
 820 {
 821         thread_t td = curthread;
 822         lwkt_tokref_t ref;
 823
 824         /*
 825          * Remove ref from thread token list and assert that it matches
 826          * the token passed in.  Tokens must be released in reverse order.
 827          */
 828         ref = td->td_toks_stop - 1;
 829         KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
 830         _lwkt_reltokref(ref, td);
 831         cpu_sfence();
 832         td->td_toks_stop = ref;
 833 }
 834
 835 /*
 836  * It is faster for users of lwkt_getpooltoken() to use the returned
 837  * token and just call lwkt_reltoken(), but for convenience we provide
 838  * this function which looks the token up based on the ident.
 839  */
 840 void
 841 lwkt_relpooltoken(void *ptr)
 842 {
 843         lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
 844         lwkt_reltoken(tok);
 845 }
 846
 847 /*
 848  * Return a count of the number of token refs the thread has to the
 849  * specified token, whether it currently owns the token or not.
 850  */
 851 int
 852 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
 853 {
 854         lwkt_tokref_t scan;
 855         int count = 0;
 856
 857         for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
 858                 if (scan->tr_tok == tok)
 859                         ++count;
 860         }
 861         return(count);
 862 }
 863
 864 /*
 865  * Pool tokens are used to provide a type-stable serializing token
 866  * pointer that does not race against disappearing data structures.
 867  *
 868  * This routine is called in early boot just after we setup the BSP's
 869  * globaldata structure.
 870  */
 871 void
 872 lwkt_token_pool_init(void)
 873 {
 874         int i;
 875
 876         for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
 877                 lwkt_token_init(&pool_tokens[i].token, "pool");
 878 }
 879
 880 lwkt_token_t
 881 lwkt_token_pool_lookup(void *ptr)
 882 {
 883         return (_lwkt_token_pool_lookup(ptr));
 884 }
 885
 886 /*
 887  * Initialize a token.
 888  */
 889 void
 890 lwkt_token_init(lwkt_token_t tok, const char *desc)
 891 {
 892         tok->t_count = 0;
 893         tok->t_ref = NULL;
 894         tok->t_collisions = 0;
 895         tok->t_desc = desc;
 896 }
 897
 898 void
 899 lwkt_token_uninit(lwkt_token_t tok)
 900 {
 901         /* empty */
 902 }
 903
 904 /*
 905  * Exchange the two most recent tokens on the tokref stack.  This allows
 906  * you to release a token out of order.
 907  *
 908  * We have to be careful about the case where the top two tokens are
 909  * the same token.  In this case tok->t_ref will point to the deeper
 910  * ref and must remain pointing to the deeper ref.  If we were to swap
 911  * it the first release would clear the token even though a second
 912  * ref is still present.
 913  *
 914  * Only exclusively held tokens contain a reference to the tokref which
 915  * has to be flipped along with the swap.
 916  */
 917 void
 918 lwkt_token_swap(void)
 919 {
 920         lwkt_tokref_t ref1, ref2;
 921         lwkt_token_t tok1, tok2;
 922         long count1, count2;
 923         thread_t td = curthread;
 924
 925         crit_enter();
 926
 927         ref1 = td->td_toks_stop - 1;
 928         ref2 = td->td_toks_stop - 2;
 929         KKASSERT(ref1 >= &td->td_toks_base);
 930         KKASSERT(ref2 >= &td->td_toks_base);
 931
 932         tok1 = ref1->tr_tok;
 933         tok2 = ref2->tr_tok;
 934         count1 = ref1->tr_count;
 935         count2 = ref2->tr_count;
 936
 937         if (tok1 != tok2) {
 938                 ref1->tr_tok = tok2;
 939                 ref1->tr_count = count2;
 940                 ref2->tr_tok = tok1;
 941                 ref2->tr_count = count1;
 942                 if (tok1->t_ref == ref1)
 943                         tok1->t_ref = ref2;
 944                 if (tok2->t_ref == ref2)
 945                         tok2->t_ref = ref1;
 946         }
 947
 948         crit_exit();
 949 }
 950
 951 #ifdef DDB
 952 DB_SHOW_COMMAND(tokens, db_tok_all)
 953 {
 954         struct lwkt_token *tok, **ptr;
 955         struct lwkt_token *toklist[16] = {
 956                 &mp_token,
 957                 &pmap_token,
 958                 &dev_token,
 959                 &vm_token,
 960                 &vmspace_token,
 961                 &kvm_token,
 962                 &sigio_token,
 963                 &tty_token,
 964                 &vnode_token,
 965                 NULL
 966         };
 967
 968         ptr = toklist;
 969         for (tok = *ptr; tok; tok = *(++ptr)) {
 970                 db_printf("tok=%p tr_owner=%p t_colissions=%ld t_desc=%s\n", tok,
 971                     (tok->t_ref ? tok->t_ref->tr_owner : NULL),
 972                     tok->t_collisions, tok->t_desc);
 973         }
 974 }
 975 #endif /* DDB */