sys/kern/lwkt_thread.c

   1 /*
   2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  *
  26  *      Each cpu in a system has its own self-contained light weight kernel
  27  *      thread scheduler, which means that generally speaking we only need
  28  *      to use a critical section to prevent hiccups.
  29  *
  30  * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.12 2003/06/30 19:50:31 dillon Exp $
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/systm.h>
  35 #include <sys/kernel.h>
  36 #include <sys/proc.h>
  37 #include <sys/rtprio.h>
  38 #include <sys/queue.h>
  39 #include <sys/thread2.h>
  40 #include <sys/sysctl.h>
  41 #include <sys/kthread.h>
  42 #include <machine/cpu.h>
  43 #include <sys/lock.h>
  44
  45 #include <vm/vm.h>
  46 #include <vm/vm_param.h>
  47 #include <vm/vm_kern.h>
  48 #include <vm/vm_object.h>
  49 #include <vm/vm_page.h>
  50 #include <vm/vm_map.h>
  51 #include <vm/vm_pager.h>
  52 #include <vm/vm_extern.h>
  53 #include <vm/vm_zone.h>
  54
  55 #include <machine/stdarg.h>
  56
  57 static int untimely_switch = 0;
  58 SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
  59 static quad_t switch_count = 0;
  60 SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
  61 static quad_t preempt_hit = 0;
  62 SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
  63 static quad_t preempt_miss = 0;
  64 SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
  65 static quad_t preempt_weird = 0;
  66 SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
  67
  68 /*
  69  * These helper procedures handle the runq, they can only be called from
  70  * within a critical section.
  71  */
  72 static __inline
  73 void
  74 _lwkt_dequeue(thread_t td)
  75 {
  76     if (td->td_flags & TDF_RUNQ) {
  77         int nq = td->td_pri & TDPRI_MASK;
  78         struct globaldata *gd = mycpu;
  79
  80         td->td_flags &= ~TDF_RUNQ;
  81         TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
  82         /* runqmask is passively cleaned up by the switcher */
  83     }
  84 }
  85
  86 static __inline
  87 void
  88 _lwkt_enqueue(thread_t td)
  89 {
  90     if ((td->td_flags & TDF_RUNQ) == 0) {
  91         int nq = td->td_pri & TDPRI_MASK;
  92         struct globaldata *gd = mycpu;
  93
  94         td->td_flags |= TDF_RUNQ;
  95         TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
  96         gd->gd_runqmask |= 1 << nq;
  97 #if 0
  98         /*
  99          * YYY needs cli/sti protection? gd_reqpri set by interrupt
 100          * when made pending.  need better mechanism.
 101          */
 102         if (gd->gd_reqpri < (td->td_pri & TDPRI_MASK))
 103             gd->gd_reqpri = (td->td_pri & TDPRI_MASK);
 104 #endif
 105     }
 106 }
 107
 108 /*
 109  * LWKTs operate on a per-cpu basis
 110  *
 111  * YYY implement strict priorities & round-robin at the same priority
 112  */
 113 void
 114 lwkt_gdinit(struct globaldata *gd)
 115 {
 116     int i;
 117
 118     for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
 119         TAILQ_INIT(&gd->gd_tdrunq[i]);
 120     gd->gd_runqmask = 0;
 121 }
 122
 123 /*
 124  * Initialize a thread wait structure prior to first use.
 125  *
 126  * NOTE!  called from low level boot code, we cannot do anything fancy!
 127  */
 128 void
 129 lwkt_init_wait(lwkt_wait_t w)
 130 {
 131     TAILQ_INIT(&w->wa_waitq);
 132 }
 133
 134 /*
 135  * Create a new thread.  The thread must be associated with a process context
 136  * or LWKT start address before it can be scheduled.
 137  *
 138  * If you intend to create a thread without a process context this function
 139  * does everything except load the startup and switcher function.
 140  */
 141 thread_t
 142 lwkt_alloc_thread(struct thread *td)
 143 {
 144     void *stack;
 145     int flags = 0;
 146
 147     if (td == NULL) {
 148         crit_enter();
 149         if (mycpu->gd_tdfreecount > 0) {
 150             --mycpu->gd_tdfreecount;
 151             td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
 152             KASSERT(td != NULL && (td->td_flags & TDF_EXITED),
 153                 ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
 154             TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
 155             crit_exit();
 156             stack = td->td_kstack;
 157             flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
 158         } else {
 159             crit_exit();
 160             td = zalloc(thread_zone);
 161             td->td_kstack = NULL;
 162             flags |= TDF_ALLOCATED_THREAD;
 163         }
 164     }
 165     if ((stack = td->td_kstack) == NULL) {
 166         stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 167         flags |= TDF_ALLOCATED_STACK;
 168     }
 169     lwkt_init_thread(td, stack, flags, mycpu);
 170     return(td);
 171 }
 172
 173 /*
 174  * Initialize a preexisting thread structure.  This function is used by
 175  * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 176  *
 177  * NOTE!  called from low level boot code, we cannot do anything fancy!
 178  */
 179 void
 180 lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
 181 {
 182     bzero(td, sizeof(struct thread));
 183     td->td_kstack = stack;
 184     td->td_flags |= flags;
 185     td->td_gd = gd;
 186     td->td_pri = TDPRI_CRIT;
 187     pmap_init_thread(td);
 188 }
 189
 190 void
 191 lwkt_free_thread(struct thread *td)
 192 {
 193     KASSERT(td->td_flags & TDF_EXITED,
 194         ("lwkt_free_thread: did not exit! %p", td));
 195
 196     crit_enter();
 197     if (mycpu->gd_tdfreecount < CACHE_NTHREADS &&
 198         (td->td_flags & TDF_ALLOCATED_THREAD)
 199     ) {
 200         ++mycpu->gd_tdfreecount;
 201         TAILQ_INSERT_HEAD(&mycpu->gd_tdfreeq, td, td_threadq);
 202         crit_exit();
 203     } else {
 204         crit_exit();
 205         if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
 206             kmem_free(kernel_map,
 207                     (vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
 208             td->td_kstack = NULL;
 209         }
 210         if (td->td_flags & TDF_ALLOCATED_THREAD)
 211             zfree(thread_zone, td);
 212     }
 213 }
 214
 215
 216 /*
 217  * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 218  * switch to the idlethread.  Switching must occur within a critical
 219  * section to avoid races with the scheduling queue.
 220  *
 221  * We always have full control over our cpu's run queue.  Other cpus
 222  * that wish to manipulate our queue must use the cpu_*msg() calls to
 223  * talk to our cpu, so a critical section is all that is needed and
 224  * the result is very, very fast thread switching.
 225  *
 226  * We always 'own' our own thread and the threads on our run queue,l
 227  * due to TDF_RUNNING or TDF_RUNQ being set.  We can safely clear
 228  * TDF_RUNNING while in a critical section.
 229  *
 230  * The td_switch() function must be called while in the critical section.
 231  * This function saves as much state as is appropriate for the type of
 232  * thread.
 233  *
 234  * (self contained on a per cpu basis)
 235  */
 236 void
 237 lwkt_switch(void)
 238 {
 239     struct globaldata *gd;
 240     thread_t td = curthread;
 241     thread_t ntd;
 242
 243     if (mycpu->gd_intr_nesting_level && td->td_preempted == NULL)
 244         panic("lwkt_switch: cannot switch from within an interrupt, yet\n");
 245
 246     crit_enter();
 247     ++switch_count;
 248     if ((ntd = td->td_preempted) != NULL) {
 249         /*
 250          * We had preempted another thread on this cpu, resume the preempted
 251          * thread.  This occurs transparently, whether the preempted thread
 252          * was scheduled or not (it may have been preempted after descheduling
 253          * itself).
 254          */
 255         KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
 256         ntd->td_flags |= TDF_PREEMPT_DONE;
 257     } else {
 258         /*
 259          * Priority queue / round-robin at each priority.  Note that user
 260          * processes run at a fixed, low priority and the user process
 261          * scheduler deals with interactions between user processes
 262          * by scheduling and descheduling them from the LWKT queue as
 263          * necessary.
 264          */
 265         gd = mycpu;
 266
 267 again:
 268         if (gd->gd_runqmask) {
 269             int nq = bsrl(gd->gd_runqmask);
 270             if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
 271                 gd->gd_runqmask &= ~(1 << nq);
 272                 goto again;
 273             }
 274             TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
 275             TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
 276         } else {
 277             ntd = gd->gd_idletd;
 278         }
 279     }
 280     KASSERT(ntd->td_pri >= TDPRI_CRIT,
 281         ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
 282     if (td != ntd)
 283         td->td_switch(ntd);
 284     crit_exit();
 285 }
 286
 287 /*
 288  * Request that the target thread preempt the current thread.  This only
 289  * works if:
 290  *
 291  *      + We aren't trying to preempt ourselves (it can happen!)
 292  *      + We are not currently being preempted
 293  *      + the target is not currently being preempted
 294  *
 295  * XXX at the moment we run the target thread in a critical section during
 296  * the preemption in order to prevent the target from taking interrupts
 297  * that *WE* can't.  Preemption is strictly limited to interrupt threads
 298  * and interrupt-like threads, outside of a critical section, and the
 299  * preempted source thread will be resumed the instant the target blocks
 300  * whether or not the source is scheduled (i.e. preemption is supposed to
 301  * be as transparent as possible).
 302  *
 303  * This call is typically made from an interrupt handler like sched_ithd()
 304  * which will only run if the current thread is not in a critical section,
 305  * so we optimize the priority check a bit.
 306  *
 307  * CAREFUL! either we or the target thread may get interrupted during the
 308  * switch.
 309  */
 310 void
 311 lwkt_preempt(struct thread *ntd, int id)
 312 {
 313     struct thread *td = curthread;
 314
 315     /*
 316      * The caller has put us in a critical section, and in order to have
 317      * gotten here in the first place the thread the caller interrupted
 318      * cannot have been in a critical section before.
 319      */
 320     KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
 321     KASSERT((td->td_pri & ~TDPRI_MASK) == TDPRI_CRIT, ("BADPRI %d", td->td_pri));
 322
 323     if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
 324         ++preempt_weird;
 325         return;
 326     }
 327     if (ntd->td_preempted) {
 328         ++preempt_hit;
 329         return;
 330     }
 331     if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
 332         ++preempt_miss;
 333         return;
 334     }
 335
 336     ++preempt_hit;
 337     ntd->td_preempted = td;
 338     td->td_flags |= TDF_PREEMPT_LOCK;
 339     td->td_switch(ntd);
 340     KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
 341     ntd->td_preempted = NULL;
 342     td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
 343 }
 344
 345 /*
 346  * Yield our thread while higher priority threads are pending.  This is
 347  * typically called when we leave a critical section but it can be safely
 348  * called while we are in a critical section.
 349  *
 350  * This function will not generally yield to equal priority threads but it
 351  * can occur as a side effect.  Note that lwkt_switch() is called from
 352  * inside the critical section to pervent its own crit_exit() from reentering
 353  * lwkt_yield_quick().
 354  *
 355  * gd_reqpri indicates that *something* changed, e.g. an interrupt or softint
 356  * came along but was blocked and made pending.
 357  *
 358  * (self contained on a per cpu basis)
 359  */
 360 void
 361 lwkt_yield_quick(void)
 362 {
 363     thread_t td = curthread;
 364
 365     if ((td->td_pri & TDPRI_MASK) < mycpu->gd_reqpri) {
 366         mycpu->gd_reqpri = 0;
 367         splz();
 368     }
 369
 370     /*
 371      * YYY enabling will cause wakeup() to task-switch, which really
 372      * confused the old 4.x code.  This is a good way to simulate
 373      * preemption and MP without actually doing preemption or MP, because a
 374      * lot of code assumes that wakeup() does not block.
 375      */
 376     if (untimely_switch && mycpu->gd_intr_nesting_level == 0) {
 377         crit_enter();
 378         /*
 379          * YYY temporary hacks until we disassociate the userland scheduler
 380          * from the LWKT scheduler.
 381          */
 382         if (td->td_flags & TDF_RUNQ) {
 383             lwkt_switch();              /* will not reenter yield function */
 384         } else {
 385             lwkt_schedule_self();       /* make sure we are scheduled */
 386             lwkt_switch();              /* will not reenter yield function */
 387             lwkt_deschedule_self();     /* make sure we are descheduled */
 388         }
 389         crit_exit_noyield();
 390     }
 391 }
 392
 393 /*
 394  * This implements a normal yield which, unlike _quick, will yield to equal
 395  * priority threads as well.  Note that gd_reqpri tests will be handled by
 396  * the crit_exit() call in lwkt_switch().
 397  *
 398  * (self contained on a per cpu basis)
 399  */
 400 void
 401 lwkt_yield(void)
 402 {
 403     lwkt_schedule_self();
 404     lwkt_switch();
 405 }
 406
 407 /*
 408  * Schedule a thread to run.  As the current thread we can always safely
 409  * schedule ourselves, and a shortcut procedure is provided for that
 410  * function.
 411  *
 412  * (non-blocking, self contained on a per cpu basis)
 413  */
 414 void
 415 lwkt_schedule_self(void)
 416 {
 417     thread_t td = curthread;
 418
 419     crit_enter();
 420     KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
 421     _lwkt_enqueue(td);
 422     if (td->td_proc && td->td_proc->p_stat == SSLEEP)
 423         panic("SCHED SELF PANIC");
 424     crit_exit();
 425 }
 426
 427 /*
 428  * Generic schedule.  Possibly schedule threads belonging to other cpus and
 429  * deal with threads that might be blocked on a wait queue.
 430  *
 431  * This function will queue requests asynchronously when possible, but may
 432  * block if no request structures are available.  Upon return the caller
 433  * should note that the scheduling request may not yet have been processed
 434  * by the target cpu.
 435  *
 436  * YYY this is one of the best places to implement any load balancing code.
 437  * Load balancing can be accomplished by requesting other sorts of actions
 438  * for the thread in question.
 439  */
 440 void
 441 lwkt_schedule(thread_t td)
 442 {
 443     if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
 444         && td->td_proc->p_stat == SSLEEP
 445     ) {
 446         printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
 447             curthread,
 448             curthread->td_proc ? curthread->td_proc->p_pid : -1,
 449             curthread->td_proc ? curthread->td_proc->p_stat : -1,
 450             td,
 451             td->td_proc ? curthread->td_proc->p_pid : -1,
 452             td->td_proc ? curthread->td_proc->p_stat : -1
 453         );
 454         panic("SCHED PANIC");
 455     }
 456     crit_enter();
 457     if (td == curthread) {
 458         _lwkt_enqueue(td);
 459     } else {
 460         lwkt_wait_t w;
 461
 462         /*
 463          * If the thread is on a wait list we have to send our scheduling
 464          * request to the owner of the wait structure.  Otherwise we send
 465          * the scheduling request to the cpu owning the thread.  Races
 466          * are ok, the target will forward the message as necessary (the
 467          * message may chase the thread around before it finally gets
 468          * acted upon).
 469          *
 470          * (remember, wait structures use stable storage)
 471          */
 472         if ((w = td->td_wait) != NULL) {
 473             if (lwkt_havetoken(&w->wa_token)) {
 474                 TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
 475                 --w->wa_count;
 476                 td->td_wait = NULL;
 477                 if (td->td_cpu == mycpu->gd_cpuid) {
 478                     _lwkt_enqueue(td);
 479                 } else {
 480                     panic("lwkt_schedule: cpu mismatch1");
 481 #if 0
 482                     lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
 483                     initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
 484                     cpu_sendnormsg(&msg.mu_Msg);
 485 #endif
 486                 }
 487             } else {
 488                 panic("lwkt_schedule: cpu mismatch2");
 489 #if 0
 490                 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
 491                 initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
 492                 cpu_sendnormsg(&msg.mu_Msg);
 493 #endif
 494             }
 495         } else {
 496             /*
 497              * If the wait structure is NULL and we own the thread, there
 498              * is no race (since we are in a critical section).  If we
 499              * do not own the thread there might be a race but the
 500              * target cpu will deal with it.
 501              */
 502             if (td->td_cpu == mycpu->gd_cpuid) {
 503                 _lwkt_enqueue(td);
 504             } else {
 505                 panic("lwkt_schedule: cpu mismatch3");
 506 #if 0
 507                 lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
 508                 initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
 509                 cpu_sendnormsg(&msg.mu_Msg);
 510 #endif
 511             }
 512         }
 513     }
 514     crit_exit();
 515 }
 516
 517 /*
 518  * Deschedule a thread.
 519  *
 520  * (non-blocking, self contained on a per cpu basis)
 521  */
 522 void
 523 lwkt_deschedule_self(void)
 524 {
 525     thread_t td = curthread;
 526
 527     crit_enter();
 528     KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
 529     _lwkt_dequeue(td);
 530     crit_exit();
 531 }
 532
 533 /*
 534  * Generic deschedule.  Descheduling threads other then your own should be
 535  * done only in carefully controlled circumstances.  Descheduling is
 536  * asynchronous.
 537  *
 538  * This function may block if the cpu has run out of messages.
 539  */
 540 void
 541 lwkt_deschedule(thread_t td)
 542 {
 543     crit_enter();
 544     if (td == curthread) {
 545         _lwkt_dequeue(td);
 546     } else {
 547         if (td->td_cpu == mycpu->gd_cpuid) {
 548             _lwkt_dequeue(td);
 549         } else {
 550             panic("lwkt_deschedule: cpu mismatch");
 551 #if 0
 552             lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
 553             initDescheduleReqMsg_Thread(&msg.mu_DeschedReq, td);
 554             cpu_sendnormsg(&msg.mu_Msg);
 555 #endif
 556         }
 557     }
 558     crit_exit();
 559 }
 560
 561 /*
 562  * Set the target thread's priority.  This routine does not automatically
 563  * switch to a higher priority thread, LWKT threads are not designed for
 564  * continuous priority changes.  Yield if you want to switch.
 565  *
 566  * We have to retain the critical section count which uses the high bits
 567  * of the td_pri field.  The specified priority may also indicate zero or
 568  * more critical sections by adding TDPRI_CRIT*N.
 569  */
 570 void
 571 lwkt_setpri(thread_t td, int pri)
 572 {
 573     KKASSERT(pri >= 0);
 574     crit_enter();
 575     if (td->td_flags & TDF_RUNQ) {
 576         _lwkt_dequeue(td);
 577         td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
 578         _lwkt_enqueue(td);
 579     } else {
 580         td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
 581     }
 582     crit_exit();
 583 }
 584
 585 void
 586 lwkt_setpri_self(int pri)
 587 {
 588     thread_t td = curthread;
 589
 590     KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
 591     crit_enter();
 592     if (td->td_flags & TDF_RUNQ) {
 593         _lwkt_dequeue(td);
 594         td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
 595         _lwkt_enqueue(td);
 596     } else {
 597         td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
 598     }
 599     crit_exit();
 600 }
 601
 602 struct proc *
 603 lwkt_preempted_proc(void)
 604 {
 605     struct thread *td = curthread;
 606     while (td->td_preempted)
 607         td = td->td_preempted;
 608     return(td->td_proc);
 609 }
 610
 611
 612 /*
 613  * This function deschedules the current thread and blocks on the specified
 614  * wait queue.  We obtain ownership of the wait queue in order to block
 615  * on it.  A generation number is used to interlock the wait queue in case
 616  * it gets signalled while we are blocked waiting on the token.
 617  *
 618  * Note: alternatively we could dequeue our thread and then message the
 619  * target cpu owning the wait queue.  YYY implement as sysctl.
 620  *
 621  * Note: wait queue signals normally ping-pong the cpu as an optimization.
 622  */
 623 void
 624 lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
 625 {
 626     thread_t td = curthread;
 627
 628     lwkt_gettoken(&w->wa_token);
 629     if (w->wa_gen == *gen) {
 630         _lwkt_dequeue(td);
 631         TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
 632         ++w->wa_count;
 633         td->td_wait = w;
 634         td->td_wmesg = wmesg;
 635         lwkt_switch();
 636     }
 637     /* token might be lost, doesn't matter for gen update */
 638     *gen = w->wa_gen;
 639     lwkt_reltoken(&w->wa_token);
 640 }
 641
 642 /*
 643  * Signal a wait queue.  We gain ownership of the wait queue in order to
 644  * signal it.  Once a thread is removed from the wait queue we have to
 645  * deal with the cpu owning the thread.
 646  *
 647  * Note: alternatively we could message the target cpu owning the wait
 648  * queue.  YYY implement as sysctl.
 649  */
 650 void
 651 lwkt_signal(lwkt_wait_t w)
 652 {
 653     thread_t td;
 654     int count;
 655
 656     lwkt_gettoken(&w->wa_token);
 657     ++w->wa_gen;
 658     count = w->wa_count;
 659     while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
 660         --count;
 661         --w->wa_count;
 662         TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
 663         td->td_wait = NULL;
 664         td->td_wmesg = NULL;
 665         if (td->td_cpu == mycpu->gd_cpuid) {
 666             _lwkt_enqueue(td);
 667         } else {
 668 #if 0
 669             lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
 670             initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
 671             cpu_sendnormsg(&msg.mu_Msg);
 672 #endif
 673             panic("lwkt_signal: cpu mismatch");
 674         }
 675         lwkt_regettoken(&w->wa_token);
 676     }
 677     lwkt_reltoken(&w->wa_token);
 678 }
 679
 680 /*
 681  * Aquire ownership of a token
 682  *
 683  * Aquire ownership of a token.  The token may have spl and/or critical
 684  * section side effects, depending on its purpose.  These side effects
 685  * guarentee that you will maintain ownership of the token as long as you
 686  * do not block.  If you block you may lose access to the token (but you
 687  * must still release it even if you lose your access to it).
 688  *
 689  * Note that the spl and critical section characteristics of a token
 690  * may not be changed once the token has been initialized.
 691  */
 692 void
 693 lwkt_gettoken(lwkt_token_t tok)
 694 {
 695     /*
 696      * Prevent preemption so the token can't be taken away from us once
 697      * we gain ownership of it.  Use a synchronous request which might
 698      * block.  The request will be forwarded as necessary playing catchup
 699      * to the token.
 700      */
 701     crit_enter();
 702 #if 0
 703     while (tok->t_cpu != mycpu->gd_cpuid) {
 704         lwkt_cpu_msg_union msg;
 705         initTokenReqMsg(&msg.mu_TokenReq);
 706         cpu_domsg(&msg);
 707     }
 708 #endif
 709     /*
 710      * leave us in a critical section on return.  This will be undone
 711      * by lwkt_reltoken()
 712      */
 713 }
 714
 715 /*
 716  * Release your ownership of a token.  Releases must occur in reverse
 717  * order to aquisitions, eventually so priorities can be unwound properly
 718  * like SPLs.  At the moment the actual implemention doesn't care.
 719  *
 720  * We can safely hand a token that we own to another cpu without notifying
 721  * it, but once we do we can't get it back without requesting it (unless
 722  * the other cpu hands it back to us before we check).
 723  *
 724  * We might have lost the token, so check that.
 725  */
 726 void
 727 lwkt_reltoken(lwkt_token_t tok)
 728 {
 729     if (tok->t_cpu == mycpu->gd_cpuid) {
 730         tok->t_cpu = tok->t_reqcpu;
 731     }
 732     crit_exit();
 733 }
 734
 735 /*
 736  * Reaquire a token that might have been lost.  Returns 1 if we blocked
 737  * while reaquiring the token (meaning that you might have lost other
 738  * tokens you held when you made this call), return 0 if we did not block.
 739  */
 740 int
 741 lwkt_regettoken(lwkt_token_t tok)
 742 {
 743 #if 0
 744     if (tok->t_cpu != mycpu->gd_cpuid) {
 745         while (tok->t_cpu != mycpu->gd_cpuid) {
 746             lwkt_cpu_msg_union msg;
 747             initTokenReqMsg(&msg.mu_TokenReq);
 748             cpu_domsg(&msg);
 749         }
 750         return(1);
 751     }
 752 #endif
 753     return(0);
 754 }
 755
 756 /*
 757  * Create a kernel process/thread/whatever.  It shares it's address space
 758  * with proc0 - ie: kernel only.
 759  *
 760  * XXX should be renamed to lwkt_create()
 761  */
 762 int
 763 lwkt_create(void (*func)(void *), void *arg,
 764     struct thread **tdp, struct thread *template, int tdflags,
 765     const char *fmt, ...)
 766 {
 767     struct thread *td;
 768     va_list ap;
 769
 770     td = *tdp = lwkt_alloc_thread(template);
 771     cpu_set_thread_handler(td, kthread_exit, func, arg);
 772     td->td_flags |= TDF_VERBOSE | tdflags;
 773
 774     /*
 775      * Set up arg0 for 'ps' etc
 776      */
 777     va_start(ap, fmt);
 778     vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
 779     va_end(ap);
 780
 781     /*
 782      * Schedule the thread to run
 783      */
 784     if ((td->td_flags & TDF_STOPREQ) == 0)
 785         lwkt_schedule(td);
 786     else
 787         td->td_flags &= ~TDF_STOPREQ;
 788     return 0;
 789 }
 790
 791 /*
 792  * Destroy an LWKT thread.   Warning!  This function is not called when
 793  * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 794  * uses a different reaping mechanism.
 795  */
 796 void
 797 lwkt_exit(void)
 798 {
 799     thread_t td = curthread;
 800
 801     if (td->td_flags & TDF_VERBOSE)
 802         printf("kthread %p %s has exited\n", td, td->td_comm);
 803     crit_enter();
 804     lwkt_deschedule_self();
 805     ++mycpu->gd_tdfreecount;
 806     TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
 807     cpu_thread_exit();
 808 }
 809
 810 /*
 811  * Create a kernel process/thread/whatever.  It shares it's address space
 812  * with proc0 - ie: kernel only.  5.x compatible.
 813  */
 814 int
 815 kthread_create(void (*func)(void *), void *arg,
 816     struct thread **tdp, const char *fmt, ...)
 817 {
 818     struct thread *td;
 819     va_list ap;
 820
 821     td = *tdp = lwkt_alloc_thread(NULL);
 822     cpu_set_thread_handler(td, kthread_exit, func, arg);
 823     td->td_flags |= TDF_VERBOSE;
 824
 825     /*
 826      * Set up arg0 for 'ps' etc
 827      */
 828     va_start(ap, fmt);
 829     vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
 830     va_end(ap);
 831
 832     /*
 833      * Schedule the thread to run
 834      */
 835     lwkt_schedule(td);
 836     return 0;
 837 }
 838
 839 void
 840 crit_panic(void)
 841 {
 842     struct thread *td = curthread;
 843     int lpri = td->td_pri;
 844
 845     td->td_pri = 0;
 846     panic("td_pri is/would-go negative! %p %d", td, lpri);
 847 }
 848
 849 /*
 850  * Destroy an LWKT thread.   Warning!  This function is not called when
 851  * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 852  * uses a different reaping mechanism.
 853  *
 854  * XXX duplicates lwkt_exit()
 855  */
 856 void
 857 kthread_exit(void)
 858 {
 859     lwkt_exit();
 860 }
 861