/*
 * Copyright (c) 2004,2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas, November 1987.
 *
 * The per-cpu augmentation was done by Matthew Dillon, who also rewrote
 * this file essentially from scratch.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/interrupt.h>
#include <sys/thread.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#include <vm/vm_extern.h>
struct softclock_pcpu {
        struct callout_tailq *callwheel;
        struct callout * volatile next;
        intptr_t running;       /* NOTE! Bit 0 used to flag wakeup */
        int softticks;          /* softticks index */
        int curticks;           /* per-cpu ticks counter */
        struct thread thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;
static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures");

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);
static void callout_reset_ipi(void *arg);
static void callout_stop_ipi(void *arg, int issync, struct intrframe *frame);
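/*
 * Atomically set the bits in sflags and clear the bits in cflags in
 * c->c_flags.  Judging from the callers below (see the comment in
 * softclock_handler() about the value returned prior to clearing),
 * this helper returns the flags as they were before the update.
 */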
static __inline int
callout_setclear(struct callout *c, int sflags, int cflags)
{
        nflags = (flags | sflags) & ~cflags;
        if (atomic_cmpset_int(&c->c_flags, flags, nflags))
static void
swi_softclock_setup(void *arg)
{
        /*
         * Figure out how large a callwheel we need.  It must be a power of 2.
         *
         * ncallout is primarily based on available memory; don't explode
         * the allocations if the system has a lot of cpus.
         */
        target = ncallout / ncpus + 16;

        while (cwheelsize < target)
                cwheelsize <<= 1;
        cwheelmask = cwheelsize - 1;
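        /*
         * For example (illustrative numbers only): with ncallout = 16384
         * and ncpus = 8 the target is 16384 / 8 + 16 = 2064, which rounds
         * up to a cwheelsize of 4096 buckets per cpu and a cwheelmask of
         * 0x0FFF.
         */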
        /*
         * Initialize per-cpu data structures.
         */
        for (cpu = 0; cpu < ncpus; ++cpu) {
                sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
                                         VM_SUBSYS_GD, KM_CPU(cpu));
                memset(sc, 0, sizeof(*sc));
                softclock_pcpu_ary[cpu] = sc;

                wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
                sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
                                                    VM_SUBSYS_GD, KM_CPU(cpu));
                memset(sc->callwheel, 0, wheel_sz);
                for (i = 0; i < cwheelsize; ++i)
                        TAILQ_INIT(&sc->callwheel[i]);

                /*
                 * Mark the softclock handler as being an interrupt thread
                 * even though it really isn't, but do not allow it to
                 * preempt other threads (do not assign td_preemptable).
                 *
                 * Kernel code now assumes that callouts do not preempt
                 * the cpu they were scheduled on.
                 */
                lwkt_create(softclock_handler, sc, NULL, &sc->thread,
                            TDF_NOSTART | TDF_INTTHREAD,
                            cpu, "softclock %d", cpu);
        }
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
        swi_softclock_setup, NULL);
/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * WARNING!  The MP lock is not necessarily held on call, nor can it be
 * safely obtained.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 */
void
hardclock_softtick(globaldata_t gd)
{
        sc = softclock_pcpu_ary[gd->gd_cpuid];

        if (sc->softticks == sc->curticks) {
                /*
                 * In sync, only wakeup the thread if there is something to
                 * do.
                 */
                if (TAILQ_FIRST(&sc->callwheel[sc->softticks & cwheelmask])) {
                        lwkt_schedule(&sc->thread);
                }
        } else {
                /*
                 * Out of sync, wakeup the thread unconditionally so it can
                 * catch up.
                 */
                lwkt_schedule(&sc->thread);
        }
}
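/*
 * Rough illustration of the wheel indexing (numbers assume the 4096-bucket
 * example above): a callout due at c_time = curticks + 100 lives in bucket
 * (curticks + 100) & cwheelmask.  Each hardclock presumably advances this
 * cpu's curticks by one, and the helper thread walks softticks forward one
 * bucket at a time until it catches up with curticks.
 */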
/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick() and
 * a critical section is sufficient to interlock sc->curticks and protect
 * us from remote IPI's / list removal.
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
        struct callout_tailq *bucket;
        struct callout slotimer;

        /*
         * Setup pcpu slow clocks which we want to run from the callout
         * thread.
         */
        callout_init_mp(&slotimer);
        callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

        /*
         * Run the callout thread at the same priority as other kernel
         * threads so it can be round-robined.
         */
        /*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
        /*
         * Loop critical section against ipi operations to this cpu.
         */
        while (sc->softticks != (int)(sc->curticks + 1)) {
                bucket = &sc->callwheel[sc->softticks & cwheelmask];

                for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
                        void (*c_func)(void *);

                        if (c->c_time != sc->softticks) {
                                sc->next = TAILQ_NEXT(c, c_links.tqe);

                        /*
                         * Synchronize with mpsafe requirements
                         */
                        if (flags & CALLOUT_MPSAFE) {

                        /*
                         * The request might be removed while we
                         * are waiting to get the MP lock.  If it
                         * was removed sc->next will point to the
                         * next valid request or NULL, loop up.
                         */

                        /*
                         * Queue protection only exists while we hold the
                         * critical section uninterrupted.
                         *
                         * Adjust sc->next when removing (c) from the queue,
                         * note that an IPI on this cpu may make further
                         * adjustments to sc->next.
                         */
                        sc->next = TAILQ_NEXT(c, c_links.tqe);
                        TAILQ_REMOVE(bucket, c, c_links.tqe);

                        KASSERT((c->c_flags & CALLOUT_DID_INIT) &&
                                (c->c_flags & CALLOUT_PENDING) &&
                                CALLOUT_FLAGS_TO_CPU(c->c_flags) ==
                                mycpu->gd_cpuid,
                                ("callout %p: bad flags %08x", c, c->c_flags));

                        /*
                         * Once CALLOUT_PENDING is cleared only the IPI_MASK
                         * prevents the callout from being moved to another
                         * cpu.  However, callout_stop() will also check
                         * sc->running on the assigned cpu if CALLOUT_EXECUTED
                         * is set.  CALLOUT_EXECUTED implies a callback
                         * interlock is needed when cross-cpu.
                         */
                        sc->running = (intptr_t)c;

                        if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) ==
                            (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) {
                                error = lockmgr(c_lk, LK_EXCLUSIVE |
                                flags = callout_setclear(c,
                                lockmgr(c_lk, LK_RELEASE);
                                flags = callout_setclear(c,
                        } else if (flags & CALLOUT_ACTIVE) {
                                flags = callout_setclear(c,
                                flags = callout_setclear(c,

                        /*
                         * Read and clear sc->running.  If bit 0 was set,
                         * a callout_stop() is likely blocked waiting for
                         * the callback to complete.
                         *
                         * The setclear above also cleared CALLOUT_WAITING
                         * and returns the contents of flags prior to clearing
                         * any bits.
                         *
                         * Interlock wakeup any _stop's waiting on us.  Note
                         * that once c_func() was called, the callout
                         * structure (c) pointer may no longer be valid.  It
                         * can only be used for the wakeup.
                         */
                        if ((atomic_readandclear_ptr(&sc->running) & 1) ||
                            (flags & CALLOUT_WAITING)) {
                                wakeup(c);
                        }
                        /* NOTE: list may have changed */

        /*
         * Don't leave us holding the MP lock when we deschedule ourselves.
         */
        lwkt_deschedule_self(&sc->thread);      /* == curthread */
/*
 * A very slow system cleanup timer (10 second interval).
 */
static void
slotimer_callback(void *arg)
{
        struct callout *c = arg;

        callout_reset(c, hz * 10, slotimer_callback, c);
}
/*
 * Start or restart a timeout.  Installs the callout structure on the
 * callwheel of the current cpu.  Callers may legally pass any value, even
 * if 0 or negative, but since the sc->curticks index may have already
 * been processed, a minimum timeout of 1 tick will be enforced.
 *
 * This function will block if the callout is currently queued to a different
 * cpu or the callback is currently running in another thread.
 */
void
callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *), void *arg)
{
        if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
                kprintf(
                    "callout_reset(%p) from %p: callout was not initialized\n",
                    c, ((int **)&c)[-1]);
        }

        sc = softclock_pcpu_ary[gd->gd_cpuid];

        /*
         * Our cpu must gain ownership of the callout and cancel anything
         * still running, which is complex.  The easiest way to do it is to
         * issue a callout_stop_sync().  callout_stop_sync() will also
         * handle CALLOUT_EXECUTED (dispatch waiting), and clear it.
         *
         * WARNING: callout_stop_sync()'s return state can race other
         * callout_*() calls due to blocking, so we must re-check.
         */
        if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED))
                callout_stop_sync(c);
        flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED);
        nflags = (flags & ~CALLOUT_CPU_MASK) |
                 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid) |
        if (atomic_cmpset_int(&c->c_flags, flags, nflags))

        /*
         * With the critical section held and PENDING set we now 'own' the
         * callout.
         */
        c->c_time = sc->curticks + to_ticks;

        TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & cwheelmask],
                          c, c_links.tqe);
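/*
 * Typical usage sketch.  The names mydev_softc, mydev_timeout and sc are
 * hypothetical and only illustrate the calling pattern; hz ticks equal
 * one second.
 *
 *	struct mydev_softc {
 *		struct callout	timer;
 *	};
 *
 *	static void
 *	mydev_timeout(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *
 *		// periodic work here, then re-arm for another second
 *		callout_reset(&sc->timer, hz, mydev_timeout, sc);
 *	}
 *
 *	// once, at attach time:
 *	callout_init_mp(&sc->timer);
 *	callout_reset(&sc->timer, hz, mydev_timeout, sc);
 */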
/*
 * Set up a callout to run on the specified cpu.  Should generally be used
 * to run a callout on a specific cpu which does not nominally change.  This
 * callout_reset() will be issued asynchronously via an IPI.
 */
void
callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
                    void *arg, int cpuid)
{
        if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
                kprintf(
                    "callout_reset(%p) from %p: callout was not initialized\n",
                    c, ((int **)&c)[-1]);
        }

        tgd = globaldata_find(cpuid);

        /*
         * This code is similar to the code in callout_reset() but we assign
         * the callout to the target cpu.  We cannot set PENDING here since
         * we cannot atomically add the callout to the target cpu's queue.
         * However, incrementing the IPI count has the effect of locking
         * the cpu assignment.
         *
         * WARNING: callout_stop_sync()'s return state can race other
         * callout_*() calls due to blocking, so we must re-check.
         */
        if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED))
                callout_stop_sync(c);
        flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED);
        nflags = (flags & ~(CALLOUT_CPU_MASK |
                 CALLOUT_CPU_TO_FLAGS(tgd->gd_cpuid) |
        nflags = nflags + 1;            /* bump IPI count */
        if (atomic_cmpset_int(&c->c_flags, flags, nflags))

        /*
         * Since we control our +1 in the IPI count, the target cpu cannot
         * now change until our IPI is processed.
         */
        c->c_load = to_ticks;   /* IPI will add curticks */

        lwkt_send_ipiq(tgd, callout_reset_ipi, c);
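/*
 * Usage sketch (hypothetical names): arm a callout on cpu 0 regardless of
 * which cpu issues the call.  The actual queueing is performed on the
 * target cpu by the IPI handler below.
 *
 *	callout_init_mp(&sc->timer);
 *	callout_reset_bycpu(&sc->timer, hz * 5, mydev_timeout, sc, 0);
 */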
/*
 * Remote IPI for callout_reset_bycpu().  The cpu assignment cannot be
 * ripped out from under us due to the count in IPI_MASK, but it is possible
 * that other IPIs have executed, so we must deal with other flags that might
 * have been set or cleared.
 */
static void
callout_reset_ipi(void *arg)
{
        struct callout *c = arg;
        globaldata_t gd = mycpu;

        sc = softclock_pcpu_ary[gd->gd_cpuid];

        KKASSERT((flags & CALLOUT_IPI_MASK) > 0 &&
                 CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid);

        nflags = (flags - 1) & ~(CALLOUT_EXECUTED | CALLOUT_WAITING);
        nflags |= CALLOUT_PENDING;

        /*
         * Put us on the queue
         */
        if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
                if (flags & CALLOUT_PENDING) {
                        sc->next = TAILQ_NEXT(c, c_links.tqe);
                        TAILQ_REMOVE(
                            &sc->callwheel[c->c_time & cwheelmask],
                            c, c_links.tqe);
                }
                c->c_time = sc->curticks + c->c_load;
                TAILQ_INSERT_TAIL(
                    &sc->callwheel[c->c_time & cwheelmask],
                    c, c_links.tqe);
        }

        /*
         * Issue wakeup if requested.
         */
        if (flags & CALLOUT_WAITING)
                wakeup(c);
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or by using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
        globaldata_t gd = mycpu;

        if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
                kprintf(
                    "callout_stop(%p) from %p: callout was not initialized\n",
                    c, ((int **)&c)[-1]);
        }

        /*
         * Adjust flags for the required operation.  If the callout is
         * armed on another cpu we break out into the remote-cpu code which
         * will issue an IPI.  If it is not armed we are trivially done,
         * but may still need to test EXECUTED.
         */
        cpuid = CALLOUT_FLAGS_TO_CPU(flags);

        /*
         * Armed on remote cpu (break to remote-cpu code)
         */
        if ((flags & CALLOUT_ARMED_MASK) && gd->gd_cpuid != cpuid) {
                if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
                        /*
                         * BREAK TO REMOTE-CPU CODE HERE
                         */

        /*
         * Armed or armable on current cpu
         */
        if (flags & CALLOUT_IPI_MASK) {
                continue;       /* retry */

        /*
         * If PENDING is set we can remove the callout from our
         * queue and also use the side effect that the bit causes
         * the callout to be locked to our cpu.
         */
        if (flags & CALLOUT_PENDING) {
                sc = softclock_pcpu_ary[gd->gd_cpuid];
                sc->next = TAILQ_NEXT(c, c_links.tqe);
                TAILQ_REMOVE(
                    &sc->callwheel[c->c_time & cwheelmask],
                    c, c_links.tqe);

                nflags = flags & ~(CALLOUT_ACTIVE |
                if (atomic_cmpset_int(&c->c_flags,

        /*
         * If PENDING was not set the callout might not be locked
         * to our cpu.
         */
        nflags = flags & ~(CALLOUT_ACTIVE |
        if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
        /*
         * Remote cpu path.  We incremented the IPI_MASK count so the callout
         * is now locked to the remote cpu and we can safely send an IPI
         * to it.
         *
         * Once sent, wait for all IPIs to be processed.  If PENDING remains
         * set after all IPIs have processed we raced a callout or
         * callout_reset() and must retry.  Callers expect the callout to
         * be completely stopped upon return, so make sure it is.
         */
        tgd = globaldata_find(cpuid);
        lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

        if ((flags & CALLOUT_IPI_MASK) == 0)

        nflags = flags | CALLOUT_WAITING;
        tsleep_interlock(c, 0);
        if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
                tsleep(c, PINTERLOCKED, "cstp1", 0);
        }

        if (flags & CALLOUT_PENDING)

        /*
         * Caller expects callout_stop_sync() to clear EXECUTED and return
         * its previous status.
         */
        atomic_clear_int(&c->c_flags, CALLOUT_EXECUTED);

        if (flags & CALLOUT_WAITING)
                wakeup(c);

        /*
         * If (issync) we must also wait for any in-progress callbacks to
         * complete, unless the stop is being executed from the callback
         * itself.  The EXECUTED flag is set prior to the callback
         * being made so our existing flags status already has it.
         *
         * If auto-lock mode is being used, this is where we cancel any
         * blocked lock that is potentially preventing the target cpu
         * from completing the callback.
         */
        sc = softclock_pcpu_ary[cpuid];
        if (gd->gd_curthread == &sc->thread)    /* stop from cb */

        if ((runco & ~(intptr_t)1) != (intptr_t)c)

        if (c->c_flags & CALLOUT_AUTOLOCK)
                lockmgr(c->c_lk, LK_CANCEL_BEG);
        tsleep_interlock(c, 0);
        if (atomic_cmpset_long(runp, runco, runco | 1))
                tsleep(c, PINTERLOCKED, "cstp3", 0);
        if (c->c_flags & CALLOUT_AUTOLOCK)
                lockmgr(c->c_lk, LK_CANCEL_END);

        rc = (flags & CALLOUT_EXECUTED) != 0;
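/*
 * Deadlock note: if the callback acquires a lock that the stopping thread
 * already holds, a synchronous stop issued while holding that lock can
 * deadlock waiting for the callback to finish.  A sketch of the intended
 * pattern, assuming the callout was initialized with callout_init_lk() on
 * the same lockmgr lock (hypothetical names):
 *
 *	lockmgr(&sc->lk, LK_EXCLUSIVE);
 *	callout_stop(&sc->timer);	// autolock cancelation interlocks
 *	lockmgr(&sc->lk, LK_RELEASE);	// against the running callback
 */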
/*
 * IPI for the stop function.  The callout is locked to the receiving cpu
 * by the IPI_MASK count.
 */
static void
callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
{
        globaldata_t gd = mycpu;
        struct callout *c = arg;

        KKASSERT(CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid);

        /*
         * We can handle the PENDING flag immediately.
         */
        if (flags & CALLOUT_PENDING) {
                sc = softclock_pcpu_ary[gd->gd_cpuid];
                sc->next = TAILQ_NEXT(c, c_links.tqe);
                TAILQ_REMOVE(
                    &sc->callwheel[c->c_time & cwheelmask],
                    c, c_links.tqe);
        }

        /*
         * Transition to the stopped state and decrement the IPI count.
         * Leave the EXECUTED bit alone (the next callout_reset() will
         * have to deal with it).
         */
        nflags = (flags - 1) & ~(CALLOUT_ACTIVE |
        if (atomic_cmpset_int(&c->c_flags, flags, nflags))

        if (flags & CALLOUT_WAITING)
                wakeup(c);
}
int
callout_stop(struct callout *c)
{
        return _callout_stop(c, 0);
}

int
callout_stop_sync(struct callout *c)
{
        return _callout_stop(c, 1);
}

void
callout_stop_async(struct callout *c)

void
callout_terminate(struct callout *c)
{
        atomic_clear_int(&c->c_flags, CALLOUT_DID_INIT);
}
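/*
 * Teardown sketch (hypothetical detach path): stop the callout
 * synchronously so no callback can still be running, terminate it, and
 * only then free the structure that embeds it.
 *
 *	callout_stop_sync(&sc->timer);
 *	callout_terminate(&sc->timer);
 *	kfree(sc, M_DEVBUF);
 */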
/*
 * Prepare a callout structure for use by callout_reset() and/or
 * callout_stop().
 *
 * The MP version of this routine requires that the callback
 * function installed by callout_reset() be MP safe.
 *
 * The LK version of this routine is also MPsafe and will automatically
 * acquire the specified lock for the duration of the function call,
 * and release it after the function returns.  In addition, when autolocking
 * is used, callout_stop() becomes synchronous if the caller owns the lock.
 * callout_reset(), callout_stop(), and callout_stop_sync() will block
 * normally instead of spinning when a cpu race occurs.  Lock cancelation
 * is used to avoid deadlocks against the callout ring dispatch.
 *
 * The init functions can be called from any cpu and do not have to be
 * called from the cpu that the timer will eventually run on.
 */
static void
_callout_init(struct callout *c, int flags)

void
callout_init(struct callout *c)
{
        _callout_init(c, CALLOUT_DID_INIT);
}

void
callout_init_mp(struct callout *c)
{
        _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE);
}

void
callout_init_lk(struct callout *c, struct lock *lk)
{
        _callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK);
        c->c_lk = lk;
}
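/*
 * Usage sketch for the auto-lock variant (hypothetical names): the callback
 * then runs with sc->lk held, and stop/reset operations interlock against
 * it via lockmgr cancelation as described above.
 *
 *	lockinit(&sc->lk, "mydev", 0, 0);
 *	callout_init_lk(&sc->timer, &sc->lk);
 *	callout_reset(&sc->timer, hz, mydev_timeout, sc);
 */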