sys/kern/lwkt_ipiq.c

   1 /*
   2  * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  *
  26  * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.3 2004/02/17 19:38:49 dillon Exp $
  27  */
  28
  29 /*
  30  * This module implements IPI message queueing and the MI portion of IPI
  31  * message processing.
  32  */
  33
  34 #ifdef _KERNEL
  35
  36 #include <sys/param.h>
  37 #include <sys/systm.h>
  38 #include <sys/kernel.h>
  39 #include <sys/proc.h>
  40 #include <sys/rtprio.h>
  41 #include <sys/queue.h>
  42 #include <sys/thread2.h>
  43 #include <sys/sysctl.h>
  44 #include <sys/kthread.h>
  45 #include <machine/cpu.h>
  46 #include <sys/lock.h>
  47 #include <sys/caps.h>
  48
  49 #include <vm/vm.h>
  50 #include <vm/vm_param.h>
  51 #include <vm/vm_kern.h>
  52 #include <vm/vm_object.h>
  53 #include <vm/vm_page.h>
  54 #include <vm/vm_map.h>
  55 #include <vm/vm_pager.h>
  56 #include <vm/vm_extern.h>
  57 #include <vm/vm_zone.h>
  58
  59 #include <machine/stdarg.h>
  60 #include <machine/ipl.h>
  61 #include <machine/smp.h>
  62 #include <machine/atomic.h>
  63
  64 #define THREAD_STACK    (UPAGES * PAGE_SIZE)
  65
  66 #else
  67
  68 #include <sys/stdint.h>
  69 #include <libcaps/thread.h>
  70 #include <sys/thread.h>
  71 #include <sys/msgport.h>
  72 #include <sys/errno.h>
  73 #include <libcaps/globaldata.h>
  74 #include <sys/thread2.h>
  75 #include <sys/msgport2.h>
  76 #include <stdio.h>
  77 #include <stdlib.h>
  78 #include <string.h>
  79 #include <machine/cpufunc.h>
  80 #include <machine/lock.h>
  81
  82 #endif
  83
  84 #ifdef SMP
  85 static __int64_t ipiq_count;
  86 static __int64_t ipiq_fifofull;
  87 static __int64_t ipiq_cscount;
  88 #endif
  89
  90 #ifdef _KERNEL
  91
  92 #ifdef SMP
  93 SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
  94 SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
  95 SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
  96 #endif
  97
  98 #endif
  99
 100 #ifdef SMP
 101
 102 static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
 103 static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
 104 static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
 105
 106 /*
 107  * Send a function execution request to another cpu.  The request is queued
 108  * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 109  * possible target cpu.  The FIFO can be written.
 110  *
 111  * YYY If the FIFO fills up we have to enable interrupts and process the
 112  * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 113  * Create a CPU_*() function to do this!
 114  *
 115  * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 116  * end will take care of any pending interrupts.
 117  *
 118  * Must be called from a critical section.
 119  */
 120 int
 121 lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
 122 {
 123     lwkt_ipiq_t ip;
 124     int windex;
 125     struct globaldata *gd = mycpu;
 126
 127     if (target == gd) {
 128         func(arg);
 129         return(0);
 130     }
 131     crit_enter();
 132     ++gd->gd_intr_nesting_level;
 133 #ifdef INVARIANTS
 134     if (gd->gd_intr_nesting_level > 20)
 135         panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
 136 #endif
 137     KKASSERT(curthread->td_pri >= TDPRI_CRIT);
 138     ++ipiq_count;
 139     ip = &gd->gd_ipiq[target->gd_cpuid];
 140
 141     /*
 142      * We always drain before the FIFO becomes full so it should never
 143      * become full.  We need to leave enough entries to deal with
 144      * reentrancy.
 145      */
 146     KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
 147     windex = ip->ip_windex & MAXCPUFIFO_MASK;
 148     ip->ip_func[windex] = (ipifunc2_t)func;
 149     ip->ip_arg[windex] = arg;
 150     /* YYY memory barrier */
 151     ++ip->ip_windex;
 152     if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
 153         unsigned int eflags = read_eflags();
 154         cpu_enable_intr();
 155         ++ipiq_fifofull;
 156         while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
 157             KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
 158             lwkt_process_ipiq();
 159         }
 160         write_eflags(eflags);
 161     }
 162     --gd->gd_intr_nesting_level;
 163     cpu_send_ipiq(target->gd_cpuid);    /* issues mem barrier if appropriate */
 164     crit_exit();
 165     return(ip->ip_windex);
 166 }
 167
 168 /*
 169  * deprecated, used only by fast int forwarding.
 170  */
 171 int
 172 lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
 173 {
 174     return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
 175 }
 176
 177 /*
 178  * Send a message to several target cpus.  Typically used for scheduling.
 179  * The message will not be sent to stopped cpus.
 180  */
 181 int
 182 lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
 183 {
 184     int cpuid;
 185     int count = 0;
 186
 187     mask &= ~stopped_cpus;
 188     while (mask) {
 189         cpuid = bsfl(mask);
 190         lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
 191         mask &= ~(1 << cpuid);
 192         ++count;
 193     }
 194     return(count);
 195 }
 196
 197 /*
 198  * Wait for the remote cpu to finish processing a function.
 199  *
 200  * YYY we have to enable interrupts and process the IPIQ while waiting
 201  * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 202  * function to do this!  YYY we really should 'block' here.
 203  *
 204  * MUST be called from a critical section.  This routine may be called
 205  * from an interrupt (for example, if an interrupt wakes a foreign thread
 206  * up).
 207  */
 208 void
 209 lwkt_wait_ipiq(globaldata_t target, int seq)
 210 {
 211     lwkt_ipiq_t ip;
 212     int maxc = 100000000;
 213
 214     if (target != mycpu) {
 215         ip = &mycpu->gd_ipiq[target->gd_cpuid];
 216         if ((int)(ip->ip_xindex - seq) < 0) {
 217             unsigned int eflags = read_eflags();
 218             cpu_enable_intr();
 219             while ((int)(ip->ip_xindex - seq) < 0) {
 220                 lwkt_process_ipiq();
 221                 if (--maxc == 0)
 222                         printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
 223                 if (maxc < -1000000)
 224                         panic("LWKT_WAIT_IPIQ");
 225             }
 226             write_eflags(eflags);
 227         }
 228     }
 229 }
 230
 231 /*
 232  * Called from IPI interrupt (like a fast interrupt), which has placed
 233  * us in a critical section.  The MP lock may or may not be held.
 234  * May also be called from doreti or splz, or be reentrantly called
 235  * indirectly through the ip_func[] we run.
 236  *
 237  * There are two versions, one where no interrupt frame is available (when
 238  * called from the send code and from splz, and one where an interrupt
 239  * frame is available.
 240  */
 241 void
 242 lwkt_process_ipiq(void)
 243 {
 244     globaldata_t gd = mycpu;
 245     lwkt_ipiq_t ip;
 246     int n;
 247
 248 again:
 249     for (n = 0; n < ncpus; ++n) {
 250         if (n != gd->gd_cpuid) {
 251             ip = globaldata_find(n)->gd_ipiq;
 252             if (ip != NULL) {
 253                 while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
 254                     ;
 255             }
 256         }
 257     }
 258     if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
 259         if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) {
 260             if (gd->gd_curthread->td_cscount == 0)
 261                 goto again;
 262             need_ipiq();
 263         }
 264     }
 265 }
 266
 267 #ifdef _KERNEL
 268 void
 269 lwkt_process_ipiq_frame(struct intrframe frame)
 270 {
 271     globaldata_t gd = mycpu;
 272     lwkt_ipiq_t ip;
 273     int n;
 274
 275 again:
 276     for (n = 0; n < ncpus; ++n) {
 277         if (n != gd->gd_cpuid) {
 278             ip = globaldata_find(n)->gd_ipiq;
 279             if (ip != NULL) {
 280                 while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
 281                     ;
 282             }
 283         }
 284     }
 285     if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
 286         if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) {
 287             if (gd->gd_curthread->td_cscount == 0)
 288                 goto again;
 289             need_ipiq();
 290         }
 291     }
 292 }
 293 #endif
 294
 295 static int
 296 lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
 297 {
 298     int ri;
 299     int wi = ip->ip_windex;
 300     /*
 301      * Note: xindex is only updated after we are sure the function has
 302      * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
 303      * function may send an IPI which may block/drain.
 304      */
 305     while ((ri = ip->ip_rindex) != wi) {
 306         ip->ip_rindex = ri + 1;
 307         ri &= MAXCPUFIFO_MASK;
 308         ip->ip_func[ri](ip->ip_arg[ri], frame);
 309         /* YYY memory barrier */
 310         ip->ip_xindex = ip->ip_rindex;
 311     }
 312     return(wi != ip->ip_windex);
 313 }
 314
 315 #else
 316
 317 /*
 318  * !SMP dummy routines
 319  */
 320
 321 int
 322 lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
 323 {
 324     panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
 325     return(0); /* NOT REACHED */
 326 }
 327
 328 void
 329 lwkt_wait_ipiq(globaldata_t target, int seq)
 330 {
 331     panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
 332 }
 333
 334 #endif
 335
 336 /*
 337  * CPU Synchronization Support
 338  *
 339  * lwkt_cpusync_simple()
 340  *
 341  *      The function is executed synchronously before return on remote cpus.
 342  *      A lwkt_cpusync_t pointer is passed as an argument.  The data can
 343  *      be accessed via arg->cs_data.
 344  *
 345  *      XXX should I just pass the data as an argument to be consistent?
 346  */
 347
 348 void
 349 lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
 350 {
 351     struct lwkt_cpusync cmd;
 352
 353     cmd.cs_run_func = NULL;
 354     cmd.cs_fin1_func = func;
 355     cmd.cs_fin2_func = NULL;
 356     cmd.cs_data = data;
 357     lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
 358     if (mask & (1 << mycpu->gd_cpuid))
 359         func(&cmd);
 360     lwkt_cpusync_finish(&cmd);
 361 }
 362
 363 /*
 364  * lwkt_cpusync_fastdata()
 365  *
 366  *      The function is executed in tandem with return on remote cpus.
 367  *      The data is directly passed as an argument.  Do not pass pointers to
 368  *      temporary storage as the storage might have
 369  *      gone poof by the time the target cpu executes
 370  *      the function.
 371  *
 372  *      At the moment lwkt_cpusync is declared on the stack and we must wait
 373  *      for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 374  *      optimization we should be able to put a counter in the globaldata
 375  *      structure (if it is not otherwise being used) and just poke it and
 376  *      return without waiting. XXX
 377  */
 378 void
 379 lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
 380 {
 381     struct lwkt_cpusync cmd;
 382
 383     cmd.cs_run_func = NULL;
 384     cmd.cs_fin1_func = NULL;
 385     cmd.cs_fin2_func = func;
 386     cmd.cs_data = NULL;
 387     lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
 388     if (mask & (1 << mycpu->gd_cpuid))
 389         func(data);
 390     lwkt_cpusync_finish(&cmd);
 391 }
 392
 393 /*
 394  * lwkt_cpusync_start()
 395  *
 396  *      Start synchronization with a set of target cpus, return once they are
 397  *      known to be in a synchronization loop.  The target cpus will execute
 398  *      poll->cs_run_func() IN TANDEM WITH THE RETURN.
 399  *
 400  *      XXX future: add lwkt_cpusync_start_quick() and require a call to
 401  *      lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 402  *      potentially absorb the IPI latency doing something useful.
 403  */
 404 void
 405 lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
 406 {
 407     globaldata_t gd = mycpu;
 408
 409     poll->cs_count = 0;
 410     poll->cs_mask = mask;
 411 #ifdef SMP
 412     poll->cs_maxcount = lwkt_send_ipiq_mask(
 413                 mask & gd->gd_other_cpus & smp_active_mask,
 414                 (ipifunc_t)lwkt_cpusync_remote1, poll);
 415 #endif
 416     if (mask & (1 << gd->gd_cpuid)) {
 417         if (poll->cs_run_func)
 418             poll->cs_run_func(poll);
 419     }
 420 #ifdef SMP
 421     if (poll->cs_maxcount) {
 422         ++ipiq_cscount;
 423         ++gd->gd_curthread->td_cscount;
 424         while (poll->cs_count != poll->cs_maxcount) {
 425             crit_enter();
 426             lwkt_process_ipiq();
 427             crit_exit();
 428         }
 429     }
 430 #endif
 431 }
 432
 433 void
 434 lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
 435 {
 436     globaldata_t gd = mycpu;
 437     int count;
 438
 439     mask &= ~poll->cs_mask;
 440     poll->cs_mask |= mask;
 441 #ifdef SMP
 442     count = lwkt_send_ipiq_mask(
 443                 mask & gd->gd_other_cpus & smp_active_mask,
 444                 (ipifunc_t)lwkt_cpusync_remote1, poll);
 445 #endif
 446     if (mask & (1 << gd->gd_cpuid)) {
 447         if (poll->cs_run_func)
 448             poll->cs_run_func(poll);
 449     }
 450 #ifdef SMP
 451     poll->cs_maxcount += count;
 452     if (poll->cs_maxcount) {
 453         if (poll->cs_maxcount == count)
 454             ++gd->gd_curthread->td_cscount;
 455         while (poll->cs_count != poll->cs_maxcount) {
 456             crit_enter();
 457             lwkt_process_ipiq();
 458             crit_exit();
 459         }
 460     }
 461 #endif
 462 }
 463
 464 /*
 465  * Finish synchronization with a set of target cpus.  The target cpus will
 466  * execute cs_fin1_func(poll) prior to this function returning, and will
 467  * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 468  *
 469  * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 470  * more remote cpus and must account for it in our thread structure.
 471  */
 472 void
 473 lwkt_cpusync_finish(lwkt_cpusync_t poll)
 474 {
 475     globaldata_t gd = mycpu;
 476
 477     poll->cs_count = -1;
 478     if (poll->cs_mask & (1 << gd->gd_cpuid)) {
 479         if (poll->cs_fin1_func)
 480             poll->cs_fin1_func(poll);
 481         if (poll->cs_fin2_func)
 482             poll->cs_fin2_func(poll->cs_data);
 483     }
 484 #ifdef SMP
 485     if (poll->cs_maxcount) {
 486         while (poll->cs_count != -(poll->cs_maxcount + 1)) {
 487             crit_enter();
 488             lwkt_process_ipiq();
 489             crit_exit();
 490         }
 491         --gd->gd_curthread->td_cscount;
 492     }
 493 #endif
 494 }
 495
 496 #ifdef SMP
 497
 498 /*
 499  * helper IPI remote messaging function.
 500  *
 501  * Called on remote cpu when a new cpu synchronization request has been
 502  * sent to us.  Execute the run function and adjust cs_count, then requeue
 503  * the request so we spin on it.
 504  */
 505 static void
 506 lwkt_cpusync_remote1(lwkt_cpusync_t poll)
 507 {
 508     atomic_add_int(&poll->cs_count, 1);
 509     if (poll->cs_run_func)
 510         poll->cs_run_func(poll);
 511     lwkt_cpusync_remote2(poll);
 512 }
 513
 514 /*
 515  * helper IPI remote messaging function.
 516  *
 517  * Poll for the originator telling us to finish.  If it hasn't, requeue
 518  * our request so we spin on it.  When the originator requests that we
 519  * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 520  * in tandem with the release.
 521  */
 522 static void
 523 lwkt_cpusync_remote2(lwkt_cpusync_t poll)
 524 {
 525     if (poll->cs_count < 0) {
 526         cpusync_func2_t savef;
 527         void *saved;
 528
 529         if (poll->cs_fin1_func)
 530             poll->cs_fin1_func(poll);
 531         if (poll->cs_fin2_func) {
 532             savef = poll->cs_fin2_func;
 533             saved = poll->cs_data;
 534             atomic_add_int(&poll->cs_count, -1);
 535             savef(saved);
 536         } else {
 537             atomic_add_int(&poll->cs_count, -1);
 538         }
 539     } else {
 540         globaldata_t gd = mycpu;
 541         lwkt_ipiq_t ip;
 542         int wi;
 543
 544         ip = &gd->gd_cpusyncq;
 545         wi = ip->ip_windex & MAXCPUFIFO_MASK;
 546         ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
 547         ip->ip_arg[wi] = poll;
 548         ++ip->ip_windex;
 549     }
 550 }
 551
 552 #endif