x86_64: Put the x86_64 specific "seg-fault..." message under bootverbose.
[dragonfly.git] / sys / kern / lwkt_ipiq.c
1/*
2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.27 2008/05/18 20:57:56 nth Exp $
35 */
36
37/*
38 * This module implements IPI message queueing and the MI portion of IPI
39 * message processing.
40 */
41
42#include "opt_ddb.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/proc.h>
48#include <sys/rtprio.h>
49#include <sys/queue.h>
50#include <sys/thread2.h>
51#include <sys/sysctl.h>
52#include <sys/ktr.h>
53#include <sys/kthread.h>
54#include <machine/cpu.h>
55#include <sys/lock.h>
56#include <sys/caps.h>
57
58#include <vm/vm.h>
59#include <vm/vm_param.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_object.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_pager.h>
65#include <vm/vm_extern.h>
66#include <vm/vm_zone.h>
67
68#include <machine/stdarg.h>
69#include <machine/smp.h>
70#include <machine/atomic.h>
71
72#ifdef SMP
73static __int64_t ipiq_count; /* total calls to lwkt_send_ipiq*() */
74static __int64_t ipiq_fifofull; /* number of fifo full conditions detected */
75static __int64_t ipiq_avoided; /* interlock with target avoids cpu ipi */
76static __int64_t ipiq_passive; /* passive IPI messages */
77static __int64_t ipiq_cscount; /* number of cpu synchronizations */
78static int ipiq_optimized = 1; /* XXX temporary sysctl */
79#ifdef PANIC_DEBUG
80static int panic_ipiq_cpu = -1;
81static int panic_ipiq_count = 100;
82#endif
83#endif
84
85#ifdef SMP
86SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0,
87 "Number of IPI's sent");
88SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0,
89 "Number of fifo full conditions detected");
90SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0,
91 "Number of IPI's avoided by interlock with target cpu");
92SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0,
93 "Number of passive IPI messages sent");
94SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0,
95 "Number of cpu synchronizations");
96SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0,
97 "");
98#ifdef PANIC_DEBUG
99SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
100SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
101#endif
102
103#define IPIQ_STRING "func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
104#define IPIQ_ARG_SIZE (sizeof(void *) * 2 + sizeof(int) * 3)
105
106#if !defined(KTR_IPIQ)
107#define KTR_IPIQ KTR_ALL
108#endif
109KTR_INFO_MASTER(ipiq);
110KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
111KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
112KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
113KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
114KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
115KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
116KTR_INFO(KTR_IPIQ, ipiq, sync_add, 6, "cpumask=%08x", sizeof(cpumask_t));
117KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARG_SIZE);
118KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARG_SIZE);
119
120#define logipiq(name, func, arg1, arg2, sgd, dgd) \
121 KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
122#define logipiq2(name, arg) \
123 KTR_LOG(ipiq_ ## name, arg)
124
125#endif /* SMP */
126
127#ifdef SMP
128
129static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
130 struct intrframe *frame);
131static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
132static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
133
134/*
135 * Send a function execution request to another cpu. The request is queued
136 * on the cpu<->cpu ipiq matrix. Each cpu owns a unique ipiq FIFO for every
137 * possible target cpu.  The sending cpu fills the FIFO; the target cpu drains it.
138 *
139 * If the FIFO fills up we have to enable interrupts to avoid an APIC
140 * deadlock and process pending IPIQs while waiting for it to empty.
141 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
142 *
143 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
144 * end will take care of any pending interrupts.
145 *
146 * The actual hardware IPI is avoided if the target cpu is already processing
147 * the queue from a prior IPI. It is possible to pipeline IPI messages
148 * very quickly between cpus due to the FIFO hysteresis.
149 *
150 * Need not be called from a critical section.
151 */
152int
153lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
154{
155 lwkt_ipiq_t ip;
156 int windex;
157 struct globaldata *gd = mycpu;
158
159 logipiq(send_norm, func, arg1, arg2, gd, target);
160
161 if (target == gd) {
162 func(arg1, arg2, NULL);
163 logipiq(send_end, func, arg1, arg2, gd, target);
164 return(0);
165 }
166 crit_enter();
167 ++gd->gd_intr_nesting_level;
168#ifdef INVARIANTS
169 if (gd->gd_intr_nesting_level > 20)
170 panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
171#endif
172 KKASSERT(curthread->td_critcount);
173 ++ipiq_count;
174 ip = &gd->gd_ipiq[target->gd_cpuid];
175
176 /*
177 * Do not allow the FIFO to become full. Interrupts must be physically
178 * enabled while we liveloop to avoid deadlocking the APIC.
179 */
180 if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
181#if defined(__i386__)
182 unsigned int eflags = read_eflags();
183#elif defined(__x86_64__)
184 unsigned long rflags = read_rflags();
185#endif
186
187 if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
188 logipiq(cpu_send, func, arg1, arg2, gd, target);
189 cpu_send_ipiq(target->gd_cpuid);
190 }
191 cpu_enable_intr();
192 ++ipiq_fifofull;
193 while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
194 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
195 lwkt_process_ipiq();
196 }
197#if defined(__i386__)
198 write_eflags(eflags);
199#elif defined(__x86_64__)
200 write_rflags(rflags);
201#endif
202 }
203
204 /*
205 * Queue the new message
206 */
207 windex = ip->ip_windex & MAXCPUFIFO_MASK;
208 ip->ip_func[windex] = func;
209 ip->ip_arg1[windex] = arg1;
210 ip->ip_arg2[windex] = arg2;
211 cpu_sfence();
212 ++ip->ip_windex;
213 --gd->gd_intr_nesting_level;
214
215 /*
216 * signal the target cpu that there is work pending.
217 */
218 if (atomic_poll_acquire_int(&ip->ip_npoll)) {
219 logipiq(cpu_send, func, arg1, arg2, gd, target);
220 cpu_send_ipiq(target->gd_cpuid);
221 } else {
222 if (ipiq_optimized == 0) {
223 logipiq(cpu_send, func, arg1, arg2, gd, target);
224 cpu_send_ipiq(target->gd_cpuid);
225 } else {
226 ++ipiq_avoided;
227 }
228 }
229 crit_exit();
230
231 logipiq(send_end, func, arg1, arg2, gd, target);
232 return(ip->ip_windex);
233}
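
/*
 * Illustrative sketch (not compiled): a hypothetical caller that runs a
 * small handler on another cpu.  The handler signature follows the
 * ipifunc3_t usage above; "example_counter", "example_bump" and
 * "example_send" are made-up names, not part of this module.
 */
#if 0
static int example_counter;

static void
example_bump(void *arg1, int arg2, struct intrframe *frame)
{
	/* Executes on the target cpu from its IPIQ processing path */
	example_counter += arg2;
}

static void
example_send(globaldata_t target)
{
	/*
	 * Queue the request on our per-target FIFO.  The hardware IPI is
	 * skipped if the target is already draining the queue.
	 */
	lwkt_send_ipiq3(target, example_bump, NULL, 1);
}
#endif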
234
235/*
236 * Similar to lwkt_send_ipiq() but this function does not actually initiate
237 * the IPI to the target cpu unless the FIFO has become too full, so it is
238 * very fast.
239 *
240 * This function is used for non-critical IPI messages, such as memory
241 * deallocations. The queue will typically be flushed by the target cpu at
242 * the next clock interrupt.
243 *
244 * Need not be called from a critical section.
245 */
246int
247lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
248 void *arg1, int arg2)
249{
250 lwkt_ipiq_t ip;
251 int windex;
252 struct globaldata *gd = mycpu;
253
254 KKASSERT(target != gd);
255 crit_enter();
256 logipiq(send_pasv, func, arg1, arg2, gd, target);
257 ++gd->gd_intr_nesting_level;
258#ifdef INVARIANTS
259 if (gd->gd_intr_nesting_level > 20)
260 panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
261#endif
262 KKASSERT(curthread->td_critcount);
263 ++ipiq_count;
264 ++ipiq_passive;
265 ip = &gd->gd_ipiq[target->gd_cpuid];
266
267 /*
268 * Do not allow the FIFO to become full. Interrupts must be physically
269 * enabled while we liveloop to avoid deadlocking the APIC.
270 */
271 if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
272#if defined(__i386__)
273 unsigned int eflags = read_eflags();
274#elif defined(__x86_64__)
275 unsigned long rflags = read_rflags();
276#endif
277
278 if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
279 logipiq(cpu_send, func, arg1, arg2, gd, target);
280 cpu_send_ipiq(target->gd_cpuid);
281 }
282 cpu_enable_intr();
283 ++ipiq_fifofull;
284 while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
285 KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
286 lwkt_process_ipiq();
287 }
288#if defined(__i386__)
289 write_eflags(eflags);
290#elif defined(__x86_64__)
291 write_rflags(rflags);
292#endif
293 }
294
295 /*
296 * Queue the new message
297 */
298 windex = ip->ip_windex & MAXCPUFIFO_MASK;
299 ip->ip_func[windex] = func;
300 ip->ip_arg1[windex] = arg1;
301 ip->ip_arg2[windex] = arg2;
302 cpu_sfence();
303 ++ip->ip_windex;
304 --gd->gd_intr_nesting_level;
305
306 /*
307 * Do not signal the target cpu; it will pick up the IPI when it next
308 * polls (typically on the next tick).
309 */
310 crit_exit();
311
312 logipiq(send_end, func, arg1, arg2, gd, target);
313 return(ip->ip_windex);
314}
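
/*
 * Illustrative sketch (not compiled): deferring non-critical work to
 * another cpu without forcing an immediate hardware IPI.  The names
 * "example_lazy_work" and "example_defer" are hypothetical.
 */
#if 0
static void
example_lazy_work(void *arg1, int arg2, struct intrframe *frame)
{
	/* Typically runs when the target cpu polls on its next tick */
}

static void
example_defer(globaldata_t target, void *obj)
{
	/* arg1 must remain valid until the target actually processes it */
	lwkt_send_ipiq3_passive(target, example_lazy_work, obj, 0);
}
#endif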
315
316/*
317 * Send an IPI request without blocking, return 0 on success, ENOENT on
318 * failure. The actual queueing of the hardware IPI may still force us
319 * to spin and process incoming IPIs but that will eventually go away
320 * when we've gotten rid of the other general IPIs.
321 */
322int
323lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
324 void *arg1, int arg2)
325{
326 lwkt_ipiq_t ip;
327 int windex;
328 struct globaldata *gd = mycpu;
329
330 logipiq(send_nbio, func, arg1, arg2, gd, target);
331 KKASSERT(curthread->td_critcount);
332 if (target == gd) {
333 func(arg1, arg2, NULL);
334 logipiq(send_end, func, arg1, arg2, gd, target);
335 return(0);
336 }
337 ++ipiq_count;
338 ip = &gd->gd_ipiq[target->gd_cpuid];
339
340 if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
341 logipiq(send_fail, func, arg1, arg2, gd, target);
342 return(ENOENT);
343 }
344 windex = ip->ip_windex & MAXCPUFIFO_MASK;
345 ip->ip_func[windex] = func;
346 ip->ip_arg1[windex] = arg1;
347 ip->ip_arg2[windex] = arg2;
348 cpu_sfence();
349 ++ip->ip_windex;
350
351 /*
352 * This isn't a passive IPI, we still have to signal the target cpu.
353 */
354 if (atomic_poll_acquire_int(&ip->ip_npoll)) {
355 logipiq(cpu_send, func, arg1, arg2, gd, target);
356 cpu_send_ipiq(target->gd_cpuid);
357 } else {
358 if (ipiq_optimized == 0) {
359 logipiq(cpu_send, func, arg1, arg2, gd, target);
360 cpu_send_ipiq(target->gd_cpuid);
361 } else {
362 ++ipiq_avoided;
363 }
364 }
365
366 logipiq(send_end, func, arg1, arg2, gd, target);
367 return(0);
368}
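
/*
 * Illustrative sketch (not compiled): using the non-blocking variant and
 * falling back to the blocking one when the FIFO is too full.  The names
 * "example_handler" and "example_try_send" are hypothetical.
 */
#if 0
static void
example_handler(void *arg1, int arg2, struct intrframe *frame)
{
}

static void
example_try_send(globaldata_t target)
{
	/* The caller must already be in a critical section (see KKASSERT) */
	if (lwkt_send_ipiq3_nowait(target, example_handler, NULL, 0) == ENOENT) {
		/* FIFO nearly full; fall back to the blocking variant */
		lwkt_send_ipiq3(target, example_handler, NULL, 0);
	}
}
#endif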
369
370/*
371 * deprecated, used only by fast int forwarding.
372 */
373int
374lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
375{
376 return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
377}
378
379/*
380 * Send a message to several target cpus. Typically used for scheduling.
381 * The message will not be sent to stopped cpus.
382 */
383int
384lwkt_send_ipiq3_mask(cpumask_t mask, ipifunc3_t func, void *arg1, int arg2)
385{
386 int cpuid;
387 int count = 0;
388
389 mask &= ~stopped_cpus;
390 while (mask) {
391 cpuid = BSFCPUMASK(mask);
392 lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
393 mask &= ~CPUMASK(cpuid);
394 ++count;
395 }
396 return(count);
397}
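
/*
 * Illustrative sketch (not compiled): broadcasting a handler to every
 * other active cpu with the mask variant.  The names "example_handler2"
 * and "example_broadcast" are hypothetical.
 */
#if 0
static void
example_handler2(void *arg1, int arg2, struct intrframe *frame)
{
}

static void
example_broadcast(void)
{
	cpumask_t mask;

	/* All other active cpus; stopped cpus are filtered out above */
	mask = mycpu->gd_other_cpus & smp_active_mask;
	lwkt_send_ipiq3_mask(mask, example_handler2, NULL, 0);
}
#endif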
398
399/*
400 * Wait for the remote cpu to finish processing a function.
401 *
402 * YYY we have to enable interrupts and process the IPIQ while waiting
403 * for it to empty or we may deadlock with another cpu. Create a CPU_*()
404 * function to do this! YYY we really should 'block' here.
405 *
406 * MUST be called from a critical section. This routine may be called
407 * from an interrupt (for example, if an interrupt wakes a foreign thread
408 * up).
409 */
410void
411lwkt_wait_ipiq(globaldata_t target, int seq)
412{
413 lwkt_ipiq_t ip;
414 int maxc = 100000000;
415
416 if (target != mycpu) {
417 ip = &mycpu->gd_ipiq[target->gd_cpuid];
418 if ((int)(ip->ip_xindex - seq) < 0) {
419#if defined(__i386__)
420 unsigned int eflags = read_eflags();
421#elif defined(__x86_64__)
422 unsigned long rflags = read_rflags();
423#endif
424 cpu_enable_intr();
425 while ((int)(ip->ip_xindex - seq) < 0) {
426 crit_enter();
427 lwkt_process_ipiq();
428 crit_exit();
429 if (--maxc == 0)
430 kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
431 if (maxc < -1000000)
432 panic("LWKT_WAIT_IPIQ");
433 /*
434 * xindex may be modified by another cpu, use a load fence
435 * to ensure that the loop does not use a speculative value
436 * (which may improve performance).
437 */
438 cpu_lfence();
439 }
440#if defined(__i386__)
441 write_eflags(eflags);
442#elif defined(__x86_64__)
443 write_rflags(rflags);
444#endif
445 }
446 }
447}
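
/*
 * Illustrative sketch (not compiled): pairing the sequence number returned
 * by lwkt_send_ipiq3() with lwkt_wait_ipiq() to wait until the target has
 * executed the message.  The names "example_handler3" and
 * "example_send_and_wait" are hypothetical.
 */
#if 0
static void
example_handler3(void *arg1, int arg2, struct intrframe *frame)
{
}

static void
example_send_and_wait(globaldata_t target)
{
	int seq;

	seq = lwkt_send_ipiq3(target, example_handler3, NULL, 0);
	crit_enter();			/* lwkt_wait_ipiq requires a crit section */
	lwkt_wait_ipiq(target, seq);	/* spins, draining our own IPIQs */
	crit_exit();
}
#endif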
448
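/*
 * Return the current send sequence number (write index) of the ipiq from
 * this cpu to the target cpu.  The value can later be passed to
 * lwkt_wait_ipiq() to wait for the target to catch up.
 */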
449int
450lwkt_seq_ipiq(globaldata_t target)
451{
452 lwkt_ipiq_t ip;
453
454 ip = &mycpu->gd_ipiq[target->gd_cpuid];
455 return(ip->ip_windex);
456}
457
458/*
459 * Called from IPI interrupt (like a fast interrupt), which has placed
460 * us in a critical section. The MP lock may or may not be held.
461 * May also be called from doreti or splz, or be reentrantly called
462 * indirectly through the ip_func[] we run.
463 *
464 * There are two versions: one where no interrupt frame is available (when
465 * called from the send code and from splz), and one where an interrupt
466 * frame is available.
467 */
468void
469lwkt_process_ipiq(void)
470{
471 globaldata_t gd = mycpu;
472 globaldata_t sgd;
473 lwkt_ipiq_t ip;
474 int n;
475
476again:
477 for (n = 0; n < ncpus; ++n) {
478 if (n != gd->gd_cpuid) {
479 sgd = globaldata_find(n);
480 ip = sgd->gd_ipiq;
481 if (ip != NULL) {
482 while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
483 ;
484 }
485 }
486 }
487 if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
488 if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
489 if (gd->gd_curthread->td_cscount == 0)
490 goto again;
491 need_ipiq();
492 }
493 }
494}
495
496void
497lwkt_process_ipiq_frame(struct intrframe *frame)
498{
499 globaldata_t gd = mycpu;
500 globaldata_t sgd;
501 lwkt_ipiq_t ip;
502 int n;
503
504again:
505 for (n = 0; n < ncpus; ++n) {
506 if (n != gd->gd_cpuid) {
507 sgd = globaldata_find(n);
508 ip = sgd->gd_ipiq;
509 if (ip != NULL) {
510 while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
511 ;
512 }
513 }
514 }
515 if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
516 if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
517 if (gd->gd_curthread->td_cscount == 0)
518 goto again;
519 need_ipiq();
520 }
521 }
522}
523
524static int
525lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
526 struct intrframe *frame)
527{
528 globaldata_t mygd = mycpu;
529 int ri;
530 int wi;
531 ipifunc3_t copy_func;
532 void *copy_arg1;
533 int copy_arg2;
534
535 /*
536 * Obtain the current write index, which is modified by a remote cpu.
537 * Issue a load fence to prevent speculative reads of e.g. data written
538 * by the other cpu prior to it updating the index.
539 */
540 KKASSERT(curthread->td_critcount);
541 wi = ip->ip_windex;
542 cpu_lfence();
543 ++mygd->gd_intr_nesting_level;
544
545 /*
546 * NOTE: xindex is only updated after we are sure the function has
547 * finished execution. Beware lwkt_process_ipiq() reentrancy!
548 * The function may send an IPI which may block/drain.
549 *
550 * NOTE: Due to additional IPI operations that the callback function
551 * may make, it is possible for both rindex and windex to advance and
552 * thus for rindex to advance past our cached windex.
553 *
554 * NOTE: A memory fence is required to prevent speculative loads prior
555 * to the loading of ip_rindex. Even though stores might be
556 * ordered, loads are probably not.
557 */
558 while (wi - (ri = ip->ip_rindex) > 0) {
559 ri &= MAXCPUFIFO_MASK;
560 cpu_mfence();
561 copy_func = ip->ip_func[ri];
562 copy_arg1 = ip->ip_arg1[ri];
563 copy_arg2 = ip->ip_arg2[ri];
564 ++ip->ip_rindex;
565 KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
566 ((ri + 1) & MAXCPUFIFO_MASK));
567 logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
568 copy_func(copy_arg1, copy_arg2, frame);
569 cpu_sfence();
570 ip->ip_xindex = ip->ip_rindex;
571
572#ifdef PANIC_DEBUG
573 /*
574 * Simulate panics during the processing of an IPI
575 */
576 if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
577 if (--panic_ipiq_count == 0) {
578#ifdef DDB
579 Debugger("PANIC_DEBUG");
580#else
581 panic("PANIC_DEBUG");
582#endif
583 }
584 }
585#endif
586 }
587 --mygd->gd_intr_nesting_level;
588
589 /*
590 * Return non-zero if there are more IPI messages pending on this
591 * ipiq. ip_npoll is left set as long as possible to reduce the
592 * number of IPIs queued by the originating cpu, but must be cleared
593 * *BEFORE* checking windex.
594 */
595 atomic_poll_release_int(&ip->ip_npoll);
596 return(wi != ip->ip_windex);
597}
598
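/*
 * IPI callback used by lwkt_synchronize_ipiqs().  Each target cpu clears
 * its bit in the shared cpumask; the last cpu to do so wakes up the
 * originator.
 */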
599static void
600lwkt_sync_ipiq(void *arg)
601{
602 cpumask_t *cpumask = arg;
603
604 atomic_clear_cpumask(cpumask, mycpu->gd_cpumask);
605 if (*cpumask == 0)
606 wakeup(cpumask);
607}
608
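/*
 * Wait for all other active cpus to process the IPIs currently queued to
 * them.  We broadcast lwkt_sync_ipiq() and sleep under 'wmesg' until the
 * cpumask drains to zero.
 */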
609void
610lwkt_synchronize_ipiqs(const char *wmesg)
611{
612 cpumask_t other_cpumask;
613
614 other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
615 lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);
616
617 while (other_cpumask != 0) {
618 tsleep_interlock(&other_cpumask, 0);
619 if (other_cpumask != 0)
620 tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
621 }
622}
623
624#endif
625
626/*
627 * CPU Synchronization Support
628 *
629 * lwkt_cpusync_simple()
630 *
631 * The function is executed synchronously before return on remote cpus.
632 * A lwkt_cpusync_t pointer is passed as an argument. The data can
633 * be accessed via arg->cs_data.
634 *
635 * XXX should I just pass the data as an argument to be consistent?
636 */
637
638void
639lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
640{
641 struct lwkt_cpusync cmd;
642
643 cmd.cs_run_func = NULL;
644 cmd.cs_fin1_func = func;
645 cmd.cs_fin2_func = NULL;
646 cmd.cs_data = data;
647 lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
648 if (mask & CPUMASK(mycpu->gd_cpuid))
649 func(&cmd);
650 lwkt_cpusync_finish(&cmd);
651}
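
/*
 * Illustrative sketch (not compiled): running a function synchronously on
 * a set of cpus and handing it data through cs_data.  The names
 * "example_sync_func" and "example_sync_all" are hypothetical.
 */
#if 0
static void
example_sync_func(lwkt_cpusync_t poll)
{
	int *valuep = poll->cs_data;

	/*
	 * Runs on every cpu in the mask before the caller returns; perform
	 * some cpu-local action based on *valuep here.
	 */
}

static void
example_sync_all(int *valuep)
{
	lwkt_cpusync_simple(mycpu->gd_other_cpus | mycpu->gd_cpumask,
			    example_sync_func, valuep);
}
#endif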
652
653/*
654 * lwkt_cpusync_fastdata()
655 *
656 * The function is executed in tandem with return on remote cpus.
657 * The data is directly passed as an argument.  Do not pass pointers to
658 * temporary storage, as the storage might have gone poof by the time the
659 * target cpu executes the function.
661 *
662 * At the moment lwkt_cpusync is declared on the stack and we must wait
663 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
664 * optimization we should be able to put a counter in the globaldata
665 * structure (if it is not otherwise being used) and just poke it and
666 * return without waiting. XXX
667 */
668void
669lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
670{
671 struct lwkt_cpusync cmd;
672
673 cmd.cs_run_func = NULL;
674 cmd.cs_fin1_func = NULL;
675 cmd.cs_fin2_func = func;
676 cmd.cs_data = data;
677 lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
678 if (mask & CPUMASK(mycpu->gd_cpuid))
679 func(data);
680 lwkt_cpusync_finish(&cmd);
681}
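
/*
 * Illustrative sketch (not compiled): passing the data pointer directly to
 * the remote function instead of indirecting through a lwkt_cpusync
 * structure.  The names "example_fast_func" and "example_fastdata_all"
 * are hypothetical.
 */
#if 0
static void
example_fast_func(void *data)
{
	/* Runs on each target cpu in tandem with the caller's return */
}

static void
example_fastdata_all(void *data)
{
	/* "data" must not point at storage that may go away (see above) */
	lwkt_cpusync_fastdata(mycpu->gd_other_cpus | mycpu->gd_cpumask,
			      example_fast_func, data);
}
#endif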
682
683/*
684 * lwkt_cpusync_start()
685 *
686 * Start synchronization with a set of target cpus, return once they are
687 * known to be in a synchronization loop. The target cpus will execute
688 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
689 *
690 * XXX future: add lwkt_cpusync_start_quick() and require a call to
691 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
692 * potentially absorb the IPI latency doing something useful.
693 */
694void
695lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
696{
697 globaldata_t gd = mycpu;
698
699 poll->cs_count = 0;
700 poll->cs_mask = mask;
701#ifdef SMP
702 logipiq2(sync_start, mask & gd->gd_other_cpus);
703 poll->cs_maxcount = lwkt_send_ipiq_mask(
704 mask & gd->gd_other_cpus & smp_active_mask,
705 (ipifunc1_t)lwkt_cpusync_remote1, poll);
706#endif
707 if (mask & gd->gd_cpumask) {
708 if (poll->cs_run_func)
709 poll->cs_run_func(poll);
710 }
711#ifdef SMP
712 if (poll->cs_maxcount) {
713 ++ipiq_cscount;
714 ++gd->gd_curthread->td_cscount;
715 while (poll->cs_count != poll->cs_maxcount) {
716 crit_enter();
717 lwkt_process_ipiq();
718 crit_exit();
719 }
720 }
721#endif
722}
723
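/*
 * Add more target cpus to a cpusync operation already started with
 * lwkt_cpusync_start().  Cpus already present in cs_mask are ignored.
 */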
724void
725lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
726{
727 globaldata_t gd = mycpu;
728#ifdef SMP
729 int count;
730#endif
731
732 mask &= ~poll->cs_mask;
733 poll->cs_mask |= mask;
734#ifdef SMP
735 logipiq2(sync_add, mask & gd->gd_other_cpus);
736 count = lwkt_send_ipiq_mask(
737 mask & gd->gd_other_cpus & smp_active_mask,
738 (ipifunc1_t)lwkt_cpusync_remote1, poll);
739#endif
740 if (mask & gd->gd_cpumask) {
741 if (poll->cs_run_func)
742 poll->cs_run_func(poll);
743 }
744#ifdef SMP
745 poll->cs_maxcount += count;
746 if (poll->cs_maxcount) {
747 if (poll->cs_maxcount == count)
748 ++gd->gd_curthread->td_cscount;
749 while (poll->cs_count != poll->cs_maxcount) {
750 crit_enter();
751 lwkt_process_ipiq();
752 crit_exit();
753 }
754 }
755#endif
756}
757
758/*
759 * Finish synchronization with a set of target cpus. The target cpus will
760 * execute cs_fin1_func(poll) prior to this function returning, and will
761 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
762 *
763 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
764 * more remote cpus and must account for it in our thread structure.
765 */
766void
767lwkt_cpusync_finish(lwkt_cpusync_t poll)
768{
769 globaldata_t gd = mycpu;
770
771 poll->cs_count = -1;
772 if (poll->cs_mask & gd->gd_cpumask) {
773 if (poll->cs_fin1_func)
774 poll->cs_fin1_func(poll);
775 if (poll->cs_fin2_func)
776 poll->cs_fin2_func(poll->cs_data);
777 }
778#ifdef SMP
779 if (poll->cs_maxcount) {
780 while (poll->cs_count != -(poll->cs_maxcount + 1)) {
781 crit_enter();
782 lwkt_process_ipiq();
783 crit_exit();
784 }
785 --gd->gd_curthread->td_cscount;
786 }
787#endif
788}
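
/*
 * Illustrative sketch (not compiled): driving a cpusync by hand with
 * lwkt_cpusync_start()/lwkt_cpusync_finish(), mirroring what
 * lwkt_cpusync_simple() does above.  The names "example_fin_func" and
 * "example_manual_sync" are hypothetical.
 */
#if 0
static void
example_fin_func(lwkt_cpusync_t poll)
{
	/* Runs on each target cpu before lwkt_cpusync_finish() returns */
}

static void
example_manual_sync(cpumask_t mask, void *data)
{
	struct lwkt_cpusync cmd;

	cmd.cs_run_func = NULL;		/* would run in tandem with _start's return */
	cmd.cs_fin1_func = example_fin_func;
	cmd.cs_fin2_func = NULL;	/* would run in tandem with _finish's return */
	cmd.cs_data = data;
	lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
	if (mask & CPUMASK(mycpu->gd_cpuid))
		example_fin_func(&cmd);
	lwkt_cpusync_finish(&cmd);
}
#endif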
789
790#ifdef SMP
791
792/*
793 * helper IPI remote messaging function.
794 *
795 * Called on remote cpu when a new cpu synchronization request has been
796 * sent to us. Execute the run function and adjust cs_count, then requeue
797 * the request so we spin on it.
798 */
799static void
800lwkt_cpusync_remote1(lwkt_cpusync_t poll)
801{
802 atomic_add_int(&poll->cs_count, 1);
803 if (poll->cs_run_func)
804 poll->cs_run_func(poll);
805 lwkt_cpusync_remote2(poll);
806}
807
808/*
809 * helper IPI remote messaging function.
810 *
811 * Poll for the originator telling us to finish. If it hasn't, requeue
812 * our request so we spin on it. When the originator requests that we
813 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
814 * in tandem with the release.
815 */
816static void
817lwkt_cpusync_remote2(lwkt_cpusync_t poll)
818{
819 if (poll->cs_count < 0) {
820 cpusync_func2_t savef;
821 void *saved;
822
823 if (poll->cs_fin1_func)
824 poll->cs_fin1_func(poll);
825 if (poll->cs_fin2_func) {
826 savef = poll->cs_fin2_func;
827 saved = poll->cs_data;
828 cpu_ccfence(); /* required ordering for MP operation */
829 atomic_add_int(&poll->cs_count, -1);
830 savef(saved);
831 } else {
832 atomic_add_int(&poll->cs_count, -1);
833 }
834 } else {
835 globaldata_t gd = mycpu;
836 lwkt_ipiq_t ip;
837 int wi;
838
839 ip = &gd->gd_cpusyncq;
840 wi = ip->ip_windex & MAXCPUFIFO_MASK;
841 ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
842 ip->ip_arg1[wi] = poll;
843 ip->ip_arg2[wi] = 0;
844 cpu_sfence();
845 ++ip->ip_windex;
846 }
847}
848
849#endif