/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.10 2005/04/18 01:02:58 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;      /* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;   /* number of fifo full conditions detected */
static __int64_t ipiq_avoided;    /* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;    /* passive IPI messages */
static __int64_t ipiq_cscount;    /* number of cpu synchronizations */
static int ipiq_optimized = 1;    /* XXX temporary sysctl */
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#endif

#endif

#ifdef SMP

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO is written only by the owning (source) cpu
 * and drained only by the target cpu.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}

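/*
 * Illustrative usage sketch (not part of the original file): queue a
 * function for execution on another cpu.  The callback name, its argument,
 * and the "othergd" globaldata pointer are hypothetical; the callback runs
 * on the target cpu from its IPI/doreti/splz processing.
 *
 *      static void
 *      example_remote_func(void *arg)
 *      {
 *          atomic_add_int((int *)arg, 1);
 *      }
 *
 *      lwkt_send_ipiq(othergd, example_remote_func, &example_counter);
 */
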
/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

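/*
 * Illustrative usage sketch (not part of the original file): queue a
 * non-critical request, e.g. handing a memory chunk back to the cpu that
 * owns it; the target will typically drain the queue on its next clock
 * interrupt.  The callback, the ownership test, "owner_gd" and the
 * example_local_free() helper are all hypothetical.
 *
 *      static void
 *      example_free_on_owner(void *arg)
 *      {
 *          example_local_free(arg);
 *      }
 *
 *      if (owner_gd == mycpu)
 *          example_local_free(chunk);
 *      else
 *          lwkt_send_ipiq_passive(owner_gd, example_free_on_owner, chunk);
 */
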
/*
 * Send an IPI request without blocking; return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
        func(arg);
        return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3)
        return(ENOENT);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI; we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    return(0);
}

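/*
 * Illustrative usage sketch (not part of the original file): callers that
 * must not block can try the nowait variant and fall back when the target
 * FIFO is more than 2/3 full.  The fallback strategy shown here is
 * hypothetical; note the caller must already be in a critical section.
 *
 *      crit_enter();
 *      if (lwkt_send_ipiq_nowait(othergd, example_remote_func, arg) == ENOENT) {
 *          (defer the request, e.g. flag it and retry later with the
 *           blocking lwkt_send_ipiq())
 *      }
 *      crit_exit();
 */
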
/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
        mask &= ~(1 << cpuid);
        ++count;
    }
    return(count);
}

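/*
 * Illustrative usage sketch (not part of the original file): broadcast a
 * request to every other cpu.  The callback is hypothetical; the mask
 * expression mirrors how gd_other_cpus is used elsewhere in this file.
 * The return value is the number of cpus the message was queued to, with
 * stopped cpus silently skipped.
 *
 *      int nsent;
 *
 *      nsent = lwkt_send_ipiq_mask(mycpu->gd_other_cpus,
 *                                  example_remote_func, arg);
 */
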
/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
            }
            write_eflags(eflags);
        }
    }
}

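/*
 * Illustrative usage sketch (not part of the original file): the value
 * returned by lwkt_send_ipiq() is the post-increment ip_windex and can be
 * used as the sequence number to wait on.  Because ip_xindex only catches
 * up after the target has finished executing a queued function, the wait
 * below does not return until example_remote_func() (hypothetical) has
 * completed on the target cpu.
 *
 *      int seq;
 *
 *      seq = lwkt_send_ipiq(othergd, example_remote_func, arg);
 *      crit_enter();
 *      lwkt_wait_ipiq(othergd, seq);
 *      crit_exit();
 */
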
int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi = ip->ip_windex;
    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
        ip->ip_rindex = ri + 1;
        ri &= MAXCPUFIFO_MASK;
        ip->ip_func[ri](ip->ip_arg[ri], frame);
        /* YYY memory barrier */
        ip->ip_xindex = ip->ip_rindex;
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

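/*
 * Illustrative note (not part of the original file): ip_rindex and
 * ip_windex increment monotonically and are only masked with
 * MAXCPUFIFO_MASK when used as array indices, so the unmasked difference
 * (windex - rindex) is the number of queued entries even after the
 * counters wrap.  For example, with MAXCPUFIFO hypothetically 32:
 *
 *      windex = 0x10000005, rindex = 0x10000002
 *      entries queued = windex - rindex = 3
 *      next slot read = rindex & MAXCPUFIFO_MASK = 2
 */
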
#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 * The function is executed synchronously before return on remote cpus.
 * A lwkt_cpusync_t pointer is passed as an argument.  The data can
 * be accessed via arg->cs_data.
 *
 * XXX should I just pass the data as an argument to be consistent?
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

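/*
 * Illustrative usage sketch (not part of the original file): run a function
 * on every cpu in a mask before returning.  The callback and the info
 * structure are hypothetical; note that the callback receives the
 * lwkt_cpusync_t and reaches its data via cs_data.
 *
 *      static void
 *      example_sync_func(lwkt_cpusync_t poll)
 *      {
 *          struct example_info *info = poll->cs_data;
 *          (act on info on this cpu)
 *      }
 *
 *      lwkt_cpusync_simple(smp_active_mask, example_sync_func, &info);
 */
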
/*
 * lwkt_cpusync_fastdata()
 *
 * The function is executed in tandem with return on remote cpus.
 * The data is directly passed as an argument.  Do not pass pointers to
 * temporary storage as the storage might have gone poof by the time
 * the target cpu executes the function.
 *
 * At the moment lwkt_cpusync is declared on the stack and we must wait
 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 * optimization we should be able to put a counter in the globaldata
 * structure (if it is not otherwise being used) and just poke it and
 * return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 * XXX future: add lwkt_cpusync_start_quick() and require a call to
 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 * potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
        if (poll->cs_maxcount == count)
            ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func)
            poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        while (poll->cs_count != -(poll->cs_maxcount + 1)) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
        --gd->gd_curthread->td_cscount;
    }
#endif
}

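/*
 * Illustrative usage sketch (not part of the original file): the full
 * start/finish protocol.  lwkt_cpusync_start() returns once the targeted
 * cpus are known to be in the synchronization loop; the originator can
 * then do its own work, and lwkt_cpusync_finish() releases the targets,
 * running cs_fin2_func(cs_data) on each of them in tandem with the
 * release.  The callback, data, and intervening work are hypothetical.
 *
 *      struct lwkt_cpusync cmd;
 *
 *      cmd.cs_run_func = NULL;
 *      cmd.cs_fin1_func = NULL;
 *      cmd.cs_fin2_func = example_reload_state;
 *      cmd.cs_data = new_state;
 *      lwkt_cpusync_start(smp_active_mask, &cmd);
 *      (update the shared state here)
 *      lwkt_cpusync_finish(&cmd);
 */
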
#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
        poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
        cpusync_func2_t savef;
        void *saved;

        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func) {
            savef = poll->cs_fin2_func;
            saved = poll->cs_data;
            atomic_add_int(&poll->cs_count, -1);
            savef(saved);
        } else {
            atomic_add_int(&poll->cs_count, -1);
        }
    } else {
        globaldata_t gd = mycpu;
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
        ip->ip_arg[wi] = poll;
        cpu_mb1();
        ++ip->ip_windex;
    }
}

#endif