Add additional sanity checks to IPIQ processing, do some cleanups,
[dragonfly.git] sys/kern/lwkt_ipiq.c
/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.12 2005/06/21 05:03:12 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */
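
/*
 * Overview of the per-target FIFO fields as used by the code below: the
 * sending cpu fills ip_func[]/ip_arg[] and advances ip_windex; the target
 * cpu consumes entries and advances ip_rindex, then ip_xindex once each
 * function has actually completed; ip_npoll interlocks the hardware IPI
 * so a target that is already draining its queue is not re-signalled.
 */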

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK    (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;    /* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull; /* number of fifo full conditions detected */
static __int64_t ipiq_avoided;  /* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;  /* passive IPI messages */
static __int64_t ipiq_cscount;  /* number of cpu synchronizations */
static int ipiq_optimized = 1;  /* XXX temporary sysctl */
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#endif

#endif

#ifdef SMP

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO is written only by the cpu that owns it
 * (the sender) and read only by the target cpu.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq_passive: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
        func(arg);
        return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3)
        return(ENOENT);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        else
            ++ipiq_avoided;
    }
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
        mask &= ~(1 << cpuid);
        ++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                        mycpu->gd_cpuid, target->gd_cpuid,
                        ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
                /*
                 * xindex may be modified by another cpu, use a load fence
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
                cpu_lfence();
            }
            write_eflags(eflags);
        }
    }
}

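/*
 * Illustrative usage sketch (hypothetical caller, not taken from this
 * file): queue a function on another cpu and wait for it to complete.
 * lwkt_send_ipiq() returns a sequence number which lwkt_wait_ipiq(),
 * called from a critical section, waits on.  'dcpu', 'some_func' and
 * 'some_arg' are placeholders.
 *
 *      seq = lwkt_send_ipiq(globaldata_find(dcpu), some_func, some_arg);
 *      ...
 *      crit_enter();
 *      lwkt_wait_ipiq(globaldata_find(dcpu), seq);
 *      crit_exit();
 */
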
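/*
 * Return the current write index (sequence number) of the ipiq leading
 * to the target cpu, suitable for a later lwkt_wait_ipiq().
 */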
int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

#ifdef _KERNEL
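/*
 * Version of lwkt_process_ipiq() used when an interrupt frame is
 * available; the frame is passed through to the queued functions.
 */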
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi;
    void (*copy_func)(void *data, struct intrframe *frame);
    void *copy_arg;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
        ri &= MAXCPUFIFO_MASK;
        copy_func = ip->ip_func[ri];
        copy_arg = ip->ip_arg[ri];
        cpu_mfence();
        ++ip->ip_rindex;
        KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) == ((ri + 1) & MAXCPUFIFO_MASK));
        copy_func(copy_arg, frame);
        cpu_sfence();
        ip->ip_xindex = ip->ip_rindex;
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 * The function is executed synchronously before return on remote cpus.
 * A lwkt_cpusync_t pointer is passed as an argument.  The data can be
 * accessed via arg->cs_data.
 *
 * XXX should I just pass the data as an argument to be consistent?
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 * The function is executed in tandem with return on remote cpus.  The
 * data is directly passed as an argument.  Do not pass pointers to
 * temporary storage as the storage might have gone poof by the time
 * the target cpu executes the function.
 *
 * At the moment lwkt_cpusync is declared on the stack and we must wait
 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 * optimization we should be able to put a counter in the globaldata
 * structure (if it is not otherwise being used) and just poke it and
 * return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 * XXX future: add lwkt_cpusync_start_quick() and require a call to
 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 * potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

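/*
 * lwkt_cpusync_add()
 *
 * Add additional cpus to an in-progress synchronization.  Cpus already
 * present in poll->cs_mask are ignored; any new cpus are sent the
 * synchronization IPI and this routine spins until they have all
 * checked in.
 */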
void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
        if (poll->cs_maxcount == count)
            ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func)
            poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        while (poll->cs_count != -(poll->cs_maxcount + 1)) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
        --gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
        poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
        cpusync_func2_t savef;
        void *saved;

        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func) {
            savef = poll->cs_fin2_func;
            saved = poll->cs_data;
            atomic_add_int(&poll->cs_count, -1);
            savef(saved);
        } else {
            atomic_add_int(&poll->cs_count, -1);
        }
    } else {
        globaldata_t gd = mycpu;
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
        ip->ip_arg[wi] = poll;
        cpu_sfence();
        ++ip->ip_windex;
    }
}

#endif