[dragonfly.git] / sys / kern / lwkt_ipiq.c
/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.27 2008/05/18 20:57:56 nth Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#ifdef SMP
static __int64_t ipiq_count;    /* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull; /* number of fifo full conditions detected */
static __int64_t ipiq_avoided;  /* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;  /* passive IPI messages */
static __int64_t ipiq_cscount;  /* number of cpu synchronizations */
static int ipiq_optimized = 1;  /* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0,
    "Number of IPI's sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0,
    "Number of fifo full conditions detected");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0,
    "Number of IPI's avoided by interlock with target cpu");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0,
    "Number of passive IPI messages sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0,
    "Number of cpu synchronizations");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0,
    "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING     "func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE   (sizeof(void *) * 2 + sizeof(int) * 3)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ        KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, sync_add, 6, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)       \
        KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)                             \
        KTR_LOG(ipiq_ ## name, arg)

#endif  /* SMP */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
                                  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  New messages are written into a FIFO only by the
 * owning (source) cpu.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
        func(arg1, arg2, NULL);
        logipiq(send_end, func, arg1, arg2, gd, target);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
        unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
        unsigned long rflags = read_rflags();
#endif

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
            cpu_send_ipiq(target->gd_cpuid);
        }
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
#if defined(__i386__)
        write_eflags(eflags);
#elif defined(__x86_64__)
        write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        logipiq(cpu_send, func, arg1, arg2, gd, target);
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
            cpu_send_ipiq(target->gd_cpuid);
        } else {
            ++ipiq_avoided;
        }
    }
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}
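
/*
 * Usage sketch (added commentary, not part of the original file; the names
 * my_remote_func, my_arg and dcpu are hypothetical).  A caller might queue a
 * remote function like
 *
 *      static void my_remote_func(void *arg1, int arg2, struct intrframe *frame);
 *      ...
 *      lwkt_send_ipiq3(globaldata_find(dcpu), my_remote_func, my_arg, 0);
 *
 * When the target is the current cpu the function runs synchronously, as
 * above; otherwise it runs on the target cpu when that cpu processes its
 * incoming ipiq (from the IPI vector, doreti/splz, or lwkt_process_ipiq()).
 */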

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
                        void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
        unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
        unsigned long rflags = read_rflags();
#endif

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
            cpu_send_ipiq(target->gd_cpuid);
        }
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
#if defined(__i386__)
        write_eflags(eflags);
#elif defined(__x86_64__)
        write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
                       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_critcount);
    if (target == gd) {
        func(arg1, arg2, NULL);
        logipiq(send_end, func, arg1, arg2, gd, target);
        return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
        logipiq(send_fail, func, arg1, arg2, gd, target);
        return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        logipiq(cpu_send, func, arg1, arg2, gd, target);
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
            cpu_send_ipiq(target->gd_cpuid);
        } else {
            ++ipiq_avoided;
        }
    }

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(0);
}
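
/*
 * Note (added commentary): an ENOENT return means the message was never
 * queued; a caller that cannot tolerate losing it presumably needs to
 * retry later or fall back to the blocking lwkt_send_ipiq3() path.
 */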

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(u_int32_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
        mask &= ~(1 << cpuid);
        ++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
#if defined(__i386__)
            unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
            unsigned long rflags = read_rflags();
#endif
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
                /*
                 * xindex may be modified by another cpu, use a load fence
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
                cpu_lfence();
            }
#if defined(__i386__)
            write_eflags(eflags);
#elif defined(__x86_64__)
            write_rflags(rflags);
#endif
        }
    }
}
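
/*
 * Note (added commentary): the 'seq' argument is typically the ip_windex
 * sequence value returned by a prior lwkt_send_ipiq*() call to the same
 * target cpu; the wait completes once ip_xindex has advanced past it,
 * i.e. once the queued function has finished executing remotely.
 */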

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions: one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
                       struct intrframe *frame)
{
    globaldata_t mygd = mycpu;
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_critcount);
    wi = ip->ip_windex;
    cpu_lfence();
    ++mygd->gd_intr_nesting_level;

    /*
     * NOTE: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!
     * The function may send an IPI which may block/drain.
     *
     * NOTE: Due to additional IPI operations that the callback function
     * may make, it is possible for both rindex and windex to advance and
     * thus for rindex to advance past our cached windex.
     *
     * NOTE: A memory fence is required to prevent speculative loads prior
     * to the loading of ip_rindex.  Even though stores might be
     * ordered, loads are probably not.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
        ri &= MAXCPUFIFO_MASK;
        cpu_mfence();
        copy_func = ip->ip_func[ri];
        copy_arg1 = ip->ip_arg1[ri];
        copy_arg2 = ip->ip_arg2[ri];
        ++ip->ip_rindex;
        KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
                 ((ri + 1) & MAXCPUFIFO_MASK));
        logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
        copy_func(copy_arg1, copy_arg2, frame);
        cpu_sfence();
        ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
        /*
         * Simulate panics during the processing of an IPI
         */
        if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
            if (--panic_ipiq_count == 0) {
#ifdef DDB
                Debugger("PANIC_DEBUG");
#else
                panic("PANIC_DEBUG");
#endif
            }
        }
#endif
    }
    --mygd->gd_intr_nesting_level;

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}
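
/*
 * Note (added commentary): each ipiq thus carries three indices: ip_windex
 * is advanced by the sending cpu when a message is queued, ip_rindex is
 * advanced above as soon as a message has been copied out, and ip_xindex
 * is advanced only after the function has finished executing, which is
 * what lwkt_wait_ipiq() polls on.
 */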

static void
lwkt_sync_ipiq(void *arg)
{
    cpumask_t *cpumask = arg;

    atomic_clear_int(cpumask, mycpu->gd_cpumask);
    if (*cpumask == 0)
        wakeup(cpumask);
}

void
lwkt_synchronize_ipiqs(const char *wmesg)
{
    cpumask_t other_cpumask;

    other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
    lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);

    while (other_cpumask != 0) {
        tsleep_interlock(&other_cpumask, 0);
        if (other_cpumask != 0)
            tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
    }
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *      The function is executed synchronously before return on remote cpus.
 *      A lwkt_cpusync_t pointer is passed as an argument.  The data can
 *      be accessed via arg->cs_data.
 *
 *      XXX should I just pass the data as an argument to be consistent?
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(&cmd);
    lwkt_cpusync_finish(&cmd);
}
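
/*
 * Usage sketch (added commentary, not part of the original file; the names
 * my_sync_func and my_data are hypothetical).  Running a handler on every
 * active cpu and waiting for completion might look like
 *
 *      lwkt_cpusync_simple(smp_active_mask, my_sync_func, my_data);
 *
 * where my_sync_func receives the lwkt_cpusync_t and can reach my_data
 * through its cs_data field, as described above.
 */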

/*
 * lwkt_cpusync_fastdata()
 *
 *      The function is executed in tandem with return on remote cpus.
 *      The data is directly passed as an argument.  Do not pass pointers
 *      to temporary storage as the storage might have gone poof by the
 *      time the target cpu executes the function.
 *
 *      At the moment lwkt_cpusync is declared on the stack and we must wait
 *      for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *      optimization we should be able to put a counter in the globaldata
 *      structure (if it is not otherwise being used) and just poke it and
 *      return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 *      Start synchronization with a set of target cpus, return once they are
 *      known to be in a synchronization loop.  The target cpus will execute
 *      poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *      XXX future: add lwkt_cpusync_start_quick() and require a call to
 *      lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *      potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    logipiq2(sync_start, mask & gd->gd_other_cpus);
    poll->cs_maxcount = lwkt_send_ipiq_mask(
                            mask & gd->gd_other_cpus & smp_active_mask,
                            (ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    logipiq2(sync_add, mask & gd->gd_other_cpus);
    count = lwkt_send_ipiq_mask(
                mask & gd->gd_other_cpus & smp_active_mask,
                (ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
        if (poll->cs_maxcount == count)
            ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func)
            poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        while (poll->cs_count != -(poll->cs_maxcount + 1)) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
        --gd->gd_curthread->td_cscount;
    }
#endif
}
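
/*
 * Note (added commentary): a mastering cpu therefore typically runs the
 * sequence lwkt_cpusync_start() ... optional lwkt_cpusync_add() ...
 * lwkt_cpusync_finish() on a stack-declared struct lwkt_cpusync, which is
 * exactly what the lwkt_cpusync_simple() and lwkt_cpusync_fastdata()
 * wrappers above do.
 */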

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
        poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
        cpusync_func2_t savef;
        void *saved;

        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func) {
            savef = poll->cs_fin2_func;
            saved = poll->cs_data;
            atomic_add_int(&poll->cs_count, -1);
            savef(saved);
        } else {
            atomic_add_int(&poll->cs_count, -1);
        }
    } else {
        globaldata_t gd = mycpu;
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
        ip->ip_arg1[wi] = poll;
        ip->ip_arg2[wi] = 0;
        cpu_sfence();
        ++ip->ip_windex;
    }
}

#endif