| 1 | /* |
| 2 | * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * |
| 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 24 | * SUCH DAMAGE. |
| 25 | * |
| 26 | * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.51 2004/02/09 21:13:18 dillon Exp $ |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * Each cpu in a system has its own self-contained light weight kernel |
| 31 | * thread scheduler, which means that generally speaking we only need |
| 32 | * to use a critical section to avoid problems. Foreign thread |
| 33 | * scheduling is queued via (async) IPIs. |
| 34 | * |
| 35 | * NOTE: on UP machines smp_active is defined to be 0. On SMP machines |
| 36 | * smp_active is 0 prior to SMP activation, then it is 1. The LWKT module |
| 37 | * uses smp_active to optimize UP builds and to avoid sending IPIs during |
| 38 | * early boot (primarily interrupt and network thread initialization). |
| 39 | */ |
| 40 | |
| 41 | #ifdef _KERNEL |
| 42 | |
| 43 | #include <sys/param.h> |
| 44 | #include <sys/systm.h> |
| 45 | #include <sys/kernel.h> |
| 46 | #include <sys/proc.h> |
| 47 | #include <sys/rtprio.h> |
| 48 | #include <sys/queue.h> |
| 49 | #include <sys/thread2.h> |
| 50 | #include <sys/sysctl.h> |
| 51 | #include <sys/kthread.h> |
| 52 | #include <machine/cpu.h> |
| 53 | #include <sys/lock.h> |
| 54 | #include <sys/caps.h> |
| 55 | |
| 56 | #include <vm/vm.h> |
| 57 | #include <vm/vm_param.h> |
| 58 | #include <vm/vm_kern.h> |
| 59 | #include <vm/vm_object.h> |
| 60 | #include <vm/vm_page.h> |
| 61 | #include <vm/vm_map.h> |
| 62 | #include <vm/vm_pager.h> |
| 63 | #include <vm/vm_extern.h> |
| 64 | #include <vm/vm_zone.h> |
| 65 | |
| 66 | #include <machine/stdarg.h> |
| 67 | #include <machine/ipl.h> |
| 68 | #include <machine/smp.h> |
| 69 | |
| 70 | #define THREAD_STACK (UPAGES * PAGE_SIZE) |
| 71 | |
| 72 | #else |
| 73 | |
| 74 | #include <sys/stdint.h> |
| 75 | #include <libcaps/thread.h> |
| 76 | #include <sys/thread.h> |
| 77 | #include <sys/msgport.h> |
| 78 | #include <sys/errno.h> |
| 79 | #include <libcaps/globaldata.h> |
| 80 | #include <sys/thread2.h> |
| 81 | #include <sys/msgport2.h> |
| 82 | #include <stdio.h> |
| 83 | #include <stdlib.h> |
| 84 | #include <string.h> |
| 85 | #include <machine/cpufunc.h> |
| 86 | #include <machine/lock.h> |
| 87 | |
| 88 | #endif |
| 89 | |
| 90 | static int untimely_switch = 0; |
| 91 | static __int64_t switch_count = 0; |
| 92 | static __int64_t preempt_hit = 0; |
| 93 | static __int64_t preempt_miss = 0; |
| 94 | static __int64_t preempt_weird = 0; |
| 95 | #ifdef SMP |
| 96 | static __int64_t ipiq_count = 0; |
| 97 | static __int64_t ipiq_fifofull = 0; |
| 98 | #endif |
| 99 | |
| 100 | #ifdef _KERNEL |
| 101 | |
| 102 | SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, ""); |
| 103 | SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, ""); |
| 104 | SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, ""); |
| 105 | SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, ""); |
| 106 | SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, ""); |
| 107 | #ifdef SMP |
| 108 | SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, ""); |
| 109 | SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, ""); |
| 110 | #endif |
| 111 | |
| 112 | #endif |
| 113 | |
| 114 | /* |
| 115 | * These helper procedures handle the runq, they can only be called from |
| 116 | * within a critical section. |
| 117 | * |
| 118 | * WARNING! Prior to SMP being brought up it is possible to enqueue and |
| 119 | * dequeue threads belonging to other cpus, so be sure to use td->td_gd |
| 120 | * instead of 'mycpu' when referencing the globaldata structure. Once |
 * SMP is live, enqueueing and dequeueing only occur on the current cpu.
| 122 | */ |
| 123 | static __inline |
| 124 | void |
| 125 | _lwkt_dequeue(thread_t td) |
| 126 | { |
| 127 | if (td->td_flags & TDF_RUNQ) { |
| 128 | int nq = td->td_pri & TDPRI_MASK; |
| 129 | struct globaldata *gd = td->td_gd; |
| 130 | |
| 131 | td->td_flags &= ~TDF_RUNQ; |
| 132 | TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq); |
| 133 | /* runqmask is passively cleaned up by the switcher */ |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | static __inline |
| 138 | void |
| 139 | _lwkt_enqueue(thread_t td) |
| 140 | { |
| 141 | if ((td->td_flags & TDF_RUNQ) == 0) { |
| 142 | int nq = td->td_pri & TDPRI_MASK; |
| 143 | struct globaldata *gd = td->td_gd; |
| 144 | |
| 145 | td->td_flags |= TDF_RUNQ; |
| 146 | TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq); |
| 147 | gd->gd_runqmask |= 1 << nq; |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | static __inline |
| 152 | int |
| 153 | _lwkt_wantresched(thread_t ntd, thread_t cur) |
| 154 | { |
| 155 | return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK)); |
| 156 | } |
| 157 | |
| 158 | #ifdef _KERNEL |
| 159 | |
| 160 | /* |
| 161 | * LWKTs operate on a per-cpu basis |
| 162 | * |
| 163 | * WARNING! Called from early boot, 'mycpu' may not work yet. |
| 164 | */ |
| 165 | void |
| 166 | lwkt_gdinit(struct globaldata *gd) |
| 167 | { |
| 168 | int i; |
| 169 | |
| 170 | for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i) |
| 171 | TAILQ_INIT(&gd->gd_tdrunq[i]); |
| 172 | gd->gd_runqmask = 0; |
| 173 | TAILQ_INIT(&gd->gd_tdallq); |
| 174 | } |
| 175 | |
| 176 | #endif /* _KERNEL */ |
| 177 | |
| 178 | /* |
| 179 | * Initialize a thread wait structure prior to first use. |
| 180 | * |
| 181 | * NOTE! called from low level boot code, we cannot do anything fancy! |
| 182 | */ |
| 183 | void |
| 184 | lwkt_init_wait(lwkt_wait_t w) |
| 185 | { |
| 186 | TAILQ_INIT(&w->wa_waitq); |
| 187 | } |
| 188 | |
| 189 | /* |
| 190 | * Create a new thread. The thread must be associated with a process context |
| 191 | * or LWKT start address before it can be scheduled. If the target cpu is |
| 192 | * -1 the thread will be created on the current cpu. |
| 193 | * |
| 194 | * If you intend to create a thread without a process context this function |
| 195 | * does everything except load the startup and switcher function. |
| 196 | */ |
| 197 | thread_t |
| 198 | lwkt_alloc_thread(struct thread *td, int cpu) |
| 199 | { |
| 200 | void *stack; |
| 201 | int flags = 0; |
| 202 | |
| 203 | if (td == NULL) { |
| 204 | crit_enter(); |
| 205 | if (mycpu->gd_tdfreecount > 0) { |
| 206 | --mycpu->gd_tdfreecount; |
| 207 | td = TAILQ_FIRST(&mycpu->gd_tdfreeq); |
| 208 | KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0, |
| 209 | ("lwkt_alloc_thread: unexpected NULL or corrupted td")); |
| 210 | TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq); |
| 211 | crit_exit(); |
| 212 | stack = td->td_kstack; |
| 213 | flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD); |
| 214 | } else { |
| 215 | crit_exit(); |
| 216 | #ifdef _KERNEL |
| 217 | td = zalloc(thread_zone); |
| 218 | #else |
| 219 | td = malloc(sizeof(struct thread)); |
| 220 | #endif |
| 221 | td->td_kstack = NULL; |
| 222 | flags |= TDF_ALLOCATED_THREAD; |
| 223 | } |
| 224 | } |
| 225 | if ((stack = td->td_kstack) == NULL) { |
| 226 | #ifdef _KERNEL |
| 227 | stack = (void *)kmem_alloc(kernel_map, THREAD_STACK); |
| 228 | #else |
| 229 | stack = libcaps_alloc_stack(THREAD_STACK); |
| 230 | #endif |
| 231 | flags |= TDF_ALLOCATED_STACK; |
| 232 | } |
| 233 | if (cpu < 0) |
| 234 | lwkt_init_thread(td, stack, flags, mycpu); |
| 235 | else |
| 236 | lwkt_init_thread(td, stack, flags, globaldata_find(cpu)); |
| 237 | return(td); |
| 238 | } |
| 239 | |
| 240 | #ifdef _KERNEL |
| 241 | |
| 242 | /* |
| 243 | * Initialize a preexisting thread structure. This function is used by |
| 244 | * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread. |
| 245 | * |
| 246 | * All threads start out in a critical section at a priority of |
| 247 | * TDPRI_KERN_DAEMON. Higher level code will modify the priority as |
| 248 | * appropriate. This function may send an IPI message when the |
| 249 | * requested cpu is not the current cpu and consequently gd_tdallq may |
| 250 | * not be initialized synchronously from the point of view of the originating |
| 251 | * cpu. |
| 252 | * |
 * NOTE! We have to be careful with regard to creating threads for other cpus
| 254 | * if SMP has not yet been activated. |
| 255 | */ |
| 256 | static void |
| 257 | lwkt_init_thread_remote(void *arg) |
| 258 | { |
| 259 | thread_t td = arg; |
| 260 | |
| 261 | TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq); |
| 262 | } |
| 263 | |
| 264 | void |
| 265 | lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd) |
| 266 | { |
| 267 | bzero(td, sizeof(struct thread)); |
| 268 | td->td_kstack = stack; |
| 269 | td->td_flags |= flags; |
| 270 | td->td_gd = gd; |
| 271 | td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT; |
| 272 | lwkt_initport(&td->td_msgport, td); |
| 273 | pmap_init_thread(td); |
| 274 | if (smp_active == 0 || gd == mycpu) { |
| 275 | crit_enter(); |
| 276 | TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); |
| 277 | crit_exit(); |
| 278 | } else { |
| 279 | lwkt_send_ipiq(gd->gd_cpuid, lwkt_init_thread_remote, td); |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | #endif /* _KERNEL */ |
| 284 | |
| 285 | void |
| 286 | lwkt_set_comm(thread_t td, const char *ctl, ...) |
| 287 | { |
| 288 | __va_list va; |
| 289 | |
| 290 | __va_start(va, ctl); |
| 291 | vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va); |
| 292 | __va_end(va); |
| 293 | } |
| 294 | |
| 295 | void |
| 296 | lwkt_hold(thread_t td) |
| 297 | { |
| 298 | ++td->td_refs; |
| 299 | } |
| 300 | |
| 301 | void |
| 302 | lwkt_rele(thread_t td) |
| 303 | { |
| 304 | KKASSERT(td->td_refs > 0); |
| 305 | --td->td_refs; |
| 306 | } |
| 307 | |
| 308 | #ifdef _KERNEL |
| 309 | |
| 310 | void |
| 311 | lwkt_wait_free(thread_t td) |
| 312 | { |
| 313 | while (td->td_refs) |
| 314 | tsleep(td, 0, "tdreap", hz); |
| 315 | } |
| 316 | |
| 317 | #endif |
| 318 | |
| 319 | void |
| 320 | lwkt_free_thread(thread_t td) |
| 321 | { |
| 322 | struct globaldata *gd = mycpu; |
| 323 | |
| 324 | KASSERT((td->td_flags & TDF_RUNNING) == 0, |
| 325 | ("lwkt_free_thread: did not exit! %p", td)); |
| 326 | |
| 327 | crit_enter(); |
| 328 | TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq); |
| 329 | if (gd->gd_tdfreecount < CACHE_NTHREADS && |
| 330 | (td->td_flags & TDF_ALLOCATED_THREAD) |
| 331 | ) { |
| 332 | ++gd->gd_tdfreecount; |
| 333 | TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq); |
| 334 | crit_exit(); |
| 335 | } else { |
| 336 | crit_exit(); |
| 337 | if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) { |
| 338 | #ifdef _KERNEL |
| 339 | kmem_free(kernel_map, (vm_offset_t)td->td_kstack, THREAD_STACK); |
| 340 | #else |
| 341 | libcaps_free_stack(td->td_kstack, THREAD_STACK); |
| 342 | #endif |
| 343 | /* gd invalid */ |
| 344 | td->td_kstack = NULL; |
| 345 | } |
| 346 | if (td->td_flags & TDF_ALLOCATED_THREAD) { |
| 347 | #ifdef _KERNEL |
| 348 | zfree(thread_zone, td); |
| 349 | #else |
| 350 | free(td); |
| 351 | #endif |
| 352 | } |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | |
| 357 | /* |
| 358 | * Switch to the next runnable lwkt. If no LWKTs are runnable then |
| 359 | * switch to the idlethread. Switching must occur within a critical |
| 360 | * section to avoid races with the scheduling queue. |
| 361 | * |
| 362 | * We always have full control over our cpu's run queue. Other cpus |
| 363 | * that wish to manipulate our queue must use the cpu_*msg() calls to |
| 364 | * talk to our cpu, so a critical section is all that is needed and |
| 365 | * the result is very, very fast thread switching. |
| 366 | * |
| 367 | * The LWKT scheduler uses a fixed priority model and round-robins at |
| 368 | * each priority level. User process scheduling is a totally |
| 369 | * different beast and LWKT priorities should not be confused with |
| 370 | * user process priorities. |
| 371 | * |
| 372 | * The MP lock may be out of sync with the thread's td_mpcount. lwkt_switch() |
| 373 | * cleans it up. Note that the td_switch() function cannot do anything that |
| 374 | * requires the MP lock since the MP lock will have already been setup for |
| 375 | * the target thread (not the current thread). It's nice to have a scheduler |
| 376 | * that does not need the MP lock to work because it allows us to do some |
| 377 | * really cool high-performance MP lock optimizations. |
| 378 | */ |
| 379 | |
| 380 | void |
| 381 | lwkt_switch(void) |
| 382 | { |
| 383 | struct globaldata *gd; |
| 384 | thread_t td = curthread; |
| 385 | thread_t ntd; |
| 386 | #ifdef SMP |
| 387 | int mpheld; |
| 388 | #endif |
| 389 | |
| 390 | /* |
     * Switching from within a 'fast' (non-thread-switched) interrupt is
| 392 | * illegal. |
| 393 | */ |
| 394 | if (mycpu->gd_intr_nesting_level && panicstr == NULL) { |
| 395 | panic("lwkt_switch: cannot switch from within a fast interrupt, yet\n"); |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | * Passive release (used to transition from user to kernel mode |
     * when we block or switch rather than when we enter the kernel).
| 401 | * This function is NOT called if we are switching into a preemption |
| 402 | * or returning from a preemption. Typically this causes us to lose |
| 403 | * our P_CURPROC designation (if we have one) and become a true LWKT |
| 404 | * thread, and may also hand P_CURPROC to another process and schedule |
| 405 | * its thread. |
| 406 | */ |
| 407 | if (td->td_release) |
| 408 | td->td_release(td); |
| 409 | |
| 410 | crit_enter(); |
| 411 | ++switch_count; |
| 412 | |
| 413 | #ifdef SMP |
| 414 | /* |
| 415 | * td_mpcount cannot be used to determine if we currently hold the |
| 416 | * MP lock because get_mplock() will increment it prior to attempting |
| 417 | * to get the lock, and switch out if it can't. Our ownership of |
| 418 | * the actual lock will remain stable while we are in a critical section |
| 419 | * (but, of course, another cpu may own or release the lock so the |
| 420 | * actual value of mp_lock is not stable). |
| 421 | */ |
| 422 | mpheld = MP_LOCK_HELD(); |
| 423 | #endif |
| 424 | if ((ntd = td->td_preempted) != NULL) { |
| 425 | /* |
| 426 | * We had preempted another thread on this cpu, resume the preempted |
| 427 | * thread. This occurs transparently, whether the preempted thread |
| 428 | * was scheduled or not (it may have been preempted after descheduling |
| 429 | * itself). |
| 430 | * |
	 * We have to set up the MP lock for the original thread after backing
| 432 | * out the adjustment that was made to curthread when the original |
| 433 | * was preempted. |
| 434 | */ |
| 435 | KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK); |
| 436 | #ifdef SMP |
| 437 | if (ntd->td_mpcount && mpheld == 0) { |
| 438 | panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n", |
| 439 | td, ntd, td->td_mpcount, ntd->td_mpcount); |
| 440 | } |
| 441 | if (ntd->td_mpcount) { |
| 442 | td->td_mpcount -= ntd->td_mpcount; |
| 443 | KKASSERT(td->td_mpcount >= 0); |
| 444 | } |
| 445 | #endif |
| 446 | ntd->td_flags |= TDF_PREEMPT_DONE; |
| 447 | /* YYY release mp lock on switchback if original doesn't need it */ |
| 448 | } else { |
| 449 | /* |
| 450 | * Priority queue / round-robin at each priority. Note that user |
| 451 | * processes run at a fixed, low priority and the user process |
| 452 | * scheduler deals with interactions between user processes |
| 453 | * by scheduling and descheduling them from the LWKT queue as |
| 454 | * necessary. |
| 455 | * |
| 456 | * We have to adjust the MP lock for the target thread. If we |
| 457 | * need the MP lock and cannot obtain it we try to locate a |
| 458 | * thread that does not need the MP lock. |
| 459 | */ |
| 460 | gd = mycpu; |
| 461 | again: |
| 462 | if (gd->gd_runqmask) { |
| 463 | int nq = bsrl(gd->gd_runqmask); |
| 464 | if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) { |
| 465 | gd->gd_runqmask &= ~(1 << nq); |
| 466 | goto again; |
| 467 | } |
| 468 | #ifdef SMP |
| 469 | if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) { |
| 470 | /* |
| 471 | * Target needs MP lock and we couldn't get it, try |
| 472 | * to locate a thread which does not need the MP lock |
		 * to run.  If we cannot locate a thread, spin in idle.
| 474 | */ |
| 475 | u_int32_t rqmask = gd->gd_runqmask; |
| 476 | while (rqmask) { |
| 477 | TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) { |
| 478 | if (ntd->td_mpcount == 0) |
| 479 | break; |
| 480 | } |
| 481 | if (ntd) |
| 482 | break; |
| 483 | rqmask &= ~(1 << nq); |
| 484 | nq = bsrl(rqmask); |
| 485 | } |
| 486 | if (ntd == NULL) { |
| 487 | ntd = &gd->gd_idlethread; |
| 488 | ntd->td_flags |= TDF_IDLE_NOHLT; |
| 489 | } else { |
| 490 | TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 491 | TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 492 | } |
| 493 | } else { |
| 494 | TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 495 | TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 496 | } |
| 497 | #else |
| 498 | TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 499 | TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); |
| 500 | #endif |
| 501 | } else { |
| 502 | /* |
	     * We have nothing to run.  Only let the idle loop halt
	     * the cpu if there are no pending interrupts.
| 505 | */ |
| 506 | ntd = &gd->gd_idlethread; |
| 507 | if (gd->gd_reqflags & RQF_IDLECHECK_MASK) |
| 508 | ntd->td_flags |= TDF_IDLE_NOHLT; |
| 509 | } |
| 510 | } |
| 511 | KASSERT(ntd->td_pri >= TDPRI_CRIT, |
| 512 | ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri)); |
| 513 | |
| 514 | /* |
| 515 | * Do the actual switch. If the new target does not need the MP lock |
| 516 | * and we are holding it, release the MP lock. If the new target requires |
| 517 | * the MP lock we have already acquired it for the target. |
| 518 | */ |
| 519 | #ifdef SMP |
| 520 | if (ntd->td_mpcount == 0 ) { |
| 521 | if (MP_LOCK_HELD()) |
| 522 | cpu_rel_mplock(); |
| 523 | } else { |
| 524 | ASSERT_MP_LOCK_HELD(); |
| 525 | } |
| 526 | #endif |
| 527 | if (td != ntd) { |
| 528 | td->td_switch(ntd); |
| 529 | } |
| 530 | |
| 531 | crit_exit(); |
| 532 | } |
| 533 | |
| 534 | /* |
| 535 | * Switch if another thread has a higher priority. Do not switch to other |
| 536 | * threads at the same priority. |
| 537 | */ |
| 538 | void |
lwkt_maybe_switch(void)
| 540 | { |
| 541 | struct globaldata *gd = mycpu; |
| 542 | struct thread *td = gd->gd_curthread; |
| 543 | |
| 544 | if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) { |
| 545 | lwkt_switch(); |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | /* |
| 550 | * Request that the target thread preempt the current thread. Preemption |
| 551 | * only works under a specific set of conditions: |
| 552 | * |
| 553 | * - We are not preempting ourselves |
| 554 | * - The target thread is owned by the current cpu |
| 555 | * - We are not currently being preempted |
| 556 | * - The target is not currently being preempted |
| 557 | * - We are able to satisfy the target's MP lock requirements (if any). |
| 558 | * |
| 559 | * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION. Typically |
| 560 | * this is called via lwkt_schedule() through the td_preemptable callback. |
| 561 | * critpri is the managed critical priority that we should ignore in order |
| 562 | * to determine whether preemption is possible (aka usually just the crit |
| 563 | * priority of lwkt_schedule() itself). |
| 564 | * |
| 565 | * XXX at the moment we run the target thread in a critical section during |
| 566 | * the preemption in order to prevent the target from taking interrupts |
| 567 | * that *WE* can't. Preemption is strictly limited to interrupt threads |
| 568 | * and interrupt-like threads, outside of a critical section, and the |
| 569 | * preempted source thread will be resumed the instant the target blocks |
| 570 | * whether or not the source is scheduled (i.e. preemption is supposed to |
| 571 | * be as transparent as possible). |
| 572 | * |
| 573 | * The target thread inherits our MP count (added to its own) for the |
 * duration of the preemption in order to preserve the atomicity of the
| 575 | * MP lock during the preemption. Therefore, any preempting targets must be |
| 576 | * careful in regards to MP assertions. Note that the MP count may be |
| 577 | * out of sync with the physical mp_lock, but we do not have to preserve |
| 578 | * the original ownership of the lock if it was out of synch (that is, we |
| 579 | * can leave it synchronized on return). |
| 580 | */ |
| 581 | void |
| 582 | lwkt_preempt(thread_t ntd, int critpri) |
| 583 | { |
| 584 | struct globaldata *gd = mycpu; |
| 585 | thread_t td = gd->gd_curthread; |
| 586 | #ifdef SMP |
| 587 | int mpheld; |
| 588 | int savecnt; |
| 589 | #endif |
| 590 | |
| 591 | /* |
| 592 | * The caller has put us in a critical section. We can only preempt |
| 593 | * if the caller of the caller was not in a critical section (basically |
     * a local interrupt), as determined by the 'critpri' parameter.  If we
     * are unable to preempt, the target has already been placed on the run
     * queue by our caller and will simply run at the next normal switch.
| 596 | * |
| 597 | * YYY The target thread must be in a critical section (else it must |
| 598 | * inherit our critical section? I dunno yet). |
| 599 | */ |
| 600 | KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri)); |
| 601 | |
| 602 | need_resched(); |
| 603 | if (!_lwkt_wantresched(ntd, td)) { |
| 604 | ++preempt_miss; |
| 605 | return; |
| 606 | } |
| 607 | if ((td->td_pri & ~TDPRI_MASK) > critpri) { |
| 608 | ++preempt_miss; |
| 609 | return; |
| 610 | } |
| 611 | #ifdef SMP |
| 612 | if (ntd->td_gd != gd) { |
| 613 | ++preempt_miss; |
| 614 | return; |
| 615 | } |
| 616 | #endif |
| 617 | if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) { |
| 618 | ++preempt_weird; |
| 619 | return; |
| 620 | } |
| 621 | if (ntd->td_preempted) { |
| 622 | ++preempt_hit; |
| 623 | return; |
| 624 | } |
| 625 | #ifdef SMP |
| 626 | /* |
     * note: an interrupt might have occurred just as we were transitioning
| 628 | * to or from the MP lock. In this case td_mpcount will be pre-disposed |
| 629 | * (non-zero) but not actually synchronized with the actual state of the |
| 630 | * lock. We can use it to imply an MP lock requirement for the |
| 631 | * preemption but we cannot use it to test whether we hold the MP lock |
| 632 | * or not. |
| 633 | */ |
| 634 | savecnt = td->td_mpcount; |
| 635 | mpheld = MP_LOCK_HELD(); |
| 636 | ntd->td_mpcount += td->td_mpcount; |
| 637 | if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) { |
| 638 | ntd->td_mpcount -= td->td_mpcount; |
| 639 | ++preempt_miss; |
| 640 | return; |
| 641 | } |
| 642 | #endif |
| 643 | |
| 644 | ++preempt_hit; |
| 645 | ntd->td_preempted = td; |
| 646 | td->td_flags |= TDF_PREEMPT_LOCK; |
| 647 | td->td_switch(ntd); |
| 648 | KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE)); |
| 649 | #ifdef SMP |
| 650 | KKASSERT(savecnt == td->td_mpcount); |
| 651 | mpheld = MP_LOCK_HELD(); |
| 652 | if (mpheld && td->td_mpcount == 0) |
| 653 | cpu_rel_mplock(); |
| 654 | else if (mpheld == 0 && td->td_mpcount) |
| 655 | panic("lwkt_preempt(): MP lock was not held through"); |
| 656 | #endif |
| 657 | ntd->td_preempted = NULL; |
| 658 | td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE); |
| 659 | } |
| 660 | |
| 661 | /* |
| 662 | * Yield our thread while higher priority threads are pending. This is |
| 663 | * typically called when we leave a critical section but it can be safely |
| 664 | * called while we are in a critical section. |
| 665 | * |
| 666 | * This function will not generally yield to equal priority threads but it |
| 667 | * can occur as a side effect. Note that lwkt_switch() is called from |
| 668 | * inside the critical section to prevent its own crit_exit() from reentering |
| 669 | * lwkt_yield_quick(). |
| 670 | * |
| 671 | * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint |
| 672 | * came along but was blocked and made pending. |
| 673 | * |
| 674 | * (self contained on a per cpu basis) |
| 675 | */ |
| 676 | void |
| 677 | lwkt_yield_quick(void) |
| 678 | { |
| 679 | globaldata_t gd = mycpu; |
| 680 | thread_t td = gd->gd_curthread; |
| 681 | |
| 682 | /* |
| 683 | * gd_reqflags is cleared in splz if the cpl is 0. If we were to clear |
| 684 | * it with a non-zero cpl then we might not wind up calling splz after |
| 685 | * a task switch when the critical section is exited even though the |
| 686 | * new task could accept the interrupt. |
| 687 | * |
     * XXX when called from crit_exit() this is only reached after the last
     * critical section is released.  If called directly it will run splz()
     * even if we are in a critical section.
| 690 | * |
     * td_nest_count prevents deep nesting via splz() or doreti().  Note that
     * except for this special case, we MUST call splz() here to handle any
     * pending ints, particularly after we switch, or we might accidentally
| 694 | * halt the cpu with interrupts pending. |
| 695 | */ |
| 696 | if (gd->gd_reqflags && td->td_nest_count < 2) |
| 697 | splz(); |
| 698 | |
| 699 | /* |
| 700 | * YYY enabling will cause wakeup() to task-switch, which really |
| 701 | * confused the old 4.x code. This is a good way to simulate |
| 702 | * preemption and MP without actually doing preemption or MP, because a |
| 703 | * lot of code assumes that wakeup() does not block. |
| 704 | */ |
| 705 | if (untimely_switch && td->td_nest_count == 0 && |
| 706 | gd->gd_intr_nesting_level == 0 |
| 707 | ) { |
| 708 | crit_enter(); |
| 709 | /* |
| 710 | * YYY temporary hacks until we disassociate the userland scheduler |
| 711 | * from the LWKT scheduler. |
| 712 | */ |
| 713 | if (td->td_flags & TDF_RUNQ) { |
| 714 | lwkt_switch(); /* will not reenter yield function */ |
| 715 | } else { |
| 716 | lwkt_schedule_self(); /* make sure we are scheduled */ |
| 717 | lwkt_switch(); /* will not reenter yield function */ |
| 718 | lwkt_deschedule_self(); /* make sure we are descheduled */ |
| 719 | } |
| 720 | crit_exit_noyield(td); |
| 721 | } |
| 722 | } |
| 723 | |
| 724 | /* |
| 725 | * This implements a normal yield which, unlike _quick, will yield to equal |
| 726 | * priority threads as well. Note that gd_reqflags tests will be handled by |
| 727 | * the crit_exit() call in lwkt_switch(). |
| 728 | * |
| 729 | * (self contained on a per cpu basis) |
| 730 | */ |
| 731 | void |
| 732 | lwkt_yield(void) |
| 733 | { |
| 734 | lwkt_schedule_self(); |
| 735 | lwkt_switch(); |
| 736 | } |
| 737 | |
| 738 | /* |
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves; this is the shortcut procedure provided for that
 * purpose.
| 742 | * |
| 743 | * (non-blocking, self contained on a per cpu basis) |
| 744 | */ |
| 745 | void |
| 746 | lwkt_schedule_self(void) |
| 747 | { |
| 748 | thread_t td = curthread; |
| 749 | |
| 750 | crit_enter(); |
| 751 | KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!")); |
| 752 | _lwkt_enqueue(td); |
| 753 | #ifdef _KERNEL |
| 754 | if (td->td_proc && td->td_proc->p_stat == SSLEEP) |
| 755 | panic("SCHED SELF PANIC"); |
| 756 | #endif |
| 757 | crit_exit(); |
| 758 | } |
| 759 | |
| 760 | /* |
| 761 | * Generic schedule. Possibly schedule threads belonging to other cpus and |
| 762 | * deal with threads that might be blocked on a wait queue. |
| 763 | * |
| 764 | * YYY this is one of the best places to implement load balancing code. |
| 765 | * Load balancing can be accomplished by requesting other sorts of actions |
| 766 | * for the thread in question. |
| 767 | */ |
| 768 | void |
| 769 | lwkt_schedule(thread_t td) |
| 770 | { |
| 771 | #ifdef INVARIANTS |
| 772 | if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc |
| 773 | && td->td_proc->p_stat == SSLEEP |
| 774 | ) { |
| 775 | printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n", |
| 776 | curthread, |
| 777 | curthread->td_proc ? curthread->td_proc->p_pid : -1, |
| 778 | curthread->td_proc ? curthread->td_proc->p_stat : -1, |
| 779 | td, |
	    td->td_proc ? td->td_proc->p_pid : -1,
	    td->td_proc ? td->td_proc->p_stat : -1
| 782 | ); |
| 783 | panic("SCHED PANIC"); |
| 784 | } |
| 785 | #endif |
| 786 | crit_enter(); |
| 787 | if (td == curthread) { |
| 788 | _lwkt_enqueue(td); |
| 789 | } else { |
| 790 | lwkt_wait_t w; |
| 791 | |
| 792 | /* |
| 793 | * If the thread is on a wait list we have to send our scheduling |
| 794 | * request to the owner of the wait structure. Otherwise we send |
| 795 | * the scheduling request to the cpu owning the thread. Races |
| 796 | * are ok, the target will forward the message as necessary (the |
| 797 | * message may chase the thread around before it finally gets |
| 798 | * acted upon). |
| 799 | * |
| 800 | * (remember, wait structures use stable storage) |
| 801 | */ |
| 802 | if ((w = td->td_wait) != NULL) { |
| 803 | if (lwkt_trytoken(&w->wa_token)) { |
| 804 | TAILQ_REMOVE(&w->wa_waitq, td, td_threadq); |
| 805 | --w->wa_count; |
| 806 | td->td_wait = NULL; |
| 807 | if (smp_active == 0 || td->td_gd == mycpu) { |
| 808 | _lwkt_enqueue(td); |
| 809 | if (td->td_preemptable) { |
| 810 | td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ |
| 811 | } else if (_lwkt_wantresched(td, curthread)) { |
| 812 | need_resched(); |
| 813 | } |
| 814 | } else { |
| 815 | lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td); |
| 816 | } |
| 817 | lwkt_reltoken(&w->wa_token); |
| 818 | } else { |
| 819 | lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td); |
| 820 | } |
| 821 | } else { |
| 822 | /* |
| 823 | * If the wait structure is NULL and we own the thread, there |
| 824 | * is no race (since we are in a critical section). If we |
| 825 | * do not own the thread there might be a race but the |
| 826 | * target cpu will deal with it. |
| 827 | */ |
| 828 | if (smp_active == 0 || td->td_gd == mycpu) { |
| 829 | _lwkt_enqueue(td); |
| 830 | if (td->td_preemptable) { |
| 831 | td->td_preemptable(td, TDPRI_CRIT); |
| 832 | } else if (_lwkt_wantresched(td, curthread)) { |
| 833 | need_resched(); |
| 834 | } |
| 835 | } else { |
| 836 | lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td); |
| 837 | } |
| 838 | } |
| 839 | } |
| 840 | crit_exit(); |
| 841 | } |
| 842 | |
| 843 | /* |
| 844 | * Managed acquisition. This code assumes that the MP lock is held for |
| 845 | * the tdallq operation and that the thread has been descheduled from its |
| 846 | * original cpu. We also have to wait for the thread to be entirely switched |
| 847 | * out on its original cpu (this is usually fast enough that we never loop) |
| 848 | * since the LWKT system does not have to hold the MP lock while switching |
| 849 | * and the target may have released it before switching. |
| 850 | */ |
| 851 | void |
| 852 | lwkt_acquire(thread_t td) |
| 853 | { |
| 854 | struct globaldata *gd; |
| 855 | |
| 856 | gd = td->td_gd; |
| 857 | KKASSERT((td->td_flags & TDF_RUNQ) == 0); |
| 858 | while (td->td_flags & TDF_RUNNING) /* XXX spin */ |
| 859 | ; |
| 860 | if (gd != mycpu) { |
| 861 | crit_enter(); |
| 862 | TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq); /* protected by BGL */ |
| 863 | gd = mycpu; |
| 864 | td->td_gd = gd; |
| 865 | TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); /* protected by BGL */ |
| 866 | crit_exit(); |
| 867 | } |
| 868 | } |
| 869 | |
| 870 | /* |
| 871 | * Deschedule a thread. |
| 872 | * |
| 873 | * (non-blocking, self contained on a per cpu basis) |
| 874 | */ |
| 875 | void |
| 876 | lwkt_deschedule_self(void) |
| 877 | { |
| 878 | thread_t td = curthread; |
| 879 | |
| 880 | crit_enter(); |
    KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
| 882 | _lwkt_dequeue(td); |
| 883 | crit_exit(); |
| 884 | } |
| 885 | |
| 886 | /* |
 * Generic deschedule.  Descheduling threads other than your own should be
| 888 | * done only in carefully controlled circumstances. Descheduling is |
| 889 | * asynchronous. |
| 890 | * |
| 891 | * This function may block if the cpu has run out of messages. |
| 892 | */ |
| 893 | void |
| 894 | lwkt_deschedule(thread_t td) |
| 895 | { |
| 896 | crit_enter(); |
| 897 | if (td == curthread) { |
| 898 | _lwkt_dequeue(td); |
| 899 | } else { |
| 900 | if (td->td_gd == mycpu) { |
| 901 | _lwkt_dequeue(td); |
| 902 | } else { |
| 903 | lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_deschedule, td); |
| 904 | } |
| 905 | } |
| 906 | crit_exit(); |
| 907 | } |
| 908 | |
| 909 | /* |
| 910 | * Set the target thread's priority. This routine does not automatically |
| 911 | * switch to a higher priority thread, LWKT threads are not designed for |
| 912 | * continuous priority changes. Yield if you want to switch. |
| 913 | * |
| 914 | * We have to retain the critical section count which uses the high bits |
| 915 | * of the td_pri field. The specified priority may also indicate zero or |
| 916 | * more critical sections by adding TDPRI_CRIT*N. |
| 917 | */ |
| 918 | void |
| 919 | lwkt_setpri(thread_t td, int pri) |
| 920 | { |
| 921 | KKASSERT(pri >= 0); |
| 922 | KKASSERT(td->td_gd == mycpu); |
| 923 | crit_enter(); |
| 924 | if (td->td_flags & TDF_RUNQ) { |
| 925 | _lwkt_dequeue(td); |
| 926 | td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; |
| 927 | _lwkt_enqueue(td); |
| 928 | } else { |
| 929 | td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; |
| 930 | } |
| 931 | crit_exit(); |
| 932 | } |
| 933 | |
| 934 | void |
| 935 | lwkt_setpri_self(int pri) |
| 936 | { |
| 937 | thread_t td = curthread; |
| 938 | |
| 939 | KKASSERT(pri >= 0 && pri <= TDPRI_MAX); |
| 940 | crit_enter(); |
| 941 | if (td->td_flags & TDF_RUNQ) { |
| 942 | _lwkt_dequeue(td); |
| 943 | td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; |
| 944 | _lwkt_enqueue(td); |
| 945 | } else { |
| 946 | td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; |
| 947 | } |
| 948 | crit_exit(); |
| 949 | } |
| 950 | |
| 951 | struct proc * |
| 952 | lwkt_preempted_proc(void) |
| 953 | { |
| 954 | thread_t td = curthread; |
| 955 | while (td->td_preempted) |
| 956 | td = td->td_preempted; |
| 957 | return(td->td_proc); |
| 958 | } |
| 959 | |
| 960 | #if 0 |
| 961 | |
| 962 | /* |
| 963 | * This function deschedules the current thread and blocks on the specified |
| 964 | * wait queue. We obtain ownership of the wait queue in order to block |
| 965 | * on it. A generation number is used to interlock the wait queue in case |
| 966 | * it gets signalled while we are blocked waiting on the token. |
| 967 | * |
| 968 | * Note: alternatively we could dequeue our thread and then message the |
| 969 | * target cpu owning the wait queue. YYY implement as sysctl. |
| 970 | * |
| 971 | * Note: wait queue signals normally ping-pong the cpu as an optimization. |
| 972 | */ |
| 973 | |
| 974 | void |
| 975 | lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen) |
| 976 | { |
| 977 | thread_t td = curthread; |
| 978 | |
| 979 | lwkt_gettoken(&w->wa_token); |
| 980 | if (w->wa_gen == *gen) { |
| 981 | _lwkt_dequeue(td); |
| 982 | TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq); |
| 983 | ++w->wa_count; |
| 984 | td->td_wait = w; |
| 985 | td->td_wmesg = wmesg; |
| 986 | again: |
| 987 | lwkt_switch(); |
| 988 | lwkt_regettoken(&w->wa_token); |
| 989 | if (td->td_wmesg != NULL) { |
| 990 | _lwkt_dequeue(td); |
| 991 | goto again; |
| 992 | } |
| 993 | } |
| 994 | /* token might be lost, doesn't matter for gen update */ |
| 995 | *gen = w->wa_gen; |
| 996 | lwkt_reltoken(&w->wa_token); |
| 997 | } |
| 998 | |
| 999 | /* |
| 1000 | * Signal a wait queue. We gain ownership of the wait queue in order to |
| 1001 | * signal it. Once a thread is removed from the wait queue we have to |
| 1002 | * deal with the cpu owning the thread. |
| 1003 | * |
| 1004 | * Note: alternatively we could message the target cpu owning the wait |
| 1005 | * queue. YYY implement as sysctl. |
| 1006 | */ |
| 1007 | void |
| 1008 | lwkt_signal(lwkt_wait_t w, int count) |
| 1009 | { |
    thread_t td;
| 1012 | |
| 1013 | lwkt_gettoken(&w->wa_token); |
| 1014 | ++w->wa_gen; |
| 1015 | if (count < 0) |
| 1016 | count = w->wa_count; |
| 1017 | while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) { |
| 1018 | --count; |
| 1019 | --w->wa_count; |
| 1020 | TAILQ_REMOVE(&w->wa_waitq, td, td_threadq); |
| 1021 | td->td_wait = NULL; |
| 1022 | td->td_wmesg = NULL; |
| 1023 | if (td->td_gd == mycpu) { |
| 1024 | _lwkt_enqueue(td); |
| 1025 | } else { |
| 1026 | lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td); |
| 1027 | } |
| 1028 | lwkt_regettoken(&w->wa_token); |
| 1029 | } |
| 1030 | lwkt_reltoken(&w->wa_token); |
| 1031 | } |
| 1032 | |
| 1033 | #endif |
| 1034 | |
| 1035 | /* |
 * Create a kernel process/thread/whatever.  It shares its address space
| 1037 | * with proc0 - ie: kernel only. |
| 1038 | * |
| 1039 | * NOTE! By default new threads are created with the MP lock held. A |
| 1040 | * thread which does not require the MP lock should release it by calling |
| 1041 | * rel_mplock() at the start of the new thread. |
| 1042 | */ |
| 1043 | int |
| 1044 | lwkt_create(void (*func)(void *), void *arg, |
| 1045 | struct thread **tdp, thread_t template, int tdflags, int cpu, |
| 1046 | const char *fmt, ...) |
| 1047 | { |
| 1048 | thread_t td; |
| 1049 | __va_list ap; |
| 1050 | |
| 1051 | td = lwkt_alloc_thread(template, cpu); |
| 1052 | if (tdp) |
| 1053 | *tdp = td; |
| 1054 | cpu_set_thread_handler(td, lwkt_exit, func, arg); |
| 1055 | td->td_flags |= TDF_VERBOSE | tdflags; |
| 1056 | #ifdef SMP |
| 1057 | td->td_mpcount = 1; |
| 1058 | #endif |
| 1059 | |
| 1060 | /* |
| 1061 | * Set up arg0 for 'ps' etc |
| 1062 | */ |
| 1063 | __va_start(ap, fmt); |
| 1064 | vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap); |
| 1065 | __va_end(ap); |
| 1066 | |
| 1067 | /* |
| 1068 | * Schedule the thread to run |
| 1069 | */ |
| 1070 | if ((td->td_flags & TDF_STOPREQ) == 0) |
| 1071 | lwkt_schedule(td); |
| 1072 | else |
| 1073 | td->td_flags &= ~TDF_STOPREQ; |
| 1074 | return 0; |
| 1075 | } |
| 1076 | |
| 1077 | /* |
| 1078 | * kthread_* is specific to the kernel and is not needed by userland. |
| 1079 | */ |
| 1080 | #ifdef _KERNEL |
| 1081 | |
| 1082 | /* |
| 1083 | * Destroy an LWKT thread. Warning! This function is not called when |
| 1084 | * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and |
| 1085 | * uses a different reaping mechanism. |
| 1086 | */ |
| 1087 | void |
| 1088 | lwkt_exit(void) |
| 1089 | { |
| 1090 | thread_t td = curthread; |
| 1091 | |
| 1092 | if (td->td_flags & TDF_VERBOSE) |
| 1093 | printf("kthread %p %s has exited\n", td, td->td_comm); |
| 1094 | caps_exit(td); |
| 1095 | crit_enter(); |
| 1096 | lwkt_deschedule_self(); |
| 1097 | ++mycpu->gd_tdfreecount; |
| 1098 | TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq); |
| 1099 | cpu_thread_exit(); |
| 1100 | } |
| 1101 | |
| 1102 | /* |
 * Create a kernel process/thread/whatever.  It shares its address space
| 1104 | * with proc0 - ie: kernel only. 5.x compatible. |
| 1105 | * |
| 1106 | * NOTE! By default kthreads are created with the MP lock held. A |
| 1107 | * thread which does not require the MP lock should release it by calling |
| 1108 | * rel_mplock() at the start of the new thread. |
| 1109 | */ |
| 1110 | int |
| 1111 | kthread_create(void (*func)(void *), void *arg, |
| 1112 | struct thread **tdp, const char *fmt, ...) |
| 1113 | { |
| 1114 | thread_t td; |
| 1115 | __va_list ap; |
| 1116 | |
| 1117 | td = lwkt_alloc_thread(NULL, -1); |
| 1118 | if (tdp) |
| 1119 | *tdp = td; |
| 1120 | cpu_set_thread_handler(td, kthread_exit, func, arg); |
| 1121 | td->td_flags |= TDF_VERBOSE; |
| 1122 | #ifdef SMP |
| 1123 | td->td_mpcount = 1; |
| 1124 | #endif |
| 1125 | |
| 1126 | /* |
| 1127 | * Set up arg0 for 'ps' etc |
| 1128 | */ |
| 1129 | __va_start(ap, fmt); |
| 1130 | vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap); |
| 1131 | __va_end(ap); |
| 1132 | |
| 1133 | /* |
| 1134 | * Schedule the thread to run |
| 1135 | */ |
| 1136 | lwkt_schedule(td); |
| 1137 | return 0; |
| 1138 | } |
| 1139 | |
| 1140 | /* |
| 1141 | * Destroy an LWKT thread. Warning! This function is not called when |
| 1142 | * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and |
| 1143 | * uses a different reaping mechanism. |
| 1144 | * |
| 1145 | * XXX duplicates lwkt_exit() |
| 1146 | */ |
| 1147 | void |
| 1148 | kthread_exit(void) |
| 1149 | { |
| 1150 | lwkt_exit(); |
| 1151 | } |
| 1152 | |
| 1153 | #endif /* _KERNEL */ |
| 1154 | |
| 1155 | void |
| 1156 | crit_panic(void) |
| 1157 | { |
| 1158 | thread_t td = curthread; |
| 1159 | int lpri = td->td_pri; |
| 1160 | |
| 1161 | td->td_pri = 0; |
| 1162 | panic("td_pri is/would-go negative! %p %d", td, lpri); |
| 1163 | } |
| 1164 | |
| 1165 | #ifdef SMP |
| 1166 | |
| 1167 | /* |
| 1168 | * Send a function execution request to another cpu. The request is queued |
| 1169 | * on the cpu<->cpu ipiq matrix. Each cpu owns a unique ipiq FIFO for every |
 * possible target cpu.  The FIFO is written only by the originating cpu,
 * so queueing a request requires no interlock beyond a critical section.
| 1171 | * |
| 1172 | * YYY If the FIFO fills up we have to enable interrupts and process the |
| 1173 | * IPIQ while waiting for it to empty or we may deadlock with another cpu. |
| 1174 | * Create a CPU_*() function to do this! |
| 1175 | * |
| 1176 | * We can safely bump gd_intr_nesting_level because our crit_exit() at the |
| 1177 | * end will take care of any pending interrupts. |
| 1178 | * |
| 1179 | * Must be called from a critical section. |
| 1180 | */ |
| 1181 | int |
| 1182 | lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg) |
| 1183 | { |
| 1184 | lwkt_ipiq_t ip; |
| 1185 | int windex; |
| 1186 | struct globaldata *gd = mycpu; |
| 1187 | |
| 1188 | if (dcpu == gd->gd_cpuid) { |
| 1189 | func(arg); |
| 1190 | return(0); |
| 1191 | } |
| 1192 | crit_enter(); |
| 1193 | ++gd->gd_intr_nesting_level; |
| 1194 | #ifdef INVARIANTS |
| 1195 | if (gd->gd_intr_nesting_level > 20) |
| 1196 | panic("lwkt_send_ipiq: TOO HEAVILY NESTED!"); |
| 1197 | #endif |
| 1198 | KKASSERT(curthread->td_pri >= TDPRI_CRIT); |
| 1199 | KKASSERT(dcpu >= 0 && dcpu < ncpus); |
| 1200 | ++ipiq_count; |
| 1201 | ip = &gd->gd_ipiq[dcpu]; |
| 1202 | |
| 1203 | /* |
| 1204 | * We always drain before the FIFO becomes full so it should never |
| 1205 | * become full. We need to leave enough entries to deal with |
| 1206 | * reentrancy. |
| 1207 | */ |
| 1208 | KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO); |
| 1209 | windex = ip->ip_windex & MAXCPUFIFO_MASK; |
| 1210 | ip->ip_func[windex] = func; |
| 1211 | ip->ip_arg[windex] = arg; |
| 1212 | /* YYY memory barrier */ |
| 1213 | ++ip->ip_windex; |
| 1214 | if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) { |
| 1215 | unsigned int eflags = read_eflags(); |
| 1216 | cpu_enable_intr(); |
| 1217 | ++ipiq_fifofull; |
| 1218 | while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) { |
| 1219 | KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1); |
| 1220 | lwkt_process_ipiq(); |
| 1221 | } |
| 1222 | write_eflags(eflags); |
| 1223 | } |
| 1224 | --gd->gd_intr_nesting_level; |
| 1225 | cpu_send_ipiq(dcpu); /* issues memory barrier if appropriate */ |
| 1226 | crit_exit(); |
| 1227 | return(ip->ip_windex); |
| 1228 | } |
| 1229 | |
| 1230 | /* |
| 1231 | * Send a message to several target cpus. Typically used for scheduling. |
| 1232 | * The message will not be sent to stopped cpus. |
| 1233 | */ |
| 1234 | void |
| 1235 | lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg) |
| 1236 | { |
| 1237 | int cpuid; |
| 1238 | |
| 1239 | mask &= ~stopped_cpus; |
| 1240 | while (mask) { |
| 1241 | cpuid = bsfl(mask); |
| 1242 | lwkt_send_ipiq(cpuid, func, arg); |
| 1243 | mask &= ~(1 << cpuid); |
| 1244 | } |
| 1245 | } |
| 1246 | |
| 1247 | /* |
| 1248 | * Wait for the remote cpu to finish processing a function. |
| 1249 | * |
| 1250 | * YYY we have to enable interrupts and process the IPIQ while waiting |
| 1251 | * for it to empty or we may deadlock with another cpu. Create a CPU_*() |
| 1252 | * function to do this! YYY we really should 'block' here. |
| 1253 | * |
 * Must be called from a critical section.  This routine may be called
| 1255 | * from an interrupt (for example, if an interrupt wakes a foreign thread |
| 1256 | * up). |
| 1257 | */ |
| 1258 | void |
| 1259 | lwkt_wait_ipiq(int dcpu, int seq) |
| 1260 | { |
| 1261 | lwkt_ipiq_t ip; |
| 1262 | int maxc = 100000000; |
| 1263 | |
| 1264 | if (dcpu != mycpu->gd_cpuid) { |
| 1265 | KKASSERT(dcpu >= 0 && dcpu < ncpus); |
| 1266 | ip = &mycpu->gd_ipiq[dcpu]; |
| 1267 | if ((int)(ip->ip_xindex - seq) < 0) { |
| 1268 | unsigned int eflags = read_eflags(); |
| 1269 | cpu_enable_intr(); |
| 1270 | while ((int)(ip->ip_xindex - seq) < 0) { |
| 1271 | lwkt_process_ipiq(); |
| 1272 | if (--maxc == 0) |
| 1273 | printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, dcpu, ip->ip_xindex - seq); |
| 1274 | if (maxc < -1000000) |
| 1275 | panic("LWKT_WAIT_IPIQ"); |
| 1276 | } |
| 1277 | write_eflags(eflags); |
| 1278 | } |
| 1279 | } |
| 1280 | } |
| 1281 | |
| 1282 | /* |
| 1283 | * Called from IPI interrupt (like a fast interrupt), which has placed |
| 1284 | * us in a critical section. The MP lock may or may not be held. |
| 1285 | * May also be called from doreti or splz, or be reentrantly called |
| 1286 | * indirectly through the ip_func[] we run. |
| 1287 | * |
| 1288 | * There are two versions, one where no interrupt frame is available (when |
 * called from the send code and from splz), and one where an interrupt
| 1290 | * frame is available. |
| 1291 | */ |
| 1292 | void |
| 1293 | lwkt_process_ipiq(void) |
| 1294 | { |
| 1295 | int n; |
| 1296 | int cpuid = mycpu->gd_cpuid; |
| 1297 | |
| 1298 | for (n = 0; n < ncpus; ++n) { |
| 1299 | lwkt_ipiq_t ip; |
| 1300 | int ri; |
| 1301 | |
| 1302 | if (n == cpuid) |
| 1303 | continue; |
| 1304 | ip = globaldata_find(n)->gd_ipiq; |
| 1305 | if (ip == NULL) |
| 1306 | continue; |
| 1307 | ip = &ip[cpuid]; |
| 1308 | |
| 1309 | /* |
| 1310 | * Note: xindex is only updated after we are sure the function has |
| 1311 | * finished execution. Beware lwkt_process_ipiq() reentrancy! The |
| 1312 | * function may send an IPI which may block/drain. |
| 1313 | */ |
| 1314 | while (ip->ip_rindex != ip->ip_windex) { |
| 1315 | ri = ip->ip_rindex & MAXCPUFIFO_MASK; |
| 1316 | ++ip->ip_rindex; |
| 1317 | ip->ip_func[ri](ip->ip_arg[ri], NULL); |
| 1318 | /* YYY memory barrier */ |
| 1319 | ip->ip_xindex = ip->ip_rindex; |
| 1320 | } |
| 1321 | } |
| 1322 | } |
| 1323 | |
| 1324 | #ifdef _KERNEL |
| 1325 | void |
| 1326 | lwkt_process_ipiq_frame(struct intrframe frame) |
| 1327 | { |
| 1328 | int n; |
| 1329 | int cpuid = mycpu->gd_cpuid; |
| 1330 | |
| 1331 | for (n = 0; n < ncpus; ++n) { |
| 1332 | lwkt_ipiq_t ip; |
| 1333 | int ri; |
| 1334 | |
| 1335 | if (n == cpuid) |
| 1336 | continue; |
| 1337 | ip = globaldata_find(n)->gd_ipiq; |
| 1338 | if (ip == NULL) |
| 1339 | continue; |
| 1340 | ip = &ip[cpuid]; |
| 1341 | |
| 1342 | /* |
| 1343 | * Note: xindex is only updated after we are sure the function has |
| 1344 | * finished execution. Beware lwkt_process_ipiq() reentrancy! The |
| 1345 | * function may send an IPI which may block/drain. |
| 1346 | */ |
| 1347 | while (ip->ip_rindex != ip->ip_windex) { |
| 1348 | ri = ip->ip_rindex & MAXCPUFIFO_MASK; |
| 1349 | ++ip->ip_rindex; |
| 1350 | ip->ip_func[ri](ip->ip_arg[ri], &frame); |
| 1351 | /* YYY memory barrier */ |
| 1352 | ip->ip_xindex = ip->ip_rindex; |
| 1353 | } |
| 1354 | } |
| 1355 | } |
| 1356 | #endif |
| 1357 | |
| 1358 | #else |
| 1359 | |
| 1360 | int |
| 1361 | lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg) |
| 1362 | { |
| 1363 | panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg); |
| 1364 | return(0); /* NOT REACHED */ |
| 1365 | } |
| 1366 | |
| 1367 | void |
| 1368 | lwkt_wait_ipiq(int dcpu, int seq) |
| 1369 | { |
| 1370 | panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq); |
| 1371 | } |
| 1372 | |
| 1373 | #endif |