vkernel: Remove unused variable.
[dragonfly.git] / sys / kern / lwkt_thread.c
1/*
2 * Copyright (c) 2003-2011 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*
36 * Each cpu in a system has its own self-contained light weight kernel
37 * thread scheduler, which means that generally speaking we only need
38 * to use a critical section to avoid problems. Foreign thread
39 * scheduling is queued via (async) IPIs.
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/proc.h>
46#include <sys/rtprio.h>
47#include <sys/kinfo.h>
48#include <sys/queue.h>
49#include <sys/sysctl.h>
50#include <sys/kthread.h>
51#include <machine/cpu.h>
52#include <sys/lock.h>
53#include <sys/caps.h>
54#include <sys/spinlock.h>
55#include <sys/ktr.h>
56
57#include <sys/thread2.h>
58#include <sys/spinlock2.h>
59#include <sys/mplock2.h>
60
61#include <sys/dsched.h>
62
63#include <vm/vm.h>
64#include <vm/vm_param.h>
65#include <vm/vm_kern.h>
66#include <vm/vm_object.h>
67#include <vm/vm_page.h>
68#include <vm/vm_map.h>
69#include <vm/vm_pager.h>
70#include <vm/vm_extern.h>
71
72#include <machine/stdarg.h>
73#include <machine/smp.h>
74
75#if !defined(KTR_CTXSW)
76#define KTR_CTXSW KTR_ALL
77#endif
78KTR_INFO_MASTER(ctxsw);
79KTR_INFO(KTR_CTXSW, ctxsw, sw, 0, "#cpu[%d].td = %p",
80 sizeof(int) + sizeof(struct thread *));
81KTR_INFO(KTR_CTXSW, ctxsw, pre, 1, "#cpu[%d].td = %p",
82 sizeof(int) + sizeof(struct thread *));
83KTR_INFO(KTR_CTXSW, ctxsw, newtd, 2, "#threads[%p].name = %s",
84 sizeof (struct thread *) + sizeof(char *));
85KTR_INFO(KTR_CTXSW, ctxsw, deadtd, 3, "#threads[%p].name = <dead>", sizeof (struct thread *));
86
87static MALLOC_DEFINE(M_THREAD, "thread", "lwkt threads");
88
89#ifdef INVARIANTS
90static int panic_on_cscount = 0;
91#endif
92static __int64_t switch_count = 0;
93static __int64_t preempt_hit = 0;
94static __int64_t preempt_miss = 0;
95static __int64_t preempt_weird = 0;
96static __int64_t token_contention_count[TDPRI_MAX+1] __debugvar;
97static int lwkt_use_spin_port;
98static struct objcache *thread_cache;
99
100#ifdef SMP
101static void lwkt_schedule_remote(void *arg, int arg2, struct intrframe *frame);
102static void lwkt_setcpu_remote(void *arg);
103#endif
104
105extern void cpu_heavy_restore(void);
106extern void cpu_lwkt_restore(void);
107extern void cpu_kthread_restore(void);
108extern void cpu_idle_restore(void);
109
110/*
111 * We can make all thread ports use the spin backend instead of the thread
112 * backend. This should only be set to debug the spin backend.
113 */
114TUNABLE_INT("lwkt.use_spin_port", &lwkt_use_spin_port);
115
116#ifdef INVARIANTS
117SYSCTL_INT(_lwkt, OID_AUTO, panic_on_cscount, CTLFLAG_RW, &panic_on_cscount, 0,
118 "Panic if attempting to switch lwkt's while mastering cpusync");
119#endif
120SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0,
121 "Number of switched threads");
122SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0,
123 "Successful preemption events");
124SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0,
125 "Failed preemption events");
126SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0,
127 "Number of preempted threads.");
128#ifdef INVARIANTS
129SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_00, CTLFLAG_RW,
130 &token_contention_count[0], 0, "spinning due to token contention");
131SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_01, CTLFLAG_RW,
132 &token_contention_count[1], 0, "spinning due to token contention");
133SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_02, CTLFLAG_RW,
134 &token_contention_count[2], 0, "spinning due to token contention");
135SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_03, CTLFLAG_RW,
136 &token_contention_count[3], 0, "spinning due to token contention");
137SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_04, CTLFLAG_RW,
138 &token_contention_count[4], 0, "spinning due to token contention");
139SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_05, CTLFLAG_RW,
140 &token_contention_count[5], 0, "spinning due to token contention");
141SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_06, CTLFLAG_RW,
142 &token_contention_count[6], 0, "spinning due to token contention");
143SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_07, CTLFLAG_RW,
144 &token_contention_count[7], 0, "spinning due to token contention");
145SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_08, CTLFLAG_RW,
146 &token_contention_count[8], 0, "spinning due to token contention");
147SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_09, CTLFLAG_RW,
148 &token_contention_count[9], 0, "spinning due to token contention");
149SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_10, CTLFLAG_RW,
150 &token_contention_count[10], 0, "spinning due to token contention");
151SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_11, CTLFLAG_RW,
152 &token_contention_count[11], 0, "spinning due to token contention");
153SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_12, CTLFLAG_RW,
154 &token_contention_count[12], 0, "spinning due to token contention");
155SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_13, CTLFLAG_RW,
156 &token_contention_count[13], 0, "spinning due to token contention");
157SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_14, CTLFLAG_RW,
158 &token_contention_count[14], 0, "spinning due to token contention");
159SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_15, CTLFLAG_RW,
160 &token_contention_count[15], 0, "spinning due to token contention");
161SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_16, CTLFLAG_RW,
162 &token_contention_count[16], 0, "spinning due to token contention");
163SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_17, CTLFLAG_RW,
164 &token_contention_count[17], 0, "spinning due to token contention");
165SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_18, CTLFLAG_RW,
166 &token_contention_count[18], 0, "spinning due to token contention");
167SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_19, CTLFLAG_RW,
168 &token_contention_count[19], 0, "spinning due to token contention");
169SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_20, CTLFLAG_RW,
170 &token_contention_count[20], 0, "spinning due to token contention");
171SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_21, CTLFLAG_RW,
172 &token_contention_count[21], 0, "spinning due to token contention");
173SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_22, CTLFLAG_RW,
174 &token_contention_count[22], 0, "spinning due to token contention");
175SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_23, CTLFLAG_RW,
176 &token_contention_count[23], 0, "spinning due to token contention");
177SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_24, CTLFLAG_RW,
178 &token_contention_count[24], 0, "spinning due to token contention");
179SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_25, CTLFLAG_RW,
180 &token_contention_count[25], 0, "spinning due to token contention");
181SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_26, CTLFLAG_RW,
182 &token_contention_count[26], 0, "spinning due to token contention");
183SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_27, CTLFLAG_RW,
184 &token_contention_count[27], 0, "spinning due to token contention");
185SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_28, CTLFLAG_RW,
186 &token_contention_count[28], 0, "spinning due to token contention");
187SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_29, CTLFLAG_RW,
188 &token_contention_count[29], 0, "spinning due to token contention");
189SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_30, CTLFLAG_RW,
190 &token_contention_count[30], 0, "spinning due to token contention");
191SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count_31, CTLFLAG_RW,
192 &token_contention_count[31], 0, "spinning due to token contention");
193#endif
194static int fairq_enable = 0;
195SYSCTL_INT(_lwkt, OID_AUTO, fairq_enable, CTLFLAG_RW,
196 &fairq_enable, 0, "Turn on fairq priority accumulators");
197static int fairq_bypass = -1;
198SYSCTL_INT(_lwkt, OID_AUTO, fairq_bypass, CTLFLAG_RW,
199 &fairq_bypass, 0, "Allow fairq to bypass td on token failure");
200extern int lwkt_sched_debug;
201int lwkt_sched_debug = 0;
202SYSCTL_INT(_lwkt, OID_AUTO, sched_debug, CTLFLAG_RW,
203 &lwkt_sched_debug, 0, "Scheduler debug");
204static int lwkt_spin_loops = 10;
205SYSCTL_INT(_lwkt, OID_AUTO, spin_loops, CTLFLAG_RW,
206 &lwkt_spin_loops, 0, "Scheduler spin loops until sorted decon");
207static int lwkt_spin_reseq = 0;
208SYSCTL_INT(_lwkt, OID_AUTO, spin_reseq, CTLFLAG_RW,
209 &lwkt_spin_reseq, 0, "Scheduler resequencer enable");
210static int lwkt_spin_monitor = 0;
211SYSCTL_INT(_lwkt, OID_AUTO, spin_monitor, CTLFLAG_RW,
212 &lwkt_spin_monitor, 0, "Scheduler uses monitor/mwait");
213static int lwkt_spin_fatal = 0; /* disabled */
214SYSCTL_INT(_lwkt, OID_AUTO, spin_fatal, CTLFLAG_RW,
215 &lwkt_spin_fatal, 0, "LWKT scheduler spin loops till fatal panic");
216static int preempt_enable = 1;
217SYSCTL_INT(_lwkt, OID_AUTO, preempt_enable, CTLFLAG_RW,
218 &preempt_enable, 0, "Enable preemption");
219static int lwkt_cache_threads = 32;
220SYSCTL_INT(_lwkt, OID_AUTO, cache_threads, CTLFLAG_RD,
221 &lwkt_cache_threads, 0, "thread+kstack cache");
222
223static __cachealign int lwkt_cseq_rindex;
224static __cachealign int lwkt_cseq_windex;
225
226/*
227 * These helper procedures handle the runq, they can only be called from
228 * within a critical section.
229 *
230 * WARNING! Prior to SMP being brought up it is possible to enqueue and
231 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
232 * instead of 'mycpu' when referencing the globaldata structure. Once
233 * SMP is live, enqueuing and dequeuing only occur on the current cpu.
234 */
235static __inline
236void
237_lwkt_dequeue(thread_t td)
238{
239 if (td->td_flags & TDF_RUNQ) {
240 struct globaldata *gd = td->td_gd;
241
242 td->td_flags &= ~TDF_RUNQ;
243 TAILQ_REMOVE(&gd->gd_tdrunq, td, td_threadq);
244 if (TAILQ_FIRST(&gd->gd_tdrunq) == NULL)
245 atomic_clear_int(&gd->gd_reqflags, RQF_RUNNING);
246 }
247}
248
249/*
250 * Priority enqueue.
251 *
252 * NOTE: There are a limited number of lwkt threads runnable since user
253 * processes only schedule one at a time per cpu.
254 */
255static __inline
256void
257_lwkt_enqueue(thread_t td)
258{
259 thread_t xtd;
260
261 if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING|TDF_BLOCKQ)) == 0) {
262 struct globaldata *gd = td->td_gd;
263
264 td->td_flags |= TDF_RUNQ;
265 xtd = TAILQ_FIRST(&gd->gd_tdrunq);
266 if (xtd == NULL) {
267 TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
268 atomic_set_int(&gd->gd_reqflags, RQF_RUNNING);
269 } else {
270 while (xtd && xtd->td_pri >= td->td_pri)
271 xtd = TAILQ_NEXT(xtd, td_threadq);
272 if (xtd)
273 TAILQ_INSERT_BEFORE(xtd, td, td_threadq);
274 else
275 TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
276 }
277
278 /*
279 * Request a LWKT reschedule if we are now at the head of the queue.
280 */
281 if (TAILQ_FIRST(&gd->gd_tdrunq) == td)
282 need_lwkt_resched();
283 }
284}
285
286static __boolean_t
287_lwkt_thread_ctor(void *obj, void *privdata, int ocflags)
288{
289 struct thread *td = (struct thread *)obj;
290
291 td->td_kstack = NULL;
292 td->td_kstack_size = 0;
293 td->td_flags = TDF_ALLOCATED_THREAD;
294 return (1);
295}
296
297static void
298_lwkt_thread_dtor(void *obj, void *privdata)
299{
300 struct thread *td = (struct thread *)obj;
301
302 KASSERT(td->td_flags & TDF_ALLOCATED_THREAD,
303 ("_lwkt_thread_dtor: not allocated from objcache"));
304 KASSERT((td->td_flags & TDF_ALLOCATED_STACK) && td->td_kstack &&
305 td->td_kstack_size > 0,
306 ("_lwkt_thread_dtor: corrupted stack"));
307 kmem_free(&kernel_map, (vm_offset_t)td->td_kstack, td->td_kstack_size);
308}
309
310/*
311 * Initialize the lwkt subsystem.
312 *
313 * Nominally cache up to 32 thread + kstack structures.
314 */
315void
316lwkt_init(void)
317{
318 TUNABLE_INT("lwkt.cache_threads", &lwkt_cache_threads);
319 thread_cache = objcache_create_mbacked(
320 M_THREAD, sizeof(struct thread),
321 NULL, lwkt_cache_threads,
322 _lwkt_thread_ctor, _lwkt_thread_dtor, NULL);
323}
324
325/*
326 * Schedule a thread to run. As the current thread we can always safely
327 * schedule ourselves, and a shortcut procedure is provided for that
328 * function.
329 *
330 * (non-blocking, self contained on a per cpu basis)
331 */
332void
333lwkt_schedule_self(thread_t td)
334{
335 KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
336 crit_enter_quick(td);
337 KASSERT(td != &td->td_gd->gd_idlethread,
338 ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
339 KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
340 _lwkt_enqueue(td);
341 crit_exit_quick(td);
342}
343
344/*
345 * Deschedule a thread.
346 *
347 * (non-blocking, self contained on a per cpu basis)
348 */
349void
350lwkt_deschedule_self(thread_t td)
351{
352 crit_enter_quick(td);
353 _lwkt_dequeue(td);
354 crit_exit_quick(td);
355}
356
357/*
358 * LWKTs operate on a per-cpu basis
359 *
360 * WARNING! Called from early boot, 'mycpu' may not work yet.
361 */
362void
363lwkt_gdinit(struct globaldata *gd)
364{
365 TAILQ_INIT(&gd->gd_tdrunq);
366 TAILQ_INIT(&gd->gd_tdallq);
367}
368
369/*
370 * Create a new thread. The thread must be associated with a process context
371 * or LWKT start address before it can be scheduled. If the target cpu is
372 * -1 the thread will be created on the current cpu.
373 *
374 * If you intend to create a thread without a process context this function
375 * does everything except load the startup and switcher function.
376 */
377thread_t
378lwkt_alloc_thread(struct thread *td, int stksize, int cpu, int flags)
379{
380 static int cpu_rotator;
381 globaldata_t gd = mycpu;
382 void *stack;
383
384 /*
385 * If static thread storage is not supplied allocate a thread. Reuse
386 * a cached free thread if possible. gd_freetd is used to keep an exiting
387 * thread intact through the exit.
388 */
389 if (td == NULL) {
390 crit_enter_gd(gd);
391 if ((td = gd->gd_freetd) != NULL) {
392 KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|
393 TDF_RUNQ)) == 0);
394 gd->gd_freetd = NULL;
395 } else {
396 td = objcache_get(thread_cache, M_WAITOK);
397 KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|
398 TDF_RUNQ)) == 0);
399 }
400 crit_exit_gd(gd);
401 KASSERT((td->td_flags &
402 (TDF_ALLOCATED_THREAD|TDF_RUNNING)) == TDF_ALLOCATED_THREAD,
403 ("lwkt_alloc_thread: corrupted td flags 0x%X", td->td_flags));
404 flags |= td->td_flags & (TDF_ALLOCATED_THREAD|TDF_ALLOCATED_STACK);
405 }
406
407 /*
408 * Try to reuse cached stack.
409 */
410 if ((stack = td->td_kstack) != NULL && td->td_kstack_size != stksize) {
411 if (flags & TDF_ALLOCATED_STACK) {
412 kmem_free(&kernel_map, (vm_offset_t)stack, td->td_kstack_size);
413 stack = NULL;
414 }
415 }
416 if (stack == NULL) {
417 stack = (void *)kmem_alloc_stack(&kernel_map, stksize);
418 flags |= TDF_ALLOCATED_STACK;
419 }
420 if (cpu < 0) {
421 cpu = ++cpu_rotator;
422 cpu_ccfence();
423 cpu %= ncpus;
424 }
425 lwkt_init_thread(td, stack, stksize, flags, globaldata_find(cpu));
426 return(td);
427}
428
429/*
430 * Initialize a preexisting thread structure. This function is used by
431 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
432 *
433 * All threads start out in a critical section at a priority of
434 * TDPRI_KERN_DAEMON. Higher level code will modify the priority as
435 * appropriate. This function may send an IPI message when the
436 * requested cpu is not the current cpu and consequently gd_tdallq may
437 * not be initialized synchronously from the point of view of the originating
438 * cpu.
439 *
440 * NOTE! We have to be careful with regard to creating threads for other cpus
441 * if SMP has not yet been activated.
442 */
443#ifdef SMP
444
445static void
446lwkt_init_thread_remote(void *arg)
447{
448 thread_t td = arg;
449
450 /*
451 * Protected by critical section held by IPI dispatch
452 */
453 TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
454}
455
456#endif
457
458/*
459 * lwkt core thread structural initialization.
460 *
461 * NOTE: All threads are initialized as mpsafe threads.
462 */
463void
464lwkt_init_thread(thread_t td, void *stack, int stksize, int flags,
465 struct globaldata *gd)
466{
467 globaldata_t mygd = mycpu;
468
469 bzero(td, sizeof(struct thread));
470 td->td_kstack = stack;
471 td->td_kstack_size = stksize;
472 td->td_flags = flags;
473 td->td_gd = gd;
474 td->td_pri = TDPRI_KERN_DAEMON;
475 td->td_critcount = 1;
476 td->td_toks_stop = &td->td_toks_base;
477 if (lwkt_use_spin_port || (flags & TDF_FORCE_SPINPORT))
478 lwkt_initport_spin(&td->td_msgport);
479 else
480 lwkt_initport_thread(&td->td_msgport, td);
481 pmap_init_thread(td);
482#ifdef SMP
483 /*
484 * Normally initializing a thread for a remote cpu requires sending an
485 * IPI. However, the idlethread is setup before the other cpus are
486 * activated so we have to treat it as a special case. XXX manipulation
487 * of gd_tdallq requires the BGL.
488 */
489 if (gd == mygd || td == &gd->gd_idlethread) {
490 crit_enter_gd(mygd);
491 TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
492 crit_exit_gd(mygd);
493 } else {
494 lwkt_send_ipiq(gd, lwkt_init_thread_remote, td);
495 }
496#else
497 crit_enter_gd(mygd);
498 TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
499 crit_exit_gd(mygd);
500#endif
501
502 dsched_new_thread(td);
503}
504
505void
506lwkt_set_comm(thread_t td, const char *ctl, ...)
507{
508 __va_list va;
509
510 __va_start(va, ctl);
511 kvsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
512 __va_end(va);
513 KTR_LOG(ctxsw_newtd, td, &td->td_comm[0]);
514}
515
516void
517lwkt_hold(thread_t td)
518{
519 atomic_add_int(&td->td_refs, 1);
520}
521
522void
523lwkt_rele(thread_t td)
524{
525 KKASSERT(td->td_refs > 0);
526 atomic_add_int(&td->td_refs, -1);
527}
528
529void
530lwkt_wait_free(thread_t td)
531{
532 while (td->td_refs)
533 tsleep(td, 0, "tdreap", hz);
534}
535
536void
537lwkt_free_thread(thread_t td)
538{
539 KKASSERT(td->td_refs == 0);
540 KKASSERT((td->td_flags & (TDF_RUNNING | TDF_PREEMPT_LOCK |
541 TDF_RUNQ | TDF_TSLEEPQ)) == 0);
542 if (td->td_flags & TDF_ALLOCATED_THREAD) {
543 objcache_put(thread_cache, td);
544 } else if (td->td_flags & TDF_ALLOCATED_STACK) {
545 /* client-allocated struct with internally allocated stack */
546 KASSERT(td->td_kstack && td->td_kstack_size > 0,
547 ("lwkt_free_thread: corrupted stack"));
548 kmem_free(&kernel_map, (vm_offset_t)td->td_kstack, td->td_kstack_size);
549 td->td_kstack = NULL;
550 td->td_kstack_size = 0;
551 }
552 KTR_LOG(ctxsw_deadtd, td);
553}
554
555
556/*
557 * Switch to the next runnable lwkt. If no LWKTs are runnable then
558 * switch to the idlethread. Switching must occur within a critical
559 * section to avoid races with the scheduling queue.
560 *
561 * We always have full control over our cpu's run queue. Other cpus
562 * that wish to manipulate our queue must use the cpu_*msg() calls to
563 * talk to our cpu, so a critical section is all that is needed and
564 * the result is very, very fast thread switching.
565 *
566 * The LWKT scheduler uses a fixed priority model and round-robins at
567 * each priority level. User process scheduling is a totally
568 * different beast and LWKT priorities should not be confused with
569 * user process priorities.
570 *
571 * PREEMPTION NOTE: Preemption occurs via lwkt_preempt(). lwkt_switch()
572 * is not called by the current thread in the preemption case, only when
573 * the preempting thread blocks (in order to return to the original thread).
574 *
575 * SPECIAL NOTE ON SWITCH ATOMICITY: Certain operations such as thread
576 * migration and tsleep deschedule the current lwkt thread and call
577 * lwkt_switch(). In particular, the target cpu of the migration fully
578 * expects the thread to become non-runnable and can deadlock against
579 * cpusync operations if we run any IPIs prior to switching the thread out.
580 *
581 * WE MUST BE VERY CAREFUL NOT TO RUN SPLZ DIRECTLY OR INDIRECTLY IF
582 * THE CURRENT THREAD HAS BEEN DESCHEDULED!
583 */
584void
585lwkt_switch(void)
586{
587 globaldata_t gd = mycpu;
588 thread_t td = gd->gd_curthread;
589 thread_t ntd;
590 thread_t xtd;
591 int spinning = 0;
592
593 KKASSERT(gd->gd_processing_ipiq == 0);
594
595 /*
596 * Switching from within a 'fast' (non-thread-switched) interrupt or IPI
597 * is illegal. However, we may have to do it anyway if we hit a fatal
598 * kernel trap or we have panicked.
599 *
600 * If this case occurs, save and restore the interrupt nesting level.
601 */
602 if (gd->gd_intr_nesting_level) {
603 int savegdnest;
604 int savegdtrap;
605
606 if (gd->gd_trap_nesting_level == 0 && panic_cpu_gd != mycpu) {
607 panic("lwkt_switch: Attempt to switch from a "
608 "a fast interrupt, ipi, or hard code section, "
609 "td %p\n",
610 td);
611 } else {
612 savegdnest = gd->gd_intr_nesting_level;
613 savegdtrap = gd->gd_trap_nesting_level;
614 gd->gd_intr_nesting_level = 0;
615 gd->gd_trap_nesting_level = 0;
616 if ((td->td_flags & TDF_PANICWARN) == 0) {
617 td->td_flags |= TDF_PANICWARN;
618 kprintf("Warning: thread switch from interrupt, IPI, "
619 "or hard code section.\n"
620 "thread %p (%s)\n", td, td->td_comm);
621 print_backtrace(-1);
622 }
623 lwkt_switch();
624 gd->gd_intr_nesting_level = savegdnest;
625 gd->gd_trap_nesting_level = savegdtrap;
626 return;
627 }
628 }
629
630 /*
631 * Release our current user process designation if we are blocking
632 * or if a user reschedule was requested.
633 *
634 * NOTE: This function is NOT called if we are switching into or
635 * returning from a preemption.
636 *
637 * NOTE: Releasing our current user process designation may cause
638 * it to be assigned to another thread, which in turn will
639 * cause us to block in the usched acquire code when we attempt
640 * to return to userland.
641 *
642 * NOTE: On SMP systems this can be very nasty when heavy token
643 * contention is present so we want to be careful not to
644 * release the designation gratuitously.
645 */
646 if (td->td_release &&
647 (user_resched_wanted() || (td->td_flags & TDF_RUNQ) == 0)) {
648 td->td_release(td);
649 }
650
651 /*
652 * Release all tokens
653 */
654 crit_enter_gd(gd);
655 if (TD_TOKS_HELD(td))
656 lwkt_relalltokens(td);
657
658 /*
659 * We had better not be holding any spin locks, but don't get into an
660 * endless panic loop.
661 */
662 KASSERT(gd->gd_spinlocks_wr == 0 || panicstr != NULL,
663 ("lwkt_switch: still holding %d exclusive spinlocks!",
664 gd->gd_spinlocks_wr));
665
666
667#ifdef SMP
668#ifdef INVARIANTS
669 if (td->td_cscount) {
670 kprintf("Diagnostic: attempt to switch while mastering cpusync: %p\n",
671 td);
672 if (panic_on_cscount)
673 panic("switching while mastering cpusync");
674 }
675#endif
676#endif
677
678 /*
679 * If we had preempted another thread on this cpu, resume the preempted
680 * thread. This occurs transparently, whether the preempted thread
681 * was scheduled or not (it may have been preempted after descheduling
682 * itself).
683 *
684 * We have to setup the MP lock for the original thread after backing
685 * out the adjustment that was made to curthread when the original
686 * was preempted.
687 */
688 if ((ntd = td->td_preempted) != NULL) {
689 KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
690 ntd->td_flags |= TDF_PREEMPT_DONE;
691
692 /*
693 * The interrupt may have woken a thread up, we need to properly
694 * set the reschedule flag if the originally interrupted thread is
695 * at a lower priority.
696 *
697 * The interrupt may not have descheduled.
698 */
699 if (TAILQ_FIRST(&gd->gd_tdrunq) != ntd)
700 need_lwkt_resched();
701 goto havethread_preempted;
702 }
703
704 /*
705 * If we cannot obtain ownership of the tokens we cannot immediately
706 * schedule the target thread.
707 *
708 * Reminder: Again, we cannot afford to run any IPIs in this path if
709 * the current thread has been descheduled.
710 */
711 for (;;) {
712 clear_lwkt_resched();
713
714 /*
715 * Hotpath - pull the head of the run queue and attempt to schedule
716 * it.
717 */
718 for (;;) {
719 ntd = TAILQ_FIRST(&gd->gd_tdrunq);
720
721 if (ntd == NULL) {
722 /*
723 * Runq is empty, switch to idle to allow it to halt.
724 */
725 ntd = &gd->gd_idlethread;
726#ifdef SMP
727 if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
728 ASSERT_NO_TOKENS_HELD(ntd);
729#endif
730 cpu_time.cp_msg[0] = 0;
731 cpu_time.cp_stallpc = 0;
732 goto haveidle;
733 }
734 break;
735 }
736
737 /*
738 * Hotpath - schedule ntd.
739 *
740 * NOTE: For UP there is no mplock and lwkt_getalltokens()
741 * always succeeds.
742 */
743 if (TD_TOKS_NOT_HELD(ntd) ||
744 lwkt_getalltokens(ntd, (spinning >= lwkt_spin_loops)))
745 {
746 goto havethread;
747 }
748
749 /*
750 * Coldpath (SMP only since tokens always succeed on UP)
751 *
752 * We had some contention on the thread we wanted to schedule.
753 * What we do now is try to find a thread that we can schedule
754 * in its stead.
755 *
756 * The coldpath scan does NOT rearrange threads in the run list.
757 * The lwkt_schedulerclock() will assert need_lwkt_resched() on
758 * the next tick whenever the current head is not the current thread.
759 */
760#ifdef INVARIANTS
761 ++token_contention_count[ntd->td_pri];
762 ++ntd->td_contended;
763#endif
764
765 if (fairq_bypass > 0)
766 goto skip;
767
768 xtd = NULL;
769 while ((ntd = TAILQ_NEXT(ntd, td_threadq)) != NULL) {
770 /*
771 * Never schedule threads returning to userland or the
772 * user thread scheduler helper thread when higher priority
773 * threads are present.
774 */
775 if (ntd->td_pri < TDPRI_KERN_LPSCHED) {
776 ntd = NULL;
777 break;
778 }
779
780 /*
781 * Try this one.
782 */
783 if (TD_TOKS_NOT_HELD(ntd) ||
784 lwkt_getalltokens(ntd, (spinning >= lwkt_spin_loops))) {
785 goto havethread;
786 }
787#ifdef INVARIANTS
788 ++token_contention_count[ntd->td_pri];
789 ++ntd->td_contended;
790#endif
791 }
792
793skip:
794 /*
795 * We exhausted the run list, meaning that all runnable threads
796 * are contested.
797 */
798 cpu_pause();
799 ntd = &gd->gd_idlethread;
800#ifdef SMP
801 if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
802 ASSERT_NO_TOKENS_HELD(ntd);
803 /* contention case, do not clear contention mask */
804#endif
805
806 /*
807 * We are going to have to retry but if the current thread is not
808 * on the runq we instead switch through the idle thread to get away
809 * from the current thread. We have to flag for lwkt reschedule
810 * to prevent the idle thread from halting.
811 *
812 * NOTE: A non-zero spinning is passed to lwkt_getalltokens() to
813 * instruct it to deal with the potential for deadlocks by
814 * ordering the tokens by address.
815 */
816 if ((td->td_flags & TDF_RUNQ) == 0) {
817 need_lwkt_resched(); /* prevent hlt */
818 goto haveidle;
819 }
820#if defined(INVARIANTS) && defined(__amd64__)
821 if ((read_rflags() & PSL_I) == 0) {
822 cpu_enable_intr();
823 panic("lwkt_switch() called with interrupts disabled");
824 }
825#endif
826
827 /*
828 * Number of iterations so far. After a certain point we switch to
829 * a sorted-address/monitor/mwait version of lwkt_getalltokens()
830 */
831 if (spinning < 0x7FFFFFFF)
832 ++spinning;
833
834#ifdef SMP
835 /*
836 * lwkt_getalltokens() failed in sorted token mode, we can use
837 * monitor/mwait in this case.
838 */
839 if (spinning >= lwkt_spin_loops &&
840 (cpu_mi_feature & CPU_MI_MONITOR) &&
841 lwkt_spin_monitor)
842 {
843 cpu_mmw_pause_int(&gd->gd_reqflags,
844 (gd->gd_reqflags | RQF_SPINNING) &
845 ~RQF_IDLECHECK_WK_MASK);
846 }
847#endif
848
849 /*
850 * We already checked that td is still scheduled so this should be
851 * safe.
852 */
853 splz_check();
854
855 /*
856 * This experimental resequencer is used as a fall-back to reduce
857 * hw cache line contention by placing each core's scheduler into a
858 * time-domain-multiplexed slot.
859 *
860 * The resequencer is disabled by default. Its functionality has
861 * largely been superseded by the token algorithm, which limits races
862 * to a subset of cores.
863 *
864 * The resequencer algorithm tends to break down when more than
865 * 20 cores are contending. What appears to happen is that new
866 * tokens can be obtained out of address-sorted order by new cores
867 * while existing cores languish in long delays between retries and
868 * wind up being starved-out of the token acquisition.
869 */
870 if (lwkt_spin_reseq && spinning >= lwkt_spin_reseq) {
871 int cseq = atomic_fetchadd_int(&lwkt_cseq_windex, 1);
872 int oseq;
873
874 while ((oseq = lwkt_cseq_rindex) != cseq) {
875 cpu_ccfence();
876#if 1
877 if (cpu_mi_feature & CPU_MI_MONITOR) {
878 cpu_mmw_pause_int(&lwkt_cseq_rindex, oseq);
879 } else {
880#endif
881 cpu_pause();
882 cpu_lfence();
883#if 1
884 }
885#endif
886 }
887 DELAY(1);
888 atomic_add_int(&lwkt_cseq_rindex, 1);
889 }
890 /* highest level for(;;) loop */
891 }
892
893havethread:
894 /*
895 * If the thread we came up with has a priority higher than or equal to
896 * that of the thread at the head of the queue, we move our thread to the
897 * front. This way we can always check the front of the queue.
898 *
899 * Clear gd_idle_repeat when doing a normal switch to a non-idle
900 * thread.
901 */
902 ntd->td_wmesg = NULL;
903 ++gd->gd_cnt.v_swtch;
904#if 0
905 xtd = TAILQ_FIRST(&gd->gd_tdrunq);
906 if (ntd != xtd && ntd->td_pri >= xtd->td_pri) {
907 TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
908 TAILQ_INSERT_HEAD(&gd->gd_tdrunq, ntd, td_threadq);
909 }
910#endif
911 gd->gd_idle_repeat = 0;
912
913havethread_preempted:
914 /*
915 * If the new target does not need the MP lock and we are holding it,
916 * release the MP lock. If the new target requires the MP lock we have
917 * already acquired it for the target.
918 */
919 ;
920haveidle:
921 KASSERT(ntd->td_critcount,
922 ("priority problem in lwkt_switch %d %d",
923 td->td_critcount, ntd->td_critcount));
924
925 if (td != ntd) {
926 /*
927 * Execute the actual thread switch operation. This function
928 * returns to the current thread and returns the previous thread
929 * (which may be different from the thread we switched to).
930 *
931 * We are responsible for marking ntd as TDF_RUNNING.
932 */
933 ++switch_count;
934 KTR_LOG(ctxsw_sw, gd->gd_cpuid, ntd);
935 ntd->td_flags |= TDF_RUNNING;
936 lwkt_switch_return(td->td_switch(ntd));
937 /* ntd invalid, td_switch() can return a different thread_t */
938 }
939
940#if 1
941 /*
942 * catch-all
943 */
944 splz_check();
945#endif
946 /* NOTE: current cpu may have changed after switch */
947 crit_exit_quick(td);
948}
949
950/*
951 * Called by assembly in the td_switch (thread restore path) for thread
952 * bootstrap cases which do not 'return' to lwkt_switch().
953 */
954void
955lwkt_switch_return(thread_t otd)
956{
957#ifdef SMP
958 globaldata_t rgd;
959
960 /*
961 * Check if otd was migrating. Now that we are on ntd we can finish
962 * up the migration. This is a bit messy but it is the only place
963 * where td is known to be fully descheduled.
964 *
965 * We can only activate the migration if otd was migrating but not
966 * held on the cpu due to a preemption chain. We still have to
967 * clear TDF_RUNNING on the old thread either way.
968 *
969 * We are responsible for clearing the previously running thread's
970 * TDF_RUNNING.
971 */
972 if ((rgd = otd->td_migrate_gd) != NULL &&
973 (otd->td_flags & TDF_PREEMPT_LOCK) == 0) {
974 KKASSERT((otd->td_flags & (TDF_MIGRATING | TDF_RUNNING)) ==
975 (TDF_MIGRATING | TDF_RUNNING));
976 otd->td_migrate_gd = NULL;
977 otd->td_flags &= ~TDF_RUNNING;
978 lwkt_send_ipiq(rgd, lwkt_setcpu_remote, otd);
979 } else {
980 otd->td_flags &= ~TDF_RUNNING;
981 }
982#else
983 otd->td_flags &= ~TDF_RUNNING;
984#endif
985}
986
987/*
988 * Request that the target thread preempt the current thread. Preemption
989 * only works under a specific set of conditions:
990 *
991 * - We are not preempting ourselves
992 * - The target thread is owned by the current cpu
993 * - We are not currently being preempted
994 * - The target is not currently being preempted
995 * - We are not holding any spin locks
996 * - The target thread is not holding any tokens
997 * - We are able to satisfy the target's MP lock requirements (if any).
998 *
999 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION. Typically
1000 * this is called via lwkt_schedule() through the td_preemptable callback.
1001 * critcount is the managed critical priority that we should ignore in order
1002 * to determine whether preemption is possible (aka usually just the crit
1003 * priority of lwkt_schedule() itself).
1004 *
1005 * XXX at the moment we run the target thread in a critical section during
1006 * the preemption in order to prevent the target from taking interrupts
1007 * that *WE* can't. Preemption is strictly limited to interrupt threads
1008 * and interrupt-like threads, outside of a critical section, and the
1009 * preempted source thread will be resumed the instant the target blocks
1010 * whether or not the source is scheduled (i.e. preemption is supposed to
1011 * be as transparent as possible).
1012 */
1013void
1014lwkt_preempt(thread_t ntd, int critcount)
1015{
1016 struct globaldata *gd = mycpu;
1017 thread_t xtd;
1018 thread_t td;
1019 int save_gd_intr_nesting_level;
1020
1021 /*
1022 * The caller has put us in a critical section. We can only preempt
1023 * if the caller of the caller was not in a critical section (basically
1024 * a local interrupt), as determined by the 'critcount' parameter. We
1025 * also can't preempt if the caller is holding any spinlocks (even if
1026 * he isn't in a critical section). This also handles the tokens test.
1027 *
1028 * YYY The target thread must be in a critical section (else it must
1029 * inherit our critical section? I dunno yet).
1030 */
1031 KASSERT(ntd->td_critcount, ("BADCRIT0 %d", ntd->td_pri));
1032
1033 td = gd->gd_curthread;
1034 if (preempt_enable == 0) {
1035#if 0
1036 if (ntd->td_pri > td->td_pri)
1037 need_lwkt_resched();
1038#endif
1039 ++preempt_miss;
1040 return;
1041 }
1042 if (ntd->td_pri <= td->td_pri) {
1043 ++preempt_miss;
1044 return;
1045 }
1046 if (td->td_critcount > critcount) {
1047 ++preempt_miss;
1048#if 0
1049 need_lwkt_resched();
1050#endif
1051 return;
1052 }
1053#ifdef SMP
1054 if (ntd->td_gd != gd) {
1055 ++preempt_miss;
1056#if 0
1057 need_lwkt_resched();
1058#endif
1059 return;
1060 }
1061#endif
1062 /*
1063 * We don't have to check spinlocks here as they will also bump
1064 * td_critcount.
1065 *
1066 * Do not try to preempt if the target thread is holding any tokens.
1067 * We could try to acquire the tokens but this case is so rare there
1068 * is no need to support it.
1069 */
1070 KKASSERT(gd->gd_spinlocks_wr == 0);
1071
1072 if (TD_TOKS_HELD(ntd)) {
1073 ++preempt_miss;
1074#if 0
1075 need_lwkt_resched();
1076#endif
1077 return;
1078 }
1079 if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
1080 ++preempt_weird;
1081#if 0
1082 need_lwkt_resched();
1083#endif
1084 return;
1085 }
1086 if (ntd->td_preempted) {
1087 ++preempt_hit;
1088#if 0
1089 need_lwkt_resched();
1090#endif
1091 return;
1092 }
1093 KKASSERT(gd->gd_processing_ipiq == 0);
1094
1095 /*
1096 * Since we are able to preempt the current thread, there is no need to
1097 * call need_lwkt_resched().
1098 *
1099 * We must temporarily clear gd_intr_nesting_level around the switch
1100 * since switchouts from the target thread are allowed (they will just
1101 * return to our thread), and since the target thread has its own stack.
1102 *
1103 * A preemption must switch back to the original thread, assert the
1104 * case.
1105 */
1106 ++preempt_hit;
1107 ntd->td_preempted = td;
1108 td->td_flags |= TDF_PREEMPT_LOCK;
1109 KTR_LOG(ctxsw_pre, gd->gd_cpuid, ntd);
1110 save_gd_intr_nesting_level = gd->gd_intr_nesting_level;
1111 gd->gd_intr_nesting_level = 0;
1112 ntd->td_flags |= TDF_RUNNING;
1113 xtd = td->td_switch(ntd);
1114 KKASSERT(xtd == ntd);
1115 lwkt_switch_return(xtd);
1116 gd->gd_intr_nesting_level = save_gd_intr_nesting_level;
1117
1118 KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
1119 ntd->td_preempted = NULL;
1120 td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
1121}
1122
1123/*
1124 * Conditionally call splz() if gd_reqflags indicates work is pending.
1125 * This will work inside a critical section but not inside a hard code
1126 * section.
1127 *
1128 * (self contained on a per cpu basis)
1129 */
1130void
1131splz_check(void)
1132{
1133 globaldata_t gd = mycpu;
1134 thread_t td = gd->gd_curthread;
1135
1136 if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) &&
1137 gd->gd_intr_nesting_level == 0 &&
1138 td->td_nest_count < 2)
1139 {
1140 splz();
1141 }
1142}
1143
1144/*
1145 * This version is integrated into crit_exit, reqflags has already
1146 * been tested but td_critcount has not.
1147 *
1148 * We only want to execute the splz() on the 1->0 transition of
1149 * critcount and not in a hard code section or if too deeply nested.
1150 */
1151void
1152lwkt_maybe_splz(thread_t td)
1153{
1154 globaldata_t gd = td->td_gd;
1155
1156 if (td->td_critcount == 0 &&
1157 gd->gd_intr_nesting_level == 0 &&
1158 td->td_nest_count < 2)
1159 {
1160 splz();
1161 }
1162}
1163
1164/*
1165 * Drivers which set up processing co-threads can call this function to
1166 * run the co-thread at a higher priority and to allow it to preempt
1167 * normal threads.
1168 */
1169void
1170lwkt_set_interrupt_support_thread(void)
1171{
1172 thread_t td = curthread;
1173
1174 lwkt_setpri_self(TDPRI_INT_SUPPORT);
1175 td->td_flags |= TDF_INTTHREAD;
1176 td->td_preemptable = lwkt_preempt;
1177}
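
/*
 * Example sketch: a driver co-thread would normally call this once when
 * it starts up and then loop processing work.  The softc, helper
 * function, and wait channel below are hypothetical.
 *
 *	static void
 *	mydrv_cothread(void *arg)
 *	{
 *		struct mydrv_softc *sc = arg;
 *
 *		lwkt_set_interrupt_support_thread();
 *		for (;;) {
 *			mydrv_process_completions(sc);
 *			tsleep(sc, 0, "mydrvwt", 0);
 *		}
 *	}
 */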
1178
1179
1180/*
1181 * This function is used to negotiate a passive release of the current
1182 * process/lwp designation with the user scheduler, allowing the user
1183 * scheduler to schedule another user thread. The related kernel thread
1184 * (curthread) continues running in the released state.
1185 */
1186void
1187lwkt_passive_release(struct thread *td)
1188{
1189 struct lwp *lp = td->td_lwp;
1190
1191 td->td_release = NULL;
1192 lwkt_setpri_self(TDPRI_KERN_USER);
1193 lp->lwp_proc->p_usched->release_curproc(lp);
1194}
1195
1196
1197/*
1198 * This implements a LWKT yield, allowing a kernel thread to yield to other
1199 * kernel threads at the same or higher priority. This function can be
1200 * called in a tight loop and will typically only yield once per tick.
1201 *
1202 * Most kernel threads run at the same priority in order to allow equal
1203 * sharing.
1204 *
1205 * (self contained on a per cpu basis)
1206 */
1207void
1208lwkt_yield(void)
1209{
1210 globaldata_t gd = mycpu;
1211 thread_t td = gd->gd_curthread;
1212
1213 if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2)
1214 splz();
1215 if (lwkt_resched_wanted()) {
1216 lwkt_schedule_self(curthread);
1217 lwkt_switch();
1218 }
1219}
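
/*
 * Example sketch: a cpu-bound kernel loop would sprinkle lwkt_yield()
 * calls so that equal or higher priority LWKTs get a chance to run.
 * The work function below is hypothetical.
 *
 *	for (i = 0; i < count; ++i) {
 *		process_item(i);
 *		lwkt_yield();
 *	}
 */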
1220
1221/*
1222 * This yield is designed for kernel threads with a user context.
1223 *
1224 * The kernel acting on behalf of the user is potentially cpu-bound;
1225 * this function will efficiently allow other threads to run and also
1226 * switch to other processes by releasing the current process designation.
1227 *
1228 * The lwkt_user_yield() function is designed to have very low overhead
1229 * if no yield is determined to be needed.
1230 */
1231void
1232lwkt_user_yield(void)
1233{
1234 globaldata_t gd = mycpu;
1235 thread_t td = gd->gd_curthread;
1236
1237 /*
1238 * Always run any pending interrupts in case we are in a critical
1239 * section.
1240 */
1241 if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2)
1242 splz();
1243
1244 /*
1245 * Switch (which forces a release) if another kernel thread needs
1246 * the cpu, if userland wants us to resched, or if our kernel
1247 * quantum has run out.
1248 */
1249 if (lwkt_resched_wanted() ||
1250 user_resched_wanted())
1251 {
1252 lwkt_switch();
1253 }
1254
1255#if 0
1256 /*
1257 * Reacquire the current process if we are released.
1258 *
1259 * XXX not implemented atm. The kernel may be holding locks and such,
1260 * so we want the thread to continue to receive cpu.
1261 */
1262 if (td->td_release == NULL && lp) {
1263 lp->lwp_proc->p_usched->acquire_curproc(lp);
1264 td->td_release = lwkt_passive_release;
1265 lwkt_setpri_self(TDPRI_USER_NORM);
1266 }
1267#endif
1268}
1269
1270/*
1271 * Generic schedule. Possibly schedule threads belonging to other cpus and
1272 * deal with threads that might be blocked on a wait queue.
1273 *
1274 * We have a little helper inline function which does additional work after
1275 * the thread has been enqueued, including dealing with preemption and
1276 * setting need_lwkt_resched() (which prevents the kernel from returning
1277 * to userland until it has processed higher priority threads).
1278 *
1279 * It is possible for this routine to be called after a failed _enqueue
1280 * (due to the target thread migrating, sleeping, or otherwise blocked).
1281 * We have to check that the thread is actually on the run queue!
1282 */
1283static __inline
1284void
1285_lwkt_schedule_post(globaldata_t gd, thread_t ntd, int ccount)
1286{
1287 if (ntd->td_flags & TDF_RUNQ) {
1288 if (ntd->td_preemptable) {
1289 ntd->td_preemptable(ntd, ccount); /* YYY +token */
1290 }
1291 }
1292}
1293
1294static __inline
1295void
1296_lwkt_schedule(thread_t td)
1297{
1298 globaldata_t mygd = mycpu;
1299
1300 KASSERT(td != &td->td_gd->gd_idlethread,
1301 ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
1302 KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
1303 crit_enter_gd(mygd);
1304 KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
1305 if (td == mygd->gd_curthread) {
1306 _lwkt_enqueue(td);
1307 } else {
1308 /*
1309 * If we own the thread, there is no race (since we are in a
1310 * critical section). If we do not own the thread there might
1311 * be a race but the target cpu will deal with it.
1312 */
1313#ifdef SMP
1314 if (td->td_gd == mygd) {
1315 _lwkt_enqueue(td);
1316 _lwkt_schedule_post(mygd, td, 1);
1317 } else {
1318 lwkt_send_ipiq3(td->td_gd, lwkt_schedule_remote, td, 0);
1319 }
1320#else
1321 _lwkt_enqueue(td);
1322 _lwkt_schedule_post(mygd, td, 1);
1323#endif
1324 }
1325 crit_exit_gd(mygd);
1326}
1327
1328void
1329lwkt_schedule(thread_t td)
1330{
1331 _lwkt_schedule(td);
1332}
1333
1334void
1335lwkt_schedule_noresched(thread_t td) /* XXX not impl */
1336{
1337 _lwkt_schedule(td);
1338}
1339
1340#ifdef SMP
1341
1342/*
1343 * When scheduled remotely, if frame != NULL the IPIQ is being run via
1344 * doreti or an interrupt, and preemption can be allowed.
1345 *
1346 * To allow preemption we have to drop the critical section so only
1347 * one is present in _lwkt_schedule_post.
1348 */
1349static void
1350lwkt_schedule_remote(void *arg, int arg2, struct intrframe *frame)
1351{
1352 thread_t td = curthread;
1353 thread_t ntd = arg;
1354
1355 if (frame && ntd->td_preemptable) {
1356 crit_exit_noyield(td);
1357 _lwkt_schedule(ntd);
1358 crit_enter_quick(td);
1359 } else {
1360 _lwkt_schedule(ntd);
1361 }
1362}
1363
1364/*
1365 * Thread migration using a 'Pull' method. The thread may or may not be
1366 * the current thread. It MUST be descheduled and in a stable state.
1367 * lwkt_giveaway() must be called on the cpu owning the thread.
1368 *
1369 * At any point after lwkt_giveaway() is called, the target cpu may
1370 * 'pull' the thread by calling lwkt_acquire().
1371 *
1372 * We have to make sure the thread is not sitting on a per-cpu tsleep
1373 * queue or it will blow up when it moves to another cpu.
1374 *
1375 * MPSAFE - must be called under very specific conditions.
1376 */
1377void
1378lwkt_giveaway(thread_t td)
1379{
1380 globaldata_t gd = mycpu;
1381
1382 crit_enter_gd(gd);
1383 if (td->td_flags & TDF_TSLEEPQ)
1384 tsleep_remove(td);
1385 KKASSERT(td->td_gd == gd);
1386 TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
1387 td->td_flags |= TDF_MIGRATING;
1388 crit_exit_gd(gd);
1389}
1390
1391void
1392lwkt_acquire(thread_t td)
1393{
1394 globaldata_t gd;
1395 globaldata_t mygd;
1396 int retry = 10000000;
1397
1398 KKASSERT(td->td_flags & TDF_MIGRATING);
1399 gd = td->td_gd;
1400 mygd = mycpu;
1401 if (gd != mycpu) {
1402 cpu_lfence();
1403 KKASSERT((td->td_flags & TDF_RUNQ) == 0);
1404 crit_enter_gd(mygd);
1405 DEBUG_PUSH_INFO("lwkt_acquire");
1406 while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) {
1407#ifdef SMP
1408 lwkt_process_ipiq();
1409#endif
1410 cpu_lfence();
1411 if (--retry == 0) {
1412 kprintf("lwkt_acquire: stuck: td %p td->td_flags %08x\n",
1413 td, td->td_flags);
1414 retry = 10000000;
1415 }
1416 }
1417 DEBUG_POP_INFO();
1418 cpu_mfence();
1419 td->td_gd = mygd;
1420 TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq);
1421 td->td_flags &= ~TDF_MIGRATING;
1422 crit_exit_gd(mygd);
1423 } else {
1424 crit_enter_gd(mygd);
1425 TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq);
1426 td->td_flags &= ~TDF_MIGRATING;
1427 crit_exit_gd(mygd);
1428 }
1429}
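
/*
 * Example sketch: a pull-style migration of a descheduled, stable
 * thread td.  The owning cpu first runs lwkt_giveaway(td); any time
 * after that the target cpu may run:
 *
 *	lwkt_acquire(td);
 *	lwkt_schedule(td);
 */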
1430
1431#endif
1432
1433/*
1434 * Generic deschedule. Descheduling threads other than your own should be
1435 * done only in carefully controlled circumstances. Descheduling is
1436 * asynchronous.
1437 *
1438 * This function may block if the cpu has run out of messages.
1439 */
1440void
1441lwkt_deschedule(thread_t td)
1442{
1443 crit_enter();
1444#ifdef SMP
1445 if (td == curthread) {
1446 _lwkt_dequeue(td);
1447 } else {
1448 if (td->td_gd == mycpu) {
1449 _lwkt_dequeue(td);
1450 } else {
1451 lwkt_send_ipiq(td->td_gd, (ipifunc1_t)lwkt_deschedule, td);
1452 }
1453 }
1454#else
1455 _lwkt_dequeue(td);
1456#endif
1457 crit_exit();
1458}
1459
1460/*
1461 * Set the target thread's priority. This routine does not automatically
1462 * switch to a higher priority thread, LWKT threads are not designed for
1463 * continuous priority changes. Yield if you want to switch.
1464 */
1465void
1466lwkt_setpri(thread_t td, int pri)
1467{
1468 if (td->td_pri != pri) {
1469 KKASSERT(pri >= 0);
1470 crit_enter();
1471 if (td->td_flags & TDF_RUNQ) {
1472 KKASSERT(td->td_gd == mycpu);
1473 _lwkt_dequeue(td);
1474 td->td_pri = pri;
1475 _lwkt_enqueue(td);
1476 } else {
1477 td->td_pri = pri;
1478 }
1479 crit_exit();
1480 }
1481}
1482
1483/*
1484 * Set the initial priority for a thread prior to it being scheduled for
1485 * the first time. The thread MUST NOT be scheduled before or during
1486 * this call. The thread may be assigned to a cpu other than the current
1487 * cpu.
1488 *
1489 * Typically used after a thread has been created with TDF_STOPPREQ,
1490 * and before the thread is initially scheduled.
1491 */
1492void
1493lwkt_setpri_initial(thread_t td, int pri)
1494{
1495 KKASSERT(pri >= 0);
1496 KKASSERT((td->td_flags & TDF_RUNQ) == 0);
1497 td->td_pri = pri;
1498}
1499
1500void
1501lwkt_setpri_self(int pri)
1502{
1503 thread_t td = curthread;
1504
1505 KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
1506 crit_enter();
1507 if (td->td_flags & TDF_RUNQ) {
1508 _lwkt_dequeue(td);
1509 td->td_pri = pri;
1510 _lwkt_enqueue(td);
1511 } else {
1512 td->td_pri = pri;
1513 }
1514 crit_exit();
1515}
1516
1517/*
1518 * hz tick scheduler clock for LWKT threads
1519 */
1520void
1521lwkt_schedulerclock(thread_t td)
1522{
1523 globaldata_t gd = td->td_gd;
1524 thread_t xtd;
1525
1526 if (TAILQ_FIRST(&gd->gd_tdrunq) == td) {
1527 /*
1528 * If the current thread is at the head of the runq shift it to the
1529 * end of any equal-priority threads and request a LWKT reschedule
1530 * if it moved.
1531 */
1532 xtd = TAILQ_NEXT(td, td_threadq);
1533 if (xtd && xtd->td_pri == td->td_pri) {
1534 TAILQ_REMOVE(&gd->gd_tdrunq, td, td_threadq);
1535 while (xtd && xtd->td_pri == td->td_pri)
1536 xtd = TAILQ_NEXT(xtd, td_threadq);
1537 if (xtd)
1538 TAILQ_INSERT_BEFORE(xtd, td, td_threadq);
1539 else
1540 TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
1541 need_lwkt_resched();
1542 }
1543 } else {
1544 /*
1545 * If we scheduled a thread other than the one at the head of the
1546 * queue always request a reschedule every tick.
1547 */
1548 need_lwkt_resched();
1549 }
1550}
1551
1552/*
1553 * Migrate the current thread to the specified cpu.
1554 *
1555 * This is accomplished by descheduling ourselves from the current cpu
1556 * and setting td_migrate_gd. The lwkt_switch() code will detect that the
1557 * 'old' thread wants to migrate after it has been completely switched out
1558 * and will complete the migration.
1559 *
1560 * TDF_MIGRATING prevents scheduling races while the thread is being migrated.
1561 *
1562 * We must be sure to release our current process designation (if a user
1563 * process) before clearing out any tsleepq we are on because the release
1564 * code may re-add us.
1565 *
1566 * We must be sure to remove ourselves from the current cpu's tsleepq
1567 * before potentially moving to another queue. The thread can be on
1568 * a tsleepq due to a left-over tsleep_interlock().
1569 */
1570
1571void
1572lwkt_setcpu_self(globaldata_t rgd)
1573{
1574#ifdef SMP
1575 thread_t td = curthread;
1576
1577 if (td->td_gd != rgd) {
1578 crit_enter_quick(td);
1579
1580 if (td->td_release)
1581 td->td_release(td);
1582 if (td->td_flags & TDF_TSLEEPQ)
1583 tsleep_remove(td);
1584
1585 /*
1586 * Set TDF_MIGRATING to prevent a spurious reschedule while we are
1587 * trying to deschedule ourselves and switch away, then deschedule
1588 * ourself, remove us from tdallq, and set td_migrate_gd. Finally,
1589 * call lwkt_switch() to complete the operation.
1590 */
1591 td->td_flags |= TDF_MIGRATING;
1592 lwkt_deschedule_self(td);
1593 TAILQ_REMOVE(&td->td_gd->gd_tdallq, td, td_allq);
1594 td->td_migrate_gd = rgd;
1595 lwkt_switch();
1596
1597 /*
1598 * We are now on the target cpu
1599 */
1600 KKASSERT(rgd == mycpu);
1601 TAILQ_INSERT_TAIL(&rgd->gd_tdallq, td, td_allq);
1602 crit_exit_quick(td);
1603 }
1604#endif
1605}
1606
1607void
1608lwkt_migratecpu(int cpuid)
1609{
1610#ifdef SMP
1611 globaldata_t rgd;
1612
1613 rgd = globaldata_find(cpuid);
1614 lwkt_setcpu_self(rgd);
1615#endif
1616}
1617
1618#ifdef SMP
1619/*
1620 * Remote IPI for cpu migration (called while in a critical section so we
1621 * do not have to enter another one).
1622 *
1623 * The thread (td) has already been completely descheduled from the
1624 * originating cpu and we can simply assert the case. The thread is
1625 * assigned to the new cpu and enqueued.
1626 *
1627 * The thread will re-add itself to tdallq when it resumes execution.
1628 */
1629static void
1630lwkt_setcpu_remote(void *arg)
1631{
1632 thread_t td = arg;
1633 globaldata_t gd = mycpu;
1634
1635 KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) == 0);
1636 td->td_gd = gd;
1637 cpu_mfence();
1638 td->td_flags &= ~TDF_MIGRATING;
1639 KKASSERT(td->td_migrate_gd == NULL);
1640 KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
1641 _lwkt_enqueue(td);
1642}
1643#endif
1644
1645struct lwp *
1646lwkt_preempted_proc(void)
1647{
1648 thread_t td = curthread;
1649 while (td->td_preempted)
1650 td = td->td_preempted;
1651 return(td->td_lwp);
1652}
1653
1654/*
1655 * Create a kernel process/thread/whatever. It shares its address space
1656 * with proc0 - ie: kernel only.
1657 *
1658 * If the cpu is not specified one will be selected. In the future
1659 * specifying a cpu of -1 will enable kernel thread migration between
1660 * cpus.
1661 */
1662int
1663lwkt_create(void (*func)(void *), void *arg, struct thread **tdp,
1664 thread_t template, int tdflags, int cpu, const char *fmt, ...)
1665{
1666 thread_t td;
1667 __va_list ap;
1668
1669 td = lwkt_alloc_thread(template, LWKT_THREAD_STACK, cpu,
1670 tdflags);
1671 if (tdp)
1672 *tdp = td;
1673 cpu_set_thread_handler(td, lwkt_exit, func, arg);
1674
1675 /*
1676 * Set up arg0 for 'ps' etc
1677 */
1678 __va_start(ap, fmt);
1679 kvsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
1680 __va_end(ap);
1681
1682 /*
1683 * Schedule the thread to run
1684 */
1685 if ((td->td_flags & TDF_STOPREQ) == 0)
1686 lwkt_schedule(td);
1687 else
1688 td->td_flags &= ~TDF_STOPREQ;
1689 return 0;
1690}
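
/*
 * Example sketch: a typical caller might start a kernel thread on any
 * cpu (cpu == -1) as below.  The worker function, its argument, and
 * the thread pointer are hypothetical; returning from the worker falls
 * into lwkt_exit() via the cpu_set_thread_handler() setup above.
 *
 *	static struct thread *example_td;
 *
 *	static void
 *	example_worker(void *dummy)
 *	{
 *		while (work_available())
 *			do_work();
 *	}
 *
 *	error = lwkt_create(example_worker, NULL, &example_td, NULL,
 *			    0, -1, "examplethr");
 */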
1691
1692/*
1693 * Destroy an LWKT thread. Warning! This function is not called when
1694 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
1695 * uses a different reaping mechanism.
1696 */
1697void
1698lwkt_exit(void)
1699{
1700 thread_t td = curthread;
1701 thread_t std;
1702 globaldata_t gd;
1703
1704 /*
1705 * Do any cleanup that might block here
1706 */
1707 if (td->td_flags & TDF_VERBOSE)
1708 kprintf("kthread %p %s has exited\n", td, td->td_comm);
1709 caps_exit(td);
1710 biosched_done(td);
1711 dsched_exit_thread(td);
1712
1713 /*
1714 * Get us into a critical section to interlock gd_freetd and loop
1715 * until we can get it freed.
1716 *
1717 * We have to cache the current td in gd_freetd because objcache_put()ing
1718 * it would rip it out from under us while our thread is still active.
1719 */
1720 gd = mycpu;
1721 crit_enter_quick(td);
1722 while ((std = gd->gd_freetd) != NULL) {
1723 KKASSERT((std->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) == 0);
1724 gd->gd_freetd = NULL;
1725 objcache_put(thread_cache, std);
1726 }
1727
1728 /*
1729 * Remove thread resources from kernel lists and deschedule us for
1730 * the last time. We cannot block after this point or we may end
1731 * up with a stale td on the tsleepq.
1732 */
1733 if (td->td_flags & TDF_TSLEEPQ)
1734 tsleep_remove(td);
1735 lwkt_deschedule_self(td);
1736 lwkt_remove_tdallq(td);
1737 KKASSERT(td->td_refs == 0);
1738
1739 /*
1740 * Final cleanup
1741 */
1742 KKASSERT(gd->gd_freetd == NULL);
1743 if (td->td_flags & TDF_ALLOCATED_THREAD)
1744 gd->gd_freetd = td;
1745 cpu_thread_exit();
1746}
1747
1748void
1749lwkt_remove_tdallq(thread_t td)
1750{
1751 KKASSERT(td->td_gd == mycpu);
1752 TAILQ_REMOVE(&td->td_gd->gd_tdallq, td, td_allq);
1753}
1754
1755/*
1756 * Code reduction and branch prediction improvements. Call/return
1757 * overhead on modern cpus often degenerates into 0 cycles due to
1758 * the cpu's branch prediction hardware and return pc cache. We
1759 * can take advantage of this by not inlining medium-complexity
1760 * functions and we can also reduce the branch prediction impact
1761 * by collapsing perfectly predictable branches into a single
1762 * procedure instead of duplicating it.
1763 *
1764 * Is any of this noticeable? Probably not, so I'll take the
1765 * smaller code size.
1766 */
1767void
1768crit_exit_wrapper(__DEBUG_CRIT_ARG__)
1769{
1770 _crit_exit(mycpu __DEBUG_CRIT_PASS_ARG__);
1771}
1772
1773void
1774crit_panic(void)
1775{
1776 thread_t td = curthread;
1777 int lcrit = td->td_critcount;
1778
1779 td->td_critcount = 0;
1780 panic("td_critcount is/would-go negative! %p %d", td, lcrit);
1781 /* NOT REACHED */
1782}
1783
1784#ifdef SMP
1785
1786/*
1787 * Called from debugger/panic on cpus which have been stopped. We must still
1788 * process the IPIQ while stopped, even if we were stopped while in a critical
1789 * section (XXX).
1790 *
1791 * If we are dumping also try to process any pending interrupts. This may
1792 * or may not work depending on the state of the cpu at the point it was
1793 * stopped.
1794 */
1795void
1796lwkt_smp_stopped(void)
1797{
1798 globaldata_t gd = mycpu;
1799
1800 crit_enter_gd(gd);
1801 if (dumping) {
1802 lwkt_process_ipiq();
1803 splz();
1804 } else {
1805 lwkt_process_ipiq();
1806 }
1807 crit_exit_gd(gd);
1808}
1809
1810#endif