gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	*
	14	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
	15	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	17	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
	18	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	19	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	20	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	21	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	22	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	23	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	24	* SUCH DAMAGE.
	25	*
	26	* Each cpu in a system has its own self-contained light weight kernel
	27	* thread scheduler, which means that generally speaking we only need
	28	* to use a critical section to prevent hiccups.
	29	*
	30	* $DragonFly: src/sys/kern/lwkt_thread.c,v 1.15 2003/07/06 21:23:51 dillon Exp $
	31	*/
	32
	33	#include <sys/param.h>
	34	#include <sys/systm.h>
	35	#include <sys/kernel.h>
	36	#include <sys/proc.h>
	37	#include <sys/rtprio.h>
	38	#include <sys/queue.h>
	39	#include <sys/thread2.h>
	40	#include <sys/sysctl.h>
	41	#include <sys/kthread.h>
	42	#include <machine/cpu.h>
	43	#include <sys/lock.h>
	44
	45	#include <vm/vm.h>
	46	#include <vm/vm_param.h>
	47	#include <vm/vm_kern.h>
	48	#include <vm/vm_object.h>
	49	#include <vm/vm_page.h>
	50	#include <vm/vm_map.h>
	51	#include <vm/vm_pager.h>
	52	#include <vm/vm_extern.h>
	53	#include <vm/vm_zone.h>
	54
	55	#include <machine/stdarg.h>
	56
	57	static int untimely_switch = 0;
	58	SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
	59	static quad_t switch_count = 0;
	60	SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
	61	static quad_t preempt_hit = 0;
	62	SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
	63	static quad_t preempt_miss = 0;
	64	SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
	65	static quad_t preempt_weird = 0;
	66	SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
	67
	68	/*
	69	* These helper procedures handle the runq, they can only be called from
	70	* within a critical section.
	71	*/
	72	static __inline
	73	void
	74	_lwkt_dequeue(thread_t td)
	75	{
	76	if (td->td_flags & TDF_RUNQ) {
	77	int nq = td->td_pri & TDPRI_MASK;
	78	struct globaldata *gd = mycpu;
	79
	80	td->td_flags &= ~TDF_RUNQ;
	81	TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
	82	/* runqmask is passively cleaned up by the switcher */
	83	}
	84	}
	85
	86	static __inline
	87	void
	88	_lwkt_enqueue(thread_t td)
	89	{
	90	if ((td->td_flags & TDF_RUNQ) == 0) {
	91	int nq = td->td_pri & TDPRI_MASK;
	92	struct globaldata *gd = mycpu;
	93
	94	td->td_flags \|= TDF_RUNQ;
	95	TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
	96	gd->gd_runqmask \|= 1 << nq;
	97	#if 0
	98	/*
	99	* YYY needs cli/sti protection? gd_reqpri set by interrupt
	100	* when made pending. need better mechanism.
	101	*/
	102	if (gd->gd_reqpri < (td->td_pri & TDPRI_MASK))
	103	gd->gd_reqpri = (td->td_pri & TDPRI_MASK);
	104	#endif
	105	}
	106	}
	107
	108	/*
	109	* LWKTs operate on a per-cpu basis
	110	*
	111	* WARNING! Called from early boot, 'mycpu' may not work yet.
	112	*/
	113	void
	114	lwkt_gdinit(struct globaldata *gd)
	115	{
	116	int i;
	117
	118	for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
	119	TAILQ_INIT(&gd->gd_tdrunq[i]);
	120	gd->gd_runqmask = 0;
	121	TAILQ_INIT(&gd->gd_tdallq);
	122	}
	123
	124	/*
	125	* Initialize a thread wait structure prior to first use.
	126	*
	127	* NOTE! called from low level boot code, we cannot do anything fancy!
	128	*/
	129	void
	130	lwkt_init_wait(lwkt_wait_t w)
	131	{
	132	TAILQ_INIT(&w->wa_waitq);
	133	}
	134
	135	/*
	136	* Create a new thread. The thread must be associated with a process context
	137	* or LWKT start address before it can be scheduled.
	138	*
	139	* If you intend to create a thread without a process context this function
	140	* does everything except load the startup and switcher function.
	141	*/
	142	thread_t
	143	lwkt_alloc_thread(struct thread *td)
	144	{
	145	void *stack;
	146	int flags = 0;
	147
	148	if (td == NULL) {
	149	crit_enter();
	150	if (mycpu->gd_tdfreecount > 0) {
	151	--mycpu->gd_tdfreecount;
	152	td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
	153	KASSERT(td != NULL && (td->td_flags & TDF_EXITED),
	154	("lwkt_alloc_thread: unexpected NULL or corrupted td"));
	155	TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
	156	crit_exit();
	157	stack = td->td_kstack;
	158	flags = td->td_flags & (TDF_ALLOCATED_STACK\|TDF_ALLOCATED_THREAD);
	159	} else {
	160	crit_exit();
	161	td = zalloc(thread_zone);
	162	td->td_kstack = NULL;
	163	flags \|= TDF_ALLOCATED_THREAD;
	164	}
	165	}
	166	if ((stack = td->td_kstack) == NULL) {
	167	stack = (void )kmem_alloc(kernel_map, UPAGES PAGE_SIZE);
	168	flags \|= TDF_ALLOCATED_STACK;
	169	}
	170	lwkt_init_thread(td, stack, flags, mycpu);
	171	return(td);
	172	}
	173
	174	/*
	175	* Initialize a preexisting thread structure. This function is used by
	176	* lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
	177	*
	178	* NOTE! called from low level boot code, we cannot do anything fancy!
	179	*/
	180	void
	181	lwkt_init_thread(thread_t td, void stack, int flags, struct globaldata gd)
	182	{
	183	bzero(td, sizeof(struct thread));
	184	td->td_kstack = stack;
	185	td->td_flags \|= flags;
	186	td->td_gd = gd;
	187	td->td_pri = TDPRI_CRIT;
	188	td->td_cpu = gd->gd_cpuid; /* YYY don't need this if have td_gd */
	189	pmap_init_thread(td);
	190	crit_enter();
	191	TAILQ_INSERT_TAIL(&mycpu->gd_tdallq, td, td_allq);
	192	crit_exit();
	193	}
	194
	195	void
	196	lwkt_set_comm(thread_t td, const char *ctl, ...)
	197	{
	198	va_list va;
	199
	200	va_start(va, ctl);
	201	vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
	202	va_end(va);
	203	}
	204
	205	void
	206	lwkt_hold(thread_t td)
	207	{
	208	++td->td_refs;
	209	}
	210
	211	void
	212	lwkt_rele(thread_t td)
	213	{
	214	KKASSERT(td->td_refs > 0);
	215	--td->td_refs;
	216	}
	217
	218	void
	219	lwkt_wait_free(thread_t td)
	220	{
	221	while (td->td_refs)
	222	tsleep(td, PWAIT, "tdreap", hz);
	223	}
	224
	225	void
	226	lwkt_free_thread(thread_t td)
	227	{
	228	struct globaldata *gd = mycpu;
	229
	230	KASSERT(td->td_flags & TDF_EXITED,
	231	("lwkt_free_thread: did not exit! %p", td));
	232
	233	crit_enter();
	234	TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
	235	if (gd->gd_tdfreecount < CACHE_NTHREADS &&
	236	(td->td_flags & TDF_ALLOCATED_THREAD)
	237	) {
	238	++gd->gd_tdfreecount;
	239	TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
	240	crit_exit();
	241	} else {
	242	crit_exit();
	243	if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
	244	kmem_free(kernel_map,
	245	(vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
	246	/* gd invalid */
	247	td->td_kstack = NULL;
	248	}
	249	if (td->td_flags & TDF_ALLOCATED_THREAD)
	250	zfree(thread_zone, td);
	251	}
	252	}
	253
	254
	255	/*
	256	* Switch to the next runnable lwkt. If no LWKTs are runnable then
	257	* switch to the idlethread. Switching must occur within a critical
	258	* section to avoid races with the scheduling queue.
	259	*
	260	* We always have full control over our cpu's run queue. Other cpus
	261	* that wish to manipulate our queue must use the cpu_*msg() calls to
	262	* talk to our cpu, so a critical section is all that is needed and
	263	* the result is very, very fast thread switching.
	264	*
	265	* We always 'own' our own thread and the threads on our run queue,l
	266	* due to TDF_RUNNING or TDF_RUNQ being set. We can safely clear
	267	* TDF_RUNNING while in a critical section.
	268	*
	269	* The td_switch() function must be called while in the critical section.
	270	* This function saves as much state as is appropriate for the type of
	271	* thread.
	272	*
	273	* (self contained on a per cpu basis)
	274	*/
	275	void
	276	lwkt_switch(void)
	277	{
	278	struct globaldata *gd;
	279	thread_t td = curthread;
	280	thread_t ntd;
	281	#ifdef SMP
	282	int mpheld;
	283	#endif
	284
	285	if (mycpu->gd_intr_nesting_level && td->td_preempted == NULL)
	286	panic("lwkt_switch: cannot switch from within an interrupt, yet\n");
	287
	288	crit_enter();
	289	++switch_count;
	290
	291	#ifdef SMP
	292	/*
	293	* td_mpcount cannot be used to determine if we currently hold the
	294	* MP lock because get_mplock() will increment it prior to attempting
	295	* to get the lock, and switch out if it can't. Look at the actual lock.
	296	*/
	297	mpheld = MP_LOCK_HELD();
	298	#endif
	299	if ((ntd = td->td_preempted) != NULL) {
	300	/*
	301	* We had preempted another thread on this cpu, resume the preempted
	302	* thread. This occurs transparently, whether the preempted thread
	303	* was scheduled or not (it may have been preempted after descheduling
	304	* itself).
	305	*
	306	* We have to setup the MP lock for the original thread after backing
	307	* out the adjustment that was made to curthread when the original
	308	* was preempted.
	309	*/
	310	KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
	311	#ifdef SMP
	312	if (ntd->td_mpcount) {
	313	td->td_mpcount -= ntd->td_mpcount;
	314	KKASSERT(td->td_mpcount >= 0);
	315	}
	316	#endif
	317	ntd->td_flags \|= TDF_PREEMPT_DONE;
	318	/* YYY release mp lock on switchback if original doesn't need it */
	319	} else {
	320	/*
	321	* Priority queue / round-robin at each priority. Note that user
	322	* processes run at a fixed, low priority and the user process
	323	* scheduler deals with interactions between user processes
	324	* by scheduling and descheduling them from the LWKT queue as
	325	* necessary.
	326	*
	327	* We have to adjust the MP lock for the target thread. If we
	328	* need the MP lock and cannot obtain it we try to locate a
	329	* thread that does not need the MP lock.
	330	*/
	331	gd = mycpu;
	332	again:
	333	if (gd->gd_runqmask) {
	334	int nq = bsrl(gd->gd_runqmask);
	335	if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
	336	gd->gd_runqmask &= ~(1 << nq);
	337	goto again;
	338	}
	339	#ifdef SMP
	340	if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
	341	/*
	342	* Target needs MP lock and we couldn't get it.
	343	*/
	344	u_int32_t rqmask = gd->gd_runqmask;
	345	while (rqmask) {
	346	TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
	347	if (ntd->td_mpcount == 0)
	348	break;
	349	}
	350	if (ntd)
	351	break;
	352	rqmask &= ~(1 << nq);
	353	nq = bsrl(rqmask);
	354	}
	355	if (ntd == NULL) {
	356	ntd = gd->gd_idletd;
	357	} else {
	358	TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
	359	TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
	360	}
	361	} else {
	362	TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
	363	TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
	364	}
	365	#else
	366	TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
	367	TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
	368	#endif
	369	} else {
	370	ntd = gd->gd_idletd;
	371	}
	372	}
	373	KASSERT(ntd->td_pri >= TDPRI_CRIT,
	374	("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
	375
	376	/*
	377	* Do the actual switch. If the new target does not need the MP lock
	378	* and we are holding it, release the MP lock. If the new target requires
	379	* the MP lock we have already acquired it for the target.
	380	*/
	381	#ifdef SMP
	382	if (ntd->td_mpcount == 0 ) {
	383	if (MP_LOCK_HELD())
	384	cpu_rel_mplock();
	385	} else {
	386	ASSERT_MP_LOCK_HELD();
	387	}
	388	#endif
	389
	390	if (td != ntd) {
	391	td->td_switch(ntd);
	392	}
	393	crit_exit();
	394	}
	395
	396	/*
	397	* Request that the target thread preempt the current thread. This only
	398	* works if:
	399	*
	400	* + We aren't trying to preempt ourselves (it can happen!)
	401	* + We are not currently being preempted
	402	* + The target is not currently being preempted
	403	* + The target either does not need the MP lock or we can get it
	404	* for the target immediately.
	405	*
	406	* XXX at the moment we run the target thread in a critical section during
	407	* the preemption in order to prevent the target from taking interrupts
	408	* that WE can't. Preemption is strictly limited to interrupt threads
	409	* and interrupt-like threads, outside of a critical section, and the
	410	* preempted source thread will be resumed the instant the target blocks
	411	* whether or not the source is scheduled (i.e. preemption is supposed to
	412	* be as transparent as possible).
	413	*
	414	* This call is typically made from an interrupt handler like sched_ithd()
	415	* which will only run if the current thread is not in a critical section,
	416	* so we optimize the priority check a bit.
	417	*
	418	* CAREFUL! either we or the target thread may get interrupted during the
	419	* switch.
	420	*
	421	* The target thread inherits our MP count (added to its own) for the
	422	* duration of the preemption in order to preserve the atomicy of the
	423	* preemption.
	424	*/
	425	void
	426	lwkt_preempt(thread_t ntd, int id)
	427	{
	428	thread_t td = curthread;
	429	#ifdef SMP
	430	int mpheld;
	431	#endif
	432
	433	/*
	434	* The caller has put us in a critical section, and in order to have
	435	* gotten here in the first place the thread the caller interrupted
	436	* cannot have been in a critical section before.
	437	*/
	438	KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
	439	KASSERT((td->td_pri & ~TDPRI_MASK) == TDPRI_CRIT, ("BADPRI %d", td->td_pri));
	440
	441	if (td == ntd \|\| ((td->td_flags \| ntd->td_flags) & TDF_PREEMPT_LOCK)) {
	442	++preempt_weird;
	443	return;
	444	}
	445	if (ntd->td_preempted) {
	446	++preempt_hit;
	447	return;
	448	}
	449	if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
	450	++preempt_miss;
	451	return;
	452	}
	453	#ifdef SMP
	454	mpheld = MP_LOCK_HELD();
	455	ntd->td_mpcount += td->td_mpcount;
	456	if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
	457	ntd->td_mpcount -= td->td_mpcount;
	458	++preempt_miss;
	459	return;
	460	}
	461	#endif
	462
	463	++preempt_hit;
	464	ntd->td_preempted = td;
	465	td->td_flags \|= TDF_PREEMPT_LOCK;
	466	td->td_switch(ntd);
	467	KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
	468	ntd->td_preempted = NULL;
	469	td->td_flags &= ~(TDF_PREEMPT_LOCK\|TDF_PREEMPT_DONE);
	470	}
	471
	472	/*
	473	* Yield our thread while higher priority threads are pending. This is
	474	* typically called when we leave a critical section but it can be safely
	475	* called while we are in a critical section.
	476	*
	477	* This function will not generally yield to equal priority threads but it
	478	* can occur as a side effect. Note that lwkt_switch() is called from
	479	* inside the critical section to pervent its own crit_exit() from reentering
	480	* lwkt_yield_quick().
	481	*
	482	* gd_reqpri indicates that something changed, e.g. an interrupt or softint
	483	* came along but was blocked and made pending.
	484	*
	485	* (self contained on a per cpu basis)
	486	*/
	487	void
	488	lwkt_yield_quick(void)
	489	{
	490	thread_t td = curthread;
	491
	492	if ((td->td_pri & TDPRI_MASK) < mycpu->gd_reqpri) {
	493	mycpu->gd_reqpri = 0;
	494	splz();
	495	}
	496
	497	/*
	498	* YYY enabling will cause wakeup() to task-switch, which really
	499	* confused the old 4.x code. This is a good way to simulate
	500	* preemption and MP without actually doing preemption or MP, because a
	501	* lot of code assumes that wakeup() does not block.
	502	*/
	503	if (untimely_switch && mycpu->gd_intr_nesting_level == 0) {
	504	crit_enter();
	505	/*
	506	* YYY temporary hacks until we disassociate the userland scheduler
	507	* from the LWKT scheduler.
	508	*/
	509	if (td->td_flags & TDF_RUNQ) {
	510	lwkt_switch(); /* will not reenter yield function */
	511	} else {
	512	lwkt_schedule_self(); /* make sure we are scheduled */
	513	lwkt_switch(); /* will not reenter yield function */
	514	lwkt_deschedule_self(); /* make sure we are descheduled */
	515	}
	516	crit_exit_noyield();
	517	}
	518	}
	519
	/*
	 * Normal yield.  Unlike lwkt_yield_quick() this also yields to threads
	 * of equal priority: the current thread is put on its run queue and
	 * the scheduler picks the next thread.  gd_reqpri tests are handled by
	 * the crit_exit() call in lwkt_switch().
	 *
	 * (self contained on a per cpu basis)
	 */
	void
	lwkt_yield(void)
	{
	    lwkt_schedule_self();   /* stay runnable */
	    lwkt_switch();          /* let the scheduler choose */
	}
	533
	534	/*
	535	* Schedule a thread to run. As the current thread we can always safely
	536	* schedule ourselves, and a shortcut procedure is provided for that
	537	* function.
	538	*
	539	* (non-blocking, self contained on a per cpu basis)
	540	*/
	541	void
	542	lwkt_schedule_self(void)
	543	{
	544	thread_t td = curthread;
	545
	546	crit_enter();
	547	KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
	548	_lwkt_enqueue(td);
	549	if (td->td_proc && td->td_proc->p_stat == SSLEEP)
	550	panic("SCHED SELF PANIC");
	551	crit_exit();
	552	}
	553
	554	/*
	555	* Generic schedule. Possibly schedule threads belonging to other cpus and
	556	* deal with threads that might be blocked on a wait queue.
	557	*
	558	* This function will queue requests asynchronously when possible, but may
	559	* block if no request structures are available. Upon return the caller
	560	* should note that the scheduling request may not yet have been processed
	561	* by the target cpu.
	562	*
	563	* YYY this is one of the best places to implement any load balancing code.
	564	* Load balancing can be accomplished by requesting other sorts of actions
	565	* for the thread in question.
	566	*/
	567	void
	568	lwkt_schedule(thread_t td)
	569	{
	570	if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
	571	&& td->td_proc->p_stat == SSLEEP
	572	) {
	573	printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
	574	curthread,
	575	curthread->td_proc ? curthread->td_proc->p_pid : -1,
	576	curthread->td_proc ? curthread->td_proc->p_stat : -1,
	577	td,
	578	td->td_proc ? curthread->td_proc->p_pid : -1,
	579	td->td_proc ? curthread->td_proc->p_stat : -1
	580	);
	581	panic("SCHED PANIC");
	582	}
	583	crit_enter();
	584	if (td == curthread) {
	585	_lwkt_enqueue(td);
	586	} else {
	587	lwkt_wait_t w;
	588
	589	/*
	590	* If the thread is on a wait list we have to send our scheduling
	591	* request to the owner of the wait structure. Otherwise we send
	592	* the scheduling request to the cpu owning the thread. Races
	593	* are ok, the target will forward the message as necessary (the
	594	* message may chase the thread around before it finally gets
	595	* acted upon).
	596	*
	597	* (remember, wait structures use stable storage)
	598	*/
	599	if ((w = td->td_wait) != NULL) {
	600	if (lwkt_havetoken(&w->wa_token)) {
	601	TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
	602	--w->wa_count;
	603	td->td_wait = NULL;
	604	if (td->td_cpu == mycpu->gd_cpuid) {
	605	_lwkt_enqueue(td);
	606	} else {
	607	panic("lwkt_schedule: cpu mismatch1");
	608	#if 0
	609	lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
	610	initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
	611	cpu_sendnormsg(&msg.mu_Msg);
	612	#endif
	613	}
	614	} else {
	615	panic("lwkt_schedule: cpu mismatch2");
	616	#if 0
	617	lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
	618	initScheduleReqMsg_Wait(&msg.mu_SchedReq, td, w);
	619	cpu_sendnormsg(&msg.mu_Msg);
	620	#endif
	621	}
	622	} else {
	623	/*
	624	* If the wait structure is NULL and we own the thread, there
	625	* is no race (since we are in a critical section). If we
	626	* do not own the thread there might be a race but the
	627	* target cpu will deal with it.
	628	*/
	629	if (td->td_cpu == mycpu->gd_cpuid) {
	630	_lwkt_enqueue(td);
	631	} else {
	632	panic("lwkt_schedule: cpu mismatch3");
	633	#if 0
	634	lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
	635	initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
	636	cpu_sendnormsg(&msg.mu_Msg);
	637	#endif
	638	}
	639	}
	640	}
	641	crit_exit();
	642	}
	643
	644	/*
	645	* Deschedule a thread.
	646	*
	647	* (non-blocking, self contained on a per cpu basis)
	648	*/
	649	void
	650	lwkt_deschedule_self(void)
	651	{
	652	thread_t td = curthread;
	653
	654	crit_enter();
	655	KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
	656	_lwkt_dequeue(td);
	657	crit_exit();
	658	}
	659
	660	/*
	661	* Generic deschedule. Descheduling threads other then your own should be
	662	* done only in carefully controlled circumstances. Descheduling is
	663	* asynchronous.
	664	*
	665	* This function may block if the cpu has run out of messages.
	666	*/
	667	void
	668	lwkt_deschedule(thread_t td)
	669	{
	670	crit_enter();
	671	if (td == curthread) {
	672	_lwkt_dequeue(td);
	673	} else {
	674	if (td->td_cpu == mycpu->gd_cpuid) {
	675	_lwkt_dequeue(td);
	676	} else {
	677	panic("lwkt_deschedule: cpu mismatch");
	678	#if 0
	679	lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
	680	initDescheduleReqMsg_Thread(&msg.mu_DeschedReq, td);
	681	cpu_sendnormsg(&msg.mu_Msg);
	682	#endif
	683	}
	684	}
	685	crit_exit();
	686	}
	687
	688	/*
	689	* Set the target thread's priority. This routine does not automatically
	690	* switch to a higher priority thread, LWKT threads are not designed for
	691	* continuous priority changes. Yield if you want to switch.
	692	*
	693	* We have to retain the critical section count which uses the high bits
	694	* of the td_pri field. The specified priority may also indicate zero or
	695	* more critical sections by adding TDPRI_CRIT*N.
	696	*/
	697	void
	698	lwkt_setpri(thread_t td, int pri)
	699	{
	700	KKASSERT(pri >= 0);
	701	crit_enter();
	702	if (td->td_flags & TDF_RUNQ) {
	703	_lwkt_dequeue(td);
	704	td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	705	_lwkt_enqueue(td);
	706	} else {
	707	td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	708	}
	709	crit_exit();
	710	}
	711
	712	void
	713	lwkt_setpri_self(int pri)
	714	{
	715	thread_t td = curthread;
	716
	717	KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
	718	crit_enter();
	719	if (td->td_flags & TDF_RUNQ) {
	720	_lwkt_dequeue(td);
	721	td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	722	_lwkt_enqueue(td);
	723	} else {
	724	td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	725	}
	726	crit_exit();
	727	}
	728
	729	struct proc *
	730	lwkt_preempted_proc(void)
	731	{
	732	thread_t td = curthread;
	733	while (td->td_preempted)
	734	td = td->td_preempted;
	735	return(td->td_proc);
	736	}
	737
	738
	739	/*
	740	* This function deschedules the current thread and blocks on the specified
	741	* wait queue. We obtain ownership of the wait queue in order to block
	742	* on it. A generation number is used to interlock the wait queue in case
	743	* it gets signalled while we are blocked waiting on the token.
	744	*
	745	* Note: alternatively we could dequeue our thread and then message the
	746	* target cpu owning the wait queue. YYY implement as sysctl.
	747	*
	748	* Note: wait queue signals normally ping-pong the cpu as an optimization.
	749	*/
	750	void
	751	lwkt_block(lwkt_wait_t w, const char wmesg, int gen)
	752	{
	753	thread_t td = curthread;
	754
	755	lwkt_gettoken(&w->wa_token);
	756	if (w->wa_gen == *gen) {
	757	_lwkt_dequeue(td);
	758	TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
	759	++w->wa_count;
	760	td->td_wait = w;
	761	td->td_wmesg = wmesg;
	762	lwkt_switch();
	763	}
	764	/* token might be lost, doesn't matter for gen update */
	765	*gen = w->wa_gen;
	766	lwkt_reltoken(&w->wa_token);
	767	}
	768
	769	/*
	770	* Signal a wait queue. We gain ownership of the wait queue in order to
	771	* signal it. Once a thread is removed from the wait queue we have to
	772	* deal with the cpu owning the thread.
	773	*
	774	* Note: alternatively we could message the target cpu owning the wait
	775	* queue. YYY implement as sysctl.
	776	*/
	777	void
	778	lwkt_signal(lwkt_wait_t w)
	779	{
	780	thread_t td;
	781	int count;
	782
	783	lwkt_gettoken(&w->wa_token);
	784	++w->wa_gen;
	785	count = w->wa_count;
	786	while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
	787	--count;
	788	--w->wa_count;
	789	TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
	790	td->td_wait = NULL;
	791	td->td_wmesg = NULL;
	792	if (td->td_cpu == mycpu->gd_cpuid) {
	793	_lwkt_enqueue(td);
	794	} else {
	795	#if 0
	796	lwkt_cpu_msg_union_t msg = lwkt_getcpumsg();
	797	initScheduleReqMsg_Thread(&msg.mu_SchedReq, td);
	798	cpu_sendnormsg(&msg.mu_Msg);
	799	#endif
	800	panic("lwkt_signal: cpu mismatch");
	801	}
	802	lwkt_regettoken(&w->wa_token);
	803	}
	804	lwkt_reltoken(&w->wa_token);
	805	}
	806
	807	/*
	808	* Aquire ownership of a token
	809	*
	810	* Aquire ownership of a token. The token may have spl and/or critical
	811	* section side effects, depending on its purpose. These side effects
	812	* guarentee that you will maintain ownership of the token as long as you
	813	* do not block. If you block you may lose access to the token (but you
	814	* must still release it even if you lose your access to it).
	815	*
	816	* Note that the spl and critical section characteristics of a token
	817	* may not be changed once the token has been initialized.
	818	*/
	819	int
	820	lwkt_gettoken(lwkt_token_t tok)
	821	{
	822	/*
	823	* Prevent preemption so the token can't be taken away from us once
	824	* we gain ownership of it. Use a synchronous request which might
	825	* block. The request will be forwarded as necessary playing catchup
	826	* to the token.
	827	*/
	828	crit_enter();
	829	#if 0
	830	while (tok->t_cpu != mycpu->gd_cpuid) {
	831	lwkt_cpu_msg_union msg;
	832	initTokenReqMsg(&msg.mu_TokenReq);
	833	cpu_domsg(&msg);
	834	}
	835	#endif
	836	/*
	837	* leave us in a critical section on return. This will be undone
	838	* by lwkt_reltoken(). Bump the generation number.
	839	*/
	840	return(++tok->t_gen);
	841	}
	842
	843	/*
	844	* Release your ownership of a token. Releases must occur in reverse
	845	* order to aquisitions, eventually so priorities can be unwound properly
	846	* like SPLs. At the moment the actual implemention doesn't care.
	847	*
	848	* We can safely hand a token that we own to another cpu without notifying
	849	* it, but once we do we can't get it back without requesting it (unless
	850	* the other cpu hands it back to us before we check).
	851	*
	852	* We might have lost the token, so check that.
	853	*/
	854	void
	855	lwkt_reltoken(lwkt_token_t tok)
	856	{
	857	if (tok->t_cpu == mycpu->gd_cpuid) {
	858	tok->t_cpu = tok->t_reqcpu;
	859	}
	860	crit_exit();
	861	}
	862
	863	/*
	864	* Reacquire a token that might have been lost and compare and update the
	865	* generation number. 0 is returned if the generation has not changed
	866	* (nobody else obtained the token while we were blocked, on this cpu or
	867	* any other cpu).
	868	*
	869	* This function returns with the token re-held whether the generation
	870	* number changed or not.
	871	*/
	872	int
	873	lwkt_gentoken(lwkt_token_t tok, int *gen)
	874	{
	875	if (lwkt_regettoken(tok) == *gen) {
	876	return(0);
	877	} else {
	878	*gen = tok->t_gen;
	879	return(-1);
	880	}
	881	}
	882
	883
	884	/*
	885	* Reacquire a token that might have been lost. Returns the generation
	886	* number of the token.
	887	*/
	888	int
	889	lwkt_regettoken(lwkt_token_t tok)
	890	{
	891	#if 0
	892	if (tok->t_cpu != mycpu->gd_cpuid) {
	893	while (tok->t_cpu != mycpu->gd_cpuid) {
	894	lwkt_cpu_msg_union msg;
	895	initTokenReqMsg(&msg.mu_TokenReq);
	896	cpu_domsg(&msg);
	897	}
	898	}
	899	#endif
	900	return(tok->t_gen);
	901	}
	902
	903	void
	904	lwkt_inittoken(lwkt_token_t tok)
	905	{
	906	/*
	907	* Zero structure and set cpu owner and reqcpu to cpu 0.
	908	*/
	909	bzero(tok, sizeof(*tok));
	910	}
	911
	912	/*
	913	* Create a kernel process/thread/whatever. It shares it's address space
	914	* with proc0 - ie: kernel only.
	915	*
	916	* XXX should be renamed to lwkt_create()
	917	*
	918	* The thread will be entered with the MP lock held.
	919	*/
	920	int
	921	lwkt_create(void (func)(void ), void *arg,
	922	struct thread **tdp, thread_t template, int tdflags,
	923	const char *fmt, ...)
	924	{
	925	thread_t td;
	926	va_list ap;
	927
	928	td = *tdp = lwkt_alloc_thread(template);
	929	cpu_set_thread_handler(td, kthread_exit, func, arg);
	930	td->td_flags \|= TDF_VERBOSE \| tdflags;
	931	#ifdef SMP
	932	td->td_mpcount = 1;
	933	#endif
	934
	935	/*
	936	* Set up arg0 for 'ps' etc
	937	*/
	938	va_start(ap, fmt);
	939	vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
	940	va_end(ap);
	941
	942	/*
	943	* Schedule the thread to run
	944	*/
	945	if ((td->td_flags & TDF_STOPREQ) == 0)
	946	lwkt_schedule(td);
	947	else
	948	td->td_flags &= ~TDF_STOPREQ;
	949	return 0;
	950	}
	951
	952	/*
	953	* Destroy an LWKT thread. Warning! This function is not called when
	954	* a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
	955	* uses a different reaping mechanism.
	956	*/
	957	void
	958	lwkt_exit(void)
	959	{
	960	thread_t td = curthread;
	961
	962	if (td->td_flags & TDF_VERBOSE)
	963	printf("kthread %p %s has exited\n", td, td->td_comm);
	964	crit_enter();
	965	lwkt_deschedule_self();
	966	++mycpu->gd_tdfreecount;
	967	TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
	968	cpu_thread_exit();
	969	}
	970
	971	/*
	972	* Create a kernel process/thread/whatever. It shares it's address space
	973	* with proc0 - ie: kernel only. 5.x compatible.
	974	*/
	975	int
	976	kthread_create(void (func)(void ), void *arg,
	977	struct thread *tdp, const char fmt, ...)
	978	{
	979	thread_t td;
	980	va_list ap;
	981
	982	td = *tdp = lwkt_alloc_thread(NULL);
	983	cpu_set_thread_handler(td, kthread_exit, func, arg);
	984	td->td_flags \|= TDF_VERBOSE;
	985	#ifdef SMP
	986	td->td_mpcount = 1;
	987	#endif
	988
	989	/*
	990	* Set up arg0 for 'ps' etc
	991	*/
	992	va_start(ap, fmt);
	993	vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
	994	va_end(ap);
	995
	996	/*
	997	* Schedule the thread to run
	998	*/
	999	lwkt_schedule(td);
	1000	return 0;
	1001	}
	1002
	1003	void
	1004	crit_panic(void)
	1005	{
	1006	thread_t td = curthread;
	1007	int lpri = td->td_pri;
	1008
	1009	td->td_pri = 0;
	1010	panic("td_pri is/would-go negative! %p %d", td, lpri);
	1011	}
	1012
	/*
	 * Destroy an LWKT thread; thin wrapper that hands off to lwkt_exit().
	 * Warning!  This function is not called when a process exits,
	 * cpu_proc_exit() directly calls cpu_thread_exit() and uses a
	 * different reaping mechanism.
	 *
	 * XXX duplicates lwkt_exit()
	 */
	void
	kthread_exit(void)
	{
	    lwkt_exit();    /* all of the work is done there */
	}
	1025