kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / kern / usched_dummy.c
/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/usched_dummy.c,v 1.9 2008/04/21 15:24:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#define MAXPRI			128
#define PRIBASE_REALTIME	0
#define PRIBASE_NORMAL		MAXPRI
#define PRIBASE_IDLE		(MAXPRI * 2)
#define PRIBASE_THREAD		(MAXPRI * 3)
#define PRIBASE_NULL		(MAXPRI * 4)

#define lwp_priority	lwp_usdata.bsd4.priority
#define lwp_estcpu	lwp_usdata.bsd4.estcpu

static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
				 sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct proc *child);
static void dummy_yield(struct lwp *lp);

struct usched usched_dummy = {
	{ NULL },
	"dummy", "Dummy DragonFly Scheduler",
	NULL,			/* default registration */
	NULL,			/* default deregistration */
	dummy_acquire_curproc,
	dummy_release_curproc,
	dummy_setrunqueue,
	dummy_schedulerclock,
	dummy_recalculate_estcpu,
	dummy_resetpriority,
	dummy_forking,
	dummy_exiting,
	NULL,			/* setcpumask not supported */
	dummy_yield
};

struct usched_dummy_pcpu {
	int	rrcount;
	struct thread helper_thread;
	struct lwp *uschedcp;
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;

static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
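/*
 * A set bit in dummy_curprocmask means the cpu already has a designated
 * userland thread (or has not yet been enabled for user scheduling); the
 * mask starts fully set and cpu 0 is cleared in dummyinit().  A set bit
 * in dummy_rdyprocmask means that cpu's helper thread is idle and may be
 * woken to pull work off the global run queue.
 */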
static cpumask_t dummy_curprocmask = -1;
static cpumask_t dummy_rdyprocmask;
static struct spinlock dummy_spin;
static TAILQ_HEAD(rq, lwp) dummy_runq;
static int dummy_runqcount;

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
	&usched_dummy_rrinterval, 0, "");

/*
 * Initialize the run queues at boot time and clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
	TAILQ_INIT(&dummy_runq);
	spin_init(&dummy_spin);
	atomic_clear_cpumask(&dummy_curprocmask, 1);
}
SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, dummyinit, NULL)

/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 * occur, this function is called only under very controlled circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	thread_t td = lp->lwp_thread;

	/*
	 * Possibly select another thread
	 */
	if (user_resched_wanted())
		dummy_select_curproc(gd);

	/*
	 * If we are already the designated current user thread, or this cpu
	 * has no current thread and the run queue is empty, select ourselves.
	 */
	if (dd->uschedcp == lp ||
	    (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq))) {
		atomic_set_cpumask(&dummy_curprocmask, gd->gd_cpumask);
		dd->uschedcp = lp;
		return;
	}

	/*
	 * If this cpu's current user process thread is not our thread,
	 * deschedule ourselves and place us on the run queue, then
	 * switch away.
	 *
	 * We loop until we become the current process.  It's a good idea
	 * to run any passive release(s) before we mess with the scheduler
	 * so our thread is in the expected state.
	 */
	KKASSERT(dd->uschedcp != lp);
	if (td->td_release)
		td->td_release(lp->lwp_thread);
	do {
		crit_enter();
		lwkt_deschedule_self(td);
		dummy_setrunqueue(lp);
		if ((td->td_flags & TDF_RUNQ) == 0)
			++lp->lwp_ru.ru_nivcsw;
		lwkt_switch();		/* WE MAY MIGRATE TO ANOTHER CPU */
		crit_exit();
		gd = mycpu;
		dd = &dummy_pcpu[gd->gd_cpuid];
		KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
	} while (dd->uschedcp != lp);
}

/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel priority)
 * for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * become the current thread.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
	if (dd->uschedcp == lp) {
		dummy_select_curproc(gd);
	}
}

/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled all at once and we just
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	struct lwp *lp;

	clear_user_resched();
	spin_lock(&dummy_spin);
	if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
		dd->uschedcp = NULL;
		atomic_clear_cpumask(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock(&dummy_spin);
	} else {
		--dummy_runqcount;
		TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
		atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
		dd->uschedcp = lp;
		atomic_set_cpumask(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock(&dummy_spin);
#ifdef SMP
		lwkt_acquire(lp->lwp_thread);
#endif
		lwkt_schedule(lp->lwp_thread);
	}
}

/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set LWP_PASSIVE_ACQ in lwp_flags to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	cpumask_t mask;
	int cpuid;

	if (dd->uschedcp == NULL) {
		dd->uschedcp = lp;
		atomic_set_cpumask(&dummy_curprocmask, gd->gd_cpumask);
		lwkt_schedule(lp->lwp_thread);
	} else {
		/*
		 * Add to our global runq
		 */
		KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
		spin_lock(&dummy_spin);
		++dummy_runqcount;
		TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
		atomic_set_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
#ifdef SMP
		lwkt_giveaway(lp->lwp_thread);
#endif

		/* lp = TAILQ_FIRST(&dummy_runq); */

		/*
		 * Notify the next available cpu.  Some cpu affinity could
		 * be applied here.
		 *
		 * The rdyprocmask bit records the fact that there is a
		 * process on the runq that needs service.  If the helper
		 * thread cannot find a home for it, it will forward the
		 * request to another available cpu.
		 */
		mask = ~dummy_curprocmask & dummy_rdyprocmask &
		       gd->gd_other_cpus;
		if (mask) {
			cpuid = BSFCPUMASK(mask);
			atomic_clear_cpumask(&dummy_rdyprocmask, CPUMASK(cpuid));
			spin_unlock(&dummy_spin);
			lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
	}
}

/*
 * This routine is called from a systimer IPI.  Thus it is called with
 * a critical section held.  Any spinlocks we get here that are also
 * obtained in other procedures must be protected by a critical section
 * in those other procedures to avoid a deadlock.
 *
 * The MP lock may or may not be held on entry and cannot be obtained
 * by this routine (because it is called from a systimer IPI).  Additionally,
 * because this is equivalent to a FAST interrupt, spinlocks cannot be used
 * (or at least, you have to check that gd_spin* counts are 0 before you
 * can).
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 *
 * MPSAFE
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

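	/*
	 * Simple round-robin: once this cpu has accumulated rrinterval
	 * ticks, reset the count and request a user reschedule.
	 */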
	if (++dd->rrcount >= usched_dummy_rrinterval) {
		dd->rrcount = 0;
		need_user_resched();
	}
}

/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
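	/* Nothing to do in the dummy scheduler. */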
}

/*
 * MPSAFE
 */
static
void
dummy_yield(struct lwp *lp)
{
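	/*
	 * Yielding just requests a user reschedule; the next pass through
	 * dummy_acquire_curproc() will pick a new current thread.
	 */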
	need_user_resched();
}

/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or sleeping
 * or scheduled but not yet running or owned by another cpu.  Basically,
 * it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * off the run queue, and also may be called normally with the process on or
 * off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
	/* XXX spinlock usually needed */
	/*
	 * Set p_priority for general process comparisons
	 */
	switch(lp->lwp_rtprio.type) {
	case RTP_PRIO_REALTIME:
		lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_NORMAL:
		lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
		break;
	case RTP_PRIO_IDLE:
		lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_THREAD:
		lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
		return;
	}
	/* XXX spinlock usually needed */
}


/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
	lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
	++plp->lwp_estcpu;
#endif
}

/*
 * DUMMY_EXITING
 *
 * Called when the parent reaps a child.  Typically used to propagate cpu
 * use by the child back to the parent as part of a batch detection
 * heuristic.
 *
 * NOTE: cpu use is not normally back-propagated to PID 1.
 *
 * MPSAFE
 */
static void
dummy_exiting(struct lwp *plp, struct proc *child)
{
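	/* Nothing to propagate in the dummy scheduler. */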
}

/*
 * SMP systems may need a scheduler helper thread.  This is how one can be
 * set up.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 * is possible to deschedule an LWKT thread and then do some work before
 * switching away.  The thread can be rescheduled at any time, even before
 * we switch away.
 *
 * MPSAFE
 */
#ifdef SMP

static void
dummy_sched_thread(void *dummy)
{
	globaldata_t gd;
	dummy_pcpu_t dd;
	struct lwp *lp;
	cpumask_t cpumask;
	cpumask_t tmpmask;
	int cpuid;
	int tmpid;

	gd = mycpu;
	cpuid = gd->gd_cpuid;
	dd = &dummy_pcpu[cpuid];
	cpumask = CPUMASK(cpuid);

	for (;;) {
		lwkt_deschedule_self(gd->gd_curthread);	/* interlock */
		atomic_set_cpumask(&dummy_rdyprocmask, cpumask);
		spin_lock(&dummy_spin);
		if (dd->uschedcp) {
			/*
			 * We raced another cpu trying to schedule a thread onto us.
			 * If the runq isn't empty hit another free cpu.
			 */
			tmpmask = ~dummy_curprocmask & dummy_rdyprocmask &
				  gd->gd_other_cpus;
			if (tmpmask && dummy_runqcount) {
				tmpid = BSFCPUMASK(tmpmask);
				KKASSERT(tmpid != cpuid);
				atomic_clear_cpumask(&dummy_rdyprocmask, CPUMASK(tmpid));
				spin_unlock(&dummy_spin);
				lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
			} else {
				spin_unlock(&dummy_spin);
			}
		} else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
			--dummy_runqcount;
			TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
			atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
			dd->uschedcp = lp;
			atomic_set_cpumask(&dummy_curprocmask, cpumask);
			spin_unlock(&dummy_spin);
#ifdef SMP
			lwkt_acquire(lp->lwp_thread);
#endif
			lwkt_schedule(lp->lwp_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
		lwkt_switch();
	}
}

/*
 * Set up our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
	int i;

	if (bootverbose)
		kprintf("start dummy scheduler helpers on cpus:");

	for (i = 0; i < ncpus; ++i) {
		dummy_pcpu_t dd = &dummy_pcpu[i];
		cpumask_t mask = CPUMASK(i);

		if ((mask & smp_active_mask) == 0)
			continue;

		if (bootverbose)
			kprintf(" %d", i);

		lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
			    TDF_NOSTART, i, "dsched %d", i);

		/*
		 * Allow user scheduling on the target cpu.  cpu #0 has
		 * already been enabled in dummyinit().
		 */
		if (i)
			atomic_clear_cpumask(&dummy_curprocmask, mask);
		atomic_set_cpumask(&dummy_rdyprocmask, mask);
	}
	if (bootverbose)
		kprintf("\n");
}
SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
	dummy_sched_thread_cpu_init, NULL)

#endif