2 * Copyright (c) 1999 Peter Wemm <peter@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $FreeBSD: src/sys/kern/kern_switch.c,v 1.3.2.1 2000/05/16 06:58:12 dillon Exp $
27 * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.5 2003/07/10 04:47:54 dillon Exp $
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/queue.h>
35 #include <sys/rtprio.h>
36 #include <sys/thread2.h>
38 #include <machine/ipl.h>
39 #include <machine/cpu.h>
42 * debugging only YYY Remove me! define to schedule user processes only
43 * on the BSP. Interrupts can still be taken on the APs.
/* Left undefined by default: user processes may run on any cpu. */
45 #undef ONLY_ONE_USER_CPU
48 * We have NQS (32) run queues per scheduling class. For the normal
49 * class, there are 128 priorities scaled onto these 32 queues. New
50 * processes are added to the last entry in each queue, and processes
51 * are selected for running by taking them from the head and maintaining
52 * a simple FIFO arrangement. Realtime and Idle priority processes have
53 * an explicit 0-31 priority which maps directly onto their class queue
54 * index. When a queue has something in it, the corresponding bit is
55 * set in the queuebits variable, allowing a single read to determine
56 * the state of all 32 queues and then a ffs() to find the first busy
queue.
/* One TAILQ of processes per priority slot, per scheduling class. */
59 static struct rq queues[NQS];		/* normal (timeshare) class */
60 static struct rq rtqueues[NQS];		/* realtime class */
61 static struct rq idqueues[NQS];		/* idle class */
/* Bit N set <=> queue N of the matching class is non-empty. */
62 static u_int32_t queuebits;
63 static u_int32_t rtqueuebits;
64 static u_int32_t idqueuebits;
/*
 * curprocmask: bit per cpu that currently has a P_CURPROC user process
 * (or is administratively disabled for user scheduling). Starts all-ones
 * so no cpu schedules user processes until sched_thread_init() clears
 * its bit. rdyprocmask: bit per cpu whose helper thread is parked and
 * may be woken to pick up new work.
 */
65 static u_int32_t curprocmask = -1;
66 static u_int32_t rdyprocmask = 0;
70 * Initialize the run queues at boot time.
/*
 * NOTE(review): rqinit()'s signature and braces are not visible in this
 * fragment; only the initialization loop body survives. Each of the NQS
 * per-class tail queues is set to the empty state.
 */
77 for (i = 0; i < NQS; i++) {
78 TAILQ_INIT(&queues[i]);		/* normal class, slot i */
79 TAILQ_INIT(&rtqueues[i]);	/* realtime class, slot i */
80 TAILQ_INIT(&idqueues[i]);	/* idle class, slot i */
/* Run rqinit() once during boot, at run-queue setup time. */
88 SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL)
/*
 * Compare newp's scheduling priority against curp's. Lower rtprio.type
 * is the more important class; within the normal class, priorities are
 * compared at run-queue granularity (PPQ priorities per queue); within
 * rt/idle classes, lower rtprio.prio wins. NOTE(review): the return
 * statements and the function's result convention are not visible in
 * this fragment -- presumably nonzero means "reschedule"; confirm
 * against the full file.
 */
91 test_resched(struct proc *curp, struct proc *newp)
93 if (newp->p_rtprio.type < curp->p_rtprio.type)
95 if (newp->p_rtprio.type == curp->p_rtprio.type) {
96 if (newp->p_rtprio.type == RTP_PRIO_NORMAL) {
97 if (newp->p_priority / PPQ <= curp->p_priority / PPQ)
99 } else if (newp->p_rtprio.prio < curp->p_rtprio.prio) {
107 * chooseproc() is called when a cpu needs a user process to LWKT schedule.
108 * chooseproc() will select a user process and return it.
/*
 * Class precedence: realtime, then normal, then idle. bsfl() finds the
 * lowest set bit, i.e. the best-priority non-empty queue of the class.
 */
120 pri = bsfl(rtqueuebits);
122 which = &rtqueuebits;
123 } else if (queuebits) {
124 pri = bsfl(queuebits);
127 } else if (idqueuebits) {
128 pri = bsfl(idqueuebits);
130 which = &idqueuebits;
/* Pop the head of the chosen queue (simple FIFO within a slot). */
135 KASSERT(p, ("chooseproc: no proc on busy queue"));
136 TAILQ_REMOVE(q, p, p_procq);
/*
 * NOTE(review): in the full file this bit-clear is guarded by a
 * TAILQ_EMPTY(q) check (the guard line is missing from this fragment);
 * the busy bit is only cleared once the queue drains.
 */
138 *which &= ~(1 << pri);
139 KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq6!"));
140 p->p_flag &= ~P_ONRUNQ;
146 * setrunqueue() 'wakes up' a 'user' process, which can mean several things.
148 * If P_CP_RELEASED is set the user process is under the control of the
149 * LWKT subsystem and we simply wake the thread up. This is ALWAYS the
150 * case when setrunqueue() is called from wakeup() and, in fact wakeup()
151 * asserts that P_CP_RELEASED is set.
153 * Note that acquire_curproc() already optimizes making the current process
154 * P_CURPROC, so setrunqueue() does not need to.
156 * If P_CP_RELEASED is not set we place the process on the run queue and we
157 * signal other cpus in the system that may need to be woken up to service
158 * the new 'user' process.
160 * The associated thread must NOT be scheduled.
161 * The process must be runnable.
162 * This must be called at splhigh().
165 setrunqueue(struct proc *p)
/* Entry preconditions: runnable, not already queued or current. */
173 KASSERT(p->p_stat == SRUN, ("setrunqueue: proc not SRUN"));
174 KASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC)) == 0,
175 ("process %d already on runq! flag %08x", p->p_pid, p->p_flag));
176 KKASSERT((p->p_thread->td_flags & TDF_RUNQ) == 0);
179 * If we have been released from the userland scheduler we
180 * directly schedule its thread.
182 if (p->p_flag & P_CP_RELEASED) {
183 lwkt_schedule(p->p_thread);
189 * Otherwise place this process on the userland scheduler's run
193 p->p_flag |= P_ONRUNQ;
194 if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
/* 128 normal priorities folded onto 32 queues (>> 2 == / PPQ). */
195 pri = p->p_priority >> 2;
197 queuebits |= 1 << pri;
198 } else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
199 p->p_rtprio.type == RTP_PRIO_FIFO) {
/* Realtime/FIFO prio maps directly onto the 0-31 queue index. */
200 pri = (u_int8_t)p->p_rtprio.prio;
202 rtqueuebits |= 1 << pri;
203 } else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
204 pri = (u_int8_t)p->p_rtprio.prio;
206 idqueuebits |= 1 << pri;
208 panic("setrunqueue: invalid rtprio type");
/*
 * NOTE(review): the lines selecting q = &queues[pri] (etc.) for each
 * class are missing from this fragment; only the bit-set lines remain.
 */
211 p->p_rqindex = pri; /* remember the queue index */
212 TAILQ_INSERT_TAIL(q, p, p_procq);
215 * Wakeup other cpus to schedule the newly available thread.
216 * XXX doesn't really have to be in a critical section.
217 * We own giant after all.
/* Candidate cpus: no current user proc, helper parked, and not us. */
219 if ((mask = ~curprocmask & rdyprocmask & mycpu->gd_other_cpus) != 0) {
220 int count = runqcount;
221 while (mask && count) {
/* NOTE(review): the cpuid = bsfl(mask) extraction line is not visible. */
223 KKASSERT((curprocmask & (1 << cpuid)) == 0);
224 rdyprocmask &= ~(1 << cpuid);
225 lwkt_schedule(&globaldata_find(cpuid)->gd_schedthread);
227 mask &= ~(1 << cpuid);
234 * remrunqueue() removes a given process from the run queue that it is on,
235 * clearing the queue busy bit if it becomes empty. This function is called
236 * when a userland process is selected for LWKT scheduling. Note that
237 * LWKT scheduling is an abstraction of 'curproc'.. there could very well be
238 * several userland processes whose threads are scheduled or otherwise in
239 * a special state, and such processes are NOT on the userland scheduler's
242 * This must be called at splhigh().
245 remrunqueue(struct proc *p)
252 KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq4!"));
253 p->p_flag &= ~P_ONRUNQ;
255 KKASSERT(runqcount >= 0);
/*
 * Locate the class queue the process was enqueued on.
 * NOTE(review): the q = &...[pri] selections and the pri = p->p_rqindex
 * load are missing from this fragment; only the 'which' assignments for
 * the rt/idle classes are visible.
 */
257 if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
260 } else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
261 p->p_rtprio.type == RTP_PRIO_FIFO) {
263 which = &rtqueuebits;
264 } else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
266 which = &idqueuebits;
268 panic("remrunqueue: invalid rtprio type");
270 TAILQ_REMOVE(q, p, p_procq);
/* Clear the busy bit only when the slot's FIFO is fully drained. */
271 if (TAILQ_EMPTY(q)) {
272 KASSERT((*which & (1 << pri)) != 0,
273 ("remrunqueue: remove from empty queue"));
274 *which &= ~(1 << pri);
280 * Release the P_CURPROC designation on the CURRENT process only. This
281 * will allow another userland process to be scheduled and places our
282 * process back on the userland scheduling queue.
285 release_curproc(struct proc *p)
290 #ifdef ONLY_ONE_USER_CPU
291 KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_cpu == 0);
/* Mark ourselves as released to LWKT control. */
295 cpuid = p->p_thread->td_cpu;
296 p->p_flag |= P_CP_RELEASED;
297 if (p->p_flag & P_CURPROC) {
298 p->p_flag &= ~P_CURPROC;
299 KKASSERT(curprocmask & (1 << cpuid));
/*
 * Hand the cpu's user slot to the best queued process, if any;
 * otherwise free the slot by clearing our cpu's curprocmask bit.
 */
300 if ((np = chooseproc()) != NULL) {
301 np->p_flag |= P_CURPROC;
302 lwkt_acquire(np->p_thread);
303 lwkt_schedule(np->p_thread);
305 curprocmask &= ~(1 << cpuid);
312 * Acquire the P_CURPROC designation on the CURRENT process only. This
313 * function is called prior to returning to userland. If the system
314 * call or trap did not block and if no reschedule was requested it is
315 * highly likely that the P_CURPROC flag is still set in the proc, and
316 * we do almost nothing here.
319 acquire_curproc(struct proc *p)
325 * Short cut, we've already acquired the designation or we never
326 * lost it in the first place.
328 if ((p->p_flag & P_CURPROC) != 0)
332 * Long cut. This pulls in a bit of the userland scheduler as
333 * an optimization. If our cpu has not scheduled a userland
334 * process we gladly fill the slot, otherwise we choose the best
335 * candidate from the run queue and compare it against ourselves,
336 * scheduling either us or him depending.
338 * If our cpu's slot isn't free we put ourselves on the userland
339 * run queue and switch away. We should have P_CURPROC when we
340 * come back. Note that a cpu change can occur when we come back.
342 * YYY don't need critical section, we hold giant and no interrupt
343 * will mess w/ this proc? Or will it? What about curprocmask?
345 #ifdef ONLY_ONE_USER_CPU
346 KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_cpu == 0);
/* We are (re)entering userland scheduling: clear the released flag. */
349 p->p_flag &= ~P_CP_RELEASED;
350 while ((p->p_flag & P_CURPROC) == 0) {
351 cpuid = p->p_thread->td_cpu; /* load/reload cpuid */
/* Our cpu's user slot is free: claim it. */
352 if ((curprocmask & (1 << cpuid)) == 0) {
353 curprocmask |= 1 << cpuid;
354 if ((np = chooseproc()) != NULL) {
355 KKASSERT((np->p_flag & P_CP_RELEASED) == 0);
/*
 * A queued candidate exists. If it outranks us, it
 * gets the slot and runs; otherwise we take the slot.
 * NOTE(review): the else-branch and the setrunqueue()
 * re-queue of the loser are not visible in this fragment.
 */
356 if (test_resched(p, np)) {
357 np->p_flag |= P_CURPROC;
358 lwkt_acquire(np->p_thread);
359 lwkt_schedule(np->p_thread);
361 p->p_flag |= P_CURPROC;
365 p->p_flag |= P_CURPROC;
/* Lost the slot: sleep until a cpu designates us P_CURPROC. */
368 if ((p->p_flag & P_CURPROC) == 0) {
369 lwkt_deschedule_self();
/* Postcondition: current, not queued, not released. */
372 KKASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC|P_CP_RELEASED)) == P_CURPROC);
379 * Yield / synchronous reschedule. This is a bit tricky because the trap
380 * code might have set a lazy release on the switch function. The first
381 * thing we do is call lwkt_switch() to resolve the lazy release (if any).
382 * Then, if we are a process, we want to allow another process to run.
384 * The only way to do that is to acquire and then release P_CURPROC. We
385 * have to release it because the kernel expects it to be released as a
386 * sanity check when it goes to sleep.
388 * XXX we need a way to ensure that we wake up eventually from a yield,
389 * even if we are an idprio process.
/*
 * NOTE(review): the function signature (uio_yield in the full file) and
 * the body past these local declarations are not visible in this fragment.
 */
394 struct thread *td = curthread;
395 struct proc *p = td->td_proc;
406 * For SMP systems a user scheduler helper thread is created for each
407 * cpu and is used to allow one cpu to wakeup another for the purposes of
408 * scheduling userland threads from setrunqueue(). UP systems do not
409 * need the helper since there is only one cpu. We can't use the idle
410 * thread for this because we need to hold the MP lock. Additionally,
411 * doing things this way allows us to HLT idle cpus on MP systems.
417 sched_thread(void *dummy)
419 int cpuid = mycpu->gd_cpuid; /* doesn't change */
420 u_int32_t cpumask = 1 << cpuid; /* doesn't change */
422 #ifdef ONLY_ONE_USER_CPU
423 KKASSERT(cpuid == 0);
426 get_mplock(); /* hold the MP lock */
/*
 * Helper loop body (the enclosing for(;;) line is not visible in this
 * fragment): advertise readiness, park until setrunqueue() wakes us,
 * then hand our cpu's free user slot to the best queued process.
 */
430 rdyprocmask |= cpumask;
431 lwkt_deschedule_self(); /* interlock */
433 if ((curprocmask & cpumask) == 0 && (np = chooseproc()) != NULL) {
434 curprocmask |= cpumask;
435 np->p_flag |= P_CURPROC;
436 lwkt_acquire(np->p_thread);
437 lwkt_schedule(np->p_thread);
/*
 * Per-cpu boot hook: create this cpu's scheduler helper thread and
 * enable user process scheduling on it by clearing its curprocmask bit.
 * With ONLY_ONE_USER_CPU, non-BSP cpus keep their bit set (disabled).
 */
445 sched_thread_init(void)
447 int cpuid = mycpu->gd_cpuid;
449 lwkt_create(sched_thread, NULL, NULL, &mycpu->gd_schedthread,
450 TDF_STOPREQ, "usched %d", cpuid);
451 curprocmask &= ~(1 << cpuid); /* schedule user proc on cpu */
452 #ifdef ONLY_ONE_USER_CPU
454 curprocmask |= 1 << cpuid; /* DISABLE USER PROCS */
/* Mark the helper as parked/ready so setrunqueue() may wake it. */
456 rdyprocmask |= 1 << cpuid;