2 * Copyright (c) 1999 Peter Wemm <peter@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $FreeBSD: src/sys/kern/kern_switch.c,v 1.3.2.1 2000/05/16 06:58:12 dillon Exp $
27 * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.6 2003/07/10 18:23:24 dillon Exp $
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
34 #include <sys/queue.h>
36 #include <sys/rtprio.h>
37 #include <sys/thread2.h>
39 #include <machine/ipl.h>
40 #include <machine/cpu.h>
43 * debugging only YYY Remove me! define to schedule user processes only
44 * on the BSP. Interrupts can still be taken on the APs.
46 #undef ONLY_ONE_USER_CPU
49 * We have NQS (32) run queues per scheduling class. For the normal
50 * class, there are 128 priorities scaled onto these 32 queues. New
51 * processes are added to the last entry in each queue, and processes
52 * are selected for running by taking them from the head and maintaining
53 * a simple FIFO arrangement. Realtime and Idle priority processes have
54 * an explicit 0-31 priority which maps directly onto their class queue
55 * index. When a queue has something in it, the corresponding bit is
56 * set in the queuebits variable, allowing a single read to determine
57 * the state of all 32 queues and then a ffs() to find the first busy
/* Per-class FIFO run queues: timeshare ("normal"), realtime, and idle. */
60 static struct rq queues[NQS];
61 static struct rq rtqueues[NQS];
62 static struct rq idqueues[NQS];
/* One bit per queue above; a set bit means that queue is non-empty. */
63 static u_int32_t queuebits;
64 static u_int32_t rtqueuebits;
65 static u_int32_t idqueuebits;
/*
 * curprocmask: cpus that currently own a designated user process.  It
 * starts all-ones so no cpu schedules user processes until
 * sched_thread_init() clears that cpu's bit.  rdyprocmask: cpus whose
 * per-cpu scheduler helper thread is parked and may be woken with
 * lwkt_schedule() (set in sched_thread(), cleared in setrunqueue()).
 */
66 static u_int32_t curprocmask = -1;
67 static u_int32_t rdyprocmask = 0;
71 * Initialize the run queues at boot time.
/*
 * NOTE(review): the rqinit() function header, local declarations, and
 * closing brace are elided from this excerpt; only the loop body and
 * the SYSINIT registration are visible.
 */
78 for (i = 0; i < NQS; i++) {
/* Each of the 32 slots in all three classes becomes an empty TAILQ. */
79 TAILQ_INIT(&queues[i]);
80 TAILQ_INIT(&rtqueues[i]);
81 TAILQ_INIT(&idqueues[i]);
/* Registered to run once during boot at run-queue setup order. */
89 SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL)
/*
 * test_resched() - compare a candidate process (newp) against the current
 * one (curp).  A numerically lower rtprio type is a more important
 * scheduling class; within the same class a lower priority value is more
 * important.  NOTE(review): the return statements are elided from this
 * excerpt — presumably each taken branch returns non-zero meaning "newp
 * should preempt curp"; confirm against the full source.
 */
92 test_resched(struct proc *curp, struct proc *newp)
94 if (newp->p_rtprio.type < curp->p_rtprio.type)
96 if (newp->p_rtprio.type == curp->p_rtprio.type) {
97 if (newp->p_rtprio.type == RTP_PRIO_NORMAL) {
/* Normal class compares by queue index: PPQ priorities map to one queue. */
98 if (newp->p_priority / PPQ <= curp->p_priority / PPQ)
/* Realtime/idle classes compare their explicit 0-31 rtprio value. */
100 } else if (newp->p_rtprio.prio < curp->p_rtprio.prio) {
108 * chooseproc() is called when a cpu needs a user process to LWKT schedule.
109 * chooseproc() will select a user process and return it.
/*
 * Scan the classes in fixed precedence order: realtime first, then
 * normal, then idle.  bsfl() locates the lowest set bit, i.e. the
 * highest-priority non-empty queue within the class (the ffs() scheme
 * described above the queue declarations).
 */
121 pri = bsfl(rtqueuebits);
123 which = &rtqueuebits;
124 } else if (queuebits) {
125 pri = bsfl(queuebits);
128 } else if (idqueuebits) {
129 pri = bsfl(idqueuebits);
131 which = &idqueuebits;
/*
 * Pop the head of the selected queue (FIFO order).  NOTE(review): an
 * elided line presumably guards the bit-clear below on the queue
 * becoming empty, as remrunqueue() does — confirm against full source.
 */
136 KASSERT(p, ("chooseproc: no proc on busy queue"));
137 TAILQ_REMOVE(q, p, p_procq);
139 *which &= ~(1 << pri);
/* The process leaves the userland run queue; clear its bookkeeping flag. */
140 KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq6!"));
141 p->p_flag &= ~P_ONRUNQ;
147 * setrunqueue() 'wakes up' a 'user' process, which can mean several things.
149 * If P_CP_RELEASED is set the user process is under the control of the
150 * LWKT subsystem and we simply wake the thread up. This is ALWAYS the
151 * case when setrunqueue() is called from wakeup() and, in fact wakeup()
152 * asserts that P_CP_RELEASED is set.
154 * Note that acquire_curproc() already optimizes making the current process
155 * P_CURPROC, so setrunqueue() does not need to.
157 * If P_CP_RELEASED is not set we place the process on the run queue and we
158 * signal other cpus in the system that may need to be woken up to service
159 * the new 'user' process.
161 * The associated thread must NOT be scheduled.
162 * The process must be runnable.
163 * This must be called at splhigh().
166 setrunqueue(struct proc *p)
/* Sanity: runnable, not already queued/designated, thread not LWKT-queued. */
174 KASSERT(p->p_stat == SRUN, ("setrunqueue: proc not SRUN"));
175 KASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC)) == 0,
176 ("process %d already on runq! flag %08x", p->p_pid, p->p_flag));
177 KKASSERT((p->p_thread->td_flags & TDF_RUNQ) == 0);
180 * If we have been released from the userland scheduler we
181 * directly schedule its thread.
183 if (p->p_flag & P_CP_RELEASED) {
184 lwkt_schedule(p->p_thread);
190 * Otherwise place this process on the userland scheduler's run
/*
 * NOTE(review): the assignments of 'q' to the per-class queue slot
 * (q = &queues[pri] etc.) are elided from this excerpt — confirm
 * against the full source.
 */
194 p->p_flag |= P_ONRUNQ;
195 if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
/* Scale the 0-127 normal priority down to a 0-31 queue index (>> 2). */
196 pri = p->p_priority >> 2;
198 queuebits |= 1 << pri;
199 } else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
200 p->p_rtprio.type == RTP_PRIO_FIFO) {
/* Realtime/fifo: the explicit rtprio value is the queue index directly. */
201 pri = (u_int8_t)p->p_rtprio.prio;
203 rtqueuebits |= 1 << pri;
204 } else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
205 pri = (u_int8_t)p->p_rtprio.prio;
207 idqueuebits |= 1 << pri;
209 panic("setrunqueue: invalid rtprio type");
212 p->p_rqindex = pri; /* remember the queue index */
213 TAILQ_INSERT_TAIL(q, p, p_procq);
216 * Wakeup other cpus to schedule the newly available thread.
217 * XXX doesn't really have to be in a critical section.
218 * We own giant after all.
/*
 * Candidate cpus: no designated user process (curprocmask clear),
 * helper thread parked (rdyprocmask set), and not this cpu.
 * NOTE(review): the extraction of 'cpuid' from 'mask' (presumably via
 * bsfl()) is elided from this excerpt.
 */
220 if ((mask = ~curprocmask & rdyprocmask & mycpu->gd_other_cpus) != 0) {
/* Wake at most one helper per queued runnable process. */
221 int count = runqcount;
222 while (mask && count) {
224 KKASSERT((curprocmask & (1 << cpuid)) == 0);
/* Mark the helper busy before waking it so it is not woken twice. */
225 rdyprocmask &= ~(1 << cpuid);
226 lwkt_schedule(&globaldata_find(cpuid)->gd_schedthread);
228 mask &= ~(1 << cpuid);
235 * remrunqueue() removes a given process from the run queue that it is on,
236 * clearing the queue busy bit if it becomes empty. This function is called
237 * when a userland process is selected for LWKT scheduling. Note that
238 * LWKT scheduling is an abstraction of 'curproc'.. there could very well be
239 * several userland processes whose threads are scheduled or otherwise in
240 * a special state, and such processes are NOT on the userland scheduler's
243 * This must be called at splhigh().
246 remrunqueue(struct proc *p)
253 KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq4!"));
254 p->p_flag &= ~P_ONRUNQ;
/* NOTE(review): the runqcount decrement and the 'pri'/'q' assignments
 * (presumably pri = p->p_rqindex; q = &<class>queues[pri]) are elided
 * from this excerpt — confirm against the full source. */
256 KKASSERT(runqcount >= 0);
258 if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
261 } else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
262 p->p_rtprio.type == RTP_PRIO_FIFO) {
264 which = &rtqueuebits;
265 } else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
267 which = &idqueuebits;
269 panic("remrunqueue: invalid rtprio type");
271 TAILQ_REMOVE(q, p, p_procq);
/* Last entry removed: the class bitmap must show this queue busy; clear it. */
272 if (TAILQ_EMPTY(q)) {
273 KASSERT((*which & (1 << pri)) != 0,
274 ("remrunqueue: remove from empty queue"));
275 *which &= ~(1 << pri);
281 * Release the P_CURPROC designation on the CURRENT process only. This
282 * will allow another userland process to be scheduled. If we do not
283 * have or cannot get the MP lock we just wakeup the scheduler thread for
286 * WARNING! The MP lock may be in an unsynchronized state due to the
287 * way get_mplock() works and the fact that this function may be called
288 * from a passive release during a lwkt_switch(). try_mplock() will deal
289 * with this for us but you should be aware that td_mpcount may not be
293 release_curproc(struct proc *p)
298 #ifdef ONLY_ONE_USER_CPU
299 KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_cpu == 0);
303 cpuid = p->p_thread->td_cpu;
/* Hand the process back to the LWKT layer regardless of which path runs. */
304 p->p_flag |= P_CP_RELEASED;
305 if (p->p_flag & P_CURPROC) {
306 p->p_flag &= ~P_CURPROC;
308 KKASSERT(curprocmask & (1 << cpuid));
/* Pass this cpu's user-process slot straight to the best queued process. */
309 if ((np = chooseproc()) != NULL) {
310 np->p_flag |= P_CURPROC;
311 lwkt_acquire(np->p_thread);
312 lwkt_schedule(np->p_thread);
/* No candidate: free the slot instead. */
314 curprocmask &= ~(1 << cpuid);
/*
 * NOTE(review): lines between the two curprocmask clears are elided —
 * this second clear appears to belong to a different branch (e.g. the
 * could-not-get-MP-lock path described above); it frees the slot and
 * wakes this cpu's parked helper thread to do the rescheduling.
 */
318 curprocmask &= ~(1 << cpuid);
319 if (rdyprocmask & (1 << cpuid))
320 lwkt_schedule(&globaldata_find(cpuid)->gd_schedthread);
327 * Acquire the P_CURPROC designation on the CURRENT process only. This
328 * function is called prior to returning to userland. If the system
329 * call or trap did not block and if no reschedule was requested it is
330 * highly likely that the P_CURPROC flag is still set in the proc, and
331 * we do almost nothing here.
334 acquire_curproc(struct proc *p)
340 * Short cut, we've already acquired the designation or we never
341 * lost it in the first place.
343 if ((p->p_flag & P_CURPROC) != 0)
347 * Long cut. This pulls in a bit of the userland scheduler as
348 * an optimization. If our cpu has not scheduled a userland
349 * process we gladly fill the slot, otherwise we choose the best
350 * candidate from the run queue and compare it against ourselves,
351 * scheduling either us or him depending.
353 * If our cpu's slot isn't free we put ourselves on the userland
354 * run queue and switch away. We should have P_CURPROC when we
355 * come back. Note that a cpu change can occur when we come back.
357 * YYY don't need critical section, we hold giant and no interrupt
358 * will mess w/ this proc? Or will it? What about curprocmask?
360 #ifdef ONLY_ONE_USER_CPU
361 KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_cpu == 0);
/* We are re-entering the userland scheduler's control. */
364 p->p_flag &= ~P_CP_RELEASED;
/* Loop until this process holds the P_CURPROC designation. */
365 while ((p->p_flag & P_CURPROC) == 0) {
366 cpuid = p->p_thread->td_cpu; /* load/reload cpuid */
367 if ((curprocmask & (1 << cpuid)) == 0) {
/* Slot is free: claim it, then arbitrate against the best queued proc. */
368 curprocmask |= 1 << cpuid;
369 if ((np = chooseproc()) != NULL) {
370 KKASSERT((np->p_flag & P_CP_RELEASED) == 0);
/* Queued candidate wins: give it the slot and schedule it here. */
371 if (test_resched(p, np)) {
372 np->p_flag |= P_CURPROC;
373 lwkt_acquire(np->p_thread);
374 lwkt_schedule(np->p_thread);
/* We beat the candidate (NOTE(review): its requeue is elided here). */
376 p->p_flag |= P_CURPROC;
/* Run queue empty: we take the slot uncontested. */
380 p->p_flag |= P_CURPROC;
/*
 * Still not designated: park this thread and switch away; we are
 * expected to be on the userland run queue and will be rescheduled
 * by another cpu.  (NOTE(review): the setrunqueue()/switch calls are
 * elided from this excerpt — confirm against the full source.)
 */
383 if ((p->p_flag & P_CURPROC) == 0) {
384 lwkt_deschedule_self();
/* Postcondition: designated, not on the run queue, not released. */
387 KKASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC|P_CP_RELEASED)) == P_CURPROC);
394 * Yield / synchronous reschedule. This is a bit tricky because the trap
395 * code might have set a lazy release on the switch function. The first
396 * thing we do is call lwkt_switch() to resolve the lazy release (if any).
397 * Then, if we are a process, we want to allow another process to run.
399 * The only way to do that is to acquire and then release P_CURPROC. We
400 * have to release it because the kernel expects it to be released as a
401 * sanity check when it goes to sleep.
403 * XXX we need a way to ensure that we wake up eventually from a yield,
404 * even if we are an idprio process.
/*
 * NOTE(review): the function name/signature and the remainder of the
 * body are elided from this excerpt; only the local declarations below
 * are visible.
 */
409 struct thread *td = curthread;
/* p may be NULL for pure kernel threads with no attached process. */
410 struct proc *p = td->td_proc;
421 * For SMP systems a user scheduler helper thread is created for each
422 * cpu and is used to allow one cpu to wakeup another for the purposes of
423 * scheduling userland threads from setrunqueue(). UP systems do not
424 * need the helper since there is only one cpu. We can't use the idle
425 * thread for this because we need to hold the MP lock. Additionally,
426 * doing things this way allows us to HLT idle cpus on MP systems.
432 sched_thread(void *dummy)
433 {
434 int cpuid = mycpu->gd_cpuid; /* doesn't change */
435 u_int32_t cpumask = 1 << cpuid; /* doesn't change */
437 #ifdef ONLY_ONE_USER_CPU
438 KKASSERT(cpuid == 0);
441 get_mplock(); /* hold the MP lock */
/*
 * Park-and-serve loop (NOTE(review): the enclosing loop construct is
 * elided from this excerpt).  Advertise ourselves as ready, then
 * deschedule; setrunqueue() clears our rdyprocmask bit before waking
 * us, which interlocks against double wakeups.
 */
445 rdyprocmask |= cpumask;
446 lwkt_deschedule_self(); /* interlock */
/* On wakeup: if this cpu has no designated user process, install one. */
448 if ((curprocmask & cpumask) == 0 && (np = chooseproc()) != NULL) {
449 curprocmask |= cpumask;
450 np->p_flag |= P_CURPROC;
451 lwkt_acquire(np->p_thread);
452 lwkt_schedule(np->p_thread);
/*
 * Per-cpu initialization of the userland scheduler: create this cpu's
 * helper thread and open the cpu up for scheduling user processes.
 */
460 sched_thread_init(void)
462 int cpuid = mycpu->gd_cpuid;
/* Created with TDF_STOPREQ, i.e. not yet runnable until woken. */
464 lwkt_create(sched_thread, NULL, NULL, &mycpu->gd_schedthread,
465 TDF_STOPREQ, "usched %d", cpuid);
466 curprocmask &= ~(1 << cpuid); /* schedule user proc on cpu */
467 #ifdef ONLY_ONE_USER_CPU
/*
 * Debug mode: re-set the bit so user processes never run here.
 * NOTE(review): an elided line presumably restricts this to cpuid != 0
 * so the BSP still runs user processes — confirm against full source.
 */
469 curprocmask |= 1 << cpuid; /* DISABLE USER PROCS */
/* Mark the helper as parked/ready so setrunqueue() can wake it. */
471 rdyprocmask |= 1 << cpuid;