kernel - Fix sysclock_t comparison in usched code
[dragonfly.git] / sys / kern / usched_bsd4.c
38b25931 1/*
e28d8b15
MD
2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
3 * Copyright (c) 1999 Peter Wemm <peter@FreeBSD.org>. All rights reserved.
4 *
5 * This code is derived from software contributed to The DragonFly Project
6 * by Matthew Dillon <dillon@backplane.com>,
7 * by Mihai Carabas <mihai.carabas@gmail.com>
8 * and many others.
38b25931
MD
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
38b25931
MD
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/lock.h>
36#include <sys/queue.h>
37#include <sys/proc.h>
38#include <sys/rtprio.h>
38b25931
MD
39#include <sys/uio.h>
40#include <sys/sysctl.h>
41#include <sys/resourcevar.h>
52eedfb5 42#include <sys/spinlock.h>
d6d39bc7 43#include <sys/cpu_topology.h>
52eedfb5
MD
44#include <sys/thread2.h>
45#include <sys/spinlock2.h>
684a93c4 46#include <sys/mplock2.h>
52eedfb5 47
d6d39bc7
MC
48#include <sys/ktr.h>
49
50#include <machine/cpu.h>
51#include <machine/smp.h>
52
38b25931
MD
53/*
54 * Priorities. Note that with 32 run queues per scheduler each queue
55 * represents four priority levels.
56 */
57
58#define MAXPRI 128
59#define PRIMASK (MAXPRI - 1)
60#define PRIBASE_REALTIME 0
61#define PRIBASE_NORMAL MAXPRI
62#define PRIBASE_IDLE (MAXPRI * 2)
63#define PRIBASE_THREAD (MAXPRI * 3)
64#define PRIBASE_NULL (MAXPRI * 4)
65
66#define NQS 32 /* 32 run queues. */
67#define PPQ (MAXPRI / NQS) /* priorities per queue */
52eedfb5 68#define PPQMASK (PPQ - 1)
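/*
 * Illustrative example: with MAXPRI 128 and NQS 32, PPQ is 4, so a
 * normal-class priority of 57 maps to run queue index
 * (57 & PRIMASK) / PPQ = 14, and PPQMASK (3) masks off the sub-queue
 * priority bits when queue levels are compared.
 */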
38b25931
MD
69
70/*
71 * NICEPPQ - number of nice units per priority queue
38b25931
MD
72 *
73 * ESTCPUPPQ - number of estcpu units per priority queue
74 * ESTCPUMAX - number of estcpu units
38b25931
MD
75 */
76#define NICEPPQ 2
38b25931
MD
77#define ESTCPUPPQ 512
78#define ESTCPUMAX (ESTCPUPPQ * NQS)
52cac9fb 79#define BATCHMAX (ESTCPUFREQ * 30)
38b25931
MD
80#define PRIO_RANGE (PRIO_MAX - PRIO_MIN + 1)
81
82#define ESTCPULIM(v) min((v), ESTCPUMAX)
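/*
 * Illustrative arithmetic: ESTCPUMAX = ESTCPUPPQ * NQS = 512 * 32 = 16384
 * estcpu units, and ESTCPULIM() clamps a computed estcpu to that ceiling.
 * BATCHMAX corresponds to roughly 30 seconds worth of scheduler ticks
 * (ESTCPUFREQ ticks per second).
 */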
83
553ea3c8 84TAILQ_HEAD(rq, lwp);
38b25931 85
553ea3c8
SS
86#define lwp_priority lwp_usdata.bsd4.priority
87#define lwp_rqindex lwp_usdata.bsd4.rqindex
553ea3c8 88#define lwp_estcpu lwp_usdata.bsd4.estcpu
52cac9fb 89#define lwp_batch lwp_usdata.bsd4.batch
52eedfb5 90#define lwp_rqtype lwp_usdata.bsd4.rqtype
38b25931 91
553ea3c8
SS
92static void bsd4_acquire_curproc(struct lwp *lp);
93static void bsd4_release_curproc(struct lwp *lp);
38b25931 94static void bsd4_select_curproc(globaldata_t gd);
553ea3c8 95static void bsd4_setrunqueue(struct lwp *lp);
553ea3c8 96static void bsd4_schedulerclock(struct lwp *lp, sysclock_t period,
38b25931 97 sysclock_t cpstamp);
52eedfb5 98static void bsd4_recalculate_estcpu(struct lwp *lp);
553ea3c8
SS
99static void bsd4_resetpriority(struct lwp *lp);
100static void bsd4_forking(struct lwp *plp, struct lwp *lp);
52cac9fb 101static void bsd4_exiting(struct lwp *lp, struct proc *);
e28d8b15 102static void bsd4_uload_update(struct lwp *lp);
c3149361 103static void bsd4_yield(struct lwp *lp);
38b25931 104
52eedfb5 105#ifdef SMP
e28d8b15
MD
106static void bsd4_need_user_resched_remote(void *dummy);
107static int bsd4_batchy_looser_pri_test(struct lwp* lp);
108static struct lwp *bsd4_chooseproc_locked_cache_coherent(struct lwp *chklp);
696ae4d4 109static void bsd4_kick_helper(struct lwp *lp);
52eedfb5 110#endif
e28d8b15 111static struct lwp *bsd4_chooseproc_locked(struct lwp *chklp);
52eedfb5
MD
112static void bsd4_remrunqueue_locked(struct lwp *lp);
113static void bsd4_setrunqueue_locked(struct lwp *lp);
38b25931
MD
114
115struct usched usched_bsd4 = {
116 { NULL },
117 "bsd4", "Original DragonFly Scheduler",
cb7f4ab1
MD
118 NULL, /* default registration */
119 NULL, /* default deregistration */
38b25931
MD
120 bsd4_acquire_curproc,
121 bsd4_release_curproc,
38b25931 122 bsd4_setrunqueue,
38b25931
MD
123 bsd4_schedulerclock,
124 bsd4_recalculate_estcpu,
125 bsd4_resetpriority,
126 bsd4_forking,
cb7f4ab1 127 bsd4_exiting,
e28d8b15 128 bsd4_uload_update,
c3149361
MD
129 NULL, /* setcpumask not supported */
130 bsd4_yield
38b25931
MD
131};
132
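/*
 * Per-cpu scheduler state: the helper thread used to kick this cpu, the
 * round-robin tick counter, the priority (upri) and identity (uschedcp)
 * of the currently designated user lwp, the previously designated lwp
 * (kept only for KTR debug prints), and this cpu's topology node.
 */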
52eedfb5 133struct usched_bsd4_pcpu {
d6d39bc7
MC
134 struct thread helper_thread;
135 short rrcount;
136 short upri;
137 struct lwp *uschedcp;
138 struct lwp *old_uschedcp;
139#ifdef SMP
140 cpu_node_t *cpunode;
141#endif
52eedfb5
MD
142};
143
144typedef struct usched_bsd4_pcpu *bsd4_pcpu_t;
145
38b25931
MD
146/*
147 * We have NQS (32) run queues per scheduling class. For the normal
148 * class, there are 128 priorities scaled onto these 32 queues. New
149 * processes are added to the last entry in each queue, and processes
150 * are selected for running by taking them from the head and maintaining
151 * a simple FIFO arrangement. Realtime and Idle priority processes have
152 * an explicit 0-31 priority which maps directly onto their class queue
153 * index. When a queue has something in it, the corresponding bit is
154 * set in the queuebits variable, allowing a single read to determine
155 * the state of all 32 queues and then a ffs() to find the first busy
156 * queue.
157 */
52eedfb5
MD
158static struct rq bsd4_queues[NQS];
159static struct rq bsd4_rtqueues[NQS];
160static struct rq bsd4_idqueues[NQS];
161static u_int32_t bsd4_queuebits;
162static u_int32_t bsd4_rtqueuebits;
163static u_int32_t bsd4_idqueuebits;
164static cpumask_t bsd4_curprocmask = -1; /* currently running a user process */
165static cpumask_t bsd4_rdyprocmask; /* ready to accept a user process */
166static int bsd4_runqcount;
38b25931 167#ifdef SMP
52eedfb5 168static volatile int bsd4_scancpu;
38b25931 169#endif
52eedfb5
MD
170static struct spinlock bsd4_spin;
171static struct usched_bsd4_pcpu bsd4_pcpu[MAXCPU];
d6d39bc7
MC
172static struct sysctl_ctx_list usched_bsd4_sysctl_ctx;
173static struct sysctl_oid *usched_bsd4_sysctl_tree;
174
175/* Debug info exposed through debug.* sysctl */
38b25931 176
e28d8b15
MD
177SYSCTL_INT(_debug, OID_AUTO, bsd4_runqcount, CTLFLAG_RD,
178 &bsd4_runqcount, 0,
179 "Number of run queues");
d6d39bc7
MC
180
181static int usched_bsd4_debug = -1;
e28d8b15
MD
182SYSCTL_INT(_debug, OID_AUTO, bsd4_scdebug, CTLFLAG_RW,
183 &usched_bsd4_debug, 0,
184 "Print debug information for this pid");
38b25931 185
e28d8b15
MD
186static int usched_bsd4_pid_debug = -1;
187SYSCTL_INT(_debug, OID_AUTO, bsd4_pid_debug, CTLFLAG_RW,
188 &usched_bsd4_pid_debug, 0,
189 "Print KTR debug information for this pid");
d6d39bc7
MC
190
191 /* Tuning usched_bsd4 - configurable through kern.usched_bsd4.* */
192#ifdef SMP
193static int usched_bsd4_smt = 0;
194static int usched_bsd4_cache_coherent = 0;
195static int usched_bsd4_upri_affinity = 16; /* 32 queues - half-way */
196static int usched_bsd4_queue_checks = 5;
197static int usched_bsd4_stick_to_level = 0;
696ae4d4 198static long usched_bsd4_kicks;
d6d39bc7 199#endif
38b25931 200static int usched_bsd4_rrinterval = (ESTCPUFREQ + 9) / 10;
52cac9fb 201static int usched_bsd4_decay = 8;
52cac9fb 202static int usched_bsd4_batch_time = 10;
d6d39bc7
MC
203
204/* KTR debug printings */
205
c016171e 206KTR_INFO_MASTER_EXTERN(usched);
d6d39bc7
MC
207
208#if !defined(KTR_USCHED_BSD4)
209#define KTR_USCHED_BSD4 KTR_ALL
210#endif
211
212KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_acquire_curproc_urw, 0,
213 "USCHED_BSD4(bsd4_acquire_curproc in user_reseched_wanted "
214 "after release: pid %d, cpuid %d, curr_cpuid %d)",
215 pid_t pid, int cpuid, int curr);
216KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_acquire_curproc_before_loop, 0,
217 "USCHED_BSD4(bsd4_acquire_curproc before loop: pid %d, cpuid %d, "
218 "curr_cpuid %d)",
219 pid_t pid, int cpuid, int curr);
220KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_acquire_curproc_not, 0,
221 "USCHED_BSD4(bsd4_acquire_curproc couldn't acquire after "
222 "bsd4_setrunqueue: pid %d, cpuid %d, curr_lp pid %d, curr_cpuid %d)",
223 pid_t pid, int cpuid, pid_t curr_pid, int curr_cpuid);
224KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_acquire_curproc_switch, 0,
225 "USCHED_BSD4(bsd4_acquire_curproc after lwkt_switch: pid %d, "
226 "cpuid %d, curr_cpuid %d)",
227 pid_t pid, int cpuid, int curr);
228
229KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_release_curproc, 0,
230 "USCHED_BSD4(bsd4_release_curproc before select: pid %d, "
231 "cpuid %d, curr_cpuid %d)",
232 pid_t pid, int cpuid, int curr);
233
234KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_select_curproc, 0,
235 "USCHED_BSD4(bsd4_release_curproc before select: pid %d, "
236 "cpuid %d, old_pid %d, old_cpuid %d, curr_cpuid %d)",
237 pid_t pid, int cpuid, pid_t old_pid, int old_cpuid, int curr);
238
239#ifdef SMP
240KTR_INFO(KTR_USCHED_BSD4, usched, batchy_test_false, 0,
241 "USCHED_BSD4(batchy_looser_pri_test false: pid %d, "
242 "cpuid %d, verify_mask %lu)",
243 pid_t pid, int cpuid, cpumask_t mask);
244KTR_INFO(KTR_USCHED_BSD4, usched, batchy_test_true, 0,
245 "USCHED_BSD4(batchy_looser_pri_test true: pid %d, "
246 "cpuid %d, verify_mask %lu)",
247 pid_t pid, int cpuid, cpumask_t mask);
248
249KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_fc_smt, 0,
250 "USCHED_BSD4(bsd4_setrunqueue free cpus smt: pid %d, cpuid %d, "
251 "mask %lu, curr_cpuid %d)",
252 pid_t pid, int cpuid, cpumask_t mask, int curr);
253KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_fc_non_smt, 0,
254 "USCHED_BSD4(bsd4_setrunqueue free cpus check non_smt: pid %d, "
255 "cpuid %d, mask %lu, curr_cpuid %d)",
256 pid_t pid, int cpuid, cpumask_t mask, int curr);
257KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_rc, 0,
258 "USCHED_BSD4(bsd4_setrunqueue running cpus check: pid %d, "
259 "cpuid %d, mask %lu, curr_cpuid %d)",
260 pid_t pid, int cpuid, cpumask_t mask, int curr);
261KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_found, 0,
262 "USCHED_BSD4(bsd4_setrunqueue found cpu: pid %d, cpuid %d, "
263 "mask %lu, found_cpuid %d, curr_cpuid %d)",
264 pid_t pid, int cpuid, cpumask_t mask, int found_cpuid, int curr);
265KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_not_found, 0,
266 "USCHED_BSD4(bsd4_setrunqueue not found cpu: pid %d, cpuid %d, "
267 "try_cpuid %d, curr_cpuid %d)",
268 pid_t pid, int cpuid, int try_cpuid, int curr);
269KTR_INFO(KTR_USCHED_BSD4, usched, bsd4_setrunqueue_found_best_cpuid, 0,
270 "USCHED_BSD4(bsd4_setrunqueue found cpu: pid %d, cpuid %d, "
271 "mask %lu, found_cpuid %d, curr_cpuid %d)",
272 pid_t pid, int cpuid, cpumask_t mask, int found_cpuid, int curr);
273#endif
274
275KTR_INFO(KTR_USCHED_BSD4, usched, chooseproc, 0,
276 "USCHED_BSD4(chooseproc: pid %d, old_cpuid %d, curr_cpuid %d)",
277 pid_t pid, int old_cpuid, int curr);
278#ifdef SMP
279KTR_INFO(KTR_USCHED_BSD4, usched, chooseproc_cc, 0,
280 "USCHED_BSD4(chooseproc_cc: pid %d, old_cpuid %d, curr_cpuid %d)",
281 pid_t pid, int old_cpuid, int curr);
282KTR_INFO(KTR_USCHED_BSD4, usched, chooseproc_cc_not_good, 0,
283 "USCHED_BSD4(chooseproc_cc not good: pid %d, old_cpumask %lu, "
284 "sibling_mask %lu, curr_cpumask %lu)",
285 pid_t pid, cpumask_t old_cpumask, cpumask_t sibling_mask, cpumask_t curr);
286KTR_INFO(KTR_USCHED_BSD4, usched, chooseproc_cc_elected, 0,
287 "USCHED_BSD4(chooseproc_cc elected: pid %d, old_cpumask %lu, "
288 "sibling_mask %lu, curr_cpumask: %lu)",
289 pid_t pid, cpumask_t old_cpumask, cpumask_t sibling_mask, cpumask_t curr);
290
291KTR_INFO(KTR_USCHED_BSD4, usched, sched_thread_no_process, 0,
292 "USCHED_BSD4(sched_thread %d no process scheduled: pid %d, old_cpuid %d)",
293 int id, pid_t pid, int cpuid);
294KTR_INFO(KTR_USCHED_BSD4, usched, sched_thread_process, 0,
295 "USCHED_BSD4(sched_thread %d process scheduled: pid %d, old_cpuid %d)",
296 int id, pid_t pid, int cpuid);
297KTR_INFO(KTR_USCHED_BSD4, usched, sched_thread_no_process_found, 0,
298 "USCHED_BSD4(sched_thread %d no process found; tmpmask %lu)",
299 int id, cpumask_t tmpmask);
300#endif
38b25931
MD
301
302/*
303 * Initialize the run queues at boot time.
304 */
305static void
e28d8b15 306bsd4_rqinit(void *dummy)
38b25931
MD
307{
308 int i;
309
52eedfb5 310 spin_init(&bsd4_spin);
38b25931 311 for (i = 0; i < NQS; i++) {
52eedfb5
MD
312 TAILQ_INIT(&bsd4_queues[i]);
313 TAILQ_INIT(&bsd4_rtqueues[i]);
314 TAILQ_INIT(&bsd4_idqueues[i]);
38b25931 315 }
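	/*
	 * bsd4_curprocmask starts out with all bits set; clear cpu 0's bit
	 * here so the boot cpu can schedule the first user process (init)
	 * when it becomes runnable.
	 */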
da23a592 316 atomic_clear_cpumask(&bsd4_curprocmask, 1);
38b25931 317}
e28d8b15 318SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, bsd4_rqinit, NULL)
38b25931
MD
319
320/*
52eedfb5 321 * BSD4_ACQUIRE_CURPROC
38b25931 322 *
52eedfb5
MD
323 * This function is called when the kernel intends to return to userland.
324 * It is responsible for making the thread the current designated userland
325 * thread for this cpu, blocking if necessary.
326 *
e3e6be1f
MD
327 * The kernel will not depress our LWKT priority until after we return,
328 * in case we have to shove over to another cpu.
329 *
330 * We must determine our thread's disposition before we switch away. This
331 * is very sensitive code.
52eedfb5
MD
332 *
333 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
334 * TO ANOTHER CPU! Because most of the kernel assumes that no migration will
335 * occur, this function is called only under very controlled circumstances.
336 *
52eedfb5 337 * MPSAFE
38b25931 338 */
52eedfb5
MD
339static void
340bsd4_acquire_curproc(struct lwp *lp)
38b25931 341{
b9eb1c19
MD
342 globaldata_t gd;
343 bsd4_pcpu_t dd;
4643740a 344 thread_t td;
85946b6c 345#if 0
b9eb1c19 346 struct lwp *olp;
85946b6c 347#endif
38b25931 348
4643740a
MD
349 /*
350 * Make sure we aren't sitting on a tsleep queue.
351 */
352 td = lp->lwp_thread;
353 crit_enter_quick(td);
354 if (td->td_flags & TDF_TSLEEPQ)
355 tsleep_remove(td);
b9eb1c19 356 bsd4_recalculate_estcpu(lp);
38b25931 357
38b25931 358 /*
b9eb1c19
MD
359 * If a reschedule was requested give another thread the
360 * driver's seat.
38b25931 361 */
b9eb1c19
MD
362 if (user_resched_wanted()) {
363 clear_user_resched();
364 bsd4_release_curproc(lp);
d6d39bc7
MC
365
366 KTR_COND_LOG(usched_bsd4_acquire_curproc_urw,
367 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
368 lp->lwp_proc->p_pid,
369 lp->lwp_thread->td_gd->gd_cpuid,
370 mycpu->gd_cpuid);
38b25931 371 }
38b25931 372
52eedfb5 373 /*
b9eb1c19 374 * Loop until we are the current user thread
52eedfb5 375 */
85946b6c
MD
376 gd = mycpu;
377 dd = &bsd4_pcpu[gd->gd_cpuid];
378
d6d39bc7
MC
379 KTR_COND_LOG(usched_bsd4_acquire_curproc_before_loop,
380 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
381 lp->lwp_proc->p_pid,
382 lp->lwp_thread->td_gd->gd_cpuid,
383 gd->gd_cpuid);
384
52eedfb5 385 do {
b9eb1c19 386 /*
85946b6c 387 * Process any pending events and higher priority threads.
b9eb1c19 388 */
85946b6c 389 lwkt_yield();
b9eb1c19
MD
390
391 /*
392 * Become the currently scheduled user thread for this cpu
393 * if we can do so trivially.
394 *
395 * We can steal another thread's current thread designation
396 * on this cpu since if we are running that other thread
397 * must not be, so we can safely deschedule it.
398 */
399 if (dd->uschedcp == lp) {
eb501f47
MD
400 /*
401 * We are already the current lwp (hot path).
402 */
b9eb1c19
MD
403 dd->upri = lp->lwp_priority;
404 } else if (dd->uschedcp == NULL) {
eb501f47
MD
405 /*
406 * We can trivially become the current lwp.
407 */
da23a592 408 atomic_set_cpumask(&bsd4_curprocmask, gd->gd_cpumask);
b9eb1c19
MD
409 dd->uschedcp = lp;
410 dd->upri = lp->lwp_priority;
411 } else if (dd->upri > lp->lwp_priority) {
eb501f47 412 /*
85946b6c
MD
413 * We can steal the current cpu's lwp designation
414 * away simply by replacing it. The other thread
415 * will stall when it tries to return to userland.
eb501f47 416 */
b9eb1c19
MD
417 dd->uschedcp = lp;
418 dd->upri = lp->lwp_priority;
85946b6c 419 /*
b9eb1c19
MD
420 lwkt_deschedule(olp->lwp_thread);
421 bsd4_setrunqueue(olp);
85946b6c 422 */
b9eb1c19 423 } else {
eb501f47
MD
424 /*
425 * We cannot become the current lwp, place the lp
426 * on the bsd4 run-queue and deschedule ourselves.
85946b6c
MD
427 *
428 * When we are reactivated we will have another
429 * chance.
eb501f47 430 */
b9eb1c19 431 lwkt_deschedule(lp->lwp_thread);
d6d39bc7 432
b9eb1c19 433 bsd4_setrunqueue(lp);
d6d39bc7
MC
434
435 KTR_COND_LOG(usched_bsd4_acquire_curproc_not,
436 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
437 lp->lwp_proc->p_pid,
438 lp->lwp_thread->td_gd->gd_cpuid,
439 dd->uschedcp->lwp_proc->p_pid,
440 gd->gd_cpuid);
441
442
b9eb1c19 443 lwkt_switch();
d6d39bc7 444
85946b6c
MD
445 /*
446 * Reload after a switch or setrunqueue/switch possibly
447 * moved us to another cpu.
448 */
449 gd = mycpu;
450 dd = &bsd4_pcpu[gd->gd_cpuid];
d6d39bc7
MC
451
452 KTR_COND_LOG(usched_bsd4_acquire_curproc_switch,
453 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
454 lp->lwp_proc->p_pid,
455 lp->lwp_thread->td_gd->gd_cpuid,
456 gd->gd_cpuid);
b9eb1c19 457 }
52eedfb5 458 } while (dd->uschedcp != lp);
b9eb1c19 459
4643740a
MD
460 crit_exit_quick(td);
461 KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
52eedfb5
MD
462}
463
464/*
465 * BSD4_RELEASE_CURPROC
466 *
467 * This routine detaches the current thread from the userland scheduler,
b9eb1c19
MD
468 * usually because the thread needs to run or block in the kernel (at
469 * kernel priority) for a while.
52eedfb5
MD
470 *
471 * This routine is also responsible for selecting a new thread to
472 * make the current thread.
473 *
474 * NOTE: This implementation differs from the dummy example in that
475 * bsd4_select_curproc() is able to select the current process, whereas
476 * dummy_select_curproc() is not able to select the current process.
477 * This means we have to NULL out uschedcp.
478 *
479 * Additionally, note that we may already be on a run queue if releasing
480 * via the lwkt_switch() in bsd4_setrunqueue().
481 *
52eedfb5
MD
482 * MPSAFE
483 */
d6d39bc7 484
52eedfb5
MD
485static void
486bsd4_release_curproc(struct lwp *lp)
487{
488 globaldata_t gd = mycpu;
489 bsd4_pcpu_t dd = &bsd4_pcpu[gd->gd_cpuid];
490
491 if (dd->uschedcp == lp) {
b9eb1c19 492 crit_enter();
4643740a 493 KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
d6d39bc7
MC
494
495 KTR_COND_LOG(usched_bsd4_release_curproc,
496 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
497 lp->lwp_proc->p_pid,
498 lp->lwp_thread->td_gd->gd_cpuid,
499 gd->gd_cpuid);
500
52eedfb5 501 dd->uschedcp = NULL; /* don't let lp be selected */
b9eb1c19 502 dd->upri = PRIBASE_NULL;
da23a592 503 atomic_clear_cpumask(&bsd4_curprocmask, gd->gd_cpumask);
d6d39bc7 504 dd->old_uschedcp = lp; /* used only for KTR debug prints */
52eedfb5 505 bsd4_select_curproc(gd);
b9eb1c19 506 crit_exit();
52eedfb5 507 }
38b25931
MD
508}
509
38b25931 510/*
52eedfb5
MD
511 * BSD4_SELECT_CURPROC
512 *
b9eb1c19
MD
513 * Select a new current process for this cpu and clear any pending user
514 * reschedule request. The cpu currently has no current process.
52eedfb5
MD
515 *
516 * This routine is also responsible for equal-priority round-robining,
517 * typically triggered from bsd4_schedulerclock(). In our dummy example
518 * all the 'user' threads are LWKT scheduled all at once and we just
519 * call lwkt_switch().
520 *
b9eb1c19
MD
521 * The calling process is not on the queue and cannot be selected.
522 *
52eedfb5 523 * MPSAFE
38b25931
MD
524 */
525static
526void
52eedfb5 527bsd4_select_curproc(globaldata_t gd)
38b25931 528{
52eedfb5
MD
529 bsd4_pcpu_t dd = &bsd4_pcpu[gd->gd_cpuid];
530 struct lwp *nlp;
531 int cpuid = gd->gd_cpuid;
38b25931 532
52eedfb5 533 crit_enter_gd(gd);
52eedfb5 534
287a8577 535 spin_lock(&bsd4_spin);
d6d39bc7
MC
536#ifdef SMP
537 if(usched_bsd4_cache_coherent)
e28d8b15 538 nlp = bsd4_chooseproc_locked_cache_coherent(dd->uschedcp);
d6d39bc7
MC
539 else
540#endif
e28d8b15 541 nlp = bsd4_chooseproc_locked(dd->uschedcp);
d6d39bc7
MC
542
543 if (nlp) {
544
545 KTR_COND_LOG(usched_bsd4_select_curproc,
546 nlp->lwp_proc->p_pid == usched_bsd4_pid_debug,
547 nlp->lwp_proc->p_pid,
548 nlp->lwp_thread->td_gd->gd_cpuid,
549 dd->old_uschedcp->lwp_proc->p_pid,
550 dd->old_uschedcp->lwp_thread->td_gd->gd_cpuid,
551 gd->gd_cpuid);
552
da23a592 553 atomic_set_cpumask(&bsd4_curprocmask, CPUMASK(cpuid));
52eedfb5
MD
554 dd->upri = nlp->lwp_priority;
555 dd->uschedcp = nlp;
901ecceb 556 dd->rrcount = 0; /* reset round robin */
287a8577 557 spin_unlock(&bsd4_spin);
52eedfb5
MD
558#ifdef SMP
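		/*
		 * The chosen thread may still be owned by another cpu's LWKT
		 * scheduler; lwkt_acquire() pulls ownership to this cpu before
		 * we lwkt_schedule() it.
		 */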
559 lwkt_acquire(nlp->lwp_thread);
38b25931 560#endif
52eedfb5 561 lwkt_schedule(nlp->lwp_thread);
eb501f47
MD
562 } else {
563 spin_unlock(&bsd4_spin);
564 }
d6d39bc7 565
eb501f47 566#if 0
da23a592
MD
567 } else if (bsd4_runqcount && (bsd4_rdyprocmask & CPUMASK(cpuid))) {
568 atomic_clear_cpumask(&bsd4_rdyprocmask, CPUMASK(cpuid));
287a8577 569 spin_unlock(&bsd4_spin);
52eedfb5
MD
570 lwkt_schedule(&dd->helper_thread);
571 } else {
287a8577 572 spin_unlock(&bsd4_spin);
52eedfb5 573 }
eb501f47 574#endif
52eedfb5
MD
575 crit_exit_gd(gd);
576}
d6d39bc7
MC
577#ifdef SMP
578
579/*
580 * batchy_looser_pri_test() - determine if a process is batchy or not
581 * relative to the other processes running in the system
582 */
583static int
e28d8b15 584bsd4_batchy_looser_pri_test(struct lwp* lp)
d6d39bc7
MC
585{
586 cpumask_t mask;
587 bsd4_pcpu_t other_dd;
588 int cpu;
589
590 /* Current running processes */
591 mask = bsd4_curprocmask & smp_active_mask
592 & usched_global_cpumask;
593
594 while(mask) {
595 cpu = BSFCPUMASK(mask);
596 other_dd = &bsd4_pcpu[cpu];
597 if (other_dd->upri - lp->lwp_priority > usched_bsd4_upri_affinity * PPQ) {
598
599 KTR_COND_LOG(usched_batchy_test_false,
600 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
601 lp->lwp_proc->p_pid,
602 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 603 (unsigned long)mask);
d6d39bc7
MC
604
605 return 0;
606 }
607 mask &= ~CPUMASK(cpu);
608 }
609
610 KTR_COND_LOG(usched_batchy_test_true,
611 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
612 lp->lwp_proc->p_pid,
613 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 614 (unsigned long)mask);
d6d39bc7
MC
615
616 return 1;
617}
38b25931 618
d6d39bc7 619#endif
38b25931 620/*
d6d39bc7 621 *
52eedfb5
MD
622 * BSD4_SETRUNQUEUE
623 *
b9eb1c19
MD
624 * Place the specified lwp on the user scheduler's run queue. This routine
625 * must be called with the thread descheduled. The lwp must be runnable.
38b25931 626 *
b9eb1c19 627 * The thread may be the current thread as a special case.
52eedfb5
MD
628 *
629 * MPSAFE
38b25931
MD
630 */
631static void
553ea3c8 632bsd4_setrunqueue(struct lwp *lp)
38b25931 633{
52eedfb5
MD
634 globaldata_t gd;
635 bsd4_pcpu_t dd;
38b25931 636#ifdef SMP
b9eb1c19 637 int cpuid;
38b25931 638 cpumask_t mask;
52eedfb5 639 cpumask_t tmpmask;
38b25931
MD
640#endif
641
52eedfb5
MD
642 /*
643 * First validate the process state relative to the current cpu.
644 * We don't need the spinlock for this, just a critical section.
645 * We are in control of the process.
646 */
38b25931 647 crit_enter();
164b8401 648 KASSERT(lp->lwp_stat == LSRUN, ("setrunqueue: lwp not LSRUN"));
4643740a 649 KASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0,
164b8401 650 ("lwp %d/%d already on runq! flag %08x/%08x", lp->lwp_proc->p_pid,
4643740a 651 lp->lwp_tid, lp->lwp_proc->p_flags, lp->lwp_flags));
553ea3c8 652 KKASSERT((lp->lwp_thread->td_flags & TDF_RUNQ) == 0);
38b25931
MD
653
654 /*
52eedfb5
MD
655 * Note: gd and dd are relative to the target thread's last cpu,
656 * NOT our current cpu.
38b25931 657 */
553ea3c8 658 gd = lp->lwp_thread->td_gd;
52eedfb5 659 dd = &bsd4_pcpu[gd->gd_cpuid];
38b25931
MD
660
661 /*
52eedfb5
MD
662 * This process is not supposed to be scheduled anywhere or assigned
663 * as the current process anywhere. Assert the condition.
38b25931 664 */
52eedfb5 665 KKASSERT(dd->uschedcp != lp);
38b25931 666
b9eb1c19 667#ifndef SMP
38b25931 668 /*
b9eb1c19
MD
669 * If we are not SMP we do not have a scheduler helper to kick
670 * and must directly activate the process if none are scheduled.
38b25931 671 *
b9eb1c19
MD
672 * This is really only an issue when bootstrapping init since
673 * the caller in all other cases will be a user process, and
674 * even if released (dd->uschedcp == NULL), that process will
675 * kickstart the scheduler when it returns to user mode from
676 * the kernel.
38b25931 677 */
b9eb1c19 678 if (dd->uschedcp == NULL) {
da23a592 679 atomic_set_cpumask(&bsd4_curprocmask, gd->gd_cpumask);
52eedfb5
MD
680 dd->uschedcp = lp;
681 dd->upri = lp->lwp_priority;
553ea3c8 682 lwkt_schedule(lp->lwp_thread);
38b25931 683 crit_exit();
38b25931
MD
684 return;
685 }
b9eb1c19 686#endif
38b25931 687
38b25931
MD
688#ifdef SMP
689 /*
52eedfb5
MD
690 * XXX fixme. Could be part of a remrunqueue/setrunqueue
691 * operation when the priority is recalculated, so TDF_MIGRATING
692 * may already be set.
38b25931 693 */
52eedfb5
MD
694 if ((lp->lwp_thread->td_flags & TDF_MIGRATING) == 0)
695 lwkt_giveaway(lp->lwp_thread);
696#endif
50017724
MD
697
698 /*
699 * We lose control of lp the moment we release the spinlock after
700 * having placed lp on the queue. i.e. another cpu could pick it
701 * up and it could exit, or its priority could be further adjusted,
702 * or something like that.
703 */
287a8577 704 spin_lock(&bsd4_spin);
52eedfb5 705 bsd4_setrunqueue_locked(lp);
e28d8b15 706 lp->lwp_rebal_ticks = sched_ticks;
38b25931 707
b9eb1c19 708#ifdef SMP
38b25931 709 /*
b9eb1c19
MD
710 * Kick the scheduler helper on one of the other cpus
711 * and request a reschedule if appropriate.
eb501f47
MD
712 *
713 * NOTE: We check all cpus whose rdyprocmask is set. First we
714 * look for cpus without designated lps, then we look for
715 * cpus with designated lps with a worse priority than our
716 * process.
38b25931 717 */
b9eb1c19 718 ++bsd4_scancpu;
38b25931 719
901ecceb 720 if (usched_bsd4_smt) {
d6d39bc7
MC
721
722 /*
901ecceb
MD
723 * SMT heuristic - Try to schedule on a free physical core.
724 * If no free physical core is found then choose the one that has
725 * an interactive thread.
d6d39bc7
MC
726 */
727
728 int best_cpuid = -1;
729 int min_prio = MAXPRI * MAXPRI;
730 int sibling;
731
732 cpuid = (bsd4_scancpu & 0xFFFF) % ncpus;
733 mask = ~bsd4_curprocmask & bsd4_rdyprocmask & lp->lwp_cpumask &
734 smp_active_mask & usched_global_cpumask;
735
736 KTR_COND_LOG(usched_bsd4_setrunqueue_fc_smt,
737 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
738 lp->lwp_proc->p_pid,
739 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 740 (unsigned long)mask,
d6d39bc7
MC
741 mycpu->gd_cpuid);
742
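		/*
		 * Scan candidate cpus starting at the rotor-derived cpuid and
		 * wrapping around (tmpmask selects the bits at or above cpuid
		 * first).  A candidate whose sibling bits in the parent
		 * cpu_node are also free is used immediately; otherwise
		 * remember the candidate whose SMT sibling runs the best
		 * (lowest) upri as a fallback.
		 */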
743 while (mask) {
744 tmpmask = ~(CPUMASK(cpuid) - 1);
745 if (mask & tmpmask)
746 cpuid = BSFCPUMASK(mask & tmpmask);
747 else
748 cpuid = BSFCPUMASK(mask);
749 gd = globaldata_find(cpuid);
750 dd = &bsd4_pcpu[cpuid];
751
752 if ((dd->upri & ~PPQMASK) >= (lp->lwp_priority & ~PPQMASK)) {
753 if (dd->cpunode->parent_node->members & ~dd->cpunode->members & mask) {
754
755 KTR_COND_LOG(usched_bsd4_setrunqueue_found,
756 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
757 lp->lwp_proc->p_pid,
758 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 759 (unsigned long)mask,
d6d39bc7
MC
760 cpuid,
761 mycpu->gd_cpuid);
762
763 goto found;
764 } else {
765 sibling = BSFCPUMASK(dd->cpunode->parent_node->members &
766 ~dd->cpunode->members);
767 if (min_prio > bsd4_pcpu[sibling].upri) {
768 min_prio = bsd4_pcpu[sibling].upri;
769 best_cpuid = cpuid;
770 }
771 }
772 }
773 mask &= ~CPUMASK(cpuid);
774 }
775
776 if (best_cpuid != -1) {
777 cpuid = best_cpuid;
778 gd = globaldata_find(cpuid);
779 dd = &bsd4_pcpu[cpuid];
780
781 KTR_COND_LOG(usched_bsd4_setrunqueue_found_best_cpuid,
782 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
783 lp->lwp_proc->p_pid,
784 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 785 (unsigned long)mask,
d6d39bc7
MC
786 cpuid,
787 mycpu->gd_cpuid);
b9eb1c19 788
eb501f47 789 goto found;
d6d39bc7
MC
790 }
791 } else {
792 /* Fallback to the original heuristic */
793 cpuid = (bsd4_scancpu & 0xFFFF) % ncpus;
794 mask = ~bsd4_curprocmask & bsd4_rdyprocmask & lp->lwp_cpumask &
795 smp_active_mask & usched_global_cpumask;
796
797 KTR_COND_LOG(usched_bsd4_setrunqueue_fc_non_smt,
798 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
799 lp->lwp_proc->p_pid,
800 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 801 (unsigned long)mask,
d6d39bc7
MC
802 mycpu->gd_cpuid);
803
804 while (mask) {
805 tmpmask = ~(CPUMASK(cpuid) - 1);
806 if (mask & tmpmask)
807 cpuid = BSFCPUMASK(mask & tmpmask);
808 else
809 cpuid = BSFCPUMASK(mask);
810 gd = globaldata_find(cpuid);
811 dd = &bsd4_pcpu[cpuid];
812
813 if ((dd->upri & ~PPQMASK) >= (lp->lwp_priority & ~PPQMASK)) {
814
815 KTR_COND_LOG(usched_bsd4_setrunqueue_found,
816 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
817 lp->lwp_proc->p_pid,
818 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 819 (unsigned long)mask,
d6d39bc7
MC
820 cpuid,
821 mycpu->gd_cpuid);
822
823 goto found;
824 }
825 mask &= ~CPUMASK(cpuid);
826 }
eb501f47
MD
827 }
828
829 /*
830 * Then cpus which might have a currently running lp
831 */
832 mask = bsd4_curprocmask & bsd4_rdyprocmask &
916e604f 833 lp->lwp_cpumask & smp_active_mask & usched_global_cpumask;
eb501f47 834
d6d39bc7
MC
835 KTR_COND_LOG(usched_bsd4_setrunqueue_rc,
836 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
837 lp->lwp_proc->p_pid,
838 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 839 (unsigned long)mask,
d6d39bc7
MC
840 mycpu->gd_cpuid);
841
eb501f47
MD
842 while (mask) {
843 tmpmask = ~(CPUMASK(cpuid) - 1);
844 if (mask & tmpmask)
845 cpuid = BSFCPUMASK(mask & tmpmask);
846 else
847 cpuid = BSFCPUMASK(mask);
848 gd = globaldata_find(cpuid);
849 dd = &bsd4_pcpu[cpuid];
850
d6d39bc7
MC
851 if ((dd->upri & ~PPQMASK) > (lp->lwp_priority & ~PPQMASK)) {
852
853 KTR_COND_LOG(usched_bsd4_setrunqueue_found,
854 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
855 lp->lwp_proc->p_pid,
856 lp->lwp_thread->td_gd->gd_cpuid,
153fa3e0 857 (unsigned long)mask,
d6d39bc7
MC
858 cpuid,
859 mycpu->gd_cpuid);
860
eb501f47 861 goto found;
d6d39bc7 862 }
da23a592 863 mask &= ~CPUMASK(cpuid);
b9eb1c19 864 }
eb501f47
MD
865
866 /*
867 * If we cannot find a suitable cpu we reload from bsd4_scancpu
868 * and round-robin. Other cpus will pick it up as they release their
869 * current lwps or become ready.
870 *
916e604f
MD
871 * Avoid a degenerate system lockup case if usched_global_cpumask
872 * is set to 0 or otherwise does not cover lwp_cpumask.
873 *
eb501f47
MD
874 * We only kick the target helper thread in this case, we do not
875 * set the user resched flag because
876 */
877 cpuid = (bsd4_scancpu & 0xFFFF) % ncpus;
916e604f
MD
878 if ((CPUMASK(cpuid) & usched_global_cpumask) == 0) {
879 cpuid = 0;
880 }
eb501f47
MD
881 gd = globaldata_find(cpuid);
882 dd = &bsd4_pcpu[cpuid];
d6d39bc7
MC
883
884 KTR_COND_LOG(usched_bsd4_setrunqueue_not_found,
885 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
886 lp->lwp_proc->p_pid,
887 lp->lwp_thread->td_gd->gd_cpuid,
888 cpuid,
889 mycpu->gd_cpuid);
890
eb501f47
MD
891found:
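	/*
	 * A target cpu was chosen.  If it is the current cpu we act locally
	 * (reschedule or wake our own helper); otherwise clear the target
	 * from bsd4_rdyprocmask and notify it with an IPI or a helper
	 * wakeup, depending on the relative priority.
	 */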
892 if (gd == mycpu) {
893 spin_unlock(&bsd4_spin);
58bb3381
MD
894 if ((dd->upri & ~PPQMASK) > (lp->lwp_priority & ~PPQMASK)) {
895 if (dd->uschedcp == NULL) {
55b580ae 896 wakeup_mycpu(&dd->helper_thread);
58bb3381
MD
897 } else {
898 need_user_resched();
899 }
900 }
eb501f47
MD
901 } else {
902 atomic_clear_cpumask(&bsd4_rdyprocmask, CPUMASK(cpuid));
903 spin_unlock(&bsd4_spin);
904 if ((dd->upri & ~PPQMASK) > (lp->lwp_priority & ~PPQMASK))
e28d8b15 905 lwkt_send_ipiq(gd, bsd4_need_user_resched_remote, NULL);
eb501f47 906 else
d6d39bc7 907 wakeup(&dd->helper_thread);
eb501f47 908 }
b9eb1c19
MD
909#else
910 /*
911 * Request a reschedule if appropriate.
912 */
287a8577 913 spin_unlock(&bsd4_spin);
b9eb1c19
MD
914 if ((dd->upri & ~PPQMASK) > (lp->lwp_priority & ~PPQMASK)) {
915 need_user_resched();
38b25931
MD
916 }
917#endif
918 crit_exit();
919}
920
921/*
38b25931 922 * This routine is called from a systimer IPI. It MUST be MP-safe and
52eedfb5
MD
923 * the BGL IS NOT HELD ON ENTRY. This routine is called at ESTCPUFREQ on
924 * each cpu.
925 *
de4d4cb0
MD
926 * This routine is called on every sched tick. If the currently running
927 * thread belongs to this scheduler it will be called with a non-NULL lp,
928 * otherwise it will be called with a NULL lp.
929 *
270ac911 930 * MPSAFE
38b25931
MD
931 */
932static
933void
553ea3c8 934bsd4_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
38b25931
MD
935{
936 globaldata_t gd = mycpu;
52eedfb5 937 bsd4_pcpu_t dd = &bsd4_pcpu[gd->gd_cpuid];
38b25931
MD
938
939 /*
de4d4cb0
MD
940 * No impl if no lp running.
941 */
942 if (lp == NULL)
943 return;
944
945 /*
38b25931
MD
946 * Do we need to round-robin? We round-robin 10 times a second.
947 * This should only occur for cpu-bound batch processes.
948 */
52eedfb5
MD
949 if (++dd->rrcount >= usched_bsd4_rrinterval) {
950 dd->rrcount = 0;
38b25931
MD
951 need_user_resched();
952 }
953
954 /*
52cac9fb 955 * Adjust estcpu upward using a real time equivalent calculation.
38b25931 956 */
52cac9fb 957 lp->lwp_estcpu = ESTCPULIM(lp->lwp_estcpu + ESTCPUMAX / ESTCPUFREQ + 1);
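	/*
	 * Illustrative: ESTCPUMAX / ESTCPUFREQ (+1) is added per tick, so a
	 * thread that runs continuously saturates at ESTCPUMAX after about
	 * one second of cpu time, with ESTCPULIM() clamping the overshoot.
	 */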
50017724
MD
958
959 /*
77912481
MD
960 * Spinlocks also hold a critical section so there should not be
961 * any active.
50017724 962 */
0846e4ce 963 KKASSERT(gd->gd_spinlocks == 0);
77912481
MD
964
965 bsd4_resetpriority(lp);
38b25931
MD
966}
967
968/*
52eedfb5 969 * Called from acquire and from kern_synch's one-second timer (one of the
d6d39bc7 970 * callout helper threads) with a critical section held.
38b25931 971 *
52eedfb5
MD
972 * Decay p_estcpu based on the number of ticks we haven't been running
973 * and our p_nice. As the load increases each process observes a larger
974 * number of idle ticks (because other processes are running in them).
975 * This observation leads to a larger correction which tends to make the
976 * system more 'batchy'.
38b25931 977 *
52eedfb5
MD
978 * Note that no recalculation occurs for a process which sleeps and wakes
979 * up in the same tick. That is, a system doing thousands of context
980 * switches per second will still only do serious estcpu calculations
981 * ESTCPUFREQ times per second.
38b25931 982 *
52eedfb5 983 * MPSAFE
38b25931
MD
984 */
985static
d6d39bc7 986void
52eedfb5 987bsd4_recalculate_estcpu(struct lwp *lp)
38b25931 988{
52eedfb5
MD
989 globaldata_t gd = mycpu;
990 sysclock_t cpbase;
52cac9fb
MD
991 sysclock_t ttlticks;
992 int estcpu;
993 int decay_factor;
38b25931
MD
994
995 /*
52eedfb5
MD
996 * We have to subtract periodic to get the last schedclock
997 * timeout time, otherwise we would get the upcoming timeout.
998 * Keep in mind that a process can migrate between cpus and
999 * while the scheduler clock should be very close, boundary
1000 * conditions could lead to a small negative delta.
38b25931 1001 */
52eedfb5 1002 cpbase = gd->gd_schedclock.time - gd->gd_schedclock.periodic;
38b25931 1003
52eedfb5
MD
1004 if (lp->lwp_slptime > 1) {
1005 /*
1006 * Too much time has passed, do a coarse correction.
1007 */
1008 lp->lwp_estcpu = lp->lwp_estcpu >> 1;
1009 bsd4_resetpriority(lp);
1010 lp->lwp_cpbase = cpbase;
1011 lp->lwp_cpticks = 0;
52cac9fb
MD
1012 lp->lwp_batch -= ESTCPUFREQ;
1013 if (lp->lwp_batch < 0)
1014 lp->lwp_batch = 0;
52eedfb5
MD
1015 } else if (lp->lwp_cpbase != cpbase) {
1016 /*
1017 * Adjust estcpu if we are in a different tick. Don't waste
d6d39bc7
MC
1018 * time if we are in the same tick.
1019 *
52eedfb5 1020 * First calculate the number of ticks in the measurement
52cac9fb 1021 * interval. The ttlticks calculation can wind up 0 due to
52eedfb5
MD
1022 * a bug in the handling of lwp_slptime (as yet not found),
1023 * so make sure we do not get a divide by 0 panic.
1024 */
52cac9fb
MD
1025 ttlticks = (cpbase - lp->lwp_cpbase) /
1026 gd->gd_schedclock.periodic;
68a23bee 1027 if ((ssysclock_t)ttlticks < 0) {
52cac9fb
MD
1028 ttlticks = 0;
1029 lp->lwp_cpbase = cpbase;
52eedfb5 1030 }
52cac9fb
MD
1031 if (ttlticks == 0)
1032 return;
1033 updatepcpu(lp, lp->lwp_cpticks, ttlticks);
38b25931 1034
52eedfb5 1035 /*
52cac9fb
MD
1036 * Calculate the percentage of one cpu used factoring in ncpus
1037 * and the load and adjust estcpu. Handle degenerate cases
1038 * by adding 1 to bsd4_runqcount.
1039 *
1040 * estcpu is scaled by ESTCPUMAX.
1041 *
1042 * bsd4_runqcount is the excess number of user processes
1043 * that cannot be immediately scheduled to cpus. We want
1044 * to count these as running to avoid range compression
1045 * in the base calculation (which is the actual percentage
1046 * of one cpu used).
52eedfb5 1047 */
52cac9fb
MD
1048 estcpu = (lp->lwp_cpticks * ESTCPUMAX) *
1049 (bsd4_runqcount + ncpus) / (ncpus * ttlticks);
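		/*
		 * Illustrative example: a thread that used every tick of the
		 * interval (cpticks == ttlticks) on an otherwise idle
		 * single-cpu system (runqcount 0) yields estcpu = ESTCPUMAX,
		 * i.e. 100% of one cpu; excess runnable processes inflate the
		 * result proportionally.
		 */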
38b25931 1050
52eedfb5 1051 /*
52cac9fb
MD
1052 * If estcpu is > 50% we become more batch-like
1053 * If estcpu is <= 50% we become less batch-like
5c559233 1054 *
52cac9fb 1055 * It takes 30 cpu seconds to traverse the entire range.
52eedfb5 1056 */
52cac9fb
MD
1057 if (estcpu > ESTCPUMAX / 2) {
1058 lp->lwp_batch += ttlticks;
1059 if (lp->lwp_batch > BATCHMAX)
1060 lp->lwp_batch = BATCHMAX;
1061 } else {
1062 lp->lwp_batch -= ttlticks;
1063 if (lp->lwp_batch < 0)
1064 lp->lwp_batch = 0;
5c559233 1065 }
344ad853 1066
d6d39bc7 1067 if (usched_bsd4_debug == lp->lwp_proc->p_pid) {
52cac9fb
MD
1068 kprintf("pid %d lwp %p estcpu %3d %3d bat %d cp %d/%d",
1069 lp->lwp_proc->p_pid, lp,
1070 estcpu, lp->lwp_estcpu,
1071 lp->lwp_batch,
1072 lp->lwp_cpticks, ttlticks);
5c559233 1073 }
52cac9fb
MD
1074
1075 /*
1076 * Adjust lp->lwp_estcpu. The decay factor determines how
1077 * quickly lwp_estcpu collapses to its realtime calculation.
1078 * A slower collapse gives us a more accurate number but
1079 * can cause a cpu hog to eat too much cpu before the
1080 * scheduler decides to downgrade it.
1081 *
1082 * NOTE: p_nice is accounted for in bsd4_resetpriority(),
1083 * and not here, but we must still ensure that a
1084 * cpu-bound nice -20 process does not completely
1085 * override a cpu-bound nice +20 process.
1086 *
1087 * NOTE: We must use ESTCPULIM() here to deal with any
1088 * overshoot.
1089 */
1090 decay_factor = usched_bsd4_decay;
1091 if (decay_factor < 1)
1092 decay_factor = 1;
1093 if (decay_factor > 1024)
1094 decay_factor = 1024;
1095
1096 lp->lwp_estcpu = ESTCPULIM(
1097 (lp->lwp_estcpu * decay_factor + estcpu) /
1098 (decay_factor + 1));
1099
d6d39bc7 1100 if (usched_bsd4_debug == lp->lwp_proc->p_pid)
52cac9fb 1101 kprintf(" finalestcpu %d\n", lp->lwp_estcpu);
52eedfb5 1102 bsd4_resetpriority(lp);
52cac9fb 1103 lp->lwp_cpbase += ttlticks * gd->gd_schedclock.periodic;
52eedfb5
MD
1104 lp->lwp_cpticks = 0;
1105 }
38b25931
MD
1106}
1107
1108/*
1109 * Compute the priority of a process when running in user mode.
1110 * Arrange to reschedule if the resulting priority is better
1111 * than that of the current process.
52eedfb5
MD
1112 *
1113 * This routine may be called with any process.
1114 *
1115 * This routine is called by fork1() for initial setup with the process
1116 * off the run queue, and also may be called normally with the process on or
1117 * off the run queue.
1118 *
1119 * MPSAFE
38b25931
MD
1120 */
1121static void
553ea3c8 1122bsd4_resetpriority(struct lwp *lp)
38b25931 1123{
52eedfb5 1124 bsd4_pcpu_t dd;
38b25931 1125 int newpriority;
52eedfb5
MD
1126 u_short newrqtype;
1127 int reschedcpu;
52cac9fb
MD
1128 int checkpri;
1129 int estcpu;
270ac911 1130
38b25931 1131 /*
52eedfb5 1132 * Calculate the new priority and queue type
38b25931 1133 */
52eedfb5 1134 crit_enter();
287a8577 1135 spin_lock(&bsd4_spin);
52eedfb5
MD
1136
1137 newrqtype = lp->lwp_rtprio.type;
1138
1139 switch(newrqtype) {
38b25931 1140 case RTP_PRIO_REALTIME:
f64250e0 1141 case RTP_PRIO_FIFO:
52eedfb5
MD
1142 newpriority = PRIBASE_REALTIME +
1143 (lp->lwp_rtprio.prio & PRIMASK);
1144 break;
38b25931 1145 case RTP_PRIO_NORMAL:
52cac9fb
MD
1146 /*
1147 * Detune estcpu based on batchiness. lwp_batch ranges
1148 * from 0 to BATCHMAX. Limit estcpu for the sake of
1149 * the priority calculation to between 50% and 100%.
1150 */
1151 estcpu = lp->lwp_estcpu * (lp->lwp_batch + BATCHMAX) /
1152 (BATCHMAX * 2);
1153
1154 /*
1155 * p_nice piece Adds (0-40) * 2 0-80
1156 * estcpu Adds 16384 * 4 / 512 0-128
1157 */
52eedfb5 1158 newpriority = (lp->lwp_proc->p_nice - PRIO_MIN) * PPQ / NICEPPQ;
52cac9fb 1159 newpriority += estcpu * PPQ / ESTCPUPPQ;
52eedfb5
MD
1160 newpriority = newpriority * MAXPRI / (PRIO_RANGE * PPQ /
1161 NICEPPQ + ESTCPUMAX * PPQ / ESTCPUPPQ);
1162 newpriority = PRIBASE_NORMAL + (newpriority & PRIMASK);
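		/*
		 * Worked example: nice 0 contributes (0 - PRIO_MIN) * PPQ /
		 * NICEPPQ = 20 * 4 / 2 = 40, a fully cpu-bound estcpu of
		 * ESTCPUMAX contributes 16384 * 4 / 512 = 128, and the sum is
		 * rescaled into the 0..MAXPRI-1 range before being offset
		 * into the PRIBASE_NORMAL class.
		 */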
38b25931
MD
1163 break;
1164 case RTP_PRIO_IDLE:
52eedfb5
MD
1165 newpriority = PRIBASE_IDLE + (lp->lwp_rtprio.prio & PRIMASK);
1166 break;
38b25931 1167 case RTP_PRIO_THREAD:
52eedfb5
MD
1168 newpriority = PRIBASE_THREAD + (lp->lwp_rtprio.prio & PRIMASK);
1169 break;
1170 default:
1171 panic("Bad RTP_PRIO %d", newrqtype);
1172 /* NOT REACHED */
38b25931
MD
1173 }
1174
1175 /*
52eedfb5
MD
1176 * The newpriority incorporates the queue type so do a simple masked
1177 * check to determine if the process has moved to another queue. If
1178 * it has, and it is currently on a run queue, then move it.
d992c377
MD
1179 *
1180 * td_upri has normal sense (higher values are more desirable), so
1181 * negate it.
38b25931 1182 */
d992c377 1183 lp->lwp_thread->td_upri = -(newpriority & ~PPQMASK);
52eedfb5
MD
1184 if ((lp->lwp_priority ^ newpriority) & ~PPQMASK) {
1185 lp->lwp_priority = newpriority;
4643740a 1186 if (lp->lwp_mpflags & LWP_MP_ONRUNQ) {
52eedfb5
MD
1187 bsd4_remrunqueue_locked(lp);
1188 lp->lwp_rqtype = newrqtype;
1189 lp->lwp_rqindex = (newpriority & PRIMASK) / PPQ;
1190 bsd4_setrunqueue_locked(lp);
52cac9fb 1191 checkpri = 1;
52eedfb5
MD
1192 } else {
1193 lp->lwp_rqtype = newrqtype;
1194 lp->lwp_rqindex = (newpriority & PRIMASK) / PPQ;
52cac9fb 1195 checkpri = 0;
52eedfb5 1196 }
52cac9fb 1197 reschedcpu = lp->lwp_thread->td_gd->gd_cpuid;
38b25931 1198 } else {
52eedfb5
MD
1199 lp->lwp_priority = newpriority;
1200 reschedcpu = -1;
52cac9fb 1201 checkpri = 1;
52eedfb5 1202 }
52eedfb5
MD
1203
1204 /*
50017724
MD
1205 * Determine if we need to reschedule the target cpu. This only
1206 * occurs if the LWP is already on a scheduler queue, which means
1207 * that idle cpu notification has already occurred. At most we
1208 * need only issue a need_user_resched() on the appropriate cpu.
281b4fa8
YT
1209 *
1210 * The LWP may be owned by a CPU different from the current one,
1211 * in which case dd->uschedcp may be modified without an MP lock
1212 * or a spinlock held. The worst that happens is that the code
1213 * below causes a spurious need_user_resched() on the target CPU
1214 * and dd->pri to be wrong for a short period of time, both of
1215 * which are harmless.
52cac9fb
MD
1216 *
1217 * If checkpri is 0 we are adjusting the priority of the current
1218 * process, possibly higher (less desirable), so ignore the upri
1219 * check which will fail in that case.
52eedfb5
MD
1220 */
1221 if (reschedcpu >= 0) {
1222 dd = &bsd4_pcpu[reschedcpu];
eb501f47 1223 if ((bsd4_rdyprocmask & CPUMASK(reschedcpu)) &&
52cac9fb
MD
1224 (checkpri == 0 ||
1225 (dd->upri & ~PRIMASK) > (lp->lwp_priority & ~PRIMASK))) {
52eedfb5
MD
1226#ifdef SMP
1227 if (reschedcpu == mycpu->gd_cpuid) {
eb501f47 1228 spin_unlock(&bsd4_spin);
52eedfb5
MD
1229 need_user_resched();
1230 } else {
eb501f47
MD
1231 spin_unlock(&bsd4_spin);
1232 atomic_clear_cpumask(&bsd4_rdyprocmask,
1233 CPUMASK(reschedcpu));
52eedfb5 1234 lwkt_send_ipiq(lp->lwp_thread->td_gd,
e28d8b15
MD
1235 bsd4_need_user_resched_remote,
1236 NULL);
52eedfb5
MD
1237 }
1238#else
eb501f47 1239 spin_unlock(&bsd4_spin);
52eedfb5
MD
1240 need_user_resched();
1241#endif
eb501f47
MD
1242 } else {
1243 spin_unlock(&bsd4_spin);
52eedfb5 1244 }
eb501f47
MD
1245 } else {
1246 spin_unlock(&bsd4_spin);
38b25931
MD
1247 }
1248 crit_exit();
1249}
1250
3919ced0
MD
1251/*
1252 * MPSAFE
1253 */
c3149361
MD
1254static
1255void
d6d39bc7 1256bsd4_yield(struct lwp *lp)
c3149361
MD
1257{
1258#if 0
1259 /* FUTURE (or something similar) */
1260 switch(lp->lwp_rqtype) {
1261 case RTP_PRIO_NORMAL:
1262 lp->lwp_estcpu = ESTCPULIM(lp->lwp_estcpu + ESTCPUINCR);
c3149361
MD
1263 break;
1264 default:
1265 break;
1266 }
1267#endif
1268 need_user_resched();
1269}
1270
38b25931
MD
1271/*
1272 * Called from fork1() when a new child process is being created.
1273 *
1274 * Give the child process an initial estcpu that is more batchy than
1275 * its parent and dock the parent for the fork (but do not
1276 * reschedule the parent). This comprises the main part of our batch
1277 * detection heuristic for both parallel forking and sequential execs.
1278 *
553ea3c8 1279 * XXX lwp should be "spawning" instead of "forking"
270ac911
MD
1280 *
1281 * MPSAFE
38b25931
MD
1282 */
1283static void
553ea3c8 1284bsd4_forking(struct lwp *plp, struct lwp *lp)
38b25931 1285{
52cac9fb
MD
1286 /*
1287 * Put the child 4 queue slots (out of 32) higher than the parent
1288 * (less desirable than the parent).
1289 */
1290 lp->lwp_estcpu = ESTCPULIM(plp->lwp_estcpu + ESTCPUPPQ * 4);
1291
1292 /*
1293 * The batch status of children always starts out centerline
1294 * and will inch-up or inch-down as appropriate. It takes roughly
1295 * ~15 seconds of >50% cpu to hit the limit.
1296 */
1297 lp->lwp_batch = BATCHMAX / 2;
1298
1299 /*
1300 * Dock the parent a cost for the fork, protecting us from fork
1301 * bombs. If the parent is forking quickly make the child more
1302 * batchy.
1303 */
1304 plp->lwp_estcpu = ESTCPULIM(plp->lwp_estcpu + ESTCPUPPQ / 16);
38b25931
MD
1305}
1306
1307/*
e28d8b15
MD
1308 * Called when a lwp is being removed from this scheduler, typically
1309 * during lwp_exit().
38b25931
MD
1310 */
1311static void
52cac9fb 1312bsd4_exiting(struct lwp *lp, struct proc *child_proc)
38b25931 1313{
38b25931
MD
1314}
1315
e28d8b15
MD
1316static void
1317bsd4_uload_update(struct lwp *lp)
1318{
1319}
1320
38b25931 1321/*
52eedfb5
MD
1322 * chooseproc() is called when a cpu needs a user process to LWKT schedule;
1323 * it selects a user process and returns it. If chklp is non-NULL and chklp
1324 * has a better or equal priority than the process that would otherwise be
1325 * chosen, NULL is returned.
38b25931 1326 *
52eedfb5
MD
1327 * Until we fix the RUNQ code the chklp test has to be strict or we may
1328 * bounce between processes trying to acquire the current process designation.
38b25931 1329 *
52eedfb5
MD
1330 * MPSAFE - must be called with bsd4_spin exclusive held. The spinlock is
1331 * left intact through the entire routine.
38b25931
MD
1332 */
1333static
52eedfb5 1334struct lwp *
e28d8b15 1335bsd4_chooseproc_locked(struct lwp *chklp)
38b25931 1336{
52eedfb5
MD
1337 struct lwp *lp;
1338 struct rq *q;
a60ccb85 1339 u_int32_t *which, *which2;
52eedfb5 1340 u_int32_t pri;
a60ccb85
DX
1341 u_int32_t rtqbits;
1342 u_int32_t tsqbits;
1343 u_int32_t idqbits;
1344 cpumask_t cpumask;
38b25931 1345
a60ccb85
DX
1346 rtqbits = bsd4_rtqueuebits;
1347 tsqbits = bsd4_queuebits;
1348 idqbits = bsd4_idqueuebits;
1349 cpumask = mycpu->gd_cpumask;
1350
d6d39bc7 1351
a60ccb85
DX
1352#ifdef SMP
1353again:
1354#endif
1355 if (rtqbits) {
1356 pri = bsfl(rtqbits);
52eedfb5
MD
1357 q = &bsd4_rtqueues[pri];
1358 which = &bsd4_rtqueuebits;
a60ccb85
DX
1359 which2 = &rtqbits;
1360 } else if (tsqbits) {
1361 pri = bsfl(tsqbits);
52eedfb5
MD
1362 q = &bsd4_queues[pri];
1363 which = &bsd4_queuebits;
a60ccb85
DX
1364 which2 = &tsqbits;
1365 } else if (idqbits) {
1366 pri = bsfl(idqbits);
52eedfb5
MD
1367 q = &bsd4_idqueues[pri];
1368 which = &bsd4_idqueuebits;
a60ccb85 1369 which2 = &idqbits;
52eedfb5
MD
1370 } else {
1371 return NULL;
1372 }
1373 lp = TAILQ_FIRST(q);
1374 KASSERT(lp, ("chooseproc: no lwp on busy queue"));
270ac911 1375
a60ccb85
DX
1376#ifdef SMP
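	/*
	 * Skip lwps whose cpu affinity mask (lwp_cpumask) excludes this cpu.
	 * If the whole queue is exhausted this way, clear the bit in our
	 * local copy of the queue bitmap and rescan the remaining queues.
	 */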
1377 while ((lp->lwp_cpumask & cpumask) == 0) {
1378 lp = TAILQ_NEXT(lp, lwp_procq);
1379 if (lp == NULL) {
1380 *which2 &= ~(1 << pri);
1381 goto again;
1382 }
1383 }
1384#endif
1385
38b25931 1386 /*
52eedfb5
MD
1387 * If the passed lwp <chklp> is reasonably close to the selected
1388 * lwp <lp>, return NULL (indicating that <chklp> should be kept).
d6d39bc7 1389 *
52eedfb5
MD
1390 * Note that we must error on the side of <chklp> to avoid bouncing
1391 * between threads in the acquire code.
38b25931 1392 */
52eedfb5
MD
1393 if (chklp) {
1394 if (chklp->lwp_priority < lp->lwp_priority + PPQ)
1395 return(NULL);
1396 }
38b25931 1397
52eedfb5
MD
1398#ifdef SMP
1399 /*
1400 * If the chosen lwp does not reside on this cpu spend a few
1401 * cycles looking for a better candidate at the same priority level.
1402 * This is a fallback check, setrunqueue() tries to wakeup the
1403 * correct cpu and is our front-line affinity.
1404 */
1405 if (lp->lwp_thread->td_gd != mycpu &&
1406 (chklp = TAILQ_NEXT(lp, lwp_procq)) != NULL
1407 ) {
1408 if (chklp->lwp_thread->td_gd == mycpu) {
52eedfb5 1409 lp = chklp;
38b25931 1410 }
52eedfb5
MD
1411 }
1412#endif
38b25931 1413
d6d39bc7
MC
1414 KTR_COND_LOG(usched_chooseproc,
1415 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1416 lp->lwp_proc->p_pid,
1417 lp->lwp_thread->td_gd->gd_cpuid,
1418 mycpu->gd_cpuid);
1419
52eedfb5
MD
1420 TAILQ_REMOVE(q, lp, lwp_procq);
1421 --bsd4_runqcount;
1422 if (TAILQ_EMPTY(q))
1423 *which &= ~(1 << pri);
4643740a
MD
1424 KASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) != 0, ("not on runq6!"));
1425 atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
901ecceb 1426
52eedfb5
MD
1427 return lp;
1428}
38b25931 1429
52eedfb5 1430#ifdef SMP
d6d39bc7
MC
1431/*
1432 * chooseproc() - with a cache coherence heuristic. Try to pull a process that
1433 * has its home on the current CPU.  If the process doesn't have its home here
1434 * and is a batchy one (see bsd4_batchy_looser_pri_test), we can wait for a
1435 * sched_tick; maybe its home will become free and we can pull it in. Anyway,
1436 * we can't wait more than one tick. If that tick expired, we pull in that
1437 * process, no matter what.
1438 */
1439static
1440struct lwp *
e28d8b15 1441bsd4_chooseproc_locked_cache_coherent(struct lwp *chklp)
d6d39bc7
MC
1442{
1443 struct lwp *lp;
1444 struct rq *q;
1445 u_int32_t *which, *which2;
1446 u_int32_t pri;
1447 u_int32_t checks;
1448 u_int32_t rtqbits;
1449 u_int32_t tsqbits;
1450 u_int32_t idqbits;
1451 cpumask_t cpumask;
1452
1453 struct lwp * min_level_lwp = NULL;
1454 struct rq *min_q = NULL;
1455 cpumask_t siblings;
1456 cpu_node_t* cpunode = NULL;
1457 u_int32_t min_level = MAXCPU; /* number of levels < MAXCPU */
1458 u_int32_t *min_which = NULL;
1459 u_int32_t min_pri = 0;
1460 u_int32_t level = 0;
1461
1462 rtqbits = bsd4_rtqueuebits;
1463 tsqbits = bsd4_queuebits;
1464 idqbits = bsd4_idqueuebits;
1465 cpumask = mycpu->gd_cpumask;
1466
1467 /* Get the mask corresponding to the sysctl configured level */
1468 cpunode = bsd4_pcpu[mycpu->gd_cpuid].cpunode;
1469 level = usched_bsd4_stick_to_level;
1470 while (level) {
1471 cpunode = cpunode->parent_node;
1472 level--;
1473 }
1474 /* The cpus which can elect a process */
1475 siblings = cpunode->members;
901ecceb 1476 checks = 0;
d6d39bc7
MC
1477
1478again:
1479 if (rtqbits) {
1480 pri = bsfl(rtqbits);
1481 q = &bsd4_rtqueues[pri];
1482 which = &bsd4_rtqueuebits;
1483 which2 = &rtqbits;
1484 } else if (tsqbits) {
1485 pri = bsfl(tsqbits);
1486 q = &bsd4_queues[pri];
1487 which = &bsd4_queuebits;
1488 which2 = &tsqbits;
1489 } else if (idqbits) {
1490 pri = bsfl(idqbits);
1491 q = &bsd4_idqueues[pri];
1492 which = &bsd4_idqueuebits;
1493 which2 = &idqbits;
1494 } else {
901ecceb
MD
1495 /*
1496 * No more left and we didn't reach the checks limit.
1497 */
e28d8b15 1498 bsd4_kick_helper(min_level_lwp);
d6d39bc7
MC
1499 return NULL;
1500 }
1501 lp = TAILQ_FIRST(q);
1502 KASSERT(lp, ("chooseproc: no lwp on busy queue"));
1503
901ecceb
MD
1504 /*
1505 * Limit the number of checks/queue to a configurable value to
d6d39bc7
MC
1506 * minimize the contention (we are in a locked region).
1507 */
901ecceb 1508 while (checks < usched_bsd4_queue_checks) {
d6d39bc7
MC
1509 if ((lp->lwp_cpumask & cpumask) == 0 ||
1510 ((siblings & lp->lwp_thread->td_gd->gd_cpumask) == 0 &&
e28d8b15
MD
1511 (lp->lwp_rebal_ticks == sched_ticks ||
1512 lp->lwp_rebal_ticks == (int)(sched_ticks - 1)) &&
1513 bsd4_batchy_looser_pri_test(lp))) {
d6d39bc7
MC
1514
1515 KTR_COND_LOG(usched_chooseproc_cc_not_good,
1516 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1517 lp->lwp_proc->p_pid,
153fa3e0
MD
1518 (unsigned long)lp->lwp_thread->td_gd->gd_cpumask,
1519 (unsigned long)siblings,
1520 (unsigned long)cpumask);
d6d39bc7
MC
1521
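			/*
			 * Walk up the cpu topology from the lwp's home cpu
			 * until we reach a node containing the current cpu;
			 * the number of hops is the topology distance used to
			 * pick the least-bad deferred candidate below.
			 */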
1522 cpunode = bsd4_pcpu[lp->lwp_thread->td_gd->gd_cpuid].cpunode;
1523 level = 0;
1524 while (cpunode) {
901ecceb 1525 if (cpunode->members & cpumask)
d6d39bc7 1526 break;
d6d39bc7
MC
1527 cpunode = cpunode->parent_node;
1528 level++;
1529 }
901ecceb
MD
1530 if (level < min_level ||
1531 (level == min_level && min_level_lwp &&
1532 lp->lwp_priority < min_level_lwp->lwp_priority)) {
e28d8b15 1533 bsd4_kick_helper(min_level_lwp);
d6d39bc7
MC
1534 min_level_lwp = lp;
1535 min_level = level;
1536 min_q = q;
1537 min_which = which;
1538 min_pri = pri;
901ecceb 1539 } else {
e28d8b15 1540 bsd4_kick_helper(lp);
d6d39bc7 1541 }
d6d39bc7
MC
1542 lp = TAILQ_NEXT(lp, lwp_procq);
1543 if (lp == NULL) {
1544 *which2 &= ~(1 << pri);
1545 goto again;
1546 }
1547 } else {
1548 KTR_COND_LOG(usched_chooseproc_cc_elected,
1549 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1550 lp->lwp_proc->p_pid,
153fa3e0
MD
1551 (unsigned long)lp->lwp_thread->td_gd->gd_cpumask,
1552 (unsigned long)siblings,
1553 (unsigned long)cpumask);
d6d39bc7
MC
1554
1555 goto found;
1556 }
901ecceb 1557 ++checks;
d6d39bc7 1558 }
901ecceb
MD
1559
1560 /*
1561 * Checks exhausted, we tried to defer too many threads, so schedule
1562 * the best of the worst.
1563 */
d6d39bc7
MC
1564 lp = min_level_lwp;
1565 q = min_q;
1566 which = min_which;
1567 pri = min_pri;
1568 KASSERT(lp, ("chooseproc: at least the first lp was good"));
1569
1570found:
1571
1572 /*
1573 * If the passed lwp <chklp> is reasonably close to the selected
1574 * lwp <lp>, return NULL (indicating that <chklp> should be kept).
1575 *
1576 * Note that we must error on the side of <chklp> to avoid bouncing
1577 * between threads in the acquire code.
1578 */
1579 if (chklp) {
901ecceb 1580 if (chklp->lwp_priority < lp->lwp_priority + PPQ) {
e28d8b15 1581 bsd4_kick_helper(lp);
d6d39bc7 1582 return(NULL);
901ecceb 1583 }
d6d39bc7
MC
1584 }
1585
1586 KTR_COND_LOG(usched_chooseproc_cc,
1587 lp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1588 lp->lwp_proc->p_pid,
1589 lp->lwp_thread->td_gd->gd_cpuid,
1590 mycpu->gd_cpuid);
1591
1592 TAILQ_REMOVE(q, lp, lwp_procq);
1593 --bsd4_runqcount;
1594 if (TAILQ_EMPTY(q))
1595 *which &= ~(1 << pri);
1596 KASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) != 0, ("not on runq6!"));
1597 atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
901ecceb 1598
d6d39bc7
MC
1599 return lp;
1600}
1601
901ecceb
MD
1602/*
1603 * If we aren't willing to schedule a ready process on our cpu, give its
1604 * target cpu a kick rather than wait for the next tick.
1605 *
1606 * Called with bsd4_spin held.
1607 */
1608static
1609void
e28d8b15 1610bsd4_kick_helper(struct lwp *lp)
901ecceb
MD
1611{
1612 globaldata_t gd;
1613 bsd4_pcpu_t dd;
1614
1615 if (lp == NULL)
1616 return;
1617 gd = lp->lwp_thread->td_gd;
1618 dd = &bsd4_pcpu[gd->gd_cpuid];
1619 if ((smp_active_mask & usched_global_cpumask &
1620 bsd4_rdyprocmask & gd->gd_cpumask) == 0) {
1621 return;
1622 }
1623 ++usched_bsd4_kicks;
1624 atomic_clear_cpumask(&bsd4_rdyprocmask, gd->gd_cpumask);
1625 if ((dd->upri & ~PPQMASK) > (lp->lwp_priority & ~PPQMASK)) {
e28d8b15 1626 lwkt_send_ipiq(gd, bsd4_need_user_resched_remote, NULL);
901ecceb
MD
1627 } else {
1628 wakeup(&dd->helper_thread);
1629 }
1630}
b9eb1c19 1631
52eedfb5
MD
1632static
1633void
e28d8b15 1634bsd4_need_user_resched_remote(void *dummy)
52eedfb5 1635{
b9eb1c19
MD
1636 globaldata_t gd = mycpu;
1637 bsd4_pcpu_t dd = &bsd4_pcpu[gd->gd_cpuid];
1638
eb501f47 1639 need_user_resched();
1640
1641 /* Call wakeup_mycpu to avoid sending IPIs to other CPUs */
1642 wakeup_mycpu(&dd->helper_thread);
52eedfb5 1643}
38b25931 1644
52eedfb5 1645#endif
38b25931 1646
1647/*
1648 * bsd4_remrunqueue_locked() removes a given process from the run queue
1649 * that it is on, clearing the queue busy bit if it becomes empty.
1650 *
1651 * Note that the user process scheduler is different from the LWKT scheduler.
1652 * The user process scheduler only manages user processes but it uses LWKT
1653 * underneath, and a user process operating in the kernel will often be
1654 * 'released' from our management.
1655 *
1656 * MPSAFE - bsd4_spin must be held exclusively on call
1657 */
1658static void
1659bsd4_remrunqueue_locked(struct lwp *lp)
1660{
1661 struct rq *q;
1662 u_int32_t *which;
1663 u_int8_t pri;
1664
1665 KKASSERT(lp->lwp_mpflags & LWP_MP_ONRUNQ);
1666 atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
1667 --bsd4_runqcount;
1668 KKASSERT(bsd4_runqcount >= 0);
1669
1670 pri = lp->lwp_rqindex;
1671 switch(lp->lwp_rqtype) {
1672 case RTP_PRIO_NORMAL:
1673 q = &bsd4_queues[pri];
1674 which = &bsd4_queuebits;
1675 break;
1676 case RTP_PRIO_REALTIME:
1677 case RTP_PRIO_FIFO:
1678 q = &bsd4_rtqueues[pri];
1679 which = &bsd4_rtqueuebits;
1680 break;
1681 case RTP_PRIO_IDLE:
1682 q = &bsd4_idqueues[pri];
1683 which = &bsd4_idqueuebits;
1684 break;
1685 default:
1686 panic("remrunqueue: invalid rtprio type");
1687 /* NOT REACHED */
1688 }
1689 TAILQ_REMOVE(q, lp, lwp_procq);
1690 if (TAILQ_EMPTY(q)) {
1691 KASSERT((*which & (1 << pri)) != 0,
1692 ("remrunqueue: remove from empty queue"));
1693 *which &= ~(1 << pri);
1694 }
1695}
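/*
 * The *which bitmasks maintained here (bsd4_queuebits, bsd4_rtqueuebits,
 * bsd4_idqueuebits) carry one bit per run queue, which lets the scheduler
 * find the best non-empty queue with a single find-first-set scan instead
 * of walking all 32 queues.  Roughly (illustrative sketch only):
 *
 *	pri = bsfl(bsd4_queuebits);	 (lowest set bit = best queue)
 *	lp  = TAILQ_FIRST(&bsd4_queues[pri]);
 */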
1696
1697/*
1698 * bsd4_setrunqueue_locked()
1699 *
1700 * Add a process whose rqtype and rqindex have previously been calculated
1701 * onto the appropriate run queue. The addition itself does not trigger a
1702 * reschedule; that decision is left to the caller.
1703 *
1704 * NOTE: Lower priorities are better priorities.
1705 *
1706 * MPSAFE - bsd4_spin must be held exclusively on call
1707 */
1708static void
1709bsd4_setrunqueue_locked(struct lwp *lp)
1710{
1711 struct rq *q;
1712 u_int32_t *which;
1713 int pri;
1714
1715 KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
1716 atomic_set_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
1717 ++bsd4_runqcount;
1718
1719 pri = lp->lwp_rqindex;
1720
1721 switch(lp->lwp_rqtype) {
1722 case RTP_PRIO_NORMAL:
1723 q = &bsd4_queues[pri];
1724 which = &bsd4_queuebits;
1725 break;
1726 case RTP_PRIO_REALTIME:
1727 case RTP_PRIO_FIFO:
1728 q = &bsd4_rtqueues[pri];
1729 which = &bsd4_rtqueuebits;
1730 break;
1731 case RTP_PRIO_IDLE:
1732 q = &bsd4_idqueues[pri];
1733 which = &bsd4_idqueuebits;
1734 break;
1735 default:
1736 panic("setrunqueue: invalid rtprio type");
1737 /* NOT REACHED */
1738 }
1739
1740 /*
1741 * Add to the correct queue and set the appropriate bit. If no
1742 * lower-priority (i.e. better) processes are in the queue then
1743 * we want a reschedule; calculate the best cpu for the job.
1744 *
1745 * Always run reschedules on the LWP's original cpu.
1746 */
1747 TAILQ_INSERT_TAIL(q, lp, lwp_procq);
1748 *which |= 1 << pri;
1749}
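/*
 * Both _locked helpers assume the caller already holds bsd4_spin and that
 * lwp_rqtype/lwp_rqindex are up to date.  A minimal enqueue sequence is
 * therefore (sketch):
 *
 *	spin_lock(&bsd4_spin);
 *	bsd4_setrunqueue_locked(lp);
 *	spin_unlock(&bsd4_spin);
 */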
1750
1751#ifdef SMP
1752
1753/*
1754 * For SMP systems a user scheduler helper thread is created for each
1755 * cpu and is used to allow one cpu to wake up another for the purposes of
1756 * scheduling userland threads from setrunqueue().
1757 *
1758 * UP systems do not need the helper since there is only one cpu.
1759 *
1760 * We can't use the idle thread for this because we might block.
1761 * Additionally, doing things this way allows us to HLT idle cpus
1762 * on MP systems.
1763 *
1764 * MPSAFE
1765 */
1766static void
1767sched_thread(void *dummy)
1768{
1769 globaldata_t gd;
1770 bsd4_pcpu_t dd;
85946b6c 1771 bsd4_pcpu_t tmpdd;
52eedfb5 1772 struct lwp *nlp;
eb501f47 1773 cpumask_t mask;
52eedfb5 1774 int cpuid;
418f19aa 1775 cpumask_t tmpmask;
1776 int tmpid;
1777
1778 gd = mycpu;
1779 cpuid = gd->gd_cpuid; /* doesn't change */
eb501f47 1780 mask = gd->gd_cpumask; /* doesn't change */
1781 dd = &bsd4_pcpu[cpuid];
1782
1783 /*
1784 * Since we are woken up only when no user processes are scheduled
1785 * on a cpu, we can run at an ultra low priority.
52eedfb5 1786 */
50017724 1787 lwkt_setpri_self(TDPRI_USER_SCHEDULER);
38b25931 1788
90f4cbeb 1789 tsleep(&dd->helper_thread, 0, "sched_thread_sleep", 0);
d6d39bc7 1790
38b25931 1791 for (;;) {
1792 /*
1793 * We use the LWKT deschedule-interlock trick to avoid racing
1794 * bsd4_rdyprocmask. This means we cannot block through to the
1795 * manual lwkt_switch() call we make below.
1796 */
52eedfb5 1797 crit_enter_gd(gd);
d6d39bc7 1798 tsleep_interlock(&dd->helper_thread, 0);
287a8577 1799 spin_lock(&bsd4_spin);
eb501f47 1800 atomic_set_cpumask(&bsd4_rdyprocmask, mask);
1801
1802 clear_user_resched(); /* This satisfies the reschedule request */
1803 dd->rrcount = 0; /* Reset the round-robin counter */
1804
eb501f47 1805 if ((bsd4_curprocmask & mask) == 0) {
1806 /*
1807 * No thread is currently scheduled.
1808 */
1809 KKASSERT(dd->uschedcp == NULL);
e28d8b15 1810 if ((nlp = bsd4_chooseproc_locked(NULL)) != NULL) {
1811 KTR_COND_LOG(usched_sched_thread_no_process,
1812 nlp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1813 gd->gd_cpuid,
1814 nlp->lwp_proc->p_pid,
1815 nlp->lwp_thread->td_gd->gd_cpuid);
1816
eb501f47 1817 atomic_set_cpumask(&bsd4_curprocmask, mask);
1818 dd->upri = nlp->lwp_priority;
1819 dd->uschedcp = nlp;
901ecceb 1820 dd->rrcount = 0; /* reset round robin */
287a8577 1821 spin_unlock(&bsd4_spin);
1822 lwkt_acquire(nlp->lwp_thread);
1823 lwkt_schedule(nlp->lwp_thread);
1824 } else {
287a8577 1825 spin_unlock(&bsd4_spin);
52eedfb5 1826 }
b9eb1c19 1827 } else if (bsd4_runqcount) {
e28d8b15 1828 if ((nlp = bsd4_chooseproc_locked(dd->uschedcp)) != NULL) {
1829 KTR_COND_LOG(usched_sched_thread_process,
1830 nlp->lwp_proc->p_pid == usched_bsd4_pid_debug,
1831 gd->gd_cpuid,
1832 nlp->lwp_proc->p_pid,
1833 nlp->lwp_thread->td_gd->gd_cpuid);
1834
1835 dd->upri = nlp->lwp_priority;
1836 dd->uschedcp = nlp;
901ecceb 1837 dd->rrcount = 0; /* reset round robin */
1838 spin_unlock(&bsd4_spin);
1839 lwkt_acquire(nlp->lwp_thread);
1840 lwkt_schedule(nlp->lwp_thread);
52eedfb5 1841 } else {
1842 /*
1843 * CHAINING CONDITION TRAIN
1844 *
1845 * We could not deal with the scheduler wakeup
1846 * request on this cpu, so locate a ready scheduler
1847 * with no current lp assignment and chain to it.
1848 *
1849 * This ensures that a wakeup race which fails due
1850 * to the priority test does not leave other unscheduled
1851 * cpus idle when the runqueue is not empty.
1852 */
d6d39bc7 1853 tmpmask = ~bsd4_curprocmask &
901ecceb 1854 bsd4_rdyprocmask & smp_active_mask;
1855 if (tmpmask) {
1856 tmpid = BSFCPUMASK(tmpmask);
85946b6c 1857 tmpdd = &bsd4_pcpu[tmpid];
eb501f47 1858 atomic_clear_cpumask(&bsd4_rdyprocmask,
901ecceb 1859 CPUMASK(tmpid));
eb501f47 1860 spin_unlock(&bsd4_spin);
d6d39bc7 1861 wakeup(&tmpdd->helper_thread);
1862 } else {
1863 spin_unlock(&bsd4_spin);
1864 }
1865
1866 KTR_LOG(usched_sched_thread_no_process_found,
901ecceb 1867 gd->gd_cpuid, (unsigned long)tmpmask);
52eedfb5 1868 }
1869 } else {
1870 /*
1871 * The runq is empty.
1872 */
287a8577 1873 spin_unlock(&bsd4_spin);
38b25931 1874 }
1875
1876 /*
1877 * We're descheduled unless someone scheduled us. Switch away.
1878 * Exiting the critical section will cause splz() to be called
1879 * for us if interrupts and such are pending.
1880 */
52eedfb5 1881 crit_exit_gd(gd);
901ecceb 1882 tsleep(&dd->helper_thread, PINTERLOCKED, "schslp", 0);
1883 }
1884}
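/*
 * Wakeup protocol used above: the helper publishes itself in
 * bsd4_rdyprocmask after establishing the tsleep_interlock(), drops
 * bsd4_spin, and only then blocks with PINTERLOCKED.  A remote cpu that
 * clears the ready bit and issues wakeup(&dd->helper_thread) in between
 * therefore cannot lose the wakeup; the final tsleep() returns
 * immediately instead.
 */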
1885
1886/* sysctl stick_to_level parameter */
1887static int
1888sysctl_usched_bsd4_stick_to_level(SYSCTL_HANDLER_ARGS)
1889{
1890 int error, new_val;
1891
1892 new_val = usched_bsd4_stick_to_level;
1893
1894 error = sysctl_handle_int(oidp, &new_val, 0, req);
1895 if (error != 0 || req->newptr == NULL)
1896 return (error);
901ecceb 1897 if (new_val > cpu_topology_levels_number - 1 || new_val < 0)
1898 return (EINVAL);
1899 usched_bsd4_stick_to_level = new_val;
1900 return (0);
1901}
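/*
 * Example (userland, illustrative): pin scheduling decisions to a given
 * topology level; accepted values are 0 .. cpu_topology_levels_number - 1:
 *
 *	sysctl kern.usched_bsd4.stick_to_level=1
 */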
1902
1903/*
1904 * Set up our scheduler helpers. Note that curprocmask bit 0 has already
1905 * been cleared by rqinit() and we should not mess with it further.
1906 */
1907static void
1908sched_thread_cpu_init(void)
1909{
1910 int i;
1911 int cpuid;
1912 int smt_not_supported = 0;
1913 int cache_coherent_not_supported = 0;
901ecceb 1914
1915 if (bootverbose)
1916 kprintf("Start scheduler helpers on cpus:\n");
38b25931 1917
d6d39bc7 1918 sysctl_ctx_init(&usched_bsd4_sysctl_ctx);
1919 usched_bsd4_sysctl_tree =
1920 SYSCTL_ADD_NODE(&usched_bsd4_sysctl_ctx,
1921 SYSCTL_STATIC_CHILDREN(_kern), OID_AUTO,
1922 "usched_bsd4", CTLFLAG_RD, 0, "");
38b25931 1923
1924 for (i = 0; i < ncpus; ++i) {
1925 bsd4_pcpu_t dd = &bsd4_pcpu[i];
1926 cpumask_t mask = CPUMASK(i);
38b25931 1927
1928 if ((mask & smp_active_mask) == 0)
1929 continue;
38b25931 1930
d6d39bc7 1931 dd->cpunode = get_cpu_node_by_cpuid(i);
38b25931 1932
1933 if (dd->cpunode == NULL) {
1934 smt_not_supported = 1;
1935 cache_coherent_not_supported = 1;
1936 if (bootverbose)
1937 kprintf ("\tcpu%d - WARNING: No CPU NODE "
1938 "found for cpu\n", i);
d6d39bc7 1939 } else {
d6d39bc7 1940 switch (dd->cpunode->type) {
1941 case THREAD_LEVEL:
1942 if (bootverbose)
1943 kprintf ("\tcpu%d - HyperThreading "
1944 "available. Core siblings: ",
1945 i);
1946 break;
1947 case CORE_LEVEL:
1948 smt_not_supported = 1;
1949
1950 if (bootverbose)
1951 kprintf ("\tcpu%d - No HT available, "
1952 "multi-core/physical "
1953 "cpu. Physical siblings: ",
1954 i);
1955 break;
1956 case CHIP_LEVEL:
1957 smt_not_supported = 1;
1958
1959 if (bootverbose)
1960 kprintf ("\tcpu%d - No HT available, "
1961 "single-core/physical cpu. "
1962 "Package Siblings: ",
1963 i);
1964 break;
1965 default:
1966 /* Let's go for safe defaults here */
1967 smt_not_supported = 1;
1968 cache_coherent_not_supported = 1;
1969 if (bootverbose)
1970 kprintf ("\tcpu%d - Unknown cpunode->"
1971 "type=%u. Siblings: ",
1972 i,
1973 (u_int)dd->cpunode->type);
1974 break;
1975 }
1976
1977 if (bootverbose) {
1978 if (dd->cpunode->parent_node != NULL) {
1979 CPUSET_FOREACH(cpuid, dd->cpunode->parent_node->members)
1980 kprintf("cpu%d ", cpuid);
1981 kprintf("\n");
1982 } else {
1983 kprintf(" no siblings\n");
1984 }
1985 }
1986 }
1987
1988 lwkt_create(sched_thread, NULL, NULL, &dd->helper_thread,
901ecceb 1989 0, i, "usched %d", i);
1990
1991 /*
1992 * Allow user scheduling on the target cpu. cpu #0 has already
1993 * been enabled in rqinit().
1994 */
1995 if (i)
1996 atomic_clear_cpumask(&bsd4_curprocmask, mask);
1997 atomic_set_cpumask(&bsd4_rdyprocmask, mask);
1998 dd->upri = PRIBASE_NULL;
1999
2000 }
2001
2002 /* usched_bsd4 sysctl configurable parameters */
2003
2004 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2005 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2006 OID_AUTO, "rrinterval", CTLFLAG_RW,
2007 &usched_bsd4_rrinterval, 0, "");
d6d39bc7 2008 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2009 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2010 OID_AUTO, "decay", CTLFLAG_RW,
2011 &usched_bsd4_decay, 0, "Extra decay when not running");
d6d39bc7 2012 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2013 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2014 OID_AUTO, "batch_time", CTLFLAG_RW,
2015 &usched_bsd4_batch_time, 0, "Min batch counter value");
2016 SYSCTL_ADD_LONG(&usched_bsd4_sysctl_ctx,
2017 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2018 OID_AUTO, "kicks", CTLFLAG_RW,
2019 &usched_bsd4_kicks, "Number of kickstarts");
2020
2021 /* Add enable/disable option for SMT scheduling if supported */
2022 if (smt_not_supported) {
2023 usched_bsd4_smt = 0;
2024 SYSCTL_ADD_STRING(&usched_bsd4_sysctl_ctx,
2025 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2026 OID_AUTO, "smt", CTLFLAG_RD,
2027 "NOT SUPPORTED", 0, "SMT NOT SUPPORTED");
2028 } else {
2029 usched_bsd4_smt = 1;
2030 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2031 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2032 OID_AUTO, "smt", CTLFLAG_RW,
2033 &usched_bsd4_smt, 0, "Enable SMT scheduling");
2034 }
2035
2036 /*
2037 * Add enable/disable option for cache coherent scheduling
2038 * if supported
2039 */
d6d39bc7 2040 if (cache_coherent_not_supported) {
2041 usched_bsd4_cache_coherent = 0;
2042 SYSCTL_ADD_STRING(&usched_bsd4_sysctl_ctx,
2043 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2044 OID_AUTO, "cache_coherent", CTLFLAG_RD,
2045 "NOT SUPPORTED", 0,
2046 "Cache coherence NOT SUPPORTED");
d6d39bc7 2047 } else {
2048 usched_bsd4_cache_coherent = 1;
2049 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2050 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2051 OID_AUTO, "cache_coherent", CTLFLAG_RW,
2052 &usched_bsd4_cache_coherent, 0,
2053 "Enable/Disable cache coherent scheduling");
2054
2055 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2056 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2057 OID_AUTO, "upri_affinity", CTLFLAG_RW,
2058 &usched_bsd4_upri_affinity, 1,
2059 "Number of PPQs in user priority check");
2060
2061 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2062 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2063 OID_AUTO, "queue_checks", CTLFLAG_RW,
2064 &usched_bsd4_queue_checks, 5,
2065 "LWPs to check from a queue before giving up");
2066
2067 SYSCTL_ADD_PROC(&usched_bsd4_sysctl_ctx,
2068 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2069 OID_AUTO, "stick_to_level",
2070 CTLTYPE_INT | CTLFLAG_RW,
2071 NULL, sizeof usched_bsd4_stick_to_level,
2072 sysctl_usched_bsd4_stick_to_level, "I",
2073 "Stick a process to this level. See sysctl "
2074 "parameter hw.cpu_topology.level_description");
d6d39bc7 2075 }
38b25931 2076}
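/*
 * All of the knobs registered above live under kern.usched_bsd4.  The smt
 * and cache_coherent options fall back to read-only "NOT SUPPORTED"
 * placeholders when the detected topology cannot use them, and
 * upri_affinity, queue_checks and stick_to_level are only registered in
 * the cache-coherent case.
 */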
2077SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
2078 sched_thread_cpu_init, NULL)
901ecceb 2079
d6d39bc7 2080#else /* No SMP options - just add the configurable parameters to sysctl */
38b25931 2081
2082static void
2083sched_sysctl_tree_init(void)
2084{
2085 sysctl_ctx_init(&usched_bsd4_sysctl_ctx);
2086 usched_bsd4_sysctl_tree =
2087 SYSCTL_ADD_NODE(&usched_bsd4_sysctl_ctx,
2088 SYSCTL_STATIC_CHILDREN(_kern), OID_AUTO,
2089 "usched_bsd4", CTLFLAG_RD, 0, "");
2090
2091 /* usched_bsd4 sysctl configurable parameters */
2092 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2093 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2094 OID_AUTO, "rrinterval", CTLFLAG_RW,
2095 &usched_bsd4_rrinterval, 0, "");
d6d39bc7 2096 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2097 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2098 OID_AUTO, "decay", CTLFLAG_RW,
2099 &usched_bsd4_decay, 0, "Extra decay when not running");
d6d39bc7 2100 SYSCTL_ADD_INT(&usched_bsd4_sysctl_ctx,
2101 SYSCTL_CHILDREN(usched_bsd4_sysctl_tree),
2102 OID_AUTO, "batch_time", CTLFLAG_RW,
2103 &usched_bsd4_batch_time, 0, "Min batch counter value");
2104}
2105SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
2106 sched_sysctl_tree_init, NULL)
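/*
 * The non-SMP build exposes only the basic tunables (rrinterval, decay,
 * batch_time); the topology-aware options require the SMP helper
 * infrastructure above.
 */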
38b25931 2107#endif