From 0a3f9b4712574b03a70559adc293147ac99e6109 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Tue, 30 Mar 2004 19:14:18 +0000
Subject: [PATCH] Second major scheduler patch.  This corrects interactive
 issues that were introduced in the pipe sf_buf patch.

Split need_resched() into need_user_resched() and need_lwkt_resched().
Userland reschedules are requested when a process is scheduled with a
higher priority than the currently running process, and LWKT reschedules
are requested when a thread is scheduled with a higher priority than the
currently running thread.  As before, these are ASTs; LWKT threads are not
preemptively switched while running in the kernel.

Exclusively use the resched-wanted flags to determine whether to reschedule
or call lwkt_switch() upon return to user mode.  We were previously also
testing the LWKT run queue for higher priority threads, but this was causing
inefficient scheduler interactions when two processes are doing tightly
bound synchronous IPC (e.g. using PIPEs), because in DragonFly the LWKT
priority of a thread is raised when it enters the kernel and lowered when it
tries to return to userland.  The wakeups occurring in the pipe code were
causing extra quick-flip thread switches.

Introduce a new tsleep() flag which disables the need_lwkt_resched() call
when the sleeping thread is woken up.  This is used by the PIPE code in the
synchronous direct-write PIPE case to avoid the above problem.

Redocument and revamp the ESTCPU code.  The original changes reduced the
interrupt rate from 100Hz (FBsd-4 and FBsd-5) to 20Hz, but did not
compensate for the slower ramp-up time.  This commit introduces a 'virtual'
ESTCPU frequency which compensates without us having to bump up the actual
systimer interrupt rate.

Redo the P_CURPROC methodology, which is used by the userland scheduler to
manage processes running in userland.  Create a globaldata->gd_uschedcp
process pointer which represents the current running-in-userland (or about
to be running in userland) process, and carefully recode acquire_curproc()
to allow this gd_uschedcp designation to be stolen from other threads trying
to return to userland, without having to request a reschedule (which would
have to switch back to those threads to release the designation).  This
reduces the number of unnecessary context switches that occur due to
scheduler interactions.  Also note that this specifically solves the case
where several threads running in the kernel are trying to return to userland
at the same time.  A heuristic check against gd_upri is used to select the
correct thread for scheduling to userland 'most of the time'.  When the
correct thread is not selected, we fall back to the old behavior of forcing
a reschedule.

Add debugging sysctl variables to better track userland scheduler
efficiency.

With these changes pipe statistics are further improved.  Though some
scheduling aberrations still exist(1), the previous scheduler had totally
broken interactive processes and this one does not.
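The ESTCPU compensation described above can be illustrated with a small
standalone sketch.  This is not kernel code: ESTCPUFREQ, ESTCPUVFREQ, and
ESTCPUMAX here are assumed typical values taken from the comments in this
patch (a 20Hz scheduler clock, a 40Hz virtual ramp rate, and a cap around
376); the real definitions live in sys/sys/proc.h.  The point is only that
scaling the per-tick increment by ESTCPUVFREQ / ESTCPUFREQ lets p_estcpu
ramp at the virtual rate even though the systimer still fires just
ESTCPUFREQ times per second.

    /*
     * Illustrative sketch only -- not part of the patch.  Constants are
     * assumed typical values; see sys/sys/proc.h for the real ones.
     */
    #include <stdio.h>

    #define ESTCPUFREQ   20                 /* actual schedulerclock rate (Hz) */
    #define ESTCPUVFREQ  40                 /* virtual ramp-up rate (Hz) */
    #define ESTCPUMAX    376                /* assumed cap (~11 nice levels) */
    #define ESTCPULIM(v) ((v) > ESTCPUMAX ? ESTCPUMAX : (v))

    int
    main(void)
    {
        int estcpu = 0;
        int tick;

        /*
         * One simulated second of schedulerclock ticks for a cpu-bound
         * process: estcpu rises by ESTCPUVFREQ per second even though the
         * interrupt only fires ESTCPUFREQ times in that second.
         */
        for (tick = 0; tick < ESTCPUFREQ; ++tick)
            estcpu = ESTCPULIM(estcpu + ESTCPUVFREQ / ESTCPUFREQ);
        printf("estcpu after 1 second: %d (ramp %d/sec)\n", estcpu, ESTCPUVFREQ);
        return 0;
    }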
BLKSIZE BEFORE NEWPIPE NOW Tests on AMD64 MBytes/s MBytes/s MBytes/s 3200+ FN85MB (64KB L1, 1MB L2) 256KB 1900 2200 2250 64KB 1800 2200 2250 32KB - - 3300 16KB 1650 2500-3000 2600-3200 8KB 1400 2300 2000-2400(1) 4KB 1300 1400-1500 1500-1700 --- sys/amd64/amd64/genassym.c | 5 +- sys/cpu/i386/include/cpu.h | 22 +- sys/emulation/posix4/ksched.c | 12 +- sys/i386/i386/genassym.c | 5 +- sys/i386/i386/sys_machdep.c | 4 +- sys/i386/i386/trap.c | 97 ++------ sys/i386/include/cpu.h | 22 +- sys/i386/isa/intr_machdep.c | 4 +- sys/i386/isa/ipl.s | 3 +- sys/kern/init_main.c | 7 +- sys/kern/kern_clock.c | 4 +- sys/kern/kern_exit.c | 6 +- sys/kern/kern_fork.c | 8 +- sys/kern/kern_sched.c | 12 +- sys/kern/kern_switch.c | 349 ++++++++++++++++++--------- sys/kern/kern_synch.c | 80 +++--- sys/kern/lwkt_thread.c | 89 +++---- sys/kern/sys_pipe.c | 8 +- sys/platform/pc32/i386/genassym.c | 5 +- sys/platform/pc32/i386/sys_machdep.c | 4 +- sys/platform/pc32/i386/trap.c | 97 ++------ sys/platform/pc32/isa/intr_machdep.c | 4 +- sys/platform/pc32/isa/ipl.s | 3 +- sys/platform/vkernel/i386/genassym.c | 5 +- sys/sys/globaldata.h | 37 +-- sys/sys/param.h | 3 +- sys/sys/proc.h | 14 +- sys/sys/thread.h | 4 +- 28 files changed, 473 insertions(+), 440 deletions(-) diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index e73417aaee..061ed9851a 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/amd64/amd64/Attic/genassym.c,v 1.3 2004/02/21 06:37:02 dillon Exp $ + * $DragonFly: src/sys/amd64/amd64/Attic/genassym.c,v 1.4 2004/03/30 19:14:15 dillon Exp $ */ #include @@ -203,7 +203,8 @@ ASSYM(RQF_IPIQ, RQF_IPIQ); ASSYM(RQF_INTPEND, RQF_INTPEND); ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); -ASSYM(RQF_AST_RESCHED, RQF_AST_RESCHED); +ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); +ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); ASSYM(RQF_AST_MASK, RQF_AST_MASK); diff --git a/sys/cpu/i386/include/cpu.h b/sys/cpu/i386/include/cpu.h index de37feac99..4a9820a0af 100644 --- a/sys/cpu/i386/include/cpu.h +++ b/sys/cpu/i386/include/cpu.h @@ -35,7 +35,7 @@ * * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $ - * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.14 2004/02/17 19:38:53 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.15 2004/03/30 19:14:06 dillon Exp $ */ #ifndef _MACHINE_CPU_H_ @@ -72,8 +72,10 @@ * atomic instruction because an interrupt on the local cpu can modify * the gd_reqflags field. 
*/ -#define need_resched() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) +#define need_lwkt_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define need_user_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) #define need_proftick() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) #define need_ipiq() \ @@ -82,10 +84,16 @@ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) #define sigupcall() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL) -#define clear_resched() \ - atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) -#define resched_wanted() \ - (mycpu->gd_reqflags & RQF_AST_RESCHED) +#define clear_user_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) +#define clear_lwkt_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define user_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_USER_RESCHED) +#define lwkt_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED) +#define any_resched_wanted() \ + (mycpu->gd_reqflags & (RQF_AST_LWKT_RESCHED|RQF_AST_USER_RESCHED)) /* * CTL_MACHDEP definitions. diff --git a/sys/emulation/posix4/ksched.c b/sys/emulation/posix4/ksched.c index 31d92bdbc1..bcf3612970 100644 --- a/sys/emulation/posix4/ksched.c +++ b/sys/emulation/posix4/ksched.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/posix4/ksched.c,v 1.7.2.1 2000/05/16 06:58:13 dillon Exp $ - * $DragonFly: src/sys/emulation/posix4/Attic/ksched.c,v 1.3 2003/08/07 21:17:19 dillon Exp $ + * $DragonFly: src/sys/emulation/posix4/Attic/ksched.c,v 1.4 2004/03/30 19:14:18 dillon Exp $ */ /* ksched: Soft real time scheduling based on "rtprio". @@ -41,8 +41,8 @@ #include #include #include -#include /* For need_resched */ -#include /* For need_resched */ +#include /* For need_user_resched */ +#include /* For need_user_resched */ #include "posix4.h" @@ -172,7 +172,7 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, ? RTP_PRIO_FIFO : RTP_PRIO_REALTIME; p->p_rtprio = rtp; - need_resched(); + need_user_resched(); } else e = EPERM; @@ -192,7 +192,7 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, * on the scheduling code: You must leave the * scheduling info alone. 
*/ - need_resched(); + need_user_resched(); } break; } @@ -209,7 +209,7 @@ int ksched_getscheduler(register_t *ret, struct ksched *ksched, struct proc *p) */ int ksched_yield(register_t *ret, struct ksched *ksched) { - need_resched(); + need_user_resched(); return 0; } diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index df430d6ce6..d0f42edd84 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.35 2004/02/21 06:37:07 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.36 2004/03/30 19:14:04 dillon Exp $ */ #include @@ -191,7 +191,8 @@ ASSYM(RQF_IPIQ, RQF_IPIQ); ASSYM(RQF_INTPEND, RQF_INTPEND); ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); -ASSYM(RQF_AST_RESCHED, RQF_AST_RESCHED); +ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); +ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); ASSYM(RQF_AST_MASK, RQF_AST_MASK); diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c index cecbe7f373..b2e42f2e95 100644 --- a/sys/i386/i386/sys_machdep.c +++ b/sys/i386/i386/sys_machdep.c @@ -32,7 +32,7 @@ * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/sys_machdep.c,v 1.12 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/sys_machdep.c,v 1.13 2004/03/30 19:14:04 dillon Exp $ * */ @@ -153,7 +153,7 @@ i386_extend_pcb(struct proc *p) ssdtosd(&ssd, &ext->ext_tssd); /* switch to the new TSS after syscall completes */ - need_resched(); + need_user_resched(); return 0; } diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index db58fba9c2..199d434632 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.47 2004/03/28 08:03:05 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.48 2004/03/30 19:14:04 dillon Exp $ */ /* @@ -168,9 +168,6 @@ SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, static int slow_release; SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, &slow_release, 0, "Passive Release was nonoptimal"); -static int pass_release; -SYSCTL_INT(_machdep, OID_AUTO, pass_release, CTLFLAG_RW, - &pass_release, 0, "Passive Release on switch"); MALLOC_DEFINE(M_SYSMSG, "sysmsg", "sysmsg structure"); @@ -190,25 +187,7 @@ passive_release(struct thread *td) struct proc *p = td->td_proc; td->td_release = NULL; - - /* - * P_CP_RELEASED prevents the userland scheduler from messing with - * this proc. - */ - if ((p->p_flag & P_CP_RELEASED) == 0) { - p->p_flag |= P_CP_RELEASED; - lwkt_setpri_self(TDPRI_KERN_USER); - } - - /* - * Only one process will have a P_CURPROC designation for each cpu - * in the system. Releasing it allows another userland process to - * be scheduled in case our thread blocks in the kernel. - */ - if (p->p_flag & P_CURPROC) { - release_curproc(p); - ++pass_release; - } + release_curproc(p); } /* @@ -223,58 +202,26 @@ userenter(struct thread *curtd) curtd->td_release = passive_release; } +/* + * Reacquire our current process designation. This will not return until + * we have it. 
Our LWKT priority will be adjusted for our return to + * userland. acquire_curproc() also handles cleaning up P_CP_RELEASED. + * + * This is always the last step before returning to user mode. + */ static __inline void userexit(struct proc *p) { struct thread *td = p->p_thread; - /* - * Reacquire our P_CURPROC status and adjust the LWKT priority - * for our return to userland. We can fast path the case where - * td_release was not called by checking particular proc flags. - * Otherwise we do it the slow way. - * - * Lowering our priority may make other higher priority threads - * runnable. lwkt_setpri_self() does not switch away, so call - * lwkt_maybe_switch() to deal with it. We do this *before* we - * acquire P_CURPROC because another thread may also be intending - * to return to userland and if it has a higher user priority then - * us it will have to block and force us to reschedule, resulting in - * unnecessary extra context switches. - * - * WARNING! Once our priority is lowered to a user level priority - * it is possible, once we return to user mode (or if we were to - * block) for a cpu-bound user process to prevent us from getting cpu - * again. This is always the last step. - */ td->td_release = NULL; - if ((p->p_flag & (P_CP_RELEASED|P_CURPROC)) == P_CURPROC) { - ++fast_release; - lwkt_maybe_switch(); - } else { + if (p->p_flag & P_CP_RELEASED) ++slow_release; - lwkt_setpri_self(TDPRI_USER_NORM); - lwkt_maybe_switch(); - acquire_curproc(p); -#if 0 - /* POSSIBLE FUTURE */ - switch(p->p_rtprio.type) { - case RTP_PRIO_IDLE: - lwkt_setpri_self(TDPRI_USER_IDLE); - break; - case RTP_PRIO_REALTIME: - case RTP_PRIO_FIFO: - lwkt_setpri_self(TDPRI_USER_REAL); - break; - default: - lwkt_setpri_self(TDPRI_USER_NORM); - break; - } -#endif - } + else + ++fast_release; + acquire_curproc(p); } - static void userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) { @@ -297,21 +244,13 @@ userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) /* * If a reschedule has been requested then we release the current - * process in order to shift our P_CURPROC designation to another - * user process. userexit() will reacquire P_CURPROC and block - * there. + * process in order to shift the current process designation to + * another user process and/or to switch to a higher priority + * kernel thread at userexit() time. */ - if (resched_wanted()) { + if (any_resched_wanted()) { p->p_thread->td_release = NULL; - if ((p->p_flag & P_CP_RELEASED) == 0) { - p->p_flag |= P_CP_RELEASED; - lwkt_setpri_self(TDPRI_KERN_USER); - } - if (p->p_flag & P_CURPROC) { - release_curproc(p); - } else { - clear_resched(); - } + release_curproc(p); } /* diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index b91f5e5d41..cbf3cad62b 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -35,7 +35,7 @@ * * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $ - * $DragonFly: src/sys/i386/include/Attic/cpu.h,v 1.14 2004/02/17 19:38:53 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/cpu.h,v 1.15 2004/03/30 19:14:06 dillon Exp $ */ #ifndef _MACHINE_CPU_H_ @@ -72,8 +72,10 @@ * atomic instruction because an interrupt on the local cpu can modify * the gd_reqflags field. 
*/ -#define need_resched() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) +#define need_lwkt_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define need_user_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) #define need_proftick() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) #define need_ipiq() \ @@ -82,10 +84,16 @@ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) #define sigupcall() \ atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL) -#define clear_resched() \ - atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_RESCHED) -#define resched_wanted() \ - (mycpu->gd_reqflags & RQF_AST_RESCHED) +#define clear_user_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) +#define clear_lwkt_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define user_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_USER_RESCHED) +#define lwkt_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED) +#define any_resched_wanted() \ + (mycpu->gd_reqflags & (RQF_AST_LWKT_RESCHED|RQF_AST_USER_RESCHED)) /* * CTL_MACHDEP definitions. diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index 59f84ef483..a388eb2020 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ - * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.20 2004/03/10 13:04:40 hmp Exp $ + * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.21 2004/03/30 19:14:08 dillon Exp $ */ /* * This file contains an aggregated module marked: @@ -792,7 +792,7 @@ cpu_intr_preempt(struct thread *td, int critpri) if ((curthread->td_cpl & (1 << info->irq)) == 0) lwkt_preempt(td, critpri); else - need_resched(); + need_lwkt_resched(); } static int diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s index 7299ce1cb4..851c15f673 100644 --- a/sys/i386/isa/ipl.s +++ b/sys/i386/isa/ipl.s @@ -37,7 +37,7 @@ * @(#)ipl.s * * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $ - * $DragonFly: src/sys/i386/isa/Attic/ipl.s,v 1.16 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/ipl.s,v 1.17 2004/03/30 19:14:08 dillon Exp $ */ @@ -137,6 +137,7 @@ doreti_next: cmpl $1,in_vm86call /* YYY make per 'cpu'? */ jnz doreti_ast 1: + /* ASTs are only applicable when returning to userland */ testb $SEL_RPL_MASK,TF_CS(%esp) jnz doreti_ast 2: diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 29f7579124..dfb54531cb 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -40,7 +40,7 @@ * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/init_main.c,v 1.134.2.8 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/init_main.c,v 1.28 2004/03/01 06:33:16 dillon Exp $ + * $DragonFly: src/sys/kern/init_main.c,v 1.29 2004/03/30 19:14:11 dillon Exp $ */ #include "opt_init_path.h" @@ -552,8 +552,9 @@ start_init(void *dummy) * to user mode as init! * * WARNING! We may have been moved to another cpu after - * acquiring P_CURPROC. The MP lock will migrate with us - * though so we still have to release it. + * acquiring the current user process designation. The + * MP lock will migrate with us though so we still have to + * release it. 
*/ if ((error = execve(&args)) == 0) { acquire_curproc(p); diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 7f661ccb69..6c02e37b76 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -39,7 +39,7 @@ * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $ - * $DragonFly: src/sys/kern/kern_clock.c,v 1.17 2004/03/20 19:21:08 dillon Exp $ + * $DragonFly: src/sys/kern/kern_clock.c,v 1.18 2004/03/30 19:14:11 dillon Exp $ */ #include "opt_ntp.h" @@ -435,7 +435,7 @@ statclock(systimer_t info, struct intrframe *frame) } /* - * The scheduler clock typically runs at a 10Hz rate. NOTE! systimer, + * The scheduler clock typically runs at a 20Hz rate. NOTE! systimer, * the MP lock might not be held. We can safely manipulate parts of curproc * but that's about it. */ diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index b5334d9000..cd08d6979e 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.32 2004/03/20 23:35:18 dillon Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.33 2004/03/30 19:14:11 dillon Exp $ */ #include "opt_compat.h" @@ -374,8 +374,8 @@ exit1(int rv) } /* - * Release the P_CURPROC designation on the process so the userland - * scheduler can work in someone else. + * Release the current user process designation on the process so + * the userland scheduler can work in someone else. */ release_curproc(p); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index caa65ffd74..816056c83d 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -37,7 +37,7 @@ * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $ - * $DragonFly: src/sys/kern/kern_fork.c,v 1.20 2004/03/20 23:35:18 dillon Exp $ + * $DragonFly: src/sys/kern/kern_fork.c,v 1.21 2004/03/30 19:14:11 dillon Exp $ */ #include "opt_ktrace.h" @@ -360,9 +360,9 @@ again: * The p_stats and p_sigacts substructs are set in vm_fork. * * P_CP_RELEASED indicates that the process is starting out in - * the kernel (in the fork trampoline). The flag will be converted - * to P_CURPROC when the new process calls userret() and attempts - * to return to userland + * the kernel (in the fork trampoline). The flag will be cleared + * when the new process calls userret() and acquires its current + * process designation for the return to userland. */ p2->p_flag = P_INMEM | P_CP_RELEASED; if (p1->p_flag & P_PROFIL) diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c index 6b3e065c22..9631a82d26 100644 --- a/sys/kern/kern_sched.c +++ b/sys/kern/kern_sched.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/posix4/ksched.c,v 1.7.2.1 2000/05/16 06:58:13 dillon Exp $ - * $DragonFly: src/sys/kern/kern_sched.c,v 1.3 2003/08/07 21:17:19 dillon Exp $ + * $DragonFly: src/sys/kern/kern_sched.c,v 1.4 2004/03/30 19:14:18 dillon Exp $ */ /* ksched: Soft real time scheduling based on "rtprio". @@ -41,8 +41,8 @@ #include #include #include -#include /* For need_resched */ -#include /* For need_resched */ +#include /* For need_user_resched */ +#include /* For need_user_resched */ #include "posix4.h" @@ -172,7 +172,7 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, ? 
RTP_PRIO_FIFO : RTP_PRIO_REALTIME; p->p_rtprio = rtp; - need_resched(); + need_user_resched(); } else e = EPERM; @@ -192,7 +192,7 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, * on the scheduling code: You must leave the * scheduling info alone. */ - need_resched(); + need_user_resched(); } break; } @@ -209,7 +209,7 @@ int ksched_getscheduler(register_t *ret, struct ksched *ksched, struct proc *p) */ int ksched_yield(register_t *ret, struct ksched *ksched) { - need_resched(); + need_user_resched(); return 0; } diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 5cb4b758c3..d9a6e4e444 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/kern_switch.c,v 1.3.2.1 2000/05/16 06:58:12 dillon Exp $ - * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.19 2004/03/28 08:03:02 dillon Exp $ + * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.20 2004/03/30 19:14:11 dillon Exp $ */ #include @@ -74,12 +74,17 @@ static int scancpu; #endif SYSCTL_INT(_debug, OID_AUTO, runqcount, CTLFLAG_RD, &runqcount, 0, ""); -static int usched_steal; -SYSCTL_INT(_debug, OID_AUTO, usched_steal, CTLFLAG_RW, - &usched_steal, 0, "Passive Release was nonoptimal"); +#ifdef INVARIANTS +static int usched_stalls; +SYSCTL_INT(_debug, OID_AUTO, usched_stalls, CTLFLAG_RW, + &usched_stalls, 0, "acquire_curproc() had to stall"); +static int usched_stolen; +SYSCTL_INT(_debug, OID_AUTO, usched_stolen, CTLFLAG_RW, + &usched_stolen, 0, "acquire_curproc() stole the des"); static int usched_optimal; SYSCTL_INT(_debug, OID_AUTO, usched_optimal, CTLFLAG_RW, - &usched_optimal, 0, "Passive Release was nonoptimal"); + &usched_optimal, 0, "acquire_curproc() was optimal"); +#endif #ifdef SMP static int remote_resched = 1; static int remote_resched_nonaffinity; @@ -95,8 +100,6 @@ SYSCTL_INT(_debug, OID_AUTO, choose_affinity, CTLFLAG_RD, &choose_affinity, 0, "chooseproc() was smart"); #endif -#define USCHED_COUNTER(td) ((td->td_gd == mycpu) ? ++usched_optimal : ++usched_steal) - /* * Initialize the run queues at boot time. */ @@ -116,10 +119,8 @@ SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) /* * Returns 1 if curp is equal to or better then newp. Note that - * lower p_priority values == higher process priorities. - * - * This routine is only called when the current process is trying to acquire - * P_CURPROC. Since it is already in-context we cut it some slack. + * lower p_priority values == higher process priorities. Assume curp + * is in-context and cut it some slack to avoid ping ponging. */ static __inline int @@ -145,7 +146,6 @@ chooseproc(struct proc *chkp) u_int32_t *which; u_int32_t pri; - clear_resched(); if (rtqueuebits) { pri = bsfl(rtqueuebits); q = &rtqueues[pri]; @@ -203,9 +203,9 @@ chooseproc(struct proc *chkp) */ static void -need_resched_remote(void *dummy) +need_user_resched_remote(void *dummy) { - need_resched(); + need_user_resched(); } #endif @@ -218,9 +218,6 @@ need_resched_remote(void *dummy) * case when setrunqueue() is called from wakeup() and, in fact wakeup() * asserts that P_CP_RELEASED is set. * - * Note that acquire_curproc() already optimizes making the current process - * P_CURPROC, so setrunqueue() does not need to. - * * If P_CP_RELEASED is not set we place the process on the run queue and we * signal other cpus in the system that may need to be woken up to service * the new 'user' process. 
@@ -228,7 +225,7 @@ need_resched_remote(void *dummy) * If P_PASSIVE_ACQ is set setrunqueue() will not wakeup potential target * cpus in an attempt to keep the process on the current cpu at least for * a little while to take advantage of locality of reference (e.g. fork/exec - * or short fork/exit). + * or short fork/exit, and uio_yield()). * * CPU AFFINITY: cpu affinity is handled by attempting to either schedule * or (user level) preempt on the same cpu that a process was previously @@ -261,36 +258,46 @@ setrunqueue(struct proc *p) crit_enter(); KASSERT(p->p_stat == SRUN, ("setrunqueue: proc not SRUN")); - KASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC)) == 0, + KASSERT((p->p_flag & P_ONRUNQ) == 0, ("process %d already on runq! flag %08x", p->p_pid, p->p_flag)); KKASSERT((p->p_thread->td_flags & TDF_RUNQ) == 0); /* * If we have been released from the userland scheduler we - * directly schedule its thread. + * directly schedule its thread. If the priority is sufficiently + * high request a user reschedule. Note that the lwkt_resched + * is not typically set for wakeups of userland threads that happen + * to be sitting in the kernel because their LWKT priorities will + * generally be the same. */ if (p->p_flag & P_CP_RELEASED) { lwkt_schedule(p->p_thread); +#if 0 + if (gd->gd_uschedcp && test_resched(p, gd->gd_uschedcp)) + need_user_resched(); +#endif crit_exit(); return; } + /* + * We have not been released, make sure that we are not the currently + * designated process. + */ + gd = p->p_thread->td_gd; + KKASSERT(gd->gd_uschedcp != p); + /* * Check cpu affinity. The associated thread is stable at the * moment. Note that we may be checking another cpu here so we - * have to be careful. Note that gd_upri only counts when the - * curprocmask bit is set for the cpu in question, and since it is - * only a hint we can modify it on another cpu's globaldata structure. - * We use it to prevent unnecessary IPIs (hence the - PPQ). + * have to be careful. We are currently protected by the BGL. */ - gd = p->p_thread->td_gd; cpuid = gd->gd_cpuid; if ((curprocmask & (1 << cpuid)) == 0) { curprocmask |= 1 << cpuid; - p->p_flag |= P_CURPROC; + gd->gd_uschedcp = p; gd->gd_upri = p->p_priority; - USCHED_COUNTER(p->p_thread); lwkt_schedule(p->p_thread); /* CANNOT TOUCH PROC OR TD AFTER SCHEDULE CALL TO REMOTE CPU */ crit_exit(); @@ -345,14 +352,15 @@ setrunqueue(struct proc *p) * another cpu's gd_upri to avoid sending ipiq storms). 
*/ if (gd == mycpu) { - if (p->p_priority - gd->gd_upri <= -PPQ) { - need_resched(); + if ((p->p_thread->td_flags & TDF_NORESCHED) == 0 && + p->p_priority - gd->gd_upri <= -PPQ) { + need_user_resched(); --count; } } else if (remote_resched) { if (p->p_priority - gd->gd_upri <= -PPQ) { gd->gd_upri = p->p_priority; - lwkt_send_ipiq(gd, need_resched_remote, NULL); + lwkt_send_ipiq(gd, need_user_resched_remote, NULL); --count; ++remote_resched_affinity; } @@ -406,15 +414,16 @@ setrunqueue(struct proc *p) if (p->p_priority - gd->gd_upri <= -PPQ) { gd->gd_upri = p->p_priority; - lwkt_send_ipiq(gd, need_resched_remote, NULL); + lwkt_send_ipiq(gd, need_user_resched_remote, NULL); ++remote_resched_nonaffinity; } } } #else - if (p->p_priority - gd->gd_upri <= -PPQ) { + if ((p->p_thread->td_flags & TDF_NORESCHED) == 0 && + p->p_priority - gd->gd_upri <= -PPQ) { /* do not set gd_upri */ - need_resched(); + need_user_resched(); } #endif crit_exit(); @@ -467,8 +476,12 @@ remrunqueue(struct proc *p) } /* - * Release the P_CURPROC designation on the current process for this cpu - * and attempt to assign a new current process from the run queue. + * Release the current process designation on p. P MUST BE CURPROC. + * Attempt to assign a new current process from the run queue. + * + * If passive is non-zero, gd_uschedcp may be left set to p, the + * fact that P_CP_RELEASED is set will allow it to be overridden at any + * time. * * If we do not have or cannot get the MP lock we just wakeup the userland * helper scheduler thread for this cpu. @@ -484,41 +497,54 @@ release_curproc(struct proc *p) { int cpuid; struct proc *np; + globaldata_t gd = mycpu; #ifdef ONLY_ONE_USER_CPU - KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_gd == mycpu); + KKASSERT(gd->gd_cpuid == 0 && p->p_thread->td_gd == gd); +#else + KKASSERT(p->p_thread->td_gd == gd); #endif crit_enter(); - clear_resched(); - cpuid = p->p_thread->td_gd->gd_cpuid; + cpuid = gd->gd_cpuid; if ((p->p_flag & P_CP_RELEASED) == 0) { p->p_flag |= P_CP_RELEASED; lwkt_setpri_self(TDPRI_KERN_USER); } - if (p->p_flag & P_CURPROC) { - p->p_flag &= ~P_CURPROC; - curprocmask &= ~(1 << cpuid); + if (gd->gd_uschedcp == p) { if (try_mplock()) { + /* + * YYY when the MP lock is not assumed (see else) we + * will have to check that gd_uschedcp is still == p + * after acquisition of the MP lock + */ /* - * Choose the next process to assign P_CURPROC to. + * Choose the next designated current user process. * Note that we cannot schedule gd_schedthread * if runqcount is 0 without creating a scheduling - * loop. + * loop. + * + * We do not clear the user resched request here, + * we need to test it later when we re-acquire. 
*/ if ((np = chooseproc(NULL)) != NULL) { curprocmask |= 1 << cpuid; - np->p_flag |= P_CURPROC; - mycpu->gd_upri = np->p_priority; - USCHED_COUNTER(np->p_thread); + gd->gd_upri = np->p_priority; + gd->gd_uschedcp = np; lwkt_acquire(np->p_thread); lwkt_schedule(np->p_thread); } else if (runqcount && (rdyprocmask & (1 << cpuid))) { + gd->gd_uschedcp = NULL; + curprocmask &= ~(1 << cpuid); rdyprocmask &= ~(1 << cpuid); - lwkt_schedule(&mycpu->gd_schedthread); + lwkt_schedule(&gd->gd_schedthread); + } else { + gd->gd_uschedcp = NULL; + curprocmask &= ~(1 << cpuid); } rel_mplock(); } else { KKASSERT(0); /* MP LOCK ALWAYS HELD AT THE MOMENT */ + /* YYY uschedcp and curprocmask */ if (runqcount && (rdyprocmask & (1 << cpuid))) { rdyprocmask &= ~(1 << cpuid); lwkt_schedule(&mycpu->gd_schedthread); @@ -529,104 +555,209 @@ release_curproc(struct proc *p) } /* - * Acquire the P_CURPROC designation on the CURRENT process only. This - * function is called prior to returning to userland. If the system + * Acquire the current process designation on the CURRENT process only. + * This function is called prior to returning to userland. If the system * call or trap did not block and if no reschedule was requested it is - * highly likely that the P_CURPROC flag is still set in the proc, and - * we do almost nothing here. + * highly likely that p is still designated. + * + * If any reschedule (lwkt or user) was requested, release_curproc() has + * already been called and gd_uschedcp will be NULL. We must be sure not + * to return without clearing both the lwkt and user ASTs. */ void acquire_curproc(struct proc *p) { int cpuid; +#ifdef INVARIANTS + enum { ACQ_OPTIMAL, ACQ_STOLEN, ACQ_STALLED } state; +#endif struct proc *np; + globaldata_t gd = mycpu; +#ifdef ONLY_ONE_USER_CPU + KKASSERT(gd->gd_cpuid == 0); +#endif /* - * Short cut, we've already acquired the designation or we never - * lost it in the first place. P_CP_RELEASED is cleared, meaning - * that the process is again under the control of the userland - * scheduler. We do not have to fiddle with the LWKT priority, - * the trap code (userret/userexit) will do that for us. + * Shortcut the common case where the system call / other kernel entry + * did not block or otherwise release our current process designation. + * If a reschedule was requested the process would have been released + * from //trap.c and gd_uschedcp will be NULL. */ - if ((p->p_flag & P_CURPROC) != 0) { - p->p_flag &= ~P_CP_RELEASED; + if (gd->gd_uschedcp == p && (p->p_flag & P_CP_RELEASED) == 0) { +#ifdef INVARIANTS + ++usched_optimal; +#endif return; } + KKASSERT(p == gd->gd_curthread->td_proc); + clear_user_resched(); /* - * Long cut. This pulls in a bit of the userland scheduler as - * an optimization. If our cpu has not scheduled a userland - * process we gladly fill the slot, otherwise we choose the best - * candidate from the run queue and compare it against ourselves, - * scheduling either us or him depending. + * We drop our priority now. * - * If our cpu's slot isn't free we put ourselves on the userland - * run queue and switch away. We should have P_CURPROC when we - * come back. Note that a cpu change can occur when we come back. + * We must leave P_CP_RELEASED set. This allows other kernel threads + * exiting to userland to steal our gd_uschedcp. * - * YYY don't need critical section, we hold giant and no interrupt - * will mess w/ this proc? Or will it? What about curprocmask? 
+ * NOTE: If P_CP_RELEASED is not set here, our priority was never + * raised and we therefore do not have to lower it. */ -#ifdef ONLY_ONE_USER_CPU - KKASSERT(mycpu->gd_cpuid == 0 && p->p_thread->td_gd == mycpu); + if (p->p_flag & P_CP_RELEASED) + lwkt_setpri_self(TDPRI_USER_NORM); + else + p->p_flag |= P_CP_RELEASED; + +#ifdef INVARIANTS + state = ACQ_OPTIMAL; #endif crit_enter(); - while ((p->p_flag & P_CURPROC) == 0) { - /* - * reload the cpuid - */ - cpuid = p->p_thread->td_gd->gd_cpuid; - + /* + * Obtain ownership of gd_uschedcp (the current process designation). + * + * Note: the while never loops be use the construct for the initial + * condition test and break statements. + */ + while (gd->gd_uschedcp != p) { /* - * (broken out from setrunqueue() as an optimization that - * allows us to avoid descheduling and rescheduling ourself) + * Choose the next process to become the current process. * - * Interlock against the helper scheduler thread by setting - * curprocmask while we choose a new process. Check our - * process against the new process to shortcut setrunqueue() - * and remrunqueue() operations. + * With P_CP_RELEASED set, we can compete for the designation. + * if any_resched_wanted() is set */ - if ((curprocmask & (1 << cpuid)) == 0) { + cpuid = gd->gd_cpuid; + np = gd->gd_uschedcp; + if (np == NULL) { + KKASSERT((curprocmask & (1 << cpuid)) == 0); curprocmask |= 1 << cpuid; - - if ((np = chooseproc(p)) != NULL) { - KKASSERT((np->p_flag & P_CP_RELEASED) == 0); - np->p_flag |= P_CURPROC; - mycpu->gd_upri = np->p_priority; - USCHED_COUNTER(np->p_thread); - lwkt_acquire(np->p_thread); - lwkt_schedule(np->p_thread); - } else { - p->p_flag |= P_CURPROC; + if ((np = chooseproc(p)) == NULL) { + gd->gd_uschedcp = p; + gd->gd_upri = p->p_priority; + break; } + KKASSERT((np->p_flag & P_CP_RELEASED) == 0); + gd->gd_upri = np->p_priority; + gd->gd_uschedcp = np; + lwkt_acquire(np->p_thread); + lwkt_schedule(np->p_thread); + /* fall through */ + } else if ((np->p_flag&P_CP_RELEASED) && !test_resched(np, p)) { + /* + * When gd_uschedcp's P_CP_RELEASED flag is set it + * must have just called lwkt_switch() in the post + * acquisition code below. We can safely dequeue and + * setrunqueue() it. + * + * Note that we reverse the arguments to test_resched() + * and use NOT. This reverses the hysteresis so we do + * not chain a sequence of steadily worse priorities + * and end up with a very low priority (high p_priority + * value) as our current process. + */ + KKASSERT(curprocmask & (1 << cpuid)); + gd->gd_uschedcp = p; + gd->gd_upri = p->p_priority; + + lwkt_deschedule(np->p_thread); /* local to cpu */ + np->p_flag &= ~P_CP_RELEASED; + setrunqueue(np); +#ifdef INVARIANTS + if (state == ACQ_OPTIMAL) + state = ACQ_STOLEN; +#endif break; } + + /* + * We couldn't acquire the designation, put us on + * the userland run queue for selection and block. + * setrunqueue() will call need_user_resched() if + * necessary if the existing current process has a lower + * priority. 
+ */ + clear_lwkt_resched(); lwkt_deschedule_self(); - p->p_stats->p_ru.ru_nivcsw++; /* involuntary context sw */ p->p_flag &= ~P_CP_RELEASED; setrunqueue(p); - lwkt_switch(); /* CPU CAN CHANGE DUE TO SETRUNQUEUE() */ - KASSERT((p->p_flag & (P_ONRUNQ|P_CURPROC|P_CP_RELEASED)) == P_CURPROC, ("unexpected p_flag %08x acquiring P_CURPROC\n", p->p_flag)); + lwkt_switch(); + /* + * WE MAY HAVE BEEN MIGRATED TO ANOTHER CPU + */ + gd = mycpu; + KKASSERT((p->p_flag & (P_ONRUNQ|P_CP_RELEASED)) == 0); + break; } + + /* + * We have acquired gd_uschedcp and our priority is correct. + * + * If P_CP_RELEASED is set we have to check lwkt_resched_wanted() + * and lwkt_switch() if it returns TRUE in order to run any pending + * threads before returning to user mode. + * + * If P_CP_RELEASED is clear we have *ALREADY* done a switch (and + * we were possibly dequeued and setrunqueue()'d, and then woken up + * again via chooseproc()), and since our priority was lowered we + * are guarenteed that no other kernel threads are pending and that + * we are in fact the gd_uschedcp. + */ + if (p->p_flag & P_CP_RELEASED) { + if (lwkt_resched_wanted()) { + clear_lwkt_resched(); + lwkt_switch(); + gd = mycpu; /* We may have moved */ + if ((p->p_flag & P_CP_RELEASED) == 0) { + ++p->p_stats->p_ru.ru_nivcsw; +#ifdef INVARIANTS + state = ACQ_STALLED; + ++usched_stalls; +#endif + } + } + p->p_flag &= ~P_CP_RELEASED; + } else { + ++p->p_stats->p_ru.ru_nivcsw; +#ifdef INVARIANTS + state = ACQ_STALLED; + ++usched_stalls; +#endif + } + + /* + * That's it. Cleanup, we are done. The caller can return to + * user mode now. + */ + KKASSERT((p->p_flag & P_ONRUNQ) == 0 && gd->gd_uschedcp == p); crit_exit(); +#ifdef INVARIANTS + switch(state) { + case ACQ_OPTIMAL: + ++usched_optimal; + break; + case ACQ_STOLEN: + ++usched_stolen; + break; + default: + break; + } +#endif } /* * Yield / synchronous reschedule. This is a bit tricky because the trap * code might have set a lazy release on the switch function. Setting * P_PASSIVE_ACQ will ensure that the lazy release executes when we call - * switch, and that we will not be rescheduled to another cpu when we attempt - * to re-acquire P_CURPROC. - * - * We have to release P_CURPROC (by calling lwkt_switch(), and acquire it - * again to yield to another user process. Note that the release will - * ensure that we are running at a kernel LWKT priority, and this priority - * is not lowered through the reacquisition and rerelease sequence to ensure - * that we do not deadlock against a higher priority *user* process. + * switch, and that we are given a greater chance of affinity with our + * current cpu. * * We call lwkt_setpri_self() to rotate our thread to the end of the lwkt - * run queue. + * run queue. lwkt_switch() will also execute any assigned passive release + * (which usually calls release_curproc()), allowing a same/higher priority + * process to be designated as the current process. + * + * While it is possible for a lower priority process to be designated, + * it's call to lwkt_maybe_switch() in acquire_curproc() will likely + * round-robin back to us and we will be able to re-acquire the current + * process designation. 
*/ void uio_yield(void) @@ -657,7 +788,8 @@ uio_yield(void) static void sched_thread(void *dummy) { - int cpuid = mycpu->gd_cpuid; /* doesn't change */ + globaldata_t gd = mycpu; + int cpuid = gd->gd_cpuid; /* doesn't change */ u_int32_t cpumask = 1 << cpuid; /* doesn't change */ #ifdef ONLY_ONE_USER_CPU @@ -673,9 +805,8 @@ sched_thread(void *dummy) crit_enter(); if ((curprocmask & cpumask) == 0 && (np = chooseproc(NULL)) != NULL) { curprocmask |= cpumask; - np->p_flag |= P_CURPROC; - mycpu->gd_upri = np->p_priority; - USCHED_COUNTER(np->p_thread); + gd->gd_upri = np->p_priority; + gd->gd_uschedcp = np; lwkt_acquire(np->p_thread); lwkt_schedule(np->p_thread); } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 0e0537a719..e96b8ed408 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -37,7 +37,7 @@ * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 * $FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.6 2002/10/13 07:29:53 kbyanc Exp $ - * $DragonFly: src/sys/kern/kern_synch.c,v 1.30 2004/03/20 19:16:24 dillon Exp $ + * $DragonFly: src/sys/kern/kern_synch.c,v 1.31 2004/03/30 19:14:11 dillon Exp $ */ #include "opt_ktrace.h" @@ -128,7 +128,7 @@ roundrobin_remote(void *arg) { struct proc *p = lwkt_preempted_proc(); if (p == NULL || RTP_PRIO_NEED_RR(p->p_rtprio.type)) - need_resched(); + need_user_resched(); } #endif @@ -138,7 +138,7 @@ roundrobin(void *arg) { struct proc *p = lwkt_preempted_proc(); if (p == NULL || RTP_PRIO_NEED_RR(p->p_rtprio.type)) - need_resched(); + need_user_resched(); #ifdef SMP lwkt_send_ipiq_mask(mycpu->gd_other_cpus, roundrobin_remote, NULL); #endif @@ -157,23 +157,27 @@ resched_cpus(u_int32_t mask) /* * The load average is scaled by FSCALE (2048 typ). The estimated cpu is - * incremented at a rate of ESTCPUFREQ per second, but this is + * incremented at a rate of ESTCPUVFREQ per second (40hz typ), but this is * divided up across all cpu bound processes running in the system so an - * individual process will get less under load. + * individual process will get less under load. ESTCPULIM typicaly caps + * out at ESTCPUMAX (around 376, or 11 nice levels). * - * We want to decay estcpu by 18% per second, but we have to scale to the - * load to avoid overpowering the estcpu aggregation. To stabilize the - * equation under low loads we make everything relative to a load average - * of 1.0. + * Generally speaking the decay equation needs to break-even on growth + * at the limit at all load levels >= 1.0, so if the estimated cpu for + * a process increases by (ESTVCPUFREQ / load) per second, then the decay + * should reach this value when estcpu reaches ESTCPUMAX. That calculation + * is: * - * estcpu -= estcpu * 0.18 / loadav base equation - * estcpu -= (estcpu + ESTCPUFREQ) * 0.18 / (loadav + 1) supplemented + * ESTCPUMAX * decay = ESTCPUVFREQ / load + * decay = ESTCPUVFREQ / (load * ESTCPUMAX) + * decay = estcpu * 0.053 / load * - * Note: 0.18 = 100/555 + * If the load is less then 1.0 we assume a load of 1.0. */ +#define cload(loadav) ((loadav) < FSCALE ? FSCALE : (loadav)) #define decay_cpu(loadav,estcpu) \ - (((estcpu + ESTCPUFREQ) * (100 * FSCALE / 555)) / ((loadav) + FSCALE)) + ((estcpu) * (FSCALE * ESTCPUVFREQ / ESTCPUMAX) / cload(loadav)) /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ @@ -198,7 +202,7 @@ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); #define CCPU_SHIFT 11 /* - * Recompute process priorities, every hz ticks. 
+ * Recompute process priorities, once a second. */ /* ARGSUSED */ static void @@ -311,12 +315,8 @@ sleepinit(void) * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * - * If the process has P_CURPROC set mi_switch() will not re-queue it to - * the userland scheduler queues because we are in a SSLEEP state. If - * we are not the current process then we have to remove ourselves from - * the scheduler queues. - * - * YYY priority now unused + * Note that if we are a process, we release_curproc() before messing with + * the LWKT scheduler. */ int tsleep(void *ident, int flags, const char *wmesg, int timo) @@ -350,8 +350,12 @@ tsleep(void *ident, int flags, const char *wmesg, int timo) crit_enter(); td->td_wchan = ident; td->td_wmesg = wmesg; - if (p) + if (p) { + if (flags & PNORESCHED) + td->td_flags |= TDF_NORESCHED; + release_curproc(p); p->p_slptime = 0; + } lwkt_deschedule_self(); TAILQ_INSERT_TAIL(&slpque[id], td, td_threadq); if (timo) @@ -394,7 +398,6 @@ tsleep(void *ident, int flags, const char *wmesg, int timo) */ clrrunnable(p, SSLEEP); p->p_stats->p_ru.ru_nvcsw++; - KKASSERT(td->td_release || (p->p_flag & P_CURPROC) == 0); mi_switch(); KASSERT(p->p_stat == SRUN, ("tsleep: stat not srun")); } else { @@ -405,6 +408,7 @@ resume: if (p) p->p_flag &= ~P_SINTR; splx(s); + td->td_flags &= ~TDF_NORESCHED; if (td->td_flags & TDF_TIMEOUT) { td->td_flags &= ~TDF_TIMEOUT; if (sig == 0) @@ -605,7 +609,6 @@ mi_switch() * actually need splstatclock(). */ x = splstatclock(); - clear_resched(); /* * Check if the process exceeds its cpu resource allocation. @@ -682,25 +685,16 @@ setrunnable(struct proc *p) /* * Change the process state to NOT be runnable, removing it from the run - * queue. If P_CURPROC is not set and we are in SRUN the process is on the - * run queue (If P_INMEM is not set then it isn't because it is swapped). + * queue. */ void clrrunnable(struct proc *p, int stat) { - int s; - - s = splhigh(); - switch(p->p_stat) { - case SRUN: - if (p->p_flag & P_ONRUNQ) - remrunqueue(p); - break; - default: - break; - } + crit_enter_quick(p->p_thread); + if (p->p_stat == SRUN && (p->p_flag & P_ONRUNQ)) + remrunqueue(p); p->p_stat = stat; - splx(s); + crit_exit_quick(p->p_thread); } /* @@ -820,6 +814,12 @@ sched_setup(void *dummy) * time in 5 * loadav seconds. This causes the system to favor processes * which haven't run much recently, and to round-robin among other processes. * + * The actual schedulerclock interrupt rate is ESTCPUFREQ, but we generally + * want to ramp-up at a faster rate, ESTCPUVFREQ, so p_estcpu is scaled + * by (ESTCPUVFREQ / ESTCPUFREQ). You can control the ramp-up/ramp-down + * rate by adjusting ESTCPUVFREQ in sys/proc.h in integer multiples + * of ESTCPUFREQ. + * * WARNING! called from a fast-int or an IPI, the MP lock MIGHT NOT BE HELD * and we cannot block. 
*/ @@ -831,9 +831,9 @@ schedulerclock(void *dummy) td = curthread; if ((p = td->td_proc) != NULL) { - p->p_cpticks++; - p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); - if ((p->p_estcpu % PPQ) == 0 && try_mplock()) { + p->p_cpticks++; /* cpticks runs at ESTCPUFREQ */ + p->p_estcpu = ESTCPULIM(p->p_estcpu + ESTCPUVFREQ / ESTCPUFREQ); + if (try_mplock()) { resetpriority(p); rel_mplock(); } diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index f9c1257cde..de7c2eafef 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.57 2004/03/28 08:03:02 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.58 2004/03/30 19:14:11 dillon Exp $ */ /* @@ -141,13 +141,6 @@ _lwkt_enqueue(thread_t td) } } -static __inline -int -_lwkt_wantresched(thread_t ntd, thread_t cur) -{ - return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK)); -} - #ifdef _KERNEL /* @@ -406,9 +399,9 @@ lwkt_switch(void) * when we block or switch rather then when we enter the kernel). * This function is NOT called if we are switching into a preemption * or returning from a preemption. Typically this causes us to lose - * our P_CURPROC designation (if we have one) and become a true LWKT - * thread, and may also hand P_CURPROC to another process and schedule - * its thread. + * our current process designation (if we have one) and become a true + * LWKT thread, and may also hand the current process designation to + * another process and schedule thread. */ if (td->td_release) td->td_release(td); @@ -567,21 +560,6 @@ again: crit_exit(); } -/* - * Switch if another thread has a higher priority. Do not switch to other - * threads at the same priority. - */ -void -lwkt_maybe_switch() -{ - struct globaldata *gd = mycpu; - struct thread *td = gd->gd_curthread; - - if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) { - lwkt_switch(); - } -} - /* * Request that the target thread preempt the current thread. Preemption * only works under a specific set of conditions: @@ -618,7 +596,7 @@ void lwkt_preempt(thread_t ntd, int critpri) { struct globaldata *gd = mycpu; - thread_t td = gd->gd_curthread; + thread_t td; #ifdef SMP int mpheld; int savecnt; @@ -627,8 +605,7 @@ lwkt_preempt(thread_t ntd, int critpri) /* * The caller has put us in a critical section. We can only preempt * if the caller of the caller was not in a critical section (basically - * a local interrupt), as determined by the 'critpri' parameter. If - * we are unable to preempt + * a local interrupt), as determined by the 'critpri' parameter. * * YYY The target thread must be in a critical section (else it must * inherit our critical section? I dunno yet). @@ -636,11 +613,14 @@ lwkt_preempt(thread_t ntd, int critpri) * Any tokens held by the target may not be held by thread(s) being * preempted. We take the easy way out and do not preempt if * the target is holding tokens. + * + * Set need_lwkt_resched() unconditionally for now YYY. */ KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri)); - need_resched(); - if (!_lwkt_wantresched(ntd, td)) { + td = gd->gd_curthread; + need_lwkt_resched(); + if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) { ++preempt_miss; return; } @@ -813,10 +793,25 @@ lwkt_schedule_self(void) * Generic schedule. Possibly schedule threads belonging to other cpus and * deal with threads that might be blocked on a wait queue. 
* - * YYY this is one of the best places to implement load balancing code. - * Load balancing can be accomplished by requesting other sorts of actions - * for the thread in question. + * We have a little helper inline function which does additional work after + * the thread has been enqueued, including dealing with preemption and + * setting need_lwkt_resched() (which prevents the kernel from returning + * to userland until it has processed higher priority threads). */ +static __inline +void +_lwkt_schedule_post(thread_t ntd, int cpri) +{ + if (ntd->td_preemptable) { + ntd->td_preemptable(ntd, cpri); /* YYY +token */ + } else { + if ((ntd->td_flags & TDF_NORESCHED) == 0) { + if ((ntd->td_pri & TDPRI_MASK) >= TDPRI_KERN_USER) + need_lwkt_resched(); + } + } +} + void lwkt_schedule(thread_t td) { @@ -851,6 +846,10 @@ lwkt_schedule(thread_t td) * acted upon). * * (remember, wait structures use stable storage) + * + * NOTE: tokens no longer enter a critical section, so we only need + * to account for the crit_enter() above when calling + * _lwkt_schedule_post(). */ if ((w = td->td_wait) != NULL) { lwkt_tokref wref; @@ -862,19 +861,13 @@ lwkt_schedule(thread_t td) #ifdef SMP if (td->td_gd == mycpu) { _lwkt_enqueue(td); - if (td->td_preemptable) - td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ - else if (_lwkt_wantresched(td, curthread)) - need_resched(); + _lwkt_schedule_post(td, TDPRI_CRIT); } else { lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td); } #else _lwkt_enqueue(td); - if (td->td_preemptable) - td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */ - else if (_lwkt_wantresched(td, curthread)) - need_resched(); + _lwkt_schedule_post(td, TDPRI_CRIT); #endif lwkt_reltoken(&wref); } else { @@ -890,21 +883,13 @@ lwkt_schedule(thread_t td) #ifdef SMP if (td->td_gd == mycpu) { _lwkt_enqueue(td); - if (td->td_preemptable) { - td->td_preemptable(td, TDPRI_CRIT); - } else if (_lwkt_wantresched(td, curthread)) { - need_resched(); - } + _lwkt_schedule_post(td, TDPRI_CRIT); } else { lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td); } #else _lwkt_enqueue(td); - if (td->td_preemptable) { - td->td_preemptable(td, TDPRI_CRIT); - } else if (_lwkt_wantresched(td, curthread)) { - need_resched(); - } + _lwkt_schedule_post(td, TDPRI_CRIT); #endif } } diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index ee7b8c5663..f61df8a652 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -17,7 +17,7 @@ * are met. 
* * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $ - * $DragonFly: src/sys/kern/sys_pipe.c,v 1.15 2004/03/28 08:25:48 dillon Exp $ + * $DragonFly: src/sys/kern/sys_pipe.c,v 1.16 2004/03/30 19:14:11 dillon Exp $ */ /* @@ -490,7 +490,7 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, error = EAGAIN; } else { rpipe->pipe_state |= PIPE_WANTR; - if ((error = tsleep(rpipe, PCATCH, + if ((error = tsleep(rpipe, PCATCH|PNORESCHED, "piperd", 0)) == 0) { error = pipelock(rpipe, 1); } @@ -715,7 +715,7 @@ retry: wakeup(wpipe); } pipeselwakeup(wpipe); - error = tsleep(wpipe, PCATCH, "pipdwt", 0); + error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0); } pipelock(wpipe,0); @@ -957,7 +957,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; - error = tsleep(wpipe, PCATCH, "pipewr", 0); + error = tsleep(wpipe, PCATCH|PNORESCHED, "pipewr", 0); if (error != 0) break; /* diff --git a/sys/platform/pc32/i386/genassym.c b/sys/platform/pc32/i386/genassym.c index ecf5e2f36f..384cc7db4b 100644 --- a/sys/platform/pc32/i386/genassym.c +++ b/sys/platform/pc32/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.35 2004/02/21 06:37:07 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.36 2004/03/30 19:14:04 dillon Exp $ */ #include @@ -191,7 +191,8 @@ ASSYM(RQF_IPIQ, RQF_IPIQ); ASSYM(RQF_INTPEND, RQF_INTPEND); ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); -ASSYM(RQF_AST_RESCHED, RQF_AST_RESCHED); +ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); +ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); ASSYM(RQF_AST_MASK, RQF_AST_MASK); diff --git a/sys/platform/pc32/i386/sys_machdep.c b/sys/platform/pc32/i386/sys_machdep.c index b920ea5fbb..3d10961c90 100644 --- a/sys/platform/pc32/i386/sys_machdep.c +++ b/sys/platform/pc32/i386/sys_machdep.c @@ -32,7 +32,7 @@ * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.12 2003/12/20 05:52:26 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.13 2004/03/30 19:14:04 dillon Exp $ * */ @@ -153,7 +153,7 @@ i386_extend_pcb(struct proc *p) ssdtosd(&ssd, &ext->ext_tssd); /* switch to the new TSS after syscall completes */ - need_resched(); + need_user_resched(); return 0; } diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index 2bdf60417e..e68d641379 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.47 2004/03/28 08:03:05 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.48 2004/03/30 19:14:04 dillon Exp $ */ /* @@ -168,9 +168,6 @@ SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, static int slow_release; SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, &slow_release, 0, "Passive Release was nonoptimal"); -static int pass_release; -SYSCTL_INT(_machdep, OID_AUTO, pass_release, CTLFLAG_RW, - &pass_release, 0, "Passive Release on switch"); MALLOC_DEFINE(M_SYSMSG, 
"sysmsg", "sysmsg structure"); @@ -190,25 +187,7 @@ passive_release(struct thread *td) struct proc *p = td->td_proc; td->td_release = NULL; - - /* - * P_CP_RELEASED prevents the userland scheduler from messing with - * this proc. - */ - if ((p->p_flag & P_CP_RELEASED) == 0) { - p->p_flag |= P_CP_RELEASED; - lwkt_setpri_self(TDPRI_KERN_USER); - } - - /* - * Only one process will have a P_CURPROC designation for each cpu - * in the system. Releasing it allows another userland process to - * be scheduled in case our thread blocks in the kernel. - */ - if (p->p_flag & P_CURPROC) { - release_curproc(p); - ++pass_release; - } + release_curproc(p); } /* @@ -223,58 +202,26 @@ userenter(struct thread *curtd) curtd->td_release = passive_release; } +/* + * Reacquire our current process designation. This will not return until + * we have it. Our LWKT priority will be adjusted for our return to + * userland. acquire_curproc() also handles cleaning up P_CP_RELEASED. + * + * This is always the last step before returning to user mode. + */ static __inline void userexit(struct proc *p) { struct thread *td = p->p_thread; - /* - * Reacquire our P_CURPROC status and adjust the LWKT priority - * for our return to userland. We can fast path the case where - * td_release was not called by checking particular proc flags. - * Otherwise we do it the slow way. - * - * Lowering our priority may make other higher priority threads - * runnable. lwkt_setpri_self() does not switch away, so call - * lwkt_maybe_switch() to deal with it. We do this *before* we - * acquire P_CURPROC because another thread may also be intending - * to return to userland and if it has a higher user priority then - * us it will have to block and force us to reschedule, resulting in - * unnecessary extra context switches. - * - * WARNING! Once our priority is lowered to a user level priority - * it is possible, once we return to user mode (or if we were to - * block) for a cpu-bound user process to prevent us from getting cpu - * again. This is always the last step. - */ td->td_release = NULL; - if ((p->p_flag & (P_CP_RELEASED|P_CURPROC)) == P_CURPROC) { - ++fast_release; - lwkt_maybe_switch(); - } else { + if (p->p_flag & P_CP_RELEASED) ++slow_release; - lwkt_setpri_self(TDPRI_USER_NORM); - lwkt_maybe_switch(); - acquire_curproc(p); -#if 0 - /* POSSIBLE FUTURE */ - switch(p->p_rtprio.type) { - case RTP_PRIO_IDLE: - lwkt_setpri_self(TDPRI_USER_IDLE); - break; - case RTP_PRIO_REALTIME: - case RTP_PRIO_FIFO: - lwkt_setpri_self(TDPRI_USER_REAL); - break; - default: - lwkt_setpri_self(TDPRI_USER_NORM); - break; - } -#endif - } + else + ++fast_release; + acquire_curproc(p); } - static void userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) { @@ -297,21 +244,13 @@ userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) /* * If a reschedule has been requested then we release the current - * process in order to shift our P_CURPROC designation to another - * user process. userexit() will reacquire P_CURPROC and block - * there. + * process in order to shift the current process designation to + * another user process and/or to switch to a higher priority + * kernel thread at userexit() time. 
*/ - if (resched_wanted()) { + if (any_resched_wanted()) { p->p_thread->td_release = NULL; - if ((p->p_flag & P_CP_RELEASED) == 0) { - p->p_flag |= P_CP_RELEASED; - lwkt_setpri_self(TDPRI_KERN_USER); - } - if (p->p_flag & P_CURPROC) { - release_curproc(p); - } else { - clear_resched(); - } + release_curproc(p); } /* diff --git a/sys/platform/pc32/isa/intr_machdep.c b/sys/platform/pc32/isa/intr_machdep.c index 8c9da69ed8..ae44c5fbac 100644 --- a/sys/platform/pc32/isa/intr_machdep.c +++ b/sys/platform/pc32/isa/intr_machdep.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ - * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.20 2004/03/10 13:04:40 hmp Exp $ + * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.21 2004/03/30 19:14:08 dillon Exp $ */ /* * This file contains an aggregated module marked: @@ -792,7 +792,7 @@ cpu_intr_preempt(struct thread *td, int critpri) if ((curthread->td_cpl & (1 << info->irq)) == 0) lwkt_preempt(td, critpri); else - need_resched(); + need_lwkt_resched(); } static int diff --git a/sys/platform/pc32/isa/ipl.s b/sys/platform/pc32/isa/ipl.s index d768e05f24..2be0bcd304 100644 --- a/sys/platform/pc32/isa/ipl.s +++ b/sys/platform/pc32/isa/ipl.s @@ -37,7 +37,7 @@ * @(#)ipl.s * * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $ - * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.16 2004/01/30 05:42:16 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.17 2004/03/30 19:14:08 dillon Exp $ */ @@ -137,6 +137,7 @@ doreti_next: cmpl $1,in_vm86call /* YYY make per 'cpu'? */ jnz doreti_ast 1: + /* ASTs are only applicable when returning to userland */ testb $SEL_RPL_MASK,TF_CS(%esp) jnz doreti_ast 2: diff --git a/sys/platform/vkernel/i386/genassym.c b/sys/platform/vkernel/i386/genassym.c index 6465a42f38..50ad652e12 100644 --- a/sys/platform/vkernel/i386/genassym.c +++ b/sys/platform/vkernel/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.35 2004/02/21 06:37:07 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.36 2004/03/30 19:14:04 dillon Exp $ */ #include @@ -191,7 +191,8 @@ ASSYM(RQF_IPIQ, RQF_IPIQ); ASSYM(RQF_INTPEND, RQF_INTPEND); ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); -ASSYM(RQF_AST_RESCHED, RQF_AST_RESCHED); +ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); +ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); ASSYM(RQF_AST_MASK, RQF_AST_MASK); diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h index 404b6b9d68..b6848da858 100644 --- a/sys/sys/globaldata.h +++ b/sys/sys/globaldata.h @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/sys/globaldata.h,v 1.28 2004/03/02 06:06:44 hmp Exp $ + * $DragonFly: src/sys/sys/globaldata.h,v 1.29 2004/03/30 19:14:13 dillon Exp $ */ #ifndef _SYS_GLOBALDATA_H_ @@ -76,6 +76,9 @@ * in various vm_map related operations. gd_vme_avail is *NOT* a count of * the number of structures in the cache but is instead a count of the number * of unreserved structures in the cache. See vm_map_entry_reserve(). + * + * gd_uschedcp is internal to the userland scheduler. 
It does not represent + * the currently running process. */ struct sysmsg; @@ -120,26 +123,30 @@ struct globaldata { struct pipe *gd_pipeq; /* cache pipe structures */ int gd_pipeqcount; /* number of structures */ lwkt_tokref_t gd_tokreqbase; /* requests from other cpus */ + struct proc *gd_uschedcp; /* userland scheduler */ /* extended by */ }; typedef struct globaldata *globaldata_t; -#define RQB_IPIQ 0 -#define RQB_INTPEND 1 -#define RQB_AST_OWEUPC 2 -#define RQB_AST_SIGNAL 3 -#define RQB_AST_RESCHED 4 -#define RQB_AST_UPCALL 5 +#define RQB_IPIQ 0 +#define RQB_INTPEND 1 +#define RQB_AST_OWEUPC 2 +#define RQB_AST_SIGNAL 3 +#define RQB_AST_USER_RESCHED 4 +#define RQB_AST_LWKT_RESCHED 5 +#define RQB_AST_UPCALL 6 -#define RQF_IPIQ (1 << RQB_IPIQ) -#define RQF_INTPEND (1 << RQB_INTPEND) -#define RQF_AST_OWEUPC (1 << RQB_AST_OWEUPC) -#define RQF_AST_SIGNAL (1 << RQB_AST_SIGNAL) -#define RQF_AST_RESCHED (1 << RQB_AST_RESCHED) -#define RQF_AST_UPCALL (1 << RQB_AST_UPCALL) -#define RQF_AST_MASK (RQF_AST_OWEUPC|RQF_AST_SIGNAL|RQF_AST_RESCHED|\ - RQF_AST_UPCALL) +#define RQF_IPIQ (1 << RQB_IPIQ) +#define RQF_INTPEND (1 << RQB_INTPEND) +#define RQF_AST_OWEUPC (1 << RQB_AST_OWEUPC) +#define RQF_AST_SIGNAL (1 << RQB_AST_SIGNAL) +#define RQF_AST_USER_RESCHED (1 << RQB_AST_USER_RESCHED) +#define RQF_AST_LWKT_RESCHED (1 << RQB_AST_LWKT_RESCHED) +#define RQF_AST_UPCALL (1 << RQB_AST_UPCALL) +#define RQF_AST_MASK (RQF_AST_OWEUPC|RQF_AST_SIGNAL|\ + RQF_AST_USER_RESCHED|RQF_AST_LWKT_RESCHED|\ + RQF_AST_UPCALL) #define RQF_IDLECHECK_MASK (RQF_IPIQ|RQF_INTPEND) #endif diff --git a/sys/sys/param.h b/sys/sys/param.h index 37337a191d..62689707d7 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -37,7 +37,7 @@ * * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD: src/sys/sys/param.h,v 1.61.2.38 2003/05/22 17:12:01 fjoe Exp $ - * $DragonFly: src/sys/sys/param.h,v 1.12 2004/03/02 20:55:10 drhodus Exp $ + * $DragonFly: src/sys/sys/param.h,v 1.13 2004/03/30 19:14:13 dillon Exp $ */ #ifndef _SYS_PARAM_H_ @@ -126,6 +126,7 @@ #define PCATCH 0x0100 /* OR'd with pri for tsleep to check signals */ #define PUSRFLAG1 0x0200 /* Subsystem specific flag */ +#define PNORESCHED 0x0400 /* Do not force a reschedule on wakeup */ #define NZERO 0 /* default "nice" */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index decde94005..0cbe349d1b 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -37,7 +37,7 @@ * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD: src/sys/sys/proc.h,v 1.99.2.9 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/sys/proc.h,v 1.46 2004/03/20 23:35:17 dillon Exp $ + * $DragonFly: src/sys/sys/proc.h,v 1.47 2004/03/30 19:14:13 dillon Exp $ */ #ifndef _SYS_PROC_H_ @@ -263,7 +263,7 @@ struct proc { #define P_SINTR 0x00080 /* Sleep is interruptible. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ -#define P_CURPROC 0x00400 /* 'Current process' on this cpu */ +#define P_UNUSED00400 0x00400 #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Debugging process has waited for child. */ #define P_WEXIT 0x02000 /* Working on exiting. */ @@ -391,12 +391,20 @@ TAILQ_HEAD(rq, proc); * ESTCPURAMP determines how slowly estcpu effects the process priority. * Higher numbers result in slower ramp-up times because estcpu is incremented * once per scheduler tick and maxes out at ESTCPULIM. 
+ * + * ESTCPULIM = (127 - 2 * 40) * 8 = 376 + * + * NOTE: ESTCPUVFREQ is the 'virtual' estcpu accumulation frequency, whereas + * ESTCPUFREQ is the actual interrupt rate. The ratio is used to scale + * both the ramp-up and the decay calculations in kern_synch.c. */ #define ESTCPURAMP 8 /* higher equals slower */ #define NICE_ADJUST(value) (((unsigned int)(NICE_WEIGHT * 128) * (value)) / 128) -#define ESTCPULIM(v) min((v), (MAXPRI - NICE_ADJUST(PRIO_MAX - PRIO_MIN)) * ESTCPURAMP) +#define ESTCPUMAX ((MAXPRI - NICE_ADJUST(PRIO_MAX - PRIO_MIN)) * ESTCPURAMP) +#define ESTCPULIM(v) min((v), ESTCPUMAX) #define ESTCPUFREQ 20 /* estcpu update frequency */ +#define ESTCPUVFREQ 40 /* virtual freq controls ramp*/ #define NICE_WEIGHT 2.0 /* priorities per nice level */ #define PPQ ((MAXPRI + 1) / NQS) /* priorities per queue */ diff --git a/sys/sys/thread.h b/sys/sys/thread.h index b252b9dd09..f5c8352b7b 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -7,7 +7,7 @@ * Types which must already be defined when this header is included by * userland: struct md_thread * - * $DragonFly: src/sys/sys/thread.h,v 1.48 2004/03/14 20:54:02 hmp Exp $ + * $DragonFly: src/sys/sys/thread.h,v 1.49 2004/03/30 19:14:13 dillon Exp $ */ #ifndef _SYS_THREAD_H_ @@ -266,6 +266,7 @@ struct thread { #define TDF_WAKEREQ 0x4000 /* resume_kproc */ #define TDF_TIMEOUT 0x8000 /* tsleep timeout */ #define TDF_INTTHREAD 0x00010000 /* interrupt thread */ +#define TDF_NORESCHED 0x00020000 /* Do not reschedule on wake */ /* * Thread priorities. Typically only one thread from any given @@ -317,7 +318,6 @@ extern void lwkt_free_thread(struct thread *td); extern void lwkt_wait_init(struct lwkt_wait *w); extern void lwkt_gdinit(struct globaldata *gd); extern void lwkt_switch(void); -extern void lwkt_maybe_switch(void); extern void lwkt_preempt(thread_t ntd, int critpri); extern void lwkt_schedule(thread_t td); extern void lwkt_schedule_self(void); -- 2.41.0
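
The sys_pipe.c hunks above depend on the new PNORESCHED tsleep() flag: when the
sleeping side of a synchronous direct-write is woken, the waker no longer forces an
LWKT reschedule, avoiding the quick-flip switches between the two ends of the pipe.
The following is a minimal userland model of that idea under stated assumptions; the
helpers model_tsleep()/model_wakeup() and the fake_thread structure are simplified
stand-ins for illustration and are not the kernel's actual code paths.

#include <stdio.h>

/* flag values taken from the param.h and thread.h hunks in this patch */
#define PCATCH        0x0100
#define PNORESCHED    0x0400      /* do not force a reschedule on wakeup */
#define TDF_NORESCHED 0x00020000  /* do not reschedule on wake */

struct fake_thread {
    int td_flags;
};

static int lwkt_resched_requested;  /* models a pending LWKT reschedule AST */

/* tsleep side (modeled): translate the priority flag into a thread flag */
static void model_tsleep(struct fake_thread *td, int flags)
{
    if (flags & PNORESCHED)
        td->td_flags |= TDF_NORESCHED;
}

/* wakeup side (modeled): only request an LWKT reschedule when the sleeper
 * did not opt out with PNORESCHED */
static void model_wakeup(struct fake_thread *sleeper)
{
    if ((sleeper->td_flags & TDF_NORESCHED) == 0)
        lwkt_resched_requested = 1;
    sleeper->td_flags &= ~TDF_NORESCHED;
}

int main(void)
{
    struct fake_thread td = { 0 };

    model_tsleep(&td, PCATCH | PNORESCHED);   /* as in the pipe hunks above */
    model_wakeup(&td);
    printf("reschedule requested: %d\n", lwkt_resched_requested);  /* 0 */

    model_tsleep(&td, PCATCH);                /* an ordinary sleep */
    model_wakeup(&td);
    printf("reschedule requested: %d\n", lwkt_resched_requested);  /* 1 */
    return 0;
}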
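
The trap.c changes above reduce userenter()/userexit() to a release/reacquire pair
around the kernel portion of a trap or syscall, with the per-cpu gd_uschedcp pointer
holding the single "current userland process" designation. Below is a rough userland
sketch of that slot and the priority heuristic; the mproc/mglobaldata structures and
the model_* helpers are invented for illustration and do not reflect the real
acquire_curproc()/release_curproc() internals.

#include <stdio.h>
#include <stddef.h>

struct mproc {
    const char *name;
    int p_priority;              /* lower value == better user priority */
};

struct mglobaldata {
    struct mproc *gd_uschedcp;   /* current userland process designation */
    int gd_upri;                 /* priority heuristic for the holder */
};

/* Modeled return to user mode: claim the designation if it is free, steal it
 * if our priority beats the recorded heuristic, otherwise fall back to
 * requesting a user reschedule. */
static const char *model_acquire_curproc(struct mglobaldata *gd, struct mproc *p)
{
    if (gd->gd_uschedcp == NULL || p->p_priority < gd->gd_upri) {
        gd->gd_uschedcp = p;
        gd->gd_upri = p->p_priority;
        return "runs in userland";
    }
    return "needs user reschedule";
}

/* Modeled passive release: give up the designation so another userland
 * process can run while we block in the kernel. */
static void model_release_curproc(struct mglobaldata *gd, struct mproc *p)
{
    if (gd->gd_uschedcp == p)
        gd->gd_uschedcp = NULL;
}

int main(void)
{
    struct mglobaldata gd = { NULL, 0 };
    struct mproc a = { "A", 10 }, b = { "B", 5 };

    printf("A: %s\n", model_acquire_curproc(&gd, &a));  /* claims the free slot */
    model_release_curproc(&gd, &a);                      /* A blocks in the kernel */
    printf("B: %s\n", model_acquire_curproc(&gd, &b));  /* B claims it, no forced switch */
    return 0;
}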
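
userret() above now keys off any_resched_wanted(), i.e. either of the two new AST
bits, and the genassym/globaldata hunks renumber RQB_AST_* so that user and LWKT
reschedule requests occupy separate bits of gd_reqflags. The sketch below only
models that bit layout and the resulting predicates; the real kernel versions use
atomic operations on the per-cpu gd_reqflags field (defined in the cpu.h changes,
not quoted here), which this plain-C model omits.

#include <stdio.h>

/* bit numbering as introduced by this patch */
#define RQB_AST_USER_RESCHED 4
#define RQB_AST_LWKT_RESCHED 5

#define RQF_AST_USER_RESCHED (1 << RQB_AST_USER_RESCHED)
#define RQF_AST_LWKT_RESCHED (1 << RQB_AST_LWKT_RESCHED)

static unsigned int gd_reqflags;  /* per-cpu request flags, modeled globally */

static void need_user_resched(void) { gd_reqflags |= RQF_AST_USER_RESCHED; }
static void need_lwkt_resched(void) { gd_reqflags |= RQF_AST_LWKT_RESCHED; }

/* the three predicates the trap path can now distinguish */
static int user_resched_wanted(void) { return gd_reqflags & RQF_AST_USER_RESCHED; }
static int lwkt_resched_wanted(void) { return gd_reqflags & RQF_AST_LWKT_RESCHED; }
static int any_resched_wanted(void)
{
    return gd_reqflags & (RQF_AST_USER_RESCHED | RQF_AST_LWKT_RESCHED);
}

int main(void)
{
    need_lwkt_resched();
    printf("user=%d lwkt=%d any=%d\n",
           !!user_resched_wanted(), !!lwkt_resched_wanted(), !!any_resched_wanted());

    need_user_resched();
    printf("user=%d lwkt=%d any=%d\n",
           !!user_resched_wanted(), !!lwkt_resched_wanted(), !!any_resched_wanted());
    return 0;
}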
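
The proc.h hunk above hard-codes the derivation ESTCPUMAX = (MAXPRI -
NICE_ADJUST(PRIO_MAX - PRIO_MIN)) * ESTCPURAMP = (127 - 2*40) * 8 = 376, and pairs
the real 20Hz update rate (ESTCPUFREQ) with a 40Hz virtual rate (ESTCPUVFREQ) so the
ramp-up and decay calculations can be scaled by the 2:1 ratio without raising the
systimer interrupt rate. The arithmetic can be checked with the small program below;
the "virtual ticks per real tick" line is only an illustration of how such a ratio
scales per-tick accumulation, not the exact formula used in kern_synch.c.

#include <stdio.h>

#define MAXPRI      127
#define PRIO_MIN    (-20)
#define PRIO_MAX    20
#define NICE_WEIGHT 2.0
#define ESTCPURAMP  8
#define ESTCPUFREQ  20   /* actual estcpu update rate, Hz */
#define ESTCPUVFREQ 40   /* virtual accumulation rate, Hz */

/* macros copied from the proc.h hunk above */
#define NICE_ADJUST(value) (((unsigned int)(NICE_WEIGHT * 128) * (value)) / 128)
#define ESTCPUMAX ((MAXPRI - NICE_ADJUST(PRIO_MAX - PRIO_MIN)) * ESTCPURAMP)

int main(void)
{
    /* reproduce the constant documented in the comment: (127 - 2*40) * 8 = 376 */
    printf("NICE_ADJUST(PRIO_MAX - PRIO_MIN) = %u\n",
           NICE_ADJUST(PRIO_MAX - PRIO_MIN));            /* 80 */
    printf("ESTCPUMAX = %d\n", ESTCPUMAX);               /* 376 */

    /* with a 40Hz virtual rate driven by a 20Hz interrupt, each real tick
     * accounts for VFREQ/FREQ worth of virtual ticks, so estcpu ramps as
     * fast as it would under a 40Hz systimer */
    printf("virtual ticks per real tick = %d\n", ESTCPUVFREQ / ESTCPUFREQ);  /* 2 */
    return 0;
}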