From 26a0694ba4ad6b2eea86882e4436d151c7eab4cd Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 30 Jun 2003 19:50:32 +0000 Subject: [PATCH] Misc interrupts/LWKT 1/2: threaded interrupts 2: Major work on the user scheduler, separate it completely from the LWKT scheduler and make user priorities, including idprio, normal, and rtprio, work properly. This includes fixing the priority inversion problem that 4.x had. Also complete the work on interrupt preemption. There were a few things I wasn't doing correctly including not protecting the initial call to cpu_heavy_restore when a process is just starting up. Enhance DDB a bit (threads don't show up in PS yet). This is a major milestone. --- sys/ddb/db_ps.c | 21 +- sys/i386/i386/exception.s | 17 +- sys/i386/i386/machdep.c | 12 +- sys/i386/i386/swtch.s | 39 ++-- sys/i386/i386/trap.c | 131 +++++++++--- sys/i386/i386/vm_machdep.c | 4 +- sys/i386/icu/icu_vector.s | 12 +- sys/i386/isa/icu_vector.s | 12 +- sys/i386/isa/intr_machdep.c | 47 +++-- sys/i386/isa/ipl.s | 25 ++- sys/kern/init_main.c | 4 +- sys/kern/kern_exit.c | 8 +- sys/kern/kern_fork.c | 6 +- sys/kern/kern_intr.c | 25 +-- sys/kern/kern_kthread.c | 3 +- sys/kern/kern_resource.c | 5 +- sys/kern/kern_sig.c | 22 +-- sys/kern/kern_subr.c | 16 +- sys/kern/kern_switch.c | 101 +++++----- sys/kern/kern_synch.c | 286 +++++++++++++++++---------- sys/kern/kern_sysctl.c | 4 +- sys/kern/kern_threads.c | 13 +- sys/kern/lwkt_thread.c | 139 ++++++++++--- sys/platform/pc32/i386/exception.s | 17 +- sys/platform/pc32/i386/machdep.c | 12 +- sys/platform/pc32/i386/swtch.s | 39 ++-- sys/platform/pc32/i386/trap.c | 131 +++++++++--- sys/platform/pc32/i386/vm_machdep.c | 4 +- sys/platform/pc32/icu/icu_vector.s | 12 +- sys/platform/pc32/isa/icu_vector.s | 12 +- sys/platform/pc32/isa/intr_machdep.c | 47 +++-- sys/platform/pc32/isa/ipl.s | 25 ++- sys/sys/globaldata.h | 7 +- sys/sys/proc.h | 11 +- sys/sys/thread.h | 13 +- sys/sys/thread2.h | 38 +--- sys/vfs/specfs/spec_vnops.c | 5 +- sys/vm/vm_glue.c | 19 +- sys/vm/vm_meter.c | 3 +- 39 files changed, 851 insertions(+), 496 deletions(-) diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index 2efebbc444..482b54350b 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/ddb/db_ps.c,v 1.20 1999/08/28 00:41:09 peter Exp $ - * $DragonFly: src/sys/ddb/db_ps.c,v 1.4 2003/06/23 17:55:27 dillon Exp $ + * $DragonFly: src/sys/ddb/db_ps.c,v 1.5 2003/06/30 19:50:28 dillon Exp $ */ #include #include @@ -107,4 +107,23 @@ db_ps(dummy1, dummy2, dummy3, dummy4) if (p == NULL && np > 0) p = zombproc.lh_first; } + + /* + * Dump running threads + */ + db_printf("cpu %d tdrunqmask %08x\n", mycpu->gd_cpuid, mycpu->gd_runqmask); + db_printf(" tdq thread pid flags pri(act) sp wmesg comm\n"); + for (np = 0; np < 32; ++np) { + thread_t td; + TAILQ_FOREACH(td, &mycpu->gd_tdrunq[np], td_threadq) { + db_printf(" %3d %p %3d %08x %3d(%3d) %p %8.8s %s\n", + np, td, + (td->td_proc ? td->td_proc->p_pid : -1), + td->td_flags, td->td_pri, + td->td_pri & TDPRI_MASK, + td->td_sp, + td->td_wmesg ? td->td_wmesg : "-", + td->td_proc ? td->td_proc->p_comm : td->td_comm); + } + } } diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index dd5d9baab1..3c8b9a294a 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
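The db_ps() additions just above walk the new per-cpu LWKT run queues directly: gd_runqmask is a 32-bit summary mask with one bit per priority queue, and gd_tdrunq[] holds the queues themselves. As a rough userland sketch of how a scheduler picks the highest-priority non-empty queue out of such a mask (the struct and helper here are hypothetical; only the field idea comes from the patch):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy model: 32 run queues summarized by a bitmask, as in gd_runqmask. */
    struct toy_cpu {
        uint32_t runqmask;          /* bit N set => run queue N is non-empty */
    };

    /* Highest set bit = best non-empty queue (what an x86 'bsrl' computes). */
    static int highest_queue(uint32_t mask)
    {
        int nq = -1;

        while (mask) {
            ++nq;
            mask >>= 1;
        }
        return nq;                  /* -1 means nothing is runnable */
    }

    int main(void)
    {
        struct toy_cpu cpu = { 0 };

        cpu.runqmask |= 1u << 4;    /* say, a kernel-daemon-class queue */
        cpu.runqmask |= 1u << 27;   /* say, an interrupt-thread queue */
        assert(highest_queue(cpu.runqmask) == 27);
        printf("dispatch from run queue %d first\n",
            highest_queue(cpu.runqmask));
        return 0;
    }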
* * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - * $DragonFly: src/sys/i386/i386/Attic/exception.s,v 1.7 2003/06/29 03:28:42 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/exception.s,v 1.8 2003/06/30 19:50:30 dillon Exp $ */ #include "npx.h" @@ -312,13 +312,18 @@ IDTVEC(int0x80_syscall) movl $1,_intr_nesting_level jmp _doreti +/* + * This function is what cpu_heavy_restore jumps to after a new process + * is created. We are in a critical section in order to prevent + * cpu_heavy_restore from being interrupted (especially since it stores + * its context in a static place!), so the first thing we do is release + * the critical section. + */ ENTRY(fork_trampoline) + movl _curthread,%eax + subl $TDPRI_CRIT,TD_PRI(%eax) call _spl0 - - movl _curthread,%eax /* YYY heavy weight process must */ - pushl TD_PROC(%eax) /* YYY remove itself from runq because */ - call remrunqueue /* LWKT restore func doesn't do that */ - addl $4,%esp + call _splz /* * cpu_set_fork_handler intercepts this function call to diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index df8cd2d5cb..5d7d92fd5f 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.17 2003/06/29 07:37:03 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.18 2003/06/30 19:50:30 dillon Exp $ */ #include "apm.h" @@ -944,6 +944,9 @@ cpu_halt(void) * (unless you want to blow things up!). Instead we look for runnable threads * and loop or halt as appropriate. Giant is not held on entry to the thread. * + * The main loop is entered with a critical section held, we must release + * the critical section before doing anything else. + * * Note on cpu_idle_hlt: On an SMP system this may cause the system to * halt until the next clock tick, even if a thread is ready YYY */ @@ -954,6 +957,7 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, void cpu_idle(void) { + crit_exit(); for (;;) { lwkt_switch(); __asm __volatile("cli"); @@ -1826,12 +1830,14 @@ init386(int first) */ gd = &CPU_prvspace[0].mdglobaldata; - lwkt_init_thread(&thread0, proc0paddr, 0); + lwkt_init_thread(&thread0, proc0paddr, 0, &gd->mi); gd->mi.gd_curthread = &thread0; safepri = thread0.td_cpl = SWI_MASK | HWI_MASK; thread0.td_switch = cpu_heavy_switch; /* YYY eventually LWKT */ proc0.p_addr = (void *)thread0.td_kstack; proc0.p_thread = &thread0; + proc0.p_flag |= P_CURPROC; + gd->mi.gd_uprocscheduled = 1; thread0.td_proc = &proc0; atdevbase = ISA_HOLE_START + KERNBASE; @@ -2050,7 +2056,7 @@ cpu_gdinit(struct mdglobaldata *gd, int cpu) gd->mi.gd_idletd = &gd->gd_idlethread; sp = gd->mi.gd_prvspace->idlestack; - lwkt_init_thread(&gd->gd_idlethread, sp, 0); + lwkt_init_thread(&gd->gd_idlethread, sp, 0, &gd->mi); gd->gd_idlethread.td_switch = cpu_lwkt_switch; gd->gd_idlethread.td_sp -= sizeof(void *); *(void **)gd->gd_idlethread.td_sp = cpu_idle_restore; diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 354b35d2a8..34a6101884 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
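The fork_trampoline() change above shows the idiom this whole patch leans on: a critical section is just TDPRI_CRIT added to td_pri, so releasing one is a single subtract. A minimal sketch of that counting scheme; the two constants are illustrative stand-ins, not the kernel's real values:

    #include <assert.h>

    #define TDPRI_MASK  31     /* low bits: scheduling priority (illustrative) */
    #define TDPRI_CRIT  32     /* one increment per critical section held */

    static int td_pri;         /* stand-in for curthread->td_pri */

    static void crit_enter(void) { td_pri += TDPRI_CRIT; }
    static void crit_exit(void)  { td_pri -= TDPRI_CRIT; assert(td_pri >= 0); }

    int main(void)
    {
        td_pri = 10 + TDPRI_CRIT;     /* a new thread starts one section deep */
        crit_enter();                 /* nesting is plain addition */
        assert(td_pri / TDPRI_CRIT == 2);
        crit_exit();
        crit_exit();                  /* what fork_trampoline's subl does */
        assert((td_pri & TDPRI_MASK) == 10 && td_pri < TDPRI_CRIT);
        return 0;
    }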
* * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.16 2003/06/29 03:28:42 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.17 2003/06/30 19:50:30 dillon Exp $ */ #include "npx.h" @@ -249,6 +249,7 @@ ENTRY(cpu_exit_switch) * * YYY note: spl check is done in mi_switch when it splx()'s. */ + ENTRY(cpu_heavy_restore) /* interrupts are disabled */ movl TD_PCB(%eax),%edx @@ -425,18 +426,6 @@ cpu_switch_load_gs: popl %ebx movl %eax,%dr7 1: -#if 0 - /* - * Remove the heavy weight process from the heavy weight queue. - * this will also have the side effect of removing the thread from - * the run queue. YYY temporary? - * - * LWKT threads stay on the run queue until explicitly removed. - */ - pushl %ecx - call remrunqueue - addl $4,%esp -#endif sti /* XXX */ ret @@ -555,10 +544,14 @@ ENTRY(cpu_idle_restore) * don't die. This restore function is used to bootstrap into an * LWKT based kernel thread only. cpu_lwkt_switch() will be used * after this. + * + * Since all of our context is on the stack we are reentrant and + * we can release our critical section and enable interrupts early. */ ENTRY(cpu_kthread_restore) movl TD_PCB(%eax),%ebx movl $0,%ebp + subl $TDPRI_CRIT,TD_PRI(%eax) sti popl %edx /* kthread exit function */ pushl PCB_EBX(%ebx) /* argument to ESI function */ @@ -571,6 +564,9 @@ ENTRY(cpu_kthread_restore) * * Standard LWKT switching function. Only non-scratch registers are * saved and we don't bother with the MMU state or anything else. + * + * This function is always called while in a critical section. + * * YYY BGL, SPL */ ENTRY(cpu_lwkt_switch) @@ -589,8 +585,14 @@ ENTRY(cpu_lwkt_switch) ret /* - * cpu_idle_restore() (current thread in %eax on entry) + * cpu_lwkt_restore() (current thread in %eax on entry) * + * Standard LWKT restore function. This function is always called + * while in a critical section. + * + * Warning: due to preemption the restore function can be used to + * 'return' to the original thread. Interrupt disablement must be + * protected through the switch so we cannot run splz here. */ ENTRY(cpu_lwkt_restore) popfl @@ -598,14 +600,5 @@ ENTRY(cpu_lwkt_restore) popl %esi popl %ebx popl %ebp - cmpl $0,_intr_nesting_level /* don't stack too deeply */ - jne 2f - testl _ipending,%ecx - jnz 1f - testl _fpending,%ecx - jz 2f -1: - call splz /* execute unmasked ints */ -2: ret diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index d685442ea7..eb57b609e8 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.11 2003/06/29 03:28:42 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.12 2003/06/30 19:50:30 dillon Exp $ */ /* @@ -144,9 +144,6 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; -static __inline int userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks, int have_mplock)); - #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif @@ -160,46 +157,105 @@ static int panic_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, &panic_on_nmi, 0, "Panic on NMI"); -static __inline int -userret(p, frame, oticks, have_mplock) - struct proc *p; - struct trapframe *frame; - u_quad_t oticks; - int have_mplock; +/* + * USER->KERNEL transition. 
Do not transition us out of userland from the + * point of view of the userland scheduler unless we actually have to + * switch. + * + * usertdsw is called from within a critical section. + */ +static void +usertdsw(struct thread *ntd) +{ + struct thread *td = curthread; + + td->td_switch = cpu_heavy_switch; + lwkt_setpri_self(TDPRI_KERN_USER); +#if 0 + /* + * This is where we might want to catch the P_CURPROC designation + * and fix it for *any* switchout rather then just an mi_switch() + * switchout (move from mi_switch()?) YYY + */ + if (p->p_flag & P_CURPROC) { + ... + } +#endif + td->td_switch(ntd); +} + +/* + * Note that userenter() may be re-entered several times due to AST + * processing. + */ +static __inline void +userenter(void) +{ + struct thread *td = curthread; + + KKASSERT(td->td_switch == cpu_heavy_switch || + td->td_switch == usertdsw); + td->td_switch = usertdsw; +} + +static int +userret(struct proc *p, struct trapframe *frame, + u_quad_t oticks, int have_mplock) { int sig, s; - struct thread *td; + struct thread *td = curthread; + /* + * Post any pending signals + */ + crit_enter(); while ((sig = CURSIG(p)) != 0) { if (have_mplock == 0) { get_mplock(); have_mplock = 1; } + crit_exit(); postsig(sig); + crit_enter(); } - p->p_priority = p->p_usrpri; + /* + * Set our priority properly and restore our switch function + */ + if (td->td_switch == cpu_heavy_switch) { + switch(p->p_rtprio.type) { + case RTP_PRIO_IDLE: + lwkt_setpri_self(TDPRI_USER_IDLE); + break; + case RTP_PRIO_REALTIME: + case RTP_PRIO_FIFO: + lwkt_setpri_self(TDPRI_USER_REAL); + break; + default: + lwkt_setpri_self(TDPRI_USER_NORM); + break; + } + } else { + KKASSERT(td->td_switch == usertdsw); + td->td_switch = cpu_heavy_switch; + } + crit_exit(); + + /* + * If a reschedule has been requested we call chooseproc() to locate + * the next runnable process. When we wakeup from that we check + * for pending signals again. + */ if (resched_wanted()) { - /* - * Since we are curproc, clock will normally just change - * our priority without moving us from one queue to another - * (since the running process is not on a queue.) - * If that happened after we setrunqueue ourselves but before we - * mi_switch()'ed, we might not be on the queue indicated by - * our priority. - */ + uio_yield(); if (have_mplock == 0) { get_mplock(); have_mplock = 1; } - s = splhigh(); - setrunqueue(p); - p->p_stats->p_ru.ru_nivcsw++; - mi_switch(); - splx(s); while ((sig = CURSIG(p)) != 0) postsig(sig); } + /* * Charge system time if profiling. */ @@ -208,11 +264,24 @@ userret(p, frame, oticks, have_mplock) get_mplock(); have_mplock = 1; } - td = curthread; addupc_task(p, frame->tf_eip, - (u_int)(td->td_sticks - oticks) * psratio); + (u_int)(curthread->td_sticks - oticks) * psratio); } - curpriority = p->p_priority; + + /* + * In order to return to userland we need to be the designated + * current (user) process on this cpu, aka P_CURPROC. The + * setrunqueue() call could make us the current process. + */ + s = splhigh(); + while ((p->p_flag & P_CURPROC) == 0) { + p->p_stats->p_ru.ru_nivcsw++; + lwkt_deschedule_self(); + mi_switch(); + } + splx(s); + KKASSERT(mycpu->gd_uprocscheduled == 1); + return(have_mplock); } @@ -325,6 +394,8 @@ restart: if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { /* user trap */ + userenter(); + sticks = curthread->td_sticks; p->p_md.md_regs = &frame; @@ -1097,9 +1168,11 @@ syscall2(frame) /* * access non-atomic field from critical section. p_sticks is - * updated by the clock interrupt. 
+ * updated by the clock interrupt. Also use this opportunity + * to raise our LWKT priority. */ crit_enter(); + userenter(); sticks = curthread->td_sticks; crit_exit(); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index e46a935e2b..1fe7c760ed 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.14 2003/06/29 03:28:42 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.15 2003/06/30 19:50:30 dillon Exp $ */ #include "npx.h" @@ -608,7 +608,7 @@ vm_page_zero_idle() return (0); } -void +static void swi_vm(void *arg) { if (busdma_swi_pending != 0) diff --git a/sys/i386/icu/icu_vector.s b/sys/i386/icu/icu_vector.s index 68b00c5e7d..dd371c0fba 100644 --- a/sys/i386/icu/icu_vector.s +++ b/sys/i386/icu/icu_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $ - * $DragonFly: src/sys/i386/icu/Attic/icu_vector.s,v 1.8 2003/06/29 07:37:06 dillon Exp $ + * $DragonFly: src/sys/i386/icu/Attic/icu_vector.s,v 1.9 2003/06/30 19:50:31 dillon Exp $ */ /* @@ -165,11 +165,9 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - incl _intr_nesting_level ; \ - pushl %ebp ; /* frame for ddb backtrace */ \ - movl %esp, %ebp ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ PUSH_DUMMY ; \ - andl $~IRQ_LBIT(irq_num),_fpending ; \ pushl intr_unit + (irq_num) * 4 ; \ call *intr_handler + (irq_num) * 4 ; \ addl $4, %esp ; \ @@ -179,7 +177,6 @@ IDTVEC(vec_name) ; \ UNMASK_IRQ(icu, irq_num) ; \ POP_DUMMY ; \ popl %ebp ; \ - decl _intr_nesting_level ; \ ret ; \ /* @@ -232,12 +229,15 @@ IDTVEC(vec_name) ; \ movl $TDPRI_CRIT,_reqpri ; \ jmp 5f ; \ 2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ /* set running bit, clear pending bit, run handler */ \ orl $IRQ_LBIT(irq_num),_irunning ; \ andl $~IRQ_LBIT(irq_num),_ipending ; \ + sti ; \ pushl $irq_num ; \ call _sched_ithd ; \ addl $4,%esp ; \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ incl _cnt+V_INTR ; /* book-keeping YYY make per-cpu */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s index 3da01b7fdc..4f6f032241 100644 --- a/sys/i386/isa/icu_vector.s +++ b/sys/i386/isa/icu_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $ - * $DragonFly: src/sys/i386/isa/Attic/icu_vector.s,v 1.8 2003/06/29 07:37:06 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/icu_vector.s,v 1.9 2003/06/30 19:50:31 dillon Exp $ */ /* @@ -165,11 +165,9 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - incl _intr_nesting_level ; \ - pushl %ebp ; /* frame for ddb backtrace */ \ - movl %esp, %ebp ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ PUSH_DUMMY ; \ - andl $~IRQ_LBIT(irq_num),_fpending ; \ pushl intr_unit + (irq_num) * 4 ; \ call *intr_handler + (irq_num) * 4 ; \ addl $4, %esp ; \ @@ -179,7 +177,6 @@ IDTVEC(vec_name) ; \ UNMASK_IRQ(icu, irq_num) ; \ POP_DUMMY ; \ popl %ebp ; \ - decl _intr_nesting_level ; \ ret ; \ /* @@ -232,12 +229,15 @@ IDTVEC(vec_name) ; \ movl $TDPRI_CRIT,_reqpri ; \ jmp 5f ; \ 2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ /* set running bit, clear pending bit, run 
handler */ \ orl $IRQ_LBIT(irq_num),_irunning ; \ andl $~IRQ_LBIT(irq_num),_ipending ; \ + sti ; \ pushl $irq_num ; \ call _sched_ithd ; \ addl $4,%esp ; \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ incl _cnt+V_INTR ; /* book-keeping YYY make per-cpu */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index c972594dfd..7fed089773 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ - * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.3 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.4 2003/06/30 19:50:31 dillon Exp $ */ /* * This file contains an aggregated module marked: @@ -111,12 +111,23 @@ #define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) -u_long *intr_countp[ICU_LEN]; -inthand2_t *intr_handler[ICU_LEN]; -u_int intr_mask[ICU_LEN]; -int intr_mihandler_installed[ICU_LEN]; -static u_int* intr_mptr[ICU_LEN]; -void *intr_unit[ICU_LEN]; +static inthand2_t isa_strayintr; + +u_long *intr_countp[ICU_LEN*2]; +inthand2_t *intr_handler[ICU_LEN*2] = { + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, + isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr, +}; +u_int intr_mask[ICU_LEN*2]; +int intr_mihandler_installed[ICU_LEN*2]; +static u_int* intr_mptr[ICU_LEN*2]; +void *intr_unit[ICU_LEN*2]; static inthand_t *fastintr[ICU_LEN] = { &IDTVEC(fastintr0), &IDTVEC(fastintr1), @@ -167,8 +178,6 @@ static inthand_t *slowintr[ICU_LEN] = { #endif /* APIC_IO */ }; -static inthand2_t isa_strayintr; - #ifdef PC98 #define NMI_PARITY 0x04 #define NMI_EPARITY 0x02 @@ -318,8 +327,7 @@ isa_defaultirq() * Caught a stray interrupt, notify */ static void -isa_strayintr(vcookiep) - void *vcookiep; +isa_strayintr(void *vcookiep) { int intr = (void **)vcookiep - &intr_unit[0]; @@ -340,6 +348,7 @@ isa_strayintr(vcookiep) * must be done before sending an EOI so it can't be done if * we are using AUTO_EOI_1. */ + printf("STRAY %d\n", intr); if (intrcnt[1 + intr] <= 5) log(LOG_ERR, "stray irq %d\n", intr); if (intrcnt[1 + intr] == 5) @@ -906,12 +915,6 @@ inthand_remove(intrec *idesc) return (0); } -void -call_fast_unpend(int irq) -{ - fastunpend[irq](); -} - /* * ithread_done() * @@ -919,7 +922,10 @@ call_fast_unpend(int irq) * processing a loop. We interlock with ipending and irunning. If * a new interrupt is pending for the thread the function clears the * pending bit and returns. If no new interrupt is pending we - * deschedule and sleep. + * deschedule and sleep. If we reschedule and return we have to + * disable the interrupt again or it will keep interrupting us. + * + * See kern/kern_intr.c for more information. 
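The ithread_done() logic described above (its body follows just below) has a classic lost-wakeup shape: ipending must be re-tested after the thread deschedules itself, because a new interrupt can slip in between the first test and the switch. A userland sketch of that test/deschedule/re-test pattern using C11 atomics; everything here is a stand-in, and the hardware INTREN/INTRDIS steps are omitted:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint ipending;    /* one bit per irq, set at interrupt time */
    static atomic_uint irunning;

    static void fake_deschedule(void) { /* would clear the thread's run flag */ }

    /* Returns true if the interrupt thread should loop and run handlers again. */
    static bool toy_ithread_done(int irq)
    {
        unsigned mask = 1u << irq;

        if (atomic_fetch_and(&ipending, ~mask) & mask)
            return true;            /* new work arrived before we slept */
        fake_deschedule();          /* not runnable yet, but not switched out */
        if (atomic_fetch_and(&ipending, ~mask) & mask)
            return true;            /* the race: interrupt fired after the test */
        atomic_fetch_and(&irunning, ~mask);
        return false;               /* safe to switch away now */
    }

    int main(void)
    {
        atomic_fetch_or(&ipending, 1u << 3);
        return toy_ithread_done(3) ? 0 : 1;   /* exits 0: must loop again */
    }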
*/ void ithread_done(int irq) @@ -927,21 +933,22 @@ ithread_done(int irq) struct mdglobaldata *gd = mdcpu; int mask = 1 << irq; - crit_enter(); + KKASSERT(curthread->td_pri >= TDPRI_CRIT); INTREN(mask); if (gd->gd_ipending & mask) { atomic_clear_int(&gd->gd_ipending, mask); + INTRDIS(mask); lwkt_schedule_self(); } else { lwkt_deschedule_self(); if (gd->gd_ipending & mask) { /* race */ atomic_clear_int(&gd->gd_ipending, mask); + INTRDIS(mask); lwkt_schedule_self(); } else { atomic_clear_int(&gd->gd_irunning, mask); lwkt_switch(); } } - crit_exit(); } diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s index c79dbdf217..7239ed8166 100644 --- a/sys/i386/isa/ipl.s +++ b/sys/i386/isa/ipl.s @@ -37,7 +37,7 @@ * @(#)ipl.s * * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $ - * $DragonFly: src/sys/i386/isa/Attic/ipl.s,v 1.4 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/ipl.s,v 1.5 2003/06/30 19:50:31 dillon Exp $ */ @@ -68,6 +68,10 @@ _soft_imask: .long SWI_MASK _softnet_imask: .long SWI_NET_MASK .globl _softtty_imask _softtty_imask: .long SWI_TTY_MASK + .globl last_splz +last_splz: .long 0 + .globl last_splz2 +last_splz2: .long 0 .text @@ -103,10 +107,10 @@ doreti_next: testl _ipending,%ecx jne doreti_intr testl $AST_PENDING,_astpending /* any pending ASTs? */ - je 2f + jz 2f testl $PSL_VM,TF_EFLAGS(%esp) jz 1f - cmpl $1,_in_vm86call + cmpl $1,_in_vm86call /* YYY make per 'cpu' */ jnz doreti_ast 1: testb $SEL_RPL_MASK,TF_CS(%esp) @@ -203,15 +207,21 @@ doreti_ast: * SPLZ() a C callable procedure to dispatch any unmasked pending * interrupts regardless of critical section nesting. ASTs * are not dispatched. + * + * YYY at the moment I leave us in a critical section so as + * not to have to mess with the cpls which will soon be obsolete. */ SUPERALIGN_TEXT ENTRY(splz) + pushfl pushl %ebx movl _curthread,%ebx movl TD_CPL(%ebx),%eax + addl $TDPRI_CRIT,TD_PRI(%ebx) splz_next: + cli movl %eax,%ecx /* ecx = ~CPL */ notl %ecx testl _fpending,%ecx /* check for an unmasked fast int */ @@ -223,7 +233,9 @@ splz_next: testl _ipending,%ecx jne splz_intr + subl $TDPRI_CRIT,TD_PRI(%ebx) popl %ebx + popfl ret /* @@ -235,9 +247,12 @@ splz_fast: bsfl %ecx, %ecx /* locate the next dispatchable int */ btrl %ecx, _fpending /* is it really still pending? */ jnc splz_next + movl $1,last_splz + movl %ecx,last_splz2 pushl %eax call *_fastunpend(,%ecx,4) popl %eax + movl $-1,last_splz jmp splz_next /* @@ -249,11 +264,15 @@ splz_intr: bsfl %ecx, %ecx /* locate the next dispatchable int */ btrl %ecx, _ipending /* is it really still pending? 
*/ jnc splz_next + sti + movl $2,last_splz pushl %eax pushl %ecx + movl %ecx,last_splz2 call _sched_ithd /* YYY must pull in imasks */ addl $4,%esp popl %eax + movl $-2,last_splz jmp splz_next /* diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index a61f4c8d72..dd86cbdb08 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -40,7 +40,7 @@ * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/init_main.c,v 1.134.2.8 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/init_main.c,v 1.15 2003/06/28 04:16:04 dillon Exp $ + * $DragonFly: src/sys/kern/init_main.c,v 1.16 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_init_path.h" @@ -290,7 +290,7 @@ proc0_init(void *dummy __unused) p->p_sysent = &aout_sysvec; - p->p_flag = P_INMEM | P_SYSTEM; + p->p_flag = P_INMEM | P_SYSTEM | P_CURPROC; p->p_stat = SRUN; p->p_nice = NZERO; p->p_rtprio.type = RTP_PRIO_NORMAL; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 825e8fa587..fe3495b839 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.11 2003/06/27 03:30:42 dillon Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.12 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_compat.h" @@ -364,6 +364,12 @@ exit1(int rv) p->p_limit = NULL; } + /* + * Release the P_CURPROC designation on the process so the userland + * scheduler can work in someone else. + */ + relscurproc(p); + /* * Finally, call machine-dependent code to release the remaining * resources including address space, the kernel stack and pcb. diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index acf9b1ae85..5d614845c9 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -37,7 +37,7 @@ * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.13 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/kern_fork.c,v 1.8 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/kern_fork.c,v 1.9 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_ktrace.h" @@ -607,7 +607,9 @@ void start_forked_proc(struct proc *p1, struct proc *p2) { /* - * Move from SIDL to RUN queue + * Move from SIDL to RUN queue, and activate the process's thread. + * Activation of the thread effectively makes the process "a" + * current process, so we do not setrunqueue(). */ KASSERT(p2->p_stat == SIDL, ("cannot start forked process, bad status: %p", p2)); diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 929d37a624..e261dee9a3 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -24,7 +24,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $ - * $DragonFly: src/sys/kern/kern_intr.c,v 1.5 2003/06/29 07:37:06 dillon Exp $ + * $DragonFly: src/sys/kern/kern_intr.c,v 1.6 2003/06/30 19:50:31 dillon Exp $ * */ @@ -86,16 +86,18 @@ register_int(int intr, inthand2_t *handler, void *arg, const char *name) /* * Create an interrupt thread if necessary, leave it in an unscheduled - * state. + * state. The kthread restore function exits a critical section before + * starting the function so we need *TWO* critical sections in order + * for the handler to begin running in one. 
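The *TWO* critical sections mentioned above (and requested via the lwkt_setpri() calls just below) come out of simple bookkeeping: cpu_kthread_restore subtracts one TDPRI_CRIT before calling into the thread function, so starting the ithread at +2 leaves its handler holding exactly one. Spelled out with illustrative constants:

    #include <assert.h>

    #define TDPRI_MASK    31        /* illustrative values throughout */
    #define TDPRI_CRIT    32
    #define TDPRI_INT_MED 27        /* hypothetical stand-in for the real level */

    int main(void)
    {
        int td_pri = TDPRI_INT_MED + TDPRI_CRIT * 2;  /* as register_int() sets */

        td_pri -= TDPRI_CRIT;       /* cpu_kthread_restore's subl at startup */
        assert((td_pri & TDPRI_MASK) == TDPRI_INT_MED);
        assert(td_pri / TDPRI_CRIT == 1);   /* handler begins one section deep */
        return 0;
    }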
*/ if ((td = ithreads[intr]) == NULL) { lwkt_create((void *)ithread_handler, (void *)intr, &ithreads[intr], &ithread_ary[intr], TDF_STOPREQ, "ithread %d", intr); td = ithreads[intr]; if (intr >= NHWI && intr < NHWI + NSWI) - lwkt_setpri(td, TDPRI_SOFT_NORM); + lwkt_setpri(td, TDPRI_SOFT_NORM + TDPRI_CRIT * 2); else - lwkt_setpri(td, TDPRI_INT_MED); + lwkt_setpri(td, TDPRI_INT_MED + TDPRI_CRIT * 2); } /* @@ -175,7 +177,8 @@ sched_ithd(int intr) } /* - * Interrupt threads run this as their main loop. + * Interrupt threads run this as their main loop. The handler should be + * in a critical section on entry. */ static void ithread_handler(void *arg) @@ -184,25 +187,15 @@ ithread_handler(void *arg) intrec_t **list = &intlists[intr]; intrec_t *rec; intrec_t *nrec; - int xpri = (curthread->td_pri & TDPRI_MASK) + TDPRI_CRIT; /* DEBUG YYY */ - crit_enter(); /* replaces SPLs */ + KKASSERT(curthread->td_pri >= TDPRI_CRIT); for (;;) { for (rec = *list; rec; rec = nrec) { nrec = rec->next; rec->handler(rec->argument); } ithread_done(intr); - - /* - * temporary sanity check. If we preempted another thread we - * are placed in another critical section by that thread which - * will be released when we block and resume the original thread. - */ - KKASSERT(curthread->td_pri == xpri || - (curthread->td_preempted && curthread->td_pri == xpri + TDPRI_CRIT)); } - crit_exit(); /* not reached */ } diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index e118977e00..7e84535d82 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/kern_kthread.c,v 1.5.2.3 2001/12/25 01:51:14 dillon Exp $ - * $DragonFly: src/sys/kern/kern_kthread.c,v 1.7 2003/06/27 03:30:42 dillon Exp $ + * $DragonFly: src/sys/kern/kern_kthread.c,v 1.8 2003/06/30 19:50:31 dillon Exp $ */ #include @@ -55,6 +55,7 @@ kproc_start(udata) error = kthread_create((void (*)(void *))kp->func, NULL, kp->global_threadpp, kp->arg0); + lwkt_setpri(*kp->global_threadpp, TDPRI_KERN_DAEMON); if (error) panic("kproc_start: %s: error %d", kp->arg0, error); } diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index e9f271df3a..7b6ba3d65a 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -37,7 +37,7 @@ * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ - * $DragonFly: src/sys/kern/kern_resource.c,v 1.6 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/kern_resource.c,v 1.7 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_compat.h" @@ -275,10 +275,7 @@ rtprio(register struct rtprio_args *uap) * that other processes need (and the idleprio process can't run * due to a CPU-bound normal process). Fix me! 
XXX */ -#if 0 if (RTP_PRIO_IS_REALTIME(rtp.type)) -#endif - if (rtp.type != RTP_PRIO_NORMAL) return (EPERM); } switch (rtp.type) { diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index edc8f589eb..d1244f6bfd 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -37,7 +37,7 @@ * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 * $FreeBSD: src/sys/kern/kern_sig.c,v 1.72.2.17 2003/05/16 16:34:34 obrien Exp $ - * $DragonFly: src/sys/kern/kern_sig.c,v 1.7 2003/06/27 01:53:25 dillon Exp $ + * $DragonFly: src/sys/kern/kern_sig.c,v 1.8 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_compat.h" @@ -1076,7 +1076,7 @@ psignal(p, sig) */ if (prop & SA_STOP) { if (action != SIG_DFL) - goto runfast; + goto run; /* * If a child holding parent blocked, * stopping could cause deadlock. @@ -1090,7 +1090,7 @@ psignal(p, sig) stop(p); goto out; } else - goto runfast; + goto run; /*NOTREACHED*/ case SSTOP: @@ -1105,7 +1105,7 @@ psignal(p, sig) * Kill signal always sets processes running. */ if (sig == SIGKILL) - goto runfast; + goto run; if (prop & SA_CONT) { /* @@ -1121,10 +1121,10 @@ psignal(p, sig) if (action == SIG_DFL) SIGDELSET(p->p_siglist, sig); if (action == SIG_CATCH) - goto runfast; + goto run; if (p->p_wchan == 0) goto run; - p->p_stat = SSLEEP; + clrrunnable(p, SSLEEP); goto out; } @@ -1162,13 +1162,6 @@ psignal(p, sig) goto out; } /*NOTREACHED*/ - -runfast: - /* - * Raise priority to at least PUSER. - */ - if (p->p_priority > PUSER) - p->p_priority = PUSER; run: setrunnable(p); out: @@ -1225,8 +1218,7 @@ issignal(p) do { stop(p); mi_switch(); - } while (!trace_req(p) - && p->p_flag & P_TRACED); + } while (!trace_req(p) && p->p_flag & P_TRACED); /* * If parent wants us to take the signal, diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 219b085f37..06d4931a38 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -37,7 +37,7 @@ * * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $ - * $DragonFly: src/sys/kern/kern_subr.c,v 1.5 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/kern_subr.c,v 1.6 2003/06/30 19:50:31 dillon Exp $ */ #include @@ -419,17 +419,3 @@ phashinit(elements, type, nentries) return (hashtbl); } -void -uio_yield() -{ - struct proc *p; - int s; - - p = curproc; - s = splhigh(); - p->p_priority = p->p_usrpri; - setrunqueue(p); - p->p_stats->p_ru.ru_nivcsw++; - mi_switch(); - splx(s); -} diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index d760cd5705..4c6a1589a4 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/kern_switch.c,v 1.3.2.1 2000/05/16 06:58:12 dillon Exp $ - * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.3 2003/06/20 02:09:56 dillon Exp $ + * $DragonFly: src/sys/kern/Attic/kern_switch.c,v 1.4 2003/06/30 19:50:31 dillon Exp $ */ #include @@ -72,21 +72,51 @@ SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) /* * setrunqueue() examines a process priority and class and inserts it on * the tail of it's appropriate run queue (based on class and priority). - * This sets the queue busy bit. + * This sets the queue busy bit. If no user processes have been scheduled + * on the LWKT subsystem we schedule this one. + * * The process must be runnable. * This must be called at splhigh(). - * - * YYY setrunqueue() is responsible for assigning a cpu to a user - * process. 
If the LWKT thread corresponding to the rt, id, or normal - * queues is not running, it will be woken up. YYY */ void setrunqueue(struct proc *p) { struct rq *q; u_int8_t pri; + struct globaldata *gd; KASSERT(p->p_stat == SRUN, ("setrunqueue: proc not SRUN")); + + /* + * If we are already the designated current process just + * wakeup the thread. + */ + if (p->p_flag & P_CURPROC) { + KASSERT((p->p_flag & P_ONRUNQ) == 0, ("already on runq!")); + lwkt_schedule(p->p_thread); + return; + } + + /* + * If the process's cpu is not running any userland processes + * then schedule this one's thread. + */ + gd = p->p_thread->td_gd; + if (gd->gd_uprocscheduled == 0) { + gd->gd_uprocscheduled = 1; + p->p_flag |= P_CURPROC; + lwkt_schedule(p->p_thread); + KASSERT((p->p_flag & P_ONRUNQ) == 0, ("already on runq2!")); + return; + } + + KASSERT((p->p_flag & P_ONRUNQ) == 0, ("already on runq3!")); + p->p_flag |= P_ONRUNQ; + /* + * Otherwise place this process on the userland scheduler's run + * queue for action. + */ + if (p->p_rtprio.type == RTP_PRIO_NORMAL) { pri = p->p_priority >> 2; q = &queues[pri]; @@ -105,12 +135,17 @@ setrunqueue(struct proc *p) } p->p_rqindex = pri; /* remember the queue index */ TAILQ_INSERT_TAIL(q, p, p_procq); - lwkt_schedule(p->p_thread); } /* * remrunqueue() removes a given process from the run queue that it is on, - * clearing the queue busy bit if it becomes empty. + * clearing the queue busy bit if it becomes empty. This function is called + * when a userland process is selected for LWKT scheduling. Note that + * LWKT scheduling is an abstraction of 'curproc'.. there could very well be + * several userland processes whos threads are scheduled or otherwise in + * a special state, and such processes are NOT on the userland scheduler's + * run queue. + * * This must be called at splhigh(). */ void @@ -120,6 +155,8 @@ remrunqueue(struct proc *p) u_int32_t *which; u_int8_t pri; + KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq4!")); + p->p_flag &= ~P_ONRUNQ; pri = p->p_rqindex; if (p->p_rtprio.type == RTP_PRIO_NORMAL) { q = &queues[pri]; @@ -140,37 +177,11 @@ remrunqueue(struct proc *p) ("remrunqueue: remove from empty queue")); *which &= ~(1 << pri); } - lwkt_deschedule(p->p_thread); } -#if 0 /* - * procrunnable() returns a boolean true (non-zero) value if there are - * any runnable processes. This is intended to be called from the idle - * loop to avoid the more expensive (and destructive) chooseproc(). - * - * MP SAFE. CALLED WITHOUT THE MP LOCK - */ -u_int32_t -procrunnable(void) -{ - return (rtqueuebits || queuebits || idqueuebits); -} -#endif - -#if 0 -/* - * chooseproc() selects the next process to run. Ideally, cpu_switch() - * would have determined that there is a process available before calling - * this, but it is not a requirement. The selected process is removed - * from it's queue, and the queue busy bit is cleared if it becomes empty. - * This must be called at splhigh(). - * - * For SMP, trivial affinity is implemented by locating the first process - * on the queue that has a matching lastcpu id. Since normal priorities - * are mapped four priority levels per queue, this may allow the cpu to - * choose a slightly lower priority process in order to preserve the cpu - * caches. + * chooseproc() is called when a cpu needs a user process to LWKT schedule. + * chooseproc() will select a user process and return it. 
*/ struct proc * chooseproc(void) @@ -179,9 +190,6 @@ chooseproc(void) struct rq *q; u_int32_t *which; u_int32_t pri; -#ifdef SMP - u_char id; -#endif if (rtqueuebits) { pri = ffs(rtqueuebits) - 1; @@ -200,22 +208,11 @@ chooseproc(void) } p = TAILQ_FIRST(q); KASSERT(p, ("chooseproc: no proc on busy queue")); -#ifdef SMP - /* wander down the current run queue for this pri level for a match */ - id = cpuid; - while (p->p_lastcpu != id) { - p = TAILQ_NEXT(p, p_procq); - if (p == NULL) { - p = TAILQ_FIRST(q); - break; - } - } -#endif TAILQ_REMOVE(q, p, p_procq); if (TAILQ_EMPTY(q)) *which &= ~(1 << pri); + KASSERT((p->p_flag & P_ONRUNQ) != 0, ("not on runq6!")); + p->p_flag &= ~P_ONRUNQ; return p; } -#endif - diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index dfa37859eb..6df3b0b67a 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -37,7 +37,7 @@ * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 * $FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.6 2002/10/13 07:29:53 kbyanc Exp $ - * $DragonFly: src/sys/kern/kern_synch.c,v 1.11 2003/06/29 07:37:06 dillon Exp $ + * $DragonFly: src/sys/kern/kern_synch.c,v 1.12 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_ktrace.h" @@ -50,6 +50,7 @@ #include #include #include +#include #ifdef KTRACE #include #include @@ -63,7 +64,6 @@ static void sched_setup __P((void *dummy)); SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) -u_char curpriority; int hogticks; int lbolt; int sched_quantum; /* Roundrobin scheduling quantum in ticks. */ @@ -83,7 +83,6 @@ static fixpt_t cexp[3] = { 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; -static int curpriority_cmp __P((struct proc *p)); static void endtsleep __P((void *)); static void loadav __P((void *arg)); static void maybe_resched __P((struct proc *chk)); @@ -110,57 +109,36 @@ sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); -/*- - * Compare priorities. Return: - * <0: priority of p < current priority - * 0: priority of p == current priority - * >0: priority of p > current priority - * The priorities are the normal priorities or the normal realtime priorities - * if p is on the same scheduler as curproc. Otherwise the process on the - * more realtimeish scheduler has lowest priority. As usual, a higher - * priority really means a lower priority. - */ -static int -curpriority_cmp(p) - struct proc *p; -{ - int c_class, p_class; - - c_class = RTP_PRIO_BASE(curproc->p_rtprio.type); - p_class = RTP_PRIO_BASE(p->p_rtprio.type); - if (p_class != c_class) - return (p_class - c_class); - if (p_class == RTP_PRIO_NORMAL) - return (((int)p->p_priority - (int)curpriority) / PPQ); - return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio); -} - /* - * Arrange to reschedule if necessary, taking the priorities and - * schedulers into account. + * Arrange to reschedule if necessary by checking to see if the current + * process is on the highest priority user scheduling queue. This may + * be run from an interrupt so we have to follow any preemption chains + * back to the original process. */ static void -maybe_resched(chk) - struct proc *chk; +maybe_resched(struct proc *chk) { - struct proc *p = curproc; /* XXX */ + struct proc *cur = lwkt_preempted_proc(); + + if (cur == NULL) + return; /* - * XXX idle scheduler still broken because proccess stays on idle - * scheduler during waits (such as when getting FS locks). 
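chooseproc() above scans the three queue classes strictly in order, realtime before normal before idle, and uses ffs() within a class because lower queue indices mean better priority. A small standalone model of that selection (the bitmask layout is illustrative):

    #include <assert.h>
    #include <strings.h>            /* ffs() */

    /* One summary mask per class: bit N set => queue N has a runnable proc. */
    static unsigned rtqueuebits, queuebits, idqueuebits;

    /* Returns the class (0=rt, 1=normal, 2=idle); queue index via *pri. */
    static int toy_chooseclass(int *pri)
    {
        if (rtqueuebits) { *pri = ffs(rtqueuebits) - 1; return 0; }
        if (queuebits)   { *pri = ffs(queuebits)   - 1; return 1; }
        if (idqueuebits) { *pri = ffs(idqueuebits) - 1; return 2; }
        return -1;                  /* no user process wants the cpu */
    }

    int main(void)
    {
        int pri;

        queuebits   = 1u << 7;      /* a normal process on queue 7 */
        idqueuebits = 1u << 0;      /* an idprio process */
        assert(toy_chooseclass(&pri) == 1 && pri == 7);  /* normal beats idle */
        rtqueuebits = 1u << 12;
        assert(toy_chooseclass(&pri) == 0 && pri == 12); /* rt beats both */
        return 0;
    }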
If a - * standard process becomes runaway cpu-bound, the system can lockup - * due to idle-scheduler processes in wakeup never getting any cpu. + * Check the user queue (realtime, normal, idle). Lower numbers + * indicate higher priority queues. Lower numbers are also better + * for p_priority. */ - if (p == NULL) { -#if 0 - need_resched(); -#endif - } else if (chk == p) { - /* We may need to yield if our priority has been raised. */ - if (curpriority_cmp(chk) > 0) - need_resched(); - } else if (curpriority_cmp(chk) < 0) + if (chk->p_rtprio.type < cur->p_rtprio.type) { need_resched(); + } else if (chk->p_rtprio.type == cur->p_rtprio.type) { + if (chk->p_rtprio.type == RTP_PRIO_NORMAL) { + if (chk->p_priority / PPQ < cur->p_priority / PPQ) + need_resched(); + } else { + if (chk->p_rtprio.prio < cur->p_rtprio.prio) + need_resched(); + } + } } int @@ -287,8 +265,7 @@ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); */ /* ARGSUSED */ static void -schedcpu(arg) - void *arg; +schedcpu(void *arg) { fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); struct proc *p; @@ -330,21 +307,6 @@ schedcpu(arg) p->p_cpticks = 0; p->p_estcpu = decay_cpu(loadfac, p->p_estcpu); resetpriority(p); - if (p->p_priority >= PUSER) { - if ((p != curp) && -#ifdef SMP - p->p_oncpu == 0xff && /* idle */ -#endif - p->p_stat == SRUN && - (p->p_flag & P_INMEM) && - (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { - remrunqueue(p); - p->p_priority = p->p_usrpri; - setrunqueue(p); - } else { - p->p_priority = p->p_usrpri; - } - } splx(s); } wakeup((caddr_t)&lbolt); @@ -357,15 +319,14 @@ schedcpu(arg) * least six times the loadfactor will decay p_estcpu to zero. */ static void -updatepri(p) - register struct proc *p; +updatepri(struct proc *p) { - register unsigned int newcpu = p->p_estcpu; - register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); + unsigned int newcpu = p->p_estcpu; + fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); - if (p->p_slptime > 5 * loadfac) + if (p->p_slptime > 5 * loadfac) { p->p_estcpu = 0; - else { + } else { p->p_slptime--; /* the first time was done in schedcpu */ while (newcpu && --p->p_slptime) newcpu = decay_cpu(loadfac, newcpu); @@ -415,6 +376,13 @@ sleepinit(void) * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). + * + * If the process has P_CURPROC set mi_switch() will not re-queue it to + * the userland scheduler queues because we are in a SSLEEP state. If + * we are not the current process then we have to remove ourselves from + * the scheduler queues. 
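The rewritten maybe_resched() above compares by scheduling class first and only then by priority within the class, with normal priorities bucketed into PPQ-sized queues. The same decision as a standalone predicate (PPQ and the class encodings here are illustrative):

    #include <assert.h>
    #include <stdbool.h>

    #define RTP_PRIO_REALTIME 0     /* illustrative: lower value = better class */
    #define RTP_PRIO_NORMAL   1
    #define RTP_PRIO_IDLE     2
    #define PPQ               4     /* priorities per queue, illustrative */

    struct toy_proc { int rtp_type, rtp_prio, p_priority; };

    /* true if 'chk' should trigger a resched against the running 'cur' */
    static bool wants_resched(const struct toy_proc *chk,
                              const struct toy_proc *cur)
    {
        if (chk->rtp_type != cur->rtp_type)
            return chk->rtp_type < cur->rtp_type;
        if (chk->rtp_type == RTP_PRIO_NORMAL)
            return chk->p_priority / PPQ < cur->p_priority / PPQ;
        return chk->rtp_prio < cur->rtp_prio;
    }

    int main(void)
    {
        struct toy_proc rt   = { RTP_PRIO_REALTIME, 5, 0 };
        struct toy_proc cur  = { RTP_PRIO_NORMAL, 0, 58 };
        struct toy_proc same = { RTP_PRIO_NORMAL, 0, 59 };
        struct toy_proc best = { RTP_PRIO_NORMAL, 0, 52 };

        assert(wants_resched(&rt, &cur));     /* better class always wins */
        assert(!wants_resched(&same, &cur));  /* 59/4 == 58/4: same queue */
        assert(wants_resched(&best, &cur));   /* 52/4 < 58/4: better queue */
        return 0;
    }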
+ * + * YYY priority now unused */ int tsleep(ident, priority, wmesg, timo) @@ -449,12 +417,11 @@ tsleep(ident, priority, wmesg, timo) KASSERT(p == NULL || p->p_stat == SRUN, ("tsleep %p %s %d", ident, wmesg, p->p_stat)); + crit_enter(); td->td_wchan = ident; td->td_wmesg = wmesg; - if (p) { + if (p) p->p_slptime = 0; - p->p_priority = priority & PRIMASK; - } lwkt_deschedule_self(); TAILQ_INSERT_TAIL(&slpque[id], td, td_threadq); if (timo) @@ -472,29 +439,40 @@ tsleep(ident, priority, wmesg, timo) if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { - if (td->td_wchan) + if (td->td_wchan) { unsleep(td); + lwkt_schedule_self(); + } p->p_stat = SRUN; goto resume; } - if (p->p_wchan == 0) { + if (p->p_wchan == NULL) { catch = 0; goto resume; } } else { sig = 0; } - p->p_stat = SSLEEP; + + /* + * If we are not the current process we have to remove ourself + * from the run queue. + */ + KASSERT(p->p_stat == SRUN, ("PSTAT NOT SRUN %d %d", p->p_pid, p->p_stat)); + /* + * If this is the current 'user' process schedule another one. + */ + clrrunnable(p, SSLEEP); p->p_stats->p_ru.ru_nvcsw++; mi_switch(); + KASSERT(p->p_stat == SRUN, ("tsleep: stat not srun")); } else { lwkt_switch(); } resume: - if (p) { - curpriority = p->p_usrpri; + crit_exit(); + if (p) p->p_flag &= ~P_SINTR; - } splx(s); if (td->td_flags & TDF_TIMEOUT) { td->td_flags &= ~TDF_TIMEOUT; @@ -573,7 +551,6 @@ xsleep(struct xwait *w, int priority, const char *wmesg, int timo, int *gen) p->p_wchan = w; p->p_wmesg = wmesg; p->p_slptime = 0; - p->p_priority = priority & PRIMASK; p->p_flag |= P_XSLEEP; TAILQ_INSERT_TAIL(&w->waitq, p, p_procq); if (timo) @@ -590,8 +567,10 @@ xsleep(struct xwait *w, int priority, const char *wmesg, int timo, int *gen) if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { - if (p->p_wchan) + if (p->p_wchan) { unsleep(p); + lwkt_schedule_self(); + } p->p_stat = SRUN; goto resume; } @@ -599,13 +578,13 @@ xsleep(struct xwait *w, int priority, const char *wmesg, int timo, int *gen) catch = 0; goto resume; } - } else + } else { sig = 0; - p->p_stat = SSLEEP; + } + clrrunnable(p, SSLEEP); p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: - curpriority = p->p_usrpri; *gen = w->gen; /* update generation number */ splx(s); p->p_flag &= ~P_SINTR; @@ -785,6 +764,43 @@ wakeup_one(void *ident) _wakeup(ident, 1); } +/* + * Release the P_CURPROC designation on a process in order to allow the + * userland scheduler to schedule another one. This places a runnable + * process back on the userland scheduler's run queue. + * + * Note that losing P_CURPROC does not effect LWKT scheduling, you can + * still tsleep/wakeup after having lost P_CURPROC, but userret() will + * not return to user mode until it gets it back. + */ +static __inline +void +_relscurproc(struct proc *p) +{ + struct proc *np; + + if (p->p_flag & P_CURPROC) { + p->p_flag &= ~P_CURPROC; + lwkt_deschedule_self(); + if (p->p_stat == SRUN && (p->p_flag & P_INMEM)) { + setrunqueue(p); + } + if ((np = chooseproc()) != NULL) { + np->p_flag |= P_CURPROC; + lwkt_schedule(np->p_thread); + } else { + KKASSERT(mycpu->gd_uprocscheduled == 1); + mycpu->gd_uprocscheduled = 0; + } + } +} + +void +relscurproc(struct proc *p) +{ + _relscurproc(p); +} + /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. @@ -818,6 +834,14 @@ mi_switch() x = splstatclock(); clear_resched(); + /* + * If the process being switched out is the 'current' process then + * we have to lose the P_CURPROC designation and choose a new + * process. 
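tsleep() above now does its whole setup inside a critical section: the wait channel is recorded and the thread descheduled before the signal checks and the final mi_switch(), so a wakeup that races in can simply reschedule the thread and nothing is lost. A single-threaded toy of that ordering, with the concurrency only simulated:

    #include <assert.h>
    #include <stddef.h>

    struct toy_td { const void *wchan; int runnable; };

    static void toy_wakeup(struct toy_td *td, const void *ident)
    {
        if (td->wchan == ident) {   /* unsleep() + lwkt_schedule() */
            td->wchan = NULL;
            td->runnable = 1;
        }
    }

    static void toy_tsleep(struct toy_td *td, const void *ident)
    {
        /* crit_enter(): from here to the switch nothing may preempt us */
        td->wchan = ident;
        td->runnable = 0;           /* lwkt_deschedule_self() */
        toy_wakeup(td, ident);      /* simulate a wakeup racing in here */
        assert(td->runnable);       /* it found wchan set, so we stay live */
        /* mi_switch() would now return immediately; then crit_exit() */
    }

    int main(void)
    {
        struct toy_td td = { NULL, 1 };
        int chan;

        toy_tsleep(&td, &chan);
        return 0;
    }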
If the process is not being LWKT managed and it is in + * SRUN we have to setrunqueue it. + */ + _relscurproc(p); + #ifdef SIMPLELOCK_DEBUG if (p->p_simple_locks) printf("sleep: holding simple lock\n"); @@ -853,7 +877,6 @@ mi_switch() */ cnt.v_swtch++; lwkt_switch(); - remrunqueue(p); splx(x); } @@ -893,27 +916,95 @@ setrunnable(struct proc *p) if ((p->p_flag & P_INMEM) == 0) { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); - } - else + } else { maybe_resched(p); + } +} + +/* + * Change the process state to NOT be runnable, removing it from the run + * queue. If P_CURPROC is not set and we are in SRUN the process is on the + * run queue (If P_INMEM is not set then it isn't because it is swapped). + */ +void +clrrunnable(struct proc *p, int stat) +{ + int s; + + s = splhigh(); + switch(p->p_stat) { + case SRUN: + if ((p->p_flag & (P_INMEM|P_CURPROC)) == P_INMEM) + remrunqueue(p); + break; + default: + break; + } + p->p_stat = stat; + splx(s); +} + +/* + * yield / synchronous reschedule + * + * Simply calling mi_switch() has the effect we want. mi_switch will + * deschedule the current thread, make sure the current process is on + * the run queue, and then choose and reschedule another process. + */ +void +uio_yield() +{ + struct proc *p = curproc; + int s; + + s = splhigh(); +#if 0 + KKASSERT(p->p_stat == SRUN); + if ((p->p_flag & (P_INMEM|P_CURPROC)) == (P_INMEM|P_CURPROC)) + setrunqueue(p); + lwkt_deschedule_self(); +#endif + p->p_stats->p_ru.ru_nivcsw++; + mi_switch(); + splx(s); } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. + * + * YYY real time / idle procs do not use p_priority XXX */ void -resetpriority(p) - register struct proc *p; +resetpriority(struct proc *p) { - register unsigned int newpriority; - - if (p->p_rtprio.type == RTP_PRIO_NORMAL) { - newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT + - NICE_WEIGHT * p->p_nice; - newpriority = min(newpriority, MAXPRI); - p->p_usrpri = newpriority; + unsigned int newpriority; + int opq; + int npq; + + if (p->p_rtprio.type != RTP_PRIO_NORMAL) + return; + newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT + + NICE_WEIGHT * p->p_nice; + newpriority = min(newpriority, MAXPRI); + opq = p->p_priority / PPQ; + npq = newpriority / PPQ; + if (p->p_stat == SRUN && (p->p_flag & (P_CURPROC|P_INMEM)) == P_INMEM + && opq != npq) { + /* + * We have to move the process to another queue + */ + remrunqueue(p); + p->p_priority = newpriority; + setrunqueue(p); + } else { + /* + * Not on a queue or is on the same queue, we can just + * set the priority. + * YYY P_INMEM? 
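_relscurproc(), shown a little earlier, is the other half of the handoff that setrunqueue() begins: the departing process gives up P_CURPROC, requeues itself if still runnable, and either promotes a successor from chooseproc() or records that the cpu has no user process scheduled at all. A condensed model with a one-slot stand-in for the run queue (all names illustrative):

    #include <assert.h>
    #include <stddef.h>

    #define P_CURPROC 0x01          /* illustrative flag value */
    #define SRUN      1

    struct toy_proc { int flags, stat; };
    struct toy_cpu  { int uprocscheduled; struct toy_proc *runq; };

    static struct toy_proc *toy_chooseproc(struct toy_cpu *gd)
    {
        struct toy_proc *np = gd->runq;   /* best queued process, if any */

        gd->runq = NULL;
        return np;
    }

    static void toy_relscurproc(struct toy_cpu *gd, struct toy_proc *p)
    {
        struct toy_proc *np;

        if ((p->flags & P_CURPROC) == 0)
            return;
        p->flags &= ~P_CURPROC;           /* give up the designation */
        if (p->stat == SRUN)
            gd->runq = p;                 /* setrunqueue(): still wants cpu */
        if ((np = toy_chooseproc(gd)) != NULL)
            np->flags |= P_CURPROC;       /* successor; lwkt_schedule(np) */
        else
            gd->uprocscheduled = 0;       /* nobody left to run in user mode */
    }

    int main(void)
    {
        struct toy_cpu gd = { 1, NULL };
        struct toy_proc p = { P_CURPROC, SRUN };

        toy_relscurproc(&gd, &p);         /* requeued, then chosen again */
        assert(p.flags & P_CURPROC);      /* no competitor: yielder re-chosen */
        return 0;
    }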
+ */ + p->p_priority = newpriority; } maybe_resched(p); } @@ -986,9 +1077,6 @@ schedclock(p) p->p_cpticks++; p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); - if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { + if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) resetpriority(p); - if (p->p_priority >= PUSER) - p->p_priority = p->p_usrpri; - } } diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 7d667240fd..426502fe2c 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -38,7 +38,7 @@ * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 * $FreeBSD: src/sys/kern/kern_sysctl.c,v 1.92.2.9 2003/05/01 22:48:09 trhodes Exp $ - * $DragonFly: src/sys/kern/kern_sysctl.c,v 1.5 2003/06/29 06:48:31 dillon Exp $ + * $DragonFly: src/sys/kern/kern_sysctl.c,v 1.6 2003/06/30 19:50:31 dillon Exp $ */ #include "opt_compat.h" @@ -1096,7 +1096,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS) return (EPERM); /* Most likely only root can write */ - if (!(oid->oid_kind & CTLFLAG_ANYBODY) && p && + if (!(oid->oid_kind & CTLFLAG_ANYBODY) && req->newptr && p && (error = suser_cred(p->p_ucred, (oid->oid_kind & CTLFLAG_PRISON) ? PRISON_ROOT : 0))) return (error); diff --git a/sys/kern/kern_threads.c b/sys/kern/kern_threads.c index 61d8d41fd7..a84d9abf37 100644 --- a/sys/kern/kern_threads.c +++ b/sys/kern/kern_threads.c @@ -47,7 +47,7 @@ * and I certainly make no claims as to its fitness for *any* purpose. * * $FreeBSD: src/sys/kern/kern_threads.c,v 1.15 1999/08/28 00:46:15 peter Exp $ - * $DragonFly: src/sys/kern/kern_threads.c,v 1.3 2003/06/23 17:55:41 dillon Exp $ + * $DragonFly: src/sys/kern/kern_threads.c,v 1.4 2003/06/30 19:50:31 dillon Exp $ */ #include @@ -56,6 +56,7 @@ #include #include #include +#include /* uio_yield() fixme */ /* * Low level support for sleep/wakeup paradigm @@ -147,17 +148,9 @@ int yield(struct yield_args *uap) { struct proc *p = curproc; - int s; p->p_retval[0] = 0; - - s = splhigh(); - p->p_priority = MAXPRI; - setrunqueue(p); - p->p_stats->p_ru.ru_nvcsw++; - mi_switch(); - splx(s); - + uio_yield(); return(0); } diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index c4e72c3719..88f12ec35c 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -27,7 +27,7 @@ * thread scheduler, which means that generally speaking we only need * to use a critical section to prevent hicups. * - * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.11 2003/06/29 07:37:06 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.12 2003/06/30 19:50:31 dillon Exp $ */ #include @@ -62,6 +62,8 @@ static quad_t preempt_hit = 0; SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, ""); static quad_t preempt_miss = 0; SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, ""); +static quad_t preempt_weird = 0; +SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, ""); /* * These helper procedures handle the runq, they can only be called from @@ -92,6 +94,14 @@ _lwkt_enqueue(thread_t td) td->td_flags |= TDF_RUNQ; TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq); gd->gd_runqmask |= 1 << nq; +#if 0 + /* + * YYY needs cli/sti protection? gd_reqpri set by interrupt + * when made pending. need better mechanism. 
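resetpriority() just above only touches the run queues when the recomputed value crosses a PPQ boundary; otherwise the priority is updated in place. The formula itself is easy to work by hand, here with illustrative constants:

    #include <assert.h>

    #define PUSER                 50    /* all constants illustrative */
    #define PPQ                    4
    #define MAXPRI               127
    #define NICE_WEIGHT            2
    #define INVERSE_ESTCPU_WEIGHT  8

    static unsigned newpriority(unsigned estcpu, int nice)
    {
        unsigned pri = PUSER + estcpu / INVERSE_ESTCPU_WEIGHT
                     + NICE_WEIGHT * nice;

        return pri < MAXPRI ? pri : MAXPRI;
    }

    int main(void)
    {
        unsigned oldp = newpriority(16, 0);   /* 50 + 2 = 52 -> queue 13 */
        unsigned newp = newpriority(18, 0);   /* still 52: update in place */

        assert(oldp / PPQ == newp / PPQ);     /* no remrunqueue/setrunqueue */
        newp = newpriority(48, 0);            /* 50 + 6 = 56 -> queue 14 */
        assert(oldp / PPQ != newp / PPQ);     /* crosses a PPQ boundary: requeue */
        return 0;
    }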
+ */ + if (gd->gd_reqpri < (td->td_pri & TDPRI_MASK)) + gd->gd_reqpri = (td->td_pri & TDPRI_MASK); +#endif } } @@ -134,8 +144,8 @@ lwkt_alloc_thread(struct thread *td) void *stack; int flags = 0; - crit_enter(); if (td == NULL) { + crit_enter(); if (mycpu->gd_tdfreecount > 0) { --mycpu->gd_tdfreecount; td = TAILQ_FIRST(&mycpu->gd_tdfreeq); @@ -156,7 +166,7 @@ lwkt_alloc_thread(struct thread *td) stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE); flags |= TDF_ALLOCATED_STACK; } - lwkt_init_thread(td, stack, flags); + lwkt_init_thread(td, stack, flags, mycpu); return(td); } @@ -167,11 +177,13 @@ lwkt_alloc_thread(struct thread *td) * NOTE! called from low level boot code, we cannot do anything fancy! */ void -lwkt_init_thread(thread_t td, void *stack, int flags) +lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd) { bzero(td, sizeof(struct thread)); td->td_kstack = stack; td->td_flags |= flags; + td->td_gd = gd; + td->td_pri = TDPRI_CRIT; pmap_init_thread(td); } @@ -229,18 +241,19 @@ lwkt_switch(void) thread_t ntd; if (mycpu->gd_intr_nesting_level && td->td_preempted == NULL) - panic("lwkt_switch: cannot switch from within an interrupt\n"); + panic("lwkt_switch: cannot switch from within an interrupt, yet\n"); crit_enter(); ++switch_count; if ((ntd = td->td_preempted) != NULL) { /* * We had preempted another thread on this cpu, resume the preempted - * thread. + * thread. This occurs transparently, whether the preempted thread + * was scheduled or not (it may have been preempted after descheduling + * itself). */ - td->td_preempted = NULL; - td->td_pri -= TDPRI_CRIT; - ntd->td_flags &= ~TDF_PREEMPTED; + KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK); + ntd->td_flags |= TDF_PREEMPT_DONE; } else { /* * Priority queue / round-robin at each priority. Note that user @@ -264,48 +277,69 @@ again: ntd = gd->gd_idletd; } } + KASSERT(ntd->td_pri >= TDPRI_CRIT, + ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri)); if (td != ntd) td->td_switch(ntd); crit_exit(); } /* - * The target thread preempts the current thread. The target thread - * structure must be stable and preempt-safe (e.g. an interrupt thread). - * When the target thread blocks the current thread will be resumed. + * Request that the target thread preempt the current thread. This only + * works if: * - * XXX the target runs in a critical section so it does not open the original - * thread up to additional interrupts that the original thread believes it - * is blocking. + * + We aren't trying to preempt ourselves (it can happen!) + * + We are not currently being preempted + * + the target is not currently being preempted * - * Normal kernel threads should not preempt other normal kernel threads - * as it breaks the assumptions kernel threads are allowed to make. Note - * that preemption does not mess around with the current thread's RUNQ - * state. + * XXX at the moment we run the target thread in a critical section during + * the preemption in order to prevent the target from taking interrupts + * that *WE* can't. Preemption is strictly limited to interrupt threads + * and interrupt-like threads, outside of a critical section, and the + * preempted source thread will be resumed the instant the target blocks + * whether or not the source is scheduled (i.e. preemption is supposed to + * be as transparent as possible). 
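lwkt_preempt(), whose body appears just below, refuses the preemption in all the 'weird' or losing cases before committing: self-preemption, either thread already involved in a preemption, a target that is itself preempted, or an insufficient priority. The guards read naturally as one predicate (simplified, with illustrative flag values):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define TDPRI_MASK        31
    #define TDF_PREEMPT_LOCK  0x100   /* illustrative flag value */

    struct toy_td { int pri, flags; struct toy_td *preempted; };

    /* May 'ntd' preempt the currently running 'td'? */
    static bool may_preempt(struct toy_td *td, struct toy_td *ntd)
    {
        if (td == ntd)                                    /* self-preemption */
            return false;
        if ((td->flags | ntd->flags) & TDF_PREEMPT_LOCK)  /* one is in flight */
            return false;
        if (ntd->preempted != NULL)                       /* target is busy */
            return false;
        return (ntd->pri & TDPRI_MASK) > (td->pri & TDPRI_MASK);
    }

    int main(void)
    {
        struct toy_td cur  = { 10, 0, NULL };
        struct toy_td ithd = { 27, 0, NULL };

        assert(may_preempt(&cur, &ithd));
        cur.flags |= TDF_PREEMPT_LOCK;       /* already part of a preemption */
        assert(!may_preempt(&cur, &ithd));
        return 0;
    }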
* * This call is typically made from an interrupt handler like sched_ithd() * which will only run if the current thread is not in a critical section, * so we optimize the priority check a bit. + * + * CAREFUL! either we or the target thread may get interrupted during the + * switch. */ void lwkt_preempt(struct thread *ntd, int id) { struct thread *td = curthread; - crit_enter(); /* YYY token */ - if (ntd->td_preempted == NULL && - (ntd->td_pri & TDPRI_MASK) > (td->td_pri & TDPRI_MASK) - ) { + /* + * The caller has put us in a critical section, and in order to have + * gotten here in the first place the thread the caller interrupted + * cannot have been in a critical section before. + */ + KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri)); + KASSERT((td->td_pri & ~TDPRI_MASK) == TDPRI_CRIT, ("BADPRI %d", td->td_pri)); + + if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) { + ++preempt_weird; + return; + } + if (ntd->td_preempted) { ++preempt_hit; - ntd->td_preempted = td; - td->td_flags |= TDF_PREEMPTED; - ntd->td_pri += TDPRI_CRIT; - while (td->td_flags & TDF_PREEMPTED) - ntd->td_switch(ntd); - } else { + return; + } + if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) { ++preempt_miss; + return; } - crit_exit_noyield(); + + ++preempt_hit; + ntd->td_preempted = td; + td->td_flags |= TDF_PREEMPT_LOCK; + td->td_switch(ntd); + KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE)); + ntd->td_preempted = NULL; + td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE); } /* @@ -385,6 +419,8 @@ lwkt_schedule_self(void) crit_enter(); KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!")); _lwkt_enqueue(td); + if (td->td_proc && td->td_proc->p_stat == SSLEEP) + panic("SCHED SELF PANIC"); crit_exit(); } @@ -404,6 +440,19 @@ lwkt_schedule_self(void) void lwkt_schedule(thread_t td) { + if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc + && td->td_proc->p_stat == SSLEEP + ) { + printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n", + curthread, + curthread->td_proc ? curthread->td_proc->p_pid : -1, + curthread->td_proc ? curthread->td_proc->p_stat : -1, + td, + td->td_proc ? curthread->td_proc->p_pid : -1, + td->td_proc ? curthread->td_proc->p_stat : -1 + ); + panic("SCHED PANIC"); + } crit_enter(); if (td == curthread) { _lwkt_enqueue(td); @@ -515,11 +564,29 @@ lwkt_deschedule(thread_t td) * continuous priority changes. Yield if you want to switch. * * We have to retain the critical section count which uses the high bits - * of the td_pri field. + * of the td_pri field. The specified priority may also indicate zero or + * more critical sections by adding TDPRI_CRIT*N. */ void lwkt_setpri(thread_t td, int pri) { + KKASSERT(pri >= 0); + crit_enter(); + if (td->td_flags & TDF_RUNQ) { + _lwkt_dequeue(td); + td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; + _lwkt_enqueue(td); + } else { + td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri; + } + crit_exit(); +} + +void +lwkt_setpri_self(int pri) +{ + thread_t td = curthread; + KKASSERT(pri >= 0 && pri <= TDPRI_MAX); crit_enter(); if (td->td_flags & TDF_RUNQ) { @@ -769,6 +836,16 @@ kthread_create(void (*func)(void *), void *arg, return 0; } +void +crit_panic(void) +{ + struct thread *td = curthread; + int lpri = td->td_pri; + + td->td_pri = 0; + panic("td_pri is/would-go negative! %p %d", td, lpri); +} + /* * Destroy an LWKT thread. Warning! 
  * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
diff --git a/sys/platform/pc32/i386/exception.s b/sys/platform/pc32/i386/exception.s
index acf2ff893a..f9b85bcc4f 100644
--- a/sys/platform/pc32/i386/exception.s
+++ b/sys/platform/pc32/i386/exception.s
@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $
- * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.7 2003/06/29 03:28:42 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.8 2003/06/30 19:50:30 dillon Exp $
  */
 
 #include "npx.h"
@@ -312,13 +312,18 @@ IDTVEC(int0x80_syscall)
 	movl	$1,_intr_nesting_level
 	jmp	_doreti
 
+/*
+ * This function is what cpu_heavy_restore jumps to after a new process
+ * is created.  We are in a critical section in order to prevent
+ * cpu_heavy_restore from being interrupted (especially since it stores
+ * its context in a static place!), so the first thing we do is release
+ * the critical section.
+ */
 ENTRY(fork_trampoline)
+	movl	_curthread,%eax
+	subl	$TDPRI_CRIT,TD_PRI(%eax)
 	call	_spl0
-
-	movl	_curthread,%eax		/* YYY heavy weight process must */
-	pushl	TD_PROC(%eax)		/* YYY remove itself from runq because */
-	call	remrunqueue		/* LWKT restore func doesn't do that */
-	addl	$4,%esp
+	call	_splz
 
 /*
  * cpu_set_fork_handler intercepts this function call to
diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c
index 207df58cc3..dea7d87848 100644
--- a/sys/platform/pc32/i386/machdep.c
+++ b/sys/platform/pc32/i386/machdep.c
@@ -36,7 +36,7 @@
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
- * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.17 2003/06/29 07:37:03 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.18 2003/06/30 19:50:30 dillon Exp $
  */
 
 #include "apm.h"
@@ -944,6 +944,9 @@ cpu_halt(void)
  * (unless you want to blow things up!).  Instead we look for runnable threads
  * and loop or halt as appropriate.  Giant is not held on entry to the thread.
  *
+ * The main loop is entered with a critical section held, we must release
+ * the critical section before doing anything else.
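+ *
+ * The critical section is inherited from thread creation itself:
+ * lwkt_init_thread() starts every thread with td_pri at TDPRI_CRIT.
+ * A minimal sketch of the resulting loop shape (the real loop below
+ * can also halt the cpu, see cpu_idle_hlt):
+ *
+ *	crit_exit();		(release the startup critical section)
+ *	for (;;)
+ *		lwkt_switch();	(run whatever else is runnable)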
+ *
  * Note on cpu_idle_hlt:  On an SMP system this may cause the system to
  * halt until the next clock tick, even if a thread is ready YYY
  */
@@ -954,6 +957,7 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
 void
 cpu_idle(void)
 {
+	crit_exit();
 	for (;;) {
 		lwkt_switch();
 		__asm __volatile("cli");
@@ -1826,12 +1830,14 @@ init386(int first)
 	 */
 	gd = &CPU_prvspace[0].mdglobaldata;
 
-	lwkt_init_thread(&thread0, proc0paddr, 0);
+	lwkt_init_thread(&thread0, proc0paddr, 0, &gd->mi);
 	gd->mi.gd_curthread = &thread0;
 	safepri = thread0.td_cpl = SWI_MASK | HWI_MASK;
 	thread0.td_switch = cpu_heavy_switch;	/* YYY eventually LWKT */
 	proc0.p_addr = (void *)thread0.td_kstack;
 	proc0.p_thread = &thread0;
+	proc0.p_flag |= P_CURPROC;
+	gd->mi.gd_uprocscheduled = 1;
 	thread0.td_proc = &proc0;
 	atdevbase = ISA_HOLE_START + KERNBASE;
@@ -2050,7 +2056,7 @@ cpu_gdinit(struct mdglobaldata *gd, int cpu)
 	gd->mi.gd_idletd = &gd->gd_idlethread;
 	sp = gd->mi.gd_prvspace->idlestack;
-	lwkt_init_thread(&gd->gd_idlethread, sp, 0);
+	lwkt_init_thread(&gd->gd_idlethread, sp, 0, &gd->mi);
 	gd->gd_idlethread.td_switch = cpu_lwkt_switch;
 	gd->gd_idlethread.td_sp -= sizeof(void *);
 	*(void **)gd->gd_idlethread.td_sp = cpu_idle_restore;
diff --git a/sys/platform/pc32/i386/swtch.s b/sys/platform/pc32/i386/swtch.s
index c834e43765..107562acad 100644
--- a/sys/platform/pc32/i386/swtch.s
+++ b/sys/platform/pc32/i386/swtch.s
@@ -35,7 +35,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $
- * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.16 2003/06/29 03:28:42 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.17 2003/06/30 19:50:30 dillon Exp $
  */
 
 #include "npx.h"
@@ -249,6 +249,7 @@ ENTRY(cpu_exit_switch)
  *
  * YYY note: spl check is done in mi_switch when it splx()'s.
  */
+
 ENTRY(cpu_heavy_restore)
 	/* interrupts are disabled */
 	movl	TD_PCB(%eax),%edx
@@ -425,18 +426,6 @@ cpu_switch_load_gs:
 	popl	%ebx
 	movl	%eax,%dr7
 1:
-#if 0
-	/*
-	 * Remove the heavy weight process from the heavy weight queue.
-	 * this will also have the side effect of removing the thread from
-	 * the run queue.  YYY temporary?
-	 *
-	 * LWKT threads stay on the run queue until explicitly removed.
-	 */
-	pushl	%ecx
-	call	remrunqueue
-	addl	$4,%esp
-#endif
 	sti			/* XXX */
 	ret
 
@@ -555,10 +544,14 @@ ENTRY(cpu_idle_restore)
  *	don't die.  This restore function is used to bootstrap into an
  *	LWKT based kernel thread only.  cpu_lwkt_switch() will be used
  *	after this.
+ *
+ *	Since all of our context is on the stack we are reentrant and
+ *	we can release our critical section and enable interrupts early.
  */
 ENTRY(cpu_kthread_restore)
 	movl	TD_PCB(%eax),%ebx
 	movl	$0,%ebp
+	subl	$TDPRI_CRIT,TD_PRI(%eax)
 	sti
 	popl	%edx		/* kthread exit function */
 	pushl	PCB_EBX(%ebx)	/* argument to ESI function */
@@ -571,6 +564,9 @@
  *
  *	Standard LWKT switching function.  Only non-scratch registers are
  *	saved and we don't bother with the MMU state or anything else.
+ *
+ *	This function is always called while in a critical section.
+ *
  *	YYY BGL, SPL
  */
 ENTRY(cpu_lwkt_switch)
@@ -589,8 +585,14 @@ ENTRY(cpu_lwkt_switch)
 	ret
 
 /*
- * cpu_idle_restore() (current thread in %eax on entry)
+ *
+ * Standard LWKT restore function.  This function is always called
+ * while in a critical section.
+ *
+ * Warning: due to preemption the restore function can be used to
+ * 'return' to the original thread.  Interrupt disablement must be
+ * protected through the switch so we cannot run splz here.
  */
 ENTRY(cpu_lwkt_restore)
 	popfl
@@ -598,14 +600,5 @@ ENTRY(cpu_lwkt_restore)
 	popl	%esi
 	popl	%ebx
 	popl	%ebp
-	cmpl	$0,_intr_nesting_level	/* don't stack too deeply */
-	jne	2f
-	testl	_ipending,%ecx
-	jnz	1f
-	testl	_fpending,%ecx
-	jz	2f
-1:
-	call	splz			/* execute unmasked ints */
-2:
 	ret
 
diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c
index c2b4c6bb72..3716a25349 100644
--- a/sys/platform/pc32/i386/trap.c
+++ b/sys/platform/pc32/i386/trap.c
@@ -36,7 +36,7 @@
  *
  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
  * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $
- * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.11 2003/06/29 03:28:42 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.12 2003/06/30 19:50:30 dillon Exp $
  */
 
 /*
@@ -144,9 +144,6 @@ static char *trap_msg[] = {
 	"machine check trap",		/* 28 T_MCHK */
 };
 
-static __inline int userret __P((struct proc *p, struct trapframe *frame,
-				  u_quad_t oticks, int have_mplock));
-
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
@@ -160,46 +157,105 @@ static int panic_on_nmi = 1;
 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
 	&panic_on_nmi, 0, "Panic on NMI");
 
-static __inline int
-userret(p, frame, oticks, have_mplock)
-	struct proc *p;
-	struct trapframe *frame;
-	u_quad_t oticks;
-	int have_mplock;
+/*
+ * USER->KERNEL transition.  Do not transition us out of userland from the
+ * point of view of the userland scheduler unless we actually have to
+ * switch.
+ *
+ * usertdsw is called from within a critical section.
+ */
+static void
+usertdsw(struct thread *ntd)
+{
+	struct thread *td = curthread;
+
+	td->td_switch = cpu_heavy_switch;
+	lwkt_setpri_self(TDPRI_KERN_USER);
+#if 0
+	/*
+	 * This is where we might want to catch the P_CURPROC designation
+	 * and fix it for *any* switchout rather than just an mi_switch()
+	 * switchout (move from mi_switch()?) YYY
+	 */
+	if (p->p_flag & P_CURPROC) {
+		...
+	}
+#endif
+	td->td_switch(ntd);
+}
+
+/*
+ * Note that userenter() may be re-entered several times due to AST
+ * processing.
+ */
+static __inline void
+userenter(void)
+{
+	struct thread *td = curthread;
+
+	KKASSERT(td->td_switch == cpu_heavy_switch ||
+		 td->td_switch == usertdsw);
+	td->td_switch = usertdsw;
+}
+
+static int
+userret(struct proc *p, struct trapframe *frame,
+	u_quad_t oticks, int have_mplock)
 {
 	int sig, s;
-	struct thread *td;
+	struct thread *td = curthread;
 
+	/*
+	 * Post any pending signals
+	 */
+	crit_enter();
 	while ((sig = CURSIG(p)) != 0) {
 		if (have_mplock == 0) {
 			get_mplock();
 			have_mplock = 1;
 		}
+		crit_exit();
 		postsig(sig);
+		crit_enter();
 	}
 
-	p->p_priority = p->p_usrpri;
+	/*
+	 * Set our priority properly and restore our switch function
+	 */
+	if (td->td_switch == cpu_heavy_switch) {
+		switch(p->p_rtprio.type) {
+		case RTP_PRIO_IDLE:
+			lwkt_setpri_self(TDPRI_USER_IDLE);
+			break;
+		case RTP_PRIO_REALTIME:
+		case RTP_PRIO_FIFO:
+			lwkt_setpri_self(TDPRI_USER_REAL);
+			break;
+		default:
+			lwkt_setpri_self(TDPRI_USER_NORM);
+			break;
+		}
+	} else {
+		KKASSERT(td->td_switch == usertdsw);
+		td->td_switch = cpu_heavy_switch;
+	}
+	crit_exit();
+
+	/*
+	 * If a reschedule has been requested we call chooseproc() to locate
+	 * the next runnable process.  When we wake up from that we check
+	 * for pending signals again.
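+	 *
+	 * For reference, the overall pattern in the trap/syscall paths
+	 * is roughly (sketch only):
+	 *
+	 *	userenter();		(td_switch -> usertdsw)
+	 *	... kernel work ...
+	 *	have_mplock = userret(p, &frame, sticks, have_mplock);
+	 *
+	 * so a thread that never actually switches never has to leave
+	 * the user scheduler's notion of the current process.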
+	 */
 	if (resched_wanted()) {
-		/*
-		 * Since we are curproc, clock will normally just change
-		 * our priority without moving us from one queue to another
-		 * (since the running process is not on a queue.)
-		 * If that happened after we setrunqueue ourselves but before we
-		 * mi_switch()'ed, we might not be on the queue indicated by
-		 * our priority.
-		 */
+		uio_yield();
 		if (have_mplock == 0) {
 			get_mplock();
 			have_mplock = 1;
 		}
-		s = splhigh();
-		setrunqueue(p);
-		p->p_stats->p_ru.ru_nivcsw++;
-		mi_switch();
-		splx(s);
 		while ((sig = CURSIG(p)) != 0)
 			postsig(sig);
 	}
+
 	/*
 	 * Charge system time if profiling.
 	 */
@@ -208,11 +264,24 @@ userret(p, frame, oticks, have_mplock)
 			get_mplock();
 			have_mplock = 1;
 		}
-		td = curthread;
 		addupc_task(p, frame->tf_eip,
-			    (u_int)(td->td_sticks - oticks) * psratio);
+			    (u_int)(curthread->td_sticks - oticks) * psratio);
 	}
-	curpriority = p->p_priority;
+
+	/*
+	 * In order to return to userland we need to be the designated
+	 * current (user) process on this cpu, aka P_CURPROC.  The
+	 * setrunqueue() call could make us the current process.
+	 */
+	s = splhigh();
+	while ((p->p_flag & P_CURPROC) == 0) {
+		p->p_stats->p_ru.ru_nivcsw++;
+		lwkt_deschedule_self();
+		mi_switch();
+	}
+	splx(s);
+	KKASSERT(mycpu->gd_uprocscheduled == 1);
+
 	return(have_mplock);
 }
@@ -325,6 +394,8 @@ restart:
 	if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
 		/* user trap */
 
+		userenter();
+
 		sticks = curthread->td_sticks;
 		p->p_md.md_regs = &frame;
@@ -1097,9 +1168,11 @@ syscall2(frame)
 
 	/*
 	 * access non-atomic field from critical section.  p_sticks is
-	 * updated by the clock interrupt.
+	 * updated by the clock interrupt.  Also use this opportunity
+	 * to raise our LWKT priority.
	 */
 	crit_enter();
+	userenter();
 	sticks = curthread->td_sticks;
 	crit_exit();
 
diff --git a/sys/platform/pc32/i386/vm_machdep.c b/sys/platform/pc32/i386/vm_machdep.c
index 87975c8126..3dee221cdd 100644
--- a/sys/platform/pc32/i386/vm_machdep.c
+++ b/sys/platform/pc32/i386/vm_machdep.c
@@ -39,7 +39,7 @@
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
- * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.14 2003/06/29 03:28:42 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.15 2003/06/30 19:50:30 dillon Exp $
  */
 
 #include "npx.h"
@@ -608,7 +608,7 @@ vm_page_zero_idle()
 	return (0);
 }
 
-void
+static void
 swi_vm(void *arg)
 {
 	if (busdma_swi_pending != 0)
diff --git a/sys/platform/pc32/icu/icu_vector.s b/sys/platform/pc32/icu/icu_vector.s
index 7758983c9e..2891130b2a 100644
--- a/sys/platform/pc32/icu/icu_vector.s
+++ b/sys/platform/pc32/icu/icu_vector.s
@@ -1,7 +1,7 @@
 /*
  *	from: vector.s, 386BSD 0.1 unknown origin
  * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $
- * $DragonFly: src/sys/platform/pc32/icu/icu_vector.s,v 1.8 2003/06/29 07:37:06 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/icu/icu_vector.s,v 1.9 2003/06/30 19:50:31 dillon Exp $
  */
 
 /*
@@ -165,11 +165,9 @@ IDTVEC(vec_name) ; \
	.text ;							\
	SUPERALIGN_TEXT ;					\
 IDTVEC(vec_name) ;						\
-	incl	_intr_nesting_level ;				\
-	pushl	%ebp ;	/* frame for ddb backtrace */		\
-	movl	%esp, %ebp ;					\
+	pushl	%ebp ;						\
+	movl	%esp,%ebp ;					\
	PUSH_DUMMY ;						\
-	andl	$~IRQ_LBIT(irq_num),_fpending ;			\
	pushl	intr_unit + (irq_num) * 4 ;			\
	call	*intr_handler + (irq_num) * 4 ;			\
	addl	$4, %esp ;					\
@@ -179,7 +177,6 @@ IDTVEC(vec_name) ; \
	UNMASK_IRQ(icu, irq_num) ;				\
	POP_DUMMY ;						\
	popl	%ebp ;						\
-	decl	_intr_nesting_level ;				\
	ret ;							\
 
 /*
@@ -232,12 +229,15 @@ IDTVEC(vec_name) ; \
	movl	$TDPRI_CRIT,_reqpri ;				\
	jmp	5f ;						\
 2: ;								\
+	addl	$TDPRI_CRIT,TD_PRI(%ebx) ;			\
	/* set running bit, clear pending bit, run handler */	\
	orl	$IRQ_LBIT(irq_num),_irunning ;			\
	andl	$~IRQ_LBIT(irq_num),_ipending ;			\
+	sti ;							\
	pushl	$irq_num ;					\
	call	_sched_ithd ;					\
	addl	$4,%esp ;					\
+	subl	$TDPRI_CRIT,TD_PRI(%ebx) ;			\
	incl	_cnt+V_INTR ;	/* book-keeping YYY make per-cpu */ \
	movl	intr_countp + (irq_num) * 4,%eax ;		\
	incl	(%eax) ;					\
diff --git a/sys/platform/pc32/isa/icu_vector.s b/sys/platform/pc32/isa/icu_vector.s
index 29bc2e274b..e20808f6ea 100644
--- a/sys/platform/pc32/isa/icu_vector.s
+++ b/sys/platform/pc32/isa/icu_vector.s
@@ -1,7 +1,7 @@
 /*
  *	from: vector.s, 386BSD 0.1 unknown origin
  * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $
- * $DragonFly: src/sys/platform/pc32/isa/Attic/icu_vector.s,v 1.8 2003/06/29 07:37:06 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/isa/Attic/icu_vector.s,v 1.9 2003/06/30 19:50:31 dillon Exp $
  */
 
 /*
@@ -165,11 +165,9 @@ IDTVEC(vec_name) ; \
	.text ;							\
	SUPERALIGN_TEXT ;					\
 IDTVEC(vec_name) ;						\
-	incl	_intr_nesting_level ;				\
-	pushl	%ebp ;	/* frame for ddb backtrace */		\
-	movl	%esp, %ebp ;					\
+	pushl	%ebp ;						\
+	movl	%esp,%ebp ;					\
	PUSH_DUMMY ;						\
-	andl	$~IRQ_LBIT(irq_num),_fpending ;			\
	pushl	intr_unit + (irq_num) * 4 ;			\
	call	*intr_handler + (irq_num) * 4 ;			\
	addl	$4, %esp ;					\
@@ -179,7 +177,6 @@ IDTVEC(vec_name) ; \
	UNMASK_IRQ(icu, irq_num) ;				\
	POP_DUMMY ;						\
	popl	%ebp ;						\
-	decl	_intr_nesting_level ;				\
	ret ;							\
 
 /*
@@ -232,12 +229,15 @@ IDTVEC(vec_name) ; \
	movl	$TDPRI_CRIT,_reqpri ;				\
	jmp	5f ;						\
 2: ;								\
+	addl	$TDPRI_CRIT,TD_PRI(%ebx) ;			\
	/* set running bit, clear pending bit, run handler */	\
	orl	$IRQ_LBIT(irq_num),_irunning ;			\
	andl	$~IRQ_LBIT(irq_num),_ipending ;			\
+	sti ;							\
	pushl	$irq_num ;					\
	call	_sched_ithd ;					\
	addl	$4,%esp ;					\
+	subl	$TDPRI_CRIT,TD_PRI(%ebx) ;			\
	incl	_cnt+V_INTR ;	/* book-keeping YYY make per-cpu */ \
	movl	intr_countp + (irq_num) * 4,%eax ;		\
	incl	(%eax) ;					\
diff --git a/sys/platform/pc32/isa/intr_machdep.c b/sys/platform/pc32/isa/intr_machdep.c
index 3cddbb46f1..84bd45814d 100644
--- a/sys/platform/pc32/isa/intr_machdep.c
+++ b/sys/platform/pc32/isa/intr_machdep.c
@@ -35,7 +35,7 @@
  *
  *	from: @(#)isa.c	7.2 (Berkeley) 5/13/91
  * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $
- * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.3 2003/06/29 03:28:43 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.4 2003/06/30 19:50:31 dillon Exp $
  */
 /*
  * This file contains an aggregated module marked:
@@ -111,12 +111,23 @@
 
 #define NR_INTRNAMES	(1 + ICU_LEN + 2 * ICU_LEN)
 
-u_long	*intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int	intr_mask[ICU_LEN];
-int	intr_mihandler_installed[ICU_LEN];
-static u_int*	intr_mptr[ICU_LEN];
-void	*intr_unit[ICU_LEN];
+static inthand2_t isa_strayintr;
+
+u_long	*intr_countp[ICU_LEN*2];
+inthand2_t *intr_handler[ICU_LEN*2] = {
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+	isa_strayintr, isa_strayintr, isa_strayintr, isa_strayintr,
+};
+u_int	intr_mask[ICU_LEN*2];
+int	intr_mihandler_installed[ICU_LEN*2];
+static u_int*	intr_mptr[ICU_LEN*2];
+void	*intr_unit[ICU_LEN*2];
 
 static inthand_t *fastintr[ICU_LEN] = {
	&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -167,8 +178,6 @@ static inthand_t *slowintr[ICU_LEN] = {
 #endif /* APIC_IO */
 };
 
-static inthand2_t isa_strayintr;
-
 #ifdef PC98
 #define NMI_PARITY 0x04
 #define NMI_EPARITY 0x02
@@ -318,8 +327,7 @@ isa_defaultirq()
  * Caught a stray interrupt, notify
  */
 static void
-isa_strayintr(vcookiep)
-	void *vcookiep;
+isa_strayintr(void *vcookiep)
 {
	int intr = (void **)vcookiep - &intr_unit[0];
 
@@ -340,6 +348,7 @@ isa_strayintr(vcookiep)
	 * must be done before sending an EOI so it can't be done if
	 * we are using AUTO_EOI_1.
	 */
+	printf("STRAY %d\n", intr);
	if (intrcnt[1 + intr] <= 5)
		log(LOG_ERR, "stray irq %d\n", intr);
	if (intrcnt[1 + intr] == 5)
@@ -906,12 +915,6 @@ inthand_remove(intrec *idesc)
	return (0);
 }
 
-void
-call_fast_unpend(int irq)
-{
-	fastunpend[irq]();
-}
-
 /*
  * ithread_done()
  *
@@ -919,7 +922,10 @@ call_fast_unpend(int irq)
  *	processing a loop.  We interlock with ipending and irunning.  If
  *	a new interrupt is pending for the thread the function clears the
  *	pending bit and returns.  If no new interrupt is pending we
- *	deschedule and sleep.
+ *	deschedule and sleep.  If we reschedule and return we have to
+ *	disable the interrupt again or it will keep interrupting us.
+ *
+ *	See kern/kern_intr.c for more information.
  */
 void
 ithread_done(int irq)
@@ -927,21 +933,22 @@ ithread_done(int irq)
     struct mdglobaldata *gd = mdcpu;
     int mask = 1 << irq;
 
-    crit_enter();
+    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
     INTREN(mask);
     if (gd->gd_ipending & mask) {
	atomic_clear_int(&gd->gd_ipending, mask);
+	INTRDIS(mask);
	lwkt_schedule_self();
     } else {
	lwkt_deschedule_self();
	if (gd->gd_ipending & mask) {	/* race */
	    atomic_clear_int(&gd->gd_ipending, mask);
+	    INTRDIS(mask);
	    lwkt_schedule_self();
	} else {
	    atomic_clear_int(&gd->gd_irunning, mask);
	    lwkt_switch();
	}
     }
-    crit_exit();
 }
diff --git a/sys/platform/pc32/isa/ipl.s b/sys/platform/pc32/isa/ipl.s
index 8d32c1c06b..4040f2c42f 100644
--- a/sys/platform/pc32/isa/ipl.s
+++ b/sys/platform/pc32/isa/ipl.s
@@ -37,7 +37,7 @@
  *	@(#)ipl.s
  *
  * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $
- * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.4 2003/06/29 03:28:43 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.5 2003/06/30 19:50:31 dillon Exp $
  */
 
@@ -68,6 +68,10 @@
 _soft_imask:	.long	SWI_MASK
	.globl	_softnet_imask
 _softnet_imask:	.long	SWI_NET_MASK
	.globl	_softtty_imask
 _softtty_imask:	.long	SWI_TTY_MASK
+	.globl	last_splz
+last_splz:	.long	0
+	.globl	last_splz2
+last_splz2:	.long	0
 
	.text
 
@@ -103,10 +107,10 @@ doreti_next:
	testl	_ipending,%ecx
	jne	doreti_intr
	testl	$AST_PENDING,_astpending	/* any pending ASTs? */
-	je	2f
+	jz	2f
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	1f
-	cmpl	$1,_in_vm86call
+	cmpl	$1,_in_vm86call			/* YYY make per 'cpu' */
	jnz	doreti_ast
 1:
	testb	$SEL_RPL_MASK,TF_CS(%esp)
@@ -203,15 +207,21 @@ doreti_ast:
  *	SPLZ() a C callable procedure to dispatch any unmasked pending
  *	interrupts regardless of critical section nesting.  ASTs
  *	are not dispatched.
+ *
+ *	YYY at the moment I leave us in a critical section so as
+ *	not to have to mess with the cpls which will soon be obsolete.
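+ *
+ *	In rough C terms the dispatch loop below does (sketch only; the
+ *	real code is assembly and also bumps TD_PRI by TDPRI_CRIT):
+ *
+ *		while (((fpending | ipending) & ~cpl) != 0) {
+ *			if ((fpending & ~cpl) != 0)
+ *				fastunpend[ffs(fpending & ~cpl) - 1]();
+ *			else
+ *				sched_ithd(ffs(ipending & ~cpl) - 1);
+ *		}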
  */
	SUPERALIGN_TEXT
 ENTRY(splz)
+	pushfl
	pushl	%ebx
	movl	_curthread,%ebx
	movl	TD_CPL(%ebx),%eax
+	addl	$TDPRI_CRIT,TD_PRI(%ebx)
 splz_next:
+	cli
	movl	%eax,%ecx	/* ecx = ~CPL */
	notl	%ecx
	testl	_fpending,%ecx	/* check for an unmasked fast int */
@@ -223,7 +233,9 @@ splz_next:
	testl	_ipending,%ecx
	jne	splz_intr
 
+	subl	$TDPRI_CRIT,TD_PRI(%ebx)
	popl	%ebx
+	popfl
	ret
 
 /*
@@ -235,9 +247,12 @@ splz_fast:
	bsfl	%ecx, %ecx	/* locate the next dispatchable int */
	btrl	%ecx, _fpending	/* is it really still pending? */
	jnc	splz_next
+	movl	$1,last_splz
+	movl	%ecx,last_splz2
	pushl	%eax
	call	*_fastunpend(,%ecx,4)
	popl	%eax
+	movl	$-1,last_splz
	jmp	splz_next
 
 /*
@@ -249,11 +264,15 @@ splz_intr:
	bsfl	%ecx, %ecx	/* locate the next dispatchable int */
	btrl	%ecx, _ipending	/* is it really still pending? */
	jnc	splz_next
+	sti
+	movl	$2,last_splz
	pushl	%eax
	pushl	%ecx
+	movl	%ecx,last_splz2
	call	_sched_ithd	/* YYY must pull in imasks */
	addl	$4,%esp
	popl	%eax
+	movl	$-2,last_splz
	jmp	splz_next
 
 /*
diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h
index 9052983cc9..1724032574 100644
--- a/sys/sys/globaldata.h
+++ b/sys/sys/globaldata.h
@@ -24,7 +24,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $
- * $DragonFly: src/sys/sys/globaldata.h,v 1.3 2003/06/29 07:37:07 dillon Exp $
+ * $DragonFly: src/sys/sys/globaldata.h,v 1.4 2003/06/30 19:50:32 dillon Exp $
  */
 
 #ifndef _SYS_GLOBALDATA_H_
@@ -50,6 +50,10 @@
  * further checks are necessary.  Interrupts are typically managed on a
  * per-processor basis at least until you leave a critical section, but
  * may then be scheduled to other cpus.
+ *
+ * gd_uprocscheduled indicates that the thread for a user process has been
+ * scheduled.  It is used to schedule only one user process at a time in
+ * the LWKT subsystem.
  */
 
 #ifndef _SYS_TIME_H_
@@ -71,6 +75,7 @@ struct globaldata {
	struct timeval	gd_stattv;
	int		gd_intr_nesting_level;	/* (for fast interrupts) */
	int		gd_astpending;		/* sorta MD but easier here */
+	int		gd_uprocscheduled;
	/* extended by */
 };
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index a313e1ab79..7a461f8be1 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -37,7 +37,7 @@
  *
  *	@(#)proc.h	8.15 (Berkeley) 5/19/95
  * $FreeBSD: src/sys/sys/proc.h,v 1.99.2.9 2003/06/06 20:21:32 tegge Exp $
- * $DragonFly: src/sys/sys/proc.h,v 1.18 2003/06/28 02:36:44 dillon Exp $
+ * $DragonFly: src/sys/sys/proc.h,v 1.19 2003/06/30 19:50:32 dillon Exp $
  */
 
 #ifndef _SYS_PROC_H_
@@ -203,8 +203,7 @@ struct proc {
	sigset_t p_sigmask;	/* Current signal mask. */
	stack_t	p_sigstk;	/* sp & on stack state variable */
 
-	u_char	p_priority;	/* Process priority. */
-	u_char	p_usrpri;	/* User-priority based on p_cpu and p_nice. */
+	u_char	p_priority;	/* Tracks user sched queue */
	char	p_nice;		/* Process "nice" value. */
	char	p_comm[MAXCOMLEN+1];
 
@@ -256,7 +255,7 @@ struct proc {
 #define	P_SINTR		0x00080	/* Sleep is interruptible. */
 #define	P_SUGID		0x00100	/* Had set id privileges since last exec. */
 #define	P_SYSTEM	0x00200	/* System proc: no sigs, stats or swapping. */
-#define	P_UNUSED00400	0x00400	/* (was Timing out during sleep.) */
+#define	P_CURPROC	0x00400	/* 'Current process' on this cpu */
 #define	P_TRACED	0x00800	/* Debugged process being traced. */
 #define	P_WAITED	0x01000	/* Debugging process has waited for child. */
 #define	P_WEXIT		0x02000	/* Working on exiting. */
@@ -273,7 +272,7 @@ struct proc {
 #define	P_SWAPINREQ	0x80000	/* Swapin request due to wakeup */
			/* Marked a kernel thread */
-#define	P_UNUSED100000	0x100000
+#define	P_ONRUNQ	0x100000 /* LWKT scheduled (== not on user sched q) */
 #define	P_KTHREADP	0x200000 /* Process is really a kernel thread */
 #define P_XSLEEP	0x400000 /* process sitting on xwait_t structure */
 
@@ -400,11 +399,13 @@ int	inferior __P((struct proc *p));
 int	leavepgrp __P((struct proc *p));
 void	mi_switch __P((void));
 void	procinit __P((void));
+void	relscurproc(struct proc *curp);
 int	p_trespass __P((struct ucred *cr1, struct ucred *cr2));
 void	resetpriority __P((struct proc *));
 int	roundrobin_interval __P((void));
 void	schedclock __P((struct proc *));
 void	setrunnable __P((struct proc *));
+void	clrrunnable __P((struct proc *, int stat));
 void	setrunqueue __P((struct proc *));
 void	sleepinit __P((void));
 int	suser __P((struct thread *td));
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index e23f20d000..2f3c533808 100644
--- a/sys/sys/thread.h
+++ b/sys/sys/thread.h
@@ -4,7 +4,7 @@
  *	Implements the architecture independant portion of the LWKT
  *	subsystem.
  *
- * $DragonFly: src/sys/sys/thread.h,v 1.15 2003/06/29 07:37:07 dillon Exp $
+ * $DragonFly: src/sys/sys/thread.h,v 1.16 2003/06/30 19:50:32 dillon Exp $
  */
 
 #ifndef _SYS_THREAD_H_
@@ -127,6 +127,7 @@ struct thread {
     TAILQ_ENTRY(thread) td_threadq;
     struct proc	*td_proc;	/* (optional) associated process */
     struct pcb	*td_pcb;	/* points to pcb and top of kstack */
+    struct globaldata *td_gd;	/* associated with this cpu */
     const char	*td_wmesg;	/* string name for blockage */
     void	*td_wchan;	/* waiting on channel */
     int		td_cpu;		/* cpu owning the thread */
@@ -153,7 +154,9 @@ struct thread {
  */
 #define TDF_EXITED		0x0001	/* thread finished exiting */
 #define TDF_RUNQ		0x0002	/* on run queue */
-#define TDF_PREEMPTED		0x0004	/* thread is currently preempted */
+#define TDF_PREEMPT_LOCK	0x0004	/* I have been preempted */
+#define TDF_PREEMPT_DONE	0x0008	/* acknowledge preemption complete */
+
 #define TDF_ALLOCATED_THREAD	0x0200	/* zalloc allocated thread */
 #define TDF_ALLOCATED_STACK	0x0400	/* zalloc allocated stack */
 #define TDF_VERBOSE		0x0800	/* verbose on exit */
@@ -177,6 +180,7 @@ struct thread {
 #define TDPRI_USER_NORM		6	/* user scheduler normal */
 #define TDPRI_USER_REAL		8	/* user scheduler real time */
 #define TDPRI_KERN_USER		10	/* kernel / block in syscall */
+#define TDPRI_KERN_DAEMON	12	/* kernel daemon (pageout, etc) */
 #define TDPRI_SOFT_NORM		14	/* kernel / normal */
 #define TDPRI_SOFT_TIMER	16	/* kernel / timer */
 #define TDPRI_EXITING		19	/* exiting thread */
@@ -196,7 +200,8 @@ struct thread {
 extern struct vm_zone *thread_zone;
 
 extern struct thread *lwkt_alloc_thread(struct thread *template);
-extern void lwkt_init_thread(struct thread *td, void *stack, int flags);
+extern void lwkt_init_thread(struct thread *td, void *stack, int flags,
+	struct globaldata *gd);
 extern void lwkt_free_thread(struct thread *td);
 extern void lwkt_init_wait(struct lwkt_wait *w);
 extern void lwkt_gdinit(struct globaldata *gd);
@@ -220,6 +225,8 @@ extern void lwkt_shlock(lwkt_rwlock_t lock, const char *wmesg);
 extern void lwkt_exunlock(lwkt_rwlock_t lock);
 extern void lwkt_shunlock(lwkt_rwlock_t lock);
 extern void lwkt_setpri(thread_t td, int pri);
+extern void lwkt_setpri_self(int pri);
+extern void crit_panic(void);
 extern struct proc *lwkt_preempted_proc(void);
 
diff --git a/sys/sys/thread2.h b/sys/sys/thread2.h
index 9efb554a87..9f1d8e59bc 100644
--- a/sys/sys/thread2.h
+++ b/sys/sys/thread2.h
@@ -8,7 +8,7 @@
  * on a different cpu will not be immediately scheduled by a yield() on
  * this cpu.
  *
- * $DragonFly: src/sys/sys/thread2.h,v 1.3 2003/06/29 07:37:07 dillon Exp $
+ * $DragonFly: src/sys/sys/thread2.h,v 1.4 2003/06/30 19:50:32 dillon Exp $
  */
 
 #ifndef _SYS_THREAD2_H_
@@ -27,7 +27,11 @@
 static __inline void
 crit_enter(void)
 {
-    curthread->td_pri += TDPRI_CRIT;
+    struct thread *td = curthread;
+
+    if (td->td_pri < 0)
+	crit_panic();
+    td->td_pri += TDPRI_CRIT;
 }
 
 static __inline void
@@ -36,7 +40,8 @@ crit_exit_noyield(void)
     thread_t td = curthread;
 
     td->td_pri -= TDPRI_CRIT;
-    KASSERT(td->td_pri >= 0, ("crit_exit nesting error"));
+    if (td->td_pri < 0)
+	crit_panic();
 }
 
 static __inline void
@@ -45,35 +50,12 @@ crit_exit(void)
     thread_t td = curthread;
 
     td->td_pri -= TDPRI_CRIT;
-    KASSERT(td->td_pri >= 0, ("crit_exit nesting error"));
+    if (td->td_pri < 0)
+	crit_panic();
     if (td->td_pri < mycpu->gd_reqpri)
	lwkt_yield_quick();
 }
 
-#if 0
-static __inline int
-lwkt_raisepri(int pri)
-{
-    int opri = curthread->td_pri;
-    if (opri < pri)
-	curthread->td_pri = pri;
-    return(opri);
-}
-
-static __inline int
-lwkt_lowerpri(int pri)
-{
-    thread_t td = curthread;
-    int opri = td->td_pri;
-    if (opri > pri) {
-	td->td_pri = pri;
-	if (pri < mycpu->gd_reqpri)
-	    lwkt_yield_quick();
-    }
-    return(opri);
-}
-#endif
-
 static __inline int
 lwkt_havetoken(lwkt_token_t tok)
 {
diff --git a/sys/vfs/specfs/spec_vnops.c b/sys/vfs/specfs/spec_vnops.c
index 1090cc8a9e..3f297b5604 100644
--- a/sys/vfs/specfs/spec_vnops.c
+++ b/sys/vfs/specfs/spec_vnops.c
@@ -32,7 +32,7 @@
  *
  *	@(#)spec_vnops.c	8.14 (Berkeley) 5/21/95
  * $FreeBSD: src/sys/miscfs/specfs/spec_vnops.c,v 1.131.2.4 2001/02/26 04:23:20 jlemon Exp $
- * $DragonFly: src/sys/vfs/specfs/spec_vnops.c,v 1.7 2003/06/26 05:55:16 dillon Exp $
+ * $DragonFly: src/sys/vfs/specfs/spec_vnops.c,v 1.8 2003/06/30 19:50:32 dillon Exp $
  */
 
 #include
@@ -722,8 +722,9 @@ spec_getpages(ap)
	s = splbio();
 
	/* We definitely need to be at splbio here. */
-	while ((bp->b_flags & B_DONE) == 0)
+	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "spread", 0);
+	}
 
	splx(s);
 
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 5e80ea7cae..dc3f634575 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -60,7 +60,7 @@
  *  rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_glue.c,v 1.94.2.4 2003/01/13 22:51:17 dillon Exp $
- * $DragonFly: src/sys/vm/vm_glue.c,v 1.8 2003/06/29 03:28:46 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_glue.c,v 1.9 2003/06/30 19:50:32 dillon Exp $
  */
 
 #include "opt_vm.h"
@@ -241,6 +241,7 @@ vm_fork(p1, p2, flags)
 
	td2 = lwkt_alloc_thread(NULL);
	pmap_init_proc(p2, td2);
+	lwkt_setpri(td2, TDPRI_KERN_USER);
 
	up = p2->p_addr;
 
@@ -466,18 +467,18 @@ retry:
			continue;
 
			/*
-			 * Do not swapout a process waiting on a critical
-			 * event of some kind.  Also guarantee swap_idle_threshold1
-			 * time in memory.
+			 * YYY do not swapout a proc waiting on a critical
+			 * event.
+			 *
+			 * Guarantee swap_idle_threshold1 time in memory
			 */
-			if (((p->p_priority & 0x7f) < PSOCK) ||
-			    (p->p_slptime < swap_idle_threshold1))
+			if (p->p_slptime < swap_idle_threshold1)
				continue;
 
			/*
-			 * If the system is under memory stress, or if we are swapping
-			 * idle processes >= swap_idle_threshold2, then swap the process
-			 * out.
+			 * If the system is under memory stress, or if we
+			 * are swapping idle processes >= swap_idle_threshold2,
+			 * then swap the process out.
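+			 *
+			 * In boolean form the test below is roughly
+			 * (sketch only):
+			 *
+			 *	swapout if (action & VM_SWAP_NORMAL) ||
+			 *	    ((action & VM_SWAP_IDLE) &&
+			 *	     p->p_slptime >= swap_idle_threshold2)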
			 */
			if (((action & VM_SWAP_NORMAL) == 0) &&
			    (((action & VM_SWAP_IDLE) == 0) ||
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 30c23495e9..e0a4391827 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -32,7 +32,7 @@
  *
  *	@(#)vm_meter.c	8.4 (Berkeley) 1/4/94
  * $FreeBSD: src/sys/vm/vm_meter.c,v 1.34.2.7 2002/10/10 19:28:22 dillon Exp $
- * $DragonFly: src/sys/vm/vm_meter.c,v 1.2 2003/06/17 04:29:00 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_meter.c,v 1.3 2003/06/30 19:50:32 dillon Exp $
  */
 
 #include
@@ -112,6 +112,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
			case SSLEEP:
			case SSTOP:
				if (p->p_flag & P_INMEM) {
+					/* YYY p_priority */
					if (p->p_priority <= PZERO)
						totalp->t_dw++;
					else if (p->p_slptime < maxslp)
-- 
2.41.0