From 8a8d5d85f4fb81fc5b1dd42da281ec8c7c569880 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 6 Jul 2003 21:23:56 +0000 Subject: [PATCH] MP Implementation 1/2: Get the APIC code working again, sweetly integrate the MP lock into the LWKT scheduler, replace the old simplelock code with tokens or spin locks as appropriate. In particular, the vnode interlock (and most other interlocks) are now tokens. Also clean up a few curproc/cred sequences that are no longer needed. The APs are left in degenerate state with non IPI interrupts disabled as additional LWKT work must be done before we can really make use of them, and FAST interrupts are not managed by the MP lock yet. The main thing for this stage was to get the system working with an APIC again. buildworld tested on UP and 2xCPU/MP (Dell 2550) --- sys/bus/isa/i386/isa_dma.c | 6 +- sys/conf/files.alpha | 4 +- sys/conf/files.i386 | 4 +- sys/conf/files.pc98 | 4 +- sys/cpu/i386/include/cpufunc.h | 30 +- sys/dev/drm/drm_dma.h | 5 +- sys/dev/drm/drm_os_freebsd.h | 10 +- sys/dev/raid/aac/aac.c | 6 +- sys/dev/raid/aac/aacvar.h | 10 +- sys/dev/serial/sio/sio.c | 102 +++-- sys/dev/sound/isa/i386/spkr/spkr.c | 6 +- sys/i386/apic/apic_ipl.s | 411 ++------------------ sys/i386/apic/apic_vector.s | 531 +++++++++++++------------- sys/i386/apic/mpapic.c | 27 +- sys/i386/i386/autoconf.c | 6 +- sys/i386/i386/db_interface.c | 10 +- sys/i386/i386/exception.s | 15 +- sys/i386/i386/genassym.c | 11 +- sys/i386/i386/i686_mem.c | 7 +- sys/i386/i386/identcpu.c | 26 +- sys/i386/i386/initcpu.c | 26 +- sys/i386/i386/k6_mem.c | 7 +- sys/i386/i386/machdep.c | 71 +++- sys/i386/i386/mp_machdep.c | 298 ++++++--------- sys/i386/i386/mpapic.c | 27 +- sys/i386/i386/mpboot.s | 73 ++-- sys/i386/i386/mplock.s | 398 +++++-------------- sys/i386/i386/perfmon.c | 14 +- sys/i386/i386/pmap.c | 54 +-- sys/i386/i386/simplelock.s | 321 ---------------- sys/i386/i386/spinlock.s | 110 ++++++ sys/i386/i386/swtch.s | 18 +- sys/i386/i386/trap.c | 130 +++---- sys/i386/i386/vm86.c | 10 +- sys/i386/i386/vm86bios.s | 8 +- sys/i386/include/apic.h | 330 +++++++++++----- sys/i386/include/cpufunc.h | 30 +- sys/i386/include/lock.h | 266 +++++++------ sys/i386/include/smp.h | 36 +- sys/i386/include/smptests.h | 27 +- sys/i386/isa/apic_ipl.s | 411 ++------------------ sys/i386/isa/apic_vector.s | 531 +++++++++++++------------- sys/i386/isa/clock.c | 50 +-- sys/i386/isa/intr_machdep.c | 6 +- sys/i386/isa/intr_machdep.h | 5 +- sys/i386/isa/npx.c | 26 +- sys/kern/imgact_elf.c | 6 +- sys/kern/init_main.c | 20 +- sys/kern/kern_exit.c | 6 +- sys/kern/kern_lock.c | 171 ++------- sys/kern/kern_synch.c | 46 ++- sys/kern/lwkt_thread.c | 141 ++++++- sys/kern/subr_bus.c | 12 +- sys/kern/subr_prf.c | 8 +- sys/kern/subr_rman.c | 58 ++- sys/kern/uipc_socket.c | 6 +- sys/kern/vfs_aio.c | 4 +- sys/kern/vfs_bio.c | 4 +- sys/kern/vfs_conf.c | 8 +- sys/kern/vfs_default.c | 12 +- sys/kern/vfs_subr.c | 312 ++++++++------- sys/kern/vfs_syscalls.c | 60 +-- sys/kern/vfs_vnops.c | 6 +- sys/netinet/in_pcb.c | 8 +- sys/netproto/smb/smb_iod.c | 10 +- sys/netproto/smb/smb_rq.c | 4 +- sys/netproto/smb/smb_subr.c | 16 +- sys/netproto/smb/smb_subr.h | 14 +- sys/opencrypto/crypto.c | 38 +- sys/platform/pc32/apic/apic_ipl.s | 411 ++------------------ sys/platform/pc32/apic/apic_vector.s | 531 +++++++++++++------------- sys/platform/pc32/apic/mpapic.c | 27 +- sys/platform/pc32/i386/autoconf.c | 6 +- sys/platform/pc32/i386/db_interface.c | 10 +- sys/platform/pc32/i386/exception.s | 15 +- sys/platform/pc32/i386/genassym.c | 11 +- 
sys/platform/pc32/i386/i686_mem.c | 7 +- sys/platform/pc32/i386/identcpu.c | 26 +- sys/platform/pc32/i386/initcpu.c | 26 +- sys/platform/pc32/i386/k6_mem.c | 7 +- sys/platform/pc32/i386/machdep.c | 71 +++- sys/platform/pc32/i386/mp_machdep.c | 298 ++++++--------- sys/platform/pc32/i386/mpapic.c | 27 +- sys/platform/pc32/i386/mpboot.s | 73 ++-- sys/platform/pc32/i386/mplock.s | 398 +++++-------------- sys/platform/pc32/i386/perfmon.c | 14 +- sys/platform/pc32/i386/pmap.c | 54 +-- sys/platform/pc32/i386/simplelock.s | 321 ---------------- sys/platform/pc32/i386/spinlock.s | 110 ++++++ sys/platform/pc32/i386/swtch.s | 18 +- sys/platform/pc32/i386/trap.c | 130 +++---- sys/platform/pc32/i386/vm86.c | 10 +- sys/platform/pc32/i386/vm86bios.s | 8 +- sys/platform/pc32/include/apic.h | 330 +++++++++++----- sys/platform/pc32/include/lock.h | 266 +++++++------ sys/platform/pc32/include/smp.h | 36 +- sys/platform/pc32/include/smptests.h | 27 +- sys/platform/pc32/isa/apic_ipl.s | 411 ++------------------ sys/platform/pc32/isa/apic_vector.s | 531 +++++++++++++------------- sys/platform/pc32/isa/clock.c | 50 +-- sys/platform/pc32/isa/intr_machdep.c | 6 +- sys/platform/pc32/isa/intr_machdep.h | 5 +- sys/platform/pc32/isa/npx.c | 26 +- sys/platform/vkernel/i386/genassym.c | 11 +- sys/sys/buf.h | 4 +- sys/sys/buf2.h | 10 +- sys/sys/lock.h | 32 +- sys/sys/mount.h | 6 +- sys/sys/proc.h | 8 +- sys/sys/rman.h | 4 +- sys/sys/signalvar.h | 6 +- sys/sys/thread.h | 18 +- sys/sys/thread2.h | 17 +- sys/sys/vmmeter.h | 7 +- sys/sys/vnode.h | 21 +- sys/vfs/deadfs/dead_vnops.c | 4 +- sys/vfs/gnu/ext2fs/ext2_vfsops.c | 48 ++- sys/vfs/hpfs/hpfs.h | 6 +- sys/vfs/hpfs/hpfs_hash.c | 38 +- sys/vfs/hpfs/hpfs_vfsops.c | 4 +- sys/vfs/hpfs/hpfs_vnops.c | 4 +- sys/vfs/isofs/cd9660/cd9660_node.c | 26 +- sys/vfs/mfs/mfs_vfsops.c | 3 +- sys/vfs/msdosfs/msdosfs_denode.c | 28 +- sys/vfs/msdosfs/msdosfs_vfsops.c | 31 +- sys/vfs/msdosfs/msdosfs_vnops.c | 11 +- sys/vfs/nfs/nfs_nqlease.c | 10 +- sys/vfs/ntfs/ntfs_ihash.c | 20 +- sys/vfs/ntfs/ntfs_inode.h | 4 +- sys/vfs/ntfs/ntfs_subr.c | 16 +- sys/vfs/ntfs/ntfs_vfsops.c | 6 +- sys/vfs/nullfs/null_vnops.c | 4 +- sys/vfs/nwfs/nwfs_node.c | 5 +- sys/vfs/nwfs/nwfs_vnops.c | 19 +- sys/vfs/smbfs/smbfs.h | 4 +- sys/vfs/ufs/ffs_vfsops.c | 53 +-- sys/vfs/ufs/ufs_ihash.c | 40 +- sys/vfs/ufs/ufs_inode.c | 4 +- sys/vfs/ufs/ufs_lookup.c | 4 +- sys/vfs/ufs/ufs_quota.c | 21 +- sys/vfs/ufs/ufs_readwrite.c | 4 +- sys/vfs/ufs/ufs_vnops.c | 22 +- sys/vm/vm_map.c | 42 +- sys/vm/vm_map.h | 10 +- sys/vm/vm_object.c | 20 +- sys/vm/vm_zone.c | 168 +++----- sys/vm/vm_zone.h | 11 +- sys/vm/vnode_pager.c | 6 +- 148 files changed, 4271 insertions(+), 6350 deletions(-) delete mode 100644 sys/i386/i386/simplelock.s create mode 100644 sys/i386/i386/spinlock.s delete mode 100644 sys/platform/pc32/i386/simplelock.s create mode 100644 sys/platform/pc32/i386/spinlock.s diff --git a/sys/bus/isa/i386/isa_dma.c b/sys/bus/isa/i386/isa_dma.c index bc0bacc2db..cba7a2216e 100644 --- a/sys/bus/isa/i386/isa_dma.c +++ b/sys/bus/isa/i386/isa_dma.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/isa_dma.c,v 1.4.2.1 2000/08/08 19:49:53 peter Exp $ - * $DragonFly: src/sys/bus/isa/i386/isa_dma.c,v 1.2 2003/06/17 04:28:37 dillon Exp $ + * $DragonFly: src/sys/bus/isa/i386/isa_dma.c,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ /* @@ -448,14 +448,14 @@ isa_dmastatus(int chan) waport = DMA2_CHN(chan - 4) + 2; } - disable_intr(); /* no interrupts Mr Jones! 
*/ + cpu_disable_intr(); /* YYY *//* no interrupts Mr Jones! */ outb(ffport, 0); /* clear register LSB flipflop */ low1 = inb(waport); high1 = inb(waport); outb(ffport, 0); /* clear again */ low2 = inb(waport); high2 = inb(waport); - enable_intr(); /* enable interrupts again */ + cpu_enable_intr(); /* enable interrupts again */ /* * Now decide if a wrap has tried to skew our results. diff --git a/sys/conf/files.alpha b/sys/conf/files.alpha index cba0f5e129..28c493e861 100644 --- a/sys/conf/files.alpha +++ b/sys/conf/files.alpha @@ -2,7 +2,7 @@ # files marked standard are always included. # # $FreeBSD: src/sys/conf/files.alpha,v 1.43.2.9 2002/11/21 23:45:37 sam Exp $ -# $DragonFly: src/sys/conf/Attic/files.alpha,v 1.2 2003/06/17 04:28:19 dillon Exp $ +# $DragonFly: src/sys/conf/Attic/files.alpha,v 1.3 2003/07/06 21:23:45 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -68,7 +68,7 @@ alpha/alpha/perfmon.c optional perfmon profiling-routine alpha/alpha/perfmon.c optional perfmon alpha/alpha/pmap.c standard alpha/alpha/procfs_machdep.c standard -alpha/alpha/simplelock.s optional smp +alpha/alpha/spinlock.s standard alpha/alpha/support.s standard alpha/alpha/swtch.s standard alpha/alpha/sys_machdep.c standard diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 585a061c2e..59db07914b 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -2,7 +2,7 @@ # files marked standard are always included. # # $FreeBSD: src/sys/conf/files.i386,v 1.307.2.38 2003/01/02 20:41:33 kan Exp $ -# $DragonFly: src/sys/conf/Attic/files.i386,v 1.2 2003/06/17 04:28:19 dillon Exp $ +# $DragonFly: src/sys/conf/Attic/files.i386,v 1.3 2003/07/06 21:23:45 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -206,7 +206,7 @@ i386/i386/perfmon.c optional perfmon i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/pmap.c standard i386/i386/procfs_machdep.c standard -i386/i386/simplelock.s optional smp +i386/i386/spinlock.s standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index 839fd0dcc1..1b541494b7 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -4,7 +4,7 @@ # modified for PC-9801 # # $FreeBSD: src/sys/conf/files.pc98,v 1.140.2.44 2003/02/10 13:11:50 nyan Exp $ -# $DragonFly: src/sys/conf/Attic/files.pc98,v 1.2 2003/06/17 04:28:20 dillon Exp $ +# $DragonFly: src/sys/conf/Attic/files.pc98,v 1.3 2003/07/06 21:23:45 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -195,7 +195,7 @@ i386/i386/perfmon.c optional perfmon i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/pmap.c standard i386/i386/procfs_machdep.c standard -i386/i386/simplelock.s optional smp +i386/i386/spinlock.s standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard diff --git a/sys/cpu/i386/include/cpufunc.h b/sys/cpu/i386/include/cpufunc.h index c630ea941b..1abf5fc089 100644 --- a/sys/cpu/i386/include/cpufunc.h +++ b/sys/cpu/i386/include/cpufunc.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.96.2.3 2002/04/28 22:50:54 dwmalone Exp $ - * $DragonFly: src/sys/cpu/i386/include/cpufunc.h,v 1.4 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/cpufunc.h,v 1.5 2003/07/06 21:23:49 dillon Exp $ */ /* @@ -122,15 +122,6 @@ btrl(u_int *mask, int bit) return(result); } -static __inline void -disable_intr(void) -{ - __asm __volatile("cli" : : : "memory"); -#ifdef SMP - MPINTR_LOCK(); -#endif -} - static __inline void do_cpuid(u_int ax, u_int *p) { @@ -140,11 +131,14 @@ do_cpuid(u_int ax, u_int *p) } static __inline void -enable_intr(void) +cpu_disable_intr(void) +{ + __asm __volatile("cli" : : : "memory"); +} + +static __inline void +cpu_enable_intr(void) { -#ifdef SMP - MPINTR_UNLOCK(); -#endif __asm __volatile("sti"); } @@ -286,7 +280,9 @@ invd(void) * will cause the invl*() functions to be equivalent to the cpu_invl*() * functions. */ -#ifndef SMP +#ifdef SMP +void smp_invltlb(void); +#else #define smp_invltlb() #endif @@ -630,9 +626,9 @@ load_dr7(u_int sel) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); -void disable_intr __P((void)); +void cpu_disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); -void enable_intr __P((void)); +void cpu_enable_intr __P((void)); u_char inb __P((u_int port)); u_int inl __P((u_int port)); void insb __P((u_int port, void *addr, size_t cnt)); diff --git a/sys/dev/drm/drm_dma.h b/sys/dev/drm/drm_dma.h index c058401005..f8808b8cc7 100644 --- a/sys/dev/drm/drm_dma.h +++ b/sys/dev/drm/drm_dma.h @@ -29,7 +29,7 @@ * Gareth Hughes * * $FreeBSD: src/sys/dev/drm/drm_dma.h,v 1.5.2.1 2003/04/26 07:05:28 anholt Exp $ - * $DragonFly: src/sys/dev/drm/Attic/drm_dma.h,v 1.2 2003/06/17 04:28:24 dillon Exp $ + * $DragonFly: src/sys/dev/drm/Attic/drm_dma.h,v 1.3 2003/07/06 21:23:47 dillon Exp $ */ #include "dev/drm/drmP.h" @@ -347,6 +347,7 @@ void DRM(vbl_send_signals)( drm_device_t *dev ) DRM_SPINLOCK(&dev->vbl_lock); +loop: vbl_sig = TAILQ_FIRST(&dev->vbl_sig_list); while (vbl_sig != NULL) { drm_vbl_sig_t *next = TAILQ_NEXT(vbl_sig, link); @@ -357,7 +358,9 @@ void DRM(vbl_send_signals)( drm_device_t *dev ) psignal(p, vbl_sig->signo); TAILQ_REMOVE(&dev->vbl_sig_list, vbl_sig, link); + DRM_SPINUNLOCK(&dev->vbl_lock); DRM_FREE(vbl_sig,sizeof(*vbl_sig)); + goto loop; } vbl_sig = next; } diff --git a/sys/dev/drm/drm_os_freebsd.h b/sys/dev/drm/drm_os_freebsd.h index 7a3a9e0d49..fe95022809 100644 --- a/sys/dev/drm/drm_os_freebsd.h +++ b/sys/dev/drm/drm_os_freebsd.h @@ -1,6 +1,6 @@ /* * $FreeBSD: src/sys/dev/drm/drm_os_freebsd.h,v 1.10.2.1 2003/04/26 07:05:28 anholt Exp $ - * $DragonFly: src/sys/dev/drm/Attic/drm_os_freebsd.h,v 1.3 2003/06/25 03:55:47 dillon Exp $ + * $DragonFly: src/sys/dev/drm/Attic/drm_os_freebsd.h,v 1.4 2003/07/06 21:23:47 dillon Exp $ */ #include #include @@ -94,11 +94,11 @@ #else #define DRM_CURPROC curproc #define DRM_STRUCTPROC struct proc -#define DRM_SPINTYPE struct simplelock -#define DRM_SPININIT(l,name) simple_lock_init(&l) +#define DRM_SPINTYPE struct lwkt_token +#define DRM_SPININIT(l,name) lwkt_inittoken(&l) #define DRM_SPINUNINIT(l) -#define DRM_SPINLOCK(l) simple_lock(l) -#define DRM_SPINUNLOCK(u) simple_unlock(u); +#define DRM_SPINLOCK(l) lwkt_gettoken(l) +#define DRM_SPINUNLOCK(u) lwkt_reltoken(u); #define DRM_CURRENTPID curproc->p_pid #endif diff --git a/sys/dev/raid/aac/aac.c b/sys/dev/raid/aac/aac.c index 10ec4bda26..2b32cda668 100644 --- a/sys/dev/raid/aac/aac.c +++ b/sys/dev/raid/aac/aac.c @@ -27,7 +27,7 @@ * SUCH 
DAMAGE. * * $FreeBSD: src/sys/dev/aac/aac.c,v 1.9.2.14 2003/04/08 13:22:08 scottl Exp $ - * $DragonFly: src/sys/dev/raid/aac/aac.c,v 1.3 2003/06/27 01:53:21 dillon Exp $ + * $DragonFly: src/sys/dev/raid/aac/aac.c,v 1.4 2003/07/06 21:23:47 dillon Exp $ */ /* @@ -2681,8 +2681,10 @@ aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib) /* On the off chance that someone is sleeping for an aif... */ if (sc->aac_state & AAC_STATE_AIF_SLEEPER) wakeup(sc->aac_aifq); + /* token may have been lost */ /* Wakeup any poll()ers */ selwakeup(&sc->rcv_select); + /* token may have been lost */ } AAC_LOCK_RELEASE(&sc->aac_aifq_lock); @@ -2770,6 +2772,8 @@ aac_getnext_aif(struct aac_softc *sc, caddr_t arg) /* * Hand the next AIF off the top of the queue out to userspace. + * + * YYY token could be lost during copyout */ static int aac_return_aif(struct aac_softc *sc, caddr_t uptr) diff --git a/sys/dev/raid/aac/aacvar.h b/sys/dev/raid/aac/aacvar.h index 1a8afa1285..4d7b63ab76 100644 --- a/sys/dev/raid/aac/aacvar.h +++ b/sys/dev/raid/aac/aacvar.h @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/dev/aac/aacvar.h,v 1.4.2.7 2003/04/08 13:22:08 scottl Exp $ - * $DragonFly: src/sys/dev/raid/aac/aacvar.h,v 1.3 2003/06/23 17:55:28 dillon Exp $ + * $DragonFly: src/sys/dev/raid/aac/aacvar.h,v 1.4 2003/07/06 21:23:47 dillon Exp $ */ /* @@ -259,10 +259,10 @@ typedef struct mtx aac_lock_t; #define AAC_LOCK_ACQUIRE(l) mtx_lock(l) #define AAC_LOCK_RELEASE(l) mtx_unlock(l) #else -typedef struct simplelock aac_lock_t; -#define AAC_LOCK_INIT(l, s) simple_lock_init(l) -#define AAC_LOCK_ACQUIRE(l) simple_lock(l) -#define AAC_LOCK_RELEASE(l) simple_unlock(l) +typedef struct lwkt_token aac_lock_t; +#define AAC_LOCK_INIT(l, s) lwkt_inittoken(l) +#define AAC_LOCK_ACQUIRE(l) lwkt_gettoken(l) +#define AAC_LOCK_RELEASE(l) lwkt_reltoken(l) #endif #if __FreeBSD_version >= 500005 diff --git a/sys/dev/serial/sio/sio.c b/sys/dev/serial/sio/sio.c index 8f5c84ff4d..0652f6b84f 100644 --- a/sys/dev/serial/sio/sio.c +++ b/sys/dev/serial/sio/sio.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/isa/sio.c,v 1.291.2.35 2003/05/18 08:51:15 murray Exp $ - * $DragonFly: src/sys/dev/serial/sio/sio.c,v 1.5 2003/06/29 03:28:44 dillon Exp $ + * $DragonFly: src/sys/dev/serial/sio/sio.c,v 1.6 2003/07/06 21:23:50 dillon Exp $ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ @@ -103,16 +103,6 @@ #endif #include -#ifndef __i386__ -#define disable_intr() -#define enable_intr() -#endif - -#ifdef SMP -#define disable_intr() COM_DISABLE_INTR() -#define enable_intr() COM_ENABLE_INTR() -#endif /* SMP */ - #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #define CALLOUT_MASK 0x80 @@ -917,7 +907,7 @@ sioprobe(dev, xrid, rclk) * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ - disable_intr(); + com_lock(); /* EXTRA DELAY? */ /* @@ -1040,7 +1030,7 @@ sioprobe(dev, xrid, rclk) } sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); - enable_intr(); + com_unlock(); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (iobase == siocniobase ? 
0 : result); } @@ -1080,7 +1070,7 @@ sioprobe(dev, xrid, rclk) irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; - enable_intr(); + com_unlock(); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && @@ -1274,7 +1264,7 @@ sioattach(dev, xrid, rclk) } else com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED; if (siosetwater(com, com->it_in.c_ispeed) != 0) { - enable_intr(); + com_unlock(); /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. @@ -1283,7 +1273,7 @@ sioattach(dev, xrid, rclk) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } - enable_intr(); + com_unlock(); termioschars(&com->it_in); com->it_out = com->it_in; @@ -1583,7 +1573,7 @@ open_top: } } - disable_intr(); + com_lock(); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status @@ -1595,7 +1585,7 @@ open_top: outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); } - enable_intr(); + com_unlock(); /* * Handle initial DCD. Callout devices get a fake initial * DCD (trapdoor DCD). If we are callout, then any sleeping @@ -1874,7 +1864,7 @@ sioinput(com) * call overhead). */ do { - enable_intr(); + com_unlock(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW @@ -1895,11 +1885,11 @@ sioinput(com) tp->t_lflag &= ~FLUSHO; comstart(tp); } - disable_intr(); + com_lock(); } while (buf < com->iptr); } else { do { - enable_intr(); + com_unlock(); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status @@ -1914,7 +1904,7 @@ sioinput(com) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); - disable_intr(); + com_lock(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); @@ -1935,9 +1925,9 @@ siointr(arg) void *arg; { #ifndef COM_MULTIPORT - COM_LOCK(); + com_lock(); siointr1((struct com_s *) arg); - COM_UNLOCK(); + com_unlock(); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; @@ -1950,13 +1940,13 @@ siointr(arg) * devices, then the edge from one may be lost because another is * on. */ - COM_LOCK(); + com_lock(); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* - * XXX COM_LOCK(); + * XXX com_lock(); * would it work here, or be counter-productive? */ if (com != NULL @@ -1966,10 +1956,10 @@ siointr(arg) siointr1(com); possibly_more_intrs = TRUE; } - /* XXX COM_UNLOCK(); */ + /* XXX com_unlock(); */ } } while (possibly_more_intrs); - COM_UNLOCK(); + com_unlock(); #endif /* COM_MULTIPORT */ } @@ -2366,7 +2356,7 @@ repeat: * Discard any events related to never-opened or * going-away devices. 
*/ - disable_intr(); + com_lock(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { @@ -2374,33 +2364,33 @@ repeat: com->state &= ~CS_CHECKMSR; } com_events -= incc; - enable_intr(); + com_unlock(); continue; } if (com->iptr != com->ibuf) { - disable_intr(); + com_lock(); sioinput(com); - enable_intr(); + com_unlock(); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; - disable_intr(); + com_lock(); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; - enable_intr(); + com_unlock(); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); } if (com->state & CS_ODONE) { - disable_intr(); + com_lock(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; - enable_intr(); + com_unlock(); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); @@ -2600,7 +2590,7 @@ comparam(tp, t) if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); - enable_intr(); + com_unlock(); splx(s); comstart(tp); if (com->ibufold != NULL) { @@ -2630,7 +2620,7 @@ siosetwater(com, speed) for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { - disable_intr(); + com_lock(); return (0); } @@ -2640,7 +2630,7 @@ siosetwater(com, speed) */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { - disable_intr(); + com_lock(); return (ENOMEM); } @@ -2658,7 +2648,7 @@ siosetwater(com, speed) * Read current input buffer, if any. Continue with interrupts * disabled. */ - disable_intr(); + com_lock(); if (com->iptr != com->ibuf) sioinput(com); @@ -2693,7 +2683,7 @@ comstart(tp) if (com == NULL) return; s = spltty(); - disable_intr(); + com_lock(); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else @@ -2706,7 +2696,7 @@ comstart(tp) && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } - enable_intr(); + com_unlock(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); @@ -2722,7 +2712,7 @@ comstart(tp) sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; - disable_intr(); + com_lock(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) @@ -2734,7 +2724,7 @@ comstart(tp) com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } - enable_intr(); + com_unlock(); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail @@ -2742,7 +2732,7 @@ comstart(tp) sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; - disable_intr(); + com_lock(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) @@ -2754,14 +2744,14 @@ comstart(tp) com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } - enable_intr(); + com_unlock(); } tp->t_state |= TS_BUSY; } - disable_intr(); + com_lock(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ - enable_intr(); + com_unlock(); ttwwakeup(tp); splx(s); } @@ -2776,7 +2766,7 @@ comstop(tp, rw) com = com_addr(DEV_TO_UNIT(tp->t_dev)); if (com == NULL || com->gone) return; - disable_intr(); + com_lock(); if (rw & FWRITE) { if (com->hasfifo) #ifdef COM_ESP @@ -2803,7 +2793,7 @@ comstop(tp, rw) com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } - enable_intr(); + com_unlock(); comstart(tp); } @@ -2846,7 +2836,7 @@ commctl(com, bits, how) 
mcr |= MCR_RTS; if (com->gone) return(0); - disable_intr(); + com_lock(); switch (how) { case DMSET: outb(com->modem_ctl_port, @@ -2859,7 +2849,7 @@ commctl(com, bits, how) outb(com->modem_ctl_port, com->mcr_image &= ~mcr); break; } - enable_intr(); + com_unlock(); return (0); } @@ -2918,9 +2908,9 @@ comwakeup(chan) com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { - disable_intr(); + com_lock(); siointr1(com); - enable_intr(); + com_unlock(); } } @@ -2942,10 +2932,10 @@ comwakeup(chan) u_int delta; u_long total; - disable_intr(); + com_lock(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; - enable_intr(); + com_unlock(); if (delta == 0) continue; total = com->error_counts[errnum] += delta; diff --git a/sys/dev/sound/isa/i386/spkr/spkr.c b/sys/dev/sound/isa/i386/spkr/spkr.c index 75dc8bcf4e..0d602bec61 100644 --- a/sys/dev/sound/isa/i386/spkr/spkr.c +++ b/sys/dev/sound/isa/i386/spkr/spkr.c @@ -5,7 +5,7 @@ * modified for FreeBSD by Andrew A. Chernov * * $FreeBSD: src/sys/i386/isa/spkr.c,v 1.45 2000/01/29 16:00:32 peter Exp $ - * $DragonFly: src/sys/dev/sound/isa/i386/spkr/Attic/spkr.c,v 1.3 2003/06/23 17:55:39 dillon Exp $ + * $DragonFly: src/sys/dev/sound/isa/i386/spkr/Attic/spkr.c,v 1.4 2003/07/06 21:23:49 dillon Exp $ */ #include @@ -98,10 +98,10 @@ tone(thz, ticks) return; } splx(sps); - disable_intr(); + clock_lock(); outb(TIMER_CNTR2, (divisor & 0xff)); /* send lo byte */ outb(TIMER_CNTR2, (divisor >> 8)); /* send hi byte */ - enable_intr(); + clock_unlock(); /* turn the speaker on */ outb(IO_PPI, inb(IO_PPI) | PPI_SPKR); diff --git a/sys/i386/apic/apic_ipl.s b/sys/i386/apic/apic_ipl.s index 313a0b722e..455f2cc49a 100644 --- a/sys/i386/apic/apic_ipl.s +++ b/sys/i386/apic/apic_ipl.s @@ -1,6 +1,6 @@ /*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 1997, by Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,412 +23,72 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/isa/apic_ipl.s,v 1.27.2.2 2000/09/30 02:49:35 ps Exp $ - * $DragonFly: src/sys/i386/apic/Attic/apic_ipl.s,v 1.6 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/i386/apic/Attic/apic_ipl.s,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ -#if 0 - .data ALIGN_DATA -/* - * Routines used by splz_unpend to build an interrupt frame from a - * trap frame. The _vec[] routines build the proper frame on the stack, - * then call one of _Xintr0 thru _XintrNN. - * - * used by: - * i386/isa/apic_ipl.s (this file): splz_unpend JUMPs to HWIs. - * i386/isa/clock.c: setup _vec[clock] to point at _vec8254. - */ - .globl _vec -_vec: - .long vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7 - .long vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15 - .long vec16, vec17, vec18, vec19, vec20, vec21, vec22, vec23 + /* + * Interrupt mask for APIC interrupts, defaults to all hardware + * interrupts turned off. + */ -/* - * Note: - * This is the UP equivilant of _imen. - * It is OPAQUE, and must NOT be accessed directly. - * It MUST be accessed along with the IO APIC as a 'critical region'. 
- * Accessed by: - * INTREN() - * INTRDIS() - * MAYBE_MASK_IRQ - * MAYBE_UNMASK_IRQ - * imen_dump() - */ .p2align 2 /* MUST be 32bit aligned */ - .globl _apic_imen -_apic_imen: - .long HWI_MASK + .globl apic_imen +apic_imen: + .long HWI_MASK -/* - * - */ .text SUPERALIGN_TEXT -/* - * splz() - dispatch pending interrupts after cpl reduced - * - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. However, since ipending can change at any time - * (by an interrupt or, with SMP, by another cpu), we have to - * repeat the check. At the moment we must own the MP lock in - * the SMP case because the interruput handlers require it. We - * loop until no unmasked pending interrupts remain. - * - * No new unmaksed pending interrupts will be added during the - * loop because, being unmasked, the interrupt code will be able - * to execute the interrupts. - * - * Interrupts come in two flavors: Hardware interrupts and software - * interrupts. We have to detect the type of interrupt (based on the - * position of the interrupt bit) and call the appropriate dispatch - * routine. - * - * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't - * rely on the secondary btrl tests. - */ - pushl %ebx - movl _curthread,%ebx - movl TD_CPL(%ebx),%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl $0,_reqpri - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne splz_unpend - popl %ebx - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - lock - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - * - * The vec[] routines build the proper frame on the stack so - * the interrupt will eventually return to the caller or splz, - * then calls one of _Xintr0 thru _XintrNN. + * Functions to enable and disable a hardware interrupt. Generally + * called with only one bit set in the mask but can handle multiple + * bits to present the same API as the ICU. */ - popl %ebx - jmp *_vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax /* save cpl across call */ - orl imasks(,%ecx,4),%eax - movl %eax,TD_CPL(%ebx) /* set cpl for SWI */ - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,TD_CPL(%ebx) /* restore cpl and loop */ - jmp splz_next - -/* - * Fake clock interrupt(s) so that they appear to come from our caller instead - * of from here, so that system profiling works. - * XXX do this more generally (for all vectors; look up the C entry point). - * XXX frame bogusness stops us from just jumping to the C entry point. - * We have to clear iactive since this is an unpend call, and it will be - * set from the time of the original INT. - */ - -/* - * The 'generic' vector stubs. 
- */ - -#define BUILD_VEC(irq_num) \ - ALIGN_TEXT ; \ -__CONCAT(vec,irq_num): ; \ - popl %eax ; \ - pushfl ; \ - pushl $KCSEL ; \ - pushl %eax ; \ - cli ; \ - lock ; /* MP-safe */ \ - andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \ - MEXITCOUNT ; \ - APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \ - jmp __CONCAT(_Xintr,irq_num) - - BUILD_VEC(0) - BUILD_VEC(1) - BUILD_VEC(2) - BUILD_VEC(3) - BUILD_VEC(4) - BUILD_VEC(5) - BUILD_VEC(6) - BUILD_VEC(7) - BUILD_VEC(8) - BUILD_VEC(9) - BUILD_VEC(10) - BUILD_VEC(11) - BUILD_VEC(12) - BUILD_VEC(13) - BUILD_VEC(14) - BUILD_VEC(15) - BUILD_VEC(16) /* 8 additional INTs in IO APIC */ - BUILD_VEC(17) - BUILD_VEC(18) - BUILD_VEC(19) - BUILD_VEC(20) - BUILD_VEC(21) - BUILD_VEC(22) - BUILD_VEC(23) - - -/****************************************************************************** - * XXX FIXME: figure out where these belong. - */ - -/* this nonsense is to verify that masks ALWAYS have 1 and only 1 bit set */ -#define QUALIFY_MASKS_NOT - -#ifdef QUALIFY_MASKS -#define QUALIFY_MASK \ - btrl %ecx, %eax ; \ - andl %eax, %eax ; \ - jz 1f ; \ - pushl $bad_mask ; \ - call _panic ; \ -1: - -bad_mask: .asciz "bad mask" -#else -#define QUALIFY_MASK -#endif - -/* - * (soon to be) MP-safe function to clear ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It sets the associated bit in _apic_imen. - * It sets the mask bit of the associated IO APIC register. - */ -ENTRY(INTREN) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTRDIS) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ - bsfl %eax, %ecx /* get pin index */ - btrl %ecx, apic_imen /* update apic_imen */ - - QUALIFY_MASK - + movl 4(%esp),%eax +1: + bsfl %eax,%ecx + jz 2f + btrl %ecx,%eax + btsl %ecx, apic_imen shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - - movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - andl $~IOART_INTMASK, %eax /* clear mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + jz 2f + movl %ecx, (%edx) /* target register index */ + orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ ret -/* - * (soon to be) MP-safe function to set ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It clears the associated bit in apic_imen. - * It clears the mask bit of the associated IO APIC register. 
- */ -ENTRY(INTRDIS) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTREN) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ + movl 4(%esp), %eax /* mask into %eax */ +1: bsfl %eax, %ecx /* get pin index */ - btsl %ecx, apic_imen /* update _apic_imen */ - - QUALIFY_MASK - + jz 2f + btrl %ecx,%eax + btrl %ecx, apic_imen /* update apic_imen */ shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - + jz 2f movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - orl $IOART_INTMASK, %eax /* set mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + andl $~IOART_INTMASK, 16(%edx) /* clear mask bit */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ - ret - - -/****************************************************************************** - * - */ - - -/* - * void write_ioapic_mask(int apic, u_int mask); - */ - -#define _INT_MASK 0x00010000 -#define _PIN_MASK 0x00ffffff - -#define _OLD_ESI 0(%esp) -#define _OLD_EBX 4(%esp) -#define _RETADDR 8(%esp) -#define _APIC 12(%esp) -#define _MASK 16(%esp) - - ALIGN_TEXT -write_ioapic_mask: - pushl %ebx /* scratch */ - pushl %esi /* scratch */ - - movl apic_imen, %ebx - xorl _MASK, %ebx /* %ebx = _apic_imen ^ mask */ - andl $_PIN_MASK, %ebx /* %ebx = _apic_imen & 0x00ffffff */ - jz all_done /* no change, return */ - - movl _APIC, %esi /* APIC # */ - movl ioapic, %ecx - movl (%ecx,%esi,4), %esi /* %esi holds APIC base address */ - -next_loop: /* %ebx = diffs, %esi = APIC base */ - bsfl %ebx, %ecx /* %ecx = index if 1st/next set bit */ - jz all_done - - btrl %ecx, %ebx /* clear this bit in diffs */ - leal 16(,%ecx,2), %edx /* calculate register index */ - - movl %edx, (%esi) /* write the target register index */ - movl 16(%esi), %eax /* read the target register data */ - - btl %ecx, _MASK /* test for mask or unmask */ - jnc clear /* bit is clear */ - orl $_INT_MASK, %eax /* set mask bit */ - jmp write -clear: andl $~_INT_MASK, %eax /* clear mask bit */ - -write: movl %eax, 16(%esi) /* write the APIC register data */ - - jmp next_loop /* try another pass */ - -all_done: - popl %esi - popl %ebx - ret - -#undef _OLD_ESI -#undef _OLD_EBX -#undef _RETADDR -#undef _APIC -#undef _MASK - -#undef _PIN_MASK -#undef _INT_MASK - -#ifdef oldcode - -_INTREN: - movl apic_imen, %eax - notl %eax /* mask = ~mask */ - andl apic_imen, %eax /* %eax = _apic_imen & ~mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -_INTRDIS: - movl _apic_imen, %eax - orl 4(%esp), %eax /* %eax = _apic_imen | mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -#endif /* oldcode */ - - -#ifdef ready - -/* - * u_int read_io_apic_mask(int apic); - */ - ALIGN_TEXT -read_io_apic_mask: ret -/* - * Set INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. 
- * - * void set_io_apic_mask(apic, u_int32_t bits); - */ - ALIGN_TEXT -set_io_apic_mask: - ret - -/* - * void set_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -set_ioapic_maskbit: - ret - -/* - * Clear INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void clr_io_apic_mask(int apic, u_int32_t bits); - */ - ALIGN_TEXT -clr_io_apic_mask: - ret - -/* - * void clr_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -clr_ioapic_maskbit: - ret - -#endif /** ready */ - /****************************************************************************** * */ @@ -465,4 +125,3 @@ ENTRY(apic_eoi) movl $0, lapic+0xb0 ret -#endif diff --git a/sys/i386/apic/apic_vector.s b/sys/i386/apic/apic_vector.s index 27de928460..6bc664ad41 100644 --- a/sys/i386/apic/apic_vector.s +++ b/sys/i386/apic/apic_vector.s @@ -1,62 +1,23 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/i386/apic/Attic/apic_vector.s,v 1.7 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/i386/apic/Attic/apic_vector.s,v 1.8 2003/07/06 21:23:49 dillon Exp $ */ #include #include - #include "i386/isa/intr_machdep.h" /* convert an absolute IRQ# into a bitmask */ -#define IRQ_BIT(irq_num) (1 << (irq_num)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) /* make an index into the IO APIC from the IRQ# */ #define REDTBL_IDX(irq_num) (0x10 + ((irq_num) * 2)) - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - movl %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(6*4(%esp)) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - MEXITCOUNT ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret - /* - * + * Push an interrupt frame in a format acceptable to doreti, reload + * the segment registers for the kernel. */ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ @@ -64,23 +25,54 @@ IDTVEC(vec_name) ; \ pushal ; \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ - pushl %fs + pushl %fs ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +#define PUSH_DUMMY \ + pushfl ; /* phys int frame / flags */ \ + pushl %cs ; /* phys int frame / cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; /* pushal + 3 seg regs (dummy) */ \ + +/* + * Warning: POP_FRAME can only be used if there is no chance of a + * segment register being changed (e.g. by procfs), which is why syscalls + * have to use doreti. 
+ */ #define POP_FRAME \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ - addl $4+4,%esp + addl $2*4,%esp ; /* dummy trap & error codes */ \ + +#define POP_DUMMY \ + addl $16*4,%esp ; \ #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 + +/* + * Interrupts are expected to already be disabled when using these + * IMASK_*() macros. + */ +#define IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ + +#define IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ #define MASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ jne 7f ; /* masked, don't mask */ \ - orl $IRQ_BIT(irq_num), apic_imen ; /* set the mask bit */ \ + orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -88,17 +80,18 @@ IDTVEC(vec_name) ; \ orl $IOART_INTMASK, %eax ; /* set the mask */ \ movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; /* already masked */ \ - IMASK_UNLOCK + IMASK_UNLOCK ; \ + /* * Test to see whether we are handling an edge or level triggered INT. * Level-triggered INTs must still be masked as we don't clear the source, * and the EOI cycle would cause redundant INTs to occur. */ #define MASK_LEVEL_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + testl $IRQ_LBIT(irq_num), apic_pin_trigger ; \ jz 9f ; /* edge, don't mask */ \ MASK_IRQ(irq_num) ; \ -9: +9: ; \ #ifdef APIC_INTR_REORDER @@ -108,27 +101,26 @@ IDTVEC(vec_name) ; \ testl apic_isrbit_location + 4 + 8 * (irq_num), %eax ; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi ; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ #else + #define EOI_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), lapic_isr1; \ + testl $IRQ_LBIT(irq_num), lapic_isr1; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ + #endif - /* * Test to see if the source is currntly masked, clear if so. 
*/ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ + andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax,(%ecx) ; /* write the index */ \ @@ -136,174 +128,189 @@ IDTVEC(vec_name) ; \ andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; \ - IMASK_UNLOCK - -#ifdef APIC_INTR_DIAGNOSTIC -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -log_intr_event: - pushf - cli - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_lock_np) - addl $4, %esp - movl CNAME(apic_itrace_debugbuffer_idx), %ecx - andl $32767, %ecx - movl PCPU(cpuid), %eax - shll $8, %eax - orl 8(%esp), %eax - movw %ax, CNAME(apic_itrace_debugbuffer)(,%ecx,2) - incl %ecx - andl $32767, %ecx - movl %ecx, CNAME(apic_itrace_debugbuffer_idx) - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_unlock_np) - addl $4, %esp - popf - ret - + IMASK_UNLOCK ; \ -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 ; \ +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti + * - Mask the interrupt and reenable its source + * - If we cannot take the interrupt set its fpending bit and + * doreti. + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask and doreti. + * + * YYY can cache gd base opitner instead of using hidden %fs prefixes. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + EOI_IRQ(irq_num) ; \ + incl PCPU(intr_nesting_level) ; \ + movl PCPU(curthread),%ebx ; \ + movl TD_CPL(%ebx),%eax ; \ pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - movl $(irq_num), %eax ; \ - cmpl $APIC_INTR_DIAGNOSTIC_IRQ, %eax ; \ - jne 7f ; \ - pushl $id ; \ - call log_intr_event ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num), %eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ addl $4, %esp ; \ -7: ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax -#else -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 -#endif + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ -#define APIC_ITRACE_ENTER 1 -#define APIC_ITRACE_EOI 2 -#define APIC_ITRACE_TRYISRLOCK 3 -#define APIC_ITRACE_GOTISRLOCK 4 -#define APIC_ITRACE_ENTER2 5 -#define APIC_ITRACE_LEAVE 6 -#define APIC_ITRACE_UNMASK 7 -#define APIC_ITRACE_ACTIVE 8 -#define APIC_ITRACE_MASKED 9 -#define APIC_ITRACE_NOISRLOCK 10 -#define APIC_ITRACE_MASKED2 11 -#define APIC_ITRACE_SPLZ 12 -#define APIC_ITRACE_DORETI 13 - -#else -#define APIC_ITRACE(name, irq_num, id) -#endif 
- -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ +/* + * Restart fast interrupt held up by critical section or cpl. + * + * - Push a dummy trap frame as required by doreti + * - The interrupt source is already masked + * - Clear the fpending bit + * - Run the handler + * - Unmask the interrupt + * - Pop the dummy frame and do a normal return + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. + * - If we can take the interrupt clear its ipending bit, + * set its irunning bit, and schedule the thread. Leave + * interrupts masked and doreti. + * + * The interrupt thread will run its handlers and loop if + * ipending is found to be set. ipending/irunning interlock + * the interrupt thread with the interrupt. The handler calls + * UNPEND when it is through. + * + * Note that we do not enable interrupts when calling sched_ithd. + * YYY sched_ithd may preempt us synchronously (fix interrupt stacking) + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ -/* XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; \ - mov %ax, %fs ; \ -; \ maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ + incl PCPU(intr_nesting_level) ; \ movl PCPU(curthread),%ebx ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%eax) ; \ - jne 2f ; /* this INT masked */ \ + movl TD_CPL(%ebx),%eax ; \ + pushl %eax ; /* cpl to restore */ \ cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ - jge 2f ; /* in critical sec */ \ -; \ - incb PCPU(intr_nesting_level) ; \ -; \ - /* entry point used by doreti_unpend for HWIs.
*/ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl PCPU(curthread), %ebx ; \ - movl TD_MACH+MTD_CPL(%ebx), %eax ; \ - pushl %eax ; /* cpl restored by doreti */ \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, TD_MACH+MTD_CPL(%ebx) ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), PCPU(ipending) ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num),PCPU(irunning) ; \ + jnz 1f ; \ + testl $IRQ_LBIT(irq_num),%eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave the interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + /* set running bit, clear pending bit, run handler */ \ + orl $IRQ_LBIT(irq_num), PCPU(irunning) ; \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ + pushl $irq_num ; \ + call sched_ithd ; \ addl $4,%esp ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping YYY make per-cpu */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ +5: ; \ MEXITCOUNT ; \ jmp doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ + +/* + * Unmask a slow interrupt. This function is used by interrupt threads + * after they have descheduled themselves to reenable interrupts and + * possibly cause a reschedule to occur. The interrupt's irunning bit + * is cleared prior to unmasking. + */ + +#define INTR_UNMASK(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; /* frame for ddb backtrace */ \ + movl %esp, %ebp ; \ + andl $~IRQ_LBIT(irq_num), PCPU(irunning) ; \ + UNMASK_IRQ(irq_num) ; \ + popl %ebp ; \ + ret ; \ + +#if 0 + /* XXX forward_irq to cpu holding the BGL?
*/ + ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ movl $TDPRI_CRIT,_reqpri ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%ebx) ; \ + testl $IRQ_LBIT(irq_num), TD_CPL(%ebx) ; \ jne 4f ; /* this INT masked */ \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ POP_FRAME ; /* and return */ \ iret @@ -314,6 +321,9 @@ __CONCAT(Xresume,irq_num): ; \ * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ + +#endif + .text SUPERALIGN_TEXT .globl Xspuriousint @@ -329,8 +339,8 @@ Xspuriousint: */ .text SUPERALIGN_TEXT - .globl _Xinvltlb -_Xinvltlb: + .globl Xinvltlb +Xinvltlb: pushl %eax #ifdef COUNT_XINVLTLB_HITS @@ -353,6 +363,7 @@ _Xinvltlb: iret +#if 0 #ifdef BETTER_CLOCK /* @@ -413,13 +424,14 @@ Xcpucheckstate: iret #endif /* BETTER_CLOCK */ +#endif /* * Executed by a CPU when it receives an Xcpuast IPI from another CPU, * * - Signals its receipt by clearing bit cpuid in checkstate_need_ast. - * - * - We need a better method of triggering asts on other cpus. + * - MP safe in regards to setting AST_PENDING because doreti is in + * a cli mode when it checks. */ .text @@ -427,11 +439,6 @@ Xcpucheckstate: .globl Xcpuast Xcpuast: PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs movl PCPU(cpuid), %eax lock /* checkstate_need_ast &= ~(1< @@ -74,11 +74,13 @@ apic_initialize(void) /* set the Task Priority Register as needed */ temp = lapic.tpr; temp &= ~APIC_TPR_PRIO; /* clear priority field */ -#ifdef GRAB_LOPRIO - /* Leave the BSP at TPR 0 during boot to make sure it gets interrupts */ + + /* + * Leave the BSP and TPR 0 during boot so it gets all the interrupts, + * set APs at TPR 0xF0 at boot so they get no ints. + */ if (mycpu->gd_cpuid != 0) - temp |= LOPRIO_LEVEL; /* allow INT arbitration */ -#endif + temp |= TPR_IPI_ONLY; /* disable INTs on this cpu */ lapic.tpr = temp; /* enable the local APIC */ @@ -188,7 +190,6 @@ io_apic_setup_intpin(int apic, int pin) u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ int level; - u_int eflags; target = IOART_DEST; @@ -209,14 +210,11 @@ io_apic_setup_intpin(int apic, int pin) * shouldn't and stop the carnage. 
*/ vector = NRSVIDT + pin; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); /* we only deal with vectored INTs here */ if (apic_int_type(apic, pin) != 0) @@ -260,13 +258,10 @@ io_apic_setup_intpin(int apic, int pin) printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq); vector = NRSVIDT + irq; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, flags | vector); io_apic_write(apic, select + 1, target); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); } int diff --git a/sys/i386/i386/autoconf.c b/sys/i386/i386/autoconf.c index 512cb2084c..78f5a8a20a 100644 --- a/sys/i386/i386/autoconf.c +++ b/sys/i386/i386/autoconf.c @@ -35,7 +35,7 @@ * * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - * $DragonFly: src/sys/i386/i386/Attic/autoconf.c,v 1.4 2003/06/28 04:16:02 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/autoconf.c,v 1.5 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -144,9 +144,9 @@ configure(dummy) */ #ifdef APIC_IO bsp_apic_configure(); - enable_intr(); + cpu_enable_intr(); #else - enable_intr(); + cpu_enable_intr(); INTREN(IRQ_SLAVE); #endif /* APIC_IO */ diff --git a/sys/i386/i386/db_interface.c b/sys/i386/i386/db_interface.c index 7021f1eb45..ce1c478998 100644 --- a/sys/i386/i386/db_interface.c +++ b/sys/i386/i386/db_interface.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ - * $DragonFly: src/sys/i386/i386/Attic/db_interface.c,v 1.3 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/db_interface.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -64,6 +64,8 @@ static int db_global_jmpbuf_valid; #define rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;}) #endif +#define VERBOSE_CPUSTOP_ON_DDBBREAK + /* * kdb_trap - field a TRACE or BPT trap */ @@ -139,7 +141,8 @@ kdb_trap(type, code, regs) #ifdef CPUSTOP_ON_DDBBREAK #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) - db_printf("\nCPU%d stopping CPUs: 0x%08x\n", cpuid, other_cpus); + db_printf("\nCPU%d stopping CPUs: 0x%08x\n", + mycpu->gd_cpuid, mycpu->gd_other_cpus); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* We stop all CPUs except ourselves (obviously) */ @@ -168,7 +171,8 @@ kdb_trap(type, code, regs) #ifdef CPUSTOP_ON_DDBBREAK #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) - db_printf("\nCPU%d restarting CPUs: 0x%08x\n", cpuid, stopped_cpus); + db_printf("\nCPU%d restarting CPUs: 0x%08x\n", + mycpu->gd_cpuid, stopped_cpus); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* Restart all the CPUs we previously stopped */ diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 8d41ad77ca..03b2a1e0e5 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - * $DragonFly: src/sys/i386/i386/Attic/exception.s,v 1.10 2003/07/03 17:24:01 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/exception.s,v 1.11 2003/07/06 21:23:48 dillon Exp $ */ #include "npx.h" @@ -194,8 +194,7 @@ IDTVEC(xmm) * Note that int0x80_syscall is a trap gate. 
Only page faults
 * use an interrupt gate.
 *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
+ * Note that we are MP safe through to the call to trap(); the MP lock
+ * is no longer acquired here.
 */

	SUPERALIGN_TEXT
@@ -216,7 +215,6 @@ alltraps_with_regs_pushed:
calltrap:
	FAKE_MCOUNT(btrap)		/* init "from" _btrap -> calltrap */
	incl	PCPU(cnt)+V_TRAP	/* YYY per-cpu */
-	MP_LOCK
	movl	PCPU(curthread),%eax	/* keep orig cpl here during call */
	movl	TD_CPL(%eax),%ebx
	call	trap
@@ -268,9 +266,6 @@ IDTVEC(syscall)
	cli				/* atomic astpending access */
	cmpl	$0,PCPU(astpending)
	je	doreti_syscall_ret
-#ifdef SMP
-	MP_LOCK
-#endif
	pushl	$0			/* cpl to restore */
	movl	$1,PCPU(intr_nesting_level)
	jmp	doreti
@@ -305,9 +300,6 @@ IDTVEC(int0x80_syscall)
	cli				/* atomic astpending access */
	cmpl	$0,PCPU(astpending)
	je	doreti_syscall_ret
-#ifdef SMP
-	MP_LOCK
-#endif
	pushl	$0			/* cpl to restore */
	movl	$1,PCPU(intr_nesting_level)
	jmp	doreti
@@ -318,6 +310,9 @@ IDTVEC(int0x80_syscall)
 * cpu_heavy_restore from being interrupted (especially since it stores
 * its context in a static place!), so the first thing we do is release
 * the critical section.
+ *
+ * The MP lock is held on entry; for processes, fork_return() (the
+ * function whose address is passed in %esi) releases it.  'doreti'
+ * always runs without the MP lock.
 */
ENTRY(fork_trampoline)
	movl	PCPU(curthread),%eax
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 3aec802898..1bda9909bf 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -35,7 +35,7 @@
 *
 * from: @(#)genassym.c	5.11 (Berkeley) 5/10/91
 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $
- * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.20 2003/07/04 00:32:24 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/genassym.c,v 1.21 2003/07/06 21:23:48 dillon Exp $
 */

#include "opt_user_ldt.h"
@@ -50,6 +50,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -87,8 +88,14 @@ ASSYM(TD_SP, offsetof(struct thread, td_sp));
ASSYM(TD_PRI, offsetof(struct thread, td_pri));
ASSYM(TD_MACH, offsetof(struct thread, td_mach));
ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan));
+#ifdef SMP
+ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount));
+#endif
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TDF_EXITED, TDF_EXITED);
+#ifdef SMP
+ASSYM(MP_FREE_LOCK, MP_FREE_LOCK);
+#endif

ASSYM(RW_OWNER, offsetof(struct lwkt_rwlock, rw_owner));

@@ -101,6 +108,8 @@ ASSYM(SRUN, SRUN);
ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
+ASSYM(V_FORWARDED_HITS, offsetof(struct vmmeter, v_forwarded_hits));
+ASSYM(V_FORWARDED_MISSES, offsetof(struct vmmeter, v_forwarded_misses));
ASSYM(UPAGES, UPAGES);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
diff --git a/sys/i386/i386/i686_mem.c b/sys/i386/i386/i686_mem.c
index d49c97249a..72c71ff6ed 100644
--- a/sys/i386/i386/i686_mem.c
+++ b/sys/i386/i386/i686_mem.c
@@ -24,7 +24,7 @@
 * SUCH DAMAGE.
* * $FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.8.2.4 2002/09/24 08:12:51 mdodd Exp $ - * $DragonFly: src/sys/i386/i386/Attic/i686_mem.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/i686_mem.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -39,6 +39,7 @@ #ifdef SMP #include #endif +#include /* * i686 memory range operations @@ -268,9 +269,9 @@ i686_mrstore(struct mem_range_softc *sc) */ smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc); #else - disable_intr(); /* disable interrupts */ + mpintr_lock(); /* doesn't have to be mpintr YYY */ i686_mrstoreone((void *)sc); - enable_intr(); + mpintr_unlock(); #endif } diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index 5e764b1551..27d62c5fda 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -39,7 +39,7 @@ * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp * $FreeBSD: src/sys/i386/i386/identcpu.c,v 1.80.2.15 2003/04/11 17:06:41 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/identcpu.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/identcpu.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -813,12 +813,10 @@ identblue(void) static void identifycyrix(void) { - u_int eflags; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); @@ -843,7 +841,7 @@ identifycyrix(void) else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ - write_eflags(eflags); + mpintr_unlock(); } /* @@ -1097,12 +1095,10 @@ u_int32_t longrun_modes[LONGRUN_MODE_MAX][3] = { static u_int tmx86_get_longrun_mode(void) { - u_long eflags; union msrinfo msrinfo; u_int low, high, flags, mode; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); low = LONGRUN_MODE_MASK(msrinfo.regs[0]); @@ -1118,40 +1114,36 @@ tmx86_get_longrun_mode(void) } mode = LONGRUN_MODE_UNKNOWN; out: - write_eflags(eflags); + mpintr_unlock(); return (mode); } static u_int tmx86_get_longrun_status(u_int * frequency, u_int * voltage, u_int * percentage) { - u_long eflags; u_int regs[4]; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); do_cpuid(0x80860007, regs); *frequency = regs[0]; *voltage = regs[1]; *percentage = regs[2]; - write_eflags(eflags); + mpintr_unlock(); return (1); } static u_int tmx86_set_longrun_mode(u_int mode) { - u_long eflags; union msrinfo msrinfo; if (mode >= LONGRUN_MODE_UNKNOWN) { return (0); } - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); /* Write LongRun mode values to Model Specific Register. */ msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); @@ -1166,7 +1158,7 @@ tmx86_set_longrun_mode(u_int mode) msrinfo.regs[0] = (msrinfo.regs[0] & ~0x01) | longrun_modes[mode][2]; wrmsr(MSR_TMx86_LONGRUN_FLAGS, msrinfo.msr); - write_eflags(eflags); + mpintr_unlock(); return (1); } diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c index 49e696769a..dddbd08c43 100644 --- a/sys/i386/i386/initcpu.c +++ b/sys/i386/i386/initcpu.c @@ -27,7 +27,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.19.2.9 2003/04/05 13:47:19 dwmalone Exp $ - * $DragonFly: src/sys/i386/i386/Attic/initcpu.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/initcpu.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -87,7 +87,7 @@ init_bluelightning(void) #endif eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); invd(); @@ -121,7 +121,7 @@ init_486dlc(void) u_char ccr0; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); invd(); ccr0 = read_cyrix_reg(CCR0); @@ -167,7 +167,7 @@ init_cy486dx(void) u_char ccr2; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); invd(); ccr2 = read_cyrix_reg(CCR2); @@ -198,7 +198,7 @@ init_5x86(void) u_char ccr2, ccr3, ccr4, pcr0; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -302,7 +302,7 @@ init_i486_on_386(void) #endif eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ @@ -322,7 +322,7 @@ init_6x86(void) u_char ccr3, ccr4; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -403,7 +403,7 @@ init_6x86MX(void) u_char ccr3, ccr4; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -483,7 +483,7 @@ init_mendocino(void) u_int64_t bbl_cr_ctl3; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -657,7 +657,7 @@ enable_K5_wt_alloc(void) * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { - disable_intr(); + cpu_disable_intr(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); @@ -701,7 +701,7 @@ enable_K6_wt_alloc(void) u_long eflags; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -763,7 +763,7 @@ enable_K6_2_wt_alloc(void) u_long eflags; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -832,7 +832,7 @@ DB_SHOW_COMMAND(cyrixreg, cyrixreg) cr0 = rcr0(); if (strcmp(cpu_vendor,"CyrixInstead") == 0) { eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { diff --git a/sys/i386/i386/k6_mem.c b/sys/i386/i386/k6_mem.c index 98d5f75b11..84efb8b476 100644 --- a/sys/i386/i386/k6_mem.c +++ b/sys/i386/i386/k6_mem.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/k6_mem.c,v 1.4.2.2 2002/09/16 21:58:41 dwmalone Exp $ - * $DragonFly: src/sys/i386/i386/Attic/k6_mem.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/k6_mem.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ * */ @@ -37,6 +37,7 @@ #include #include +#include /* * A K6-2 MTRR is defined as the highest 15 bits having the address, the next @@ -167,14 +168,14 @@ k6_mrset(struct mem_range_softc *sc, struct mem_range_desc *desc, int *arg) { out: - disable_intr(); + mpintr_lock(); wbinvd(); reg = rdmsr(UWCCR); reg &= ~(0xffffffff << (32 * d)); reg |= mtrr << (32 * d); wrmsr(UWCCR, reg); wbinvd(); - enable_intr(); + mpintr_unlock(); return 0; } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index c77d7b58fe..ceacbc8de5 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.21 2003/07/03 18:19:51 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.22 2003/07/06 21:23:48 dillon Exp $ */ #include "apm.h" @@ -135,6 +135,7 @@ static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); #ifdef DIRECTIO extern void ffs_rawread_setup(void); #endif /* DIRECTIO */ +static void init_locks(void); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) @@ -950,7 +951,7 @@ cpu_halt(void) * Note on cpu_idle_hlt: On an SMP system this may cause the system to * halt until the next clock tick, even if a thread is ready YYY */ -static int cpu_idle_hlt = 1; +static int cpu_idle_hlt = 0; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); @@ -1829,6 +1830,7 @@ init386(int first) * Prevent lowering of the ipl if we call tsleep() early. */ gd = &CPU_prvspace[0].mdglobaldata; + bzero(gd, sizeof(*gd)); gd->mi.gd_curthread = &thread0; @@ -1915,6 +1917,8 @@ init386(int first) #ifdef USER_LDT gd->gd_currentldt = _default_ldt; #endif + /* spinlocks and the BGL */ + init_locks(); /* exceptions */ for (x = 0; x < NIDT; x++) @@ -2633,3 +2637,66 @@ outb(u_int port, u_char data) } #endif /* DDB */ + + + +#include "opt_cpu.h" +#include "opt_htt.h" +#include "opt_user_ldt.h" + + +/* + * initialize all the SMP locks + */ + +/* critical region around IO APIC, apic_imen */ +struct spinlock imen_spinlock; + +/* Make FAST_INTR() routines sequential */ +struct spinlock fast_intr_spinlock; + +/* critical region for old style disable_intr/enable_intr */ +struct spinlock mpintr_spinlock; + +/* critical region around INTR() routines */ +struct spinlock intr_spinlock; + +/* lock region used by kernel profiling */ +struct spinlock mcount_spinlock; + +/* locks com (tty) data/hardware accesses: a FASTINTR() */ +struct spinlock com_spinlock; + +/* locks kernel printfs */ +struct spinlock cons_spinlock; + +/* lock regions around the clock hardware */ +struct spinlock clock_spinlock; + +/* lock around the MP rendezvous */ +struct spinlock smp_rv_spinlock; + +static void +init_locks(void) +{ + /* + * mp_lock = 0; BSP already owns the MP lock + */ + /* + * Get the initial mp_lock with a count of 1 for the BSP. + * This uses a LOGICAL cpu ID, ie BSP == 0. 
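+	 *
+	 * cpu_get_initial_mplock() (see i386/i386/mplock.s) records the
+	 * BSP as owner by setting mp_lock to cpu 0's id and giving
+	 * curthread a td_mpcount of 1.  The spin_lock_init() calls below
+	 * are assumed to simply put each spinlock in its unlocked state.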
+ */ +#ifdef SMP + cpu_get_initial_mplock(); +#endif + spin_lock_init(&mcount_spinlock); + spin_lock_init(&fast_intr_spinlock); + spin_lock_init(&intr_spinlock); + spin_lock_init(&mpintr_spinlock); + spin_lock_init(&imen_spinlock); + spin_lock_init(&smp_rv_spinlock); + spin_lock_init(&com_spinlock); + spin_lock_init(&clock_spinlock); + spin_lock_init(&cons_spinlock); +} + diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 62124654ad..f60a00a3a7 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.8 2003/06/28 04:16:02 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/mp_machdep.c,v 1.9 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -52,14 +52,12 @@ #include #include #include -#ifdef BETTER_CLOCK #include #include #include #ifdef GPROF #include #endif -#endif #include #include @@ -187,8 +185,8 @@ typedef struct BASETABLE_ENTRY { * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * -#define CHECK_POINTS */ +#define CHECK_POINTS #if defined(CHECK_POINTS) && !defined(PC98) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) @@ -259,27 +257,7 @@ extern int nkpt; u_int32_t cpu_apic_versions[MAXCPU]; u_int32_t *io_apic_versions; -#ifdef APIC_INTR_DIAGNOSTIC -int apic_itrace_enter[32]; -int apic_itrace_tryisrlock[32]; -int apic_itrace_gotisrlock[32]; -int apic_itrace_active[32]; -int apic_itrace_masked[32]; -int apic_itrace_noisrlock[32]; -int apic_itrace_masked2[32]; -int apic_itrace_unmask[32]; -int apic_itrace_noforward[32]; -int apic_itrace_leave[32]; -int apic_itrace_enter2[32]; -int apic_itrace_doreti[32]; -int apic_itrace_splz[32]; -int apic_itrace_eoi[32]; -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -unsigned short apic_itrace_debugbuffer[32768]; -int apic_itrace_debugbuffer_idx; -struct simplelock apic_itrace_debuglock; -#endif -#endif +struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; #ifdef APIC_INTR_REORDER struct { @@ -288,7 +266,6 @@ struct { } apic_isrbit_location[32]; #endif -struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; /* * APIC ID logical/physical mapping structures. 
@@ -337,7 +314,6 @@ static int mptable_pass2(void); static void default_mp_table(int type); static void fix_mp_table(void); static void setup_apic_irq_mapping(void); -static void init_locks(void); static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); @@ -463,11 +439,12 @@ init_secondary(void) int gsel_tss; int x, myid = bootAP; u_int cr0; + struct mdglobaldata *md; gdt_segs[GPRIV_SEL].ssd_base = (int) &CPU_prvspace[myid]; gdt_segs[GPROC0_SEL].ssd_base = - (int) &CPU_prvspace[myid].globaldata.gd_common_tss; - CPU_prvspace[myid].globaldata.gd_prvspace = &CPU_prvspace[myid]; + (int) &CPU_prvspace[myid].mdglobaldata.gd_common_tss; + CPU_prvspace[myid].mdglobaldata.mi.gd_prvspace = &CPU_prvspace[myid]; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); @@ -486,11 +463,14 @@ init_secondary(void) gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - common_tss.tss_esp0 = 0; /* not used until after switch */ - common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - common_tss.tss_ioopt = (sizeof common_tss) << 16; - tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; - common_tssd = *tss_gdt; + + md = mdcpu; + + md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ + md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; + md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; + md->gd_common_tssd = *md->gd_tss_gdt; ltr(gsel_tss); /* @@ -575,9 +555,6 @@ mp_enable(u_int boot_addr) if (x) default_mp_table(x); - /* initialize all SMP locks */ - init_locks(); - /* post scan cleanup */ fix_mp_table(); setup_apic_irq_mapping(); @@ -604,10 +581,12 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#if 0 #ifdef BETTER_CLOCK /* install an inter-CPU IPI for reading processor state */ setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif #endif /* install an inter-CPU IPI for all-CPU rendezvous */ @@ -745,7 +724,7 @@ static int lookup_bus_type __P((char *name)); * 1st pass on motherboard's Intel MP specification table. * * initializes: - * mp_ncpus = 1 + * ncpus = 1 * * determines: * cpu_apic_address (common to all CPUs) @@ -862,7 +841,7 @@ mptable_pass1(void) * Count the BSP. * This is also used as a counter while starting the APs. 
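 * Each AP increments ncpus from ap_init() as it comes up; start_ap()
 * polls the counter to detect a successful launch.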
*/ - mp_ncpus = 1; + ncpus = 1; --mp_naps; /* subtract the BSP */ } @@ -1998,82 +1977,6 @@ default_mp_table(int type) #endif /* APIC_IO */ } - -/* - * initialize all the SMP locks - */ - -/* critical region around IO APIC, apic_imen */ -struct simplelock imen_lock; - -/* critical region around splxx(), cpl, cml, cil, ipending */ -struct simplelock cpl_lock; - -/* Make FAST_INTR() routines sequential */ -struct simplelock fast_intr_lock; - -/* critical region around INTR() routines */ -struct simplelock intr_lock; - -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - -/* lock region used by kernel profiling */ -struct simplelock mcount_lock; - -#ifdef USE_COMLOCK -/* locks com (tty) data/hardware accesses: a FASTINTR() */ -struct simplelock com_lock; -#endif /* USE_COMLOCK */ - -#ifdef USE_CLOCKLOCK -/* lock regions around the clock hardware */ -struct simplelock clock_lock; -#endif /* USE_CLOCKLOCK */ - -/* lock around the MP rendezvous */ -static struct simplelock smp_rv_lock; - -static void -init_locks(void) -{ - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. - */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - -#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) - s_lock_init((struct simplelock*)&apic_itrace_debuglock); -#endif - - s_lock_init((struct simplelock*)&mpintr_lock); - - s_lock_init((struct simplelock*)&mcount_lock); - - s_lock_init((struct simplelock*)&fast_intr_lock); - s_lock_init((struct simplelock*)&intr_lock); - s_lock_init((struct simplelock*)&imen_lock); - s_lock_init((struct simplelock*)&cpl_lock); - s_lock_init(&smp_rv_lock); - -#ifdef USE_COMLOCK - s_lock_init((struct simplelock*)&com_lock); -#endif /* USE_COMLOCK */ -#ifdef USE_CLOCKLOCK - s_lock_init((struct simplelock*)&clock_lock); -#endif /* USE_CLOCKLOCK */ -} - - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); - /* * start each AP in our list */ @@ -2083,7 +1986,7 @@ start_all_aps(u_int boot_addr) int x, i, pg; u_char mpbiosreason; u_long mpbioswarmvec; - struct globaldata *gd; + struct mdglobaldata *gd; char *stack; uintptr_t kptbase; @@ -2124,24 +2027,29 @@ start_all_aps(u_int boot_addr) pg = x * i386_btop(sizeof(struct privatespace)); /* allocate a new private data page */ - gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE); + gd = (struct mdglobaldata *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd)); /* allocate and set up an idle stack data page */ stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE); - for (i = 0; i < UPAGES; i++) + for (i = 0; i < UPAGES; i++) { SMPpt[pg + 5 + i] = (pt_entry_t) (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); + } SMPpt[pg + 1] = 0; /* *gd_CMAP1 */ SMPpt[pg + 2] = 0; /* *gd_CMAP2 */ SMPpt[pg + 3] = 0; /* *gd_CMAP3 */ SMPpt[pg + 4] = 0; /* *gd_PMAP1 */ + gd = &CPU_prvspace[x].mdglobaldata; /* official location */ + bzero(gd, sizeof(*gd)); + gd->mi.gd_prvspace = &CPU_prvspace[x]; + /* prime data page for it to use */ - mi_gdinit(gd, x); + mi_gdinit(&gd->mi, x); cpu_gdinit(gd, x); gd->gd_cpu_lockid = x << 24; gd->gd_CMAP1 = &SMPpt[pg + 1]; @@ -2161,7 +2069,10 @@ start_all_aps(u_int boot_addr) outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ #endif - bootSTK = &CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE]; + /* + * Setup the AP boot stack + */ + bootSTK = 
&CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE/2]; bootAP = x; /* attempt to start the Application Processor */ @@ -2183,7 +2094,7 @@ start_all_aps(u_int boot_addr) } /* build our map of 'other' CPUs */ - other_cpus = all_cpus & ~(1 << cpuid); + mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); /* fill in our (BSP) APIC version */ cpu_apic_versions[0] = lapic.version; @@ -2196,24 +2107,15 @@ start_all_aps(u_int boot_addr) #endif /* - * Set up the idle context for the BSP. Similar to above except - * that some was done by locore, some by pmap.c and some is implicit - * because the BSP is cpu#0 and the page is initially zero, and also - * because we can refer to variables by name on the BSP.. + * NOTE! The idlestack for the BSP was setup by locore. Finish + * up, clean out the P==V mapping we did earlier. */ - - /* Allocate and setup BSP idle stack */ - stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE); - for (i = 0; i < UPAGES; i++) - SMPpt[5 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - for (x = 0; x < NKPT; x++) PTD[x] = 0; pmap_set_opt(); /* number of APs actually started */ - return mp_ncpus - 1; + return ncpus - 1; } @@ -2301,7 +2203,10 @@ start_ap(int logical_cpu, u_int boot_addr) vector = (boot_addr >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ - cpus = mp_ncpus; + cpus = ncpus; + + /* Make sure the target cpu sees everything */ + wbinvd(); /* * first we do an INIT/RESET IPI this INIT IPI might be run, reseting @@ -2358,12 +2263,12 @@ start_ap(int logical_cpu, u_int boot_addr) /* spin */ ; u_sleep(200); /* wait ~200uS */ - /* wait for it to start */ + /* wait for it to start, see ap_init() */ set_apic_timer(5000000);/* == 5 seconds */ - while (read_apic_timer()) - if (mp_ncpus > cpus) + while (read_apic_timer()) { + if (ncpus > cpus) return 1; /* return SUCCESS */ - + } return 0; /* return FAILURE */ } @@ -2473,29 +2378,42 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, &forward_roundrobin_enabled, 0, ""); /* - * This is called once the rest of the system is up and running and we're - * ready to let the AP's out of the pen. + * This is called once the mpboot code has gotten us properly relocated + * and the MMU turned on, etc. ap_init() is actually the idle thread, + * and when it returns the scheduler will call the real cpu_idle() main + * loop for the idlethread. Interrupts are disabled on entry and should + * remain disabled at return. */ -void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* + * Signal the BSP that we have started up successfully by incrementing + * ncpus. Note that we do not hold the BGL yet. The BSP is waiting + * for our signal. + */ + ++ncpus; + + /* + * Get the MP lock so we can finish initializing. + */ + while (cpu_try_mplock() == 0) + ; + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); - smp_cpus++; - #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Build our map of 'other' CPUs. 
 */
-	other_cpus = all_cpus & ~(1 << cpuid);
+	mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid);

-	printf("SMP: AP CPU #%d Launched!\n", cpuid);
+	printf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);

	/* set up CPU registers and state */
	cpu_setregs();
@@ -2508,8 +2426,8 @@ ap_init()

	/* A quick check from sanity claus */
	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
-	if (cpuid != apic_id) {
-		printf("SMP: cpuid = %d\n", cpuid);
+	if (mycpu->gd_cpuid != apic_id) {
+		printf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
		printf("SMP: apic_id = %d\n", apic_id);
		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
		panic("cpuid mismatch! boom!!");
@@ -2522,15 +2440,23 @@ ap_init()
		mem_range_AP_init();

	/*
-	 * Activate smp_invltlb, although strictly speaking, this isn't
-	 * quite correct yet.  We should have a bitfield for cpus willing
-	 * to accept TLB flush IPI's or something and sync them.
+	 * Since we have the BGL, if smp_cpus matches ncpus then we are
+	 * the last AP to get to this point and we can enable IPI's,
+	 * tlb shootdowns, freezes, and so forth.
	 */
-	if (smp_cpus == mp_ncpus) {
+	++smp_cpus;
+	if (smp_cpus == ncpus) {
		invltlb_ok = 1;
		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
		smp_active = 1;	 /* historic */
	}
+
+	/*
+	 * The idle loop doesn't expect the BGL to be held, and while
+	 * lwkt_switch() normally cleans things up this is a special case
+	 * because we are returning almost directly into the idle loop.
+	 */
+	cpu_rel_mplock();
}

#ifdef BETTER_CLOCK
@@ -2540,7 +2466,7 @@ ap_init()
#define CHECKSTATE_INTR	2

/* Do not staticize.  Used from apic_vector.s */
-struct proc*	checkstate_curproc[MAXCPU];
+struct thread	*checkstate_curtd[MAXCPU];
int		checkstate_cpustate[MAXCPU];
u_long		checkstate_pc[MAXCPU];

@@ -2548,6 +2474,7 @@ u_long checkstate_pc[MAXCPU];
	((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)

+#if 0
static void
addupc_intr_forwarded(struct proc *p, int id, int *astmap)
{
@@ -2567,28 +2494,30 @@ addupc_intr_forwarded(struct proc *p, int id, int *astmap)
		*astmap |= (1 << id);
	}
}
+#endif

static void
forwarded_statclock(int id, int pscnt, int *astmap)
{
+#if 0
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;
	int cpustate;
-	struct proc *p;
+	struct thread *td;
#ifdef GPROF
	register struct gmonparam *g;
	int i;
#endif

-	p = checkstate_curproc[id];
+	td = checkstate_curtd[id];
	cpustate = checkstate_cpustate[id];

	switch (cpustate) {
	case CHECKSTATE_USER:
-		if (p->p_flag & P_PROFIL)
-			addupc_intr_forwarded(p, id, astmap);
+		if (td->td_proc && td->td_proc->p_flag & P_PROFIL)
+			addupc_intr_forwarded(td->td_proc, id, astmap);
		if (pscnt > 1)
			return;
		p->p_uticks++;
@@ -2657,6 +2586,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
			ru->ru_maxrss = rss;
		}
	}
+#endif
}

void
@@ -2680,9 +2610,10 @@ forward_statclock(int pscnt)

	if (!smp_started || !invltlb_ok || cold || panicstr)
		return;
+	printf("forward_statclock\n");

	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */

-	map = other_cpus & ~stopped_cpus ;
+	map = mycpu->gd_other_cpus & ~stopped_cpus ;
	checkstate_probed_cpus = 0;
	if (map != 0)
		selected_apic_ipi(map,
@@ -2707,8 +2638,8 @@
	 */

	map = 0;
-	for (id = 0; id < mp_ncpus; id++) {
-		if (id == cpuid)
+	for (id = 0; id < ncpus; id++) {
+		if (id == mycpu->gd_cpuid)
			continue;
		if (((1 << id) & checkstate_probed_cpus) == 0)
			continue;
@@ -2737,8 +2668,10 @@ forward_hardclock(int pscnt)
{
	int map;
	int id;
+#if 0
	struct proc *p;
	struct pstats *pstats;
+#endif int i; /* Kludge. We don't yet have separate locks for the interrupts @@ -2757,7 +2690,7 @@ forward_hardclock(int pscnt) /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ - map = other_cpus & ~stopped_cpus ; + map = mycpu->gd_other_cpus & ~stopped_cpus ; checkstate_probed_cpus = 0; if (map != 0) selected_apic_ipi(map, @@ -2783,11 +2716,13 @@ forward_hardclock(int pscnt) */ map = 0; - for (id = 0; id < mp_ncpus; id++) { - if (id == cpuid) + for (id = 0; id < ncpus; id++) { + if (id == mycpu->gd_cpuid) continue; if (((1 << id) & checkstate_probed_cpus) == 0) continue; + printf("forward_hardclock\n"); +#if 0 p = checkstate_curproc[id]; if (p) { pstats = p->p_stats; @@ -2806,6 +2741,7 @@ forward_hardclock(int pscnt) if (stathz == 0) { forwarded_statclock( id, pscnt, &map); } +#endif } if (map != 0) { checkstate_need_ast |= map; @@ -2830,6 +2766,8 @@ forward_hardclock(int pscnt) void forward_signal(struct proc *p) { + /* YYY forward_signal */ +#if 0 int map; int id; int i; @@ -2873,11 +2811,14 @@ forward_signal(struct proc *p) if (id == p->p_oncpu) return; } +#endif } void forward_roundrobin(void) { + /* YYY forward_roundrobin */ +#if 0 u_int map; int i; @@ -2885,8 +2826,8 @@ forward_roundrobin(void) return; if (!forward_roundrobin_enabled) return; - resched_cpus |= other_cpus; - map = other_cpus & ~stopped_cpus ; + resched_cpus |= mycpu->gd_other_cpus; + map = mycpu->gd_other_cpus & ~stopped_cpus ; #if 1 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); #else @@ -2904,20 +2845,20 @@ forward_roundrobin(void) break; } } +#endif } - #ifdef APIC_INTR_REORDER /* - * Maintain mapping from softintr vector to isr bit in local apic. + * Maintain mapping from softintr vector to isr bit in local apic. */ void set_lapic_isrloc(int intr, int vector) { if (intr < 0 || intr > 32) - panic("set_apic_isrloc: bad intr argument: %d",intr); + panic("set_apic_isrloc: bad intr argument: %d",intr); if (vector < ICU_OFFSET || vector > 255) - panic("set_apic_isrloc: bad vector argument: %d",vector); + panic("set_apic_isrloc: bad vector argument: %d",vector); apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2); apic_isrbit_location[intr].bit = (1<<(vector & 31)); } @@ -2946,14 +2887,14 @@ smp_rendezvous_action(void) smp_rv_setup_func(smp_rv_func_arg); /* spin on entry rendezvous */ atomic_add_int(&smp_rv_waiters[0], 1); - while (smp_rv_waiters[0] < mp_ncpus) + while (smp_rv_waiters[0] < ncpus) ; /* action function */ if (smp_rv_action_func != NULL) smp_rv_action_func(smp_rv_func_arg); /* spin on exit rendezvous */ atomic_add_int(&smp_rv_waiters[1], 1); - while (smp_rv_waiters[1] < mp_ncpus) + while (smp_rv_waiters[1] < ncpus) ; /* teardown function */ if (smp_rv_teardown_func != NULL) @@ -2966,10 +2907,8 @@ smp_rendezvous(void (* setup_func)(void *), void (* teardown_func)(void *), void *arg) { - u_int efl; - - /* obtain rendezvous lock */ - s_lock(&smp_rv_lock); /* XXX sleep here? NOWAIT flag? */ + /* obtain rendezvous lock. This disables interrupts */ + spin_lock(&smp_rv_spinlock); /* XXX sleep here? NOWAIT flag? 
 */

	/* set static function pointers */
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[0] = 0;
	smp_rv_waiters[1] = 0;

-	/* disable interrupts on this CPU, save interrupt status */
-	efl = read_eflags();
-	write_eflags(efl & ~PSL_I);
-
	/* signal other processors, which will enter the IPI with interrupts off */
	all_but_self_ipi(XRENDEZVOUS_OFFSET);

	/* call executor function */
	smp_rendezvous_action();

-	/* restore interrupt flag */
-	write_eflags(efl);
-
	/* release lock */
-	s_unlock(&smp_rv_lock);
+	spin_unlock(&smp_rv_spinlock);
}
diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c
index 8e4c7cc79f..f2a7bcc7ed 100644
--- a/sys/i386/i386/mpapic.c
+++ b/sys/i386/i386/mpapic.c
@@ -23,7 +23,7 @@
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
- * $DragonFly: src/sys/i386/i386/Attic/mpapic.c,v 1.3 2003/07/04 00:32:24 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/mpapic.c,v 1.4 2003/07/06 21:23:48 dillon Exp $
 */

#include
@@ -74,11 +74,13 @@ apic_initialize(void)
	/* set the Task Priority Register as needed */
	temp = lapic.tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
-#ifdef GRAB_LOPRIO
-	/* Leave the BSP at TPR 0 during boot to make sure it gets interrupts */
+
+	/*
+	 * Leave the BSP at TPR 0 during boot so it gets all the interrupts;
+	 * set APs at TPR 0xF0 at boot so they get no ints.
+	 */
	if (mycpu->gd_cpuid != 0)
-		temp |= LOPRIO_LEVEL;	/* allow INT arbitration */
-#endif
+		temp |= TPR_IPI_ONLY;	/* disable INTs on this cpu */
	lapic.tpr = temp;

	/* enable the local APIC */
@@ -188,7 +190,6 @@ io_apic_setup_intpin(int apic, int pin)
	u_int32_t	target;		/* the window register is 32 bits */
	u_int32_t	vector;		/* the window register is 32 bits */
	int		level;
-	u_int	eflags;

	target = IOART_DEST;

@@ -209,14 +210,11 @@ io_apic_setup_intpin(int apic, int pin)
	 * shouldn't and stop the carnage.
	 */
	vector = NRSVIDT + pin;			/* IDT vec */
-	eflags = read_eflags();
-	__asm __volatile("cli" : : : "memory");
-	s_lock(&imen_lock);
+	imen_lock();
	io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector);
-	s_unlock(&imen_lock);
-	write_eflags(eflags);
+	imen_unlock();

	/* we only deal with vectored INTs here */
	if (apic_int_type(apic, pin) != 0)
@@ -260,13 +258,10 @@ io_apic_setup_intpin(int apic, int pin)
		printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq);
	vector = NRSVIDT + irq;			/* IDT vec */
-	eflags = read_eflags();
-	__asm __volatile("cli" : : : "memory");
-	s_lock(&imen_lock);
+	imen_lock();
	io_apic_write(apic, select, flags | vector);
	io_apic_write(apic, select + 1, target);
-	s_unlock(&imen_lock);
-	write_eflags(eflags);
+	imen_unlock();
}

int
diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s
index 7797a77a0c..8c581ad13a 100644
--- a/sys/i386/i386/mpboot.s
+++ b/sys/i386/i386/mpboot.s
@@ -32,7 +32,7 @@
 * multiprocessor systems.
 *
 * $FreeBSD: src/sys/i386/i386/mpboot.s,v 1.13.2.3 2000/09/07 01:18:26 tegge Exp $
- * $DragonFly: src/sys/i386/i386/Attic/mpboot.s,v 1.3 2003/07/01 20:30:40 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/mpboot.s,v 1.4 2003/07/06 21:23:48 dillon Exp $
 */

#include		/* miscellaneous asm macros */
@@ -46,18 +46,18 @@
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
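 * (CHECKPOINT(A,D) selects CMOS location A via port 0x70 and stores the
 * value D there via port 0x71, so a wedged AP's last checkpoint can be
 * read back out of CMOS ram.)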
 *
-#define CHECK_POINTS
 */
+#define CHECK_POINTS

 #if defined(CHECK_POINTS) && !defined(PC98)
 #define CMOS_REG	(0x70)
 #define CMOS_DATA	(0x71)

 #define CHECKPOINT(A,D)		\
-	movb	$(A),%al ;	\
+	movb	$A,%al ;	\
	outb	%al,$CMOS_REG ;	\
-	movb	$(D),%al ;	\
+	movb	D,%al ;		\
	outb	%al,$CMOS_DATA

 #else
@@ -68,30 +68,31 @@


/*
- * the APs enter here from their trampoline code (bootMP, below)
+ * The APs enter here from their trampoline code (bootMP, below)
+ * NOTE: %fs is not set up until the call to init_secondary()!
 */
	.p2align 4

NON_GPROF_ENTRY(MPentry)
-	CHECKPOINT(0x36, 3)
+	CHECKPOINT(0x36, $3)
	/* Now enable paging mode */
	movl	IdlePTD-KERNBASE, %eax
	movl	%eax,%cr3
	movl	%cr0,%eax
	orl	$CR0_PE|CR0_PG,%eax	/* enable paging */
	movl	%eax,%cr0		/* let the games begin! */
-	movl	bootSTK,%esp		/* boot stack end loc. */
+	movl	bootSTK,%esp		/* boot stack end loc. */

	pushl	$mp_begin		/* jump to high mem */
-	ret
+	NON_GPROF_RET

/*
 * Wait for the booting CPU to signal startup
 */
mp_begin:	/* now running relocated at KERNBASE */
-	CHECKPOINT(0x37, 4)
+	CHECKPOINT(0x37, $4)
	call	init_secondary		/* load i386 tables */
-	CHECKPOINT(0x38, 5)
+	CHECKPOINT(0x38, $5)

/*
 * If the [BSP] CPU has support for VME, turn it on.
@@ -108,47 +109,23 @@ mp_begin:	/* now running relocated at KERNBASE */
	andl	$~APIC_SVR_SWEN, %eax	/* clear software enable bit */
	movl	%eax, lapic_svr

-	/* signal our startup to the BSP */
+	/* data returned to BSP */
	movl	lapic_ver, %eax		/* our version reg contents */
	movl	%eax, cpu_apic_versions	/* into [ 0 ] */
-	incl	mp_ncpus		/* signal BSP */
-
-	CHECKPOINT(0x39, 6)
-	/* wait till we can get into the kernel */
-	call	boot_get_mplock
+	CHECKPOINT(0x39, $6)

-	/* Now, let's prepare for some REAL WORK :-) */
-	call	ap_init
-
-	call	rel_mplock
-	wbinvd				/* Avoid livelock */
-2:
-	cmpl	$0, CNAME(smp_started)	/* Wait for last AP to be ready */
-	jz	2b
-	call	get_mplock
-
-	/* let her rip! (loads new stack) */
-	jmp	cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
-	pushl	%ebp
-	movl	%esp, %ebp
-	call	rel_mplock
-	wbinvd				/* Avoid livelock */
-	movl	%eax, 8(%ebp)
-1:
-	cmpl	$0, CNAME(smp_started)
-	jnz	2f
-	decl	%eax
-	cmpl	$0, %eax
-	jge	1b
-2:
-	call	get_mplock
-	movl	%ebp, %esp
-	popl	%ebp
+	/*
+	 * Execute the context restore function for the idlethread which
+	 * has conveniently been set as curthread.  Remember, %eax must
+	 * contain the target thread.  Our BSP/AP synchronization occurs
+	 * in ap_init().  We do not need to mess with the BGL for this
+	 * because LWKT threads are self-contained on each cpu (or, at least,
+	 * the idlethread is!).
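+	 *
+	 * In rough C, the three instructions below are (a sketch only):
+	 *
+	 *	td = curthread;
+	 *	%esp = td->td_sp;
+	 *	return;		(the 'ret' pops the restore function that
+	 *			was left on the new stack and jumps to it)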
+ */ + movl PCPU(curthread),%eax + movl TD_SP(%eax),%esp ret - /* * This is the embedded trampoline or bootstrap that is @@ -167,7 +144,7 @@ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli - CHECKPOINT(0x34, 1) + CHECKPOINT(0x34, $1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx @@ -203,7 +180,7 @@ NON_GPROF_ENTRY(bootMP) .code32 protmode: - CHECKPOINT(0x35, 2) + CHECKPOINT(0x35, $2) /* * we are NOW running for the first time with %eip diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s index eda75de1f9..207c3d2e22 100644 --- a/sys/i386/i386/mplock.s +++ b/sys/i386/i386/mplock.s @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/i386/mplock.s,v 1.29.2.2 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/mplock.s,v 1.3 2003/07/01 20:30:40 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/mplock.s,v 1.4 2003/07/06 21:23:48 dillon Exp $ * * Functions for locking between CPUs in a SMP system. * @@ -25,320 +25,130 @@ #include /** GRAB_LOPRIO */ #include -#define GLPROFILE_NOT - -#ifdef CHEAP_TPR - -/* we assumme that the 'reserved bits' can be written with zeros */ - -#else /* CHEAP_TPR */ - -#error HEADS UP: this code needs work -/* - * The APIC doc says that reserved bits must be written with whatever - * value they currently contain, ie you should: read, modify, write, - * instead of just writing new values to the TPR register. Current - * silicon seems happy with just writing. If the behaviour of the - * silicon changes, all code that access the lapic_tpr must be modified. - * The last version to contain such code was: - * Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp - */ - -#endif /* CHEAP_TPR */ - -#ifdef GRAB_LOPRIO -/* - * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts. - */ - -/* after 1st acquire of lock we grab all hardware INTs */ -#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr - -/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */ -#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */ - -#else /* GRAB_LOPRIO */ - -#define GRAB_HWI /* nop */ -#define ARB_HWI /* nop */ - -#endif /* GRAB_LOPRIO */ +#include "assym.s" + .data + ALIGN_DATA +#ifdef SMP + .globl mp_lock +mp_lock: + .long -1 /* initialized to not held */ +#endif .text - -#ifdef SMP - -/*********************************************************************** - * void MPgetlock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %eax, %ecx. %edx must hold lock argument. - * - * Grabs hardware interrupts on first aquire. - * - * NOTE: Serialization is not required if we already hold the lock, since - * we already hold the lock, nor do we need a locked instruction if we - * already hold the lock. - */ - -NON_GPROF_ENTRY(MPgetlock_edx) + SUPERALIGN_TEXT + + /* + * Note on cmpxchgl... exchanges ecx with mem if mem matches eax. + * Z=1 (jz) on success. + */ +NON_GPROF_ENTRY(cpu_get_initial_mplock) + movl PCPU(curthread),%ecx + movl $1,TD_MPCOUNT(%ecx) /* curthread has mpcount of 1 */ + movl $0,mp_lock /* owned by cpu 0 */ + NON_GPROF_RET + + /* + * cpu_try_mplock() returns non-zero on success, 0 on failure. It + * only adjusts mp_lock. It does not touch td_mpcount, and it + * must be called from inside a critical section. 
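+	 *
+	 * In C terms, roughly (a sketch only; cas() stands for the locked
+	 * cmpxchgl used below, and MP_FREE_LOCK is -1):
+	 *
+	 *	int cpu_try_mplock(void) {
+	 *		return (cas(&mp_lock, MP_FREE_LOCK, mycpu->gd_cpuid));
+	 *	}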
+ */ +NON_GPROF_ENTRY(cpu_try_mplock) + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem if eax matches */ + jnz 1f + movl $1,%eax + NON_GPROF_RET 1: - movl (%edx), %eax /* Get current contents of lock */ - movl %eax, %ecx - andl $CPU_FIELD,%ecx - cmpl cpu_lockid, %ecx /* Do we already own the lock? */ - jne 2f - incl %eax /* yes, just bump the count */ - movl %eax, (%edx) /* serialization not required */ - ret -2: - movl $FREE_LOCK, %eax /* lock must be free */ - movl cpu_lockid, %ecx - incl %ecx - lock - cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */ -#ifdef GLPROFILE - jne 3f - incl gethits2 -#else - jne 1b -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - ret -#ifdef GLPROFILE -3: - incl gethits3 - jmp 1b -#endif - -/*********************************************************************** - * int MPtrylock(unsigned int *lock) - * --------------------------------- - * Destroys %eax, %ecx and %edx. - * Returns 1 if lock was successfull - */ + movl $0,%eax + NON_GPROF_RET -NON_GPROF_ENTRY(MPtrylock) - movl 4(%esp), %edx /* Get the address of the lock */ - - movl $FREE_LOCK, %eax /* Assume it's free */ - movl cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - incl %ecx /* - new count is one */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 1f /* ...do not collect $200 */ -#ifdef GLPROFILE - incl tryhits2 -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - movl $1, %eax - ret +NON_GPROF_ENTRY(get_mplock) + movl PCPU(curthread),%edx + cmpl $0,TD_MPCOUNT(%edx) + je 1f + incl TD_MPCOUNT(%edx) /* already have it, just ++mpcount */ + NON_GPROF_RET 1: - movl (%edx), %eax /* Try to see if we have it already */ - andl $COUNT_FIELD, %eax /* - get count */ - movl cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - orl %ecx, %eax /* - combine them */ - movl %eax, %ecx - incl %ecx /* - new count is one more */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 2f /* - miss */ -#ifdef GLPROFILE - incl tryhits -#endif /* GLPROFILE */ - movl $1, %eax - ret -2: -#ifdef GLPROFILE - incl tryhits3 -#endif /* GLPROFILE */ - movl $0, %eax - ret - - -/*********************************************************************** - * void MPrellock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %ecx, argument must be in %edx - * - * SERIALIZATION NOTE! - * - * After a lot of arguing, it turns out that there is no problem with - * not having a synchronizing instruction in the MP unlock code. There - * are two things to keep in mind: First, Intel guarentees that writes - * are ordered amoungst themselves. Second, the P6 is allowed to reorder - * reads around writes. Third, the P6 maintains cache consistency (snoops - * the bus). The second is not an issue since the one read we do is the - * basis for the conditional which determines whether the write will be - * made or not. - * - * Therefore, no synchronizing instruction is required on unlock. There are - * three performance cases: First, if a single cpu is getting and releasing - * the lock the removal of the synchronizing instruction saves approx - * 200 nS (testing w/ duel cpu PIII 450). Second, if one cpu is contending - * for the lock while the other holds it, the removal of the synchronizing - * instruction results in a 700nS LOSS in performance. Third, if two cpu's - * are switching off ownership of the MP lock but not contending for it (the - * most common case), this results in a 400nS IMPROVEMENT in performance. 
- * - * Since our goal is to reduce lock contention in the first place, we have - * decided to remove the synchronizing instruction from the unlock code. - */ - -NON_GPROF_ENTRY(MPrellock_edx) - movl (%edx), %ecx /* - get the value */ - decl %ecx /* - new count is one less */ - testl $COUNT_FIELD, %ecx /* - Unless it's zero... */ + pushfl + cli + movl $1,TD_MPCOUNT(%edx) + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem & JZ if eax matches */ jnz 2f - ARB_HWI /* last release, arbitrate hw INTs */ - movl $FREE_LOCK, %ecx /* - In which case we release it */ -#if 0 - lock - addl $0,0(%esp) /* see note above */ -#endif + popfl /* success */ + NON_GPROF_RET 2: - movl %ecx, (%edx) - ret - -/*********************************************************************** - * void get_mplock() - * ----------------- - * All registers preserved - * - * Stack (after call to _MPgetlock): - * - * edx 4(%esp) - * ecx 8(%esp) - * eax 12(%esp) - * - * Requirements: Interrupts should be enabled on call so we can take - * IPI's and FAST INTs while we are waiting for the lock - * (else the system may not be able to halt). - * - * XXX there are still places where get_mplock() is called - * with interrupts disabled, so we have to temporarily reenable - * interrupts. - * - * Side effects: The current cpu will be given ownership of the - * hardware interrupts when it first aquires the lock. - * - * Costs: Initial aquisition requires the use of a costly locked - * instruction, but recursive aquisition is cheap. Release - * is very cheap. - */ + movl PCPU(cpuid),%eax /* failure */ + cmpl %eax,mp_lock + je badmp_get + popfl + jmp lwkt_switch /* will be correct on return */ -NON_GPROF_ENTRY(get_mplock) - pushl %eax - pushl %ecx - pushl %edx - movl $mp_lock, %edx - pushfl - testl $(1<<9), (%esp) - jz 2f - call MPgetlock_edx - addl $4,%esp +NON_GPROF_ENTRY(try_mplock) + movl PCPU(curthread),%edx + cmpl $0,TD_MPCOUNT(%edx) + je 1f + incl TD_MPCOUNT(%edx) /* already have it, just ++mpcount */ + movl $1,%eax + NON_GPROF_RET 1: - popl %edx - popl %ecx - popl %eax - ret + pushfl + cli + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem & JZ if eax matches */ + jnz 2f + movl $1,TD_MPCOUNT(%edx) + popfl /* success */ + movl $1,%eax + NON_GPROF_RET 2: - sti - call MPgetlock_edx + movl PCPU(cpuid),%eax /* failure */ + cmpl %eax,mp_lock + je badmp_get popfl - jmp 1b + movl $0,%eax + NON_GPROF_RET -/* - * Special version of get_mplock that is used during bootstrap when we can't - * yet enable interrupts of any sort since the APIC isn't online yet. We - * do an endrun around MPgetlock_edx to avoid enabling interrupts. - * - * XXX FIXME.. - APIC should be online from the start to simplify IPI's. 
- */ -NON_GPROF_ENTRY(boot_get_mplock) - pushl %eax - pushl %ecx - pushl %edx -#ifdef GRAB_LOPRIO +NON_GPROF_ENTRY(rel_mplock) + movl PCPU(curthread),%edx + cmpl $1,TD_MPCOUNT(%edx) + je 1f + subl $1,TD_MPCOUNT(%edx) + NON_GPROF_RET +1: pushfl - pushl lapic_tpr cli -#endif - - movl $mp_lock, %edx - call MPgetlock_edx - -#ifdef GRAB_LOPRIO - popl lapic_tpr + movl $0,TD_MPCOUNT(%edx) + movl $MP_FREE_LOCK,mp_lock popfl -#endif - popl %edx - popl %ecx - popl %eax - ret - -/*********************************************************************** - * void try_mplock() - * ----------------- - * reg %eax == 1 if success - */ - -NON_GPROF_ENTRY(try_mplock) - pushl %ecx - pushl %edx - pushl $mp_lock - call MPtrylock - add $4, %esp - popl %edx - popl %ecx - ret - -/*********************************************************************** - * void rel_mplock() - * ----------------- - * All registers preserved - */ - -NON_GPROF_ENTRY(rel_mplock) - pushl %ecx - pushl %edx - movl $mp_lock,%edx - call MPrellock_edx - popl %edx - popl %ecx - ret + NON_GPROF_RET -#endif +badmp_get: + pushl $bmpsw1 + call panic +badmp_rel: + pushl $bmpsw2 + call panic -/*********************************************************************** - * - */ .data - .p2align 2 /* xx_lock aligned on int boundary */ -#ifdef SMP +bmpsw1: + .asciz "try/get_mplock(): already have lock!" - .globl mp_lock -mp_lock: .long 0 +bmpsw2: + .asciz "rel_mplock(): not holding lock!" -#ifdef GLPROFILE - .globl gethits -gethits: - .long 0 -gethits2: - .long 0 -gethits3: - .long 0 +#if 0 +/* after 1st acquire of lock we grab all hardware INTs */ +#ifdef GRAB_LOPRIO +#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr - .globl tryhits -tryhits: - .long 0 -tryhits2: - .long 0 -tryhits3: - .long 0 +/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */ +#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */ +#endif +#endif -msg: - .asciz "lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n" -#endif /* GLPROFILE */ -#endif /* SMP */ diff --git a/sys/i386/i386/perfmon.c b/sys/i386/i386/perfmon.c index 9b1bbd5378..77a19a323d 100644 --- a/sys/i386/i386/perfmon.c +++ b/sys/i386/i386/perfmon.c @@ -27,7 +27,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/perfmon.c,v 1.21 1999/09/25 18:24:04 phk Exp $ - * $DragonFly: src/sys/i386/i386/Attic/perfmon.c,v 1.3 2003/06/23 17:55:38 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/perfmon.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -124,11 +124,11 @@ perfmon_setup(int pmc, unsigned int control) perfmon_inuse |= (1 << pmc); control &= ~(PMCF_SYS_FLAGS << 16); - disable_intr(); + mpintr_lock(); /* doesn't have to be mpintr_lock YYY */ ctl_shadow[pmc] = control; writectl(pmc); wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); - enable_intr(); + mpintr_unlock(); return 0; } @@ -167,11 +167,11 @@ perfmon_start(int pmc) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - disable_intr(); + mpintr_lock(); /* doesn't have to be mpintr YYY */ ctl_shadow[pmc] |= (PMCF_EN << 16); wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); writectl(pmc); - enable_intr(); + mpintr_unlock(); return 0; } return EBUSY; @@ -184,11 +184,11 @@ perfmon_stop(int pmc) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - disable_intr(); + mpintr_lock(); pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; ctl_shadow[pmc] &= ~(PMCF_EN << 16); writectl(pmc); - enable_intr(); + mpintr_unlock(); return 0; } return EBUSY; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 3210ed56a3..3074043277 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.15 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.16 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -373,25 +373,27 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - if (ncpus == 1) { - /* - * Enable the PSE mode. - */ - load_cr4(rcr4() | CR4_PSE); +#ifndef SMP + /* + * Enable the PSE mode. If we are SMP we can't do this + * now because the APs will not be able to use it when + * they boot up. + */ + load_cr4(rcr4() | CR4_PSE); - /* - * We can do the mapping here for the single processor - * case. We simply ignore the old page table page from - * now on. - */ - /* - * For SMP, we still need 4K pages to bootstrap APs, - * PSE will be enabled as soon as all APs are up. - */ - PTD[KPTDI] = (pd_entry_t) ptditmp; - kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; - invltlb(); - } + /* + * We can do the mapping here for the single processor + * case. We simply ignore the old page table page from + * now on. + */ + /* + * For SMP, we still need 4K pages to bootstrap APs, + * PSE will be enabled as soon as all APs are up. + */ + PTD[KPTDI] = (pd_entry_t) ptditmp; + kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; + invltlb(); +#endif } #endif #ifdef APIC_IO @@ -827,6 +829,9 @@ pmap_init_proc(struct proc *p, struct thread *td) p->p_thread = td; td->td_proc = p; td->td_switch = cpu_heavy_switch; +#ifdef SMP + td->td_mpcount = 1; +#endif bzero(p->p_addr, sizeof(*p->p_addr)); } @@ -1405,21 +1410,20 @@ pmap_reference(pmap) ***************************************************/ /* - * free the pv_entry back to the free list + * free the pv_entry back to the free list. This function may be + * called from an interrupt. */ static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system - * when needed. 
- * the memory allocation is performed bypassing the malloc code - * because of the possibility of allocations at interrupt time. + * when needed. This function may be called from an interrupt. */ static pv_entry_t get_pv_entry(void) @@ -1431,7 +1435,7 @@ get_pv_entry(void) pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return zalloci(pvzone); + return zalloc(pvzone); } /* diff --git a/sys/i386/i386/simplelock.s b/sys/i386/i386/simplelock.s deleted file mode 100644 index 92c23d69ea..0000000000 --- a/sys/i386/i386/simplelock.s +++ /dev/null @@ -1,321 +0,0 @@ -/*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/simplelock.s,v 1.11.2.2 2003/02/04 20:55:28 jhb Exp $ - * $DragonFly: src/sys/i386/i386/Attic/simplelock.s,v 1.3 2003/07/01 20:30:40 dillon Exp $ - */ - -/* - * credit to Bruce Evans for help with asm optimization. - */ - -#include /* miscellaneous macros */ -#include -#include - -#include /** FAST_HI */ - -/* - * The following impliments the primitives described in i386/i386/param.h - * necessary for the Lite2 lock manager system. - * The major difference is that the "volatility" of the lock datum has been - * pushed down from the various functions to lock_data itself. - */ - -/* - * The simple-lock routines are the primitives out of which the lock - * package is built. The machine-dependent code must implement an - * atomic test_and_set operation that indivisibly sets the simple lock - * to non-zero and returns its old value. It also assumes that the - * setting of the lock to zero below is indivisible. Simple locks may - * only be used for exclusive locks. - * - * struct simplelock { - * volatile int lock_data; - * }; - */ - -/* - * void - * s_lock_init(struct simplelock *lkp) - * { - * lkp->lock_data = 0; - * } - */ -ENTRY(s_lock_init) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - ret - - -/* - * void - * s_lock(struct simplelock *lkp) - * { - * while (test_and_set(&lkp->lock_data)) - * continue; - * } - * - * Note: - * If the acquire fails we do a loop of reads waiting for the lock to - * become free instead of continually beating on the lock with xchgl. 
- * The theory here is that the CPU will stay within its cache until - * a write by the other CPU updates it, instead of continually updating - * the local cache (and thus causing external bus writes) with repeated - * writes to the lock. - */ -#ifndef SL_DEBUG - -ENTRY(s_lock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx -setlock: - xchgl %ecx, (%eax) - testl %ecx, %ecx - jz gotit /* it was clear, return */ -wait: - pause - cmpl $0, (%eax) /* wait to empty */ - jne wait /* still set... */ - jmp setlock /* empty again, try once more */ -gotit: - ret - -#else /* SL_DEBUG */ - -ENTRY(s_lock) - movl 4(%esp), %edx /* get the address of the lock */ -setlock: - movl _cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - movl $0, %eax - lock - cmpxchgl %ecx, (%edx) - jz gotit /* it was clear, return */ - pushl %eax /* save what we xchanged */ - decl %eax /* remove lock portion */ - cmpl _cpu_lockid, %eax /* do we hold it? */ - je bad_slock /* yes, thats not good... */ - addl $4, %esp /* clear the stack */ -wait: - pause - cmpl $0, (%edx) /* wait to empty */ - jne wait /* still set... */ - jmp setlock /* empty again, try once more */ -gotit: - ret - - ALIGN_TEXT -bad_slock: - /* %eax (current lock) is already on the stack */ - pushl %edx - pushl cpuid - pushl $bsl1 - call panic - -bsl1: .asciz "rslock: cpu: %d, addr: 0x%08x, lock: 0x%08x" - -#endif /* SL_DEBUG */ - - -/* - * int - * s_lock_try(struct simplelock *lkp) - * { - * return (!test_and_set(&lkp->lock_data)); - * } - */ -#ifndef SL_DEBUG - -ENTRY(s_lock_try) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx - - xchgl %ecx, (%eax) - testl %ecx, %ecx - setz %al /* 1 if previous value was 0 */ - movzbl %al, %eax /* convert to an int */ - - ret - -#else /* SL_DEBUG */ - -ENTRY(s_lock_try) - movl 4(%esp), %edx /* get the address of the lock */ - movl cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - - xorl %eax, %eax - lock - cmpxchgl %ecx, (%edx) - setz %al /* 1 if previous value was 0 */ - movzbl %al, %eax /* convert to an int */ - - ret - -#endif /* SL_DEBUG */ - - -/* - * void - * s_unlock(struct simplelock *lkp) - * { - * lkp->lock_data = 0; - * } - */ -ENTRY(s_unlock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - ret - -#if 0 - -/* - * XXX CRUFTY SS_LOCK IMPLEMENTATION REMOVED XXX - * - * These versions of simple_lock block interrupts, - * making it suitable for regions accessed by both top and bottom levels. - * This is done by saving the current value of the cpu flags in a per-cpu - * global, and disabling interrupts when the lock is taken. When the - * lock is released, interrupts might be enabled, depending upon the saved - * cpu flags. - * Because of this, it must ONLY be used for SHORT, deterministic paths! - * - * Note: - * It would appear to be "bad behaviour" to blindly store a value in - * ss_eflags, as this could destroy the previous contents. But since ss_eflags - * is a per-cpu variable, and its fatal to attempt to acquire a simplelock - * that you already hold, we get away with it. This needs to be cleaned - * up someday... 
- */ - -/* - * void ss_lock(struct simplelock *lkp) - */ -#ifndef SL_DEBUG - -ENTRY(ss_lock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx /* value for a held lock */ -ssetlock: - pushfl - cli - xchgl %ecx, (%eax) /* compete */ - testl %ecx, %ecx - jz sgotit /* it was clear, return */ - popfl /* previous value while waiting */ -swait: - pause - cmpl $0, (%eax) /* wait to empty */ - jne swait /* still set... */ - jmp ssetlock /* empty again, try once more */ -sgotit: - popl ss_eflags /* save the old eflags */ - ret - -#else /* SL_DEBUG */ - -ENTRY(ss_lock) - movl 4(%esp), %edx /* get the address of the lock */ -ssetlock: - movl cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - pushfl - cli - movl $0, %eax - lock - cmpxchgl %ecx, (%edx) /* compete */ - jz sgotit /* it was clear, return */ - pushl %eax /* save what we xchanged */ - decl %eax /* remove lock portion */ - cmpl cpu_lockid, %eax /* do we hold it? */ - je sbad_slock /* yes, thats not good... */ - addl $4, %esp /* clear the stack */ - popfl -swait: - pause - cmpl $0, (%edx) /* wait to empty */ - jne swait /* still set... */ - jmp ssetlock /* empty again, try once more */ -sgotit: - popl ss_eflags /* save the old task priority */ -sgotit2: - ret - - ALIGN_TEXT -sbad_slock: - /* %eax (current lock) is already on the stack */ - pushl %edx - pushl cpuid - pushl $sbsl1 - call panic - -sbsl1: .asciz "rsslock: cpu: %d, addr: 0x%08x, lock: 0x%08x" - -#endif /* SL_DEBUG */ - -/* - * void ss_unlock(struct simplelock *lkp) - */ -ENTRY(ss_unlock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) /* clear the simple lock */ - testl $PSL_I, ss_eflags - jz ss_unlock2 - sti -ss_unlock2: - ret - -#endif - -/* - * These versions of simple_lock does not contain calls to profiling code. - * Thus they can be called from the profiling code. - */ - -/* - * void s_lock_np(struct simplelock *lkp) - */ -NON_GPROF_ENTRY(s_lock_np) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx -1: - xchgl %ecx, (%eax) - testl %ecx, %ecx - jz 3f -2: - pause - cmpl $0, (%eax) /* wait to empty */ - jne 2b /* still set... */ - jmp 1b /* empty again, try once more */ -3: - NON_GPROF_RET - -/* - * void s_unlock_np(struct simplelock *lkp) - */ -NON_GPROF_ENTRY(s_unlock_np) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - NON_GPROF_RET diff --git a/sys/i386/i386/spinlock.s b/sys/i386/i386/spinlock.s new file mode 100644 index 0000000000..e8191babbf --- /dev/null +++ b/sys/i386/i386/spinlock.s @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 2003, by Matthew dillon All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/simplelock.s,v 1.11.2.2 2003/02/04 20:55:28 jhb Exp $ + * $DragonFly: src/sys/i386/i386/Attic/spinlock.s,v 1.1 2003/07/06 21:23:48 dillon Exp $ + */ + +#include /* miscellaneous macros */ +#include + +/* + * The spinlock routines may only be used for low level debugging, like + * kernel printfs, and when no other option is available such as situations + * relating to hardware interrupt masks. Spinlock routines should not be + * used in interrupt service routines or in any other situation. + * + * NOTE: for UP the spinlock routines still disable/restore interrupts + */ +ENTRY(spin_lock) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +ENTRY(spin_unlock) + movl 4(%esp),%edx + SPIN_UNLOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +NON_GPROF_ENTRY(spin_lock_np) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + NON_GPROF_RET + +NON_GPROF_ENTRY(spin_unlock_np) + movl 4(%esp), %edx /* get the address of the lock */ + SPIN_UNLOCK((%edx)) + NON_GPROF_RET + +/* + * Auxillary convenience routines. Note that these functions disable and + * restore interrupts as well, on SMP, as performing spin locking functions. + */ +NON_GPROF_ENTRY(imen_lock) + SPIN_LOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(imen_unlock) + SPIN_UNLOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_lock) + SPIN_LOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_unlock) + SPIN_UNLOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_lock) + SPIN_LOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_unlock) + SPIN_UNLOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_lock) + SPIN_LOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_unlock) + SPIN_UNLOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_lock) + SPIN_LOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_unlock) + SPIN_UNLOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_lock) + SPIN_LOCK(cons_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_unlock) + SPIN_UNLOCK(cons_spinlock) + NON_GPROF_RET + diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 2e84637b98..4ea5c72857 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.20 2003/07/05 05:54:00 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/swtch.s,v 1.21 2003/07/06 21:23:48 dillon Exp $ */ #include "npx.h" @@ -194,11 +194,13 @@ ENTRY(cpu_exit_switch) * any waiters. */ orl $TDF_EXITED,TD_FLAGS(%ecx) +#if 0 /* YYY MP lock may not be held by new target */ pushl %eax pushl %ecx /* wakeup(oldthread) */ call wakeup addl $4,%esp popl %eax /* note: next thread expects curthread in %eax */ +#endif /* * Restore the next thread's state and resume it. 
Note: the @@ -318,20 +320,6 @@ ENTRY(cpu_heavy_restore) movl PCB_EIP(%edx),%eax movl %eax,(%esp) - /* - * SMP ickyness to direct interrupts. - */ - -#ifdef SMP -#ifdef GRAB_LOPRIO /* hold LOPRIO for INTs */ -#ifdef CHEAP_TPR - movl $0, lapic_tpr -#else - andl $~APIC_TPR_PRIO, lapic_tpr -#endif /** CHEAP_TPR */ -#endif /** GRAB_LOPRIO */ -#endif /* SMP */ - /* * Restore the user LDT if we have one */ diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 932ab07294..7c076190ec 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.17 2003/07/03 21:22:38 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.18 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -162,7 +162,9 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, * point of view of the userland scheduler unless we actually have to * switch. * - * usertdsw is called from within a critical section. + * usertdsw is called from within a critical section, but the BGL will + * have already been released by lwkt_switch() so only call MP safe functions + * that don't block! */ static void usertdsw(struct thread *ntd) @@ -205,9 +207,8 @@ userenter(void) td->td_switch = usertdsw; } -static int -userret(struct proc *p, struct trapframe *frame, - u_quad_t oticks, int have_mplock) +static void +userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) { int sig, s; struct thread *td = curthread; @@ -217,10 +218,6 @@ userret(struct proc *p, struct trapframe *frame, */ crit_enter(); while ((sig = CURSIG(p)) != 0) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } crit_exit(); postsig(sig); crit_enter(); @@ -257,10 +254,6 @@ userret(struct proc *p, struct trapframe *frame, */ if (resched_wanted()) { uio_yield(); - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } while ((sig = CURSIG(p)) != 0) postsig(sig); } @@ -269,10 +262,6 @@ userret(struct proc *p, struct trapframe *frame, * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } addupc_task(p, frame->tf_eip, (u_int)(curthread->td_sticks - oticks) * psratio); } @@ -290,8 +279,6 @@ userret(struct proc *p, struct trapframe *frame, } splx(s); KKASSERT(mycpu->gd_uprocscheduled == 1); - - return(have_mplock); } #ifdef DEVICE_POLLING @@ -315,11 +302,13 @@ trap(frame) int i = 0, ucode = 0, type, code; vm_offset_t eva; + get_mplock(); + #ifdef DDB if (db_active) { eva = (frame.tf_trapno == T_PAGEFLT ? rcr2() : 0); trap_fatal(&frame, eva); - return; + goto out2; } #endif @@ -342,7 +331,7 @@ trap(frame) */ printf("kernel trap %d with interrupts disabled\n", type); - enable_intr(); + cpu_enable_intr(); } eva = 0; @@ -359,7 +348,7 @@ trap(frame) * correct. 
*/ eva = rcr2(); - enable_intr(); + cpu_enable_intr(); } #ifdef DEVICE_POLLING @@ -377,12 +366,13 @@ restart: if (frame.tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) + if (i != 0) { /* * returns to original process */ vm86_trap((struct vm86frame *)&frame); - return; + } + goto out2; } switch (type) { /* @@ -392,7 +382,7 @@ restart: case T_PROTFLT: case T_SEGNPFLT: trap_fatal(&frame, eva); - return; + goto out2; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ @@ -494,7 +484,7 @@ restart: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out2; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -525,7 +515,7 @@ restart: i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - return; + goto out2; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -549,7 +539,7 @@ kernel_trap: switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE, eva); - return; + goto out2; case T_DNA: #if NNPX > 0 @@ -559,7 +549,7 @@ kernel_trap: * registered such use. */ if (npxdna()) - return; + goto out2; #endif break; @@ -579,7 +569,7 @@ kernel_trap: do { \ if (frame.tf_eip == (int)where) { \ frame.tf_eip = (int)whereto; \ - return; \ + goto out2; \ } \ } while (0) @@ -596,7 +586,7 @@ kernel_trap: if (frame.tf_eip == (int)cpu_switch_load_gs) { curthread->td_pcb->pcb_gs = 0; psignal(p, SIGBUS); - return; + goto out2; } MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); @@ -608,7 +598,7 @@ kernel_trap: doreti_popl_fs_fault); if (curthread->td_pcb->pcb_onfault) { frame.tf_eip = (int)curthread->td_pcb->pcb_onfault; - return; + goto out2; } } break; @@ -625,7 +615,7 @@ kernel_trap: */ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; - return; + goto out2; } break; @@ -637,7 +627,7 @@ kernel_trap: * silently until the syscall handler has * saved the flags. */ - return; + goto out2; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* @@ -645,7 +635,7 @@ kernel_trap: * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; - return; + goto out2; } /* * Ignore debug register trace traps due to @@ -663,7 +653,7 @@ kernel_trap: * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); - return; + goto out2; } /* * Fall through (TRCTRAP kernel mode, kernel address) @@ -675,7 +665,7 @@ kernel_trap: */ #ifdef DDB if (kdb_trap (type, 0, &frame)) - return; + goto out2; #endif break; @@ -695,7 +685,8 @@ kernel_trap: sysbeep(TIMER_FREQ/880, hz); lastalert = time_second; } - return; + /* YYY mp count */ + goto out2; } #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ @@ -710,16 +701,16 @@ kernel_trap: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out2; } else if (panic_on_nmi == 0) - return; + goto out2; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame, eva); - return; + goto out2; } /* Translate fault for emulators (e.g. 
Linux) */ @@ -739,7 +730,13 @@ kernel_trap: #endif out: - userret(p, &frame, sticks, 1); +#ifdef SMP + if (ISPL(frame.tf_cs) == SEL_UPL) + KASSERT(curthread->td_mpcount == 1, ("badmpcount trap from %p", (void *)frame.tf_eip)); +#endif + userret(p, &frame, sticks); +out2: + rel_mplock(); } #ifdef notyet @@ -979,7 +976,7 @@ trap_fatal(frame, eva) #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); - printf("cpuid = %d; ", cpuid); + printf("cpuid = %d; ", mycpu->gd_cpuid); printf("lapic.id = %08x\n", lapic.id); #endif if (type == T_PAGEFLT) { @@ -1089,7 +1086,7 @@ dblfault_handler() #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); - printf("cpuid = %d; ", cpuid); + printf("cpuid = %d; ", mycpu->gd_cpuid); printf("lapic.id = %08x\n", lapic.id); #endif panic("double fault"); @@ -1164,7 +1161,6 @@ syscall2(frame) int error; int narg; int args[8]; - int have_mplock = 0; u_int code; #ifdef DIAGNOSTIC @@ -1175,10 +1171,14 @@ syscall2(frame) } #endif +#ifdef SMP + KASSERT(curthread->td_mpcount == 0, ("badmpcount syscall from %p", (void *)frame.tf_eip)); + get_mplock(); +#endif /* * access non-atomic field from critical section. p_sticks is * updated by the clock interrupt. Also use this opportunity - * to raise our LWKT priority. + * to lazy-raise our LWKT priority. */ crit_enter(); userenter(); @@ -1194,9 +1194,7 @@ syscall2(frame) /* * The prep code is not MP aware. */ - get_mplock(); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); - rel_mplock(); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. @@ -1233,8 +1231,6 @@ syscall2(frame) */ if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { - get_mplock(); - have_mplock = 1; #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, narg, args); @@ -1242,6 +1238,7 @@ syscall2(frame) goto bad; } +#if 0 /* * Try to run the syscall without the MP lock if the syscall * is MP safe. We have to obtain the MP lock no matter what if @@ -1251,13 +1248,10 @@ syscall2(frame) get_mplock(); have_mplock = 1; } +#endif #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } ktrsyscall(p->p_tracep, code, narg, args); } #endif @@ -1311,10 +1305,6 @@ bad: * Traced syscall. trapsignal() is not MP aware. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } @@ -1322,14 +1312,10 @@ bad: /* * Handle reschedule and other end-of-syscall issues */ - have_mplock = userret(p, &frame, sticks, have_mplock); + userret(p, &frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); } #endif @@ -1341,17 +1327,20 @@ bad: */ STOPEVENT(p, S_SCX, code); +#ifdef SMP /* * Release the MP lock if we had to get it */ - if (have_mplock) - rel_mplock(); + KASSERT(curthread->td_mpcount == 1, ("badmpcount syscall from %p", (void *)frame.tf_eip)); + rel_mplock(); +#endif } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. MP lock is held on entry and should be - * held on return. + * directly into user mode. MP lock is held on entry and should be + * released on return. 
This code will return back into the fork + * trampoline code which then runs doreti. */ void fork_return(p, frame) @@ -1362,9 +1351,14 @@ fork_return(p, frame) frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0, 1); + userret(p, &frame, 0); #ifdef KTRACE if (KTRPOINT(p->p_thread, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); #endif +#ifdef SMP + KKASSERT(curthread->td_mpcount == 1); + rel_mplock(); +#endif } + diff --git a/sys/i386/i386/vm86.c b/sys/i386/i386/vm86.c index a096995384..133181e20d 100644 --- a/sys/i386/i386/vm86.c +++ b/sys/i386/i386/vm86.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $ - * $DragonFly: src/sys/i386/i386/Attic/vm86.c,v 1.5 2003/06/25 03:55:53 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/vm86.c,v 1.6 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -544,6 +544,9 @@ vm86_prepcall(struct vm86frame vmf) /* * vm86 trap handler; determines whether routine succeeded or not. * Called while in vm86 space, returns to calling process. + * + * A MP lock ref is held on entry from trap() and must be released prior + * to returning to the VM86 call. */ void vm86_trap(struct vm86frame *vmf) @@ -560,6 +563,7 @@ vm86_trap(struct vm86frame *vmf) else vmf->vmf_trapno = vmf->vmf_trapno << 16; + rel_mplock(); vm86_biosret(vmf); } @@ -569,6 +573,8 @@ vm86_intcall(int intnum, struct vm86frame *vmf) if (intnum < 0 || intnum > 0xff) return (EINVAL); + ASSERT_MP_LOCK_HELD(); + vmf->vmf_trapno = intnum; return (vm86_bioscall(vmf)); } @@ -589,6 +595,8 @@ vm86_datacall(intnum, vmf, vmc) u_int page; int i, entry, retval; + ASSERT_MP_LOCK_HELD(); + for (i = 0; i < vmc->npages; i++) { page = vtophys(vmc->pmap[i].kva & PG_FRAME); entry = vmc->pmap[i].pte_num; diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s index ab089fba29..3edd8a156f 100644 --- a/sys/i386/i386/vm86bios.s +++ b/sys/i386/i386/vm86bios.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/vm86bios.s,v 1.15.2.1 2000/05/16 06:58:07 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/vm86bios.s,v 1.8 2003/07/01 20:30:40 dillon Exp $ + * $DragonFly: src/sys/i386/i386/Attic/vm86bios.s,v 1.9 2003/07/06 21:23:48 dillon Exp $ */ #include /* miscellaneous asm macros */ @@ -63,12 +63,6 @@ ENTRY(vm86_bioscall) pushl %edi pushl %gs -#ifdef SMP - pushl %edx - MP_LOCK /* Get global lock */ - popl %edx -#endif - #if NNPX > 0 movl PCPU(curthread),%ecx cmpl %ecx,PCPU(npxthread) /* do we need to save fp? */ diff --git a/sys/i386/include/apic.h b/sys/i386/include/apic.h index 6bb9c1f8dc..7a8c8234bd 100644 --- a/sys/i386/include/apic.h +++ b/sys/i386/include/apic.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 1996, by Peter Wemm and Steve Passe - * All rights reserved. + * Copyright (c) 1996, by Peter Wemm and Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,100 +23,241 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/apic.h,v 1.14.2.2 2003/03/21 21:46:15 jhb Exp $ - * $DragonFly: src/sys/i386/include/Attic/apic.h,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/apic.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _MACHINE_APIC_H_ #define _MACHINE_APIC_H_ /* - * Local && I/O APIC definitions. 
- */
-
-/*
- * Pentium P54C+ Build-in APIC
- * (Advanced programmable Interrupt Controller)
- *
- * Base Address of Build-in APIC in memory location
- * is 0xfee00000.
- *
- * Map of APIC REgisters:
- *
- * Offset (hex)    Description                     Read/Write state
- * 000             Reserved
- * 010             Reserved
- * 020 ID          Local APIC ID                   R/W
- * 030 VER         Local APIC Version              R
- * 040             Reserved
- * 050             Reserved
- * 060             Reserved
- * 070             Reserved
- * 080             Task Priority Register          R/W
- * 090             Arbitration Priority Register   R
- * 0A0             Processor Priority Register     R
- * 0B0             EOI Register                    W
- * 0C0 RRR         Remote read                     R
- * 0D0             Logical Destination             R/W
- * 0E0             Destination Format Register     0..27 R;  28..31 R/W
- * 0F0 SVR         Spurious Interrupt Vector Reg.  0..3 R;  4..9 R/W
- * 100             ISR  000-031                    R
- * 110             ISR  032-063                    R
- * 120             ISR  064-095                    R
- * 130             ISR  095-128                    R
- * 140             ISR  128-159                    R
- * 150             ISR  160-191                    R
- * 160             ISR  192-223                    R
- * 170             ISR  224-255                    R
- * 180             TMR  000-031                    R
- * 190             TMR  032-063                    R
- * 1A0             TMR  064-095                    R
- * 1B0             TMR  095-128                    R
- * 1C0             TMR  128-159                    R
- * 1D0             TMR  160-191                    R
- * 1E0             TMR  192-223                    R
- * 1F0             TMR  224-255                    R
- * 200             IRR  000-031                    R
- * 210             IRR  032-063                    R
- * 220             IRR  064-095                    R
- * 230             IRR  095-128                    R
- * 240             IRR  128-159                    R
- * 250             IRR  160-191                    R
- * 260             IRR  192-223                    R
- * 270             IRR  224-255                    R
- * 280             Error Status Register           R
- * 290             Reserved
- * 2A0             Reserved
- * 2B0             Reserved
- * 2C0             Reserved
- * 2D0             Reserved
- * 2E0             Reserved
- * 2F0             Reserved
- * 300 ICR_LOW     Interrupt Command Reg. (0-31)   R/W
- * 310 ICR_HI      Interrupt Command Reg. (32-63)  R/W
- * 320             Local Vector Table (Timer)      R/W
- * 330             Reserved
- * 340             Reserved
- * 350 LVT1        Local Vector Table (LINT0)      R/W
- * 360 LVT2        Local Vector Table (LINT1)      R/W
- * 370 LVT3        Local Vector Table (ERROR)      R/W
- * 380             Initial Count Reg. for Timer    R/W
- * 390             Current Count of Timer          R
- * 3A0             Reserved
- * 3B0             Reserved
- * 3C0             Reserved
- * 3D0             Reserved
- * 3E0             Timer Divide Configuration Reg. R/W
- * 3F0             Reserved
- */
-
-
-/******************************************************************************
- * global defines, etc.
- */
-
-
-/******************************************************************************
- * LOCAL APIC structure
+ * Local && I/O APIC definitions for the Pentium P54C+ Built-in APIC.
+ *
+ * A per-cpu APIC resides in memory location 0xFEE00000.
+ *
+ *	  31 ... 24   23 ... 16   15 ... 8     7 ... 0
+ *	+-----------+-----------+-----------+-----------+
+ * 0000	|           |           |           |           |
+ * 0010	|           |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *	+-----------+-----------+-----------+-----------+
+ * 0020	ID |        | ID        |           |           |	RW
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		The physical APIC ID is used with physical interrupt
+ *		delivery modes.
+ *
+ *	+-----------+-----------+-----------+-----------+
+ * 0030	VER |       |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 0040	|           |           |           |           |
+ * 0050	|           |           |           |           |
+ * 0060	|           |           |           |           |
+ * 0070	|           |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 0080	TPR |       |           |           | PRIO SUBC |
+ * 0090	APR |       |           |           |           |
+ * 00A0	PPR |       |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		The Task Priority Register provides a priority threshold
+ *		mechanism for interrupting the processor.  Only interrupts
+ *		with a higher priority than that specified in the TPR will
+ *		be served.  Other interrupts are recorded and serviced
+ *		as soon as the TPR value decreases enough to allow that
+ *		(unless EOIed by another APIC).
+ *
+ *		PRIO (7:4).  Main priority.  If set to 15 the APIC will
+ *			not accept any interrupts.
+ *		SUBC (3:0).  Sub priority.  See APR/PPR.
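+ *
+ *		For example (an illustration, not from the register map
+ *		above): writing 0xFF to the TPR (PRIO 15) inhibits
+ *		delivery of all fixed-mode interrupts, while writing 0
+ *		accepts any vector.  The GRAB_LOPRIO code elsewhere in
+ *		this patch manipulates the TPR in exactly this way.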
+ *
+ *
+ *		The Processor Priority Register determines whether a
+ *		pending interrupt can be dispensed to the processor.  ISRV
+ *		is the vector of the highest priority ISR bit set, or
+ *		zero if no ISR bit is set.
+ *
+ *		    IF TPR[7:4] >= ISRV[7:4]
+ *			PPR[7:0] = TPR[7:0]
+ *		    ELSE
+ *			PPR[7:0] = ISRV[7:4].000
+ *
+ *		The Arbitration Priority Register holds the current
+ *		lowest priority of the processor, a value used during
+ *		lowest-priority arbitration.
+ *
+ *		    IF (TPR[7:4] >= IRRV[7:4] AND TPR[7:4] > ISRV[7:4])
+ *			APR[7:0] = TPR[7:0]
+ *		    ELSE
+ *			APR[7:4] = max((TPR[7:4]&ISRV[7:4]),IRRV[7:4]).000
+ *
+ *	+-----------+-----------+-----------+-----------+
+ * 00B0	EOI |       |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 00C0	|           |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 00D0	LDR |LOG APICID |       |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 00E0	DFR |MODEL| |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		The logical APIC ID is used with logical interrupt
+ *		delivery modes.  Interpretation of logical destination
+ *		information depends on the MODEL bits in the Destination
+ *		Format Register.
+ *
+ *		MODEL=1111 FLAT MODEL - The MDA is interpreted as
+ *				a decoded address.  By setting
+ *				one bit in the LDR for each
+ *				local apic, 8 APICs can coexist.
+ *
+ *		MODEL=0000 CLUSTER MODEL -
+ *
+ *	  31 ... 24   23 ... 16   15 ... 8     7 ... 0
+ *	+-----------+-----------+-----------+-----------+
+ * 00F0	SVR |       |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ * 0100-0170 ISR |  |           |           |           |
+ * 0180-01F0 TMR |  |           |           |           |
+ * 0200-0270 IRR |  |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		These registers represent 256 bits, one bit for each
+ *		possible interrupt.  Interrupts 0-15 are reserved so
+ *		bits 0-15 are also reserved.
+ *
+ *		TMR - Trigger mode register.  Upon acceptance of an int
+ *		      the corresponding bit is cleared for edge-trig and
+ *		      set for level-trig.  If the TMR bit is set (level),
+ *		      the local APIC sends an EOI to all I/O APICs as
+ *		      a result of software issuing an EOI command.
+ *
+ *		IRR - Interrupt Request Register.  Contains active
+ *		      interrupt requests that have been accepted but not
+ *		      yet dispensed by the current local APIC.  The bit is
+ *		      cleared and the corresponding ISR bit is set when
+ *		      the INTA cycle is issued.
+ *
+ *		ISR - Interrupt In-Service register.  Interrupt has been
+ *		      delivered but not yet fully serviced.  Cleared when
+ *		      an EOI is issued from the processor.  An EOI will
+ *		      also send an EOI to all I/O APICs if TMR was set.
+ *
+ *	+-----------+-----------+-----------+-----------+
+ * 0280	ESR |       |           |           |           |
+ * 0290-02F0 |      |           |           |           |
+ *	+--FEDCBA98-+--76543210-+--FEDCBA98-+-----------+
+ * 0300	ICR_LO |    |        XX |  TL SDMMM |  vector   |
+ * 0310	ICR_HI | DEST FIELD |   |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		The interrupt command register:
+ *
+ *		XX:	Destination Shorthand field:
+ *
+ *			00	Use Destination field
+ *			01	Self only.  Dest field ignored.
+ *			10	All including self (uses a
+ *				destination field of 0x0F)
+ *			11	All excluding self (uses a
+ *				destination field of 0x0F)
+ *
+ *		T:	1 = Level, 0 = Edge Trigger mode, used for
+ *			the INIT level de-assert delivery mode only.
+ *			Not sure.
+ *
+ *		L:	0 = De-Assert, 1 = Assert.  Not sure what this
+ *			is.  For INIT mode use 0, for all other modes
+ *			use 1.
+ *
+ *		S:	1 = Send Pending.  Interrupt has been injected
+ *			but the APIC has not yet accepted it.
+ *
+ *		D:	0 = physical, 1 = logical.  In physical mode
+ *			only bits 24-27 of DEST FIELD are used from ICR_HI.
+ *
+ *		MMM:	000	Fixed.  Deliver to all processors according
+ *				to the ICR.  Always treated as edge trig.
+ *
+ *			001	Lowest Priority.  Deliver to just the
+ *				processor running at the lowest priority.
+ *
+ *			010	SMI.  The vector must be 00B.  Only edge
+ *				triggered is allowed.  The vector field
+ *				must be programmed to zero (huh?).
+ *
+ *			011
+ *
+ *			100	NMI.  Deliver as an NMI to all processors
+ *				listed in the destination field.  The
+ *				vector is ignored.  Always treated as
+ *				edge triggered.
+ *
+ *			101	INIT.  Deliver as an INIT signal to all
+ *				processors (like FIXED).  Vector is ignored
+ *				and it is always edge-triggered.
+ *
+ *			110	Start Up.  Sends a special message between
+ *				cpus.  The vector contains a start-up
+ *				address for the MP boot protocol.
+ *				Always edge triggered.  Note: a startup
+ *				int is not automatically retried in case
+ *				of failure.
+ *
+ *			111
+ *
+ *	+-----------+--------10-+--FEDCBA98-+-----------+
+ * 0320	LTIMER  |   |        TM | ---S----  |  vector   |
+ * 0330		|   |           |           |           |
+ *	+-----------+--------10-+--FEDCBA98-+-----------+
+ * 0340	LVPCINT |   |        -M | ---S-MMM  |  vector   |
+ * 0350	LVINT0  |   |        -M | LRPS-MMM  |  vector   |
+ * 0360	LVINT1  |   |        -M | LRPS-MMM  |  vector   |
+ * 0370	LVERROR |   |        -M | --------  |  vector   |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *		T:	1 = periodic, 0 = one-shot
+ *
+ *		M:	1 = masked
+ *
+ *		L:	1 = level, 0 = edge
+ *
+ *		R:	For level triggered only, set to 1 when a
+ *			level int is accepted, cleared by EOI.
+ *
+ *		P:	Pin Polarity: 0 = Active High, 1 = Active Low
+ *
+ *		S:	1 = Send Pending.  Interrupt has been injected
+ *			but the APIC has not yet accepted it.
+ *
+ *		MMM 000 = Fixed		deliver to cpu according to LVT
+ *
+ *		MMM 100 = NMI		deliver as an NMI.  Always edge.
+ *
+ *		MMM 111 = ExtInt	deliver from the 8259, routes the
+ *					INTA bus cycle to the external
+ *					controller.  The controller is
+ *					expected to supply the vector.
+ *					Always level.
+ *
+ *	+-----------+-----------+-----------+-----------+
+ * 0380	ICR |       |           |           |           |
+ * 0390	CCR |       |           |           |           |
+ * 03A0	|           |           |           |           |
+ * 03B0	|           |           |           |           |
+ * 03C0	|           |           |           |           |
+ * 03D0	|           |           |           |           |
+ * 03E0	DCR |       |           |           |           |
+ *	+-----------+-----------+-----------+-----------+
+ *
+ *
+ * NOTE ON EOI: Upon receiving an EOI the APIC clears the highest priority
+ * interrupt in the ISR and selects the next highest priority interrupt
+ * for posting to the CPU.  If the interrupt being EOIed was level
+ * triggered the APIC will send an EOI to all I/O APICs.  For the moment
+ * you can write garbage to the EOI register but for future compatibility
+ * 0 should be written.
+ *
  */
 
 #ifndef LOCORE
@@ -128,8 +269,8 @@ struct LAPIC {
 	/* reserved */		PAD4;
 	/* reserved */		PAD4;
-	u_int32_t id;		PAD3;
-	u_int32_t version;	PAD3;
+	u_int32_t id;		PAD3;	/* 0020	R/W */
+	u_int32_t version;	PAD3;	/* 0030	RO */
 	/* reserved */		PAD4;
 	/* reserved */		PAD4;
 	/* reserved */		PAD4;
@@ -220,11 +361,6 @@ typedef struct IOAPIC ioapic_t;
 #define ALLHWI_LEVEL	0x00000000	/* TPR of CPU grabbing INTs */
 #endif /** GRAB_LOPRIO */
 
-/* XXX these 2 don't really belong here... */
-#define COUNT_FIELD	0x00ffffff	/* count portion of the lock */
-#define CPU_FIELD	0xff000000	/* cpu portion of the lock */
-#define FREE_LOCK	0xffffffff	/* value of lock when free */
-
 /*
  * XXX This code assumes that the reserved field of the
  * local APIC TPR can be written with all 0s.
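The PPR derivation documented in the comment above reduces to a few lines of C.  A minimal sketch, for illustration only (apic_compute_ppr() is a hypothetical helper, not part of this patch):

/*
 * Compute the Processor Priority Register value from the TPR and the
 * highest-priority in-service vector (isrv == 0 when no ISR bit is set),
 * following the IF/ELSE pseudo-code in the apic.h comment.
 */
static __inline u_int
apic_compute_ppr(u_int tpr, u_int isrv)
{
	if ((tpr & 0xf0) >= (isrv & 0xf0))
		return (tpr & 0xff);	/* PPR = TPR */
	else
		return (isrv & 0xf0);	/* PPR = ISRV class, subclass 0 */
}
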
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index b28d441a35..7899b21c51 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.96.2.3 2002/04/28 22:50:54 dwmalone Exp $ - * $DragonFly: src/sys/i386/include/Attic/cpufunc.h,v 1.4 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/cpufunc.h,v 1.5 2003/07/06 21:23:49 dillon Exp $ */ /* @@ -122,15 +122,6 @@ btrl(u_int *mask, int bit) return(result); } -static __inline void -disable_intr(void) -{ - __asm __volatile("cli" : : : "memory"); -#ifdef SMP - MPINTR_LOCK(); -#endif -} - static __inline void do_cpuid(u_int ax, u_int *p) { @@ -140,11 +131,14 @@ do_cpuid(u_int ax, u_int *p) } static __inline void -enable_intr(void) +cpu_disable_intr(void) +{ + __asm __volatile("cli" : : : "memory"); +} + +static __inline void +cpu_enable_intr(void) { -#ifdef SMP - MPINTR_UNLOCK(); -#endif __asm __volatile("sti"); } @@ -286,7 +280,9 @@ invd(void) * will cause the invl*() functions to be equivalent to the cpu_invl*() * functions. */ -#ifndef SMP +#ifdef SMP +void smp_invltlb(void); +#else #define smp_invltlb() #endif @@ -630,9 +626,9 @@ load_dr7(u_int sel) int breakpoint __P((void)); u_int bsfl __P((u_int mask)); u_int bsrl __P((u_int mask)); -void disable_intr __P((void)); +void cpu_disable_intr __P((void)); void do_cpuid __P((u_int ax, u_int *p)); -void enable_intr __P((void)); +void cpu_enable_intr __P((void)); u_char inb __P((u_int port)); u_int inl __P((u_int port)); void insb __P((u_int port, void *addr, size_t cnt)); diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h index ba28263b6b..a8e00705bf 100644 --- a/sys/i386/include/lock.h +++ b/sys/i386/include/lock.h @@ -1,6 +1,5 @@ /* - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 2003, Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,164 +22,183 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/i386/include/Attic/lock.h,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/lock.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ - #ifndef _MACHINE_LOCK_H_ #define _MACHINE_LOCK_H_ +#ifndef _MACHINE_PSL_H_ +#include +#endif -#ifdef LOCORE - +/* + * MP_FREE_LOCK is used by both assembly and C under SMP. + */ #ifdef SMP +#define MP_FREE_LOCK 0xffffffff /* value of lock when free */ +#endif -#define MPLOCKED lock ; +#ifdef LOCORE /* - * Some handy macros to allow logical organization. + * Spinlock assembly support. Note: eax and ecx can be tromped. No + * other register will be. Note that these routines are sometimes + * called with (%edx) as the mem argument. + * + * Under UP the spinlock routines still serve to disable/restore + * interrupts. 
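+ *
+ * In C terms the SMP SPIN_LOCK/SPIN_UNLOCK macros below behave roughly
+ * as the following sketch (illustrative only; the helpers named here
+ * are stand-ins for the pushfl/cli/cmpxchgl/popfl instruction sequences):
+ *
+ *	void spin_lock_sketch(volatile int *mem) {
+ *		int flags = read_eflags_and_cli() | PSL_C; // non-zero
+ *		while (atomic_cmpxchg(mem, 0, flags) != 0) // acquire
+ *			;	// spin; lock word holds locker's eflags
+ *	}
+ *	void spin_unlock_sketch(volatile int *mem) {
+ *		int flags = *mem;	// eflags saved by the locker
+ *		*mem = 0;		// release the lock
+ *		write_eflags(flags);	// restore interrupt state
+ *	}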
 */
-#define MP_LOCK		call	_get_mplock
-#define MP_TRYLOCK							\
-	pushl	$_mp_lock ;			/* GIANT_LOCK */	\
-	call	_MPtrylock ;			/* try to get lock */	\
-	add	$4, %esp
+#ifdef SMP
 
-#define MP_RELLOCK							\
-	movl	$_mp_lock,%edx ;		/* GIANT_LOCK */	\
-	call	_MPrellock_edx
+#define SPIN_INIT(mem)						\
+	movl	$0,mem ;					\
+
+#define SPIN_INIT_NOREG(mem)					\
+	SPIN_INIT(mem) ;					\
+
+#define SPIN_LOCK(mem)						\
+	pushfl ;						\
+	popl	%ecx ;		/* flags */			\
+	cli ;							\
+	orl	$PSL_C,%ecx ;	/* make sure non-zero */	\
+7: ;								\
+	movl	$0,%eax ;	/* expected contents of lock */	\
+	cmpxchgl %ecx,mem ;	/* Z=1 (jz) on success */	\
+	jz	8f ;						\
+	jmp	7b ;						\
+8: ;								\
+
+#define SPIN_LOCK_PUSH_REGS					\
+	subl	$8,%esp ;					\
+	movl	%ecx,(%esp) ;					\
+	movl	%eax,4(%esp) ;					\
+
+#define SPIN_LOCK_POP_REGS					\
+	movl	(%esp),%ecx ;					\
+	movl	4(%esp),%eax ;					\
+	addl	$8,%esp ;					\
+
+#define SPIN_LOCK_FRAME_SIZE	8
+
+#define SPIN_LOCK_NOREG(mem)					\
+	SPIN_LOCK_PUSH_REGS ;					\
+	SPIN_LOCK(mem) ;					\
+	SPIN_LOCK_POP_REGS ;					\
+
+#define SPIN_UNLOCK(mem)					\
+	pushl	mem ;						\
+	movl	$0,mem ;					\
+	popfl ;							\
+
+#define SPIN_UNLOCK_PUSH_REGS
+#define SPIN_UNLOCK_POP_REGS
+#define SPIN_UNLOCK_FRAME_SIZE	0
+
+#define SPIN_UNLOCK_NOREG(mem)					\
+	SPIN_UNLOCK(mem) ;					\
 
-/*
- * Protects the IO APIC and apic_imen as a critical region.
- */
-#define IMASK_LOCK							\
-	pushl	$_imen_lock ;			/* address of lock */	\
-	call	_s_lock ;			/* MP-safe */		\
-	addl	$4, %esp
+#else
 
-#define IMASK_UNLOCK							\
-	movl	$0, _imen_lock
+#define SPIN_LOCK(mem)						\
+	pushfl ;						\
+	cli ;							\
+	orl	$PSL_C,(%esp) ;					\
+	popl	mem ;						\
 
-#else /* SMP */
+#define SPIN_LOCK_PUSH_REGS
+#define SPIN_LOCK_POP_REGS
+#define SPIN_LOCK_FRAME_SIZE	0
 
-#define MPLOCKED				/* NOP */
+#define SPIN_UNLOCK(mem)					\
+	pushl	mem ;						\
+	movl	$0,mem ;					\
+	popfl ;							\
 
-#define MP_LOCK				/* NOP */
+#define SPIN_UNLOCK_PUSH_REGS
+#define SPIN_UNLOCK_POP_REGS
+#define SPIN_UNLOCK_FRAME_SIZE	0
 
-#endif /* SMP */
+#endif	/* SMP */
 
-#else /* LOCORE */
+#else	/* LOCORE */
 
-#ifdef SMP
+/*
+ * Spinlock functions (UP and SMP).  Under UP a spinlock still serves
+ * to disable/restore interrupts even if it doesn't spin.
+ */
+struct spinlock {
+	volatile int	opaque;
+};
 
-#include <machine/smptests.h>			/** xxx_LOCK */
+typedef struct spinlock *spinlock_t;
+
+void	mpintr_lock(void);	/* disables int / spinlock combo */
+void	mpintr_unlock(void);
+void	com_lock(void);		/* disables int / spinlock combo */
+void	com_unlock(void);
+void	imen_lock(void);	/* disables int / spinlock combo */
+void	imen_unlock(void);
+void	clock_lock(void);	/* disables int / spinlock combo */
+void	clock_unlock(void);
+void	cons_lock(void);	/* disables int / spinlock combo */
+void	cons_unlock(void);
+
+extern struct spinlock smp_rv_spinlock;
+
+void	spin_lock(spinlock_t lock);
+void	spin_lock_np(spinlock_t lock);
+void	spin_unlock(spinlock_t lock);
+void	spin_unlock_np(spinlock_t lock);
+#if 0
+void	spin_lock_init(spinlock_t lock);
+#endif
 
 /*
- * Locks regions protected in UP kernel via cli/sti.
+ * Inline version of spinlock routines -- overrides assembly.  Only unlock
+ * and init here please.
  */
-#ifdef USE_MPINTRLOCK
-#define MPINTR_LOCK()	s_lock(&mpintr_lock)
-#define MPINTR_UNLOCK()	s_unlock(&mpintr_lock)
-#else
-#define MPINTR_LOCK()
-#define MPINTR_UNLOCK()
-#endif /* USE_MPINTRLOCK */
+static __inline void
+spin_lock_init(spinlock_t lock)
+{
+	lock->opaque = 0;
+}
 
 /*
- * sio/cy lock.
- * XXX should rc (RISCom/8) use this?
- */ -#ifdef USE_COMLOCK -#define COM_LOCK() s_lock(&com_lock) -#define COM_UNLOCK() s_unlock(&com_lock) -#define COM_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); COM_LOCK(); } -#define COM_ENABLE_INTR() \ - { COM_UNLOCK(); __asm __volatile("sti"); } -#else -#define COM_LOCK() -#define COM_UNLOCK() -#define COM_DISABLE_INTR() disable_intr() -#define COM_ENABLE_INTR() enable_intr() -#endif /* USE_COMLOCK */ - -/* - * Clock hardware/struct lock. - * XXX pcaudio and friends still need this lock installed. + * MP LOCK functions for SMP and UP. Under UP the MP lock does not exist + * but we leave a few functions intact as macros for convenience. */ -#ifdef USE_CLOCKLOCK -#define CLOCK_LOCK() s_lock(&clock_lock) -#define CLOCK_UNLOCK() s_unlock(&clock_lock) -#define CLOCK_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); CLOCK_LOCK(); } -#define CLOCK_ENABLE_INTR() \ - { CLOCK_UNLOCK(); __asm __volatile("sti"); } -#else -#define CLOCK_LOCK() -#define CLOCK_UNLOCK() -#define CLOCK_DISABLE_INTR() disable_intr() -#define CLOCK_ENABLE_INTR() enable_intr() -#endif /* USE_CLOCKLOCK */ - -#else /* SMP */ - -#define MPINTR_LOCK() -#define MPINTR_UNLOCK() - -#define COM_LOCK() -#define COM_UNLOCK() -#define CLOCK_LOCK() -#define CLOCK_UNLOCK() +#ifdef SMP -#endif /* SMP */ +void get_mplock(void); +int try_mplock(void); +void rel_mplock(void); +int cpu_try_mplock(void); +#if 0 +void cpu_rel_mplock(void); +#endif +void cpu_get_initial_mplock(void); -/* - * Simple spin lock. - * It is an error to hold one of these locks while a process is sleeping. - */ -struct simplelock { - volatile int lock_data; -}; +extern u_int mp_lock; -/* functions in simplelock.s */ -void s_lock_init __P((struct simplelock *)); -void s_lock __P((struct simplelock *)); -int s_lock_try __P((struct simplelock *)); -void ss_lock __P((struct simplelock *)); -void ss_unlock __P((struct simplelock *)); -void s_lock_np __P((struct simplelock *)); -void s_unlock_np __P((struct simplelock *)); +#define MP_LOCK_HELD() (mp_lock == mycpu->gd_cpuid) +#define ASSERT_MP_LOCK_HELD() KKASSERT(MP_LOCK_HELD()) -/* inline simplelock functions */ static __inline void -s_unlock(struct simplelock *lkp) +cpu_rel_mplock(void) { - lkp->lock_data = 0; + mp_lock = MP_FREE_LOCK; } -/* global data in mp_machdep.c */ -extern struct simplelock imen_lock; -extern struct simplelock cpl_lock; -extern struct simplelock fast_intr_lock; -extern struct simplelock intr_lock; -extern struct simplelock clock_lock; -extern struct simplelock com_lock; -extern struct simplelock mpintr_lock; -extern struct simplelock mcount_lock; - -#if !defined(SIMPLELOCK_DEBUG) && MAXCPU > 1 -/* - * This set of defines turns on the real functions in i386/isa/apic_ipl.s. 
- */ -#define simple_lock_init(alp) s_lock_init(alp) -#define simple_lock(alp) s_lock(alp) -#define simple_lock_try(alp) s_lock_try(alp) -#define simple_unlock(alp) s_unlock(alp) - -#endif /* !SIMPLELOCK_DEBUG && MAXCPU > 1 */ +#else -#endif /* LOCORE */ +#define get_mplock() +#define try_mplock() 1 +#define rel_mplock() +#define ASSERT_MP_LOCK_HELD() -#endif /* !_MACHINE_LOCK_H_ */ +#endif /* SMP */ +#endif /* LOCORE */ +#endif /* !_MACHINE_LOCK_H_ */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 78056f4dfd..ad3d8930b1 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ - * $DragonFly: src/sys/i386/include/Attic/smp.h,v 1.2 2003/06/17 04:28:36 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/smp.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ * */ @@ -47,23 +47,6 @@ extern int bootMP_size; /* functions in mpboot.s */ void bootMP __P((void)); -/* global data in mplock.s */ -extern u_int mp_lock; -extern u_int isr_lock; -#ifdef RECURSIVE_MPINTRLOCK -extern u_int mpintr_lock; -#endif /* RECURSIVE_MPINTRLOCK */ - -/* functions in mplock.s */ -void get_mplock __P((void)); -void rel_mplock __P((void)); -int try_mplock __P((void)); -#ifdef RECURSIVE_MPINTRLOCK -void get_mpintrlock __P((void)); -void rel_mpintrlock __P((void)); -int try_mpintrlock __P((void)); -#endif /* RECURSIVE_MPINTRLOCK */ - /* global data in apic_vector.s */ extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; @@ -80,7 +63,6 @@ void io_apic_write __P((int, int, u_int)); /* global data in mp_machdep.c */ extern int bsp_apic_ready; -extern int mp_ncpus; extern int mp_naps; extern int mp_nbusses; extern int mp_napics; @@ -126,8 +108,8 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); -void smp_invltlb __P((void)); int stop_cpus __P((u_int)); +void ap_init __P((void)); int restart_cpus __P((u_int)); #ifdef BETTER_CLOCK void forward_statclock __P((int pscnt)); @@ -178,20 +160,6 @@ extern volatile int smp_idle_loops; #endif /* !LOCORE */ #else /* !SMP && !APIC_IO */ -/* - * Create dummy MP lock empties - */ - -static __inline void -get_mplock(void) -{ -} - -static __inline void -rel_mplock(void) -{ -} - #endif #endif /* _KERNEL */ diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index c97ed4f1ea..19fe59116e 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/smptests.h,v 1.33.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/i386/include/Attic/smptests.h,v 1.2 2003/06/17 04:28:36 dillon Exp $ + * $DragonFly: src/sys/i386/include/Attic/smptests.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _MACHINE_SMPTESTS_H_ @@ -64,16 +64,6 @@ #define PUSHDOWN_LEVEL_3_NOT #define PUSHDOWN_LEVEL_4_NOT -/* - * Debug version of simple_lock. This will store the CPU id of the - * holding CPU along with the lock. When a CPU fails to get the lock - * it compares its own id to the holder id. If they are the same it - * panic()s, as simple locks are binary, and this would cause a deadlock. - * - */ -#define SL_DEBUG - - /* * Put FAST_INTR() ISRs at an APIC priority above the regular INTs. * Allow the mp_lock() routines to handle FAST interrupts while spinning. 
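The consolidated MP lock API in machine/lock.h above pairs with the per-thread td_mpcount bookkeeping asserted in trap.c earlier in this patch.  A hedged sketch of the intended usage pattern, assuming the recursive-count semantics those assertions imply (example_unsafe_path() is illustrative only):

/*
 * The MP lock is recursive per thread: each get_mplock() bumps the
 * thread's td_mpcount, and the lock word (mp_lock) holds the owning
 * cpu id, or MP_FREE_LOCK (0xffffffff) when no cpu owns it.
 */
void
example_unsafe_path(void)
{
	get_mplock();			/* may spin; recursion is cheap */
	ASSERT_MP_LOCK_HELD();		/* mp_lock == mycpu->gd_cpuid */
	/* ... touch structures that are not yet MP safe ... */
	rel_mplock();			/* releases on the last nested ref */
}
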
@@ -191,21 +181,6 @@ #define GIANT_LOCK #ifdef APIC_IO -/* - * Enable extra counters for some selected locations in the interrupt handlers. - * Look in apic_vector.s, apic_ipl.s and ipl.s for APIC_ITRACE or - * APIC_INTR_DIAGNOSTIC. - */ -#undef APIC_INTR_DIAGNOSTIC - -/* - * Add extra tracking of a specific interrupt. Look in apic_vector.s, - * apic_ipl.s and ipl.s for APIC_ITRACE and log_intr_event. - * APIC_INTR_DIAGNOSTIC must be defined for this to work. - */ -#ifdef APIC_INTR_DIAGNOSTIC -#define APIC_INTR_DIAGNOSTIC_IRQ 17 -#endif /* * Don't assume that slow interrupt handler X is called from vector diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s index 1bd6c2d07f..05c7d27668 100644 --- a/sys/i386/isa/apic_ipl.s +++ b/sys/i386/isa/apic_ipl.s @@ -1,6 +1,6 @@ /*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 1997, by Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,412 +23,72 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/isa/apic_ipl.s,v 1.27.2.2 2000/09/30 02:49:35 ps Exp $ - * $DragonFly: src/sys/i386/isa/Attic/apic_ipl.s,v 1.6 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/apic_ipl.s,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ -#if 0 - .data ALIGN_DATA -/* - * Routines used by splz_unpend to build an interrupt frame from a - * trap frame. The _vec[] routines build the proper frame on the stack, - * then call one of _Xintr0 thru _XintrNN. - * - * used by: - * i386/isa/apic_ipl.s (this file): splz_unpend JUMPs to HWIs. - * i386/isa/clock.c: setup _vec[clock] to point at _vec8254. - */ - .globl _vec -_vec: - .long vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7 - .long vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15 - .long vec16, vec17, vec18, vec19, vec20, vec21, vec22, vec23 + /* + * Interrupt mask for APIC interrupts, defaults to all hardware + * interrupts turned off. + */ -/* - * Note: - * This is the UP equivilant of _imen. - * It is OPAQUE, and must NOT be accessed directly. - * It MUST be accessed along with the IO APIC as a 'critical region'. - * Accessed by: - * INTREN() - * INTRDIS() - * MAYBE_MASK_IRQ - * MAYBE_UNMASK_IRQ - * imen_dump() - */ .p2align 2 /* MUST be 32bit aligned */ - .globl _apic_imen -_apic_imen: - .long HWI_MASK + .globl apic_imen +apic_imen: + .long HWI_MASK -/* - * - */ .text SUPERALIGN_TEXT -/* - * splz() - dispatch pending interrupts after cpl reduced - * - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. However, since ipending can change at any time - * (by an interrupt or, with SMP, by another cpu), we have to - * repeat the check. At the moment we must own the MP lock in - * the SMP case because the interruput handlers require it. We - * loop until no unmasked pending interrupts remain. - * - * No new unmaksed pending interrupts will be added during the - * loop because, being unmasked, the interrupt code will be able - * to execute the interrupts. - * - * Interrupts come in two flavors: Hardware interrupts and software - * interrupts. 
We have to detect the type of interrupt (based on the - * position of the interrupt bit) and call the appropriate dispatch - * routine. - * - * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't - * rely on the secondary btrl tests. - */ - pushl %ebx - movl _curthread,%ebx - movl TD_CPL(%ebx),%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl $0,_reqpri - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne splz_unpend - popl %ebx - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - lock - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - * - * The vec[] routines build the proper frame on the stack so - * the interrupt will eventually return to the caller or splz, - * then calls one of _Xintr0 thru _XintrNN. + * Functions to enable and disable a hardware interrupt. Generally + * called with only one bit set in the mask but can handle multiple + * bits to present the same API as the ICU. */ - popl %ebx - jmp *_vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax /* save cpl across call */ - orl imasks(,%ecx,4),%eax - movl %eax,TD_CPL(%ebx) /* set cpl for SWI */ - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,TD_CPL(%ebx) /* restore cpl and loop */ - jmp splz_next - -/* - * Fake clock interrupt(s) so that they appear to come from our caller instead - * of from here, so that system profiling works. - * XXX do this more generally (for all vectors; look up the C entry point). - * XXX frame bogusness stops us from just jumping to the C entry point. - * We have to clear iactive since this is an unpend call, and it will be - * set from the time of the original INT. - */ - -/* - * The 'generic' vector stubs. - */ - -#define BUILD_VEC(irq_num) \ - ALIGN_TEXT ; \ -__CONCAT(vec,irq_num): ; \ - popl %eax ; \ - pushfl ; \ - pushl $KCSEL ; \ - pushl %eax ; \ - cli ; \ - lock ; /* MP-safe */ \ - andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \ - MEXITCOUNT ; \ - APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \ - jmp __CONCAT(_Xintr,irq_num) - - BUILD_VEC(0) - BUILD_VEC(1) - BUILD_VEC(2) - BUILD_VEC(3) - BUILD_VEC(4) - BUILD_VEC(5) - BUILD_VEC(6) - BUILD_VEC(7) - BUILD_VEC(8) - BUILD_VEC(9) - BUILD_VEC(10) - BUILD_VEC(11) - BUILD_VEC(12) - BUILD_VEC(13) - BUILD_VEC(14) - BUILD_VEC(15) - BUILD_VEC(16) /* 8 additional INTs in IO APIC */ - BUILD_VEC(17) - BUILD_VEC(18) - BUILD_VEC(19) - BUILD_VEC(20) - BUILD_VEC(21) - BUILD_VEC(22) - BUILD_VEC(23) - - -/****************************************************************************** - * XXX FIXME: figure out where these belong. - */ - -/* this nonsense is to verify that masks ALWAYS have 1 and only 1 bit set */ -#define QUALIFY_MASKS_NOT - -#ifdef QUALIFY_MASKS -#define QUALIFY_MASK \ - btrl %ecx, %eax ; \ - andl %eax, %eax ; \ - jz 1f ; \ - pushl $bad_mask ; \ - call _panic ; \ -1: - -bad_mask: .asciz "bad mask" -#else -#define QUALIFY_MASK -#endif - -/* - * (soon to be) MP-safe function to clear ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It sets the associated bit in _apic_imen. 
- * It sets the mask bit of the associated IO APIC register. - */ -ENTRY(INTREN) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTRDIS) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ - bsfl %eax, %ecx /* get pin index */ - btrl %ecx, apic_imen /* update apic_imen */ - - QUALIFY_MASK - + movl 4(%esp),%eax +1: + bsfl %eax,%ecx + jz 2f + btrl %ecx,%eax + btsl %ecx, apic_imen shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - - movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - andl $~IOART_INTMASK, %eax /* clear mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + jz 2f + movl %ecx, (%edx) /* target register index */ + orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ ret -/* - * (soon to be) MP-safe function to set ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It clears the associated bit in apic_imen. - * It clears the mask bit of the associated IO APIC register. - */ -ENTRY(INTRDIS) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTREN) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ + movl 4(%esp), %eax /* mask into %eax */ +1: bsfl %eax, %ecx /* get pin index */ - btsl %ecx, apic_imen /* update _apic_imen */ - - QUALIFY_MASK - + jz 2f + btrl %ecx,%eax + btrl %ecx, apic_imen /* update apic_imen */ shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - + jz 2f movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - orl $IOART_INTMASK, %eax /* set mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + andl $~IOART_INTMASK, 16(%edx) /* clear mask bit */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ - ret - - -/****************************************************************************** - * - */ - - -/* - * void write_ioapic_mask(int apic, u_int mask); - */ - -#define _INT_MASK 0x00010000 -#define _PIN_MASK 0x00ffffff - -#define _OLD_ESI 0(%esp) -#define _OLD_EBX 4(%esp) -#define _RETADDR 8(%esp) -#define _APIC 12(%esp) -#define _MASK 16(%esp) - - ALIGN_TEXT -write_ioapic_mask: - pushl %ebx /* scratch */ - pushl %esi /* scratch */ - - movl apic_imen, %ebx - xorl _MASK, %ebx /* %ebx = _apic_imen ^ mask */ - andl $_PIN_MASK, %ebx /* %ebx = _apic_imen & 0x00ffffff */ - jz all_done /* no change, return */ - - movl _APIC, %esi /* APIC # */ - movl ioapic, %ecx - movl (%ecx,%esi,4), %esi /* %esi holds APIC base address */ - -next_loop: /* %ebx = diffs, %esi = APIC base */ - bsfl %ebx, %ecx /* %ecx = index if 1st/next set bit */ - jz all_done - - btrl %ecx, %ebx /* clear this bit in diffs */ - leal 16(,%ecx,2), %edx /* calculate register index */ - - movl %edx, (%esi) /* write the target register index */ - movl 16(%esi), %eax /* read the target register data */ - - btl %ecx, _MASK /* test for mask or unmask */ - jnc clear /* bit is clear */ - orl $_INT_MASK, %eax /* set mask bit */ - jmp write -clear: andl $~_INT_MASK, %eax /* clear mask bit */ - -write: movl %eax, 16(%esi) /* write the APIC register data */ - - jmp next_loop /* try another pass */ - -all_done: - popl %esi - popl %ebx - 
ret - -#undef _OLD_ESI -#undef _OLD_EBX -#undef _RETADDR -#undef _APIC -#undef _MASK - -#undef _PIN_MASK -#undef _INT_MASK - -#ifdef oldcode - -_INTREN: - movl apic_imen, %eax - notl %eax /* mask = ~mask */ - andl apic_imen, %eax /* %eax = _apic_imen & ~mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -_INTRDIS: - movl _apic_imen, %eax - orl 4(%esp), %eax /* %eax = _apic_imen | mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -#endif /* oldcode */ - - -#ifdef ready - -/* - * u_int read_io_apic_mask(int apic); - */ - ALIGN_TEXT -read_io_apic_mask: ret -/* - * Set INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void set_io_apic_mask(apic, u_int32_t bits); - */ - ALIGN_TEXT -set_io_apic_mask: - ret - -/* - * void set_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -set_ioapic_maskbit: - ret - -/* - * Clear INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void clr_io_apic_mask(int apic, u_int32_t bits); - */ - ALIGN_TEXT -clr_io_apic_mask: - ret - -/* - * void clr_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -clr_ioapic_maskbit: - ret - -#endif /** ready */ - /****************************************************************************** * */ @@ -465,4 +125,3 @@ ENTRY(apic_eoi) movl $0, lapic+0xb0 ret -#endif diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index 6cd3736974..55a4af9f26 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -1,62 +1,23 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/i386/isa/Attic/apic_vector.s,v 1.7 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/apic_vector.s,v 1.8 2003/07/06 21:23:49 dillon Exp $ */ #include #include - #include "i386/isa/intr_machdep.h" /* convert an absolute IRQ# into a bitmask */ -#define IRQ_BIT(irq_num) (1 << (irq_num)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) /* make an index into the IO APIC from the IRQ# */ #define REDTBL_IDX(irq_num) (0x10 + ((irq_num) * 2)) - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - movl %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(6*4(%esp)) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - MEXITCOUNT ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret - /* - * + * Push an interrupt frame in a format acceptable to doreti, reload + * the segment registers for the kernel. 
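+ *
+ * For reference, the frame built here is laid out to match the i386
+ * struct trapframe (a sketch; machine/frame.h is the authority):
+ *
+ *	tf_fs, tf_es, tf_ds			(segment pushes above)
+ *	tf_edi ... tf_eax			(pushal)
+ *	tf_trapno, tf_err			(the dummy zero pushes)
+ *	tf_eip, tf_cs, tf_eflags		(hardware, or PUSH_DUMMY)
+ *	tf_esp, tf_ss				(only on a ring crossing)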
*/ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ @@ -64,23 +25,54 @@ IDTVEC(vec_name) ; \ pushal ; \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ - pushl %fs + pushl %fs ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +#define PUSH_DUMMY \ + pushfl ; /* phys int frame / flags */ \ + pushl %cs ; /* phys int frame / cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; /* pushal + 3 seg regs (dummy) */ \ + +/* + * Warning: POP_FRAME can only be used if there is no chance of a + * segment register being changed (e.g. by procfs), which is why syscalls + * have to use doreti. + */ #define POP_FRAME \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ - addl $4+4,%esp + addl $2*4,%esp ; /* dummy trap & error codes */ \ + +#define POP_DUMMY \ + addl $16*4,%esp ; \ #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 + +/* + * Interrupts are expected to already be disabled when using these + * IMASK_*() macros. + */ +#define IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ + +#define IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ #define MASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ jne 7f ; /* masked, don't mask */ \ - orl $IRQ_BIT(irq_num), apic_imen ; /* set the mask bit */ \ + orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -88,17 +80,18 @@ IDTVEC(vec_name) ; \ orl $IOART_INTMASK, %eax ; /* set the mask */ \ movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; /* already masked */ \ - IMASK_UNLOCK + IMASK_UNLOCK ; \ + /* * Test to see whether we are handling an edge or level triggered INT. * Level-triggered INTs must still be masked as we don't clear the source, * and the EOI cycle would cause redundant INTs to occur. */ #define MASK_LEVEL_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + testl $IRQ_LBIT(irq_num), apic_pin_trigger ; \ jz 9f ; /* edge, don't mask */ \ MASK_IRQ(irq_num) ; \ -9: +9: ; \ #ifdef APIC_INTR_REORDER @@ -108,27 +101,26 @@ IDTVEC(vec_name) ; \ testl apic_isrbit_location + 4 + 8 * (irq_num), %eax ; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi ; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ #else + #define EOI_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), lapic_isr1; \ + testl $IRQ_LBIT(irq_num), lapic_isr1; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ + #endif - /* * Test to see if the source is currntly masked, clear if so. 
*/ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ + andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax,(%ecx) ; /* write the index */ \ @@ -136,174 +128,189 @@ IDTVEC(vec_name) ; \ andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; \ - IMASK_UNLOCK - -#ifdef APIC_INTR_DIAGNOSTIC -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -log_intr_event: - pushf - cli - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_lock_np) - addl $4, %esp - movl CNAME(apic_itrace_debugbuffer_idx), %ecx - andl $32767, %ecx - movl PCPU(cpuid), %eax - shll $8, %eax - orl 8(%esp), %eax - movw %ax, CNAME(apic_itrace_debugbuffer)(,%ecx,2) - incl %ecx - andl $32767, %ecx - movl %ecx, CNAME(apic_itrace_debugbuffer_idx) - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_unlock_np) - addl $4, %esp - popf - ret - + IMASK_UNLOCK ; \ -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 ; \ +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti + * - Mask the interrupt and reenable its source + * - If we cannot take the interrupt set its fpending bit and + * doreti. + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask and doreti. + * + * YYY can cache gd base pointer instead of using hidden %fs prefixes. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + EOI_IRQ(irq_num) ; \ + incl PCPU(intr_nesting_level) ; \ + movl PCPU(curthread),%ebx ; \ + movl TD_CPL(%ebx),%eax ; \ pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - movl $(irq_num), %eax ; \ - cmpl $APIC_INTR_DIAGNOSTIC_IRQ, %eax ; \ - jne 7f ; \ - pushl $id ; \ - call log_intr_event ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num), %eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ addl $4, %esp ; \ -7: ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax -#else -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 -#endif + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ -#define APIC_ITRACE_ENTER 1 -#define APIC_ITRACE_EOI 2 -#define APIC_ITRACE_TRYISRLOCK 3 -#define APIC_ITRACE_GOTISRLOCK 4 -#define APIC_ITRACE_ENTER2 5 -#define APIC_ITRACE_LEAVE 6 -#define APIC_ITRACE_UNMASK 7 -#define APIC_ITRACE_ACTIVE 8 -#define APIC_ITRACE_MASKED 9 -#define APIC_ITRACE_NOISRLOCK 10 -#define APIC_ITRACE_MASKED2 11 -#define APIC_ITRACE_SPLZ 12 -#define APIC_ITRACE_DORETI 13 - -#else -#define APIC_ITRACE(name, irq_num, id) -#endif
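For orientation, the deferral decision that the new FAST_INTR macro encodes in assembly corresponds roughly to the C sketch below. This is an illustrative restatement only, not code from the patch: the array notation stands in for the intr_unit/intr_handler table indexing above, cpl stands for the TD_CPL value loaded into %eax, fpending and reqpri stand for the per-cpu fields accessed via PCPU(), and unmask_irq() is a hypothetical stand-in for the UNMASK_IRQ macro.

	/*
	 * FAST_INTR control flow, restated in C.  If the current thread is
	 * in a critical section or the cpl masks this IRQ, the source is
	 * left masked and recorded in fpending; doreti later replays it
	 * via FAST_UNPEND.  Otherwise the handler runs immediately inside
	 * a critical section.
	 */
	if (td->td_pri >= TDPRI_CRIT || (cpl & IRQ_LBIT(irq))) {
		fpending |= IRQ_LBIT(irq);	/* defer; IRQ stays masked */
		reqpri = TDPRI_CRIT;		/* ask doreti to service it */
	} else {
		td->td_pri += TDPRI_CRIT;	/* enter critical section */
		fpending &= ~IRQ_LBIT(irq);
		intr_handler[irq](intr_unit[irq]);
		td->td_pri -= TDPRI_CRIT;
		unmask_irq(irq);		/* hypothetical helper */
	}
	/* both paths exit through doreti */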
- -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ +/* + * Restart fast interrupt held up by critical section or cpl. + * + * - Push a dummy trap frame as required by doreti + * - The interrupt source is already masked + * - Clear the fpending bit + * - Run the handler + * - Unmask the interrupt + * - Pop the dummy frame and do a normal return + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. + * - If we can take the interrupt clear its ipending bit, + * set its irunning bit, and schedule the thread. Leave + * interrupts masked and doreti. + * + * The interrupt thread will run its handlers and loop if + * ipending is found to be set. ipending/irunning interlock + * the interrupt thread with the interrupt. The handler calls + * UNPEND when it is through. + * + * Note that we do not enable interrupts when calling sched_ithd. + * YYY sched_ithd may preempt us synchronously (fix interrupt stacking) + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ -/* XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; \ - mov %ax, %fs ; \ -; \ maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ + incl PCPU(intr_nesting_level) ; \ movl PCPU(curthread),%ebx ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%eax) ; \ - jne 2f ; /* this INT masked */ \ + movl TD_CPL(%ebx),%eax ; \ + pushl %eax ; /* cpl to restore */ \ cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ - jge 2f ; /* in critical sec */ \ -; \ - incb PCPU(intr_nesting_level) ; \ -; \ - /* entry point used by doreti_unpend for HWIs.
*/ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl PCPU(curthread), %ebx ; \ - movl TD_MACH+MTD_CPL(%ebx), %eax ; \ - pushl %eax ; /* cpl restored by doreti */ \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, TD_MACH+MTD_CPL(%ebx) ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), PCPU(ipending) ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num),PCPU(irunning) ; \ + jnz 1f ; \ + testl $IRQ_LBIT(irq_num),%eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave the interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + /* set running bit, clear pending bit, run handler */ \ + orl $IRQ_LBIT(irq_num), PCPU(irunning) ; \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ + pushl $irq_num ; \ + call sched_ithd ; \ addl $4,%esp ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping YYY make per-cpu */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ +5: ; \ MEXITCOUNT ; \ jmp doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ + +/* + * Unmask a slow interrupt. This function is used by interrupt threads + * after they have descheduled themselves to reenable interrupts and + * possibly cause a reschedule to occur. The interrupt's irunning bit + * is cleared prior to unmasking. + */ + +#define INTR_UNMASK(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; /* frame for ddb backtrace */ \ + movl %esp, %ebp ; \ + andl $~IRQ_LBIT(irq_num), PCPU(irunning) ; \ + UNMASK_IRQ(irq_num) ; \ + popl %ebp ; \ + ret ; \ + +#if 0 + /* XXX forward_irq to cpu holding the BGL?
*/ + ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ movl $TDPRI_CRIT,_reqpri ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%ebx) ; \ + testl $IRQ_LBIT(irq_num), TD_CPL(%ebx) ; \ jne 4f ; /* this INT masked */ \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ POP_FRAME ; /* and return */ \ iret @@ -314,6 +321,9 @@ __CONCAT(Xresume,irq_num): ; \ * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ + +#endif + .text SUPERALIGN_TEXT .globl Xspuriousint @@ -329,8 +339,8 @@ Xspuriousint: */ .text SUPERALIGN_TEXT - .globl _Xinvltlb -_Xinvltlb: + .globl Xinvltlb +Xinvltlb: pushl %eax #ifdef COUNT_XINVLTLB_HITS @@ -353,6 +363,7 @@ _Xinvltlb: iret +#if 0 #ifdef BETTER_CLOCK /* @@ -413,13 +424,14 @@ Xcpucheckstate: iret #endif /* BETTER_CLOCK */ +#endif /* * Executed by a CPU when it receives an Xcpuast IPI from another CPU, * * - Signals its receipt by clearing bit cpuid in checkstate_need_ast. - * - * - We need a better method of triggering asts on other cpus. + * - MP safe in regards to setting AST_PENDING because doreti is in + * a cli mode when it checks. */ .text @@ -427,11 +439,6 @@ Xcpucheckstate: .globl Xcpuast Xcpuast: PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs movl PCPU(cpuid), %eax lock /* checkstate_need_ast &= ~(1< #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ @@ -102,7 +98,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -204,7 +199,7 @@ static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { - disable_intr(); + clock_lock(); if (i8254_ticked) i8254_ticked = 0; else { @@ -212,7 +207,7 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + clock_unlock(); } timer_func(&frame); switch (timer0_state) { @@ -231,14 +226,14 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: - disable_intr(); + clock_lock(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + clock_unlock(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -247,7 +242,7 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { - disable_intr(); + clock_lock(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -255,7 +250,7 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + clock_unlock(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = 
RELEASED; @@ -402,11 +397,9 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; int high, low; - ef = read_eflags(); - disable_intr(); + clock_lock(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -414,8 +407,7 @@ getit(void) low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); return ((high << 8) | low); } @@ -529,10 +521,10 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } - disable_intr(); + clock_lock(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + clock_unlock(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -681,11 +673,9 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; int new_timer0_max_count; - ef = read_eflags(); - disable_intr(); + clock_lock(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -694,22 +684,17 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); } static void i8254_restore(void) { - u_long ef; - - ef = read_eflags(); - disable_intr(); + clock_lock(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); } static void @@ -1212,7 +1197,7 @@ i8254_get_timecount(struct timecounter *tc) u_int high, low; ef = read_eflags(); - disable_intr(); + clock_lock(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1236,8 +1221,7 @@ i8254_get_timecount(struct timecounter *tc) } i8254_lastcount = count; count += i8254_offset; - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); return (count); } diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index 6ae5dde27b..11c96851ca 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ - * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.5 2003/07/04 00:32:28 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.c,v 1.6 2003/07/06 21:23:49 dillon Exp $ */ /* * This file contains an aggregated module marked: @@ -472,7 +472,7 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) return (EBUSY); ef = read_eflags(); - disable_intr(); + cpu_disable_intr(); /* YYY */ intr_handler[intr] = handler; intr_mptr[intr] = maskptr; intr_mask[intr] = mask | SWI_CLOCK_MASK | (1 << intr); @@ -530,7 +530,7 @@ icu_unset(intr, handler) INTRDIS(1 << intr); ef = read_eflags(); - disable_intr(); + cpu_disable_intr(); /* YYY */ intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; intr_mptr[intr] = NULL; diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h index e7d97200e3..332eae1ee6 100644 --- a/sys/i386/isa/intr_machdep.h +++ b/sys/i386/isa/intr_machdep.h @@ -31,15 +31,17 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/isa/intr_machdep.h,v 1.19.2.2 2001/10/14 20:05:50 luigi Exp $ - * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.h,v 1.3 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/intr_machdep.h,v 1.4 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _I386_ISA_INTR_MACHDEP_H_ #define _I386_ISA_INTR_MACHDEP_H_ #ifndef _SYS_INTERRUPT_H_ +#ifndef LOCORE #include #endif +#endif /* * Low level interrupt code. @@ -99,6 +101,7 @@ #define TPR_IGNORE_HWI 0x5f /* ignore INTs */ #define TPR_BLOCK_FHWI 0x7f /* hardware FAST INTs */ #define TPR_IGNORE_FHWI 0x8f /* ignore FAST INTs */ +#define TPR_IPI_ONLY 0x8f /* ignore FAST INTs */ #define TPR_BLOCK_XINVLTLB 0x9f /* */ #define TPR_BLOCK_XCPUSTOP 0xaf /* */ #define TPR_BLOCK_ALL 0xff /* all INTs */ diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index ad5b427448..05526efbe2 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -33,7 +33,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/i386/isa/Attic/npx.c,v 1.6 2003/06/28 04:16:04 dillon Exp $ + * $DragonFly: src/sys/i386/isa/Attic/npx.c,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ #include "opt_cpu.h" @@ -257,7 +257,7 @@ npx_probe(dev) npx_irq = 13; npx_intrno = NRSVIDT + npx_irq; save_eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); save_icu1_mask = inb(IO_ICU1 + 1); save_icu2_mask = inb(IO_ICU2 + 1); save_idt_npxintr = idt[npx_intrno]; @@ -267,9 +267,9 @@ npx_probe(dev) setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); npx_idt_probeintr = idt[npx_intrno]; - enable_intr(); + cpu_enable_intr(); result = npx_probe1(dev); - disable_intr(); + cpu_disable_intr(); outb(IO_ICU1 + 1, save_icu1_mask); outb(IO_ICU2 + 1, save_icu2_mask); idt[npx_intrno] = save_idt_npxintr; @@ -733,6 +733,9 @@ static char fpetable[128] = { * longjmp() out. Both preserving the state and longjmp()ing may be * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable * solution for signals other than SIGFPE. + * + * The MP lock is not held on entry (see i386/i386/exception.s) and + * should not be held on exit. */ void npx_intr(dummy) @@ -744,11 +747,13 @@ npx_intr(dummy) u_long *exstat; if (npxthread == NULL || !npx_exists) { + get_mplock(); printf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", npxthread, curthread, npx_exists); panic("npxintr from nowhere"); } if (npxthread != curthread) { + get_mplock(); printf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", npxthread, curthread, npx_exists); panic("npxintr from non-current process"); @@ -760,6 +765,8 @@ npx_intr(dummy) fnstcw(&control); fnclex(); + get_mplock(); + /* * Pass exception to process. 
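 *
 * [Condensed shape of the handler after this change, for orientation;
 * this restates the surrounding diff rather than adding new code:]
 *
 *	void
 *	npx_intr(void *dummy)
 *	{
 *		(entered without the MP lock)
 *		if (npxthread == NULL || npxthread != curthread) {
 *			get_mplock();		(before printf/panic)
 *			panic(...);
 *		}
 *		fnstcw(&control);
 *		fnclex();
 *		get_mplock();			(before signal delivery)
 *		...
 *		psignal(curproc, SIGFPE);
 *		rel_mplock();
 *	}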
*/ @@ -801,6 +808,7 @@ npx_intr(dummy) */ psignal(curproc, SIGFPE); } + rel_mplock(); } /* @@ -874,21 +882,23 @@ npxsave(addr) u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; + u_long save_eflags; - disable_intr(); + save_eflags = read_eflags(); + cpu_disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); old_icu2_mask = inb(IO_ICU2 + 1); save_idt_npxintr = idt[npx_intrno]; outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); idt[npx_intrno] = npx_idt_probeintr; - enable_intr(); + cpu_enable_intr(); stop_emulating(); fnsave(addr); fnop(); start_emulating(); npxthread = NULL; - disable_intr(); + cpu_disable_intr(); icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ icu2_mask = inb(IO_ICU2 + 1); outb(IO_ICU1 + 1, @@ -897,7 +907,7 @@ npxsave(addr) (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; - enable_intr(); /* back to usual state */ + write_eflags(save_eflags); /* back to usual state */ #endif /* SMP */ } diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 65b6dc443e..783f4ef14b 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -27,7 +27,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $ - * $DragonFly: src/sys/kern/imgact_elf.c,v 1.4 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/imgact_elf.c,v 1.5 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -516,9 +516,9 @@ exec_elf_imgact(struct image_params *imgp) * a context switch. Better safe than sorry; I really don't want * the file to change while it's being loaded. */ - simple_lock(&imgp->vp->v_interlock); + lwkt_gettoken(&imgp->vp->v_interlock); imgp->vp->v_flag |= VTEXT; - simple_unlock(&imgp->vp->v_interlock); + lwkt_reltoken(&imgp->vp->v_interlock); vmspace = imgp->proc->p_vmspace; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index d375827a0c..602507c378 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -40,7 +40,7 @@ * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/init_main.c,v 1.134.2.8 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/init_main.c,v 1.18 2003/07/03 17:24:02 dillon Exp $ + * $DragonFly: src/sys/kern/init_main.c,v 1.19 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_init_path.h" @@ -164,6 +164,7 @@ sysinit_add(struct sysinit **set) void mi_startup(void) { + struct sysinit *sip; /* system initialization*/ struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ @@ -192,19 +193,18 @@ restart: * The last item on the list is expected to be the scheduler, * which will not return. 
*/ - for (sipp = sysinit; *sipp; sipp++) { - - if ((*sipp)->subsystem == SI_SUB_DUMMY) + for (sipp = sysinit; (sip = *sipp) != NULL; sipp++) { + if (sip->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ - if ((*sipp)->subsystem == SI_SUB_DONE) + if (sip->subsystem == SI_SUB_DONE) continue; /* Call function */ - (*((*sipp)->func))((*sipp)->udata); + (*(sip->func))(sip->udata); /* Check off the one we're just done */ - (*sipp)->subsystem = SI_SUB_DONE; + sip->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { @@ -423,6 +423,8 @@ SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, ""); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. + * + * The MP lock is held on entry. */ static void start_init(void *dummy) @@ -529,8 +531,10 @@ start_init(void *dummy) * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ - if ((error = execve(&args)) == 0) + if ((error = execve(&args)) == 0) { + rel_mplock(); return; + } if (error != ENOENT) printf("exec %.*s: error %d\n", (int)(next - path), path, error); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index edfa8a2cba..322b7454de 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.14 2003/06/30 23:54:02 dillon Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.15 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_compat.h" @@ -446,9 +446,11 @@ loop: * The process's thread may still be in the middle * of switching away, we can't rip its stack out from * under it until TDF_EXITED is set. + * + * YYY no wakeup occurs so we depend on the timeout. */ if ((p->p_thread->td_flags & TDF_EXITED) == 0) { - tsleep(p->p_thread, PWAIT, "reap", 0); + tsleep(p->p_thread, PWAIT, "reap", 1); goto loop; } diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index 8f0fd738d3..cad6b789ad 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -39,7 +39,7 @@ * * @(#)kern_lock.c 8.18 (Berkeley) 5/21/95 * $FreeBSD: src/sys/kern/kern_lock.c,v 1.31.2.3 2001/12/25 01:44:44 dillon Exp $ - * $DragonFly: src/sys/kern/kern_lock.c,v 1.3 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/kern_lock.c,v 1.4 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_lint.h" @@ -73,7 +73,6 @@ LK_SHARE_NONZERO | LK_WAIT_NONZERO) static int acquire(struct lock *lkp, int extflags, int wanted); -static int apause(struct lock *lkp, int flags); static int acquiredrain(struct lock *lkp, int extflags) ; static LOCK_INLINE void @@ -98,36 +97,9 @@ shareunlock(struct lock *lkp, int decr) { } } -/* - * This is the waitloop optimization, and note for this to work - * simple_lock and simple_unlock should be subroutines to avoid - * optimization troubles. 
- */ static int -apause(struct lock *lkp, int flags) +acquire(struct lock *lkp, int extflags, int wanted) { -#ifdef SMP - int i, lock_wait; -#endif - - if ((lkp->lk_flags & flags) == 0) - return 0; -#ifdef SMP - for (lock_wait = LOCK_WAIT_TIME; lock_wait > 0; lock_wait--) { - simple_unlock(&lkp->lk_interlock); - for (i = LOCK_SAMPLE_WAIT; i > 0; i--) - if ((lkp->lk_flags & flags) == 0) - break; - simple_lock(&lkp->lk_interlock); - if ((lkp->lk_flags & flags) == 0) - return 0; - } -#endif - return 1; -} - -static int -acquire(struct lock *lkp, int extflags, int wanted) { int s, error; if ((extflags & LK_NOWAIT) && (lkp->lk_flags & wanted)) { @@ -135,8 +107,7 @@ acquire(struct lock *lkp, int extflags, int wanted) { } if (((lkp->lk_flags | extflags) & LK_NOPAUSE) == 0) { - error = apause(lkp, wanted); - if (error == 0) + if ((lkp->lk_flags & wanted) == 0) return 0; } @@ -144,10 +115,10 @@ acquire(struct lock *lkp, int extflags, int wanted) { while ((lkp->lk_flags & wanted) != 0) { lkp->lk_flags |= LK_WAIT_NONZERO; lkp->lk_waitcount++; - simple_unlock(&lkp->lk_interlock); + lwkt_reltoken(&lkp->lk_interlock); error = tsleep(lkp, lkp->lk_prio, lkp->lk_wmesg, ((extflags & LK_TIMELOCK) ? lkp->lk_timo : 0)); - simple_lock(&lkp->lk_interlock); + lwkt_gettoken(&lkp->lk_interlock); if (lkp->lk_waitcount == 1) { lkp->lk_flags &= ~LK_WAIT_NONZERO; lkp->lk_waitcount = 0; @@ -176,10 +147,10 @@ acquire(struct lock *lkp, int extflags, int wanted) { */ int #ifndef DEBUG_LOCKS -lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp, +lockmgr(struct lock *lkp, u_int flags, struct lwkt_token *interlkp, struct thread *td) #else -debuglockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp, +debuglockmgr(struct lock *lkp, u_int flags, struct lwkt_token *interlkp, struct thread *td, const char *name, const char *file, int line) #endif { @@ -188,9 +159,9 @@ debuglockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp, error = 0; - simple_lock(&lkp->lk_interlock); + lwkt_gettoken(&lkp->lk_interlock); if (flags & LK_INTERLOCK) - simple_unlock(interlkp); + lwkt_reltoken(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; @@ -417,7 +388,7 @@ debuglockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp, break; default: - simple_unlock(&lkp->lk_interlock); + lwkt_reltoken(&lkp->lk_interlock); panic("lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ @@ -428,29 +399,29 @@ debuglockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp, lkp->lk_flags &= ~LK_WAITDRAIN; wakeup((void *)&lkp->lk_flags); } - simple_unlock(&lkp->lk_interlock); + lwkt_reltoken(&lkp->lk_interlock); return (error); } static int -acquiredrain(struct lock *lkp, int extflags) { +acquiredrain(struct lock *lkp, int extflags) +{ int error; if ((extflags & LK_NOWAIT) && (lkp->lk_flags & LK_ALL)) { return EBUSY; } - error = apause(lkp, LK_ALL); - if (error == 0) + if ((lkp->lk_flags & LK_ALL) == 0) return 0; while (lkp->lk_flags & LK_ALL) { lkp->lk_flags |= LK_WAITDRAIN; - simple_unlock(&lkp->lk_interlock); + lwkt_reltoken(&lkp->lk_interlock); error = tsleep(&lkp->lk_flags, lkp->lk_prio, lkp->lk_wmesg, ((extflags & LK_TIMELOCK) ? 
lkp->lk_timo : 0)); - simple_lock(&lkp->lk_interlock); + lwkt_gettoken(&lkp->lk_interlock); if (error) return error; if (extflags & LK_SLEEPFAIL) { @@ -471,8 +442,7 @@ lockinit(lkp, prio, wmesg, timo, flags) int timo; int flags; { - - simple_lock_init(&lkp->lk_interlock); + lwkt_inittoken(&lkp->lk_interlock); lkp->lk_flags = (flags & LK_EXTFLG_MASK); lkp->lk_sharecount = 0; lkp->lk_waitcount = 0; @@ -491,15 +461,16 @@ lockstatus(struct lock *lkp, struct thread *td) { int lock_type = 0; - simple_lock(&lkp->lk_interlock); + lwkt_gettoken(&lkp->lk_interlock); if (lkp->lk_exclusivecount != 0) { if (td == NULL || lkp->lk_lockholder == td) lock_type = LK_EXCLUSIVE; else lock_type = LK_EXCLOTHER; - } else if (lkp->lk_sharecount != 0) + } else if (lkp->lk_sharecount != 0) { lock_type = LK_SHARED; - simple_unlock(&lkp->lk_interlock); + } + lwkt_reltoken(&lkp->lk_interlock); return (lock_type); } @@ -512,9 +483,9 @@ lockcount(lkp) { int count; - simple_lock(&lkp->lk_interlock); + lwkt_gettoken(&lkp->lk_interlock); count = lkp->lk_exclusivecount + lkp->lk_sharecount; - simple_unlock(&lkp->lk_interlock); + lwkt_reltoken(&lkp->lk_interlock); return (count); } @@ -545,99 +516,3 @@ lockmgr_printinfo(lkp) printf(" with %d pending", lkp->lk_waitcount); } -#if defined(SIMPLELOCK_DEBUG) && (MAXCPU == 1 || defined(COMPILING_LINT)) -#include -#include - -static int lockpausetime = 0; -SYSCTL_INT(_debug, OID_AUTO, lockpausetime, CTLFLAG_RW, &lockpausetime, 0, ""); - -static int simplelockrecurse; - -/* - * Simple lock functions so that the debugger can see from whence - * they are being called. - */ -void -simple_lock_init(alp) - struct simplelock *alp; -{ - - alp->lock_data = 0; -} - -void -_simple_lock(alp, id, l) - struct simplelock *alp; - const char *id; - int l; -{ - - if (simplelockrecurse) - return; - if (alp->lock_data == 1) { - if (lockpausetime == -1) - panic("%s:%d: simple_lock: lock held", id, l); - printf("%s:%d: simple_lock: lock held\n", id, l); - if (lockpausetime == 1) { - Debugger("simple_lock"); - /*BACKTRACE(curproc); */ - } else if (lockpausetime > 1) { - printf("%s:%d: simple_lock: lock held...", id, l); - tsleep(&lockpausetime, PCATCH | PPAUSE, "slock", - lockpausetime * hz); - printf(" continuing\n"); - } - } - alp->lock_data = 1; - if (curproc) - curproc->p_simple_locks++; -} - -int -_simple_lock_try(alp, id, l) - struct simplelock *alp; - const char *id; - int l; -{ - - if (alp->lock_data) - return (0); - if (simplelockrecurse) - return (1); - alp->lock_data = 1; - if (curproc) - curproc->p_simple_locks++; - return (1); -} - -void -_simple_unlock(alp, id, l) - struct simplelock *alp; - const char *id; - int l; -{ - - if (simplelockrecurse) - return; - if (alp->lock_data == 0) { - if (lockpausetime == -1) - panic("%s:%d: simple_unlock: lock not held", id, l); - printf("%s:%d: simple_unlock: lock not held\n", id, l); - if (lockpausetime == 1) { - Debugger("simple_unlock"); - /* BACKTRACE(curproc); */ - } else if (lockpausetime > 1) { - printf("%s:%d: simple_unlock: lock not held...", id, l); - tsleep(&lockpausetime, PCATCH | PPAUSE, "sunlock", - lockpausetime * hz); - printf(" continuing\n"); - } - } - alp->lock_data = 0; - if (curproc) - curproc->p_simple_locks--; -} -#elif defined(SIMPLELOCK_DEBUG) -#error "SIMPLELOCK_DEBUG is not compatible with SMP!" 
-#endif /* SIMPLELOCK_DEBUG && MAXCPU == 1 */ diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 4163cbd646..b01f725b32 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -37,7 +37,7 @@ * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 * $FreeBSD: src/sys/kern/kern_synch.c,v 1.87.2.6 2002/10/13 07:29:53 kbyanc Exp $ - * $DragonFly: src/sys/kern/kern_synch.c,v 1.14 2003/07/03 17:24:02 dillon Exp $ + * $DragonFly: src/sys/kern/kern_synch.c,v 1.15 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_ktrace.h" @@ -89,6 +89,7 @@ static void maybe_resched __P((struct proc *chk)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); +static void crit_panicints(void); static int sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) @@ -155,18 +156,15 @@ static void roundrobin(arg) void *arg; { -#ifndef SMP - struct proc *p = curproc; /* XXX */ -#endif - + struct proc *p = lwkt_preempted_proc(); #ifdef SMP - need_resched(); + if (p == NULL || RTP_PRIO_NEED_RR(p->p_rtprio.type)) + need_resched(); forward_roundrobin(); #else - if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type)) + if (p == NULL || RTP_PRIO_NEED_RR(p->p_rtprio.type)) need_resched(); #endif - timeout(roundrobin, NULL, sched_quantum); } @@ -400,8 +398,6 @@ tsleep(ident, priority, wmesg, timo) * NOTE: removed KTRPOINT, it could cause races due to blocking * even in stable. Just scrap it for now. */ - s = splhigh(); - if (cold || panicstr) { /* * After a panic, or during autoconfiguration, @@ -409,10 +405,10 @@ tsleep(ident, priority, wmesg, timo) * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ - splx(safepri); - splx(s); + crit_panicints(); return (0); } + s = splhigh(); KASSERT(ident != NULL, ("tsleep: no ident")); KASSERT(p == NULL || p->p_stat == SRUN, ("tsleep %p %s %d", ident, wmesg, p->p_stat)); @@ -519,8 +515,6 @@ xsleep(struct xwait *w, int priority, const char *wmesg, int timo, int *gen) if (KTRPOINT(td, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); #endif - s = splhigh(); - if (cold || panicstr) { /* * After a panic, or during autoconfiguration, @@ -528,10 +522,10 @@ xsleep(struct xwait *w, int priority, const char *wmesg, int timo, int *gen) * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ - splx(safepri); - splx(s); + crit_panicints(); return (0); } + s = splhigh(); KASSERT(p != NULL, ("xsleep1")); KASSERT(w != NULL && p->p_stat == SRUN, ("xsleep")); @@ -844,11 +838,6 @@ mi_switch() */ _relscurproc(p); -#ifdef SIMPLELOCK_DEBUG - if (p->p_simple_locks) - printf("sleep: holding simple lock\n"); -#endif - /* * Check if the process exceeds its cpu resource allocation. * If over max, kill it. Time spent in interrupts is not @@ -1084,3 +1073,18 @@ schedclock(p) if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) resetpriority(p); } + +static +void +crit_panicints(void) +{ + int s; + int cpri; + + s = splhigh(); + cpri = crit_panic_save(); + splx(safepri); + crit_panic_restore(cpri); + splx(s); +} + diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 5e37b08921..b1c0cb6c38 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -27,7 +27,7 @@ * thread scheduler, which means that generally speaking we only need * to use a critical section to prevent hicups. 
* - * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.14 2003/07/04 00:32:30 dillon Exp $ + * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.15 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -185,6 +185,7 @@ lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd) td->td_flags |= flags; td->td_gd = gd; td->td_pri = TDPRI_CRIT; + td->td_cpu = gd->gd_cpuid; /* YYY don't need this if have td_gd */ pmap_init_thread(td); crit_enter(); TAILQ_INSERT_TAIL(&mycpu->gd_tdallq, td, td_allq); @@ -277,21 +278,44 @@ lwkt_switch(void) struct globaldata *gd; thread_t td = curthread; thread_t ntd; +#ifdef SMP + int mpheld; +#endif if (mycpu->gd_intr_nesting_level && td->td_preempted == NULL) panic("lwkt_switch: cannot switch from within an interrupt, yet\n"); crit_enter(); ++switch_count; + +#ifdef SMP + /* + * td_mpcount cannot be used to determine if we currently hold the + * MP lock because get_mplock() will increment it prior to attempting + * to get the lock, and switch out if it can't. Look at the actual lock. + */ + mpheld = MP_LOCK_HELD(); +#endif if ((ntd = td->td_preempted) != NULL) { /* * We had preempted another thread on this cpu, resume the preempted * thread. This occurs transparently, whether the preempted thread * was scheduled or not (it may have been preempted after descheduling - * itself). + * itself). + * + * We have to setup the MP lock for the original thread after backing + * out the adjustment that was made to curthread when the original + * was preempted. */ KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK); +#ifdef SMP + if (ntd->td_mpcount) { + td->td_mpcount -= ntd->td_mpcount; + KKASSERT(td->td_mpcount >= 0); + } +#endif ntd->td_flags |= TDF_PREEMPT_DONE; + /* YYY release mp lock on switchback if original doesn't need it */ } else { /* * Priority queue / round-robin at each priority. Note that user @@ -299,9 +323,12 @@ lwkt_switch(void) * scheduler deals with interactions between user processes * by scheduling and descheduling them from the LWKT queue as * necessary. + * + * We have to adjust the MP lock for the target thread. If we + * need the MP lock and cannot obtain it we try to locate a + * thread that does not need the MP lock. */ gd = mycpu; - again: if (gd->gd_runqmask) { int nq = bsrl(gd->gd_runqmask); @@ -309,16 +336,60 @@ again: gd->gd_runqmask &= ~(1 << nq); goto again; } +#ifdef SMP + if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) { + /* + * Target needs MP lock and we couldn't get it. + */ + u_int32_t rqmask = gd->gd_runqmask; + while (rqmask) { + TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) { + if (ntd->td_mpcount == 0) + break; + } + if (ntd) + break; + rqmask &= ~(1 << nq); + nq = bsrl(rqmask); + } + if (ntd == NULL) { + ntd = gd->gd_idletd; + } else { + TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); + TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); + } + } else { + TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); + TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); + } +#else TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq); TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq); +#endif } else { ntd = gd->gd_idletd; } } KASSERT(ntd->td_pri >= TDPRI_CRIT, ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri)); - if (td != ntd) + + /* + * Do the actual switch. If the new target does not need the MP lock + * and we are holding it, release the MP lock. If the new target requires + * the MP lock we have already acquired it for the target. 
+ */ +#ifdef SMP + if (ntd->td_mpcount == 0 ) { + if (MP_LOCK_HELD()) + cpu_rel_mplock(); + } else { + ASSERT_MP_LOCK_HELD(); + } +#endif + + if (td != ntd) { td->td_switch(ntd); + } crit_exit(); } @@ -328,7 +399,9 @@ again: * * + We aren't trying to preempt ourselves (it can happen!) * + We are not currently being preempted - * + the target is not currently being preempted + * + The target is not currently being preempted + * + The target either does not need the MP lock or we can get it + * for the target immediately. * * XXX at the moment we run the target thread in a critical section during * the preemption in order to prevent the target from taking interrupts @@ -344,11 +417,18 @@ again: * * CAREFUL! either we or the target thread may get interrupted during the * switch. + * + * The target thread inherits our MP count (added to its own) for the + * duration of the preemption in order to preserve the atomicity of the + * preemption. */ void lwkt_preempt(thread_t ntd, int id) { thread_t td = curthread; +#ifdef SMP + int mpheld; +#endif /* * The caller has put us in a critical section, and in order to have @@ -370,6 +450,15 @@ lwkt_preempt(thread_t ntd, int id) ++preempt_miss; return; } +#ifdef SMP + mpheld = MP_LOCK_HELD(); + ntd->td_mpcount += td->td_mpcount; + if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) { + ntd->td_mpcount -= td->td_mpcount; + ++preempt_miss; + return; + } +#endif ++preempt_hit; ntd->td_preempted = td; @@ -727,7 +816,7 @@ lwkt_signal(lwkt_wait_t w) * Note that the spl and critical section characteristics of a token * may not be changed once the token has been initialized. */ -void +int lwkt_gettoken(lwkt_token_t tok) { /* @@ -746,8 +835,9 @@ lwkt_gettoken(lwkt_token_t tok) #endif /* * leave us in a critical section on return. This will be undone - * by lwkt_reltoken() + * by lwkt_reltoken(). Bump the generation number. */ + return(++tok->t_gen); } /* @@ -771,9 +861,29 @@ lwkt_reltoken(lwkt_token_t tok) } /* - * Reaquire a token that might have been lost. Returns 1 if we blocked - * while reaquiring the token (meaning that you might have lost other - * tokens you held when you made this call), return 0 if we did not block. + * Reacquire a token that might have been lost and compare and update the + * generation number. 0 is returned if the generation has not changed + * (nobody else obtained the token while we were blocked, on this cpu or + * any other cpu). + * + * This function returns with the token re-held whether the generation + * number changed or not. + */ +int +lwkt_gentoken(lwkt_token_t tok, int *gen) +{ + if (lwkt_regettoken(tok) == *gen) { + return(0); + } else { + *gen = tok->t_gen; + return(-1); + } +} + + +/* + * Reacquire a token that might have been lost. Returns the generation + * number of the token. */ int lwkt_regettoken(lwkt_token_t tok) @@ -785,10 +895,9 @@ lwkt_regettoken(lwkt_token_t tok) initTokenReqMsg(&msg.mu_TokenReq); cpu_domsg(&msg); } - return(1); } #endif - return(0); + return(tok->t_gen); } void @@ -805,6 +914,8 @@ lwkt_inittoken(lwkt_token_t tok) * with proc0 - ie: kernel only. * * XXX should be renamed to lwkt_create() + * + * The thread will be entered with the MP lock held.
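 *
 * [Sketch of a typical consumer, assuming an MP-safe thread body; the
 * function name is hypothetical and not part of the patch:]
 *
 *	static void
 *	my_kthread(void *dummy)
 *	{
 *		rel_mplock();	(entered with td_mpcount == 1; release
 *				 the MP lock if the body is MP-safe)
 *		for (;;) {
 *			... do work, tsleep(), etc ...
 *		}
 *	}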
*/ int lwkt_create(void (*func)(void *), void *arg, @@ -817,6 +928,9 @@ lwkt_create(void (*func)(void *), void *arg, td = *tdp = lwkt_alloc_thread(template); cpu_set_thread_handler(td, kthread_exit, func, arg); td->td_flags |= TDF_VERBOSE | tdflags; +#ifdef SMP + td->td_mpcount = 1; +#endif /* * Set up arg0 for 'ps' etc @@ -868,6 +982,9 @@ kthread_create(void (*func)(void *), void *arg, td = *tdp = lwkt_alloc_thread(NULL); cpu_set_thread_handler(td, kthread_exit, func, arg); td->td_flags |= TDF_VERBOSE; +#ifdef SMP + td->td_mpcount = 1; +#endif /* * Set up arg0 for 'ps' etc diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index 06e7179913..dac3263244 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/subr_bus.c,v 1.54.2.9 2002/10/10 15:13:32 jhb Exp $ - * $DragonFly: src/sys/kern/subr_bus.c,v 1.2 2003/06/17 04:28:41 dillon Exp $ + * $DragonFly: src/sys/kern/subr_bus.c,v 1.3 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_bus.h" @@ -1029,6 +1029,9 @@ device_disable(device_t dev) dev->flags &= ~DF_ENABLED; } +/* + * YYY cannot block + */ void device_busy(device_t dev) { @@ -1040,6 +1043,9 @@ device_busy(device_t dev) dev->state = DS_BUSY; } +/* + * YYY cannot block + */ void device_unbusy(device_t dev) { @@ -2283,8 +2289,8 @@ root_bus_configure(void) PDEBUG((".")); for (dev = TAILQ_FIRST(&root_bus->children); dev; - dev = TAILQ_NEXT(dev, link)) { - device_probe_and_attach(dev); + dev = TAILQ_NEXT(dev, link)) { + device_probe_and_attach(dev); } } diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index 3af37ed1dc..c99e0e31c8 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -37,7 +37,7 @@ * * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/subr_prf.c,v 1.61.2.5 2002/08/31 18:22:08 dwmalone Exp $ - * $DragonFly: src/sys/kern/subr_prf.c,v 1.3 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/subr_prf.c,v 1.4 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -52,6 +52,7 @@ #include #include #include +#include /* * Note that stdarg.h and the ANSI style va_start macro is used for both @@ -126,6 +127,7 @@ uprintf(const char *fmt, ...) va_start(ap, fmt); pca.tty = p->p_session->s_ttyp; pca.flags = TOTTY; + retval = kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); } @@ -300,7 +302,9 @@ printf(const char *fmt, ...) pca.tty = NULL; pca.flags = TOCONS | TOLOG; pca.pri = -1; + cons_lock(); retval = kvprintf(fmt, putchar, &pca, 10, ap); + cons_unlock(); va_end(ap); if (!panicstr) msgbuftrigger = 1; @@ -320,7 +324,9 @@ vprintf(const char *fmt, va_list ap) pca.tty = NULL; pca.flags = TOCONS | TOLOG; pca.pri = -1; + cons_lock(); retval = kvprintf(fmt, putchar, &pca, 10, ap); + cons_unlock(); if (!panicstr) msgbuftrigger = 1; consintr = savintr; /* reenable interrupts */ diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c index 081e057cc7..3f0b2cd28a 100644 --- a/sys/kern/subr_rman.c +++ b/sys/kern/subr_rman.c @@ -27,7 +27,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/kern/subr_rman.c,v 1.10.2.1 2001/06/05 08:06:08 imp Exp $ - * $DragonFly: src/sys/kern/subr_rman.c,v 1.2 2003/06/17 04:28:41 dillon Exp $ + * $DragonFly: src/sys/kern/subr_rman.c,v 1.3 2003/07/06 21:23:51 dillon Exp $ */ /* @@ -70,9 +70,7 @@ static MALLOC_DEFINE(M_RMAN, "rman", "Resource manager"); struct rman_head rman_head; -#ifndef NULL_SIMPLELOCKS -static struct simplelock rman_lock; /* mutex to protect rman_head */ -#endif +static struct lwkt_token rman_tok; /* mutex to protect rman_head */ static int int_rman_activate_resource(struct rman *rm, struct resource *r, struct resource **whohas); static int int_rman_deactivate_resource(struct resource *r); @@ -88,7 +86,7 @@ rman_init(struct rman *rm) if (once == 0) { once = 1; TAILQ_INIT(&rman_head); - simple_lock_init(&rman_lock); + lwkt_inittoken(&rman_tok); } if (rm->rm_type == RMAN_UNINIT) @@ -98,13 +96,13 @@ rman_init(struct rman *rm) CIRCLEQ_INIT(&rm->rm_list); rm->rm_slock = malloc(sizeof *rm->rm_slock, M_RMAN, M_NOWAIT); - if (rm->rm_slock == 0) + if (rm->rm_slock == NULL) return ENOMEM; - simple_lock_init(rm->rm_slock); + lwkt_inittoken(rm->rm_slock); - simple_lock(&rman_lock); + lwkt_gettoken(&rman_tok); TAILQ_INSERT_TAIL(&rman_head, rm, rm_link); - simple_unlock(&rman_lock); + lwkt_reltoken(&rman_tok); return 0; } @@ -128,7 +126,7 @@ rman_manage_region(struct rman *rm, u_long start, u_long end) r->r_dev = 0; r->r_rm = rm; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); for (s = CIRCLEQ_FIRST(&rm->rm_list); !CIRCLEQ_TERMCOND(s, rm->rm_list) && s->r_end < r->r_start; s = CIRCLEQ_NEXT(s, r_link)) @@ -140,7 +138,7 @@ rman_manage_region(struct rman *rm, u_long start, u_long end) CIRCLEQ_INSERT_BEFORE(&rm->rm_list, s, r, r_link); } - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return 0; } @@ -149,10 +147,10 @@ rman_fini(struct rman *rm) { struct resource *r; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); CIRCLEQ_FOREACH(r, &rm->rm_list, r_link) { if (r->r_flags & RF_ALLOCATED) { - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return EBUSY; } } @@ -166,10 +164,10 @@ rman_fini(struct rman *rm) CIRCLEQ_REMOVE(&rm->rm_list, r, r_link); free(r, M_RMAN); } - simple_unlock(rm->rm_slock); - simple_lock(&rman_lock); + lwkt_reltoken(rm->rm_slock); + lwkt_gettoken(&rman_tok); TAILQ_REMOVE(&rman_head, rm, rm_link); - simple_unlock(&rman_lock); + lwkt_reltoken(&rman_tok); free(rm->rm_slock, M_RMAN); return 0; @@ -193,7 +191,7 @@ rman_reserve_resource(struct rman *rm, u_long start, u_long end, u_long count, want_activate = (flags & RF_ACTIVE); flags &= ~RF_ACTIVE; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); for (r = CIRCLEQ_FIRST(&rm->rm_list); !CIRCLEQ_TERMCOND(r, rm->rm_list) && r->r_end < start; @@ -399,7 +397,7 @@ out: } } - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return (rv); } @@ -446,9 +444,9 @@ rman_activate_resource(struct resource *r) struct rman *rm; rm = r->r_rm; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); rv = int_rman_activate_resource(rm, r, &whohas); - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return rv; } @@ -461,28 +459,28 @@ rman_await_resource(struct resource *r, int pri, int timo) rm = r->r_rm; for (;;) { - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); rv = int_rman_activate_resource(rm, r, &whohas); if (rv != EBUSY) - return (rv); /* returns with simplelock */ + return (rv); /* returns with simple token */ if (r->r_sharehead == 0) panic("rman_await_resource"); /* * 
splhigh hopefully will prevent a race between - * simple_unlock and tsleep where a process + * lwkt_reltoken and tsleep where a process * could conceivably get in and release the resource - * before we have a chance to sleep on it. + * before we have a chance to sleep on it. YYY */ s = splhigh(); whohas->r_flags |= RF_WANTED; - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); /* YYY */ rv = tsleep(r->r_sharehead, pri, "rmwait", timo); if (rv) { splx(s); return rv; } - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); splx(s); } } @@ -507,9 +505,9 @@ rman_deactivate_resource(struct resource *r) struct rman *rm; rm = r->r_rm; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); int_rman_deactivate_resource(r); - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return 0; } @@ -607,9 +605,9 @@ rman_release_resource(struct resource *r) int rv; struct rman *rm = r->r_rm; - simple_lock(rm->rm_slock); + lwkt_gettoken(rm->rm_slock); rv = int_rman_release_resource(rm, r); - simple_unlock(rm->rm_slock); + lwkt_reltoken(rm->rm_slock); return (rv); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 3452b32241..2e58df83b5 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -32,7 +32,7 @@ * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.22 2002/12/15 09:24:23 maxim Exp $ - * $DragonFly: src/sys/kern/uipc_socket.c,v 1.5 2003/06/26 02:17:45 dillon Exp $ + * $DragonFly: src/sys/kern/uipc_socket.c,v 1.6 2003/07/06 21:23:51 dillon Exp $ */ #include "opt_inet.h" @@ -111,7 +111,7 @@ soalloc(waitok) { struct socket *so; - so = zalloci(socket_zone); + so = zalloc(socket_zone); if (so) { /* XXX race condition for reentrant kernel */ bzero(so, sizeof *so); @@ -200,7 +200,7 @@ sodealloc(struct socket *so) } #endif /* INET */ crfree(so->so_cred); - zfreei(socket_zone, so); + zfree(socket_zone, so); } int diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 6da232e03e..d00b7bf364 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -14,7 +14,7 @@ * of the author. This software is distributed AS-IS. * * $FreeBSD: src/sys/kern/vfs_aio.c,v 1.70.2.28 2003/05/29 06:15:35 alc Exp $ - * $DragonFly: src/sys/kern/vfs_aio.c,v 1.5 2003/06/26 02:17:45 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_aio.c,v 1.6 2003/07/06 21:23:51 dillon Exp $ */ /* @@ -610,6 +610,8 @@ aio_process(struct aiocblist *aiocbe) /* * The AIO daemon, most of the actual work is done in aio_process, * but the setup (and address space mgmt) is done in this routine. + * + * The MP lock is held on entry. */ static void aio_daemon(void *uproc) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 6b5f37c244..7ed1d9b99a 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -12,7 +12,7 @@ * John S. Dyson. * * $FreeBSD: src/sys/kern/vfs_bio.c,v 1.242.2.20 2003/05/28 18:38:10 alc Exp $ - * $DragonFly: src/sys/kern/vfs_bio.c,v 1.8 2003/07/03 17:24:02 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_bio.c,v 1.9 2003/07/06 21:23:51 dillon Exp $ */ /* @@ -339,7 +339,7 @@ bufinit(void) TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); - simple_lock_init(&buftimelock); + lwkt_inittoken(&buftimetoken); for (i = 0; i <= bufhashmask; i++) LIST_INIT(&bufhashtbl[i]); diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c index 232f0ad293..63771c819a 100644 --- a/sys/kern/vfs_conf.c +++ b/sys/kern/vfs_conf.c @@ -26,7 +26,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/kern/vfs_conf.c,v 1.49.2.5 2003/01/07 11:56:53 joerg Exp $ - * $DragonFly: src/sys/kern/vfs_conf.c,v 1.3 2003/06/25 03:55:57 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_conf.c,v 1.4 2003/07/06 21:23:51 dillon Exp $ */ /* @@ -233,11 +233,11 @@ done: } else { /* register with list of mounted filesystems */ - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); - /* sanity check system clock against root filesystem timestamp */ + /* sanity check system clock against root fs timestamp */ inittodr(mp->mnt_time); vfs_unbusy(mp, td); } diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 079864d384..0d5879827b 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -37,7 +37,7 @@ * * * $FreeBSD: src/sys/kern/vfs_default.c,v 1.28.2.7 2003/01/10 18:23:26 bde Exp $ - * $DragonFly: src/sys/kern/vfs_default.c,v 1.4 2003/06/26 05:55:14 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_default.c,v 1.5 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -249,7 +249,7 @@ vop_stdlock(ap) if ((l = (struct lock *)ap->a_vp->v_data) == NULL) { if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + lwkt_reltoken(&ap->a_vp->v_interlock); return 0; } @@ -273,7 +273,7 @@ vop_stdunlock(ap) if ((l = (struct lock *)ap->a_vp->v_data) == NULL) { if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + lwkt_reltoken(&ap->a_vp->v_interlock); return 0; } @@ -379,7 +379,7 @@ vop_sharedlock(ap) if (l == NULL) { if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + lwkt_reltoken(&ap->a_vp->v_interlock); return 0; } switch (flags & LK_TYPE_MASK) { @@ -473,7 +473,7 @@ vop_nolock(ap) * the interlock here. 
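 *
 * [Caller-side sketch of the LK_INTERLOCK hand-off after the token
 * conversion; an illustrative fragment, assuming the thread-argument
 * vn_lock() form used elsewhere in this tree:]
 *
 *	lwkt_gettoken(&vp->v_interlock);
 *	... inspect or modify the vnode under the token ...
 *	error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
 *	(the lock operation releases the interlock token on our behalf)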
*/ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + lwkt_reltoken(&ap->a_vp->v_interlock); return (0); #endif } @@ -490,7 +490,7 @@ vop_nounlock(ap) } */ *ap; { if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + lwkt_reltoken(&ap->a_vp->v_interlock); return (0); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index dca63a6345..5fcaeac521 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -37,7 +37,7 @@ * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_subr.c,v 1.9 2003/07/03 17:24:02 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_subr.c,v 1.10 2003/07/06 21:23:51 dillon Exp $ */ /* @@ -125,14 +125,12 @@ SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); #endif struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */ -struct simplelock mountlist_slock; -struct simplelock mntvnode_slock; +struct lwkt_token mountlist_token; +struct lwkt_token mntvnode_token; int nfs_mount_type = -1; -#ifndef NULL_SIMPLELOCKS -static struct simplelock mntid_slock; -static struct simplelock vnode_free_list_slock; -static struct simplelock spechash_slock; -#endif +static struct lwkt_token mntid_token; +static struct lwkt_token vnode_free_list_token; +static struct lwkt_token spechash_token; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; @@ -181,11 +179,11 @@ vntblinit() desiredvnodes = maxproc + vmstats.v_page_count / 4; minvnodes = desiredvnodes / 4; - simple_lock_init(&mntvnode_slock); - simple_lock_init(&mntid_slock); - simple_lock_init(&spechash_slock); + lwkt_inittoken(&mntvnode_token); + lwkt_inittoken(&mntid_token); + lwkt_inittoken(&spechash_token); TAILQ_INIT(&vnode_free_list); - simple_lock_init(&vnode_free_list_slock); + lwkt_inittoken(&vnode_free_list_token); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); /* * Initialize the filesystem syncer. @@ -200,7 +198,7 @@ vntblinit() * unmounting. Interlock is not released on failure. 
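 *
 * [Typical caller pattern after the token conversion, mirroring
 * vnlru_proc() below; sketch only:]
 *
 *	lwkt_gettoken(&mountlist_token);
 *	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) {
 *			nmp = TAILQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... work on mp; the token may have been released ...
 *		lwkt_gettoken(&mountlist_token);
 *		nmp = TAILQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp, td);
 *	}
 *	lwkt_reltoken(&mountlist_token);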
*/ int -vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp, +vfs_busy(struct mount *mp, int flags, struct lwkt_token *interlkp, struct thread *td) { int lkflags; @@ -210,7 +208,7 @@ vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp, return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { - simple_unlock(interlkp); + lwkt_reltoken(interlkp); } /* * Since all busy locks are shared except the exclusive @@ -220,7 +218,7 @@ vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp, */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { - simple_lock(interlkp); + lwkt_gettoken(interlkp); } return (ENOENT); } @@ -320,15 +318,15 @@ vfs_getvfs(fsid) { register struct mount *mp; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); return (mp); } } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); return ((struct mount *) 0); } @@ -352,7 +350,7 @@ vfs_getnewfsid(mp) fsid_t tfsid; int mtype; - simple_lock(&mntid_slock); + lwkt_gettoken(&mntid_token); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; @@ -365,7 +363,7 @@ vfs_getnewfsid(mp) } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; - simple_unlock(&mntid_slock); + lwkt_reltoken(&mntid_token); } /* @@ -463,6 +461,7 @@ vlrureclaim(struct mount *mp) int trigger; int usevnodes; int count; + int gen; /* * Calculate the trigger point, don't allow user @@ -477,7 +476,7 @@ vlrureclaim(struct mount *mp) trigger = vmstats.v_page_count * 2 / usevnodes; done = 0; - simple_lock(&mntvnode_slock); + gen = lwkt_gettoken(&mntvnode_token); count = mp->mnt_nvnodelistsize / 10 + 1; while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) { TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); @@ -486,21 +485,23 @@ vlrureclaim(struct mount *mp) if (vp->v_type != VNON && vp->v_type != VBAD && VMIGHTFREE(vp) && /* critical path opt */ - (vp->v_object == NULL || vp->v_object->resident_page_count < trigger) && - simple_lock_try(&vp->v_interlock) + (vp->v_object == NULL || vp->v_object->resident_page_count < trigger) ) { - simple_unlock(&mntvnode_slock); - if (VMIGHTFREE(vp)) { - vgonel(vp, curthread); - done++; + lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&mntvnode_token, &gen) == 0) { + if (VMIGHTFREE(vp)) { + vgonel(vp, curthread); + done++; + } else { + lwkt_reltoken(&vp->v_interlock); + } } else { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } - simple_lock(&mntvnode_slock); } --count; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); return done; } @@ -533,18 +534,18 @@ vnlru_proc(void) continue; } done = 0; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, td)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } done += vlrureclaim(mp); - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); if (done == 0) { vnlru_nowhere++; tsleep(td, PPAUSE, "vlrup", hz * 3); @@ -576,6 +577,8 @@ getnewvnode(tag, mp, vops, vpp) struct vnode **vpp; { 
int s; + int gen; + int vgen; struct thread *td = curthread; /* XXX */ struct vnode *vp = NULL; vm_object_t object; @@ -602,7 +605,7 @@ getnewvnode(tag, mp, vops, vpp) * a new vnode if we can't find one or if we have not reached a * good minimum for good LRU performance. */ - simple_lock(&vnode_free_list_slock); + gen = lwkt_gettoken(&vnode_free_list_token); if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) { int count; @@ -611,14 +614,59 @@ getnewvnode(tag, mp, vops, vpp) if (vp == NULL || vp->v_usecount) panic("getnewvnode: free vnode isn't"); - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + /* + * Get the vnode's interlock, then re-obtain + * vnode_free_list_token in case we lost it. If we + * did lose it while getting the vnode interlock, + * even if we got it back again, then retry. + */ + vgen = lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&vnode_free_list_token, &gen) != 0) { + --count; + lwkt_reltoken(&vp->v_interlock); + vp = NULL; + continue; + } + + /* + * Whew! We have both tokens. Since we didn't lose + * the free list token, VFREE had better still be set. But + * we aren't out of the woods yet. We have to get + * the object (may block). If the vnode is not + * suitable then move it to the end of the list + * if we can. If we can't move it to the end of the + * list, retry again. + */ if ((VOP_GETVOBJECT(vp, &object) == 0 && - (object->resident_page_count || object->ref_count)) || - !simple_lock_try(&vp->v_interlock)) { - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + (object->resident_page_count || object->ref_count)) + ) { + if (lwkt_gentoken(&vp->v_interlock, &vgen) == 0 && + lwkt_gentoken(&vnode_free_list_token, &gen) == 0 + ) { + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + } else { + --count; + } + lwkt_reltoken(&vp->v_interlock); + vp = NULL; + continue; + } + + /* + * Still not out of the woods. VOBJECT might have + * blocked; if we did not retain our tokens we have + * to retry. + */ + if (lwkt_gentoken(&vp->v_interlock, &vgen) != 0 || + lwkt_gentoken(&vnode_free_list_token, &gen) != 0) { + --count; vp = NULL; continue; } + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + KKASSERT(vp->v_flag & VFREE); + if (LIST_FIRST(&vp->v_cache_src)) { /* * note: nameileafonly sysctl is temporary, @@ -632,7 +680,7 @@ getnewvnode(tag, mp, vops, vpp) * subdirectories. */ if (cache_leaf_test(vp) < 0) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); vp = NULL; continue; @@ -646,7 +694,7 @@ getnewvnode(tag, mp, vops, vpp) * turned off (otherwise we reuse them * too quickly).
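The token/generation dance added above condenses to the following C sketch. The lwkt calls, token names, and list operations are the ones this patch introduces; the retry label, the elided NULL checks, and the standalone framing are illustrative assumptions rather than code from the tree:

        /*
         * Sketch: take a candidate off the free list while detecting token
         * loss across blocking points.  lwkt_gettoken() returns a generation
         * count; lwkt_gentoken() returns non-zero if the token was lost (and
         * re-acquired) since that generation was recorded.
         */
        gen = lwkt_gettoken(&vnode_free_list_token);
retry:
        vp = TAILQ_FIRST(&vnode_free_list);
        vgen = lwkt_gettoken(&vp->v_interlock);         /* may block */
        if (lwkt_gentoken(&vnode_free_list_token, &gen) != 0) {
                lwkt_reltoken(&vp->v_interlock);        /* list changed under us */
                goto retry;
        }
        VOP_GETVOBJECT(vp, &object);                    /* may also block */
        if (lwkt_gentoken(&vp->v_interlock, &vgen) != 0 ||
            lwkt_gentoken(&vnode_free_list_token, &gen) != 0) {
                lwkt_reltoken(&vp->v_interlock);
                goto retry;                             /* lost a token, start over */
        }
        TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* both held throughout */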
*/ - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); vp = NULL; continue; @@ -660,13 +708,13 @@ getnewvnode(tag, mp, vops, vpp) vp->v_flag |= VDOOMED; vp->v_flag &= ~VFREE; freevnodes--; - simple_unlock(&vnode_free_list_slock); - cache_purge(vp); + lwkt_reltoken(&vnode_free_list_token); + cache_purge(vp); /* YYY may block */ vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, td); } else { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } #ifdef INVARIANTS @@ -689,10 +737,10 @@ getnewvnode(tag, mp, vops, vpp) vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ } else { - simple_unlock(&vnode_free_list_slock); + lwkt_reltoken(&vnode_free_list_token); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); - simple_lock_init(&vp->v_interlock); + lwkt_inittoken(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); @@ -724,7 +772,7 @@ insmntque(vp, mp) register struct mount *mp; { - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); /* * Delete from old mount point vnode list, if on one. */ @@ -738,12 +786,12 @@ insmntque(vp, mp) * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); return; } TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize++; - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); } /* @@ -875,12 +923,12 @@ vinvalbuf(struct vnode *vp, int flags, struct thread *td, /* * Destroy the copy in the VM cache, too. */ - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (VOP_GETVOBJECT(vp, &object) == 0) { vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) panic("vinvalbuf: flush failed"); @@ -1472,9 +1520,9 @@ addalias(nvp, dev) panic("addalias on non-special vnode"); nvp->v_rdev = dev; - simple_lock(&spechash_slock); + lwkt_gettoken(&spechash_token); SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext); - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); } /* @@ -1500,7 +1548,7 @@ vget(vp, flags, td) * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { if (vp->v_vxproc == curproc) { @@ -1510,7 +1558,7 @@ vget(vp, flags, td) #endif } else { vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } @@ -1530,26 +1578,26 @@ vget(vp, flags, td) * before sleeping so that multiple processes do * not try to recycle it. 
*/ - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); else vlruvp(vp); - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } return (error); } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_usecount++; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } /* @@ -1563,12 +1611,12 @@ vrele(struct vnode *vp) KASSERT(vp != NULL, ("vrele: null vp")); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return; } @@ -1591,7 +1639,7 @@ vrele(struct vnode *vp) } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } @@ -1604,7 +1652,7 @@ vput(struct vnode *vp) KASSERT(vp != NULL, ("vput: null vp")); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; @@ -1619,7 +1667,7 @@ vput(struct vnode *vp) * If we are doing a vpu, the node is already locked, * so we just need to release the vnode mutex. */ - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); VOP_INACTIVE(vp, td); if (VSHOULDFREE(vp)) vfree(vp); @@ -1714,7 +1762,7 @@ vflush(mp, rootrefs, flags) return (error); vput(rootvp); } - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) { /* @@ -1725,12 +1773,12 @@ loop: goto loop; nvp = TAILQ_NEXT(vp, v_nmntvnodes); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); continue; } /* @@ -1743,7 +1791,7 @@ loop: (VOP_GETATTR(vp, &vattr, td) == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); continue; } @@ -1752,9 +1800,9 @@ loop: * vnode data structures and we are done. */ if (vp->v_usecount == 0) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); vgonel(vp, td); - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); continue; } @@ -1764,7 +1812,7 @@ loop: * all other files, just kill them. */ if (flags & FORCECLOSE) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, td); } else { @@ -1772,30 +1820,30 @@ loop: vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); busy++; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. 
*/ - simple_lock(&rootvp->v_interlock); + lwkt_gettoken(&rootvp->v_interlock); KASSERT(busy > 0, ("vflush: not busy")); KASSERT(rootvp->v_usecount >= rootrefs, ("vflush: rootrefs")); if (busy == 1 && rootvp->v_usecount == rootrefs) { vgonel(rootvp, td); busy = 0; } else - simple_unlock(&rootvp->v_interlock); + lwkt_reltoken(&rootvp->v_interlock); } if (busy) return (EBUSY); @@ -1818,10 +1866,10 @@ vlruvp(struct vnode *vp) struct mount *mp; if ((mp = vp->v_mount) != NULL) { - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); } #endif } @@ -1893,7 +1941,7 @@ vclean(struct vnode *vp, int flags, struct thread *td) * Inline copy of vrele() since VOP_INACTIVE * has already been called. */ - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (--vp->v_usecount <= 0) { #ifdef DIAGNOSTIC if (vp->v_usecount < 0 || vp->v_writecount != 0) { @@ -1903,7 +1951,7 @@ vclean(struct vnode *vp, int flags, struct thread *td) #endif vfree(vp); } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } cache_purge(vp); @@ -1949,15 +1997,15 @@ vop_revoke(ap) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } dev = vp->v_rdev; for (;;) { - simple_lock(&spechash_slock); + lwkt_gettoken(&spechash_token); vq = SLIST_FIRST(&dev->si_hlist); - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); if (!vq) break; vgone(vq); @@ -1970,17 +2018,17 @@ vop_revoke(ap) * Release the passed interlock if the vnode will be recycled. */ int -vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct thread *td) +vrecycle(struct vnode *vp, struct lwkt_token *inter_lkp, struct thread *td) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { - simple_unlock(inter_lkp); + lwkt_reltoken(inter_lkp); } vgonel(vp, td); return (1); } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return (0); } @@ -1993,7 +2041,7 @@ vgone(struct vnode *vp) { struct thread *td = curthread; /* XXX */ - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vgonel(vp, td); } @@ -2011,7 +2059,7 @@ vgonel(struct vnode *vp, struct thread *td) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } @@ -2020,7 +2068,7 @@ vgonel(struct vnode *vp, struct thread *td) * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, td); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. @@ -2032,10 +2080,10 @@ vgonel(struct vnode *vp, struct thread *td) * if it is on one. 
*/ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) { - simple_lock(&spechash_slock); + lwkt_gettoken(&spechash_token); SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext); freedev(vp->v_rdev); - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); vp->v_rdev = NULL; } @@ -2051,19 +2099,19 @@ vgonel(struct vnode *vp, struct thread *td) */ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); - simple_lock(&vnode_free_list_slock); + lwkt_gettoken(&vnode_free_list_token); if (vp->v_flag & VFREE) TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); + lwkt_reltoken(&vnode_free_list_token); splx(s); } vp->v_type = VBAD; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } /* @@ -2077,15 +2125,15 @@ vfinddev(dev, type, vpp) { struct vnode *vp; - simple_lock(&spechash_slock); + lwkt_gettoken(&spechash_token); SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) { if (type == vp->v_type) { *vpp = vp; - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); return (1); } } - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); return (0); } @@ -2100,10 +2148,10 @@ vcount(vp) int count; count = 0; - simple_lock(&spechash_slock); + lwkt_gettoken(&spechash_token); SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext) count += vq->v_usecount; - simple_unlock(&spechash_slock); + lwkt_reltoken(&spechash_token); return (count); } @@ -2185,9 +2233,9 @@ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) struct vnode *vp; printf("Locked vnodes\n"); - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, td)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } @@ -2195,11 +2243,11 @@ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) if (VOP_ISLOCKED(vp, NULL)) vprint((char *)0, vp); } - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); } #endif @@ -2303,14 +2351,14 @@ sysctl_vnode(SYSCTL_HANDLER_ARGS) return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, p)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } again: - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { @@ -2320,22 +2368,22 @@ again: * recycled onto the same filesystem. 
*/ if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); goto again; } nvp = TAILQ_NEXT(vp, v_nmntvnodes); - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); } - simple_unlock(&mntvnode_slock); - simple_lock(&mountlist_slock); + lwkt_reltoken(&mntvnode_token); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, p); } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); return (0); } @@ -2668,7 +2716,7 @@ vfs_msync(struct mount *mp, int flags) int tries; tries = 5; - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { if (vp->v_mount != mp) { @@ -2688,7 +2736,7 @@ loop: */ if ((vp->v_flag & VOBJDIRTY) && (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, td)) { if (VOP_GETVOBJECT(vp, &obj) == 0) { @@ -2696,7 +2744,7 @@ loop: } vput(vp); } - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) { if (--tries > 0) goto loop; @@ -2704,7 +2752,7 @@ loop: } } } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); } /* @@ -2728,7 +2776,7 @@ vfree(vp) int s; s = splbio(); - simple_lock(&vnode_free_list_slock); + lwkt_gettoken(&vnode_free_list_token); KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free")); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -2736,7 +2784,7 @@ vfree(vp) TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; - simple_unlock(&vnode_free_list_slock); + lwkt_reltoken(&vnode_free_list_token); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); @@ -2749,11 +2797,11 @@ vbusy(vp) int s; s = splbio(); - simple_lock(&vnode_free_list_slock); + lwkt_gettoken(&vnode_free_list_token); KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; - simple_unlock(&vnode_free_list_slock); + lwkt_reltoken(&vnode_free_list_token); vp->v_flag &= ~(VFREE|VAGE); splx(s); } @@ -2769,7 +2817,7 @@ vbusy(vp) int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { - simple_lock(&vp->v_pollinfo.vpi_lock); + lwkt_gettoken(&vp->v_pollinfo.vpi_token); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested @@ -2781,12 +2829,12 @@ vn_pollrecord(struct vnode *vp, struct thread *td, int events) events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(td, &vp->v_pollinfo.vpi_selinfo); - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); return 0; } @@ -2801,7 +2849,7 @@ vn_pollevent(vp, events) struct vnode *vp; short events; { - simple_lock(&vp->v_pollinfo.vpi_lock); + lwkt_gettoken(&vp->v_pollinfo.vpi_token); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't @@ -2818,7 +2866,7 @@ vn_pollevent(vp, events) vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); } /* @@ -2830,12 
+2878,12 @@ void vn_pollgone(vp) struct vnode *vp; { - simple_lock(&vp->v_pollinfo.vpi_lock); + lwkt_gettoken(&vp->v_pollinfo.vpi_token); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); } @@ -2940,9 +2988,9 @@ sync_fsync(ap) * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ - simple_lock(&mountlist_slock); - if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, td) != 0) { - simple_unlock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_token, td) != 0) { + lwkt_reltoken(&mountlist_token); return (0); } asyncflag = mp->mnt_flag & MNT_ASYNC; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b9e472317f..4597066500 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -37,7 +37,7 @@ * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.7 2003/06/27 01:53:25 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.8 2003/07/06 21:23:51 dillon Exp $ */ /* For 4.3 integer FS ID compatibility */ @@ -176,16 +176,16 @@ mount(struct mount_args *uap) vput(vp); return (EBUSY); } - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); vfs_unbusy(mp, td); vput(vp); return (EBUSY); } vp->v_flag |= VMOUNT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); VOP_UNLOCK(vp, 0, td); @@ -261,15 +261,15 @@ mount(struct mount_args *uap) return (ENODEV); } } - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); vput(vp); return (EBUSY); } vp->v_flag |= VMOUNT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); /* * Allocate and initialize the filesystem. 
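The VMOUNT handling converted above is a plain token-guarded test-and-set; stripped of the surrounding mount logic it reads roughly as follows (names and the EBUSY result are from the patch, the cleanup step is abbreviated):

        lwkt_gettoken(&vp->v_interlock);
        if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
                lwkt_reltoken(&vp->v_interlock);
                /* undo: vput() or vfs_unbusy() as the call site requires */
                return (EBUSY);                 /* lost the race to another mount */
        }
        vp->v_flag |= VMOUNT;                   /* claim the covered vnode */
        lwkt_reltoken(&vp->v_interlock);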
@@ -331,9 +331,9 @@ update: mp->mnt_syncer = NULL; } vfs_unbusy(mp, td); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_flag &= ~VMOUNT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); vrele(vp); return (error); } @@ -343,13 +343,13 @@ update: */ cache_purge(vp); if (!error) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_flag &= ~VMOUNT; vp->v_mountedhere = mp; - simple_unlock(&vp->v_interlock); - simple_lock(&mountlist_slock); + lwkt_reltoken(&vp->v_interlock); + lwkt_gettoken(&mountlist_token); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); checkdirs(vp); VOP_UNLOCK(vp, 0, td); if ((mp->mnt_flag & MNT_RDONLY) == 0) @@ -358,9 +358,9 @@ update: if ((error = VFS_START(mp, 0, td)) != 0) vrele(vp); } else { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_flag &= ~VMOUNT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, td); free((caddr_t)mp, M_MOUNT); @@ -480,9 +480,9 @@ dounmount(struct mount *mp, int flags, struct thread *td) int error; int async_flag; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; @@ -490,7 +490,7 @@ dounmount(struct mount *mp, int flags, struct thread *td) if (flags & MNT_FORCE) mp->mnt_kern_flag |= MNTK_UNMOUNTF; error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | - ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_slock, td); + ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_token, td); if (error) { mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); if (mp->mnt_kern_flag & MNTK_MWAIT) @@ -511,14 +511,14 @@ dounmount(struct mount *mp, int flags, struct thread *td) (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, td); - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); if (error) { if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); mp->mnt_flag |= async_flag; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, - &mountlist_slock, td); + &mountlist_token, td); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); return (error); @@ -531,7 +531,7 @@ dounmount(struct mount *mp, int flags, struct thread *td) mp->mnt_vfc->vfc_refcount--; if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) panic("unmount: dangling vnode"); - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, td); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_token, td); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); @@ -560,9 +560,9 @@ sync(struct sync_args *uap) struct mount *mp, *nmp; int asyncflag; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, td)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } @@ -573,11 +573,11 @@ sync(struct sync_args *uap) VFS_SYNC(mp, MNT_NOWAIT, td); mp->mnt_flag |= asyncflag; } - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } - simple_unlock(&mountlist_slock); + 
lwkt_reltoken(&mountlist_token); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine @@ -715,9 +715,9 @@ getfsstat(struct getfsstat_args *uap) maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, td)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } @@ -731,7 +731,7 @@ getfsstat(struct getfsstat_args *uap) if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; @@ -745,11 +745,11 @@ getfsstat(struct getfsstat_args *uap) sfsp += sizeof(*sp); } count++; - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 9899527667..4aab82bba8 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -37,7 +37,7 @@ * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $ - * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.6 2003/06/27 01:53:25 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.7 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -634,10 +634,10 @@ debug_vn_lock(struct vnode *vp, int flags, struct thread *td, do { if ((flags & LK_INTERLOCK) == 0) - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curproc) { vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vn_lock", 0); error = ENOENT; } else { diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 1395f60536..55320a0503 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -32,7 +32,7 @@ * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.26 2003/01/24 05:11:33 sam Exp $ - * $DragonFly: src/sys/netinet/in_pcb.c,v 1.4 2003/06/25 03:56:04 dillon Exp $ + * $DragonFly: src/sys/netinet/in_pcb.c,v 1.5 2003/07/06 21:23:52 dillon Exp $ */ #include "opt_ipsec.h" @@ -152,7 +152,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct thread *td) int error; #endif - inp = zalloci(pcbinfo->ipi_zone); + inp = zalloc(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); bzero((caddr_t)inp, sizeof(*inp)); @@ -162,7 +162,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct thread *td) #ifdef IPSEC error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0) { - zfreei(pcbinfo->ipi_zone, inp); + zfree(pcbinfo->ipi_zone, inp); return error; } #endif /*IPSEC*/ @@ -573,7 +573,7 @@ in_pcbdetach(inp) rtfree(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; - zfreei(ipi->ipi_zone, inp); + zfree(ipi->ipi_zone, inp); } /* diff --git a/sys/netproto/smb/smb_iod.c b/sys/netproto/smb/smb_iod.c index 8df762d2a8..2cd4113fcf 100644 --- a/sys/netproto/smb/smb_iod.c +++ b/sys/netproto/smb/smb_iod.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/netsmb/smb_iod.c,v 1.1.2.2 2002/04/23 03:45:01 bp Exp $ - * $DragonFly: src/sys/netproto/smb/smb_iod.c,v 1.4 2003/06/27 01:53:26 dillon Exp $ + * $DragonFly: src/sys/netproto/smb/smb_iod.c,v 1.5 2003/07/06 21:23:53 dillon Exp $ */ #include @@ -382,7 +382,7 @@ smb_iod_request(struct smbiod *iod, int event, void *ident) return 0; } smb_iod_wakeup(iod); - msleep(evp, SMB_IOD_EVLOCKPTR(iod), PWAIT | PDROP, "90evw", 0); + smb_sleep(evp, SMB_IOD_EVLOCKPTR(iod), PWAIT | PDROP, "90evw", 0); error = evp->ev_error; free(evp, M_SMBIOD); return error; @@ -443,7 +443,7 @@ smb_iod_addrq(struct smb_rq *rqp) if (iod->iod_muxcnt < vcp->vc_maxmux) break; iod->iod_muxwant++; - msleep(&iod->iod_muxwant, SMB_IOD_RQLOCKPTR(iod), + smb_sleep(&iod->iod_muxwant, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90mux", 0); } iod->iod_muxcnt++; @@ -469,7 +469,7 @@ smb_iod_removerq(struct smb_rq *rqp) SMB_IOD_RQLOCK(iod); while (rqp->sr_flags & SMBR_XLOCK) { rqp->sr_flags |= SMBR_XLOCKWANT; - msleep(rqp, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90xrm", 0); + smb_sleep(rqp, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90xrm", 0); } TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); iod->iod_muxcnt--; @@ -502,7 +502,7 @@ smb_iod_waitrq(struct smb_rq *rqp) } SMBRQ_SLOCK(rqp); if (rqp->sr_rpgen == rqp->sr_rplast) - msleep(&rqp->sr_state, SMBRQ_SLOCKPTR(rqp), PWAIT, "90wrq", 0); + smb_sleep(&rqp->sr_state, SMBRQ_SLOCKPTR(rqp), PWAIT, "90wrq", 0); rqp->sr_rplast++; SMBRQ_SUNLOCK(rqp); error = rqp->sr_lerror; diff --git a/sys/netproto/smb/smb_rq.c b/sys/netproto/smb/smb_rq.c index 66444a490d..759110e9c2 100644 --- a/sys/netproto/smb/smb_rq.c +++ b/sys/netproto/smb/smb_rq.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netsmb/smb_rq.c,v 1.1.2.2 2002/04/23 03:45:01 bp Exp $ - * $DragonFly: src/sys/netproto/smb/smb_rq.c,v 1.3 2003/06/25 03:56:06 dillon Exp $ + * $DragonFly: src/sys/netproto/smb/smb_rq.c,v 1.4 2003/07/06 21:23:53 dillon Exp $ */ #include #include @@ -179,7 +179,7 @@ smb_rq_enqueue(struct smb_rq *rqp) for (;;) { SMBS_ST_LOCK(ssp); if (ssp->ss_flags & SMBS_RECONNECTING) { - msleep(&ssp->ss_vcgenid, SMBS_ST_LOCKPTR(ssp), + smb_sleep(&ssp->ss_vcgenid, SMBS_ST_LOCKPTR(ssp), PWAIT | PDROP, "90trcn", hz); if (smb_proc_intr(rqp->sr_cred->scr_td)) return EINTR; diff --git a/sys/netproto/smb/smb_subr.c b/sys/netproto/smb/smb_subr.c index a420ef6e31..04685ca175 100644 --- a/sys/netproto/smb/smb_subr.c +++ b/sys/netproto/smb/smb_subr.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netsmb/smb_subr.c,v 1.1.2.2 2001/09/03 08:55:11 bp Exp $ - * $DragonFly: src/sys/netproto/smb/smb_subr.c,v 1.5 2003/06/25 03:56:06 dillon Exp $ + * $DragonFly: src/sys/netproto/smb/smb_subr.c,v 1.6 2003/07/06 21:23:53 dillon Exp $ */ #include #include @@ -423,15 +423,21 @@ kthread_create2(void (*func)(void *), void *arg, return 0; } +/* + * smb_sleep() icky compat routine. Leave the token held through the tsleep + * to interlock against the sleep. Remember that the token could be lost + * since we blocked, so reget or release as appropriate. 
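+ *
+ * A typical call site, for illustration (SMB_IOD_EVLOCKPTR is used by the
+ * smb_iod.c callers above; the matching SMB_IOD_EVLOCK acquire wrapper is
+ * assumed):
+ *
+ *	SMB_IOD_EVLOCK(iod);
+ *	(queue the event and wake the iod thread)
+ *	smb_sleep(evp, SMB_IOD_EVLOCKPTR(iod), PWAIT | PDROP, "90evw", 0);
+ *
+ * With PDROP the token is released inside smb_sleep() and must not be
+ * released again by the caller; without PDROP the token is held (possibly
+ * re-acquired) on return.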
+ */ int -msleep(void *chan, struct simplelock *mtx, int pri, const char *wmesg, int timo) +smb_sleep(void *chan, struct lwkt_token *mtx, int pri, const char *wmesg, int timo) { int error; - if (mtx) - simple_unlock(mtx); error = tsleep(chan, pri, wmesg, timo); if ((pri & PDROP) == 0 && mtx) - simple_lock(mtx); + lwkt_regettoken(mtx); + else if (mtx) + lwkt_reltoken(mtx); return error; } + diff --git a/sys/netproto/smb/smb_subr.h b/sys/netproto/smb/smb_subr.h index 115c3a0447..fdc09641b8 100644 --- a/sys/netproto/smb/smb_subr.h +++ b/sys/netproto/smb/smb_subr.h @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netsmb/smb_subr.h,v 1.1.2.1 2001/05/22 08:32:34 bp Exp $ - * $DragonFly: src/sys/netproto/smb/smb_subr.h,v 1.4 2003/06/25 03:56:06 dillon Exp $ + * $DragonFly: src/sys/netproto/smb/smb_subr.h,v 1.5 2003/07/06 21:23:53 dillon Exp $ */ #ifndef _NETSMB_SMB_SUBR_H_ #define _NETSMB_SMB_SUBR_H_ @@ -80,11 +80,11 @@ void m_dumpm(struct mbuf *m); #include #define lockdestroy(lock) -#define smb_slock simplelock -#define smb_sl_init(mtx, desc) simple_lock_init(mtx) +#define smb_slock lwkt_token +#define smb_sl_init(mtx, desc) lwkt_inittoken(mtx) #define smb_sl_destroy(mtx) -#define smb_sl_lock(mtx) simple_lock(mtx) -#define smb_sl_unlock(mtx) simple_unlock(mtx) +#define smb_sl_lock(mtx) lwkt_gettoken(mtx) +#define smb_sl_unlock(mtx) lwkt_reltoken(mtx) #define SMB_STRFREE(p) do { if (p) smb_strfree(p); } while(0) @@ -140,7 +140,7 @@ extern smb_unichar smb_unieol; struct mbchain; struct proc; struct thread; -struct simplelock; +struct lwkt_token; struct smb_vc; struct smb_rq; @@ -172,7 +172,7 @@ int smb_checksmp(void); */ int kthread_create2(void (*func)(void *), void *arg, struct proc **newpp, int flags, const char *fmt, ...); -int msleep(void *chan, struct simplelock *mtx, int pri, const char *wmesg, int timo); +int smb_sleep(void *chan, struct lwkt_token *mtx, int pri, const char *wmesg, int timo); #endif /* !_NETSMB_SMB_SUBR_H_ */ diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index 20d113942c..2544c2f30c 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -1,5 +1,5 @@ /* $FreeBSD: src/sys/opencrypto/crypto.c,v 1.4.2.7 2003/06/03 00:09:02 sam Exp $ */ -/* $DragonFly: src/sys/opencrypto/crypto.c,v 1.4 2003/06/29 03:28:45 dillon Exp $ */ +/* $DragonFly: src/sys/opencrypto/crypto.c,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ /* $OpenBSD: crypto.c,v 1.38 2002/06/11 11:14:29 beck Exp $ */ /* * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) @@ -837,40 +837,26 @@ void crypto_freereq(struct cryptop *crp) { struct cryptodesc *crd; - int s; - - if (crp == NULL) - return; - /* NB: see below for an explanation */ - s = splcrypto(); - while ((crd = crp->crp_desc) != NULL) { - crp->crp_desc = crd->crd_next; - zfree(cryptodesc_zone, crd); + if (crp) { + while ((crd = crp->crp_desc) != NULL) { + crp->crp_desc = crd->crd_next; + zfree(cryptodesc_zone, crd); + } + zfree(cryptop_zone, crp); } - zfree(cryptop_zone, crp); - splx(s); } /* - * Acquire a set of crypto descriptors. + * Acquire a set of crypto descriptors. The descriptors are self-contained + * so no special spl protection is necessary. */ struct cryptop * crypto_getreq(int num) { struct cryptodesc *crd; struct cryptop *crp; - int s; - /* - * Must interlock access to the zone. Calls may come in - * at raised ipl from network protocols, but in general - * we cannot be certain where we'll be called from.
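For context on the simplified zone usage, a caller pairs these routines roughly as shown below. This is a sketch only: the two-descriptor request is just an example, and only the setup-failure lifecycle is shown (a successfully dispatched request is freed later by its completion path):

        struct cryptop *crp;

        crp = crypto_getreq(2);         /* e.g. one cipher + one MAC descriptor */
        if (crp == NULL)
                return (ENOBUFS);
        /* fill in the crp_desc chain, buffer pointers, callback, ... */
        /* on any setup error: */
        crypto_freereq(crp);            /* frees crp and its whole descriptor chain */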
We - * could use zalloci/zfreei which is safe to be called - * from anywhere or use splhigh, but for now splcrypto - * is safe as it blocks crypto drivers and network threads. - */ - s = splcrypto(); crp = zalloc(cryptop_zone); if (crp != NULL) { bzero(crp, sizeof (*crp)); @@ -878,16 +864,14 @@ crypto_getreq(int num) crd = zalloc(cryptodesc_zone); if (crd == NULL) { crypto_freereq(crp); - splx(s); - return NULL; + crp = NULL; + break; } - bzero(crd, sizeof (*crd)); crd->crd_next = crp->crp_desc; crp->crp_desc = crd; } } - splx(s); return crp; } diff --git a/sys/platform/pc32/apic/apic_ipl.s b/sys/platform/pc32/apic/apic_ipl.s index 7d0e92860f..7c368389e7 100644 --- a/sys/platform/pc32/apic/apic_ipl.s +++ b/sys/platform/pc32/apic/apic_ipl.s @@ -1,6 +1,6 @@ /*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 1997, by Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,412 +23,72 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/isa/apic_ipl.s,v 1.27.2.2 2000/09/30 02:49:35 ps Exp $ - * $DragonFly: src/sys/platform/pc32/apic/apic_ipl.s,v 1.6 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/apic/apic_ipl.s,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ -#if 0 - .data ALIGN_DATA -/* - * Routines used by splz_unpend to build an interrupt frame from a - * trap frame. The _vec[] routines build the proper frame on the stack, - * then call one of _Xintr0 thru _XintrNN. - * - * used by: - * i386/isa/apic_ipl.s (this file): splz_unpend JUMPs to HWIs. - * i386/isa/clock.c: setup _vec[clock] to point at _vec8254. - */ - .globl _vec -_vec: - .long vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7 - .long vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15 - .long vec16, vec17, vec18, vec19, vec20, vec21, vec22, vec23 + /* + * Interrupt mask for APIC interrupts, defaults to all hardware + * interrupts turned off. + */ -/* - * Note: - * This is the UP equivilant of _imen. - * It is OPAQUE, and must NOT be accessed directly. - * It MUST be accessed along with the IO APIC as a 'critical region'. - * Accessed by: - * INTREN() - * INTRDIS() - * MAYBE_MASK_IRQ - * MAYBE_UNMASK_IRQ - * imen_dump() - */ .p2align 2 /* MUST be 32bit aligned */ - .globl _apic_imen -_apic_imen: - .long HWI_MASK + .globl apic_imen +apic_imen: + .long HWI_MASK -/* - * - */ .text SUPERALIGN_TEXT -/* - * splz() - dispatch pending interrupts after cpl reduced - * - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. However, since ipending can change at any time - * (by an interrupt or, with SMP, by another cpu), we have to - * repeat the check. At the moment we must own the MP lock in - * the SMP case because the interruput handlers require it. We - * loop until no unmasked pending interrupts remain. - * - * No new unmaksed pending interrupts will be added during the - * loop because, being unmasked, the interrupt code will be able - * to execute the interrupts. - * - * Interrupts come in two flavors: Hardware interrupts and software - * interrupts. 
We have to detect the type of interrupt (based on the - * position of the interrupt bit) and call the appropriate dispatch - * routine. - * - * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't - * rely on the secondary btrl tests. - */ - pushl %ebx - movl _curthread,%ebx - movl TD_CPL(%ebx),%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl $0,_reqpri - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne splz_unpend - popl %ebx - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - lock - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - * - * The vec[] routines build the proper frame on the stack so - * the interrupt will eventually return to the caller or splz, - * then calls one of _Xintr0 thru _XintrNN. + * Functions to enable and disable a hardware interrupt. Generally + * called with only one bit set in the mask but can handle multiple + * bits to present the same API as the ICU. */ - popl %ebx - jmp *_vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax /* save cpl across call */ - orl imasks(,%ecx,4),%eax - movl %eax,TD_CPL(%ebx) /* set cpl for SWI */ - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,TD_CPL(%ebx) /* restore cpl and loop */ - jmp splz_next - -/* - * Fake clock interrupt(s) so that they appear to come from our caller instead - * of from here, so that system profiling works. - * XXX do this more generally (for all vectors; look up the C entry point). - * XXX frame bogusness stops us from just jumping to the C entry point. - * We have to clear iactive since this is an unpend call, and it will be - * set from the time of the original INT. - */ - -/* - * The 'generic' vector stubs. - */ - -#define BUILD_VEC(irq_num) \ - ALIGN_TEXT ; \ -__CONCAT(vec,irq_num): ; \ - popl %eax ; \ - pushfl ; \ - pushl $KCSEL ; \ - pushl %eax ; \ - cli ; \ - lock ; /* MP-safe */ \ - andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \ - MEXITCOUNT ; \ - APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \ - jmp __CONCAT(_Xintr,irq_num) - - BUILD_VEC(0) - BUILD_VEC(1) - BUILD_VEC(2) - BUILD_VEC(3) - BUILD_VEC(4) - BUILD_VEC(5) - BUILD_VEC(6) - BUILD_VEC(7) - BUILD_VEC(8) - BUILD_VEC(9) - BUILD_VEC(10) - BUILD_VEC(11) - BUILD_VEC(12) - BUILD_VEC(13) - BUILD_VEC(14) - BUILD_VEC(15) - BUILD_VEC(16) /* 8 additional INTs in IO APIC */ - BUILD_VEC(17) - BUILD_VEC(18) - BUILD_VEC(19) - BUILD_VEC(20) - BUILD_VEC(21) - BUILD_VEC(22) - BUILD_VEC(23) - - -/****************************************************************************** - * XXX FIXME: figure out where these belong. - */ - -/* this nonsense is to verify that masks ALWAYS have 1 and only 1 bit set */ -#define QUALIFY_MASKS_NOT - -#ifdef QUALIFY_MASKS -#define QUALIFY_MASK \ - btrl %ecx, %eax ; \ - andl %eax, %eax ; \ - jz 1f ; \ - pushl $bad_mask ; \ - call _panic ; \ -1: - -bad_mask: .asciz "bad mask" -#else -#define QUALIFY_MASK -#endif - -/* - * (soon to be) MP-safe function to clear ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It sets the associated bit in _apic_imen. 
- * It sets the mask bit of the associated IO APIC register. - */ -ENTRY(INTREN) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTRDIS) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ - bsfl %eax, %ecx /* get pin index */ - btrl %ecx, apic_imen /* update apic_imen */ - - QUALIFY_MASK - + movl 4(%esp),%eax +1: + bsfl %eax,%ecx + jz 2f + btrl %ecx,%eax + btsl %ecx, apic_imen shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - - movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - andl $~IOART_INTMASK, %eax /* clear mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + jz 2f + movl %ecx, (%edx) /* target register index */ + orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ ret -/* - * (soon to be) MP-safe function to set ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It clears the associated bit in apic_imen. - * It clears the mask bit of the associated IO APIC register. - */ -ENTRY(INTRDIS) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTREN) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ + movl 4(%esp), %eax /* mask into %eax */ +1: bsfl %eax, %ecx /* get pin index */ - btsl %ecx, apic_imen /* update _apic_imen */ - - QUALIFY_MASK - + jz 2f + btrl %ecx,%eax + btrl %ecx, apic_imen /* update apic_imen */ shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - + jz 2f movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - orl $IOART_INTMASK, %eax /* set mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + andl $~IOART_INTMASK, 16(%edx) /* clear mask bit */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ - ret - - -/****************************************************************************** - * - */ - - -/* - * void write_ioapic_mask(int apic, u_int mask); - */ - -#define _INT_MASK 0x00010000 -#define _PIN_MASK 0x00ffffff - -#define _OLD_ESI 0(%esp) -#define _OLD_EBX 4(%esp) -#define _RETADDR 8(%esp) -#define _APIC 12(%esp) -#define _MASK 16(%esp) - - ALIGN_TEXT -write_ioapic_mask: - pushl %ebx /* scratch */ - pushl %esi /* scratch */ - - movl apic_imen, %ebx - xorl _MASK, %ebx /* %ebx = _apic_imen ^ mask */ - andl $_PIN_MASK, %ebx /* %ebx = _apic_imen & 0x00ffffff */ - jz all_done /* no change, return */ - - movl _APIC, %esi /* APIC # */ - movl ioapic, %ecx - movl (%ecx,%esi,4), %esi /* %esi holds APIC base address */ - -next_loop: /* %ebx = diffs, %esi = APIC base */ - bsfl %ebx, %ecx /* %ecx = index if 1st/next set bit */ - jz all_done - - btrl %ecx, %ebx /* clear this bit in diffs */ - leal 16(,%ecx,2), %edx /* calculate register index */ - - movl %edx, (%esi) /* write the target register index */ - movl 16(%esi), %eax /* read the target register data */ - - btl %ecx, _MASK /* test for mask or unmask */ - jnc clear /* bit is clear */ - orl $_INT_MASK, %eax /* set mask bit */ - jmp write -clear: andl $~_INT_MASK, %eax /* clear mask bit */ - -write: movl %eax, 16(%esi) /* write the APIC register data */ - - jmp next_loop /* try another pass */ - -all_done: - popl %esi - popl %ebx - 
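The loop structure of the rewritten INTREN/INTRDIS corresponds to roughly the following C rendering. This is a sketch only: the function name and spin-lock wrappers are invented for illustration, and the int_to_apicintpin field names are inferred from the 8- and 12-byte offsets the assembly uses:

        /* Enable every IRQ whose bit is set in 'mask' (cf. INTREN above). */
        static void
        intren_sketch(u_int mask)
        {
                int irq;
                volatile u_int *apic;

                imask_lock();                           /* IMASK_LOCK */
                while (mask != 0) {
                        irq = ffs(mask) - 1;            /* bsfl: lowest set bit */
                        mask &= ~(1U << irq);
                        apic_imen &= ~(1U << irq);      /* bookkeeping mask */
                        apic = int_to_apicintpin[irq].io_addr;  /* offset 8 */
                        if (apic == NULL)
                                break;                  /* unconfigured pin, done */
                        apic[0] = int_to_apicintpin[irq].redirindex; /* offset 12 */
                        apic[4] &= ~IOART_INTMASK;      /* window register at +16 */
                }
                imask_unlock();                         /* IMASK_UNLOCK */
        }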
ret - -#undef _OLD_ESI -#undef _OLD_EBX -#undef _RETADDR -#undef _APIC -#undef _MASK - -#undef _PIN_MASK -#undef _INT_MASK - -#ifdef oldcode - -_INTREN: - movl apic_imen, %eax - notl %eax /* mask = ~mask */ - andl apic_imen, %eax /* %eax = _apic_imen & ~mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -_INTRDIS: - movl _apic_imen, %eax - orl 4(%esp), %eax /* %eax = _apic_imen | mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -#endif /* oldcode */ - - -#ifdef ready - -/* - * u_int read_io_apic_mask(int apic); - */ - ALIGN_TEXT -read_io_apic_mask: ret -/* - * Set INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void set_io_apic_mask(apic, u_int32_t bits); - */ - ALIGN_TEXT -set_io_apic_mask: - ret - -/* - * void set_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -set_ioapic_maskbit: - ret - -/* - * Clear INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void clr_io_apic_mask(int apic, u_int32_t bits); - */ - ALIGN_TEXT -clr_io_apic_mask: - ret - -/* - * void clr_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -clr_ioapic_maskbit: - ret - -#endif /** ready */ - /****************************************************************************** * */ @@ -465,4 +125,3 @@ ENTRY(apic_eoi) movl $0, lapic+0xb0 ret -#endif diff --git a/sys/platform/pc32/apic/apic_vector.s b/sys/platform/pc32/apic/apic_vector.s index 7378217655..fab5073395 100644 --- a/sys/platform/pc32/apic/apic_vector.s +++ b/sys/platform/pc32/apic/apic_vector.s @@ -1,62 +1,23 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.7 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.8 2003/07/06 21:23:49 dillon Exp $ */ #include #include - #include "i386/isa/intr_machdep.h" /* convert an absolute IRQ# into a bitmask */ -#define IRQ_BIT(irq_num) (1 << (irq_num)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) /* make an index into the IO APIC from the IRQ# */ #define REDTBL_IDX(irq_num) (0x10 + ((irq_num) * 2)) - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - movl %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(6*4(%esp)) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - MEXITCOUNT ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret - /* - * + * Push an interrupt frame in a format acceptable to doreti, reload + * the segment registers for the kernel. 
*/ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ @@ -64,23 +25,54 @@ IDTVEC(vec_name) ; \ pushal ; \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ - pushl %fs + pushl %fs ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +#define PUSH_DUMMY \ + pushfl ; /* phys int frame / flags */ \ + pushl %cs ; /* phys int frame / cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; /* pushal + 3 seg regs (dummy) */ \ + +/* + * Warning: POP_FRAME can only be used if there is no chance of a + * segment register being changed (e.g. by procfs), which is why syscalls + * have to use doreti. + */ #define POP_FRAME \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ - addl $4+4,%esp + addl $2*4,%esp ; /* dummy trap & error codes */ \ + +#define POP_DUMMY \ + addl $16*4,%esp ; \ #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 + +/* + * Interrupts are expected to already be disabled when using these + * IMASK_*() macros. + */ +#define IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ + +#define IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ #define MASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ jne 7f ; /* masked, don't mask */ \ - orl $IRQ_BIT(irq_num), apic_imen ; /* set the mask bit */ \ + orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -88,17 +80,18 @@ IDTVEC(vec_name) ; \ orl $IOART_INTMASK, %eax ; /* set the mask */ \ movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; /* already masked */ \ - IMASK_UNLOCK + IMASK_UNLOCK ; \ + /* * Test to see whether we are handling an edge or level triggered INT. * Level-triggered INTs must still be masked as we don't clear the source, * and the EOI cycle would cause redundant INTs to occur. */ #define MASK_LEVEL_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + testl $IRQ_LBIT(irq_num), apic_pin_trigger ; \ jz 9f ; /* edge, don't mask */ \ MASK_IRQ(irq_num) ; \ -9: +9: ; \ #ifdef APIC_INTR_REORDER @@ -108,27 +101,26 @@ IDTVEC(vec_name) ; \ testl apic_isrbit_location + 4 + 8 * (irq_num), %eax ; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi ; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ #else + #define EOI_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), lapic_isr1; \ + testl $IRQ_LBIT(irq_num), lapic_isr1; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ + #endif - /* * Test to see if the source is currntly masked, clear if so. 
*/ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ + andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax,(%ecx) ; /* write the index */ \ @@ -136,174 +128,189 @@ IDTVEC(vec_name) ; \ andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; \ - IMASK_UNLOCK - -#ifdef APIC_INTR_DIAGNOSTIC -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -log_intr_event: - pushf - cli - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_lock_np) - addl $4, %esp - movl CNAME(apic_itrace_debugbuffer_idx), %ecx - andl $32767, %ecx - movl PCPU(cpuid), %eax - shll $8, %eax - orl 8(%esp), %eax - movw %ax, CNAME(apic_itrace_debugbuffer)(,%ecx,2) - incl %ecx - andl $32767, %ecx - movl %ecx, CNAME(apic_itrace_debugbuffer_idx) - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_unlock_np) - addl $4, %esp - popf - ret - + IMASK_UNLOCK ; \ -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 ; \ +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti + * - Mask the interrupt and reenable its source + * - If we cannot take the interrupt set its fpending bit and + * doreti. + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask and doreti. + * + * YYY can cache gd base pointer instead of using hidden %fs prefixes. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + EOI_IRQ(irq_num) ; \ + incl PCPU(intr_nesting_level) ; \ + movl PCPU(curthread),%ebx ; \ + movl TD_CPL(%ebx),%eax ; \ pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - movl $(irq_num), %eax ; \ - cmpl $APIC_INTR_DIAGNOSTIC_IRQ, %eax ; \ - jne 7f ; \ - pushl $id ; \ - call log_intr_event ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num), %eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ addl $4, %esp ; \ -7: ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax -#else -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 -#endif + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ -#define APIC_ITRACE_ENTER 1 -#define APIC_ITRACE_EOI 2 -#define APIC_ITRACE_TRYISRLOCK 3 -#define APIC_ITRACE_GOTISRLOCK 4 -#define APIC_ITRACE_ENTER2 5 -#define APIC_ITRACE_LEAVE 6 -#define APIC_ITRACE_UNMASK 7 -#define APIC_ITRACE_ACTIVE 8 -#define APIC_ITRACE_MASKED 9 -#define APIC_ITRACE_NOISRLOCK 10 -#define APIC_ITRACE_MASKED2 11 -#define APIC_ITRACE_SPLZ 12 -#define APIC_ITRACE_DORETI 13 - -#else -#define APIC_ITRACE(name, irq_num, id) -#endif
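Restated as C-like pseudocode, the gate FAST_INTR implements is the following. A sketch only: the per-cpu fields (fpending, reqpri) are written as plain variables, and the frame push plus the final jump through doreti are elided:

        saved_cpl = td->td_cpl;                         /* pushed for doreti */
        if (td->td_pri >= TDPRI_CRIT || (saved_cpl & IRQ_LBIT(irq))) {
                fpending |= IRQ_LBIT(irq);              /* defer; source stays masked */
                reqpri = TDPRI_CRIT;                    /* make doreti replay it */
        } else {
                td->td_pri += TDPRI_CRIT;               /* enter critical section */
                fpending &= ~IRQ_LBIT(irq);
                intr_handler[irq](intr_unit[irq]);      /* run the fast handler */
                td->td_pri -= TDPRI_CRIT;
                UNMASK_IRQ(irq);                        /* reenable the source */
        }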
- -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ +/* + * Restart fast interrupt held up by critical section or cpl. + * + * - Push a dummy trap frame as required by doreti + * - The interrupt source is already masked + * - Clear the fpending bit + * - Run the handler + * - Unmask the interrupt + * - Pop the dummy frame and do a normal return + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. + * - If we can take the interrupt clear its ipending bit, + * set its irunning bit, and schedule the thread. Leave + * interrupts masked and doreti. + * + * The interrupt thread will run its handlers and loop if + * ipending is found to be set. ipending/irunning interlock + * the interrupt thread with the interrupt. The handler calls + * UNPEND when it is through. + * + * Note that we do not enable interrupts when calling sched_ithd. + * YYY sched_ithd may preempt us synchronously (fix interrupt stacking) + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ -/* XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; \ - mov %ax, %fs ; \ -; \ maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ + incl PCPU(intr_nesting_level) ; \ movl PCPU(curthread),%ebx ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%eax) ; \ - jne 2f ; /* this INT masked */ \ + movl TD_CPL(%ebx),%eax ; \ + pushl %eax ; /* cpl to restore */ \ cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ - jge 2f ; /* in critical sec */ \ -; \ - incb PCPU(intr_nesting_level) ; \ -; \ - /* entry point used by doreti_unpend for HWIs.
*/ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl PCPU(curthread), %ebx ; \ - movl TD_MACH+MTD_CPL(%ebx), %eax ; \ - pushl %eax ; /* cpl restored by doreti */ \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, TD_MACH+MTD_CPL(%ebx) ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), PCPU(ipending) ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num),PCPU(irunning) ; \ + jnz 1f ; \ + testl $IRQ_LBIT(irq_num),%eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave the interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + /* set running bit, clear pending bit, run handler */ \ + orl $IRQ_LBIT(irq_num), PCPU(irunning) ; \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ + pushl $irq_num ; \ + call sched_ithd ; \ addl $4,%esp ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping, make per-cpu YYY */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ +5: ; \ MEXITCOUNT ; \ jmp doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ + +/* + * Unmask a slow interrupt. This function is used by interrupt threads + * after they have descheduled themselves to reenable interrupts and + * possibly cause a reschedule to occur. The interrupt's irunning bit + * is cleared prior to unmasking. + */ + +#define INTR_UNMASK(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; /* frame for ddb backtrace */ \ + movl %esp, %ebp ; \ + andl $~IRQ_LBIT(irq_num), PCPU(irunning) ; \ + UNMASK_IRQ(irq_num) ; \ + popl %ebp ; \ + ret ; \ + +#if 0 + /* XXX forward_irq to cpu holding the BGL?
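The ipending/irunning interlock described above has the interrupt thread as its other half. A hedged C model of that side, assuming the loop shape the comment describes (the real thread code and sched_ithd live elsewhere in the kernel; names here are illustrative):

#include <stdio.h>

static unsigned ipending;	/* set by INTR when the thread is already running */
static unsigned irunning;	/* set while the interrupt thread is active */

static void run_handlers(int irq) { printf("ithread handlers for irq %d\n", irq); }
static void unmask_irq(int irq)   { printf("unmask irq %d\n", irq); }

static void
ithread_model(int irq)
{
	do {
		ipending &= ~(1u << irq);
		run_handlers(irq);
		/* loop if the INTR stub flagged the irq again meanwhile */
	} while (ipending & (1u << irq));
	irunning &= ~(1u << irq);	/* what INTR_UNMASK does first... */
	unmask_irq(irq);		/* ...before re-enabling the source */
}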
*/ + ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ movl $TDPRI_CRIT,_reqpri ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%ebx) ; \ + testl $IRQ_LBIT(irq_num), TD_CPL(%ebx) ; \ jne 4f ; /* this INT masked */ \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ POP_FRAME ; /* and return */ \ iret @@ -314,6 +321,9 @@ __CONCAT(Xresume,irq_num): ; \ * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ + +#endif + .text SUPERALIGN_TEXT .globl Xspuriousint @@ -329,8 +339,8 @@ Xspuriousint: */ .text SUPERALIGN_TEXT - .globl _Xinvltlb -_Xinvltlb: + .globl Xinvltlb +Xinvltlb: pushl %eax #ifdef COUNT_XINVLTLB_HITS @@ -353,6 +363,7 @@ _Xinvltlb: iret +#if 0 #ifdef BETTER_CLOCK /* @@ -413,13 +424,14 @@ Xcpucheckstate: iret #endif /* BETTER_CLOCK */ +#endif /* * Executed by a CPU when it receives an Xcpuast IPI from another CPU, * * - Signals its receipt by clearing bit cpuid in checkstate_need_ast. - * - * - We need a better method of triggering asts on other cpus. + * - MP safe in regards to setting AST_PENDING because doreti is in + * a cli mode when it checks. */ .text @@ -427,11 +439,6 @@ Xcpucheckstate: .globl Xcpuast Xcpuast: PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs movl PCPU(cpuid), %eax lock /* checkstate_need_ast &= ~(1< @@ -74,11 +74,13 @@ apic_initialize(void) /* set the Task Priority Register as needed */ temp = lapic.tpr; temp &= ~APIC_TPR_PRIO; /* clear priority field */ -#ifdef GRAB_LOPRIO - /* Leave the BSP at TPR 0 during boot to make sure it gets interrupts */ + + /* + * Leave the BSP and TPR 0 during boot so it gets all the interrupts, + * set APs at TPR 0xF0 at boot so they get no ints. + */ if (mycpu->gd_cpuid != 0) - temp |= LOPRIO_LEVEL; /* allow INT arbitration */ -#endif + temp |= TPR_IPI_ONLY; /* disable INTs on this cpu */ lapic.tpr = temp; /* enable the local APIC */ @@ -188,7 +190,6 @@ io_apic_setup_intpin(int apic, int pin) u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ int level; - u_int eflags; target = IOART_DEST; @@ -209,14 +210,11 @@ io_apic_setup_intpin(int apic, int pin) * shouldn't and stop the carnage. 
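The apic_initialize() hunk above implements the boot-time routing policy with the local APIC task priority register: the BSP stays at TPR 0 and sees everything, while the APs sit high enough to see only IPIs. A small sketch of that policy (the 0xF0 value is taken from the comment above; the APIC_TPR_PRIO mask value is an assumption):

#include <stdio.h>

#define APIC_TPR_PRIO	0x000000ff	/* TPR priority field (assumed mask) */
#define TPR_IPI_ONLY	0x000000f0	/* "APs at TPR 0xF0 ... get no ints" */

static unsigned
boot_tpr(int cpuid, unsigned tpr)
{
	tpr &= ~APIC_TPR_PRIO;		/* clear priority field */
	if (cpuid != 0)
		tpr |= TPR_IPI_ONLY;	/* AP: block device vectors, admit IPIs */
	return tpr;
}

int
main(void)
{
	printf("BSP tpr=%#x, AP tpr=%#x\n", boot_tpr(0, 0), boot_tpr(1, 0));
	return 0;
}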
*/ vector = NRSVIDT + pin; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); /* we only deal with vectored INTs here */ if (apic_int_type(apic, pin) != 0) @@ -260,13 +258,10 @@ io_apic_setup_intpin(int apic, int pin) printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq); vector = NRSVIDT + irq; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, flags | vector); io_apic_write(apic, select + 1, target); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); } int diff --git a/sys/platform/pc32/i386/autoconf.c b/sys/platform/pc32/i386/autoconf.c index cf85a7bc5f..be65f355ff 100644 --- a/sys/platform/pc32/i386/autoconf.c +++ b/sys/platform/pc32/i386/autoconf.c @@ -35,7 +35,7 @@ * * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - * $DragonFly: src/sys/platform/pc32/i386/autoconf.c,v 1.4 2003/06/28 04:16:02 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/autoconf.c,v 1.5 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -144,9 +144,9 @@ configure(dummy) */ #ifdef APIC_IO bsp_apic_configure(); - enable_intr(); + cpu_enable_intr(); #else - enable_intr(); + cpu_enable_intr(); INTREN(IRQ_SLAVE); #endif /* APIC_IO */ diff --git a/sys/platform/pc32/i386/db_interface.c b/sys/platform/pc32/i386/db_interface.c index ea872e334b..ba56c0603b 100644 --- a/sys/platform/pc32/i386/db_interface.c +++ b/sys/platform/pc32/i386/db_interface.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ - * $DragonFly: src/sys/platform/pc32/i386/db_interface.c,v 1.3 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/db_interface.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -64,6 +64,8 @@ static int db_global_jmpbuf_valid; #define rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;}) #endif +#define VERBOSE_CPUSTOP_ON_DDBBREAK + /* * kdb_trap - field a TRACE or BPT trap */ @@ -139,7 +141,8 @@ kdb_trap(type, code, regs) #ifdef CPUSTOP_ON_DDBBREAK #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) - db_printf("\nCPU%d stopping CPUs: 0x%08x\n", cpuid, other_cpus); + db_printf("\nCPU%d stopping CPUs: 0x%08x\n", + mycpu->gd_cpuid, mycpu->gd_other_cpus); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* We stop all CPUs except ourselves (obviously) */ @@ -168,7 +171,8 @@ kdb_trap(type, code, regs) #ifdef CPUSTOP_ON_DDBBREAK #if defined(VERBOSE_CPUSTOP_ON_DDBBREAK) - db_printf("\nCPU%d restarting CPUs: 0x%08x\n", cpuid, stopped_cpus); + db_printf("\nCPU%d restarting CPUs: 0x%08x\n", + mycpu->gd_cpuid, stopped_cpus); #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */ /* Restart all the CPUs we previously stopped */ diff --git a/sys/platform/pc32/i386/exception.s b/sys/platform/pc32/i386/exception.s index 926a71203f..233c755449 100644 --- a/sys/platform/pc32/i386/exception.s +++ b/sys/platform/pc32/i386/exception.s @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
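Both hunks above collapse the explicit read_eflags()/cli/s_lock() dance into imen_lock()/imen_unlock(). A compilable sketch of what such a wrapper could look like, assuming it pairs a cli-style interrupt disable with the imen spinlock; the real version may restore the previous interrupt state instead of unconditionally re-enabling:

#include <stdatomic.h>

static atomic_flag imen_splock = ATOMIC_FLAG_INIT;

static void cpu_disable_intr(void) { /* cli on i386 */ }
static void cpu_enable_intr(void)  { /* sti on i386 */ }

static void
imen_lock(void)
{
	cpu_disable_intr();
	while (atomic_flag_test_and_set_explicit(&imen_splock,
	    memory_order_acquire))
		;	/* spin: IO APIC critical sections are short */
}

static void
imen_unlock(void)
{
	atomic_flag_clear_explicit(&imen_splock, memory_order_release);
	cpu_enable_intr();
}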
* * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.10 2003/07/03 17:24:01 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.11 2003/07/06 21:23:48 dillon Exp $ */ #include "npx.h" @@ -194,8 +194,7 @@ IDTVEC(xmm) * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. * - * Note that all calls to MP_LOCK must occur with interrupts enabled - * in order to be able to take IPI's while waiting for the lock. + * Note that we are MP through to the call to trap(). */ SUPERALIGN_TEXT @@ -216,7 +215,6 @@ alltraps_with_regs_pushed: calltrap: FAKE_MCOUNT(btrap) /* init "from" _btrap -> calltrap */ incl PCPU(cnt)+V_TRAP /* YYY per-cpu */ - MP_LOCK movl PCPU(curthread),%eax /* keep orig cpl here during call */ movl TD_CPL(%eax),%ebx call trap @@ -268,9 +266,6 @@ IDTVEC(syscall) cli /* atomic astpending access */ cmpl $0,PCPU(astpending) je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif pushl $0 /* cpl to restore */ movl $1,PCPU(intr_nesting_level) jmp doreti @@ -305,9 +300,6 @@ IDTVEC(int0x80_syscall) cli /* atomic astpending access */ cmpl $0,PCPU(astpending) je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif pushl $0 /* cpl to restore */ movl $1,PCPU(intr_nesting_level) jmp doreti @@ -318,6 +310,9 @@ IDTVEC(int0x80_syscall) * cpu_heavy_restore from being interrupted (especially since it stores * its context in a static place!), so the first thing we do is release * the critical section. + * + * The MP lock is held on entry, but for processes fork_return (esi) + * releases it. 'doreti' always runs without the MP lock. */ ENTRY(fork_trampoline) movl PCPU(curthread),%eax diff --git a/sys/platform/pc32/i386/genassym.c b/sys/platform/pc32/i386/genassym.c index 4d8d7de367..c0f416c8ea 100644 --- a/sys/platform/pc32/i386/genassym.c +++ b/sys/platform/pc32/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.20 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.21 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_user_ldt.h" @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -87,8 +88,14 @@ ASSYM(TD_SP, offsetof(struct thread, td_sp)); ASSYM(TD_PRI, offsetof(struct thread, td_pri)); ASSYM(TD_MACH, offsetof(struct thread, td_mach)); ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); +#ifdef SMP +ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount)); +#endif ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TDF_EXITED, TDF_EXITED); +#ifdef SMP +ASSYM(MP_FREE_LOCK, MP_FREE_LOCK); +#endif ASSYM(RW_OWNER, offsetof(struct lwkt_rwlock, rw_owner)); @@ -101,6 +108,8 @@ ASSYM(SRUN, SRUN); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); +ASSYM(V_FORWARDED_HITS, offsetof(struct vmmeter, v_forwarded_hits)); +ASSYM(V_FORWARDED_MISSES, offsetof(struct vmmeter, v_forwarded_misses)); ASSYM(UPAGES, UPAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); diff --git a/sys/platform/pc32/i386/i686_mem.c b/sys/platform/pc32/i386/i686_mem.c index 73ce928236..5cbb591781 100644 --- a/sys/platform/pc32/i386/i686_mem.c +++ b/sys/platform/pc32/i386/i686_mem.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. 
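The genassym.c additions above (TD_MPCOUNT, MP_FREE_LOCK, the V_FORWARDED_* counters) exist so the assembly sources can refer to C structure offsets symbolically. A minimal illustration of the mechanism, using a hypothetical stand-in struct:

#include <stddef.h>
#include <stdio.h>

struct thread_model {		/* hypothetical stand-in for struct thread */
	void	*td_sp;
	int	td_pri;
	int	td_mpcount;
};

int
main(void)
{
	/* a genassym-style tool prints offsets for the .s files to consume */
	printf("#define TD_MPCOUNT %zu\n",
	    offsetof(struct thread_model, td_mpcount));
	return 0;
}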
* * $FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.8.2.4 2002/09/24 08:12:51 mdodd Exp $ - * $DragonFly: src/sys/platform/pc32/i386/i686_mem.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/i686_mem.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -39,6 +39,7 @@ #ifdef SMP #include #endif +#include /* * i686 memory range operations @@ -268,9 +269,9 @@ i686_mrstore(struct mem_range_softc *sc) */ smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc); #else - disable_intr(); /* disable interrupts */ + mpintr_lock(); /* doesn't have to be mpintr YYY */ i686_mrstoreone((void *)sc); - enable_intr(); + mpintr_unlock(); #endif } diff --git a/sys/platform/pc32/i386/identcpu.c b/sys/platform/pc32/i386/identcpu.c index 78b837c7fe..5cb7a7e55e 100644 --- a/sys/platform/pc32/i386/identcpu.c +++ b/sys/platform/pc32/i386/identcpu.c @@ -39,7 +39,7 @@ * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp * $FreeBSD: src/sys/i386/i386/identcpu.c,v 1.80.2.15 2003/04/11 17:06:41 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/identcpu.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/identcpu.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -813,12 +813,10 @@ identblue(void) static void identifycyrix(void) { - u_int eflags; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); @@ -843,7 +841,7 @@ identifycyrix(void) else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ - write_eflags(eflags); + mpintr_unlock(); } /* @@ -1097,12 +1095,10 @@ u_int32_t longrun_modes[LONGRUN_MODE_MAX][3] = { static u_int tmx86_get_longrun_mode(void) { - u_long eflags; union msrinfo msrinfo; u_int low, high, flags, mode; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); low = LONGRUN_MODE_MASK(msrinfo.regs[0]); @@ -1118,40 +1114,36 @@ tmx86_get_longrun_mode(void) } mode = LONGRUN_MODE_UNKNOWN; out: - write_eflags(eflags); + mpintr_unlock(); return (mode); } static u_int tmx86_get_longrun_status(u_int * frequency, u_int * voltage, u_int * percentage) { - u_long eflags; u_int regs[4]; - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); do_cpuid(0x80860007, regs); *frequency = regs[0]; *voltage = regs[1]; *percentage = regs[2]; - write_eflags(eflags); + mpintr_unlock(); return (1); } static u_int tmx86_set_longrun_mode(u_int mode) { - u_long eflags; union msrinfo msrinfo; if (mode >= LONGRUN_MODE_UNKNOWN) { return (0); } - eflags = read_eflags(); - disable_intr(); + mpintr_lock(); /* Write LongRun mode values to Model Specific Register. */ msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); @@ -1166,7 +1158,7 @@ tmx86_set_longrun_mode(u_int mode) msrinfo.regs[0] = (msrinfo.regs[0] & ~0x01) | longrun_modes[mode][2]; wrmsr(MSR_TMx86_LONGRUN_FLAGS, msrinfo.msr); - write_eflags(eflags); + mpintr_unlock(); return (1); } diff --git a/sys/platform/pc32/i386/initcpu.c b/sys/platform/pc32/i386/initcpu.c index 1397558196..3ff84682f3 100644 --- a/sys/platform/pc32/i386/initcpu.c +++ b/sys/platform/pc32/i386/initcpu.c @@ -27,7 +27,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.19.2.9 2003/04/05 13:47:19 dwmalone Exp $ - * $DragonFly: src/sys/platform/pc32/i386/initcpu.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/initcpu.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -87,7 +87,7 @@ init_bluelightning(void) #endif eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); invd(); @@ -121,7 +121,7 @@ init_486dlc(void) u_char ccr0; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); invd(); ccr0 = read_cyrix_reg(CCR0); @@ -167,7 +167,7 @@ init_cy486dx(void) u_char ccr2; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); invd(); ccr2 = read_cyrix_reg(CCR2); @@ -198,7 +198,7 @@ init_5x86(void) u_char ccr2, ccr3, ccr4, pcr0; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -302,7 +302,7 @@ init_i486_on_386(void) #endif eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ @@ -322,7 +322,7 @@ init_6x86(void) u_char ccr3, ccr4; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -403,7 +403,7 @@ init_6x86MX(void) u_char ccr3, ccr4; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -483,7 +483,7 @@ init_mendocino(void) u_int64_t bbl_cr_ctl3; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); @@ -657,7 +657,7 @@ enable_K5_wt_alloc(void) * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { - disable_intr(); + cpu_disable_intr(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); @@ -701,7 +701,7 @@ enable_K6_wt_alloc(void) u_long eflags; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -763,7 +763,7 @@ enable_K6_2_wt_alloc(void) u_long eflags; eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); wbinvd(); #ifdef CPU_DISABLE_CACHE @@ -832,7 +832,7 @@ DB_SHOW_COMMAND(cyrixreg, cyrixreg) cr0 = rcr0(); if (strcmp(cpu_vendor,"CyrixInstead") == 0) { eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { diff --git a/sys/platform/pc32/i386/k6_mem.c b/sys/platform/pc32/i386/k6_mem.c index b138db21fb..db120b5b5b 100644 --- a/sys/platform/pc32/i386/k6_mem.c +++ b/sys/platform/pc32/i386/k6_mem.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/k6_mem.c,v 1.4.2.2 2002/09/16 21:58:41 dwmalone Exp $ - * $DragonFly: src/sys/platform/pc32/i386/k6_mem.c,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/k6_mem.c,v 1.3 2003/07/06 21:23:48 dillon Exp $ * */ @@ -37,6 +37,7 @@ #include #include +#include /* * A K6-2 MTRR is defined as the highest 15 bits having the address, the next @@ -167,14 +168,14 @@ k6_mrset(struct mem_range_softc *sc, struct mem_range_desc *desc, int *arg) { out: - disable_intr(); + mpintr_lock(); wbinvd(); reg = rdmsr(UWCCR); reg &= ~(0xffffffff << (32 * d)); reg |= mtrr << (32 * d); wrmsr(UWCCR, reg); wbinvd(); - enable_intr(); + mpintr_unlock(); return 0; } diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c index 31398add4a..57a51ef56a 100644 --- a/sys/platform/pc32/i386/machdep.c +++ b/sys/platform/pc32/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.21 2003/07/03 18:19:51 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.22 2003/07/06 21:23:48 dillon Exp $ */ #include "apm.h" @@ -135,6 +135,7 @@ static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); #ifdef DIRECTIO extern void ffs_rawread_setup(void); #endif /* DIRECTIO */ +static void init_locks(void); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) @@ -950,7 +951,7 @@ cpu_halt(void) * Note on cpu_idle_hlt: On an SMP system this may cause the system to * halt until the next clock tick, even if a thread is ready YYY */ -static int cpu_idle_hlt = 1; +static int cpu_idle_hlt = 0; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); @@ -1829,6 +1830,7 @@ init386(int first) * Prevent lowering of the ipl if we call tsleep() early. */ gd = &CPU_prvspace[0].mdglobaldata; + bzero(gd, sizeof(*gd)); gd->mi.gd_curthread = &thread0; @@ -1915,6 +1917,8 @@ init386(int first) #ifdef USER_LDT gd->gd_currentldt = _default_ldt; #endif + /* spinlocks and the BGL */ + init_locks(); /* exceptions */ for (x = 0; x < NIDT; x++) @@ -2633,3 +2637,66 @@ outb(u_int port, u_char data) } #endif /* DDB */ + + + +#include "opt_cpu.h" +#include "opt_htt.h" +#include "opt_user_ldt.h" + + +/* + * initialize all the SMP locks + */ + +/* critical region around IO APIC, apic_imen */ +struct spinlock imen_spinlock; + +/* Make FAST_INTR() routines sequential */ +struct spinlock fast_intr_spinlock; + +/* critical region for old style disable_intr/enable_intr */ +struct spinlock mpintr_spinlock; + +/* critical region around INTR() routines */ +struct spinlock intr_spinlock; + +/* lock region used by kernel profiling */ +struct spinlock mcount_spinlock; + +/* locks com (tty) data/hardware accesses: a FASTINTR() */ +struct spinlock com_spinlock; + +/* locks kernel printfs */ +struct spinlock cons_spinlock; + +/* lock regions around the clock hardware */ +struct spinlock clock_spinlock; + +/* lock around the MP rendezvous */ +struct spinlock smp_rv_spinlock; + +static void +init_locks(void) +{ + /* + * mp_lock = 0; BSP already owns the MP lock + */ + /* + * Get the initial mp_lock with a count of 1 for the BSP. + * This uses a LOGICAL cpu ID, ie BSP == 0. 
+ */ +#ifdef SMP + cpu_get_initial_mplock(); +#endif + spin_lock_init(&mcount_spinlock); + spin_lock_init(&fast_intr_spinlock); + spin_lock_init(&intr_spinlock); + spin_lock_init(&mpintr_spinlock); + spin_lock_init(&imen_spinlock); + spin_lock_init(&smp_rv_spinlock); + spin_lock_init(&com_spinlock); + spin_lock_init(&clock_spinlock); + spin_lock_init(&cons_spinlock); +} + diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index cdb8947d98..07b3ae1e9c 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.8 2003/06/28 04:16:02 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.9 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_cpu.h" @@ -52,14 +52,12 @@ #include #include #include -#ifdef BETTER_CLOCK #include #include #include #ifdef GPROF #include #endif -#endif #include #include @@ -187,8 +185,8 @@ typedef struct BASETABLE_ENTRY { * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * -#define CHECK_POINTS */ +#define CHECK_POINTS #if defined(CHECK_POINTS) && !defined(PC98) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) @@ -259,27 +257,7 @@ extern int nkpt; u_int32_t cpu_apic_versions[MAXCPU]; u_int32_t *io_apic_versions; -#ifdef APIC_INTR_DIAGNOSTIC -int apic_itrace_enter[32]; -int apic_itrace_tryisrlock[32]; -int apic_itrace_gotisrlock[32]; -int apic_itrace_active[32]; -int apic_itrace_masked[32]; -int apic_itrace_noisrlock[32]; -int apic_itrace_masked2[32]; -int apic_itrace_unmask[32]; -int apic_itrace_noforward[32]; -int apic_itrace_leave[32]; -int apic_itrace_enter2[32]; -int apic_itrace_doreti[32]; -int apic_itrace_splz[32]; -int apic_itrace_eoi[32]; -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -unsigned short apic_itrace_debugbuffer[32768]; -int apic_itrace_debugbuffer_idx; -struct simplelock apic_itrace_debuglock; -#endif -#endif +struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; #ifdef APIC_INTR_REORDER struct { @@ -288,7 +266,6 @@ struct { } apic_isrbit_location[32]; #endif -struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; /* * APIC ID logical/physical mapping structures. 
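init_locks() above trades the old simplelock set for struct spinlock. A C sketch of the primitives it presumes (the real implementations are in the new spinlock.s; the 0/1 field encoding here is an assumption):

#include <stdatomic.h>

struct spinlock_model {
	atomic_int lock;	/* 0 = free, 1 = held (assumed encoding) */
};

static void
spin_lock_init_model(struct spinlock_model *sl)
{
	atomic_store(&sl->lock, 0);
}

static void
spin_lock_model(struct spinlock_model *sl)
{
	int expected;
	do {
		expected = 0;
	} while (!atomic_compare_exchange_weak_explicit(&sl->lock,
	    &expected, 1, memory_order_acquire, memory_order_relaxed));
}

static void
spin_unlock_model(struct spinlock_model *sl)
{
	atomic_store_explicit(&sl->lock, 0, memory_order_release);
}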
@@ -337,7 +314,6 @@ static int mptable_pass2(void); static void default_mp_table(int type); static void fix_mp_table(void); static void setup_apic_irq_mapping(void); -static void init_locks(void); static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); @@ -463,11 +439,12 @@ init_secondary(void) int gsel_tss; int x, myid = bootAP; u_int cr0; + struct mdglobaldata *md; gdt_segs[GPRIV_SEL].ssd_base = (int) &CPU_prvspace[myid]; gdt_segs[GPROC0_SEL].ssd_base = - (int) &CPU_prvspace[myid].globaldata.gd_common_tss; - CPU_prvspace[myid].globaldata.gd_prvspace = &CPU_prvspace[myid]; + (int) &CPU_prvspace[myid].mdglobaldata.gd_common_tss; + CPU_prvspace[myid].mdglobaldata.mi.gd_prvspace = &CPU_prvspace[myid]; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); @@ -486,11 +463,14 @@ init_secondary(void) gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; - common_tss.tss_esp0 = 0; /* not used until after switch */ - common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - common_tss.tss_ioopt = (sizeof common_tss) << 16; - tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; - common_tssd = *tss_gdt; + + md = mdcpu; + + md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ + md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; + md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; + md->gd_common_tssd = *md->gd_tss_gdt; ltr(gsel_tss); /* @@ -575,9 +555,6 @@ mp_enable(u_int boot_addr) if (x) default_mp_table(x); - /* initialize all SMP locks */ - init_locks(); - /* post scan cleanup */ fix_mp_table(); setup_apic_irq_mapping(); @@ -604,10 +581,12 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#if 0 #ifdef BETTER_CLOCK /* install an inter-CPU IPI for reading processor state */ setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif #endif /* install an inter-CPU IPI for all-CPU rendezvous */ @@ -745,7 +724,7 @@ static int lookup_bus_type __P((char *name)); * 1st pass on motherboard's Intel MP specification table. * * initializes: - * mp_ncpus = 1 + * ncpus = 1 * * determines: * cpu_apic_address (common to all CPUs) @@ -862,7 +841,7 @@ mptable_pass1(void) * Count the BSP. * This is also used as a counter while starting the APs. 
*/ - mp_ncpus = 1; + ncpus = 1; --mp_naps; /* subtract the BSP */ } @@ -1998,82 +1977,6 @@ default_mp_table(int type) #endif /* APIC_IO */ } - -/* - * initialize all the SMP locks - */ - -/* critical region around IO APIC, apic_imen */ -struct simplelock imen_lock; - -/* critical region around splxx(), cpl, cml, cil, ipending */ -struct simplelock cpl_lock; - -/* Make FAST_INTR() routines sequential */ -struct simplelock fast_intr_lock; - -/* critical region around INTR() routines */ -struct simplelock intr_lock; - -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - -/* lock region used by kernel profiling */ -struct simplelock mcount_lock; - -#ifdef USE_COMLOCK -/* locks com (tty) data/hardware accesses: a FASTINTR() */ -struct simplelock com_lock; -#endif /* USE_COMLOCK */ - -#ifdef USE_CLOCKLOCK -/* lock regions around the clock hardware */ -struct simplelock clock_lock; -#endif /* USE_CLOCKLOCK */ - -/* lock around the MP rendezvous */ -static struct simplelock smp_rv_lock; - -static void -init_locks(void) -{ - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. - */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - -#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) - s_lock_init((struct simplelock*)&apic_itrace_debuglock); -#endif - - s_lock_init((struct simplelock*)&mpintr_lock); - - s_lock_init((struct simplelock*)&mcount_lock); - - s_lock_init((struct simplelock*)&fast_intr_lock); - s_lock_init((struct simplelock*)&intr_lock); - s_lock_init((struct simplelock*)&imen_lock); - s_lock_init((struct simplelock*)&cpl_lock); - s_lock_init(&smp_rv_lock); - -#ifdef USE_COMLOCK - s_lock_init((struct simplelock*)&com_lock); -#endif /* USE_COMLOCK */ -#ifdef USE_CLOCKLOCK - s_lock_init((struct simplelock*)&clock_lock); -#endif /* USE_CLOCKLOCK */ -} - - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); - /* * start each AP in our list */ @@ -2083,7 +1986,7 @@ start_all_aps(u_int boot_addr) int x, i, pg; u_char mpbiosreason; u_long mpbioswarmvec; - struct globaldata *gd; + struct mdglobaldata *gd; char *stack; uintptr_t kptbase; @@ -2124,24 +2027,29 @@ start_all_aps(u_int boot_addr) pg = x * i386_btop(sizeof(struct privatespace)); /* allocate a new private data page */ - gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE); + gd = (struct mdglobaldata *)kmem_alloc(kernel_map, PAGE_SIZE); /* wire it into the private page table page */ SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd)); /* allocate and set up an idle stack data page */ stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE); - for (i = 0; i < UPAGES; i++) + for (i = 0; i < UPAGES; i++) { SMPpt[pg + 5 + i] = (pt_entry_t) (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); + } SMPpt[pg + 1] = 0; /* *gd_CMAP1 */ SMPpt[pg + 2] = 0; /* *gd_CMAP2 */ SMPpt[pg + 3] = 0; /* *gd_CMAP3 */ SMPpt[pg + 4] = 0; /* *gd_PMAP1 */ + gd = &CPU_prvspace[x].mdglobaldata; /* official location */ + bzero(gd, sizeof(*gd)); + gd->mi.gd_prvspace = &CPU_prvspace[x]; + /* prime data page for it to use */ - mi_gdinit(gd, x); + mi_gdinit(&gd->mi, x); cpu_gdinit(gd, x); gd->gd_cpu_lockid = x << 24; gd->gd_CMAP1 = &SMPpt[pg + 1]; @@ -2161,7 +2069,10 @@ start_all_aps(u_int boot_addr) outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ #endif - bootSTK = &CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE]; + /* + * Setup the AP boot stack + */ + bootSTK = 
&CPU_prvspace[x].idlestack[UPAGES*PAGE_SIZE/2]; bootAP = x; /* attempt to start the Application Processor */ @@ -2183,7 +2094,7 @@ start_all_aps(u_int boot_addr) } /* build our map of 'other' CPUs */ - other_cpus = all_cpus & ~(1 << cpuid); + mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); /* fill in our (BSP) APIC version */ cpu_apic_versions[0] = lapic.version; @@ -2196,24 +2107,15 @@ start_all_aps(u_int boot_addr) #endif /* - * Set up the idle context for the BSP. Similar to above except - * that some was done by locore, some by pmap.c and some is implicit - * because the BSP is cpu#0 and the page is initially zero, and also - * because we can refer to variables by name on the BSP.. + * NOTE! The idlestack for the BSP was setup by locore. Finish + * up, clean out the P==V mapping we did earlier. */ - - /* Allocate and setup BSP idle stack */ - stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE); - for (i = 0; i < UPAGES; i++) - SMPpt[5 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - for (x = 0; x < NKPT; x++) PTD[x] = 0; pmap_set_opt(); /* number of APs actually started */ - return mp_ncpus - 1; + return ncpus - 1; } @@ -2301,7 +2203,10 @@ start_ap(int logical_cpu, u_int boot_addr) vector = (boot_addr >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ - cpus = mp_ncpus; + cpus = ncpus; + + /* Make sure the target cpu sees everything */ + wbinvd(); /* * first we do an INIT/RESET IPI this INIT IPI might be run, reseting @@ -2358,12 +2263,12 @@ start_ap(int logical_cpu, u_int boot_addr) /* spin */ ; u_sleep(200); /* wait ~200uS */ - /* wait for it to start */ + /* wait for it to start, see ap_init() */ set_apic_timer(5000000);/* == 5 seconds */ - while (read_apic_timer()) - if (mp_ncpus > cpus) + while (read_apic_timer()) { + if (ncpus > cpus) return 1; /* return SUCCESS */ - + } return 0; /* return FAILURE */ } @@ -2473,29 +2378,42 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, &forward_roundrobin_enabled, 0, ""); /* - * This is called once the rest of the system is up and running and we're - * ready to let the AP's out of the pen. + * This is called once the mpboot code has gotten us properly relocated + * and the MMU turned on, etc. ap_init() is actually the idle thread, + * and when it returns the scheduler will call the real cpu_idle() main + * loop for the idlethread. Interrupts are disabled on entry and should + * remain disabled at return. */ -void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* + * Signal the BSP that we have started up successfully by incrementing + * ncpus. Note that we do not hold the BGL yet. The BSP is waiting + * for our signal. + */ + ++ncpus; + + /* + * Get the MP lock so we can finish initializing. + */ + while (cpu_try_mplock() == 0) + ; + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); - smp_cpus++; - #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Build our map of 'other' CPUs. 
*/ - other_cpus = all_cpus & ~(1 << cpuid); + mycpu->gd_other_cpus = all_cpus & ~(1 << mycpu->gd_cpuid); - printf("SMP: AP CPU #%d Launched!\n", cpuid); + printf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid); /* set up CPU registers and state */ cpu_setregs(); @@ -2508,8 +2426,8 @@ ap_init() /* A quick check from sanity claus */ apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); - if (cpuid != apic_id) { - printf("SMP: cpuid = %d\n", cpuid); + if (mycpu->gd_cpuid != apic_id) { + printf("SMP: cpuid = %d\n", mycpu->gd_cpuid); printf("SMP: apic_id = %d\n", apic_id); printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); panic("cpuid mismatch! boom!!"); @@ -2522,15 +2440,23 @@ mem_range_AP_init(); /* - * Activate smp_invltlb, although strictly speaking, this isn't - * quite correct yet. We should have a bitfield for cpus willing - * to accept TLB flush IPI's or something and sync them. + * Since we have the BGL if smp_cpus matches ncpus then we are + * the last AP to get to this point and we can enable IPI's, + * tlb shootdowns, freezes, and so forth. */ - if (smp_cpus == mp_ncpus) { + ++smp_cpus; + if (smp_cpus == ncpus) { invltlb_ok = 1; smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* + * The idle loop doesn't expect the BGL to be held and while + * lwkt_switch() normally cleans things up this is a special case + * because we are returning almost directly into the idle loop. + */ + cpu_rel_mplock(); } #ifdef BETTER_CLOCK @@ -2540,7 +2466,7 @@ ap_init() #define CHECKSTATE_INTR 2 /* Do not staticize. Used from apic_vector.s */ -struct proc* checkstate_curproc[MAXCPU]; +struct thread *checkstate_curtd[MAXCPU]; int checkstate_cpustate[MAXCPU]; u_long checkstate_pc[MAXCPU]; @@ -2548,6 +2474,7 @@ u_long checkstate_pc[MAXCPU]; ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) +#if 0 static void addupc_intr_forwarded(struct proc *p, int id, int *astmap) { @@ -2567,28 +2494,30 @@ addupc_intr_forwarded(struct proc *p, int id, int *astmap) *astmap |= (1 << id); } } +#endif static void forwarded_statclock(int id, int pscnt, int *astmap) { +#if 0 struct pstats *pstats; long rss; struct rusage *ru; struct vmspace *vm; int cpustate; - struct proc *p; + struct thread *td; #ifdef GPROF register struct gmonparam *g; int i; #endif - p = checkstate_curproc[id]; + td = checkstate_curtd[id]; cpustate = checkstate_cpustate[id]; switch (cpustate) { case CHECKSTATE_USER: - if (p->p_flag & P_PROFIL) - addupc_intr_forwarded(p, id, astmap); + if (td->td_proc && td->td_proc->p_flag & P_PROFIL) + addupc_intr_forwarded(td->td_proc, id, astmap); if (pscnt > 1) return; p->p_uticks++; @@ -2657,6 +2586,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) ru->ru_maxrss = rss; } } +#endif } void @@ -2680,9 +2610,10 @@ forward_statclock(int pscnt) if (!smp_started || !invltlb_ok || cold || panicstr) return; + printf("forward_statclock\n"); /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ - map = other_cpus & ~stopped_cpus ; + map = mycpu->gd_other_cpus & ~stopped_cpus ; checkstate_probed_cpus = 0; if (map != 0) selected_apic_ipi(map, @@ -2707,8 +2638,8 @@ */ map = 0; - for (id = 0; id < mp_ncpus; id++) { - if (id == cpuid) + for (id = 0; id < ncpus; id++) { + if (id == mycpu->gd_cpuid) continue; if (((1 << id) & checkstate_probed_cpus) == 0) continue; @@ -2737,8 +2668,10 @@ forward_hardclock(int pscnt) { int map; int id; +#if 0 struct proc *p; struct pstats *pstats;
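ap_init() and start_ap() above form the startup handshake: the AP increments ncpus with no lock held and then spins for the BGL, while the BSP compares ncpus against a snapshot taken before the startup IPI, bounded by an APIC timer. A simplified, compilable model of that exchange (the -1 free value matches mp_lock's initializer in mplock.s):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int ncpus_m = 1;		/* the BSP counts itself */
static atomic_int mp_lock_m = -1;	/* -1 == free, else owner cpu id */

static int
cpu_try_mplock_model(int mycpuid)
{
	int freeval = -1;
	return atomic_compare_exchange_strong(&mp_lock_m, &freeval, mycpuid);
}

static void
ap_init_model(int mycpuid)
{
	atomic_fetch_add(&ncpus_m, 1);	/* signal the BSP; no BGL held yet */
	while (cpu_try_mplock_model(mycpuid) == 0)
		;			/* spin for the BGL */
	printf("SMP: AP CPU #%d Launched!\n", mycpuid);
}

static int
start_ap_model(int cpus_snapshot, int ticks)
{
	while (ticks-- > 0) {		/* stands in for read_apic_timer() */
		if (atomic_load(&ncpus_m) > cpus_snapshot)
			return 1;	/* SUCCESS */
	}
	return 0;			/* FAILURE */
}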
+#endif int i; /* Kludge. We don't yet have separate locks for the interrupts @@ -2757,7 +2690,7 @@ forward_hardclock(int pscnt) /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ - map = other_cpus & ~stopped_cpus ; + map = mycpu->gd_other_cpus & ~stopped_cpus ; checkstate_probed_cpus = 0; if (map != 0) selected_apic_ipi(map, @@ -2783,11 +2716,13 @@ forward_hardclock(int pscnt) */ map = 0; - for (id = 0; id < mp_ncpus; id++) { - if (id == cpuid) + for (id = 0; id < ncpus; id++) { + if (id == mycpu->gd_cpuid) continue; if (((1 << id) & checkstate_probed_cpus) == 0) continue; + printf("forward_hardclock\n"); +#if 0 p = checkstate_curproc[id]; if (p) { pstats = p->p_stats; @@ -2806,6 +2741,7 @@ forward_hardclock(int pscnt) if (stathz == 0) { forwarded_statclock( id, pscnt, &map); } +#endif } if (map != 0) { checkstate_need_ast |= map; @@ -2830,6 +2766,8 @@ forward_hardclock(int pscnt) void forward_signal(struct proc *p) { + /* YYY forward_signal */ +#if 0 int map; int id; int i; @@ -2873,11 +2811,14 @@ forward_signal(struct proc *p) if (id == p->p_oncpu) return; } +#endif } void forward_roundrobin(void) { + /* YYY forward_roundrobin */ +#if 0 u_int map; int i; @@ -2885,8 +2826,8 @@ forward_roundrobin(void) return; if (!forward_roundrobin_enabled) return; - resched_cpus |= other_cpus; - map = other_cpus & ~stopped_cpus ; + resched_cpus |= mycpu->gd_other_cpus; + map = mycpu->gd_other_cpus & ~stopped_cpus ; #if 1 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); #else @@ -2904,20 +2845,20 @@ forward_roundrobin(void) break; } } +#endif } - #ifdef APIC_INTR_REORDER /* - * Maintain mapping from softintr vector to isr bit in local apic. + * Maintain mapping from softintr vector to isr bit in local apic. */ void set_lapic_isrloc(int intr, int vector) { if (intr < 0 || intr > 32) - panic("set_apic_isrloc: bad intr argument: %d",intr); + panic("set_apic_isrloc: bad intr argument: %d",intr); if (vector < ICU_OFFSET || vector > 255) - panic("set_apic_isrloc: bad vector argument: %d",vector); + panic("set_apic_isrloc: bad vector argument: %d",vector); apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2); apic_isrbit_location[intr].bit = (1<<(vector & 31)); } @@ -2946,14 +2887,14 @@ smp_rendezvous_action(void) smp_rv_setup_func(smp_rv_func_arg); /* spin on entry rendezvous */ atomic_add_int(&smp_rv_waiters[0], 1); - while (smp_rv_waiters[0] < mp_ncpus) + while (smp_rv_waiters[0] < ncpus) ; /* action function */ if (smp_rv_action_func != NULL) smp_rv_action_func(smp_rv_func_arg); /* spin on exit rendezvous */ atomic_add_int(&smp_rv_waiters[1], 1); - while (smp_rv_waiters[1] < mp_ncpus) + while (smp_rv_waiters[1] < ncpus) ; /* teardown function */ if (smp_rv_teardown_func != NULL) @@ -2966,10 +2907,8 @@ smp_rendezvous(void (* setup_func)(void *), void (* teardown_func)(void *), void *arg) { - u_int efl; - - /* obtain rendezvous lock */ - s_lock(&smp_rv_lock); /* XXX sleep here? NOWAIT flag? */ + /* obtain rendezvous lock. This disables interrupts */ + spin_lock(&smp_rv_spinlock); /* XXX sleep here? NOWAIT flag? 
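smp_rendezvous_action() above is a two-phase barrier keyed on ncpus: every cpu checks in before the action runs, and checks out before teardown. The same structure in portable C (ncpus_r and the empty callbacks are stand-ins for illustration):

#include <stdatomic.h>

static int ncpus_r = 2;
static atomic_int rv_waiters[2];

static void rv_setup(void *arg)    { (void)arg; }
static void rv_action(void *arg)   { (void)arg; }
static void rv_teardown(void *arg) { (void)arg; }

static void
rendezvous_action_model(void *arg)
{
	rv_setup(arg);
	atomic_fetch_add(&rv_waiters[0], 1);	/* entry rendezvous */
	while (atomic_load(&rv_waiters[0]) < ncpus_r)
		;
	rv_action(arg);
	atomic_fetch_add(&rv_waiters[1], 1);	/* exit rendezvous */
	while (atomic_load(&rv_waiters[1]) < ncpus_r)
		;
	rv_teardown(arg);
}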
*/ /* set static function pointers */ smp_rv_setup_func = setup_func; @@ -2979,19 +2918,12 @@ smp_rendezvous(void (* setup_func)(void *), smp_rv_waiters[0] = 0; smp_rv_waiters[1] = 0; - /* disable interrupts on this CPU, save interrupt status */ - efl = read_eflags(); - write_eflags(efl & ~PSL_I); - /* signal other processors, which will enter the IPI with interrupts off */ all_but_self_ipi(XRENDEZVOUS_OFFSET); /* call executor function */ smp_rendezvous_action(); - /* restore interrupt flag */ - write_eflags(efl); - /* release lock */ - s_unlock(&smp_rv_lock); + spin_unlock(&smp_rv_spinlock); } diff --git a/sys/platform/pc32/i386/mpapic.c b/sys/platform/pc32/i386/mpapic.c index a487785817..d390cffb6e 100644 --- a/sys/platform/pc32/i386/mpapic.c +++ b/sys/platform/pc32/i386/mpapic.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/Attic/mpapic.c,v 1.3 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/Attic/mpapic.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -74,11 +74,13 @@ apic_initialize(void) /* set the Task Priority Register as needed */ temp = lapic.tpr; temp &= ~APIC_TPR_PRIO; /* clear priority field */ -#ifdef GRAB_LOPRIO - /* Leave the BSP at TPR 0 during boot to make sure it gets interrupts */ + + /* + * Leave the BSP and TPR 0 during boot so it gets all the interrupts, + * set APs at TPR 0xF0 at boot so they get no ints. + */ if (mycpu->gd_cpuid != 0) - temp |= LOPRIO_LEVEL; /* allow INT arbitration */ -#endif + temp |= TPR_IPI_ONLY; /* disable INTs on this cpu */ lapic.tpr = temp; /* enable the local APIC */ @@ -188,7 +190,6 @@ io_apic_setup_intpin(int apic, int pin) u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ int level; - u_int eflags; target = IOART_DEST; @@ -209,14 +210,11 @@ io_apic_setup_intpin(int apic, int pin) * shouldn't and stop the carnage. */ vector = NRSVIDT + pin; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); /* we only deal with vectored INTs here */ if (apic_int_type(apic, pin) != 0) @@ -260,13 +258,10 @@ io_apic_setup_intpin(int apic, int pin) printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq); vector = NRSVIDT + irq; /* IDT vec */ - eflags = read_eflags(); - __asm __volatile("cli" : : : "memory"); - s_lock(&imen_lock); + imen_lock(); io_apic_write(apic, select, flags | vector); io_apic_write(apic, select + 1, target); - s_unlock(&imen_lock); - write_eflags(eflags); + imen_unlock(); } int diff --git a/sys/platform/pc32/i386/mpboot.s b/sys/platform/pc32/i386/mpboot.s index 8145c03ff7..9c85e26b92 100644 --- a/sys/platform/pc32/i386/mpboot.s +++ b/sys/platform/pc32/i386/mpboot.s @@ -32,7 +32,7 @@ * multiprocessor systems. * * $FreeBSD: src/sys/i386/i386/mpboot.s,v 1.13.2.3 2000/09/07 01:18:26 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/i386/mpboot.s,v 1.3 2003/07/01 20:30:40 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mpboot.s,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ #include /* miscellaneous asm macros */ @@ -46,18 +46,18 @@ * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. 
* -#define CHECK_POINTS */ +#define CHECK_POINTS #if defined(CHECK_POINTS) && !defined(PC98) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ - movb $(A),%al ; \ + movb $A,%al ; \ outb %al,$CMOS_REG ; \ - movb $(D),%al ; \ + movb D,%al ; \ outb %al,$CMOS_DATA #else @@ -68,30 +68,31 @@ /* - * the APs enter here from their trampoline code (bootMP, below) + * The APs enter here from their trampoline code (bootMP, below) + * NOTE: %fs is not setup until the call to init_secondary()! */ .p2align 4 NON_GPROF_ENTRY(MPentry) - CHECKPOINT(0x36, 3) + CHECKPOINT(0x36, $3) /* Now enable paging mode */ movl IdlePTD-KERNBASE, %eax movl %eax,%cr3 movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ - movl bootSTK,%esp /* boot stack end loc. */ + movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ - ret + NON_GPROF_RET /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ - CHECKPOINT(0x37, 4) + CHECKPOINT(0x37, $4) call init_secondary /* load i386 tables */ - CHECKPOINT(0x38, 5) + CHECKPOINT(0x38, $5) /* * If the [BSP] CPU has support for VME, turn it on. @@ -108,47 +109,23 @@ mp_begin: /* now running relocated at KERNBASE */ andl $~APIC_SVR_SWEN, %eax /* clear software enable bit */ movl %eax, lapic_svr - /* signal our startup to the BSP */ + /* data returned to BSP */ movl lapic_ver, %eax /* our version reg contents */ movl %eax, cpu_apic_versions /* into [ 0 ] */ - incl mp_ncpus /* signal BSP */ - - CHECKPOINT(0x39, 6) - /* wait till we can get into the kernel */ - call boot_get_mplock + CHECKPOINT(0x39, $6) - /* Now, let's prepare for some REAL WORK :-) */ - call ap_init - - call rel_mplock - wbinvd /* Avoid livelock */ -2: - cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */ - jz 2b - call get_mplock - - /* let her rip! (loads new stack) */ - jmp cpu_switch - -NON_GPROF_ENTRY(wait_ap) - pushl %ebp - movl %esp, %ebp - call rel_mplock - wbinvd /* Avoid livelock */ - movl %eax, 8(%ebp) -1: - cmpl $0, CNAME(smp_started) - jnz 2f - decl %eax - cmpl $0, %eax - jge 1b -2: - call get_mplock - movl %ebp, %esp - popl %ebp + /* + * Execute the context restore function for the idlethread which + * has conveniently been set as curthread. Remember, %eax must + * contain the target thread. Our BSP/AP synchronization occurs + * in ap_init(). We do not need to mess with the BGL for this + * because LWKT threads are self-contained on each cpu (or, at least, + * the idlethread is!).
+ */ + movl PCPU(curthread),%eax + movl TD_SP(%eax),%esp ret - /* * This is the embedded trampoline or bootstrap that is @@ -167,7 +144,7 @@ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli - CHECKPOINT(0x34, 1) + CHECKPOINT(0x34, $1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx @@ -203,7 +180,7 @@ NON_GPROF_ENTRY(bootMP) .code32 protmode: - CHECKPOINT(0x35, 2) + CHECKPOINT(0x35, $2) /* * we are NOW running for the first time with %eip diff --git a/sys/platform/pc32/i386/mplock.s b/sys/platform/pc32/i386/mplock.s index 8a5cb0fb86..aa0dbea2ed 100644 --- a/sys/platform/pc32/i386/mplock.s +++ b/sys/platform/pc32/i386/mplock.s @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/i386/mplock.s,v 1.29.2.2 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/mplock.s,v 1.3 2003/07/01 20:30:40 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mplock.s,v 1.4 2003/07/06 21:23:48 dillon Exp $ * * Functions for locking between CPUs in a SMP system. * @@ -25,320 +25,130 @@ #include /** GRAB_LOPRIO */ #include -#define GLPROFILE_NOT - -#ifdef CHEAP_TPR - -/* we assumme that the 'reserved bits' can be written with zeros */ - -#else /* CHEAP_TPR */ - -#error HEADS UP: this code needs work -/* - * The APIC doc says that reserved bits must be written with whatever - * value they currently contain, ie you should: read, modify, write, - * instead of just writing new values to the TPR register. Current - * silicon seems happy with just writing. If the behaviour of the - * silicon changes, all code that access the lapic_tpr must be modified. - * The last version to contain such code was: - * Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp - */ - -#endif /* CHEAP_TPR */ - -#ifdef GRAB_LOPRIO -/* - * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts. - */ - -/* after 1st acquire of lock we grab all hardware INTs */ -#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr - -/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */ -#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */ - -#else /* GRAB_LOPRIO */ - -#define GRAB_HWI /* nop */ -#define ARB_HWI /* nop */ - -#endif /* GRAB_LOPRIO */ +#include "assym.s" + .data + ALIGN_DATA +#ifdef SMP + .globl mp_lock +mp_lock: + .long -1 /* initialized to not held */ +#endif .text - -#ifdef SMP - -/*********************************************************************** - * void MPgetlock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %eax, %ecx. %edx must hold lock argument. - * - * Grabs hardware interrupts on first aquire. - * - * NOTE: Serialization is not required if we already hold the lock, since - * we already hold the lock, nor do we need a locked instruction if we - * already hold the lock. - */ - -NON_GPROF_ENTRY(MPgetlock_edx) + SUPERALIGN_TEXT + + /* + * Note on cmpxchgl... exchanges ecx with mem if mem matches eax. + * Z=1 (jz) on success. + */ +NON_GPROF_ENTRY(cpu_get_initial_mplock) + movl PCPU(curthread),%ecx + movl $1,TD_MPCOUNT(%ecx) /* curthread has mpcount of 1 */ + movl $0,mp_lock /* owned by cpu 0 */ + NON_GPROF_RET + + /* + * cpu_try_mplock() returns non-zero on success, 0 on failure. It + * only adjusts mp_lock. It does not touch td_mpcount, and it + * must be called from inside a critical section. 
+ */ +NON_GPROF_ENTRY(cpu_try_mplock) + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem if eax matches */ + jnz 1f + movl $1,%eax + NON_GPROF_RET 1: - movl (%edx), %eax /* Get current contents of lock */ - movl %eax, %ecx - andl $CPU_FIELD,%ecx - cmpl cpu_lockid, %ecx /* Do we already own the lock? */ - jne 2f - incl %eax /* yes, just bump the count */ - movl %eax, (%edx) /* serialization not required */ - ret -2: - movl $FREE_LOCK, %eax /* lock must be free */ - movl cpu_lockid, %ecx - incl %ecx - lock - cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */ -#ifdef GLPROFILE - jne 3f - incl gethits2 -#else - jne 1b -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - ret -#ifdef GLPROFILE -3: - incl gethits3 - jmp 1b -#endif - -/*********************************************************************** - * int MPtrylock(unsigned int *lock) - * --------------------------------- - * Destroys %eax, %ecx and %edx. - * Returns 1 if lock was successfull - */ + movl $0,%eax + NON_GPROF_RET -NON_GPROF_ENTRY(MPtrylock) - movl 4(%esp), %edx /* Get the address of the lock */ - - movl $FREE_LOCK, %eax /* Assume it's free */ - movl cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - incl %ecx /* - new count is one */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 1f /* ...do not collect $200 */ -#ifdef GLPROFILE - incl tryhits2 -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - movl $1, %eax - ret +NON_GPROF_ENTRY(get_mplock) + movl PCPU(curthread),%edx + cmpl $0,TD_MPCOUNT(%edx) + je 1f + incl TD_MPCOUNT(%edx) /* already have it, just ++mpcount */ + NON_GPROF_RET 1: - movl (%edx), %eax /* Try to see if we have it already */ - andl $COUNT_FIELD, %eax /* - get count */ - movl cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - orl %ecx, %eax /* - combine them */ - movl %eax, %ecx - incl %ecx /* - new count is one more */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 2f /* - miss */ -#ifdef GLPROFILE - incl tryhits -#endif /* GLPROFILE */ - movl $1, %eax - ret -2: -#ifdef GLPROFILE - incl tryhits3 -#endif /* GLPROFILE */ - movl $0, %eax - ret - - -/*********************************************************************** - * void MPrellock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %ecx, argument must be in %edx - * - * SERIALIZATION NOTE! - * - * After a lot of arguing, it turns out that there is no problem with - * not having a synchronizing instruction in the MP unlock code. There - * are two things to keep in mind: First, Intel guarentees that writes - * are ordered amoungst themselves. Second, the P6 is allowed to reorder - * reads around writes. Third, the P6 maintains cache consistency (snoops - * the bus). The second is not an issue since the one read we do is the - * basis for the conditional which determines whether the write will be - * made or not. - * - * Therefore, no synchronizing instruction is required on unlock. There are - * three performance cases: First, if a single cpu is getting and releasing - * the lock the removal of the synchronizing instruction saves approx - * 200 nS (testing w/ duel cpu PIII 450). Second, if one cpu is contending - * for the lock while the other holds it, the removal of the synchronizing - * instruction results in a 700nS LOSS in performance. Third, if two cpu's - * are switching off ownership of the MP lock but not contending for it (the - * most common case), this results in a 400nS IMPROVEMENT in performance. 
- * - * Since our goal is to reduce lock contention in the first place, we have - * decided to remove the synchronizing instruction from the unlock code. - */ - -NON_GPROF_ENTRY(MPrellock_edx) - movl (%edx), %ecx /* - get the value */ - decl %ecx /* - new count is one less */ - testl $COUNT_FIELD, %ecx /* - Unless it's zero... */ + pushfl + cli + movl $1,TD_MPCOUNT(%edx) + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem & JZ if eax matches */ jnz 2f - ARB_HWI /* last release, arbitrate hw INTs */ - movl $FREE_LOCK, %ecx /* - In which case we release it */ -#if 0 - lock - addl $0,0(%esp) /* see note above */ -#endif + popfl /* success */ + NON_GPROF_RET 2: - movl %ecx, (%edx) - ret - -/*********************************************************************** - * void get_mplock() - * ----------------- - * All registers preserved - * - * Stack (after call to _MPgetlock): - * - * edx 4(%esp) - * ecx 8(%esp) - * eax 12(%esp) - * - * Requirements: Interrupts should be enabled on call so we can take - * IPI's and FAST INTs while we are waiting for the lock - * (else the system may not be able to halt). - * - * XXX there are still places where get_mplock() is called - * with interrupts disabled, so we have to temporarily reenable - * interrupts. - * - * Side effects: The current cpu will be given ownership of the - * hardware interrupts when it first aquires the lock. - * - * Costs: Initial aquisition requires the use of a costly locked - * instruction, but recursive aquisition is cheap. Release - * is very cheap. - */ + movl PCPU(cpuid),%eax /* failure */ + cmpl %eax,mp_lock + je badmp_get + popfl + jmp lwkt_switch /* will be correct on return */ -NON_GPROF_ENTRY(get_mplock) - pushl %eax - pushl %ecx - pushl %edx - movl $mp_lock, %edx - pushfl - testl $(1<<9), (%esp) - jz 2f - call MPgetlock_edx - addl $4,%esp +NON_GPROF_ENTRY(try_mplock) + movl PCPU(curthread),%edx + cmpl $0,TD_MPCOUNT(%edx) + je 1f + incl TD_MPCOUNT(%edx) /* already have it, just ++mpcount */ + movl $1,%eax + NON_GPROF_RET 1: - popl %edx - popl %ecx - popl %eax - ret + pushfl + cli + movl PCPU(cpuid),%ecx + movl $-1,%eax + cmpxchgl %ecx,mp_lock /* ecx<->mem & JZ if eax matches */ + jnz 2f + movl $1,TD_MPCOUNT(%edx) + popfl /* success */ + movl $1,%eax + NON_GPROF_RET 2: - sti - call MPgetlock_edx + movl PCPU(cpuid),%eax /* failure */ + cmpl %eax,mp_lock + je badmp_get popfl - jmp 1b + movl $0,%eax + NON_GPROF_RET -/* - * Special version of get_mplock that is used during bootstrap when we can't - * yet enable interrupts of any sort since the APIC isn't online yet. We - * do an endrun around MPgetlock_edx to avoid enabling interrupts. - * - * XXX FIXME.. - APIC should be online from the start to simplify IPI's. 
- */ -NON_GPROF_ENTRY(boot_get_mplock) - pushl %eax - pushl %ecx - pushl %edx -#ifdef GRAB_LOPRIO +NON_GPROF_ENTRY(rel_mplock) + movl PCPU(curthread),%edx + cmpl $1,TD_MPCOUNT(%edx) + je 1f + subl $1,TD_MPCOUNT(%edx) + NON_GPROF_RET +1: pushfl - pushl lapic_tpr cli -#endif - - movl $mp_lock, %edx - call MPgetlock_edx - -#ifdef GRAB_LOPRIO - popl lapic_tpr + movl $0,TD_MPCOUNT(%edx) + movl $MP_FREE_LOCK,mp_lock popfl -#endif - popl %edx - popl %ecx - popl %eax - ret - -/*********************************************************************** - * void try_mplock() - * ----------------- - * reg %eax == 1 if success - */ - -NON_GPROF_ENTRY(try_mplock) - pushl %ecx - pushl %edx - pushl $mp_lock - call MPtrylock - add $4, %esp - popl %edx - popl %ecx - ret - -/*********************************************************************** - * void rel_mplock() - * ----------------- - * All registers preserved - */ - -NON_GPROF_ENTRY(rel_mplock) - pushl %ecx - pushl %edx - movl $mp_lock,%edx - call MPrellock_edx - popl %edx - popl %ecx - ret + NON_GPROF_RET -#endif +badmp_get: + pushl $bmpsw1 + call panic +badmp_rel: + pushl $bmpsw2 + call panic -/*********************************************************************** - * - */ .data - .p2align 2 /* xx_lock aligned on int boundary */ -#ifdef SMP +bmpsw1: + .asciz "try/get_mplock(): already have lock!" - .globl mp_lock -mp_lock: .long 0 +bmpsw2: + .asciz "rel_mplock(): not holding lock!" -#ifdef GLPROFILE - .globl gethits -gethits: - .long 0 -gethits2: - .long 0 -gethits3: - .long 0 +#if 0 +/* after 1st acquire of lock we grab all hardware INTs */ +#ifdef GRAB_LOPRIO +#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr - .globl tryhits -tryhits: - .long 0 -tryhits2: - .long 0 -tryhits3: - .long 0 +/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */ +#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */ +#endif +#endif -msg: - .asciz "lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n" -#endif /* GLPROFILE */ -#endif /* SMP */ diff --git a/sys/platform/pc32/i386/perfmon.c b/sys/platform/pc32/i386/perfmon.c index b70ad6499a..d2c2de6289 100644 --- a/sys/platform/pc32/i386/perfmon.c +++ b/sys/platform/pc32/i386/perfmon.c @@ -27,7 +27,7 @@ * SUCH DAMAGE. 
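The rewritten mplock.s above implements a recursive big giant lock: mp_lock holds the owning cpu id, or -1 (MP_FREE_LOCK) when free, and td_mpcount counts per-thread recursion so only the outermost acquire and release touch mp_lock. A C model of that invariant; note that the real get_mplock() branches to lwkt_switch() on contention instead of spinning as this sketch does:

#include <stdatomic.h>

static atomic_int mp_lock_model = -1;	/* -1 == MP_FREE_LOCK */

struct thread_m {
	int td_mpcount;
	int td_cpuid;
};

static void
get_mplock_model(struct thread_m *td)
{
	int freeval;
	if (td->td_mpcount++ != 0)
		return;			/* recursive acquire is free */
	do {
		freeval = -1;		/* the cmpxchgl loop in mplock.s */
	} while (!atomic_compare_exchange_weak(&mp_lock_model,
	    &freeval, td->td_cpuid));
}

static void
rel_mplock_model(struct thread_m *td)
{
	if (--td->td_mpcount == 0)
		atomic_store(&mp_lock_model, -1);	/* MP_FREE_LOCK */
}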
* * $FreeBSD: src/sys/i386/i386/perfmon.c,v 1.21 1999/09/25 18:24:04 phk Exp $ - * $DragonFly: src/sys/platform/pc32/i386/perfmon.c,v 1.3 2003/06/23 17:55:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/perfmon.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -124,11 +124,11 @@ perfmon_setup(int pmc, unsigned int control) perfmon_inuse |= (1 << pmc); control &= ~(PMCF_SYS_FLAGS << 16); - disable_intr(); + mpintr_lock(); /* doesn't have to be mpintr_lock YYY */ ctl_shadow[pmc] = control; writectl(pmc); wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); - enable_intr(); + mpintr_unlock(); return 0; } @@ -167,11 +167,11 @@ perfmon_start(int pmc) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - disable_intr(); + mpintr_lock(); /* doesn't have to be mpintr YYY */ ctl_shadow[pmc] |= (PMCF_EN << 16); wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); writectl(pmc); - enable_intr(); + mpintr_unlock(); return 0; } return EBUSY; @@ -184,11 +184,11 @@ perfmon_stop(int pmc) return EINVAL; if (perfmon_inuse & (1 << pmc)) { - disable_intr(); + mpintr_lock(); pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; ctl_shadow[pmc] &= ~(PMCF_EN << 16); writectl(pmc); - enable_intr(); + mpintr_unlock(); return 0; } return EBUSY; diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 32d0a2a748..6faa71d1f4 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.15 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.16 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -373,25 +373,27 @@ pmap_bootstrap(firstaddr, loadaddr) ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; - if (ncpus == 1) { - /* - * Enable the PSE mode. - */ - load_cr4(rcr4() | CR4_PSE); +#ifndef SMP + /* + * Enable the PSE mode. If we are SMP we can't do this + * now because the APs will not be able to use it when + * they boot up. + */ + load_cr4(rcr4() | CR4_PSE); - /* - * We can do the mapping here for the single processor - * case. We simply ignore the old page table page from - * now on. - */ - /* - * For SMP, we still need 4K pages to bootstrap APs, - * PSE will be enabled as soon as all APs are up. - */ - PTD[KPTDI] = (pd_entry_t) ptditmp; - kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; - invltlb(); - } + /* + * We can do the mapping here for the single processor + * case. We simply ignore the old page table page from + * now on. + */ + /* + * For SMP, we still need 4K pages to bootstrap APs, + * PSE will be enabled as soon as all APs are up. + */ + PTD[KPTDI] = (pd_entry_t) ptditmp; + kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; + invltlb(); +#endif } #endif #ifdef APIC_IO @@ -827,6 +829,9 @@ pmap_init_proc(struct proc *p, struct thread *td) p->p_thread = td; td->td_proc = p; td->td_switch = cpu_heavy_switch; +#ifdef SMP + td->td_mpcount = 1; +#endif bzero(p->p_addr, sizeof(*p->p_addr)); } @@ -1405,21 +1410,20 @@ pmap_reference(pmap) ***************************************************/ /* - * free the pv_entry back to the free list + * free the pv_entry back to the free list. This function may be + * called from an interrupt. */ static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system - * when needed. 
- * the memory allocation is performed bypassing the malloc code - * because of the possibility of allocations at interrupt time. + * when needed. This function may be called from an interrupt. */ static pv_entry_t get_pv_entry(void) @@ -1431,7 +1435,7 @@ get_pv_entry(void) pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return zalloci(pvzone); + return zalloc(pvzone); } /* diff --git a/sys/platform/pc32/i386/simplelock.s b/sys/platform/pc32/i386/simplelock.s deleted file mode 100644 index 506f09dbb6..0000000000 --- a/sys/platform/pc32/i386/simplelock.s +++ /dev/null @@ -1,321 +0,0 @@ -/*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/simplelock.s,v 1.11.2.2 2003/02/04 20:55:28 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/Attic/simplelock.s,v 1.3 2003/07/01 20:30:40 dillon Exp $ - */ - -/* - * credit to Bruce Evans for help with asm optimization. - */ - -#include /* miscellaneous macros */ -#include -#include - -#include /** FAST_HI */ - -/* - * The following impliments the primitives described in i386/i386/param.h - * necessary for the Lite2 lock manager system. - * The major difference is that the "volatility" of the lock datum has been - * pushed down from the various functions to lock_data itself. - */ - -/* - * The simple-lock routines are the primitives out of which the lock - * package is built. The machine-dependent code must implement an - * atomic test_and_set operation that indivisibly sets the simple lock - * to non-zero and returns its old value. It also assumes that the - * setting of the lock to zero below is indivisible. Simple locks may - * only be used for exclusive locks. - * - * struct simplelock { - * volatile int lock_data; - * }; - */ - -/* - * void - * s_lock_init(struct simplelock *lkp) - * { - * lkp->lock_data = 0; - * } - */ -ENTRY(s_lock_init) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - ret - - -/* - * void - * s_lock(struct simplelock *lkp) - * { - * while (test_and_set(&lkp->lock_data)) - * continue; - * } - * - * Note: - * If the acquire fails we do a loop of reads waiting for the lock to - * become free instead of continually beating on the lock with xchgl. 
- * The theory here is that the CPU will stay within its cache until - * a write by the other CPU updates it, instead of continually updating - * the local cache (and thus causing external bus writes) with repeated - * writes to the lock. - */ -#ifndef SL_DEBUG - -ENTRY(s_lock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx -setlock: - xchgl %ecx, (%eax) - testl %ecx, %ecx - jz gotit /* it was clear, return */ -wait: - pause - cmpl $0, (%eax) /* wait to empty */ - jne wait /* still set... */ - jmp setlock /* empty again, try once more */ -gotit: - ret - -#else /* SL_DEBUG */ - -ENTRY(s_lock) - movl 4(%esp), %edx /* get the address of the lock */ -setlock: - movl _cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - movl $0, %eax - lock - cmpxchgl %ecx, (%edx) - jz gotit /* it was clear, return */ - pushl %eax /* save what we xchanged */ - decl %eax /* remove lock portion */ - cmpl _cpu_lockid, %eax /* do we hold it? */ - je bad_slock /* yes, thats not good... */ - addl $4, %esp /* clear the stack */ -wait: - pause - cmpl $0, (%edx) /* wait to empty */ - jne wait /* still set... */ - jmp setlock /* empty again, try once more */ -gotit: - ret - - ALIGN_TEXT -bad_slock: - /* %eax (current lock) is already on the stack */ - pushl %edx - pushl cpuid - pushl $bsl1 - call panic - -bsl1: .asciz "rslock: cpu: %d, addr: 0x%08x, lock: 0x%08x" - -#endif /* SL_DEBUG */ - - -/* - * int - * s_lock_try(struct simplelock *lkp) - * { - * return (!test_and_set(&lkp->lock_data)); - * } - */ -#ifndef SL_DEBUG - -ENTRY(s_lock_try) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx - - xchgl %ecx, (%eax) - testl %ecx, %ecx - setz %al /* 1 if previous value was 0 */ - movzbl %al, %eax /* convert to an int */ - - ret - -#else /* SL_DEBUG */ - -ENTRY(s_lock_try) - movl 4(%esp), %edx /* get the address of the lock */ - movl cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - - xorl %eax, %eax - lock - cmpxchgl %ecx, (%edx) - setz %al /* 1 if previous value was 0 */ - movzbl %al, %eax /* convert to an int */ - - ret - -#endif /* SL_DEBUG */ - - -/* - * void - * s_unlock(struct simplelock *lkp) - * { - * lkp->lock_data = 0; - * } - */ -ENTRY(s_unlock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - ret - -#if 0 - -/* - * XXX CRUFTY SS_LOCK IMPLEMENTATION REMOVED XXX - * - * These versions of simple_lock block interrupts, - * making it suitable for regions accessed by both top and bottom levels. - * This is done by saving the current value of the cpu flags in a per-cpu - * global, and disabling interrupts when the lock is taken. When the - * lock is released, interrupts might be enabled, depending upon the saved - * cpu flags. - * Because of this, it must ONLY be used for SHORT, deterministic paths! - * - * Note: - * It would appear to be "bad behaviour" to blindly store a value in - * ss_eflags, as this could destroy the previous contents. But since ss_eflags - * is a per-cpu variable, and its fatal to attempt to acquire a simplelock - * that you already hold, we get away with it. This needs to be cleaned - * up someday... 
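The wait loop being described is the classic test-and-test-and-set spin: one locked xchgl to attempt the acquire, then plain read-only polling (with pause) until the lock looks free, so a waiting cpu spins inside its own cache line instead of repeatedly pulling the line exclusive with locked writes. In C, with atomic_swap() and cpu_pause() as hypothetical stand-ins for xchgl and the pause instruction:

static void
s_lock_sketch(volatile int *lk)
{
	for (;;) {
		if (atomic_swap(lk, 1) == 0)	/* xchgl; 0 means it was free */
			return;
		while (*lk != 0)		/* read-only, cache-friendly spin */
			cpu_pause();
	}
}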
- */ - -/* - * void ss_lock(struct simplelock *lkp) - */ -#ifndef SL_DEBUG - -ENTRY(ss_lock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx /* value for a held lock */ -ssetlock: - pushfl - cli - xchgl %ecx, (%eax) /* compete */ - testl %ecx, %ecx - jz sgotit /* it was clear, return */ - popfl /* previous value while waiting */ -swait: - pause - cmpl $0, (%eax) /* wait to empty */ - jne swait /* still set... */ - jmp ssetlock /* empty again, try once more */ -sgotit: - popl ss_eflags /* save the old eflags */ - ret - -#else /* SL_DEBUG */ - -ENTRY(ss_lock) - movl 4(%esp), %edx /* get the address of the lock */ -ssetlock: - movl cpu_lockid, %ecx /* add cpu id portion */ - incl %ecx /* add lock portion */ - pushfl - cli - movl $0, %eax - lock - cmpxchgl %ecx, (%edx) /* compete */ - jz sgotit /* it was clear, return */ - pushl %eax /* save what we xchanged */ - decl %eax /* remove lock portion */ - cmpl cpu_lockid, %eax /* do we hold it? */ - je sbad_slock /* yes, thats not good... */ - addl $4, %esp /* clear the stack */ - popfl -swait: - pause - cmpl $0, (%edx) /* wait to empty */ - jne swait /* still set... */ - jmp ssetlock /* empty again, try once more */ -sgotit: - popl ss_eflags /* save the old task priority */ -sgotit2: - ret - - ALIGN_TEXT -sbad_slock: - /* %eax (current lock) is already on the stack */ - pushl %edx - pushl cpuid - pushl $sbsl1 - call panic - -sbsl1: .asciz "rsslock: cpu: %d, addr: 0x%08x, lock: 0x%08x" - -#endif /* SL_DEBUG */ - -/* - * void ss_unlock(struct simplelock *lkp) - */ -ENTRY(ss_unlock) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) /* clear the simple lock */ - testl $PSL_I, ss_eflags - jz ss_unlock2 - sti -ss_unlock2: - ret - -#endif - -/* - * These versions of simple_lock does not contain calls to profiling code. - * Thus they can be called from the profiling code. - */ - -/* - * void s_lock_np(struct simplelock *lkp) - */ -NON_GPROF_ENTRY(s_lock_np) - movl 4(%esp), %eax /* get the address of the lock */ - movl $1, %ecx -1: - xchgl %ecx, (%eax) - testl %ecx, %ecx - jz 3f -2: - pause - cmpl $0, (%eax) /* wait to empty */ - jne 2b /* still set... */ - jmp 1b /* empty again, try once more */ -3: - NON_GPROF_RET - -/* - * void s_unlock_np(struct simplelock *lkp) - */ -NON_GPROF_ENTRY(s_unlock_np) - movl 4(%esp), %eax /* get the address of the lock */ - movl $0, (%eax) - NON_GPROF_RET diff --git a/sys/platform/pc32/i386/spinlock.s b/sys/platform/pc32/i386/spinlock.s new file mode 100644 index 0000000000..8046c99ea4 --- /dev/null +++ b/sys/platform/pc32/i386/spinlock.s @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 2003, by Matthew dillon All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/simplelock.s,v 1.11.2.2 2003/02/04 20:55:28 jhb Exp $ + * $DragonFly: src/sys/platform/pc32/i386/spinlock.s,v 1.1 2003/07/06 21:23:48 dillon Exp $ + */ + +#include /* miscellaneous macros */ +#include + +/* + * The spinlock routines may only be used for low level debugging, like + * kernel printfs, and when no other option is available such as situations + * relating to hardware interrupt masks. Spinlock routines should not be + * used in interrupt service routines or in any other situation. + * + * NOTE: for UP the spinlock routines still disable/restore interrupts + */ +ENTRY(spin_lock) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +ENTRY(spin_unlock) + movl 4(%esp),%edx + SPIN_UNLOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +NON_GPROF_ENTRY(spin_lock_np) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + NON_GPROF_RET + +NON_GPROF_ENTRY(spin_unlock_np) + movl 4(%esp), %edx /* get the address of the lock */ + SPIN_UNLOCK((%edx)) + NON_GPROF_RET + +/* + * Auxillary convenience routines. Note that these functions disable and + * restore interrupts as well, on SMP, as performing spin locking functions. + */ +NON_GPROF_ENTRY(imen_lock) + SPIN_LOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(imen_unlock) + SPIN_UNLOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_lock) + SPIN_LOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_unlock) + SPIN_UNLOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_lock) + SPIN_LOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_unlock) + SPIN_UNLOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_lock) + SPIN_LOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_unlock) + SPIN_UNLOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_lock) + SPIN_LOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_unlock) + SPIN_UNLOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_lock) + SPIN_LOCK(cons_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_unlock) + SPIN_UNLOCK(cons_spinlock) + NON_GPROF_RET + diff --git a/sys/platform/pc32/i386/swtch.s b/sys/platform/pc32/i386/swtch.s index 8b6de1d88c..d7fd6be36f 100644 --- a/sys/platform/pc32/i386/swtch.s +++ b/sys/platform/pc32/i386/swtch.s @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.20 2003/07/05 05:54:00 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.21 2003/07/06 21:23:48 dillon Exp $ */ #include "npx.h" @@ -194,11 +194,13 @@ ENTRY(cpu_exit_switch) * any waiters. */ orl $TDF_EXITED,TD_FLAGS(%ecx) +#if 0 /* YYY MP lock may not be held by new target */ pushl %eax pushl %ecx /* wakeup(oldthread) */ call wakeup addl $4,%esp popl %eax /* note: next thread expects curthread in %eax */ +#endif /* * Restore the next thread's state and resume it. 
Note: the @@ -318,20 +320,6 @@ ENTRY(cpu_heavy_restore) movl PCB_EIP(%edx),%eax movl %eax,(%esp) - /* - * SMP ickyness to direct interrupts. - */ - -#ifdef SMP -#ifdef GRAB_LOPRIO /* hold LOPRIO for INTs */ -#ifdef CHEAP_TPR - movl $0, lapic_tpr -#else - andl $~APIC_TPR_PRIO, lapic_tpr -#endif /** CHEAP_TPR */ -#endif /** GRAB_LOPRIO */ -#endif /* SMP */ - /* * Restore the user LDT if we have one */ diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index 970ca45d45..cb0b4080b7 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.17 2003/07/03 21:22:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.18 2003/07/06 21:23:48 dillon Exp $ */ /* @@ -162,7 +162,9 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, * point of view of the userland scheduler unless we actually have to * switch. * - * usertdsw is called from within a critical section. + * usertdsw is called from within a critical section, but the BGL will + * have already been released by lwkt_switch() so only call MP safe functions + * that don't block! */ static void usertdsw(struct thread *ntd) @@ -205,9 +207,8 @@ userenter(void) td->td_switch = usertdsw; } -static int -userret(struct proc *p, struct trapframe *frame, - u_quad_t oticks, int have_mplock) +static void +userret(struct proc *p, struct trapframe *frame, u_quad_t oticks) { int sig, s; struct thread *td = curthread; @@ -217,10 +218,6 @@ userret(struct proc *p, struct trapframe *frame, */ crit_enter(); while ((sig = CURSIG(p)) != 0) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } crit_exit(); postsig(sig); crit_enter(); @@ -257,10 +254,6 @@ userret(struct proc *p, struct trapframe *frame, */ if (resched_wanted()) { uio_yield(); - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } while ((sig = CURSIG(p)) != 0) postsig(sig); } @@ -269,10 +262,6 @@ userret(struct proc *p, struct trapframe *frame, * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } addupc_task(p, frame->tf_eip, (u_int)(curthread->td_sticks - oticks) * psratio); } @@ -290,8 +279,6 @@ userret(struct proc *p, struct trapframe *frame, } splx(s); KKASSERT(mycpu->gd_uprocscheduled == 1); - - return(have_mplock); } #ifdef DEVICE_POLLING @@ -315,11 +302,13 @@ trap(frame) int i = 0, ucode = 0, type, code; vm_offset_t eva; + get_mplock(); + #ifdef DDB if (db_active) { eva = (frame.tf_trapno == T_PAGEFLT ? rcr2() : 0); trap_fatal(&frame, eva); - return; + goto out2; } #endif @@ -342,7 +331,7 @@ trap(frame) */ printf("kernel trap %d with interrupts disabled\n", type); - enable_intr(); + cpu_enable_intr(); } eva = 0; @@ -359,7 +348,7 @@ trap(frame) * correct. 
*/ eva = rcr2(); - enable_intr(); + cpu_enable_intr(); } #ifdef DEVICE_POLLING @@ -377,12 +366,13 @@ restart: if (frame.tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) + if (i != 0) { /* * returns to original process */ vm86_trap((struct vm86frame *)&frame); - return; + } + goto out2; } switch (type) { /* @@ -392,7 +382,7 @@ restart: case T_PROTFLT: case T_SEGNPFLT: trap_fatal(&frame, eva); - return; + goto out2; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ @@ -494,7 +484,7 @@ restart: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out2; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -525,7 +515,7 @@ restart: i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - return; + goto out2; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -549,7 +539,7 @@ kernel_trap: switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE, eva); - return; + goto out2; case T_DNA: #if NNPX > 0 @@ -559,7 +549,7 @@ kernel_trap: * registered such use. */ if (npxdna()) - return; + goto out2; #endif break; @@ -579,7 +569,7 @@ kernel_trap: do { \ if (frame.tf_eip == (int)where) { \ frame.tf_eip = (int)whereto; \ - return; \ + goto out2; \ } \ } while (0) @@ -596,7 +586,7 @@ kernel_trap: if (frame.tf_eip == (int)cpu_switch_load_gs) { curthread->td_pcb->pcb_gs = 0; psignal(p, SIGBUS); - return; + goto out2; } MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); @@ -608,7 +598,7 @@ kernel_trap: doreti_popl_fs_fault); if (curthread->td_pcb->pcb_onfault) { frame.tf_eip = (int)curthread->td_pcb->pcb_onfault; - return; + goto out2; } } break; @@ -625,7 +615,7 @@ kernel_trap: */ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; - return; + goto out2; } break; @@ -637,7 +627,7 @@ kernel_trap: * silently until the syscall handler has * saved the flags. */ - return; + goto out2; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* @@ -645,7 +635,7 @@ kernel_trap: * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; - return; + goto out2; } /* * Ignore debug register trace traps due to @@ -663,7 +653,7 @@ kernel_trap: * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); - return; + goto out2; } /* * Fall through (TRCTRAP kernel mode, kernel address) @@ -675,7 +665,7 @@ kernel_trap: */ #ifdef DDB if (kdb_trap (type, 0, &frame)) - return; + goto out2; #endif break; @@ -695,7 +685,8 @@ kernel_trap: sysbeep(TIMER_FREQ/880, hz); lastalert = time_second; } - return; + /* YYY mp count */ + goto out2; } #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ @@ -710,16 +701,16 @@ kernel_trap: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out2; } else if (panic_on_nmi == 0) - return; + goto out2; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame, eva); - return; + goto out2; } /* Translate fault for emulators (e.g. 
Linux) */ @@ -739,7 +730,13 @@ kernel_trap: #endif out: - userret(p, &frame, sticks, 1); +#ifdef SMP + if (ISPL(frame.tf_cs) == SEL_UPL) + KASSERT(curthread->td_mpcount == 1, ("badmpcount trap from %p", (void *)frame.tf_eip)); +#endif + userret(p, &frame, sticks); +out2: + rel_mplock(); } #ifdef notyet @@ -979,7 +976,7 @@ trap_fatal(frame, eva) #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); - printf("cpuid = %d; ", cpuid); + printf("cpuid = %d; ", mycpu->gd_cpuid); printf("lapic.id = %08x\n", lapic.id); #endif if (type == T_PAGEFLT) { @@ -1089,7 +1086,7 @@ dblfault_handler() #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); - printf("cpuid = %d; ", cpuid); + printf("cpuid = %d; ", mycpu->gd_cpuid); printf("lapic.id = %08x\n", lapic.id); #endif panic("double fault"); @@ -1164,7 +1161,6 @@ syscall2(frame) int error; int narg; int args[8]; - int have_mplock = 0; u_int code; #ifdef DIAGNOSTIC @@ -1175,10 +1171,14 @@ syscall2(frame) } #endif +#ifdef SMP + KASSERT(curthread->td_mpcount == 0, ("badmpcount syscall from %p", (void *)frame.tf_eip)); + get_mplock(); +#endif /* * access non-atomic field from critical section. p_sticks is * updated by the clock interrupt. Also use this opportunity - * to raise our LWKT priority. + * to lazy-raise our LWKT priority. */ crit_enter(); userenter(); @@ -1194,9 +1194,7 @@ syscall2(frame) /* * The prep code is not MP aware. */ - get_mplock(); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); - rel_mplock(); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. @@ -1233,8 +1231,6 @@ syscall2(frame) */ if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { - get_mplock(); - have_mplock = 1; #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, narg, args); @@ -1242,6 +1238,7 @@ syscall2(frame) goto bad; } +#if 0 /* * Try to run the syscall without the MP lock if the syscall * is MP safe. We have to obtain the MP lock no matter what if @@ -1251,13 +1248,10 @@ syscall2(frame) get_mplock(); have_mplock = 1; } +#endif #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } ktrsyscall(p->p_tracep, code, narg, args); } #endif @@ -1311,10 +1305,6 @@ bad: * Traced syscall. trapsignal() is not MP aware. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } @@ -1322,14 +1312,10 @@ bad: /* * Handle reschedule and other end-of-syscall issues */ - have_mplock = userret(p, &frame, sticks, have_mplock); + userret(p, &frame, sticks); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); } #endif @@ -1341,17 +1327,20 @@ bad: */ STOPEVENT(p, S_SCX, code); +#ifdef SMP /* * Release the MP lock if we had to get it */ - if (have_mplock) - rel_mplock(); + KASSERT(curthread->td_mpcount == 1, ("badmpcount syscall from %p", (void *)frame.tf_eip)); + rel_mplock(); +#endif } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. MP lock is held on entry and should be - * held on return. + * directly into user mode. MP lock is held on entry and should be + * released on return. 
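The have_mplock bookkeeping that used to thread through syscall2() and userret() collapses into a simple bracket: take the BGL exactly once on kernel entry, assert that td_mpcount is sane, and release exactly once on the way out. Schematically (do_syscall_body() is a placeholder for the argument copyin, dispatch and userret() in between; the KASSERTs are quoted from the diff):

static void
syscall2_sketch(struct trapframe *frame)
{
#ifdef SMP
	KASSERT(curthread->td_mpcount == 0,
	    ("badmpcount syscall from %p", (void *)frame->tf_eip));
	get_mplock();
#endif
	do_syscall_body(frame);		/* placeholder */
#ifdef SMP
	KASSERT(curthread->td_mpcount == 1,
	    ("badmpcount syscall from %p", (void *)frame->tf_eip));
	rel_mplock();
#endif
}

trap() follows the same shape, with every early return rewritten as goto out2 so the release cannot be skipped.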
This code will return back into the fork + * trampoline code which then runs doreti. */ void fork_return(p, frame) @@ -1362,9 +1351,14 @@ fork_return(p, frame) frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0, 1); + userret(p, &frame, 0); #ifdef KTRACE if (KTRPOINT(p->p_thread, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); #endif +#ifdef SMP + KKASSERT(curthread->td_mpcount == 1); + rel_mplock(); +#endif } + diff --git a/sys/platform/pc32/i386/vm86.c b/sys/platform/pc32/i386/vm86.c index 423da40241..0958bfe96d 100644 --- a/sys/platform/pc32/i386/vm86.c +++ b/sys/platform/pc32/i386/vm86.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.5 2003/06/25 03:55:53 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.6 2003/07/06 21:23:48 dillon Exp $ */ #include @@ -544,6 +544,9 @@ vm86_prepcall(struct vm86frame vmf) /* * vm86 trap handler; determines whether routine succeeded or not. * Called while in vm86 space, returns to calling process. + * + * A MP lock ref is held on entry from trap() and must be released prior + * to returning to the VM86 call. */ void vm86_trap(struct vm86frame *vmf) @@ -560,6 +563,7 @@ vm86_trap(struct vm86frame *vmf) else vmf->vmf_trapno = vmf->vmf_trapno << 16; + rel_mplock(); vm86_biosret(vmf); } @@ -569,6 +573,8 @@ vm86_intcall(int intnum, struct vm86frame *vmf) if (intnum < 0 || intnum > 0xff) return (EINVAL); + ASSERT_MP_LOCK_HELD(); + vmf->vmf_trapno = intnum; return (vm86_bioscall(vmf)); } @@ -589,6 +595,8 @@ vm86_datacall(intnum, vmf, vmc) u_int page; int i, entry, retval; + ASSERT_MP_LOCK_HELD(); + for (i = 0; i < vmc->npages; i++) { page = vtophys(vmc->pmap[i].kva & PG_FRAME); entry = vmc->pmap[i].pte_num; diff --git a/sys/platform/pc32/i386/vm86bios.s b/sys/platform/pc32/i386/vm86bios.s index 1ac67af98b..ee873253cd 100644 --- a/sys/platform/pc32/i386/vm86bios.s +++ b/sys/platform/pc32/i386/vm86bios.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/vm86bios.s,v 1.15.2.1 2000/05/16 06:58:07 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm86bios.s,v 1.8 2003/07/01 20:30:40 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm86bios.s,v 1.9 2003/07/06 21:23:48 dillon Exp $ */ #include /* miscellaneous asm macros */ @@ -63,12 +63,6 @@ ENTRY(vm86_bioscall) pushl %edi pushl %gs -#ifdef SMP - pushl %edx - MP_LOCK /* Get global lock */ - popl %edx -#endif - #if NNPX > 0 movl PCPU(curthread),%ecx cmpl %ecx,PCPU(npxthread) /* do we need to save fp? */ diff --git a/sys/platform/pc32/include/apic.h b/sys/platform/pc32/include/apic.h index be8d12825c..9f6e032091 100644 --- a/sys/platform/pc32/include/apic.h +++ b/sys/platform/pc32/include/apic.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 1996, by Peter Wemm and Steve Passe - * All rights reserved. + * Copyright (c) 1996, by Peter Wemm and Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,100 +23,241 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/apic.h,v 1.14.2.2 2003/03/21 21:46:15 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/include/Attic/apic.h,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/Attic/apic.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _MACHINE_APIC_H_ #define _MACHINE_APIC_H_ /* - * Local && I/O APIC definitions. - */ - -/* - * Pentium P54C+ Build-in APIC - * (Advanced programmable Interrupt Controller) - * - * Base Address of Build-in APIC in memory location - * is 0xfee00000. - * - * Map of APIC REgisters: - * - * Offset (hex) Description Read/Write state - * 000 Reserved - * 010 Reserved - * 020 ID Local APIC ID R/W - * 030 VER Local APIC Version R - * 040 Reserved - * 050 Reserved - * 060 Reserved - * 070 Reserved - * 080 Task Priority Register R/W - * 090 Arbitration Priority Register R - * 0A0 Processor Priority Register R - * 0B0 EOI Register W - * 0C0 RRR Remote read R - * 0D0 Logical Destination R/W - * 0E0 Destination Format Register 0..27 R; 28..31 R/W - * 0F0 SVR Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W - * 100 ISR 000-031 R - * 110 ISR 032-063 R - * 120 ISR 064-095 R - * 130 ISR 095-128 R - * 140 ISR 128-159 R - * 150 ISR 160-191 R - * 160 ISR 192-223 R - * 170 ISR 224-255 R - * 180 TMR 000-031 R - * 190 TMR 032-063 R - * 1A0 TMR 064-095 R - * 1B0 TMR 095-128 R - * 1C0 TMR 128-159 R - * 1D0 TMR 160-191 R - * 1E0 TMR 192-223 R - * 1F0 TMR 224-255 R - * 200 IRR 000-031 R - * 210 IRR 032-063 R - * 220 IRR 064-095 R - * 230 IRR 095-128 R - * 240 IRR 128-159 R - * 250 IRR 160-191 R - * 260 IRR 192-223 R - * 270 IRR 224-255 R - * 280 Error Status Register R - * 290 Reserved - * 2A0 Reserved - * 2B0 Reserved - * 2C0 Reserved - * 2D0 Reserved - * 2E0 Reserved - * 2F0 Reserved - * 300 ICR_LOW Interrupt Command Reg. (0-31) R/W - * 310 ICR_HI Interrupt Command Reg. (32-63) R/W - * 320 Local Vector Table (Timer) R/W - * 330 Reserved - * 340 Reserved - * 350 LVT1 Local Vector Table (LINT0) R/W - * 360 LVT2 Local Vector Table (LINT1) R/W - * 370 LVT3 Local Vector Table (ERROR) R/W - * 380 Initial Count Reg. for Timer R/W - * 390 Current Count of Timer R - * 3A0 Reserved - * 3B0 Reserved - * 3C0 Reserved - * 3D0 Reserved - * 3E0 Timer Divide Configuration Reg. R/W - * 3F0 Reserved - */ - - -/****************************************************************************** - * global defines, etc. - */ - - -/****************************************************************************** - * LOCAL APIC structure + * Local && I/O APIC definitions for Pentium P54C+ Built-in APIC. + * + * A per-cpu APIC resides in memory location 0xFEE00000. + * + * 31 ... 24 23 ... 16 15 ... 8 7 ... 0 + * +-----------+-----------+-----------+-----------+ + * 0000 | | | | | + * 0010 | | | | | + * +-----------+-----------+-----------+-----------+ + * + * +-----------+-----------+-----------+-----------+ + * 0020 ID | | ID | | | | RW + * +-----------+-----------+-----------+-----------+ + * + * The physical APIC ID is used with physical interrupt + * delivery modes. 
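The map that follows is exactly how the kernel sees the local APIC: a fixed register layout overlaid at 0xFEE00000, one 32-bit register per 16-byte slot (hence the PAD3/PAD4 fillers in struct LAPIC further down). Reading the physical APIC ID, for instance, is just a field load and a shift; the 0xff000000 mask below is an assumption of this sketch, not a quoted constant:

static int
lapic_id_sketch(void)
{
	return ((lapic.id & 0xff000000) >> 24);	/* ID lives in the top byte */
}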
+ * + * +-----------+-----------+-----------+-----------+ + * 0030 VER | | | | | + * +-----------+-----------+-----------+-----------+ + * 0040 | | | | | + * 0050 | | | | | + * 0060 | | | | | + * 0070 | | | | | + * +-----------+-----------+-----------+-----------+ + * 0080 TPR | | | | PRIO SUBC | + * 0090 APR | | | | | + * 00A0 PPR | | | | | + * +-----------+-----------+-----------+-----------+ + * + * The Task Priority Register provides a priority threshold + * mechanism for interrupting the processor. Only interrupts + * with a higher priority then that specified in the TPR will + * be served. Other interrupts are recorded and serviced + * as soon as the TPR value decreases enough to allow that + * (unless EOId by another APIC). + * + * PRIO (7:4). Main priority. If 15 the APIC will not + * accept any interrupts. + * SUBC (3:0) Sub priority. See APR/PPR. + * + * + * The Processor Priority Register determines whether a + * pending interrupt can be dispensed to the processor. ISRV + * Is the vector of the highest priority ISR bit set or + * zero if no ISR bit is set. + * + * IF TPR[7:4] >= ISRV[7:4] + * PPR[7:0] = TPR[7:0] + * ELSE + * PPR[7:0] = ISRV[7:4].000 + * + * The Arbitration Priority Register holds the current + * lowest priority of the procsesor, a value used during + * lowest-priority arbitration. + * + * IF (TPR[7:4] >= IRRV[7:4] AND TPR[7:4] > ISRV[7:4]) + * APR[7:0] = TPR[7:0] + * ELSE + * APR[7:4] = max((TPR[7:4]&ISRV[7:4]),IRRV[7:4]).000 + * + * +-----------+-----------+-----------+-----------+ + * 00B0 EOI | | | | | + * +-----------+-----------+-----------+-----------+ + * 00C0 | | | | | + * +-----------+-----------+-----------+-----------+ + * 00D0 LDR |LOG APICID | | | | + * +-----------+-----------+-----------+-----------+ + * 00E0 DFR |MODEL| | | | | + * +-----------+-----------+-----------+-----------+ + * + * The logical APIC ID is used with logical interrupt + * delivery modes. Interpretation of logical destination + * information depends on the MODEL bits in the Destination + * Format Regiuster. + * + * MODEL=1111 FLAT MODEL - The MDA is interpreted as + * a decoded address. By setting + * one bit in the LDR for each + * local apic 8 APICs can coexist. + * + * MODEL=0000 CLUSTER MODEL - + * + * 31 ... 24 23 ... 16 15 ... 8 7 ... 0 + * +-----------+-----------+-----------+-----------+ + * 00F0 SVR | | | | | + * +-----------+-----------+-----------+-----------+ + * 0100-0170 ISR| | | | | + * 0180-01F0 TMR| | | | | + * 0200-0270 IRR| | | | | + * +-----------+-----------+-----------+-----------+ + * + * These registers represent 256 bits, one bit for each + * possible interrupt. Interrupts 0-15 are reserved so + * bits 0-15 are also reserved. + * + * TMR - Trigger mode register. Upon acceptance of an int + * the corresponding bit is cleared for edge-trig and + * set for level-trig. If the TMR bit is set (level), + * the local APIC sends an EOI to all I/O APICs as + * a result of software issuing an EOI command. + * + * IRR - Interrupt Request Register. Contains active + * interrupt requests that have been accepted but not + * yet dispensed by the current local APIC. The bit is + * cleared and the corresponding ISR bit is set when + * the INTA cycle is issued. + * + * ISR - Interrupt In-Service register. Interrupt has been + * delivered but not yet fully serviced. Cleared when + * an EOI is issued from the processor. An EOI will + * also send an EOI to all I/O APICs if TMR was set. 
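The PPR and APR rules above are easy to mis-read in prose, so here they are transcribed literally; tpr, isrv and irrv are the full 8-bit register values, and this restates the comment's formulas rather than quoting kernel code:

static u_char
compute_ppr_sketch(u_char tpr, u_char isrv)
{
	if ((tpr >> 4) >= (isrv >> 4))
		return (tpr);			/* PPR[7:0] = TPR[7:0] */
	return (isrv & 0xf0);			/* PPR[7:0] = ISRV[7:4].000 */
}

static u_char
compute_apr_sketch(u_char tpr, u_char isrv, u_char irrv)
{
	u_char hi;

	if ((tpr >> 4) >= (irrv >> 4) && (tpr >> 4) > (isrv >> 4))
		return (tpr);			/* APR[7:0] = TPR[7:0] */
	hi = (tpr & isrv) >> 4;			/* TPR[7:4] & ISRV[7:4] */
	if ((irrv >> 4) > hi)
		hi = irrv >> 4;
	return (hi << 4);			/* APR[7:4] = max(...).000 */
}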
+ * + * +-----------+-----------+-----------+-----------+ + * 0280 ESR | | | | | + * 0290-02F0 | | | | | + * +--FEDCBA98-+--76543210-+--FEDCBA98-+-----------+ + * 0300 ICR_LO | | XX | TL SDMMM | vector | + * 0310 ICR_HI | DEST FIELD| | | | + * +-----------+-----------+-----------+-----------+ + * + * The interrupt command register + * + * XX: Destination Shorthand field: + * + * 00 Use Destination field + * 01 Self only. Dest field ignored. + * 10 All including self (uses a + * destination field of 0x0F) + * 11 All excluding self (uses a + * destination field of 0x0F) + * + * T: 1 = Level 0 = Edge Trigger modde, used for + * the INIT level de-assert delivery mode only. + * Not sure. + * + * L: 0 = De-Assert, 1 = Assert. Not sure what this + * is. For INIT mode use 0, for all other modes + * use 1. + * + * S: 1 = Send Pending. Interrupt has been injected + * but APIC has not yet accepted it. + * + * D: 0=physical 1=logical. In physical mode + * only 24-27 of DEST FIELD is used from ICR_HI. + * + * MMM: 000 Fixed. Deliver to all processors according + * to the ICR. Always treated as edge trig. + * + * 001 Lowest Priority. Deliver to just the + * processor running at the lowest priority. + * + * 010 SMI. The vector must be 00B. Only edge + * triggered is allowed. The vector field + * must be programmed to zero (huh?). + * + * 011 + * + * 100 NMI. Deliver as an NMI to all processors + * listed in the destination field. The + * vector is ignored. Alawys treated as + * edge triggered. + * + * 101 INIT. Deliver as an INIT signal to all + * processors (like FIXED). Vector is ignored + * and it is always edge-triggered. + * + * 110 Start Up. Sends a special message between + * cpus. the vector contains a start-up + * address for MP boot protocol. + * Always edge triggered. Note: a startup + * int is not automatically tried in case of + * failure. + * + * 111 + * + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0320 LTIMER | | TM | ---S---- | vector | + * 0330 | | | | | + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0340 LVPCINT | | -M | ---S-MMM | vector | + * 0350 LVINT0 | | -M | LRPS-MMM | vector | + * 0360 LVINT1 | | -M | LRPS-MMM | vector | + * 0370 LVERROR | | -M | -------- | vector | + * +-----------+-----------+-----------+-----------+ + * + * T: 1 = periodic, 0 = one-shot + * + * M: 1 = masked + * + * L: 1 = level, 0 = edge + * + * R: For level triggered only, set to 1 when a + * level int is accepted, cleared by EOI. + * + * P: Pin Polarity 0 = Active High, 1 = Active Low + * + * S: 1 = Send Pending. Interrupt has been injected + * but APIC has not yet accepted it. + * + * MMM 000 = Fixed deliver to cpu according to LVT + * + * MMM 100 = NMI deliver as an NMI. Always edge + * + * MMM 111 = ExtInt deliver from 8259, routes INTA + * bus cycle to external + * controller. Controller is + * expected to supply vector. + * Always level. + * + * +-----------+-----------+-----------+-----------+ + * 0380 ICR | | | | | + * 0390 CCR | | | | | + * 03A0 | | | | | + * 03B0 | | | | | + * 03C0 | | | | | + * 03D0 | | | | | + * 03E0 DCR | | | | | + * +-----------+-----------+-----------+-----------+ + * + * + * NOTE ON EOI: Upon receiving an EOI the APIC clears the highest priority + * interrupt in the ISR and selects the next highest priority interrupt + * for posting to the CPU. If the interrupt being EOId was level + * triggered the APIC will send an EOI to all I/O APICs. For the moment + * you can write garbage to the EOI register but for future compatibility + * 0 should be written. 
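Putting the ICR description to work, sending a fixed-vector IPI is two register writes plus a poll of the send-pending bit. The bit positions below (12 for S, 14 for L) come straight from the table; icr_hi/icr_lo as field names of the global lapic, and cpu_pause(), are this sketch's assumptions:

static void
send_fixed_ipi_sketch(int dest_apic_id, int vector)
{
	while (lapic.icr_lo & (1 << 12))	/* S: previous IPI still pending */
		cpu_pause();
	lapic.icr_hi = dest_apic_id << 24;	/* DEST FIELD, physical mode */
	lapic.icr_lo = (1 << 14) | vector;	/* L=assert, MMM=000 (fixed) */
}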
+ * */ #ifndef LOCORE @@ -128,8 +269,8 @@ struct LAPIC { /* reserved */ PAD4; /* reserved */ PAD4; - u_int32_t id; PAD3; - u_int32_t version; PAD3; + u_int32_t id; PAD3; /* 0020 R/W */ + u_int32_t version; PAD3; /* 0030 RO */ /* reserved */ PAD4; /* reserved */ PAD4; /* reserved */ PAD4; @@ -220,11 +361,6 @@ typedef struct IOAPIC ioapic_t; #define ALLHWI_LEVEL 0x00000000 /* TPR of CPU grabbing INTs */ #endif /** GRAB_LOPRIO */ -/* XXX these 2 don't really belong here... */ -#define COUNT_FIELD 0x00ffffff /* count portion of the lock */ -#define CPU_FIELD 0xff000000 /* cpu portion of the lock */ -#define FREE_LOCK 0xffffffff /* value of lock when free */ - /* * XXX This code assummes that the reserved field of the * local APIC TPR can be written with all 0s. diff --git a/sys/platform/pc32/include/lock.h b/sys/platform/pc32/include/lock.h index 9c599e08de..67ea825e1e 100644 --- a/sys/platform/pc32/include/lock.h +++ b/sys/platform/pc32/include/lock.h @@ -1,6 +1,5 @@ /* - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 2003, Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,164 +22,183 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/platform/pc32/include/lock.h,v 1.2 2003/06/17 04:28:35 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/lock.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ - #ifndef _MACHINE_LOCK_H_ #define _MACHINE_LOCK_H_ +#ifndef _MACHINE_PSL_H_ +#include +#endif -#ifdef LOCORE - +/* + * MP_FREE_LOCK is used by both assembly and C under SMP. + */ #ifdef SMP +#define MP_FREE_LOCK 0xffffffff /* value of lock when free */ +#endif -#define MPLOCKED lock ; +#ifdef LOCORE /* - * Some handy macros to allow logical organization. + * Spinlock assembly support. Note: eax and ecx can be tromped. No + * other register will be. Note that these routines are sometimes + * called with (%edx) as the mem argument. + * + * Under UP the spinlock routines still serve to disable/restore + * interrupts. */ -#define MP_LOCK call _get_mplock -#define MP_TRYLOCK \ - pushl $_mp_lock ; /* GIANT_LOCK */ \ - call _MPtrylock ; /* try to get lock */ \ - add $4, %esp +#ifdef SMP -#define MP_RELLOCK \ - movl $_mp_lock,%edx ; /* GIANT_LOCK */ \ - call _MPrellock_edx +#define SPIN_INIT(mem) \ + movl $0,mem ; \ + +#define SPIN_INIT_NOREG(mem) \ + SPIN_INIT(mem) ; \ + +#define SPIN_LOCK(mem) \ + pushfl ; \ + popl %ecx ; /* flags */ \ + cli ; \ + orl $PSL_C,%ecx ; /* make sure non-zero */ \ +7: ; \ + movl $0,%eax ; /* expected contents of lock */ \ + cmpxchgl %ecx,mem ; /* Z=1 (jz) on success */ \ + jz 8f ; \ + jmp 7b ; \ +8: ; \ + +#define SPIN_LOCK_PUSH_REGS \ + subl $8,%esp ; \ + movl %ecx,(%esp) ; \ + movl %eax,4(%esp) ; \ + +#define SPIN_LOCK_POP_REGS \ + movl (%esp),%ecx ; \ + movl 4(%esp),%eax ; \ + addl $8,%esp ; \ + +#define SPIN_LOCK_FRAME_SIZE 8 + +#define SPIN_LOCK_NOREG(mem) \ + SPIN_LOCK_PUSH_REGS ; \ + SPIN_LOCK(mem) ; \ + SPIN_LOCK_POP_REGS ; \ + +#define SPIN_UNLOCK(mem) \ + pushl mem ; \ + movl $0,mem ; \ + popfl ; \ + +#define SPIN_UNLOCK_PUSH_REGS +#define SPIN_UNLOCK_POP_REGS +#define SPIN_UNLOCK_FRAME_SIZE 0 + +#define SPIN_UNLOCK_NOREG(mem) \ + SPIN_UNLOCK(mem) ; \ -/* - * Protects the IO APIC and apic_imen as a critical region. 
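A subtlety in the SPIN_LOCK/SPIN_UNLOCK macros above: the value swapped into the lock word is the acquirer's saved eflags with PSL_C forced on, so it can never collide with the free value 0, and SPIN_UNLOCK can free the lock and restore the saved interrupt state from that same word. In C, with save_flags(), load_flags(), cpu_disable_intr() and cmpxchg() again standing in for pushfl/popfl, cli and cmpxchgl:

static void
spin_lock_sketch(volatile u_int *mem)
{
	u_int flags;

	flags = save_flags() | PSL_C;		/* nonzero by construction */
	cpu_disable_intr();
	while (cmpxchg(mem, 0, flags) == 0)	/* spin until *mem was 0 */
		;
}

static void
spin_unlock_sketch(volatile u_int *mem)
{
	u_int flags;

	flags = *mem;
	*mem = 0;		/* free the lock ... */
	load_flags(flags);	/* ... and restore the saved eflags */
}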
- */ -#define IMASK_LOCK \ - pushl $_imen_lock ; /* address of lock */ \ - call _s_lock ; /* MP-safe */ \ - addl $4, %esp +#else -#define IMASK_UNLOCK \ - movl $0, _imen_lock +#define SPIN_LOCK(mem) \ + pushfl ; \ + cli ; \ + orl $PSL_C,(%esp) ; \ + popl mem ; \ -#else /* SMP */ +#define SPIN_LOCK_PUSH_RESG +#define SPIN_LOCK_POP_REGS +#define SPIN_LOCK_FRAME_SIZE 0 -#define MPLOCKED /* NOP */ +#define SPIN_UNLOCK(mem) \ + pushl mem ; \ + movl $0,mem ; \ + popfl ; \ -#define MP_LOCK /* NOP */ +#define SPIN_UNLOCK_PUSH_REGS +#define SPIN_UNLOCK_POP_REGS +#define SPIN_UNLOCK_FRAME_SIZE 0 -#endif /* SMP */ +#endif /* SMP */ -#else /* LOCORE */ +#else /* LOCORE */ -#ifdef SMP +/* + * Spinlock functions (UP and SMP). Under UP a spinlock still serves + * to disable/restore interrupts even if it doesn't spin. + */ +struct spinlock { + volatile int opaque; +}; -#include /** xxx_LOCK */ +typedef struct spinlock *spinlock_t; + +void mpintr_lock(void); /* disables int / spinlock combo */ +void mpintr_unlock(void); +void com_lock(void); /* disables int / spinlock combo */ +void com_unlock(void); +void imen_lock(void); /* disables int / spinlock combo */ +void imen_unlock(void); +void clock_lock(void); /* disables int / spinlock combo */ +void clock_unlock(void); +void cons_lock(void); /* disables int / spinlock combo */ +void cons_unlock(void); + +extern struct spinlock smp_rv_spinlock; + +void spin_lock(spinlock_t lock); +void spin_lock_np(spinlock_t lock); +void spin_unlock(spinlock_t lock); +void spin_unlock_np(spinlock_t lock); +#if 0 +void spin_lock_init(spinlock_t lock); +#endif /* - * Locks regions protected in UP kernel via cli/sti. + * Inline version of spinlock routines -- overrides assembly. Only unlock + * and init here please. */ -#ifdef USE_MPINTRLOCK -#define MPINTR_LOCK() s_lock(&mpintr_lock) -#define MPINTR_UNLOCK() s_unlock(&mpintr_lock) -#else -#define MPINTR_LOCK() -#define MPINTR_UNLOCK() -#endif /* USE_MPINTRLOCK */ +static __inline void +spin_lock_init(spinlock_t lock) +{ + lock->opaque = 0; +} /* - * sio/cy lock. - * XXX should rc (RISCom/8) use this? - */ -#ifdef USE_COMLOCK -#define COM_LOCK() s_lock(&com_lock) -#define COM_UNLOCK() s_unlock(&com_lock) -#define COM_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); COM_LOCK(); } -#define COM_ENABLE_INTR() \ - { COM_UNLOCK(); __asm __volatile("sti"); } -#else -#define COM_LOCK() -#define COM_UNLOCK() -#define COM_DISABLE_INTR() disable_intr() -#define COM_ENABLE_INTR() enable_intr() -#endif /* USE_COMLOCK */ - -/* - * Clock hardware/struct lock. - * XXX pcaudio and friends still need this lock installed. + * MP LOCK functions for SMP and UP. Under UP the MP lock does not exist + * but we leave a few functions intact as macros for convenience. 
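Client code sees only the small C API above; interrupt state is carried implicitly while the lock is held, so a critical section is just a lock/unlock pair. A usage sketch (my_lock and the surrounding functions are illustrative):

static struct spinlock my_lock;

static void
my_subsystem_init_sketch(void)
{
	spin_lock_init(&my_lock);
}

static void
my_critical_section_sketch(void)
{
	spin_lock(&my_lock);	/* disables interrupts (UP and SMP), spins on SMP */
	/* short, deterministic work only; never block or sleep here */
	spin_unlock(&my_lock);	/* releases and restores interrupt state */
}

Per the comment in spinlock.s, this is strictly a last-resort primitive for debugging output and hardware interrupt mask manipulation; ordinary synchronization belongs to tokens and the LWKT mechanisms.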
*/ -#ifdef USE_CLOCKLOCK -#define CLOCK_LOCK() s_lock(&clock_lock) -#define CLOCK_UNLOCK() s_unlock(&clock_lock) -#define CLOCK_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); CLOCK_LOCK(); } -#define CLOCK_ENABLE_INTR() \ - { CLOCK_UNLOCK(); __asm __volatile("sti"); } -#else -#define CLOCK_LOCK() -#define CLOCK_UNLOCK() -#define CLOCK_DISABLE_INTR() disable_intr() -#define CLOCK_ENABLE_INTR() enable_intr() -#endif /* USE_CLOCKLOCK */ - -#else /* SMP */ - -#define MPINTR_LOCK() -#define MPINTR_UNLOCK() - -#define COM_LOCK() -#define COM_UNLOCK() -#define CLOCK_LOCK() -#define CLOCK_UNLOCK() +#ifdef SMP -#endif /* SMP */ +void get_mplock(void); +int try_mplock(void); +void rel_mplock(void); +int cpu_try_mplock(void); +#if 0 +void cpu_rel_mplock(void); +#endif +void cpu_get_initial_mplock(void); -/* - * Simple spin lock. - * It is an error to hold one of these locks while a process is sleeping. - */ -struct simplelock { - volatile int lock_data; -}; +extern u_int mp_lock; -/* functions in simplelock.s */ -void s_lock_init __P((struct simplelock *)); -void s_lock __P((struct simplelock *)); -int s_lock_try __P((struct simplelock *)); -void ss_lock __P((struct simplelock *)); -void ss_unlock __P((struct simplelock *)); -void s_lock_np __P((struct simplelock *)); -void s_unlock_np __P((struct simplelock *)); +#define MP_LOCK_HELD() (mp_lock == mycpu->gd_cpuid) +#define ASSERT_MP_LOCK_HELD() KKASSERT(MP_LOCK_HELD()) -/* inline simplelock functions */ static __inline void -s_unlock(struct simplelock *lkp) +cpu_rel_mplock(void) { - lkp->lock_data = 0; + mp_lock = MP_FREE_LOCK; } -/* global data in mp_machdep.c */ -extern struct simplelock imen_lock; -extern struct simplelock cpl_lock; -extern struct simplelock fast_intr_lock; -extern struct simplelock intr_lock; -extern struct simplelock clock_lock; -extern struct simplelock com_lock; -extern struct simplelock mpintr_lock; -extern struct simplelock mcount_lock; - -#if !defined(SIMPLELOCK_DEBUG) && MAXCPU > 1 -/* - * This set of defines turns on the real functions in i386/isa/apic_ipl.s. 
- */ -#define simple_lock_init(alp) s_lock_init(alp) -#define simple_lock(alp) s_lock(alp) -#define simple_lock_try(alp) s_lock_try(alp) -#define simple_unlock(alp) s_unlock(alp) - -#endif /* !SIMPLELOCK_DEBUG && MAXCPU > 1 */ +#else -#endif /* LOCORE */ +#define get_mplock() +#define try_mplock() 1 +#define rel_mplock() +#define ASSERT_MP_LOCK_HELD() -#endif /* !_MACHINE_LOCK_H_ */ +#endif /* SMP */ +#endif /* LOCORE */ +#endif /* !_MACHINE_LOCK_H_ */ diff --git a/sys/platform/pc32/include/smp.h b/sys/platform/pc32/include/smp.h index 99a253df8a..bad7f94664 100644 --- a/sys/platform/pc32/include/smp.h +++ b/sys/platform/pc32/include/smp.h @@ -7,7 +7,7 @@ * ---------------------------------------------------------------------------- * * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.2 2003/06/17 04:28:36 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ * */ @@ -47,23 +47,6 @@ extern int bootMP_size; /* functions in mpboot.s */ void bootMP __P((void)); -/* global data in mplock.s */ -extern u_int mp_lock; -extern u_int isr_lock; -#ifdef RECURSIVE_MPINTRLOCK -extern u_int mpintr_lock; -#endif /* RECURSIVE_MPINTRLOCK */ - -/* functions in mplock.s */ -void get_mplock __P((void)); -void rel_mplock __P((void)); -int try_mplock __P((void)); -#ifdef RECURSIVE_MPINTRLOCK -void get_mpintrlock __P((void)); -void rel_mpintrlock __P((void)); -int try_mpintrlock __P((void)); -#endif /* RECURSIVE_MPINTRLOCK */ - /* global data in apic_vector.s */ extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; @@ -80,7 +63,6 @@ void io_apic_write __P((int, int, u_int)); /* global data in mp_machdep.c */ extern int bsp_apic_ready; -extern int mp_ncpus; extern int mp_naps; extern int mp_nbusses; extern int mp_napics; @@ -126,8 +108,8 @@ void assign_apic_irq __P((int apic, int intpin, int irq)); void revoke_apic_irq __P((int irq)); void bsp_apic_configure __P((void)); void init_secondary __P((void)); -void smp_invltlb __P((void)); int stop_cpus __P((u_int)); +void ap_init __P((void)); int restart_cpus __P((u_int)); #ifdef BETTER_CLOCK void forward_statclock __P((int pscnt)); @@ -178,20 +160,6 @@ extern volatile int smp_idle_loops; #endif /* !LOCORE */ #else /* !SMP && !APIC_IO */ -/* - * Create dummy MP lock empties - */ - -static __inline void -get_mplock(void) -{ -} - -static __inline void -rel_mplock(void) -{ -} - #endif #endif /* _KERNEL */ diff --git a/sys/platform/pc32/include/smptests.h b/sys/platform/pc32/include/smptests.h index ab60e49d9b..add6132e31 100644 --- a/sys/platform/pc32/include/smptests.h +++ b/sys/platform/pc32/include/smptests.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/smptests.h,v 1.33.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/include/Attic/smptests.h,v 1.2 2003/06/17 04:28:36 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/Attic/smptests.h,v 1.3 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _MACHINE_SMPTESTS_H_ @@ -64,16 +64,6 @@ #define PUSHDOWN_LEVEL_3_NOT #define PUSHDOWN_LEVEL_4_NOT -/* - * Debug version of simple_lock. This will store the CPU id of the - * holding CPU along with the lock. When a CPU fails to get the lock - * it compares its own id to the holder id. If they are the same it - * panic()s, as simple locks are binary, and this would cause a deadlock. - * - */ -#define SL_DEBUG - - /* * Put FAST_INTR() ISRs at an APIC priority above the regular INTs. 
* Allow the mp_lock() routines to handle FAST interrupts while spinning. @@ -191,21 +181,6 @@ #define GIANT_LOCK #ifdef APIC_IO -/* - * Enable extra counters for some selected locations in the interrupt handlers. - * Look in apic_vector.s, apic_ipl.s and ipl.s for APIC_ITRACE or - * APIC_INTR_DIAGNOSTIC. - */ -#undef APIC_INTR_DIAGNOSTIC - -/* - * Add extra tracking of a specific interrupt. Look in apic_vector.s, - * apic_ipl.s and ipl.s for APIC_ITRACE and log_intr_event. - * APIC_INTR_DIAGNOSTIC must be defined for this to work. - */ -#ifdef APIC_INTR_DIAGNOSTIC -#define APIC_INTR_DIAGNOSTIC_IRQ 17 -#endif /* * Don't assume that slow interrupt handler X is called from vector diff --git a/sys/platform/pc32/isa/apic_ipl.s b/sys/platform/pc32/isa/apic_ipl.s index 57bf9a4f18..e0e633f81b 100644 --- a/sys/platform/pc32/isa/apic_ipl.s +++ b/sys/platform/pc32/isa/apic_ipl.s @@ -1,6 +1,6 @@ /*- - * Copyright (c) 1997, by Steve Passe - * All rights reserved. + * Copyright (c) 1997, by Steve Passe, All rights reserved. + * Copyright (c) 2003, by Matthew Dillon, All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,412 +23,72 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/isa/apic_ipl.s,v 1.27.2.2 2000/09/30 02:49:35 ps Exp $ - * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_ipl.s,v 1.6 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_ipl.s,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ -#if 0 - .data ALIGN_DATA -/* - * Routines used by splz_unpend to build an interrupt frame from a - * trap frame. The _vec[] routines build the proper frame on the stack, - * then call one of _Xintr0 thru _XintrNN. - * - * used by: - * i386/isa/apic_ipl.s (this file): splz_unpend JUMPs to HWIs. - * i386/isa/clock.c: setup _vec[clock] to point at _vec8254. - */ - .globl _vec -_vec: - .long vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7 - .long vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15 - .long vec16, vec17, vec18, vec19, vec20, vec21, vec22, vec23 + /* + * Interrupt mask for APIC interrupts, defaults to all hardware + * interrupts turned off. + */ -/* - * Note: - * This is the UP equivilant of _imen. - * It is OPAQUE, and must NOT be accessed directly. - * It MUST be accessed along with the IO APIC as a 'critical region'. - * Accessed by: - * INTREN() - * INTRDIS() - * MAYBE_MASK_IRQ - * MAYBE_UNMASK_IRQ - * imen_dump() - */ .p2align 2 /* MUST be 32bit aligned */ - .globl _apic_imen -_apic_imen: - .long HWI_MASK + .globl apic_imen +apic_imen: + .long HWI_MASK -/* - * - */ .text SUPERALIGN_TEXT -/* - * splz() - dispatch pending interrupts after cpl reduced - * - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. However, since ipending can change at any time - * (by an interrupt or, with SMP, by another cpu), we have to - * repeat the check. At the moment we must own the MP lock in - * the SMP case because the interruput handlers require it. We - * loop until no unmasked pending interrupts remain. - * - * No new unmaksed pending interrupts will be added during the - * loop because, being unmasked, the interrupt code will be able - * to execute the interrupts. 
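apic_imen is the SMP analogue of the ICU's imen: one bit per IO APIC interrupt pin, set while that source is masked, defaulting to everything masked, and only ever read or written inside the IMASK_LOCK critical region. Conceptually (sketch only; the real accessors are the rewritten INTREN/INTRDIS routines below):

static int
irq_masked_sketch(int irq)
{
	return ((apic_imen & (1 << irq)) != 0);	/* bit set: pin masked */
}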
- * - * Interrupts come in two flavors: Hardware interrupts and software - * interrupts. We have to detect the type of interrupt (based on the - * position of the interrupt bit) and call the appropriate dispatch - * routine. - * - * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't - * rely on the secondary btrl tests. - */ - pushl %ebx - movl _curthread,%ebx - movl TD_CPL(%ebx),%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl $0,_reqpri - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne splz_unpend - popl %ebx - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - lock - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - * - * The vec[] routines build the proper frame on the stack so - * the interrupt will eventually return to the caller or splz, - * then calls one of _Xintr0 thru _XintrNN. + * Functions to enable and disable a hardware interrupt. Generally + * called with only one bit set in the mask but can handle multiple + * bits to present the same API as the ICU. */ - popl %ebx - jmp *_vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax /* save cpl across call */ - orl imasks(,%ecx,4),%eax - movl %eax,TD_CPL(%ebx) /* set cpl for SWI */ - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,TD_CPL(%ebx) /* restore cpl and loop */ - jmp splz_next - -/* - * Fake clock interrupt(s) so that they appear to come from our caller instead - * of from here, so that system profiling works. - * XXX do this more generally (for all vectors; look up the C entry point). - * XXX frame bogusness stops us from just jumping to the C entry point. - * We have to clear iactive since this is an unpend call, and it will be - * set from the time of the original INT. - */ - -/* - * The 'generic' vector stubs. - */ - -#define BUILD_VEC(irq_num) \ - ALIGN_TEXT ; \ -__CONCAT(vec,irq_num): ; \ - popl %eax ; \ - pushfl ; \ - pushl $KCSEL ; \ - pushl %eax ; \ - cli ; \ - lock ; /* MP-safe */ \ - andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \ - MEXITCOUNT ; \ - APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \ - jmp __CONCAT(_Xintr,irq_num) - - BUILD_VEC(0) - BUILD_VEC(1) - BUILD_VEC(2) - BUILD_VEC(3) - BUILD_VEC(4) - BUILD_VEC(5) - BUILD_VEC(6) - BUILD_VEC(7) - BUILD_VEC(8) - BUILD_VEC(9) - BUILD_VEC(10) - BUILD_VEC(11) - BUILD_VEC(12) - BUILD_VEC(13) - BUILD_VEC(14) - BUILD_VEC(15) - BUILD_VEC(16) /* 8 additional INTs in IO APIC */ - BUILD_VEC(17) - BUILD_VEC(18) - BUILD_VEC(19) - BUILD_VEC(20) - BUILD_VEC(21) - BUILD_VEC(22) - BUILD_VEC(23) - - -/****************************************************************************** - * XXX FIXME: figure out where these belong. - */ - -/* this nonsense is to verify that masks ALWAYS have 1 and only 1 bit set */ -#define QUALIFY_MASKS_NOT - -#ifdef QUALIFY_MASKS -#define QUALIFY_MASK \ - btrl %ecx, %eax ; \ - andl %eax, %eax ; \ - jz 1f ; \ - pushl $bad_mask ; \ - call _panic ; \ -1: - -bad_mask: .asciz "bad mask" -#else -#define QUALIFY_MASK -#endif - -/* - * (soon to be) MP-safe function to clear ONE INT mask bit. 
- * The passed arg is a 32bit u_int MASK. - * It sets the associated bit in _apic_imen. - * It sets the mask bit of the associated IO APIC register. - */ -ENTRY(INTREN) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTRDIS) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ - bsfl %eax, %ecx /* get pin index */ - btrl %ecx, apic_imen /* update apic_imen */ - - QUALIFY_MASK - + movl 4(%esp),%eax +1: + bsfl %eax,%ecx + jz 2f + btrl %ecx,%eax + btsl %ecx, apic_imen shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - - movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - andl $~IOART_INTMASK, %eax /* clear mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + jz 2f + movl %ecx, (%edx) /* target register index */ + orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ ret -/* - * (soon to be) MP-safe function to set ONE INT mask bit. - * The passed arg is a 32bit u_int MASK. - * It clears the associated bit in apic_imen. - * It clears the mask bit of the associated IO APIC register. - */ -ENTRY(INTRDIS) - pushfl /* save state of EI flag */ - cli /* prevent recursion */ +ENTRY(INTREN) IMASK_LOCK /* enter critical reg */ - - movl 8(%esp), %eax /* mask into %eax */ + movl 4(%esp), %eax /* mask into %eax */ +1: bsfl %eax, %ecx /* get pin index */ - btsl %ecx, apic_imen /* update _apic_imen */ - - QUALIFY_MASK - + jz 2f + btrl %ecx,%eax + btrl %ecx, apic_imen /* update apic_imen */ shll $4, %ecx movl CNAME(int_to_apicintpin) + 8(%ecx), %edx movl CNAME(int_to_apicintpin) + 12(%ecx), %ecx testl %edx, %edx - jz 1f - + jz 2f movl %ecx, (%edx) /* write the target register index */ - movl 16(%edx), %eax /* read the target register data */ - orl $IOART_INTMASK, %eax /* set mask bit */ - movl %eax, 16(%edx) /* write the APIC register data */ -1: + andl $~IOART_INTMASK, 16(%edx) /* clear mask bit */ + jmp 1b +2: IMASK_UNLOCK /* exit critical reg */ - popfl /* restore old state of EI flag */ - ret - - -/****************************************************************************** - * - */ - - -/* - * void write_ioapic_mask(int apic, u_int mask); - */ - -#define _INT_MASK 0x00010000 -#define _PIN_MASK 0x00ffffff - -#define _OLD_ESI 0(%esp) -#define _OLD_EBX 4(%esp) -#define _RETADDR 8(%esp) -#define _APIC 12(%esp) -#define _MASK 16(%esp) - - ALIGN_TEXT -write_ioapic_mask: - pushl %ebx /* scratch */ - pushl %esi /* scratch */ - - movl apic_imen, %ebx - xorl _MASK, %ebx /* %ebx = _apic_imen ^ mask */ - andl $_PIN_MASK, %ebx /* %ebx = _apic_imen & 0x00ffffff */ - jz all_done /* no change, return */ - - movl _APIC, %esi /* APIC # */ - movl ioapic, %ecx - movl (%ecx,%esi,4), %esi /* %esi holds APIC base address */ - -next_loop: /* %ebx = diffs, %esi = APIC base */ - bsfl %ebx, %ecx /* %ecx = index if 1st/next set bit */ - jz all_done - - btrl %ecx, %ebx /* clear this bit in diffs */ - leal 16(,%ecx,2), %edx /* calculate register index */ - - movl %edx, (%esi) /* write the target register index */ - movl 16(%esi), %eax /* read the target register data */ - - btl %ecx, _MASK /* test for mask or unmask */ - jnc clear /* bit is clear */ - orl $_INT_MASK, %eax /* set mask bit */ - jmp write -clear: andl $~_INT_MASK, %eax /* clear mask bit */ - -write: movl %eax, 16(%esi) /* write the APIC register 
data */ - - jmp next_loop /* try another pass */ - -all_done: - popl %esi - popl %ebx - ret - -#undef _OLD_ESI -#undef _OLD_EBX -#undef _RETADDR -#undef _APIC -#undef _MASK - -#undef _PIN_MASK -#undef _INT_MASK - -#ifdef oldcode - -_INTREN: - movl apic_imen, %eax - notl %eax /* mask = ~mask */ - andl apic_imen, %eax /* %eax = _apic_imen & ~mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -_INTRDIS: - movl _apic_imen, %eax - orl 4(%esp), %eax /* %eax = _apic_imen | mask */ - - pushl %eax /* new (future) _apic_imen value */ - pushl $0 /* APIC# arg */ - call write_ioapic_mask /* modify the APIC registers */ - - addl $4, %esp /* remove APIC# arg from stack */ - popl apic_imen /* _apic_imen |= mask */ - ret - -#endif /* oldcode */ - - -#ifdef ready - -/* - * u_int read_io_apic_mask(int apic); - */ - ALIGN_TEXT -read_io_apic_mask: ret -/* - * Set INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void set_io_apic_mask(apic, u_int32_t bits); - */ - ALIGN_TEXT -set_io_apic_mask: - ret - -/* - * void set_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -set_ioapic_maskbit: - ret - -/* - * Clear INT mask bit for each bit set in 'mask'. - * Ignore INT mask bit for all others. - * - * void clr_io_apic_mask(int apic, u_int32_t bits); - */ - ALIGN_TEXT -clr_io_apic_mask: - ret - -/* - * void clr_ioapic_maskbit(int apic, int bit); - */ - ALIGN_TEXT -clr_ioapic_maskbit: - ret - -#endif /** ready */ - /****************************************************************************** * */ @@ -465,4 +125,3 @@ ENTRY(apic_eoi) movl $0, lapic+0xb0 ret -#endif diff --git a/sys/platform/pc32/isa/apic_vector.s b/sys/platform/pc32/isa/apic_vector.s index f4a7869ce6..2c7e7f73aa 100644 --- a/sys/platform/pc32/isa/apic_vector.s +++ b/sys/platform/pc32/isa/apic_vector.s @@ -1,62 +1,23 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_vector.s,v 1.7 2003/07/01 20:31:38 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/Attic/apic_vector.s,v 1.8 2003/07/06 21:23:49 dillon Exp $ */ #include #include - #include "i386/isa/intr_machdep.h" /* convert an absolute IRQ# into a bitmask */ -#define IRQ_BIT(irq_num) (1 << (irq_num)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) /* make an index into the IO APIC from the IRQ# */ #define REDTBL_IDX(irq_num) (0x10 + ((irq_num) * 2)) - -/* - * Macros for interrupt interrupt entry, call to handler, and exit. 
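In C terms, the INTRDIS/INTREN rewrite in apic_ipl.s above walks the caller's mask one bit at a time and reprograms the matching IO APIC redirection entry for each bit, which is how a multi-bit mask can present the same API as the ICU. A compile-only sketch of the INTREN loop; the struct layout, the 24-pin bound, and the imask_lock()/imask_unlock() helpers are stand-ins for the real spinlocked macros, not kernel names:

	#include <strings.h>			/* ffs() */

	#define IOART_INTMASK	0x00010000u	/* mask bit in a redirection entry */

	struct apicintpin_sketch {
		volatile unsigned int *io_idx;	/* IO APIC index reg, NULL if unwired */
		unsigned int redirindex;	/* redirection entry register number */
	};

	extern struct apicintpin_sketch int_to_apicintpin_sketch[24];
	extern unsigned int apic_imen;		/* bit set = source masked */
	extern void imask_lock(void);		/* stands in for IMASK_LOCK */
	extern void imask_unlock(void);		/* stands in for IMASK_UNLOCK */

	static void
	intren_sketch(unsigned int mask)
	{
		imask_lock();
		while (mask != 0) {
			int irq = ffs(mask) - 1;	/* bsfl: lowest set bit */
			mask &= ~(1u << irq);		/* btrl: consume it */
			apic_imen &= ~(1u << irq);	/* note the pin as enabled */
			if (int_to_apicintpin_sketch[irq].io_idx != NULL) {
				volatile unsigned int *idx =
				    int_to_apicintpin_sketch[irq].io_idx;
				idx[0] = int_to_apicintpin_sketch[irq].redirindex;
				idx[4] &= ~IOART_INTMASK; /* window reg at byte 16 */
			}
		}
		imask_unlock();
	}

INTRDIS is the same loop with the bit set in apic_imen and IOART_INTMASK or'd into the window register instead.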
- */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - movl %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(6*4(%esp)) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - MEXITCOUNT ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret - /* - * + * Push an interrupt frame in a format acceptable to doreti, reload + * the segment registers for the kernel. */ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ @@ -64,23 +25,54 @@ IDTVEC(vec_name) ; \ pushal ; \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ - pushl %fs + pushl %fs ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +#define PUSH_DUMMY \ + pushfl ; /* phys int frame / flags */ \ + pushl %cs ; /* phys int frame / cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; /* pushal + 3 seg regs (dummy) */ \ + +/* + * Warning: POP_FRAME can only be used if there is no chance of a + * segment register being changed (e.g. by procfs), which is why syscalls + * have to use doreti. + */ #define POP_FRAME \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ - addl $4+4,%esp + addl $2*4,%esp ; /* dummy trap & error codes */ \ + +#define POP_DUMMY \ + addl $16*4,%esp ; \ #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 + +/* + * Interrupts are expected to already be disabled when using these + * IMASK_*() macros. + */ +#define IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ + +#define IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ #define MASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ jne 7f ; /* masked, don't mask */ \ - orl $IRQ_BIT(irq_num), apic_imen ; /* set the mask bit */ \ + orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -88,17 +80,18 @@ IDTVEC(vec_name) ; \ orl $IOART_INTMASK, %eax ; /* set the mask */ \ movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; /* already masked */ \ - IMASK_UNLOCK + IMASK_UNLOCK ; \ + /* * Test to see whether we are handling an edge or level triggered INT. * Level-triggered INTs must still be masked as we don't clear the source, * and the EOI cycle would cause redundant INTs to occur. 
*/ #define MASK_LEVEL_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + testl $IRQ_LBIT(irq_num), apic_pin_trigger ; \ jz 9f ; /* edge, don't mask */ \ MASK_IRQ(irq_num) ; \ -9: +9: ; \ #ifdef APIC_INTR_REORDER @@ -108,27 +101,26 @@ IDTVEC(vec_name) ; \ testl apic_isrbit_location + 4 + 8 * (irq_num), %eax ; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi ; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ #else + #define EOI_IRQ(irq_num) \ - testl $IRQ_BIT(irq_num), lapic_isr1; \ + testl $IRQ_LBIT(irq_num), lapic_isr1; \ jz 9f ; /* not active */ \ movl $0, lapic_eoi; \ - APIC_ITRACE(apic_itrace_eoi, irq_num, APIC_ITRACE_EOI) ; \ -9: +9: \ + #endif - /* * Test to see if the source is currently masked, clear if so. */ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), apic_imen ; \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ + andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax,(%ecx) ; /* write the index */ \ @@ -136,174 +128,189 @@ IDTVEC(vec_name) ; \ andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ 7: ; \ - IMASK_UNLOCK - -#ifdef APIC_INTR_DIAGNOSTIC -#ifdef APIC_INTR_DIAGNOSTIC_IRQ -log_intr_event: - pushf - cli - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_lock_np) - addl $4, %esp - movl CNAME(apic_itrace_debugbuffer_idx), %ecx - andl $32767, %ecx - movl PCPU(cpuid), %eax - shll $8, %eax - orl 8(%esp), %eax - movw %ax, CNAME(apic_itrace_debugbuffer)(,%ecx,2) - incl %ecx - andl $32767, %ecx - movl %ecx, CNAME(apic_itrace_debugbuffer_idx) - pushl $CNAME(apic_itrace_debuglock) - call CNAME(s_unlock_np) - addl $4, %esp - popf - ret - + IMASK_UNLOCK ; \ -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 ; \ +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti + * - Mask the interrupt and reenable its source + * - If we cannot take the interrupt set its fpending bit and + * doreti. + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask and doreti. + * + * YYY can cache gd base pointer instead of using hidden %fs prefixes.
+ */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + EOI_IRQ(irq_num) ; \ + incl PCPU(intr_nesting_level) ; \ + movl PCPU(curthread),%ebx ; \ + movl TD_CPL(%ebx),%eax ; \ pushl %eax ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num), %eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ addl $4, %esp ; \ -7: ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax -#else -#define APIC_ITRACE(name, irq_num, id) \ - lock ; /* MP-safe */ \ - incl CNAME(name) + (irq_num) * 4 -#endif + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ -#define APIC_ITRACE_ENTER 1 -#define APIC_ITRACE_EOI 2 -#define APIC_ITRACE_TRYISRLOCK 3 -#define APIC_ITRACE_GOTISRLOCK 4 -#define APIC_ITRACE_ENTER2 5 -#define APIC_ITRACE_LEAVE 6 -#define APIC_ITRACE_UNMASK 7 -#define APIC_ITRACE_ACTIVE 8 -#define APIC_ITRACE_MASKED 9 -#define APIC_ITRACE_NOISRLOCK 10 -#define APIC_ITRACE_MASKED2 11 -#define APIC_ITRACE_SPLZ 12 -#define APIC_ITRACE_DORETI 13 - -#else -#define APIC_ITRACE(name, irq_num, id) -#endif - -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ +/* + * Restart fast interrupt held up by critical section or cpl. + * + * - Push a dummy trap frame as required by doreti + * - The interrupt source is already masked + * - Clear the fpending bit + * - Run the handler + * - Unmask the interrupt + * - Pop the dummy frame and do a normal return + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; \ + movl %esp,%ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping make per cpu YYY */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. + * - If we can take the interrupt clear its ipending bit, + * set its irunning bit, and schedule the thread. Leave + * interrupts masked and doreti. + * + * The interrupt thread will run its handlers and loop if + * ipending is found to be set. ipending/irunning interlock + * the interrupt thread with the interrupt. The handler calls + * UNPEND when it is through.
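The fast-path decision just defined fits in a few lines of C. A compile-only sketch, with TDPRI_CRIT's value and the gd/td field names abbreviated from the structures this patch touches rather than the literal kernel types:

	#define TDPRI_CRIT	32		/* illustrative priority floor */

	struct gd_sketch {
		unsigned int fpending;		/* fast ints pended on this cpu */
		int reqpri;
	};

	struct td_sketch {
		int td_pri;
		unsigned int td_cpl;		/* classic spl mask */
	};

	static void
	fast_intr_sketch(struct gd_sketch *gd, struct td_sketch *td, int irq,
	    void (*handler)(void *), void *unit)
	{
		unsigned int bit = 1u << irq;

		/* at this point the source is masked and the EOI already sent */
		if (td->td_pri >= TDPRI_CRIT || (td->td_cpl & bit) != 0) {
			gd->fpending |= bit;	/* doreti replays it later */
			gd->reqpri = TDPRI_CRIT;
			return;			/* source stays masked */
		}
		gd->fpending &= ~bit;
		td->td_pri += TDPRI_CRIT;	/* run handler in a crit section */
		handler(unit);
		td->td_pri -= TDPRI_CRIT;
		/* UNMASK_IRQ(irq) would reenable the source before doreti */
	}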
+ * + * Note that we do not enable interrupts when calling sched_ithd. + * YYY sched_ithd may preempt us synchronously (fix interrupt stacking) + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ -/* XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - mov %ax, %ds ; \ - mov %ax, %es ; \ - movl $KPSEL, %eax ; \ - mov %ax, %fs ; \ -; \ maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ + incl PCPU(intr_nesting_level) ; \ movl PCPU(curthread),%ebx ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%eax) ; \ - jne 2f ; /* this INT masked */ \ + movl TD_CPL(%ebx),%eax ; \ + pushl %eax ; /* cpl to restore */ \ cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ - jge 2f ; /* in critical sec */ \ -; \ - incb PCPU(intr_nesting_level) ; \ -; \ - /* entry point used by doreti_unpend for HWIs. */ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl PCPU(curthread), %ebx ; \ - movl TD_MACH+MTD_CPL(%ebx), %eax ; \ - pushl %eax ; /* cpl restored by doreti */ \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, TD_MACH+MTD_CPL(%ebx) ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), PCPU(ipending) ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ + jge 1f ; \ + testl $IRQ_LBIT(irq_num),PCPU(irunning) ; \ + jnz 1f ; \ + testl $IRQ_LBIT(irq_num),%eax ; \ + jz 2f ; \ +1: ; \ + /* set the pending bit and return, leave the interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + movl $TDPRI_CRIT, PCPU(reqpri) ; \ + jmp 5f ; \ +2: ; \ + addl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + /* set running bit, clear pending bit, run handler */ \ + orl $IRQ_LBIT(irq_num), PCPU(irunning) ; \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ + pushl $irq_num ; \ + call sched_ithd ; \ addl $4,%esp ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ + subl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + incl PCPU(cnt)+V_INTR ; /* book-keeping YYY make per-cpu */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ +5: ; \ MEXITCOUNT ; \ jmp doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ - movl $TDPRI_CRIT, PCPU(reqpri) ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ + +/* + * Unmask a slow interrupt. This function is used by interrupt threads + * after they have descheduled themselves to reenable interrupts and + * possibly cause a reschedule to occur. The interrupt's irunning bit + * is cleared prior to unmasking. + */ + +#define INTR_UNMASK(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushl %ebp ; /* frame for ddb backtrace */ \ + movl %esp, %ebp ; \ + andl $~IRQ_LBIT(irq_num), PCPU(irunning) ; \ + UNMASK_IRQ(irq_num) ; \ + popl %ebp ; \ + ret ; \ + +#if 0 + /* XXX forward_irq to cpu holding the BGL? */ + ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ lock ; \ - orl $IRQ_BIT(irq_num), PCPU(ipending) ; \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ movl $TDPRI_CRIT,_reqpri ; \ - testl $IRQ_BIT(irq_num), TD_MACH+MTD_CPL(%ebx) ; \ + testl $IRQ_LBIT(irq_num), TD_CPL(%ebx) ; \ jne 4f ; /* this INT masked */ \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ POP_FRAME ; /* and return */ \ iret @@ -314,6 +321,9 @@ __CONCAT(Xresume,irq_num): ; \ * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ + +#endif + .text SUPERALIGN_TEXT .globl Xspuriousint @@ -329,8 +339,8 @@ Xspuriousint: */ .text SUPERALIGN_TEXT - .globl _Xinvltlb -_Xinvltlb: + .globl Xinvltlb +Xinvltlb: pushl %eax #ifdef COUNT_XINVLTLB_HITS @@ -353,6 +363,7 @@ _Xinvltlb: iret +#if 0 #ifdef BETTER_CLOCK /* @@ -413,13 +424,14 @@ Xcpucheckstate: iret #endif /* BETTER_CLOCK */ +#endif /* * Executed by a CPU when it receives an Xcpuast IPI from another CPU, * * - Signals its receipt by clearing bit cpuid in checkstate_need_ast. - * - * - We need a better method of triggering asts on other cpus. + * - MP safe in regards to setting AST_PENDING because doreti is in + * a cli mode when it checks. 
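On the other side of that handshake, the interrupt thread drains ipending and then uses the INTR_UNMASK entry above to clear its irunning bit and reenable the source. A sketch of what that thread-side loop looks like; ithread_deschedule() and unmask_irq() are hypothetical stand-ins for the scheduler call and the INTR_UNMASK stub:

	struct ithd_gd_sketch {
		unsigned int ipending;		/* slow ints pended on this cpu */
		unsigned int irunning;		/* ithreads currently dispatched */
	};

	extern void unmask_irq(int irq);	/* cf. INTR_UNMASK above */
	extern void ithread_deschedule(void);	/* block until sched_ithd() again */

	static void
	ithread_loop_sketch(struct ithd_gd_sketch *gd, int irq,
	    void (*handler)(void *), void *unit)
	{
		unsigned int bit = 1u << irq;

		for (;;) {
			do {
				gd->ipending &= ~bit;
				handler(unit);
				/* loop if the INT re-pended while we ran */
			} while ((gd->ipending & bit) != 0);
			gd->irunning &= ~bit;	/* what INTR_UNMASK does */
			unmask_irq(irq);
			ithread_deschedule();
		}
	}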
*/ .text @@ -427,11 +439,6 @@ Xcpucheckstate: .globl Xcpuast Xcpuast: PUSH_FRAME - movl $KDSEL, %eax - mov %ax, %ds /* use KERNEL data segment */ - mov %ax, %es - movl $KPSEL, %eax - mov %ax, %fs movl PCPU(cpuid), %eax lock /* checkstate_need_ast &= ~(1< #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ @@ -102,7 +98,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -204,7 +199,7 @@ static void clkintr(struct clockframe frame) { if (timecounter->tc_get_timecount == i8254_get_timecount) { - disable_intr(); + clock_lock(); if (i8254_ticked) i8254_ticked = 0; else { @@ -212,7 +207,7 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + clock_unlock(); } timer_func(&frame); switch (timer0_state) { @@ -231,14 +226,14 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: - disable_intr(); + clock_lock(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + clock_unlock(); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -247,7 +242,7 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { - disable_intr(); + clock_lock(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -255,7 +250,7 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + clock_unlock(); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; @@ -402,11 +397,9 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; int high, low; - ef = read_eflags(); - disable_intr(); + clock_lock(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -414,8 +407,7 @@ getit(void) low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); return ((high << 8) | low); } @@ -529,10 +521,10 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. 
*/ } - disable_intr(); + clock_lock(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + clock_unlock(); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -681,11 +673,9 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; int new_timer0_max_count; - ef = read_eflags(); - disable_intr(); + clock_lock(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -694,22 +684,17 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); } - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); } static void i8254_restore(void) { - u_long ef; - - ef = read_eflags(); - disable_intr(); + clock_lock(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); } static void @@ -1212,7 +1197,7 @@ i8254_get_timecount(struct timecounter *tc) u_int high, low; ef = read_eflags(); - disable_intr(); + clock_lock(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1236,8 +1221,7 @@ i8254_get_timecount(struct timecounter *tc) } i8254_lastcount = count; count += i8254_offset; - CLOCK_UNLOCK(); - write_eflags(ef); + clock_unlock(); return (count); } diff --git a/sys/platform/pc32/isa/intr_machdep.c b/sys/platform/pc32/isa/intr_machdep.c index 9444244998..d72c4abbcb 100644 --- a/sys/platform/pc32/isa/intr_machdep.c +++ b/sys/platform/pc32/isa/intr_machdep.c @@ -35,7 +35,7 @@ * * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ - * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.5 2003/07/04 00:32:28 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.c,v 1.6 2003/07/06 21:23:49 dillon Exp $ */ /* * This file contains an aggregated module marked: @@ -472,7 +472,7 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) return (EBUSY); ef = read_eflags(); - disable_intr(); + cpu_disable_intr(); /* YYY */ intr_handler[intr] = handler; intr_mptr[intr] = maskptr; intr_mask[intr] = mask | SWI_CLOCK_MASK | (1 << intr); @@ -530,7 +530,7 @@ icu_unset(intr, handler) INTRDIS(1 << intr); ef = read_eflags(); - disable_intr(); + cpu_disable_intr(); /* YYY */ intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; intr_mptr[intr] = NULL; diff --git a/sys/platform/pc32/isa/intr_machdep.h b/sys/platform/pc32/isa/intr_machdep.h index 8a3e18a863..4ff8e53e4c 100644 --- a/sys/platform/pc32/isa/intr_machdep.h +++ b/sys/platform/pc32/isa/intr_machdep.h @@ -31,15 +31,17 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/isa/intr_machdep.h,v 1.19.2.2 2001/10/14 20:05:50 luigi Exp $ - * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.h,v 1.3 2003/06/29 03:28:43 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/intr_machdep.h,v 1.4 2003/07/06 21:23:49 dillon Exp $ */ #ifndef _I386_ISA_INTR_MACHDEP_H_ #define _I386_ISA_INTR_MACHDEP_H_ #ifndef _SYS_INTERRUPT_H_ +#ifndef LOCORE #include #endif +#endif /* * Low level interrupt code. 
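The clock.c conversion above replaces global interrupt disablement with a clock_lock()/clock_unlock() pair around every 8254 access, so the timer registers are serialized across cpus instead of relying on cli. The getit() pattern, sketched with stand-in declarations for the spinlock and port helpers:

	#define TIMER_MODE	0x43		/* 8254 mode/command port */
	#define TIMER_CNTR0	0x40		/* counter 0 data port */
	#define TIMER_SEL0	0x00
	#define TIMER_LATCH	0x00		/* latch-count command */

	extern void clock_lock(void);		/* spinlock, not cli */
	extern void clock_unlock(void);
	extern void outb(unsigned int port, unsigned char v);
	extern unsigned char inb(unsigned int port);

	static int
	getit_sketch(void)
	{
		int high, low;

		clock_lock();
		/* select timer0 and latch its count */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
		low = inb(TIMER_CNTR0);
		high = inb(TIMER_CNTR0);
		clock_unlock();
		return ((high << 8) | low);
	}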
@@ -99,6 +101,7 @@ #define TPR_IGNORE_HWI 0x5f /* ignore INTs */ #define TPR_BLOCK_FHWI 0x7f /* hardware FAST INTs */ #define TPR_IGNORE_FHWI 0x8f /* ignore FAST INTs */ +#define TPR_IPI_ONLY 0x8f /* ignore FAST INTs */ #define TPR_BLOCK_XINVLTLB 0x9f /* */ #define TPR_BLOCK_XCPUSTOP 0xaf /* */ #define TPR_BLOCK_ALL 0xff /* all INTs */ diff --git a/sys/platform/pc32/isa/npx.c b/sys/platform/pc32/isa/npx.c index 89d7e543a3..91e7b2fb3a 100644 --- a/sys/platform/pc32/isa/npx.c +++ b/sys/platform/pc32/isa/npx.c @@ -33,7 +33,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/isa/npx.c,v 1.6 2003/06/28 04:16:04 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/npx.c,v 1.7 2003/07/06 21:23:49 dillon Exp $ */ #include "opt_cpu.h" @@ -257,7 +257,7 @@ npx_probe(dev) npx_irq = 13; npx_intrno = NRSVIDT + npx_irq; save_eflags = read_eflags(); - disable_intr(); + cpu_disable_intr(); save_icu1_mask = inb(IO_ICU1 + 1); save_icu2_mask = inb(IO_ICU2 + 1); save_idt_npxintr = idt[npx_intrno]; @@ -267,9 +267,9 @@ npx_probe(dev) setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); npx_idt_probeintr = idt[npx_intrno]; - enable_intr(); + cpu_enable_intr(); result = npx_probe1(dev); - disable_intr(); + cpu_disable_intr(); outb(IO_ICU1 + 1, save_icu1_mask); outb(IO_ICU2 + 1, save_icu2_mask); idt[npx_intrno] = save_idt_npxintr; @@ -733,6 +733,9 @@ static char fpetable[128] = { * longjmp() out. Both preserving the state and longjmp()ing may be * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable * solution for signals other than SIGFPE. + * + * The MP lock is not held on entry (see i386/i386/exception.s) and + * should not be held on exit. */ void npx_intr(dummy) @@ -744,11 +747,13 @@ npx_intr(dummy) u_long *exstat; if (npxthread == NULL || !npx_exists) { + get_mplock(); printf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", npxthread, curthread, npx_exists); panic("npxintr from nowhere"); } if (npxthread != curthread) { + get_mplock(); printf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", npxthread, curthread, npx_exists); panic("npxintr from non-current process"); @@ -760,6 +765,8 @@ npx_intr(dummy) fnstcw(&control); fnclex(); + get_mplock(); + /* * Pass exception to process. 
*/ @@ -801,6 +808,7 @@ npx_intr(dummy) */ psignal(curproc, SIGFPE); } + rel_mplock(); } /* @@ -874,21 +882,23 @@ npxsave(addr) u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; + u_long save_eflags; - disable_intr(); + save_eflags = read_eflags(); + cpu_disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); old_icu2_mask = inb(IO_ICU2 + 1); save_idt_npxintr = idt[npx_intrno]; outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); idt[npx_intrno] = npx_idt_probeintr; - enable_intr(); + cpu_enable_intr(); stop_emulating(); fnsave(addr); fnop(); start_emulating(); npxthread = NULL; - disable_intr(); + cpu_disable_intr(); icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ icu2_mask = inb(IO_ICU2 + 1); outb(IO_ICU1 + 1, @@ -897,7 +907,7 @@ npxsave(addr) (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; - enable_intr(); /* back to usual state */ + write_eflags(save_eflags); /* back to usual state */ #endif /* SMP */ } diff --git a/sys/platform/vkernel/i386/genassym.c b/sys/platform/vkernel/i386/genassym.c index 3ef054197b..eb0203c508 100644 --- a/sys/platform/vkernel/i386/genassym.c +++ b/sys/platform/vkernel/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.20 2003/07/04 00:32:24 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.21 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_user_ldt.h" @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -87,8 +88,14 @@ ASSYM(TD_SP, offsetof(struct thread, td_sp)); ASSYM(TD_PRI, offsetof(struct thread, td_pri)); ASSYM(TD_MACH, offsetof(struct thread, td_mach)); ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); +#ifdef SMP +ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount)); +#endif ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TDF_EXITED, TDF_EXITED); +#ifdef SMP +ASSYM(MP_FREE_LOCK, MP_FREE_LOCK); +#endif ASSYM(RW_OWNER, offsetof(struct lwkt_rwlock, rw_owner)); @@ -101,6 +108,8 @@ ASSYM(SRUN, SRUN); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); +ASSYM(V_FORWARDED_HITS, offsetof(struct vmmeter, v_forwarded_hits)); +ASSYM(V_FORWARDED_MISSES, offsetof(struct vmmeter, v_forwarded_misses)); ASSYM(UPAGES, UPAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 346db87081..22d6469add 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -37,7 +37,7 @@ * * @(#)buf.h 8.9 (Berkeley) 3/30/95 * $FreeBSD: src/sys/sys/buf.h,v 1.88.2.10 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/sys/buf.h,v 1.4 2003/06/26 05:55:19 dillon Exp $ + * $DragonFly: src/sys/sys/buf.h,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_BUF_H_ @@ -257,7 +257,7 @@ struct buf { /* * Buffer locking. See sys/buf2.h for inline functions. 
*/ -struct simplelock buftimelock; /* Interlock on setting prio and timo */ +struct lwkt_token buftimetoken; /* Interlock on setting prio and timo */ extern char *buf_wmesg; /* Default buffer lock message */ #define BUF_WMESG "bufwait" diff --git a/sys/sys/buf2.h b/sys/sys/buf2.h index 267828695e..4796b1b9c3 100644 --- a/sys/sys/buf2.h +++ b/sys/sys/buf2.h @@ -37,7 +37,7 @@ * * @(#)buf.h 8.9 (Berkeley) 3/30/95 * $FreeBSD: src/sys/sys/buf.h,v 1.88.2.10 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/sys/buf2.h,v 1.2 2003/06/25 03:56:10 dillon Exp $ + * $DragonFly: src/sys/sys/buf2.h,v 1.3 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_BUF2_H_ @@ -62,12 +62,12 @@ BUF_LOCK(struct buf *bp, int locktype) int s, ret; s = splbio(); - simple_lock(&buftimelock); + lwkt_gettoken(&buftimetoken); locktype |= LK_INTERLOCK; bp->b_lock.lk_wmesg = buf_wmesg; bp->b_lock.lk_prio = PRIBIO + 4; /* bp->b_lock.lk_timo = 0; not necessary */ - ret = lockmgr(&(bp)->b_lock, locktype, &buftimelock, curthread); + ret = lockmgr(&(bp)->b_lock, locktype, &buftimetoken, curthread); splx(s); return ret; } @@ -81,12 +81,12 @@ BUF_TIMELOCK(struct buf *bp, int locktype, char *wmesg, int catch, int timo) int s, ret; s = splbio(); - simple_lock(&buftimelock); + lwkt_gettoken(&buftimetoken); locktype |= LK_INTERLOCK | LK_TIMELOCK; bp->b_lock.lk_wmesg = wmesg; bp->b_lock.lk_prio = (PRIBIO + 4) | catch; bp->b_lock.lk_timo = timo; - ret = lockmgr(&(bp)->b_lock, (locktype), &buftimelock, curthread); + ret = lockmgr(&(bp)->b_lock, (locktype), &buftimetoken, curthread); splx(s); return ret; } diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 3bfdda9731..87f5608f64 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -36,24 +36,26 @@ * * @(#)lock.h 8.12 (Berkeley) 5/19/95 * $FreeBSD: src/sys/sys/lock.h,v 1.17.2.3 2001/12/25 01:44:44 dillon Exp $ - * $DragonFly: src/sys/sys/lock.h,v 1.3 2003/06/25 03:56:10 dillon Exp $ + * $DragonFly: src/sys/sys/lock.h,v 1.4 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _LOCK_H_ #define _LOCK_H_ - #include +#ifndef _SYS_THREAD_H_ +#include /* lwkt_token */ +#endif /* * The general lock structure. Provides for multiple shared locks, * upgrading from shared to exclusive, and sleeping until the lock - * can be gained. The simple locks are defined in . + * can be gained. 
*/ struct thread; struct lock { - struct simplelock lk_interlock; /* lock on remaining fields */ + lwkt_token lk_interlock; /* lock on remaining fields */ u_int lk_flags; /* see below */ int lk_sharecount; /* # of accepted shared locks */ int lk_waitcount; /* # of processes sleeping for lock */ @@ -183,7 +185,7 @@ void lockinit __P((struct lock *, int prio, char *wmesg, int timo, int flags)); #ifdef DEBUG_LOCKS int debuglockmgr __P((struct lock *, u_int flags, - struct simplelock *, struct thread *p, + struct lwkt_token *, struct thread *p, const char *, const char *, int)); @@ -192,28 +194,10 @@ int debuglockmgr __P((struct lock *, u_int flags, "lockmgr", __FILE__, __LINE__) #else int lockmgr __P((struct lock *, u_int flags, - struct simplelock *, struct thread *td)); + struct lwkt_token *, struct thread *td)); #endif void lockmgr_printinfo __P((struct lock *)); int lockstatus __P((struct lock *, struct thread *)); int lockcount __P((struct lock *)); -#ifdef SIMPLELOCK_DEBUG -void _simple_unlock __P((struct simplelock *alp, const char *, int)); -#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__) -int _simple_lock_try __P((struct simplelock *alp, const char *, int)); -#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__) -void _simple_lock __P((struct simplelock *alp, const char *, int)); -#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__) -void simple_lock_init __P((struct simplelock *alp)); -#else /* !SIMPLELOCK_DEBUG */ -#if MAXCPU == 1 /* no multiprocessor locking is necessary */ -#define NULL_SIMPLELOCKS -#define simple_lock_init(alp) -#define simple_lock(alp) -#define simple_lock_try(alp) (1) /* always succeeds */ -#define simple_unlock(alp) -#endif /* MAXCPU == 1 */ -#endif /* !SIMPLELOCK_DEBUG */ - #endif /* !_LOCK_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 18ad7eff05..52a6f08c7a 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -32,7 +32,7 @@ * * @(#)mount.h 8.21 (Berkeley) 5/20/95 * $FreeBSD: src/sys/sys/mount.h,v 1.89.2.7 2003/04/04 20:35:57 tegge Exp $ - * $DragonFly: src/sys/sys/mount.h,v 1.4 2003/06/26 05:55:20 dillon Exp $ + * $DragonFly: src/sys/sys/mount.h,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_MOUNT_H_ @@ -422,7 +422,7 @@ int vfs_setpublicfs /* set publicly exported fs */ int vfs_lock __P((struct mount *)); /* lock a vfs */ void vfs_msync __P((struct mount *, int)); void vfs_unlock __P((struct mount *)); /* unlock a vfs */ -int vfs_busy __P((struct mount *, int, struct simplelock *, struct thread *)); +int vfs_busy __P((struct mount *, int, struct lwkt_token *, struct thread *)); int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ @@ -439,7 +439,7 @@ void vfs_unmountall __P((void)); int vfs_register __P((struct vfsconf *)); int vfs_unregister __P((struct vfsconf *)); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ -extern struct simplelock mountlist_slock; +extern struct lwkt_token mountlist_token; extern struct nfs_public nfs_pub; /* diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 8110b01e22..35b52b4c1c 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -37,7 +37,7 @@ * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD: src/sys/sys/proc.h,v 1.99.2.9 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/sys/proc.h,v 1.21 2003/07/04 00:32:32 dillon Exp $ + * $DragonFly: src/sys/sys/proc.h,v 1.22 2003/07/06 21:23:54 dillon Exp $ */ 
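The sys/lock.h and sys/mount.h changes above swap struct simplelock * for struct lwkt_token * in the lockmgr() and vfs_busy() signatures while keeping the LK_INTERLOCK handoff: the caller enters holding the token and lockmgr() drops it once the request is staged, exactly as BUF_LOCK() does in buf2.h. A condensed sketch, with illustrative flag values and abbreviated types standing in for the real headers:

	struct lock;
	struct thread;
	struct lwkt_token;

	extern int lwkt_gettoken(struct lwkt_token *);
	extern int lockmgr(struct lock *, unsigned int flags,
			struct lwkt_token *interlock, struct thread *td);

	#define LK_EXCLUSIVE	0x00000002	/* illustrative flag values */
	#define LK_INTERLOCK	0x00010000

	static int
	token_interlock_sketch(struct lock *lk, struct lwkt_token *tok,
	    struct thread *td)
	{
		lwkt_gettoken(tok);	/* was simple_lock(); guards lk's fields */
		/* ... stage wmesg/prio/timo under the token here ... */
		return (lockmgr(lk, LK_EXCLUSIVE | LK_INTERLOCK, tok, td));
		/* lockmgr releases tok, so no explicit reltoken */
	}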
#ifndef _SYS_PROC_H_ @@ -180,8 +180,6 @@ struct proc { u_char p_unused02; /* Last cpu we were on */ char p_rqindex; /* Run queue index */ - short p_locks; /* DEBUG: lockmgr count of held locks */ - short p_simple_locks; /* DEBUG: count of held simple locks */ unsigned int p_stops; /* procfs event bitmask */ unsigned int p_stype; /* procfs stop event type */ char p_step; /* procfs stop *once* flag */ @@ -316,15 +314,13 @@ MALLOC_DECLARE(M_PARGS); } /* - * STOPEVENT is MP SAFE. + * STOPEVENT */ extern void stopevent(struct proc*, unsigned int, unsigned int); #define STOPEVENT(p,e,v) \ do { \ if ((p)->p_stops & (e)) { \ - get_mplock(); \ stopevent(p,e,v); \ - rel_mplock(); \ } \ } while (0) diff --git a/sys/sys/rman.h b/sys/sys/rman.h index 7e8d908fe7..e182296f04 100644 --- a/sys/sys/rman.h +++ b/sys/sys/rman.h @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/sys/rman.h,v 1.5.2.1 2001/06/05 08:06:07 imp Exp $ - * $DragonFly: src/sys/sys/rman.h,v 1.2 2003/06/17 04:28:58 dillon Exp $ + * $DragonFly: src/sys/sys/rman.h,v 1.3 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_RMAN_H_ @@ -75,7 +75,7 @@ enum rman_type { RMAN_UNINIT = 0, RMAN_GAUGE, RMAN_ARRAY }; struct rman { struct resource_head rm_list; - struct simplelock *rm_slock; /* mutex used to protect rm_list */ + struct lwkt_token *rm_slock; /* mutex used to protect rm_list */ TAILQ_ENTRY(rman) rm_link; /* link in list of all rmans */ u_long rm_start; /* index of globally first entry */ u_long rm_end; /* index of globally last entry */ diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 7bc098aee6..18886719cf 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -32,7 +32,7 @@ * * @(#)signalvar.h 8.6 (Berkeley) 2/19/95 * $FreeBSD: src/sys/sys/signalvar.h,v 1.34.2.1 2000/05/16 06:58:05 dillon Exp $ - * $DragonFly: src/sys/sys/signalvar.h,v 1.3 2003/06/23 17:55:50 dillon Exp $ + * $DragonFly: src/sys/sys/signalvar.h,v 1.4 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_SIGNALVAR_H_ /* tmp for user.h */ @@ -40,7 +40,7 @@ #include #include -#include +#include /* * Kernel signal definitions and data structures, @@ -241,9 +241,7 @@ extern __inline int __cursig(struct proc *p) (!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset))) { return(0); } - get_mplock(); r = issignal(p); - rel_mplock(); return(r); } diff --git a/sys/sys/thread.h b/sys/sys/thread.h index a926663dc9..f2a3274767 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -4,12 +4,16 @@ * Implements the architecture independant portion of the LWKT * subsystem. 
* - * $DragonFly: src/sys/sys/thread.h,v 1.18 2003/07/04 00:32:32 dillon Exp $ + * $DragonFly: src/sys/sys/thread.h,v 1.19 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_THREAD_H_ #define _SYS_THREAD_H_ +#ifndef _SYS_QUEUE_H_ +#include /* TAILQ_* macros */ +#endif + struct globaldata; struct proc; struct thread; @@ -48,6 +52,7 @@ typedef TAILQ_HEAD(lwkt_msg_queue, lwkt_msg) lwkt_msg_queue; typedef struct lwkt_token { int t_cpu; /* the current owner of the token */ int t_reqcpu; /* return ownership to this cpu on release */ + int t_gen; /* generation number */ #if 0 int t_pri; /* raise thread priority to hold token */ #endif @@ -144,6 +149,11 @@ struct thread { u_int64_t td_iticks; /* Statclock hits processing intr (uS) */ int td_locks; /* lockmgr lock debugging YYY */ int td_refs; /* hold position in gd_tdallq / hold free */ +#ifdef SMP + int td_mpcount; /* MP lock held (count) */ +#else + int td_unused001; +#endif char td_comm[MAXCOMLEN+1]; /* typ 16+1 bytes */ struct thread *td_preempted; /* we preempted this thread */ struct md_thread td_mach; @@ -155,9 +165,10 @@ struct thread { * cleaned up the MMU state. */ #define TDF_EXITED 0x0001 /* thread finished exiting */ -#define TDF_RUNQ 0x0002 /* on run queue */ +#define TDF_RUNQ 0x0002 /* on run queue (if not on bglq) */ #define TDF_PREEMPT_LOCK 0x0004 /* I have been preempted */ #define TDF_PREEMPT_DONE 0x0008 /* acknowledge preemption complete */ +#define TDF_BGLQ 0x0010 /* on BGL queue */ #define TDF_ONALLQ 0x0100 /* on gd_tdallq */ #define TDF_ALLOCATED_THREAD 0x0200 /* zalloc allocated thread */ @@ -223,7 +234,8 @@ extern void lwkt_rele(thread_t td); extern void lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen); extern void lwkt_signal(lwkt_wait_t w); -extern void lwkt_gettoken(lwkt_token_t tok); +extern int lwkt_gettoken(lwkt_token_t tok); +extern int lwkt_gentoken(lwkt_token_t tok, int *gen); extern void lwkt_reltoken(lwkt_token_t tok); extern void lwkt_inittoken(lwkt_token_t tok); extern int lwkt_regettoken(lwkt_token_t tok); diff --git a/sys/sys/thread2.h b/sys/sys/thread2.h index 9f1d8e59bc..450527fef4 100644 --- a/sys/sys/thread2.h +++ b/sys/sys/thread2.h @@ -8,7 +8,7 @@ * on a different cpu will not be immediately scheduled by a yield() on * this cpu. 
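The thread.h change above makes lwkt_gettoken() return a generation number and adds lwkt_gentoken(), which reports whether the token migrated (and the protected data may therefore have changed) while the holder blocked. That is the idiom the filesystem scan loops later in this patch adopt; a sketch, with list_token a placeholder for something like mntvnode_token:

	struct lwkt_token;
	extern int lwkt_gettoken(struct lwkt_token *);
	extern int lwkt_gentoken(struct lwkt_token *, int *gen);
	extern void lwkt_reltoken(struct lwkt_token *);

	extern struct lwkt_token list_token;	/* e.g. mntvnode_token */

	static void
	token_scan_sketch(void)
	{
		int gen;

		gen = lwkt_gettoken(&list_token);
	loop:
		/* walk the protected list; any step here may block */
		if (lwkt_gentoken(&list_token, &gen) != 0)
			goto loop;	/* token was lost: the list may have
					 * changed under us, restart the scan */
		lwkt_reltoken(&list_token);
	}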
* - * $DragonFly: src/sys/sys/thread2.h,v 1.4 2003/06/30 19:50:32 dillon Exp $ + * $DragonFly: src/sys/sys/thread2.h,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_THREAD2_H_ @@ -56,6 +56,21 @@ crit_exit(void) lwkt_yield_quick(); } +static __inline int +crit_panic_save(void) +{ + thread_t td = curthread; + int pri = td->td_pri; + td->td_pri = td->td_pri & TDPRI_MASK; + return(pri); +} + +static __inline void +crit_panic_restore(int cpri) +{ + curthread->td_pri = cpri; +} + static __inline int lwkt_havetoken(lwkt_token_t tok) { diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index 09725f511b..f8930afc5e 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -32,7 +32,7 @@ * * @(#)vmmeter.h 8.2 (Berkeley) 7/10/94 * $FreeBSD: src/sys/sys/vmmeter.h,v 1.21.2.2 2002/10/10 19:28:21 dillon Exp $ - * $DragonFly: src/sys/sys/vmmeter.h,v 1.4 2003/07/03 18:20:02 dillon Exp $ + * $DragonFly: src/sys/sys/vmmeter.h,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_VMMETER_H_ @@ -88,7 +88,10 @@ struct vmmeter { u_int v_kthreadpages; /* number of VM pages affected by fork() by kernel */ u_int v_intrans_coll; /* intransit map collisions (total) */ u_int v_intrans_wait; /* intransit map collisions which blocked */ -#define vmmeter_uint_end v_intrans_wait + u_int v_forwarded_ints; + u_int v_forwarded_hits; + u_int v_forwarded_misses; +#define vmmeter_uint_end v_forwarded_misses }; struct vmstats { diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 92caf1423e..f020600ee0 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -32,7 +32,7 @@ * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $ - * $DragonFly: src/sys/sys/vnode.h,v 1.5 2003/06/26 05:55:20 dillon Exp $ + * $DragonFly: src/sys/sys/vnode.h,v 1.6 2003/07/06 21:23:54 dillon Exp $ */ #ifndef _SYS_VNODE_H_ @@ -80,10 +80,10 @@ struct namecache; /* * Reading or writing any of these items requires holding the appropriate lock. - * v_freelist is locked by the global vnode_free_list simple lock. - * v_mntvnodes is locked by the global mntvnodes simple lock. + * v_freelist is locked by the global vnode_free_list token. + * v_mntvnodes is locked by the global mntvnodes token. * v_flag, v_usecount, v_holdcount and v_writecount are - * locked by the v_interlock simple lock. + * locked by the v_interlock token. * v_pollinfo is locked by the lock contained inside it. */ struct vnode { @@ -116,7 +116,7 @@ struct vnode { daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ struct vm_object *v_object; /* Place to store VM object */ - struct simplelock v_interlock; /* lock on usecount and flag */ + struct lwkt_token v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ @@ -125,7 +125,7 @@ struct vnode { struct vnode *v_dd; /* .. vnode */ u_long v_ddid; /* .. 
capability identifier */ struct { - struct simplelock vpi_lock; /* lock to protect below */ + struct lwkt_token vpi_token; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ @@ -320,9 +320,8 @@ extern void (*lease_updatetime) __P((int deltat)); (((vp)->v_flag & VFREE) && \ ((vp)->v_holdcnt || (vp)->v_usecount)) -#define VI_LOCK(vp) simple_lock(&(vp)->v_interlock) -#define VI_TRYLOCK(vp) simple_lock_try(&(vp)->v_interlock) -#define VI_UNLOCK(vp) simple_unlock(&(vp)->v_interlock) +#define VI_LOCK(vp) lwkt_gettoken(&(vp)->v_interlock) +#define VI_UNLOCK(vp) lwkt_reltoken(&(vp)->v_interlock) #endif /* _KERNEL */ @@ -385,7 +384,7 @@ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ -extern struct simplelock mntvnode_slock; +extern struct lwkt_token mntvnode_token; /* * This macro is very helpful in defining those offsets in the vdesc struct. @@ -591,7 +590,7 @@ int vinvalbuf __P((struct vnode *vp, int save, int vtruncbuf __P((struct vnode *vp, struct thread *td, off_t length, int blksize)); void vprint __P((char *label, struct vnode *vp)); -int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, +int vrecycle __P((struct vnode *vp, struct lwkt_token *inter_lkp, struct thread *td)); int vn_close __P((struct vnode *vp, int flags, struct thread *td)); int vn_isdisk __P((struct vnode *vp, int *errp)); diff --git a/sys/vfs/deadfs/dead_vnops.c b/sys/vfs/deadfs/dead_vnops.c index ee97285a12..3c77270a25 100644 --- a/sys/vfs/deadfs/dead_vnops.c +++ b/sys/vfs/deadfs/dead_vnops.c @@ -32,7 +32,7 @@ * * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/miscfs/deadfs/dead_vnops.c,v 1.26 1999/08/28 00:46:42 peter Exp $ - * $DragonFly: src/sys/vfs/deadfs/dead_vnops.c,v 1.2 2003/06/17 04:28:42 dillon Exp $ + * $DragonFly: src/sys/vfs/deadfs/dead_vnops.c,v 1.3 2003/07/06 21:23:51 dillon Exp $ */ #include @@ -212,7 +212,7 @@ dead_lock(ap) * the interlock here. */ if (ap->a_flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); ap->a_flags &= ~LK_INTERLOCK; } if (!chkvnlock(vp)) diff --git a/sys/vfs/gnu/ext2fs/ext2_vfsops.c b/sys/vfs/gnu/ext2fs/ext2_vfsops.c index 5fd846e869..54ba478843 100644 --- a/sys/vfs/gnu/ext2fs/ext2_vfsops.c +++ b/sys/vfs/gnu/ext2fs/ext2_vfsops.c @@ -38,7 +38,7 @@ * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 * $FreeBSD: src/sys/gnu/ext2fs/ext2_vfsops.c,v 1.63.2.7 2002/07/01 00:18:51 iedowse Exp $ - * $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_vfsops.c,v 1.3 2003/06/26 05:55:12 dillon Exp $ + * $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_vfsops.c,v 1.4 2003/07/06 21:23:48 dillon Exp $ */ #include "opt_quota.h" @@ -528,6 +528,7 @@ ext2_reload(mountp, cred, p) struct ext2_super_block * es; struct ext2_sb_info *fs; int error; + int gen; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); @@ -561,25 +562,28 @@ ext2_reload(mountp, cred, p) #endif brelse(bp); + gen = lwkt_gettoken(&mntvnode_token); loop: - simple_lock(&mntvnode_slock); for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) { - if (vp->v_mount != mountp) { - simple_unlock(&mntvnode_slock); - goto loop; - } + KKASSERT(vp->v_mount == mountp); nvp = TAILQ_NEXT(vp, v_nmntvnodes); /* * Step 4: invalidate all inactive vnodes. 
*/ - if (vrecycle(vp, &mntvnode_slock, p)) + if (vrecycle(vp, NULL, p)) { + lwkt_gentoken(&mntvnode_token, &gen); goto loop; + } /* * Step 5: invalidate all cached file data. */ - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); + lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&mntvnode_token, &gen)) { + lwkt_reltoken(&vp->v_interlock); + goto loop; + } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { + lwkt_gentoken(&mntvnode_token, &gen); goto loop; } if (vinvalbuf(vp, 0, p, 0, 0)) @@ -600,9 +604,10 @@ loop: &ip->i_din); brelse(bp); vput(vp); - simple_lock(&mntvnode_slock); + if (lwkt_gentoken(&mntvnode_token, &gen)) + goto loop; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); return (0); } @@ -914,6 +919,7 @@ ext2_sync(mp, waitfor, cred, p) struct ufsmount *ump = VFSTOUFS(mp); struct ext2_sb_info *fs; int error, allerror = 0; + int gen; fs = ump->um_e2fs; if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ @@ -923,29 +929,33 @@ ext2_sync(mp, waitfor, cred, p) /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + gen = lwkt_gettoken(&mntvnode_token); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ - if (vp->v_mount != mp) + if (vp->v_mount != mp) { + lwkt_gentoken(&mntvnode_token, &gen); + goto loop; + } + lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&mntvnode_token, &gen)) { + lwkt_reltoken(&vp->v_interlock); goto loop; - simple_lock(&vp->v_interlock); + } nvp = TAILQ_NEXT(vp, v_nmntvnodes); ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { - simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; @@ -954,9 +964,9 @@ loop: allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); - simple_lock(&mntvnode_slock); + lwkt_gentoken(&mntvnode_token, &gen); } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); /* * Force stale file system control information to be flushed. */ diff --git a/sys/vfs/hpfs/hpfs.h b/sys/vfs/hpfs/hpfs.h index 606e642f27..f8dfbbc4c6 100644 --- a/sys/vfs/hpfs/hpfs.h +++ b/sys/vfs/hpfs/hpfs.h @@ -24,7 +24,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/fs/hpfs/hpfs.h,v 1.1 1999/12/09 19:09:58 semenu Exp $ - * $DragonFly: src/sys/vfs/hpfs/hpfs.h,v 1.2 2003/06/17 04:28:33 dillon Exp $ + * $DragonFly: src/sys/vfs/hpfs/hpfs.h,v 1.3 2003/07/06 21:23:47 dillon Exp $ */ /*#define HPFS_DEBUG 10*/ @@ -336,9 +336,7 @@ struct hpfsmount { #define H_INVAL 0x0010 /* Invalid node */ struct hpfsnode { struct lock h_lock; /* Must be first, for std vops */ -#ifndef NULL_SIMPLELOCKS - struct simplelock h_interlock; -#endif + struct lwkt_token h_interlock; LIST_ENTRY(hpfsnode) h_hash; diff --git a/sys/vfs/hpfs/hpfs_hash.c b/sys/vfs/hpfs/hpfs_hash.c index 61e11ae383..e7fa3f4c12 100644 --- a/sys/vfs/hpfs/hpfs_hash.c +++ b/sys/vfs/hpfs/hpfs_hash.c @@ -32,7 +32,7 @@ * * @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 * $FreeBSD: src/sys/fs/hpfs/hpfs_hash.c,v 1.1 1999/12/09 19:09:58 semenu Exp $ - * $DragonFly: src/sys/vfs/hpfs/hpfs_hash.c,v 1.2 2003/06/17 04:28:33 dillon Exp $ + * $DragonFly: src/sys/vfs/hpfs/hpfs_hash.c,v 1.3 2003/07/06 21:23:47 dillon Exp $ */ #include @@ -55,7 +55,7 @@ static LIST_HEAD(hphashhead, hpfsnode) *hpfs_hphashtbl; static u_long hpfs_hphash; /* size of hash table - 1 */ #define HPNOHASH(dev, lsn) (&hpfs_hphashtbl[(minor(dev) + (lsn)) & hpfs_hphash]) #ifndef NULL_SIMPLELOCKS -static struct simplelock hpfs_hphash_slock; +static struct lwkt_token hpfs_hphash_token; #endif struct lock hpfs_hphash_lock; @@ -69,7 +69,7 @@ hpfs_hphashinit() lockinit (&hpfs_hphash_lock, PINOD, "hpfs_hphashlock", 0, 0); hpfs_hphashtbl = HASHINIT(desiredvnodes, M_HPFSHASH, M_WAITOK, &hpfs_hphash); - simple_lock_init(&hpfs_hphash_slock); + lwkt_inittoken(&hpfs_hphash_token); } /* @@ -83,11 +83,11 @@ hpfs_hphashlookup(dev, ino) { struct hpfsnode *hp; - simple_lock(&hpfs_hphash_slock); + lwkt_gettoken(&hpfs_hphash_token); for (hp = HPNOHASH(dev, ino)->lh_first; hp; hp = hp->h_hash.le_next) if (ino == hp->h_no && dev == hp->h_dev) break; - simple_unlock(&hpfs_hphash_slock); + lwkt_reltoken(&hpfs_hphash_token); return (hp); } @@ -101,14 +101,14 @@ hpfs_hphashget(dev, ino) struct hpfsnode *hp; loop: - simple_lock(&hpfs_hphash_slock); + lwkt_gettoken(&hpfs_hphash_token); for (hp = HPNOHASH(dev, ino)->lh_first; hp; hp = hp->h_hash.le_next) { if (ino == hp->h_no && dev == hp->h_dev) { - LOCKMGR(&hp->h_intlock, LK_EXCLUSIVE | LK_INTERLOCK, &hpfs_hphash_slock, NULL); + LOCKMGR(&hp->h_intlock, LK_EXCLUSIVE | LK_INTERLOCK, &hpfs_hphash_token, NULL); return (hp); } } - simple_unlock(&hpfs_hphash_slock); + lwkt_reltoken(&hpfs_hphash_token); return (hp); } #endif @@ -121,20 +121,24 @@ hpfs_hphashvget(dev, ino, p) { struct hpfsnode *hp; struct vnode *vp; + int gen; + gen = lwkt_gettoken(&hpfs_hphash_token); loop: - simple_lock(&hpfs_hphash_slock); for (hp = HPNOHASH(dev, ino)->lh_first; hp; hp = hp->h_hash.le_next) { if (ino == hp->h_no && dev == hp->h_dev) { vp = HPTOV(hp); - simple_lock (&vp->v_interlock); - simple_unlock (&hpfs_hphash_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) + lwkt_gettoken (&vp->v_interlock); + if (lwkt_gentoken(&hpfs_hphash_token, &gen)) goto loop; + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { + gen = lwkt_regettoken(&hpfs_hphash_token); + goto loop; + } return (vp); } } - simple_unlock(&hpfs_hphash_slock); + lwkt_reltoken(&hpfs_hphash_token); return (NULLVP); } @@ -147,11 +151,11 @@ hpfs_hphashins(hp) { struct hphashhead *hpp; - simple_lock(&hpfs_hphash_slock); + lwkt_gettoken(&hpfs_hphash_token); hpp = HPNOHASH(hp->h_dev, hp->h_no); hp->h_flag |= H_HASHED; LIST_INSERT_HEAD(hpp, hp, h_hash); - 
simple_unlock(&hpfs_hphash_slock); + lwkt_reltoken(&hpfs_hphash_token); } /* @@ -161,7 +165,7 @@ void hpfs_hphashrem(hp) struct hpfsnode *hp; { - simple_lock(&hpfs_hphash_slock); + lwkt_gettoken(&hpfs_hphash_token); if (hp->h_flag & H_HASHED) { hp->h_flag &= ~H_HASHED; LIST_REMOVE(hp, h_hash); @@ -170,5 +174,5 @@ hpfs_hphashrem(hp) hp->h_hash.le_prev = NULL; #endif } - simple_unlock(&hpfs_hphash_slock); + lwkt_reltoken(&hpfs_hphash_token); } diff --git a/sys/vfs/hpfs/hpfs_vfsops.c b/sys/vfs/hpfs/hpfs_vfsops.c index 80ea2dc148..e3081da870 100644 --- a/sys/vfs/hpfs/hpfs_vfsops.c +++ b/sys/vfs/hpfs/hpfs_vfsops.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/fs/hpfs/hpfs_vfsops.c,v 1.3.2.2 2001/12/25 01:44:45 dillon Exp $ - * $DragonFly: src/sys/vfs/hpfs/hpfs_vfsops.c,v 1.3 2003/06/26 05:55:12 dillon Exp $ + * $DragonFly: src/sys/vfs/hpfs/hpfs_vfsops.c,v 1.4 2003/07/06 21:23:47 dillon Exp $ */ @@ -668,7 +668,7 @@ hpfs_vget( if (ino == (ino_t)hpmp->hpm_su.su_rootfno) vp->v_flag |= VROOT; - simple_lock_init(&hp->h_interlock); + lwkt_inittoken(&hp->h_interlock); lockinit(&hp->h_lock, PINOD, "hpnode", VLKTIMEOUT, 0); hp->h_flag = H_INVAL; diff --git a/sys/vfs/hpfs/hpfs_vnops.c b/sys/vfs/hpfs/hpfs_vnops.c index b293ab84ac..67e9bde986 100644 --- a/sys/vfs/hpfs/hpfs_vnops.c +++ b/sys/vfs/hpfs/hpfs_vnops.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/fs/hpfs/hpfs_vnops.c,v 1.2.2.2 2002/01/15 18:35:09 semenu Exp $ - * $DragonFly: src/sys/vfs/hpfs/hpfs_vnops.c,v 1.3 2003/06/25 03:55:51 dillon Exp $ + * $DragonFly: src/sys/vfs/hpfs/hpfs_vnops.c,v 1.4 2003/07/06 21:23:47 dillon Exp $ */ #include @@ -651,7 +651,7 @@ hpfs_inactive(ap) if (hp->h_flag & H_INVAL) { VOP__UNLOCK(vp,0,ap->a_p); #if defined(__FreeBSD__) - vrecycle(vp, (struct simplelock *)0, ap->a_p); + vrecycle(vp, NULL, ap->a_p); #else /* defined(__NetBSD__) */ vgone(vp); #endif diff --git a/sys/vfs/isofs/cd9660/cd9660_node.c b/sys/vfs/isofs/cd9660/cd9660_node.c index 6e3c0f2eb0..03277986b6 100644 --- a/sys/vfs/isofs/cd9660/cd9660_node.c +++ b/sys/vfs/isofs/cd9660/cd9660_node.c @@ -37,7 +37,7 @@ * * @(#)cd9660_node.c 8.2 (Berkeley) 1/23/94 * $FreeBSD: src/sys/isofs/cd9660/cd9660_node.c,v 1.29.2.1 2000/07/08 14:35:56 bp Exp $ - * $DragonFly: src/sys/vfs/isofs/cd9660/cd9660_node.c,v 1.3 2003/06/25 03:55:56 dillon Exp $ + * $DragonFly: src/sys/vfs/isofs/cd9660/cd9660_node.c,v 1.4 2003/07/06 21:23:50 dillon Exp $ */ #include @@ -60,7 +60,7 @@ static struct iso_node **isohashtbl; static u_long isohash; #define INOHASH(device, inum) ((minor(device) + ((inum)>>12)) & isohash) #ifndef NULL_SIMPLELOCKS -static struct simplelock cd9660_ihash_slock; +static struct lwkt_token cd9660_ihash_token; #endif static void cd9660_ihashrem __P((struct iso_node *)); @@ -75,7 +75,7 @@ cd9660_init(vfsp) { isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash); - simple_lock_init(&cd9660_ihash_slock); + lwkt_inittoken(&cd9660_ihash_token); return (0); } @@ -104,18 +104,18 @@ cd9660_ihashget(dev, inum) struct vnode *vp; loop: - simple_lock(&cd9660_ihash_slock); + lwkt_gettoken(&cd9660_ihash_token); for (ip = isohashtbl[INOHASH(dev, inum)]; ip; ip = ip->i_next) { if (inum == ip->i_number && dev == ip->i_dev) { vp = ITOV(ip); - simple_lock(&vp->v_interlock); - simple_unlock(&cd9660_ihash_slock); + lwkt_gettoken(&vp->v_interlock); /* YYY */ + lwkt_reltoken(&cd9660_ihash_token); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; return (vp); } } - simple_unlock(&cd9660_ihash_slock); + lwkt_reltoken(&cd9660_ihash_token); return (NULL); } @@ 
-128,16 +128,16 @@ cd9660_ihashins(struct iso_node *ip) struct thread *td = curthread; /* XXX */ struct iso_node **ipp, *iq; - simple_lock(&cd9660_ihash_slock); + lwkt_gettoken(&cd9660_ihash_token); ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)]; if ((iq = *ipp) != NULL) iq->i_prev = &ip->i_next; ip->i_next = iq; ip->i_prev = ipp; *ipp = ip; - simple_unlock(&cd9660_ihash_slock); + lwkt_reltoken(&cd9660_ihash_token); - lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, td); + lockmgr(&ip->i_lock, LK_EXCLUSIVE, NULL, td); } /* @@ -149,7 +149,7 @@ cd9660_ihashrem(ip) { register struct iso_node *iq; - simple_lock(&cd9660_ihash_slock); + lwkt_gettoken(&cd9660_ihash_token); if ((iq = ip->i_next) != NULL) iq->i_prev = ip->i_prev; *ip->i_prev = iq; @@ -157,7 +157,7 @@ cd9660_ihashrem(ip) ip->i_next = NULL; ip->i_prev = NULL; #endif - simple_unlock(&cd9660_ihash_slock); + lwkt_reltoken(&cd9660_ihash_token); } /* @@ -186,7 +186,7 @@ cd9660_inactive(ap) * so that it can be reused immediately. */ if (ip->inode.iso_mode == 0) - vrecycle(vp, (struct simplelock *)0, td); + vrecycle(vp, NULL, td); return error; } diff --git a/sys/vfs/mfs/mfs_vfsops.c b/sys/vfs/mfs/mfs_vfsops.c index f921694650..6f62d6dc59 100644 --- a/sys/vfs/mfs/mfs_vfsops.c +++ b/sys/vfs/mfs/mfs_vfsops.c @@ -32,7 +32,7 @@ * * @(#)mfs_vfsops.c 8.11 (Berkeley) 6/19/95 * $FreeBSD: src/sys/ufs/mfs/mfs_vfsops.c,v 1.81.2.3 2001/07/04 17:35:21 tegge Exp $ - * $DragonFly: src/sys/vfs/mfs/mfs_vfsops.c,v 1.4 2003/06/25 03:56:12 dillon Exp $ + * $DragonFly: src/sys/vfs/mfs/mfs_vfsops.c,v 1.5 2003/07/06 21:23:55 dillon Exp $ */ @@ -331,6 +331,7 @@ mfs_start(struct mount *mp, int flags, struct thread *td) * and the system can loop trying to kill the unkillable ( while * references exist ) MFS process when swap space is low. 
*/ + KKASSERT(curproc); PHOLD(curproc); while (mfsp->mfs_active) { diff --git a/sys/vfs/msdosfs/msdosfs_denode.c b/sys/vfs/msdosfs/msdosfs_denode.c index 297a3d5fbc..8d9ef30c57 100644 --- a/sys/vfs/msdosfs/msdosfs_denode.c +++ b/sys/vfs/msdosfs/msdosfs_denode.c @@ -1,5 +1,5 @@ /* $FreeBSD: src/sys/msdosfs/msdosfs_denode.c,v 1.47.2.3 2002/08/22 16:20:15 trhodes Exp $ */ -/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_denode.c,v 1.4 2003/06/26 05:55:17 dillon Exp $ */ +/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_denode.c,v 1.5 2003/07/06 21:23:52 dillon Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $ */ /*- @@ -73,9 +73,7 @@ static struct denode **dehashtbl; static u_long dehash; /* size of hash table - 1 */ #define DEHASH(dev, dcl, doff) (dehashtbl[(minor(dev) + (dcl) + (doff) / \ sizeof(struct direntry)) & dehash]) -#ifndef NULL_SIMPLELOCKS -static struct simplelock dehash_slock; -#endif +static struct lwkt_token dehash_token; union _qcvt { quad_t qcvt; @@ -106,7 +104,7 @@ msdosfs_init(vfsp) struct vfsconf *vfsp; { dehashtbl = hashinit(desiredvnodes/2, M_MSDOSFSMNT, &dehash); - simple_lock_init(&dehash_slock); + lwkt_inittoken(&dehash_token); return (0); } @@ -131,21 +129,21 @@ msdosfs_hashget(dev, dirclust, diroff) struct vnode *vp; loop: - simple_lock(&dehash_slock); + lwkt_gettoken(&dehash_token); for (dep = DEHASH(dev, dirclust, diroff); dep; dep = dep->de_next) { if (dirclust == dep->de_dirclust && diroff == dep->de_diroffset && dev == dep->de_dev && dep->de_refcnt != 0) { vp = DETOV(dep); - simple_lock(&vp->v_interlock); - simple_unlock(&dehash_slock); + lwkt_gettoken(&vp->v_interlock); + lwkt_reltoken(&dehash_token); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; return (dep); } } - simple_unlock(&dehash_slock); + lwkt_reltoken(&dehash_token); return (NULL); } @@ -155,7 +153,7 @@ msdosfs_hashins(dep) { struct denode **depp, *deq; - simple_lock(&dehash_slock); + lwkt_gettoken(&dehash_token); depp = &DEHASH(dep->de_dev, dep->de_dirclust, dep->de_diroffset); deq = *depp; if (deq) @@ -163,7 +161,7 @@ msdosfs_hashins(dep) dep->de_next = deq; dep->de_prev = depp; *depp = dep; - simple_unlock(&dehash_slock); + lwkt_reltoken(&dehash_token); } static void @@ -172,7 +170,7 @@ msdosfs_hashrem(dep) { struct denode *deq; - simple_lock(&dehash_slock); + lwkt_gettoken(&dehash_token); deq = dep->de_next; if (deq) deq->de_prev = dep->de_prev; @@ -181,7 +179,7 @@ msdosfs_hashrem(dep) dep->de_next = NULL; dep->de_prev = NULL; #endif - simple_unlock(&dehash_slock); + lwkt_reltoken(&dehash_token); } /* @@ -278,7 +276,7 @@ deget(pmp, dirclust, diroffset, depp) * of at the start of msdosfs_hashins() so that reinsert() can * call msdosfs_hashins() with a locked denode. 
*/ - if (lockmgr(&ldep->de_lock, LK_EXCLUSIVE, (struct simplelock *)0, td)) + if (lockmgr(&ldep->de_lock, LK_EXCLUSIVE, NULL, td)) panic("deget: unexpected lock failure"); /* @@ -724,6 +722,6 @@ out: dep->de_Name[0]); #endif if (dep->de_Name[0] == SLOT_DELETED) - vrecycle(vp, (struct simplelock *)0, ap->a_td); + vrecycle(vp, NULL, ap->a_td); return (error); } diff --git a/sys/vfs/msdosfs/msdosfs_vfsops.c b/sys/vfs/msdosfs/msdosfs_vfsops.c index 9322d50bab..54a0daf1da 100644 --- a/sys/vfs/msdosfs/msdosfs_vfsops.c +++ b/sys/vfs/msdosfs/msdosfs_vfsops.c @@ -1,5 +1,5 @@ /* $FreeBSD: src/sys/msdosfs/msdosfs_vfsops.c,v 1.60.2.6 2002/09/12 21:33:38 trhodes Exp $ */ -/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_vfsops.c,v 1.4 2003/06/26 05:55:17 dillon Exp $ */ +/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_vfsops.c,v 1.5 2003/07/06 21:23:52 dillon Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- @@ -856,6 +856,7 @@ msdosfs_sync(mp, waitfor, td) struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; + int gen; /* * If we ever switch to not updating all of the fats all the time, @@ -870,31 +871,39 @@ msdosfs_sync(mp, waitfor, td) } /* * Write back each (modified) denode. + * + * YYY gen number handling needs more work. */ - simple_lock(&mntvnode_slock); + gen = lwkt_gettoken(&mntvnode_token); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { + lwkt_gettoken(&vp->v_interlock); /* * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. + * associated with this mount point, start over. If + * we lost the mntvnode token, start over. */ - if (vp->v_mount != mp) + if (vp->v_mount != mp) { + lwkt_reltoken(&vp->v_interlock); goto loop; - - simple_lock(&vp->v_interlock); + } + if (lwkt_gentoken(&mntvnode_token, &gen) != 0) { + lwkt_reltoken(&vp->v_interlock); + goto loop; + } nvp = TAILQ_NEXT(vp, v_nmntvnodes); dep = VTODE(vp); if (vp->v_type == VNON || ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); if (error) { - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); if (error == ENOENT) goto loop; continue; @@ -904,9 +913,9 @@ loop: allerror = error; VOP_UNLOCK(vp, 0, td); vrele(vp); - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); /* * Flush filesystem control info. 
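The msdosfs_sync() conversion above is the clearest statement of the idiom this patch applies wherever a simplelock used to guard a list scan: take the token once, remember the generation number lwkt_gettoken() returns, and after any potentially blocking operation call lwkt_gentoken() to learn whether the token was lost, restarting the scan if so. A minimal sketch of that pattern, assuming a hypothetical token-protected list (mylist_token, mynode_list, struct mynode, and might_block() are illustrative placeholders, not code from this patch):

#include <sys/queue.h>

struct mynode {
	TAILQ_ENTRY(mynode) n_entry;
};

static TAILQ_HEAD(, mynode) mynode_list =
	TAILQ_HEAD_INITIALIZER(mynode_list);
static struct lwkt_token mylist_token;		/* lwkt_inittoken() at init time */

extern void might_block(struct mynode *np);	/* stand-in for vget() etc. */

static void
scan_mylist(void)
{
	struct mynode *np;
	int gen;

	gen = lwkt_gettoken(&mylist_token);	/* returns a generation count */
loop:
	TAILQ_FOREACH(np, &mynode_list, n_entry) {
		might_block(np);
		/*
		 * If we blocked, another thread may have acquired the
		 * token and modified the list; lwkt_gentoken() updates
		 * gen and returns non-zero in that case, so rescan.
		 */
		if (lwkt_gentoken(&mylist_token, &gen) != 0)
			goto loop;
	}
	lwkt_reltoken(&mylist_token);
}

Unlike a simplelock, a token does not exclude other CPUs for the whole critical section; it only guarantees serialization while the holder does not block, which is why the generation check after every blocking point (and the YYY notes above) carry the real weight of the conversion.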
diff --git a/sys/vfs/msdosfs/msdosfs_vnops.c b/sys/vfs/msdosfs/msdosfs_vnops.c index ccc4cb96f8..8e2c131ab3 100644 --- a/sys/vfs/msdosfs/msdosfs_vnops.c +++ b/sys/vfs/msdosfs/msdosfs_vnops.c @@ -1,5 +1,5 @@ /* $FreeBSD: src/sys/msdosfs/msdosfs_vnops.c,v 1.95.2.4 2003/06/13 15:05:47 trhodes Exp $ */ -/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_vnops.c,v 1.6 2003/06/26 05:55:17 dillon Exp $ */ +/* $DragonFly: src/sys/vfs/msdosfs/msdosfs_vnops.c,v 1.7 2003/07/06 21:23:52 dillon Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- @@ -234,12 +234,15 @@ msdosfs_close(ap) struct denode *dep = VTODE(vp); struct timespec ts; - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) { getnanotime(&ts); - DETIMES(dep, &ts, &ts, &ts); + lwkt_regettoken(&vp->v_interlock); + if (vp->v_usecount > 1) { + DETIMES(dep, &ts, &ts, &ts); + } } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return 0; } diff --git a/sys/vfs/nfs/nfs_nqlease.c b/sys/vfs/nfs/nfs_nqlease.c index 4e3e0f6b38..c58c975bb3 100644 --- a/sys/vfs/nfs/nfs_nqlease.c +++ b/sys/vfs/nfs/nfs_nqlease.c @@ -35,7 +35,7 @@ * * @(#)nfs_nqlease.c 8.9 (Berkeley) 5/20/95 * $FreeBSD: src/sys/nfs/nfs_nqlease.c,v 1.50 2000/02/13 03:32:05 peter Exp $ - * $DragonFly: src/sys/vfs/nfs/Attic/nfs_nqlease.c,v 1.4 2003/06/26 05:55:18 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/Attic/nfs_nqlease.c,v 1.5 2003/07/06 21:23:53 dillon Exp $ */ @@ -1185,9 +1185,9 @@ nqnfs_lease_updatetime(int deltat) * Search the mount list for all nqnfs mounts and do their timer * queues. */ - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nxtmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, td)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) { nxtmp = TAILQ_NEXT(mp, mnt_list); continue; } @@ -1201,11 +1201,11 @@ nqnfs_lease_updatetime(int deltat) } } } - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); nxtmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); } #ifndef NFS_NOSERVER diff --git a/sys/vfs/ntfs/ntfs_ihash.c b/sys/vfs/ntfs/ntfs_ihash.c index 0adc039f5f..9569f5efb7 100644 --- a/sys/vfs/ntfs/ntfs_ihash.c +++ b/sys/vfs/ntfs/ntfs_ihash.c @@ -34,7 +34,7 @@ * * @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 * $FreeBSD: src/sys/ntfs/ntfs_ihash.c,v 1.7 1999/12/03 20:37:39 semenu Exp $ - * $DragonFly: src/sys/vfs/ntfs/ntfs_ihash.c,v 1.2 2003/06/17 04:28:54 dillon Exp $ + * $DragonFly: src/sys/vfs/ntfs/ntfs_ihash.c,v 1.3 2003/07/06 21:23:53 dillon Exp $ */ #include @@ -58,9 +58,7 @@ MALLOC_DEFINE(M_NTFSNTHASH, "NTFS nthash", "NTFS ntnode hash tables"); static LIST_HEAD(nthashhead, ntnode) *ntfs_nthashtbl; static u_long ntfs_nthash; /* size of hash table - 1 */ #define NTNOHASH(device, inum) (&ntfs_nthashtbl[(minor(device) + (inum)) & ntfs_nthash]) -#ifndef NULL_SIMPLELOCKS -static struct simplelock ntfs_nthash_slock; -#endif +static struct lwkt_token ntfs_nthash_slock; struct lock ntfs_hashlock; /* @@ -72,7 +70,7 @@ ntfs_nthashinit() lockinit(&ntfs_hashlock, PINOD, "ntfs_nthashlock", 0, 0); ntfs_nthashtbl = HASHINIT(desiredvnodes, M_NTFSNTHASH, M_WAITOK, &ntfs_nthash); - simple_lock_init(&ntfs_nthash_slock); + lwkt_inittoken(&ntfs_nthash_slock); } /* @@ -86,11 +84,11 @@ ntfs_nthashlookup(dev, inum) { struct ntnode *ip; - simple_lock(&ntfs_nthash_slock); + lwkt_gettoken(&ntfs_nthash_slock); for (ip = NTNOHASH(dev, inum)->lh_first; ip; ip = 
ip->i_hash.le_next) if (inum == ip->i_number && dev == ip->i_dev) break; - simple_unlock(&ntfs_nthash_slock); + lwkt_reltoken(&ntfs_nthash_slock); return (ip); } @@ -104,11 +102,11 @@ ntfs_nthashins(ip) { struct nthashhead *ipp; - simple_lock(&ntfs_nthash_slock); + lwkt_gettoken(&ntfs_nthash_slock); ipp = NTNOHASH(ip->i_dev, ip->i_number); LIST_INSERT_HEAD(ipp, ip, i_hash); ip->i_flag |= IN_HASHED; - simple_unlock(&ntfs_nthash_slock); + lwkt_reltoken(&ntfs_nthash_slock); } /* @@ -118,7 +116,7 @@ void ntfs_nthashrem(ip) struct ntnode *ip; { - simple_lock(&ntfs_nthash_slock); + lwkt_gettoken(&ntfs_nthash_slock); if (ip->i_flag & IN_HASHED) { ip->i_flag &= ~IN_HASHED; LIST_REMOVE(ip, i_hash); @@ -127,5 +125,5 @@ ntfs_nthashrem(ip) ip->i_hash.le_prev = NULL; #endif } - simple_unlock(&ntfs_nthash_slock); + lwkt_reltoken(&ntfs_nthash_slock); } diff --git a/sys/vfs/ntfs/ntfs_inode.h b/sys/vfs/ntfs/ntfs_inode.h index 520610d2a8..f4bfc164b1 100644 --- a/sys/vfs/ntfs/ntfs_inode.h +++ b/sys/vfs/ntfs/ntfs_inode.h @@ -26,7 +26,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/ntfs/ntfs_inode.h,v 1.6 1999/12/03 20:37:39 semenu Exp $ - * $DragonFly: src/sys/vfs/ntfs/ntfs_inode.h,v 1.2 2003/06/17 04:28:54 dillon Exp $ + * $DragonFly: src/sys/vfs/ntfs/ntfs_inode.h,v 1.3 2003/07/06 21:23:53 dillon Exp $ */ /* These flags are kept in i_flag. */ @@ -70,7 +70,7 @@ struct ntnode { /* locking */ struct lock i_lock; - struct simplelock i_interlock; + struct lwkt_token i_interlock; int i_usecount; LIST_HEAD(,fnode) i_fnlist; diff --git a/sys/vfs/ntfs/ntfs_subr.c b/sys/vfs/ntfs/ntfs_subr.c index c440190a1c..052e7a6613 100644 --- a/sys/vfs/ntfs/ntfs_subr.c +++ b/sys/vfs/ntfs/ntfs_subr.c @@ -26,7 +26,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/ntfs/ntfs_subr.c,v 1.7.2.4 2001/10/12 22:08:49 semenu Exp $ - * $DragonFly: src/sys/vfs/ntfs/ntfs_subr.c,v 1.4 2003/06/26 05:55:18 dillon Exp $ + * $DragonFly: src/sys/vfs/ntfs/ntfs_subr.c,v 1.5 2003/07/06 21:23:53 dillon Exp $ */ #include @@ -359,7 +359,7 @@ ntfs_ntget(ip) dprintf(("ntfs_ntget: get ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + lwkt_gettoken(&ip->i_interlock); ip->i_usecount++; LOCKMGR(&ip->i_lock, LK_EXCLUSIVE | LK_INTERLOCK, &ip->i_interlock); @@ -409,7 +409,7 @@ ntfs_ntlookup( /* init lock and lock the newborn ntnode */ lockinit(&ip->i_lock, PINOD, "ntnode", 0, LK_EXCLUSIVE); - simple_lock_init(&ip->i_interlock); + lwkt_inittoken(&ip->i_interlock); ntfs_ntget(ip); ntfs_nthashins(ip); @@ -439,7 +439,7 @@ ntfs_ntput(ip) dprintf(("ntfs_ntput: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + lwkt_gettoken(&ip->i_interlock); ip->i_usecount--; #ifdef DIAGNOSTIC @@ -477,9 +477,9 @@ void ntfs_ntref(ip) struct ntnode *ip; { - simple_lock(&ip->i_interlock); + lwkt_gettoken(&ip->i_interlock); ip->i_usecount++; - simple_unlock(&ip->i_interlock); + lwkt_reltoken(&ip->i_interlock); dprintf(("ntfs_ntref: ino %d, usecount: %d\n", ip->i_number, ip->i_usecount)); @@ -496,13 +496,13 @@ ntfs_ntrele(ip) dprintf(("ntfs_ntrele: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + lwkt_gettoken(&ip->i_interlock); ip->i_usecount--; if (ip->i_usecount < 0) panic("ntfs_ntrele: ino: %d usecount: %d \n", ip->i_number,ip->i_usecount); - simple_unlock(&ip->i_interlock); + lwkt_reltoken(&ip->i_interlock); } /* diff --git a/sys/vfs/ntfs/ntfs_vfsops.c b/sys/vfs/ntfs/ntfs_vfsops.c index 7389f77e78..6dd3b9e0ac 100644 --- 
a/sys/vfs/ntfs/ntfs_vfsops.c +++ b/sys/vfs/ntfs/ntfs_vfsops.c @@ -26,7 +26,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/ntfs/ntfs_vfsops.c,v 1.20.2.5 2001/12/25 01:44:45 dillon Exp $ - * $DragonFly: src/sys/vfs/ntfs/ntfs_vfsops.c,v 1.4 2003/06/26 05:55:18 dillon Exp $ + * $DragonFly: src/sys/vfs/ntfs/ntfs_vfsops.c,v 1.5 2003/07/06 21:23:53 dillon Exp $ */ @@ -197,9 +197,9 @@ ntfs_mountroot() return (error); } - simple_lock(&mountlist_slock); + lwkt_gettoken(&mountlist_token); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); + lwkt_reltoken(&mountlist_token); (void)ntfs_statfs(mp, &mp->mnt_stat, td); vfs_unbusy(mp); return (0); diff --git a/sys/vfs/nullfs/null_vnops.c b/sys/vfs/nullfs/null_vnops.c index b3f0bedc11..4bcbef8237 100644 --- a/sys/vfs/nullfs/null_vnops.c +++ b/sys/vfs/nullfs/null_vnops.c @@ -38,7 +38,7 @@ * Ancestors: * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 * $FreeBSD: src/sys/miscfs/nullfs/null_vnops.c,v 1.38.2.6 2002/07/31 00:32:28 semenu Exp $ - * $DragonFly: src/sys/vfs/nullfs/null_vnops.c,v 1.4 2003/06/26 05:55:15 dillon Exp $ + * $DragonFly: src/sys/vfs/nullfs/null_vnops.c,v 1.5 2003/07/06 21:23:52 dillon Exp $ * ...and... * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project * @@ -589,7 +589,7 @@ null_lock(ap) if (vp->v_vnlock != NULL) { /* lock is shared across layers */ if (flags & LK_INTERLOCK) - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return 0; } error = lockmgr(&np->null_lock, flags & ~LK_THISLAYER, diff --git a/sys/vfs/nwfs/nwfs_node.c b/sys/vfs/nwfs/nwfs_node.c index bbc0025bba..dacba8d3c8 100644 --- a/sys/vfs/nwfs/nwfs_node.c +++ b/sys/vfs/nwfs/nwfs_node.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/nwfs/nwfs_node.c,v 1.3.2.8 2001/12/25 01:44:45 dillon Exp $ - * $DragonFly: src/sys/vfs/nwfs/nwfs_node.c,v 1.4 2003/06/26 05:55:19 dillon Exp $ + * $DragonFly: src/sys/vfs/nwfs/nwfs_node.c,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #include #include @@ -143,13 +143,14 @@ nwfs_allocvp(struct mount *mp, ncpfid fid, struct vnode **vpp) struct nwmount *nmp = VFSTONWFS(mp); struct vnode *vp; int error; + int gen; loop: lockmgr(&nwhashlock, LK_EXCLUSIVE, NULL, td); rescan: if (nwfs_hashlookup(nmp, fid, &np) == 0) { vp = NWTOV(np); - simple_lock(&vp->v_interlock); + gen = lwkt_gettoken(&vp->v_interlock); lockmgr(&nwhashlock, LK_RELEASE, NULL, td); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; diff --git a/sys/vfs/nwfs/nwfs_vnops.c b/sys/vfs/nwfs/nwfs_vnops.c index cad9d62932..af52df8ff8 100644 --- a/sys/vfs/nwfs/nwfs_vnops.c +++ b/sys/vfs/nwfs/nwfs_vnops.c @@ -30,7 +30,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/nwfs/nwfs_vnops.c,v 1.6.2.3 2001/03/14 11:26:59 bp Exp $ - * $DragonFly: src/sys/vfs/nwfs/nwfs_vnops.c,v 1.4 2003/06/26 05:55:19 dillon Exp $ + * $DragonFly: src/sys/vfs/nwfs/nwfs_vnops.c,v 1.5 2003/07/06 21:23:54 dillon Exp $ */ #include #include @@ -248,24 +248,25 @@ nwfs_close(ap) if (vp->v_type == VDIR) return 0; /* nothing to do now */ error = 0; - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (np->opened == 0) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return 0; } - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); error = nwfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (np->opened == 0) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return 0; } if (--np->opened == 0) { - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); error = ncp_close_file(NWFSTOCONN(VTONWFS(vp)), &np->n_fh, ap->a_td, proc0.p_ucred); - } else - simple_unlock(&vp->v_interlock); + } else { + lwkt_reltoken(&vp->v_interlock); + } np->n_atime = 0; return (error); } diff --git a/sys/vfs/smbfs/smbfs.h b/sys/vfs/smbfs/smbfs.h index 2e8f3d7350..a555cdccba 100644 --- a/sys/vfs/smbfs/smbfs.h +++ b/sys/vfs/smbfs/smbfs.h @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/fs/smbfs/smbfs.h,v 1.2.2.2 2003/01/17 08:20:26 tjr Exp $ - * $DragonFly: src/sys/vfs/smbfs/smbfs.h,v 1.4 2003/06/26 05:55:12 dillon Exp $ + * $DragonFly: src/sys/vfs/smbfs/smbfs.h,v 1.5 2003/07/06 21:23:48 dillon Exp $ */ #ifndef _SMBFS_SMBFS_H_ #define _SMBFS_SMBFS_H_ @@ -86,7 +86,7 @@ struct smbmount { int sm_flags; long sm_nextino; struct smb_share * sm_share; -/* struct simplelock sm_npslock;*/ +/* struct lwkt_token sm_npslock;*/ struct smbnode * sm_npstack[SMBFS_MAXPATHCOMP]; int sm_caseopt; struct lock sm_hashlock; diff --git a/sys/vfs/ufs/ffs_vfsops.c b/sys/vfs/ufs/ffs_vfsops.c index 2ad36a37dc..bd7d0d6181 100644 --- a/sys/vfs/ufs/ffs_vfsops.c +++ b/sys/vfs/ufs/ffs_vfsops.c @@ -32,7 +32,7 @@ * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 * $FreeBSD: src/sys/ufs/ffs/ffs_vfsops.c,v 1.117.2.10 2002/06/23 22:34:52 iedowse Exp $ - * $DragonFly: src/sys/vfs/ufs/ffs_vfsops.c,v 1.5 2003/07/03 18:35:27 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ffs_vfsops.c,v 1.6 2003/07/06 21:23:55 dillon Exp $ */ #include "opt_quota.h" @@ -430,10 +430,10 @@ ffs_reload(struct mount *mp, struct ucred *cred, struct thread *td) struct partinfo dpart; dev_t dev; int i, blks, size, error; + int gen; + int vgen; int32_t *lp; - KKASSERT(td->td_proc && td->td_proc->p_ucred == cred); - if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* @@ -455,7 +455,7 @@ ffs_reload(struct mount *mp, struct ucred *cred, struct thread *td) if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); vfs_object_create(devvp, td); - simple_lock(&devvp->v_interlock); + lwkt_gettoken(&devvp->v_interlock); VOP_UNLOCK(devvp, LK_INTERLOCK, td); } @@ -520,25 +520,33 @@ ffs_reload(struct mount *mp, struct ucred *cred, struct thread *td) *lp++ = fs->fs_contigsumsize; } + gen = lwkt_gettoken(&mntvnode_token); loop: - simple_lock(&mntvnode_slock); for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); + lwkt_gentoken(&mntvnode_token, &gen); goto loop; } nvp = TAILQ_NEXT(vp, v_nmntvnodes); /* - * Step 4: invalidate all inactive vnodes. + * Step 4: invalidate all inactive vnodes. 
*/ - if (vrecycle(vp, &mntvnode_slock, td)) + if (vrecycle(vp, NULL, td)) { + lwkt_gentoken(&mntvnode_token, &gen); goto loop; + } /* * Step 5: invalidate all cached file data. */ - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); + vgen = lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&mntvnode_token, &gen) != 0 || + lwkt_gentoken(&vp->v_interlock, &vgen) != 0) { + lwkt_reltoken(&vp->v_interlock); + lwkt_gentoken(&mntvnode_token, &gen); + goto loop; + } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + lwkt_gentoken(&mntvnode_token, &gen); goto loop; } if (vinvalbuf(vp, 0, td, 0, 0)) @@ -559,9 +567,8 @@ loop: ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); - simple_lock(&mntvnode_slock); } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); return (0); } @@ -616,7 +623,7 @@ ffs_mountfs(devvp, mp, td, malloctype) if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); vfs_object_create(devvp, td); - simple_lock(&devvp->v_interlock); + lwkt_gettoken(&devvp->v_interlock); VOP_UNLOCK(devvp, LK_INTERLOCK, td); } @@ -874,10 +881,6 @@ ffs_flushfiles(struct mount *mp, int flags, struct thread *td) { struct ufsmount *ump; int error; - struct ucred *cred; - - KKASSERT(td->td_proc); - cred = td->td_proc->p_ucred; ump = VFSTOUFS(mp); #ifdef QUOTA @@ -966,7 +969,7 @@ ffs_sync(struct mount *mp, int waitfor, struct thread *td) /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { /* @@ -977,7 +980,7 @@ loop: goto loop; /* - * Depend on the mntvnode_slock to keep things stable enough + * Depend on the mntvnode_token to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. @@ -990,10 +993,10 @@ loop: continue; } if (vp->v_type != VCHR) { - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT, td); if (error) { - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); if (error == ENOENT) goto loop; } else { @@ -1001,7 +1004,7 @@ loop: allerror = error; VOP_UNLOCK(vp, 0, td); vrele(vp); - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); } } else { /* @@ -1011,16 +1014,16 @@ loop: * we holding a vnode lock? */ VREF(vp); - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ UFS_UPDATE(vp, 0); vrele(vp); - simple_lock(&mntvnode_slock); + lwkt_gettoken(&mntvnode_token); } if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) goto loop; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); /* * Force stale file system control information to be flushed. 
*/ diff --git a/sys/vfs/ufs/ufs_ihash.c b/sys/vfs/ufs/ufs_ihash.c index f3b70e4ee1..91b269c3cc 100644 --- a/sys/vfs/ufs/ufs_ihash.c +++ b/sys/vfs/ufs/ufs_ihash.c @@ -32,7 +32,7 @@ * * @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 * $FreeBSD: src/sys/ufs/ufs/ufs_ihash.c,v 1.20 1999/08/28 00:52:29 peter Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_ihash.c,v 1.3 2003/06/25 03:56:12 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ufs_ihash.c,v 1.4 2003/07/06 21:23:55 dillon Exp $ */ #include @@ -54,9 +54,7 @@ static MALLOC_DEFINE(M_UFSIHASH, "UFS ihash", "UFS Inode hash tables"); static LIST_HEAD(ihashhead, inode) *ihashtbl; static u_long ihash; /* size of hash table - 1 */ #define INOHASH(device, inum) (&ihashtbl[(minor(device) + (inum)) & ihash]) -#ifndef NULL_SIMPLELOCKS -static struct simplelock ufs_ihash_slock; -#endif +static struct lwkt_token ufs_ihash_token; /* * Initialize inode hash table. @@ -64,9 +62,8 @@ static struct simplelock ufs_ihash_slock; void ufs_ihashinit() { - ihashtbl = hashinit(desiredvnodes, M_UFSIHASH, &ihash); - simple_lock_init(&ufs_ihash_slock); + lwkt_inittoken(&ufs_ihash_token); } /* @@ -80,11 +77,11 @@ ufs_ihashlookup(dev, inum) { struct inode *ip; - simple_lock(&ufs_ihash_slock); + lwkt_gettoken(&ufs_ihash_token); for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) if (inum == ip->i_number && dev == ip->i_dev) break; - simple_unlock(&ufs_ihash_slock); + lwkt_reltoken(&ufs_ihash_token); if (ip) return (ITOV(ip)); @@ -101,20 +98,27 @@ ufs_ihashget(dev_t dev, ino_t inum) struct thread *td = curthread; /* XXX */ struct inode *ip; struct vnode *vp; + int gen; + int vgen; + gen = lwkt_gettoken(&ufs_ihash_token); loop: - simple_lock(&ufs_ihash_slock); for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) { if (inum == ip->i_number && dev == ip->i_dev) { vp = ITOV(ip); - simple_lock(&vp->v_interlock); - simple_unlock(&ufs_ihash_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) + vgen = lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&ufs_ihash_token, &gen) != 0) { + lwkt_reltoken(&vp->v_interlock); + goto loop; + } + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + lwkt_gentoken(&ufs_ihash_token, &gen); goto loop; + } return (vp); } } - simple_unlock(&ufs_ihash_slock); + lwkt_reltoken(&ufs_ihash_token); return (NULL); } @@ -128,13 +132,13 @@ ufs_ihashins(struct inode *ip) struct ihashhead *ipp; /* lock the inode, then put it on the appropriate hash list */ - lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, td); + lockmgr(&ip->i_lock, LK_EXCLUSIVE, NULL, td); - simple_lock(&ufs_ihash_slock); + lwkt_gettoken(&ufs_ihash_token); ipp = INOHASH(ip->i_dev, ip->i_number); LIST_INSERT_HEAD(ipp, ip, i_hash); ip->i_flag |= IN_HASHED; - simple_unlock(&ufs_ihash_slock); + lwkt_reltoken(&ufs_ihash_token); } /* @@ -144,7 +148,7 @@ void ufs_ihashrem(ip) struct inode *ip; { - simple_lock(&ufs_ihash_slock); + lwkt_gettoken(&ufs_ihash_token); if (ip->i_flag & IN_HASHED) { ip->i_flag &= ~IN_HASHED; LIST_REMOVE(ip, i_hash); @@ -153,5 +157,5 @@ ufs_ihashrem(ip) ip->i_hash.le_prev = NULL; #endif } - simple_unlock(&ufs_ihash_slock); + lwkt_reltoken(&ufs_ihash_token); } diff --git a/sys/vfs/ufs/ufs_inode.c b/sys/vfs/ufs/ufs_inode.c index 98abc0d779..5deb08e21a 100644 --- a/sys/vfs/ufs/ufs_inode.c +++ b/sys/vfs/ufs/ufs_inode.c @@ -37,7 +37,7 @@ * * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 * $FreeBSD: src/sys/ufs/ufs/ufs_inode.c,v 1.25.2.3 2002/07/05 22:42:31 dillon Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_inode.c,v 1.3 2003/06/25 03:56:12 dillon Exp $ 
+ * $DragonFly: src/sys/vfs/ufs/ufs_inode.c,v 1.4 2003/07/06 21:23:55 dillon Exp $ */ #include "opt_quota.h" @@ -103,7 +103,7 @@ out: * so that it can be reused immediately. */ if (ip->i_mode == 0) - vrecycle(vp, (struct simplelock *)0, td); + vrecycle(vp, NULL, td); return (error); } diff --git a/sys/vfs/ufs/ufs_lookup.c b/sys/vfs/ufs/ufs_lookup.c index d9b8be8a23..5a1f13fbf2 100644 --- a/sys/vfs/ufs/ufs_lookup.c +++ b/sys/vfs/ufs/ufs_lookup.c @@ -37,7 +37,7 @@ * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.3 2003/06/25 03:56:12 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.4 2003/07/06 21:23:55 dillon Exp $ */ #include "opt_ufs.h" @@ -747,7 +747,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; - KKASSERT(td->td_proc); + KKASSERT(td->td_proc); /* YYY use/require cred passed in cnp? */ cred = td->td_proc->p_ucred; dp = VTOI(dvp); diff --git a/sys/vfs/ufs/ufs_quota.c b/sys/vfs/ufs/ufs_quota.c index c8668ad7be..1f045cfed9 100644 --- a/sys/vfs/ufs/ufs_quota.c +++ b/sys/vfs/ufs/ufs_quota.c @@ -35,7 +35,7 @@ * * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 * $FreeBSD: src/sys/ufs/ufs/ufs_quota.c,v 1.27.2.3 2002/01/15 10:33:32 phk Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_quota.c,v 1.5 2003/06/26 05:55:21 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ufs_quota.c,v 1.6 2003/07/06 21:23:55 dillon Exp $ */ #include @@ -655,6 +655,7 @@ qsync(struct mount *mp) struct vnode *vp, *nextvp; struct dquot *dq; int i, error; + int gen; /* * Check if the mount point has any quotas. @@ -669,7 +670,7 @@ qsync(struct mount *mp) * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. */ - simple_lock(&mntvnode_slock); + gen = lwkt_gettoken(&mntvnode_token); again: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) { if (vp->v_mount != mp) @@ -677,13 +678,17 @@ again: nextvp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp->v_type == VNON) continue; - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); + lwkt_gettoken(&vp->v_interlock); + if (lwkt_gentoken(&mntvnode_token, &gen) != 0) { + lwkt_reltoken(&vp->v_interlock); + goto again; + } error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + if (error == ENOENT) { + lwkt_gentoken(&mntvnode_token, &gen); goto again; + } continue; } for (i = 0; i < MAXQUOTAS; i++) { @@ -692,11 +697,11 @@ again: dqsync(vp, dq); } vput(vp); - simple_lock(&mntvnode_slock); + lwkt_gentoken(&mntvnode_token, &gen); if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp) goto again; } - simple_unlock(&mntvnode_slock); + lwkt_reltoken(&mntvnode_token); return (0); } diff --git a/sys/vfs/ufs/ufs_readwrite.c b/sys/vfs/ufs/ufs_readwrite.c index 66afd8693b..b1f7a6d59a 100644 --- a/sys/vfs/ufs/ufs_readwrite.c +++ b/sys/vfs/ufs/ufs_readwrite.c @@ -32,7 +32,7 @@ * * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 * $FreeBSD: src/sys/ufs/ufs/ufs_readwrite.c,v 1.65.2.14 2003/04/04 22:21:29 tegge Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_readwrite.c,v 1.7 2003/07/03 17:24:04 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ufs_readwrite.c,v 1.8 2003/07/06 21:23:55 dillon Exp $ */ #define BLKSIZE(a, b, c) blksize(a, b, c) @@ -466,7 +466,7 @@ ffs_write(ap) * file servers have no limits, I don't think it matters. 
*/ td = uio->uio_td; - if (vp->v_type == VREG && td->td_proc && + if (vp->v_type == VREG && td && td->td_proc && uio->uio_offset + uio->uio_resid > td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(td->td_proc, SIGXFSZ); diff --git a/sys/vfs/ufs/ufs_vnops.c b/sys/vfs/ufs/ufs_vnops.c index 423b2234cd..da2dc2df27 100644 --- a/sys/vfs/ufs/ufs_vnops.c +++ b/sys/vfs/ufs/ufs_vnops.c @@ -37,7 +37,7 @@ * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 * $FreeBSD: src/sys/ufs/ufs/ufs_vnops.c,v 1.131.2.8 2003/01/02 17:26:19 bde Exp $ - * $DragonFly: src/sys/vfs/ufs/ufs_vnops.c,v 1.5 2003/06/26 05:55:21 dillon Exp $ + * $DragonFly: src/sys/vfs/ufs/ufs_vnops.c,v 1.6 2003/07/06 21:23:55 dillon Exp $ */ #include "opt_quota.h" @@ -294,10 +294,10 @@ ufs_close(ap) { register struct vnode *vp = ap->a_vp; - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return (0); } @@ -1842,10 +1842,10 @@ ufsspec_close(ap) { struct vnode *vp = ap->a_vp; - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -1916,10 +1916,10 @@ ufsfifo_close(ap) { struct vnode *vp = ap->a_vp; - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -2205,9 +2205,9 @@ ufs_kqfilter(ap) kn->kn_hook = (caddr_t)vp; - simple_lock(&vp->v_pollinfo.vpi_lock); + lwkt_gettoken(&vp->v_pollinfo.vpi_token); SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext); - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); return (0); } @@ -2217,10 +2217,10 @@ filt_ufsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; - simple_lock(&vp->v_pollinfo.vpi_lock); + lwkt_gettoken(&vp->v_pollinfo.vpi_token); SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note, kn, knote, kn_selnext); - simple_unlock(&vp->v_pollinfo.vpi_lock); + lwkt_reltoken(&vp->v_pollinfo.vpi_token); } /*ARGSUSED*/ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index abc2a3cc63..1b3b614fcb 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ - * $DragonFly: src/sys/vm/vm_map.c,v 1.4 2003/07/03 17:24:04 dillon Exp $ + * $DragonFly: src/sys/vm/vm_map.c,v 1.5 2003/07/06 21:23:56 dillon Exp $ */ /* @@ -327,42 +327,44 @@ vm_map_init(map, min, max) } /* - * vm_map_entry_dispose: [ internal use only ] + * vm_map_entry_create: [ internal use only ] * - * Inverse of vm_map_entry_create. + * Allocates a VM map entry for insertion. No entry fields are filled + * in. This routine may be called from an interrupt. 
*/ -static void -vm_map_entry_dispose(map, entry) +static vm_map_entry_t +vm_map_entry_create(map) vm_map_t map; - vm_map_entry_t entry; { + vm_map_entry_t new_entry; + if (map->system_map || !mapentzone) - zfreei(kmapentzone, entry); + new_entry = zalloc(kmapentzone); else - zfree(mapentzone, entry); + new_entry = zalloc(mapentzone); + if (new_entry == NULL) + panic("vm_map_entry_create: kernel resources exhausted"); + return(new_entry); } /* - * vm_map_entry_create: [ internal use only ] + * vm_map_entry_dispose: [ internal use only ] * - * Allocates a VM map entry for insertion. - * No entry fields are filled in. This routine is + * Dispose of a vm_map_entry that is no longer being referenced. This + * function may be called from an interrupt. */ -static vm_map_entry_t -vm_map_entry_create(map) +static void +vm_map_entry_dispose(map, entry) vm_map_t map; + vm_map_entry_t entry; { - vm_map_entry_t new_entry; - if (map->system_map || !mapentzone) - new_entry = zalloci(kmapentzone); + zfree(kmapentzone, entry); else - new_entry = zalloc(mapentzone); - if (new_entry == NULL) - panic("vm_map_entry_create: kernel resources exhausted"); - return(new_entry); + zfree(mapentzone, entry); } + /* * vm_map_entry_{un,}link: * diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 536674e17e..e7cc6f0585 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_map.h,v 1.54.2.5 2003/01/13 22:51:17 dillon Exp $ - * $DragonFly: src/sys/vm/vm_map.h,v 1.3 2003/06/25 03:56:13 dillon Exp $ + * $DragonFly: src/sys/vm/vm_map.h,v 1.4 2003/07/06 21:23:56 dillon Exp $ */ /* @@ -300,15 +300,15 @@ _vm_map_lock_upgrade(vm_map_t map, struct thread *td) { #define vm_map_set_recursive(map) \ do { \ - simple_lock(&(map)->lock.lk_interlock); \ + lwkt_gettoken(&(map)->lock.lk_interlock); \ (map)->lock.lk_flags |= LK_CANRECURSE; \ - simple_unlock(&(map)->lock.lk_interlock); \ + lwkt_reltoken(&(map)->lock.lk_interlock); \ } while(0) #define vm_map_clear_recursive(map) \ do { \ - simple_lock(&(map)->lock.lk_interlock); \ + lwkt_gettoken(&(map)->lock.lk_interlock); \ (map)->lock.lk_flags &= ~LK_CANRECURSE; \ - simple_unlock(&(map)->lock.lk_interlock); \ + lwkt_reltoken(&(map)->lock.lk_interlock); \ } while(0) /* diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 374743c172..41db379030 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $ - * $DragonFly: src/sys/vm/vm_object.c,v 1.5 2003/07/03 17:24:04 dillon Exp $ + * $DragonFly: src/sys/vm/vm_object.c,v 1.6 2003/07/06 21:23:56 dillon Exp $ */ /* @@ -131,9 +131,7 @@ static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int cur */ struct object_q vm_object_list; -#ifndef NULL_SIMPLELOCKS -static struct simplelock vm_object_list_lock; -#endif +static struct lwkt_token vm_object_list_token; static long vm_object_count; /* count of all objects */ vm_object_t kernel_object; vm_object_t kmem_object; @@ -202,7 +200,7 @@ void vm_object_init() { TAILQ_INIT(&vm_object_list); - simple_lock_init(&vm_object_list_lock); + lwkt_inittoken(&vm_object_list_token); vm_object_count = 0; kernel_object = &kernel_object_store; @@ -487,9 +485,9 @@ vm_object_terminate(object) /* * Remove the object from the global object list. 
*/ - simple_lock(&vm_object_list_lock); + lwkt_gettoken(&vm_object_list_token); TAILQ_REMOVE(&vm_object_list, object, object_list); - simple_unlock(&vm_object_list_lock); + lwkt_reltoken(&vm_object_list_token); wakeup(object); @@ -645,9 +643,9 @@ vm_object_page_clean(object, start, end, flags) if (object->type == OBJT_VNODE && (vp = (struct vnode *)object->handle) != NULL) { if (vp->v_flag & VOBJDIRTY) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_flag &= ~VOBJDIRTY; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } } } @@ -1711,9 +1709,9 @@ vm_object_set_writeable_dirty(vm_object_t object) if (object->type == OBJT_VNODE && (vp = (struct vnode *)object->handle) != NULL) { if ((vp->v_flag & VOBJDIRTY) == 0) { - simple_lock(&vp->v_interlock); + lwkt_gettoken(&vp->v_interlock); vp->v_flag |= VOBJDIRTY; - simple_unlock(&vp->v_interlock); + lwkt_reltoken(&vp->v_interlock); } } } diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c index 564ab5caca..310216a916 100644 --- a/sys/vm/vm_zone.c +++ b/sys/vm/vm_zone.c @@ -12,7 +12,7 @@ * John S. Dyson. * * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $ - * $DragonFly: src/sys/vm/vm_zone.c,v 1.3 2003/07/03 17:24:04 dillon Exp $ + * $DragonFly: src/sys/vm/vm_zone.c,v 1.4 2003/07/06 21:23:56 dillon Exp $ */ #include @@ -40,51 +40,55 @@ static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header"); #define ZONE_ROUNDING 32 #define ZENTRY_FREE 0x12342378 + +static void *zget(vm_zone_t z); + /* - * void *zalloc(vm_zone_t zone) -- - * Returns an item from a specified zone. - * - * void zfree(vm_zone_t zone, void *item) -- - * Frees an item back to a specified zone. + * Return an item from the specified zone. This function is interrupt/MP + * thread safe, but might block. */ -static __inline__ void * -_zalloc(vm_zone_t z) +void * +zalloc(vm_zone_t z) { void *item; #ifdef INVARIANTS - if (z == 0) + if (z == NULL) zerror(ZONE_ERROR_INVALID); #endif - + lwkt_gettoken(&z->zlock); if (z->zfreecnt <= z->zfreemin) { - item = _zget(z); + item = zget(z); /* * PANICFAIL allows the caller to assume that the zalloc() * will always succeed. If it doesn't, we panic here. */ if (item == NULL && (z->zflags & ZONE_PANICFAIL)) panic("zalloc(%s) failed", z->zname); - return(item); - } - - item = z->zitems; - z->zitems = ((void **) item)[0]; + } else { + item = z->zitems; + z->zitems = ((void **) item)[0]; #ifdef INVARIANTS - KASSERT(item != NULL, ("zitems unexpectedly NULL")); - if (((void **) item)[1] != (void *) ZENTRY_FREE) - zerror(ZONE_ERROR_NOTFREE); - ((void **) item)[1] = 0; + KASSERT(item != NULL, ("zitems unexpectedly NULL")); + if (((void **) item)[1] != (void *) ZENTRY_FREE) + zerror(ZONE_ERROR_NOTFREE); + ((void **) item)[1] = 0; #endif - - z->zfreecnt--; - z->znalloc++; + z->zfreecnt--; + z->znalloc++; + } + lwkt_reltoken(&z->zlock); return item; } -static __inline__ void -_zfree(vm_zone_t z, void *item) +/* + * Free an item to the specified zone. This function is interrupt/MP + * thread safe, but might block. 
+ */ +void +zfree(vm_zone_t z, void *item) { + lwkt_gettoken(&z->zlock); ((void **) item)[0] = z->zitems; #ifdef INVARIANTS if (((void **) item)[1] == (void *) ZENTRY_FREE) @@ -93,6 +97,7 @@ _zfree(vm_zone_t z, void *item) #endif z->zitems = item; z->zfreecnt++; + lwkt_reltoken(&z->zlock); } /* @@ -147,7 +152,7 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size, if ((z->zflags & ZONE_BOOT) == 0) { z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1); - simple_lock_init(&z->zlock); + lwkt_inittoken(&z->zlock); z->zfreecnt = 0; z->ztotal = 0; z->zmax = 0; @@ -249,7 +254,7 @@ zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems) z->zpagecount = 0; z->zalloc = 0; z->znalloc = 0; - simple_lock_init(&z->zlock); + lwkt_inittoken(&z->zlock); bzero(item, nitems * z->zsize); z->zitems = NULL; @@ -273,26 +278,6 @@ zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems) } } -/* - * Zone critical region locks. - */ -static __inline int -zlock(vm_zone_t z) -{ - int s; - - s = splhigh(); - simple_lock(&z->zlock); - return s; -} - -static __inline void -zunlock(vm_zone_t z, int s) -{ - simple_unlock(&z->zlock); - splx(s); -} - /* * void *zalloc(vm_zone_t zone) -- * Returns an item from a specified zone. @@ -308,58 +293,11 @@ zunlock(vm_zone_t z, int s) * */ -void * -zalloc(vm_zone_t z) -{ -#if defined(SMP) - return zalloci(z); -#else - return _zalloc(z); -#endif -} - -void -zfree(vm_zone_t z, void *item) -{ -#ifdef SMP - zfreei(z, item); -#else - _zfree(z, item); -#endif -} - -/* - * Zone allocator/deallocator. These are interrupt / (or potentially SMP) - * safe. The raw zalloc/zfree routines are not interrupt safe, but are fast. - */ -void * -zalloci(vm_zone_t z) -{ - int s; - void *item; - - s = zlock(z); - item = _zalloc(z); - zunlock(z, s); - return item; -} - -void -zfreei(vm_zone_t z, void *item) -{ - int s; - - s = zlock(z); - _zfree(z, item); - zunlock(z, s); - return; -} - /* * Internal zone routine. Not to be called from external (non vm_zone) code. */ -void * -_zget(vm_zone_t z) +static void * +zget(vm_zone_t z) { int i; vm_page_t m; @@ -381,9 +319,10 @@ _zget(vm_zone_t z) z->zallocflag); if (m == NULL) break; + lwkt_regettoken(&z->zlock); zkva = z->zkva + z->zpagecount * PAGE_SIZE; - pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); + pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); /* YYY */ bzero((caddr_t) zkva, PAGE_SIZE); z->zpagecount++; zone_kmem_pages++; @@ -394,39 +333,30 @@ _zget(vm_zone_t z) nbytes = z->zalloc * PAGE_SIZE; /* - * Check to see if the kernel map is already locked. We could allow - * for recursive locks, but that eliminates a valuable debugging - * mechanism, and opens up the kernel map for potential corruption - * by inconsistent data structure manipulation. We could also use - * the interrupt allocation mechanism, but that has size limitations. - * Luckily, we have kmem_map that is a submap of kernel map available - * for memory allocation, and manipulation of that map doesn't affect - * the kernel map structures themselves. + * Check to see if the kernel map is already locked. + * We could allow for recursive locks, but that eliminates + * a valuable debugging mechanism, and opens up the kernel + * map for potential corruption by inconsistent data structure + * manipulation. We could also use the interrupt allocation + * mechanism, but that has size limitations. 
Luckily, we + * have kmem_map that is a submap of kernel map available + * for memory allocation, and manipulation of that map doesn't + * affect the kernel map structures themselves. * - * We can wait, so just do normal map allocation in the appropriate - * map. + * We can wait, so just do normal map allocation in the + * appropriate map. */ if (lockstatus(&kernel_map->lock, NULL)) { int s; s = splvm(); -#ifdef SMP - simple_unlock(&z->zlock); -#endif item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK); -#ifdef SMP - simple_lock(&z->zlock); -#endif + lwkt_regettoken(&z->zlock); if (item != NULL) zone_kmem_pages += z->zalloc; splx(s); } else { -#ifdef SMP - simple_unlock(&z->zlock); -#endif item = (void *) kmem_alloc(kernel_map, nbytes); -#ifdef SMP - simple_lock(&z->zlock); -#endif + lwkt_regettoken(&z->zlock); if (item != NULL) zone_kern_pages += z->zalloc; } diff --git a/sys/vm/vm_zone.h b/sys/vm/vm_zone.h index b6a46f21a4..f687683d83 100644 --- a/sys/vm/vm_zone.h +++ b/sys/vm/vm_zone.h @@ -12,7 +12,7 @@ * John S. Dyson. * * $FreeBSD: src/sys/vm/vm_zone.h,v 1.13.2.2 2002/10/10 19:50:16 dillon Exp $ - * $DragonFly: src/sys/vm/vm_zone.h,v 1.2 2003/06/17 04:29:00 dillon Exp $ + * $DragonFly: src/sys/vm/vm_zone.h,v 1.3 2003/07/06 21:23:56 dillon Exp $ */ #ifndef _SYS_ZONE_H @@ -23,10 +23,14 @@ #define ZONE_PANICFAIL 0x0002 /* panic if the zalloc fails */ #define ZONE_BOOT 0x0010 /* Internal flag used by zbootinit */ +#ifndef _SYS_THREAD_H_ +#include +#endif + #include typedef struct vm_zone { - struct simplelock zlock; /* lock for data structure */ + struct lwkt_token zlock; /* lock for data structure */ void *zitems; /* linked list of items */ int zfreecnt; /* free entries */ int zfreemin; /* minimum number of free entries */ @@ -53,10 +57,7 @@ int zinitna __P((vm_zone_t z, struct vm_object *obj, char *name, int size, int nentries, int flags, int zalloc)); void * zalloc __P((vm_zone_t z)); void zfree __P((vm_zone_t z, void *item)); -void * zalloci __P((vm_zone_t z)); -void zfreei __P((vm_zone_t z, void *item)); void zbootinit __P((vm_zone_t z, char *name, int size, void *item, int nitems)); -void * _zget __P((vm_zone_t z)); #endif /* _SYS_ZONE_H */ diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 7cdd7aa8f9..d3de6114cf 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -39,7 +39,7 @@ * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 * $FreeBSD: src/sys/vm/vnode_pager.c,v 1.116.2.7 2002/12/31 09:34:51 dillon Exp $ - * $DragonFly: src/sys/vm/vnode_pager.c,v 1.6 2003/07/03 17:24:04 dillon Exp $ + * $DragonFly: src/sys/vm/vnode_pager.c,v 1.7 2003/07/06 21:23:56 dillon Exp $ */ /* @@ -546,7 +546,7 @@ vnode_pager_input_old(object, m) auio.uio_resid = size; auio.uio_td = curthread; - error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred); + error = VOP_READ(object->handle, &auio, 0, proc0.p_ucred); if (!error) { register int count = size - auio.uio_resid; @@ -1022,7 +1022,7 @@ vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals) auio.uio_rw = UIO_WRITE; auio.uio_resid = maxsize; auio.uio_td = NULL; - error = VOP_WRITE(vp, &auio, ioflags, curproc->p_ucred); + error = VOP_WRITE(vp, &auio, ioflags, proc0.p_ucred); mycpu->gd_cnt.v_vnodeout++; mycpu->gd_cnt.v_vnodepgsout += ncount; -- 2.41.0
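The vm_zone.c hunks above collapse the old _zalloc()/zalloci()/zfreei() split: zalloc() and zfree() are now the only entry points, and a per-zone LWKT token replaces the simplelock-plus-splhigh() pairing, with zget() calling lwkt_regettoken() after each operation that may have blocked. A minimal sketch of the resulting free-list discipline, assuming placeholder names (struct myzone, myzone_alloc(), myzone_free()) rather than the patch's actual structures:

struct myzone {
	struct lwkt_token zlock;	/* serializes the free list */
	void		 *zitems;	/* singly linked free items */
	int		  zfreecnt;	/* count of free items */
};

/*
 * Pop an item off the free list.  Modeled on the new zalloc(), but
 * returns NULL instead of refilling from the VM system as zget() does.
 */
static void *
myzone_alloc(struct myzone *z)
{
	void *item;

	lwkt_gettoken(&z->zlock);	/* may block; serialized on return */
	if ((item = z->zitems) != NULL) {
		z->zitems = ((void **)item)[0];	/* next free item */
		z->zfreecnt--;
	}
	lwkt_reltoken(&z->zlock);
	return (item);
}

/*
 * Push an item back onto the free list, as the new zfree() does.
 */
static void
myzone_free(struct myzone *z, void *item)
{
	lwkt_gettoken(&z->zlock);
	((void **)item)[0] = z->zitems;
	z->zitems = item;
	z->zfreecnt++;
	lwkt_reltoken(&z->zlock);
}

Because lwkt_gettoken() itself may block, the token (unlike the old spl-protected simplelock) makes zalloc()/zfree() usable from MP threads without the splhigh()/splx() bracket, at the cost of the lwkt_regettoken() resynchronization zget() now performs after kmem_alloc()/kmem_malloc().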