From c8fe38ae0761c8117b13caf9b76ad5fb86ac2135 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 29 Aug 2008 17:07:21 +0000 Subject: [PATCH] AMD64 - Sync AMD64 support from Jordan Gordeev's svn repository and Google SOC project. This work is still continuing but represents substantial progress in the effort. With this commit the world builds and installs, the loader is able to boot the kernel, and the kernel is able to initialize, probe devices, and exec the init program. The init program is then able to run until it hits its first fork(). For the purposes of the GSOC the project is being considered a big success! The code has been adapted from multiple sources, most notably Peter Wemm and other people's work from FreeBSD, with many modifications to make it work with DragonFly. Also thanks go to Simon Schubert for working on gdb and compiler issues, and to Noah Yan for a good chunk of precursor work in 2007. While Jordan wishes to be modest on his contribution, frankly we would not have been able to make this much progress without the large number of man-hours Jordan has dedicated to his GSOC project painstakingly gluing code together, tracking down issues, and progressing the boot sequence. 
Submitted-by: Jordan Gordeev --- sys/cpu/amd64/include/asmacros.h | 7 +- sys/cpu/amd64/include/atomic.h | 194 +- sys/cpu/amd64/include/cpufunc.h | 53 +- sys/cpu/amd64/include/frame.h | 7 +- sys/cpu/amd64/include/npx.h | 5 +- sys/cpu/amd64/include/param.h | 27 +- sys/cpu/amd64/include/pmap.h | 317 +- sys/cpu/amd64/include/segments.h | 231 +- sys/cpu/amd64/include/signal.h | 85 +- sys/cpu/amd64/include/specialreg.h | 4 +- sys/cpu/amd64/include/tls.h | 6 +- sys/cpu/amd64/include/tss.h | 8 +- sys/cpu/amd64/include/types.h | 6 +- sys/cpu/amd64/include/ucontext.h | 41 +- sys/cpu/amd64/misc/amd64-gdbstub.c | 666 ++++ sys/platform/pc64/amd64/autoconf.c | 149 +- sys/platform/pc64/amd64/console.c | 6 +- sys/platform/pc64/amd64/cpu_regs.c | 1264 ------- sys/platform/pc64/amd64/db_disasm.c | 454 ++- sys/platform/pc64/amd64/db_interface.c | 64 +- sys/platform/pc64/amd64/db_trace.c | 173 +- sys/platform/pc64/amd64/exception.S | 512 +++ sys/platform/pc64/amd64/exception.c | 122 - sys/platform/pc64/amd64/fork_tramp.s | 108 - sys/platform/pc64/amd64/genassym.c | 118 +- sys/platform/pc64/amd64/global.s | 15 +- sys/platform/pc64/amd64/globaldata.c | 6 +- sys/platform/pc64/amd64/identcpu.c | 557 +++ sys/platform/pc64/amd64/in_cksum2.s | 26 +- sys/platform/pc64/amd64/init.c | 26 +- sys/platform/pc64/amd64/initcpu.c | 84 + sys/platform/pc64/amd64/ipl.s | 534 +++ sys/platform/pc64/amd64/locore.s | 20 +- sys/platform/pc64/amd64/machdep.c | 2471 +++++++++++++ sys/platform/pc64/amd64/machintr.c | 143 - sys/platform/pc64/amd64/nexus.c | 596 +++ sys/platform/pc64/amd64/npx.c | 80 +- sys/platform/pc64/amd64/pmap.c | 3225 +++++++++++++++-- sys/platform/pc64/amd64/pmap_inval.c | 151 + sys/platform/pc64/amd64/spinlock.s | 112 + sys/platform/pc64/amd64/support.s | 35 +- sys/platform/pc64/amd64/swtch.s | 557 ++- sys/platform/pc64/amd64/systimer.c | 9 +- sys/platform/pc64/amd64/tls.c | 114 +- sys/platform/pc64/amd64/trap.c | 1293 ++++++- sys/platform/pc64/amd64/vm_machdep.c | 211 +- 
sys/platform/pc64/apic/apic_abi.c | 375 ++ sys/platform/pc64/apic/apic_ipl.h | 57 + sys/platform/pc64/apic/apic_ipl.s | 163 + sys/platform/pc64/apic/apic_vector.s | 536 +++ sys/platform/pc64/apic/apicreg.h | 930 +++++ sys/platform/pc64/apic/apicvar.h | 154 + sys/platform/pc64/apic/mpapic.c | 772 ++++ sys/platform/pc64/apic/mpapic.h | 82 + sys/platform/pc64/conf/files | 77 +- sys/platform/pc64/conf/kern.mk | 9 +- sys/platform/pc64/conf/options | 43 +- .../include/tss.h => platform/pc64/icu/icu.h} | 70 +- sys/platform/pc64/icu/icu_abi.c | 240 ++ .../pc64/{include/pcb_ext.h => icu/icu_ipl.h} | 55 +- sys/platform/pc64/icu/icu_ipl.s | 134 + sys/platform/pc64/icu/icu_vector.s | 276 ++ sys/platform/pc64/include/globaldata.h | 31 +- sys/platform/pc64/include/intr_machdep.h | 11 +- .../pc64/{amd64/console.c => include/ipl.h} | 21 +- sys/platform/pc64/include/lock.h | 54 +- sys/platform/pc64/include/md_var.h | 11 +- .../pc64/include/{pcb_ext.h => metadata.h} | 52 +- .../pc64/include/{pcb_ext.h => nexusvar.h} | 65 +- sys/platform/pc64/include/param.h | 7 +- sys/platform/pc64/include/pc/bios.h | 77 + sys/platform/pc64/include/pc/display.h | 46 + sys/platform/pc64/include/pcb.h | 7 +- sys/platform/pc64/include/pcb_ext.h | 9 +- .../amd64 => platform/pc64}/include/pmap.h | 194 +- sys/platform/pc64/include/thread.h | 10 +- sys/platform/pc64/include/vmparam.h | 35 +- sys/platform/pc64/isa/README.le | 69 + sys/platform/pc64/isa/README.stl | 530 +++ sys/platform/pc64/isa/asc.c | 872 +++++ sys/platform/pc64/isa/ascreg.h | 98 + sys/platform/pc64/isa/clock.c | 1221 +++++++ sys/platform/pc64/isa/ic/Am7990.h | 173 + sys/platform/pc64/isa/ic/am7990.h | 110 + sys/platform/pc64/isa/ic/cd1400.h | 204 ++ sys/platform/pc64/isa/ic/cd180.h | 199 + sys/platform/pc64/isa/ic/hd64570.h | 373 ++ sys/platform/pc64/isa/ic/i8237.h | 13 + sys/platform/pc64/isa/ic/i82586.h | 333 ++ sys/platform/pc64/isa/ic/lemac.h | 178 + sys/platform/pc64/isa/ic/mb86960.h | 341 ++ sys/platform/pc64/isa/ic/sc26198.h | 547 
+++ sys/platform/pc64/isa/ic/scd1400.h | 313 ++ sys/platform/pc64/isa/intr_machdep.c | 273 ++ sys/platform/pc64/isa/intr_machdep.h | 166 + sys/platform/pc64/isa/lptreg.h | 35 + sys/platform/pc64/{amd64 => isa}/npx.c | 628 +++- sys/platform/pc64/isa/pmtimer.c | 125 + sys/platform/pc64/isa/prof_machdep.c | 362 ++ sys/platform/pc64/isa/timerreg.h | 145 + sys/sys/tls.h | 6 +- 101 files changed, 23297 insertions(+), 3512 deletions(-) create mode 100644 sys/cpu/amd64/misc/amd64-gdbstub.c delete mode 100644 sys/platform/pc64/amd64/cpu_regs.c create mode 100644 sys/platform/pc64/amd64/exception.S delete mode 100644 sys/platform/pc64/amd64/exception.c delete mode 100644 sys/platform/pc64/amd64/fork_tramp.s create mode 100644 sys/platform/pc64/amd64/identcpu.c create mode 100644 sys/platform/pc64/amd64/initcpu.c create mode 100644 sys/platform/pc64/amd64/ipl.s create mode 100644 sys/platform/pc64/amd64/machdep.c delete mode 100644 sys/platform/pc64/amd64/machintr.c create mode 100644 sys/platform/pc64/amd64/nexus.c create mode 100644 sys/platform/pc64/amd64/pmap_inval.c create mode 100644 sys/platform/pc64/amd64/spinlock.s create mode 100644 sys/platform/pc64/apic/apic_abi.c create mode 100644 sys/platform/pc64/apic/apic_ipl.h create mode 100644 sys/platform/pc64/apic/apic_ipl.s create mode 100644 sys/platform/pc64/apic/apic_vector.s create mode 100644 sys/platform/pc64/apic/apicreg.h create mode 100644 sys/platform/pc64/apic/apicvar.h create mode 100644 sys/platform/pc64/apic/mpapic.c create mode 100644 sys/platform/pc64/apic/mpapic.h copy sys/{cpu/amd64/include/tss.h => platform/pc64/icu/icu.h} (59%) create mode 100644 sys/platform/pc64/icu/icu_abi.c copy sys/platform/pc64/{include/pcb_ext.h => icu/icu_ipl.h} (56%) create mode 100644 sys/platform/pc64/icu/icu_ipl.s create mode 100644 sys/platform/pc64/icu/icu_vector.s copy sys/platform/pc64/{amd64/console.c => include/ipl.h} (84%) copy sys/platform/pc64/include/{pcb_ext.h => metadata.h} (55%) copy 
sys/platform/pc64/include/{pcb_ext.h => nexusvar.h} (56%) create mode 100644 sys/platform/pc64/include/pc/bios.h create mode 100644 sys/platform/pc64/include/pc/display.h copy sys/{cpu/amd64 => platform/pc64}/include/pmap.h (54%) create mode 100644 sys/platform/pc64/isa/README.le create mode 100644 sys/platform/pc64/isa/README.stl create mode 100644 sys/platform/pc64/isa/asc.c create mode 100644 sys/platform/pc64/isa/ascreg.h create mode 100644 sys/platform/pc64/isa/clock.c create mode 100644 sys/platform/pc64/isa/ic/Am7990.h create mode 100644 sys/platform/pc64/isa/ic/am7990.h create mode 100644 sys/platform/pc64/isa/ic/cd1400.h create mode 100644 sys/platform/pc64/isa/ic/cd180.h create mode 100644 sys/platform/pc64/isa/ic/hd64570.h create mode 100644 sys/platform/pc64/isa/ic/i8237.h create mode 100644 sys/platform/pc64/isa/ic/i82586.h create mode 100644 sys/platform/pc64/isa/ic/lemac.h create mode 100644 sys/platform/pc64/isa/ic/mb86960.h create mode 100644 sys/platform/pc64/isa/ic/sc26198.h create mode 100644 sys/platform/pc64/isa/ic/scd1400.h create mode 100644 sys/platform/pc64/isa/intr_machdep.c create mode 100644 sys/platform/pc64/isa/intr_machdep.h create mode 100644 sys/platform/pc64/isa/lptreg.h copy sys/platform/pc64/{amd64 => isa}/npx.c (54%) create mode 100644 sys/platform/pc64/isa/pmtimer.c create mode 100644 sys/platform/pc64/isa/prof_machdep.c create mode 100644 sys/platform/pc64/isa/timerreg.h diff --git a/sys/cpu/amd64/include/asmacros.h b/sys/cpu/amd64/include/asmacros.h index 6d9f47698c..c5aa487780 100644 --- a/sys/cpu/amd64/include/asmacros.h +++ b/sys/cpu/amd64/include/asmacros.h @@ -1,5 +1,6 @@ -/*- +/* * Copyright (c) 1993 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +28,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $ - * $DragonFly: src/sys/cpu/amd64/include/asmacros.h,v 1.1 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/asmacros.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_ASMACROS_H_ @@ -142,7 +143,7 @@ * Macros to create and destroy a trap frame. */ #define PUSH_FRAME \ - subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + subq $TF_RIP,%rsp ; /* extend hardware frame to trapframe */ \ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ jz 1f ; /* Yes, dont swapgs again */ \ swapgs ; \ diff --git a/sys/cpu/amd64/include/atomic.h b/sys/cpu/amd64/include/atomic.h index 33a6367b78..8e344c3d54 100644 --- a/sys/cpu/amd64/include/atomic.h +++ b/sys/cpu/amd64/include/atomic.h @@ -1,5 +1,6 @@ /*- - * Copyright (c) 1998 Doug Rabson + * Copyright (c) 1998 Doug Rabson. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/amd64/include/atomic.h,v 1.32 2003/11/21 03:02:00 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/atomic.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/atomic.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_ATOMIC_H_ #define _CPU_ATOMIC_H_ @@ -34,30 +35,30 @@ #endif /* - * Various simple arithmetic on memory which is atomic in the presence - * of interrupts and multiple processors. + * Various simple operations on memory, each of which is atomic in the + * presence of interrupts and multiple processors. 
* - * atomic_set_char(P, V) (*(u_char*)(P) |= (V)) - * atomic_clear_char(P, V) (*(u_char*)(P) &= ~(V)) - * atomic_add_char(P, V) (*(u_char*)(P) += (V)) - * atomic_subtract_char(P, V) (*(u_char*)(P) -= (V)) + * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) + * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) + * atomic_add_char(P, V) (*(u_char *)(P) += (V)) + * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * - * atomic_set_short(P, V) (*(u_short*)(P) |= (V)) - * atomic_clear_short(P, V) (*(u_short*)(P) &= ~(V)) - * atomic_add_short(P, V) (*(u_short*)(P) += (V)) - * atomic_subtract_short(P, V) (*(u_short*)(P) -= (V)) + * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) + * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) + * atomic_add_short(P, V) (*(u_short *)(P) += (V)) + * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * - * atomic_set_int(P, V) (*(u_int*)(P) |= (V)) - * atomic_clear_int(P, V) (*(u_int*)(P) &= ~(V)) - * atomic_add_int(P, V) (*(u_int*)(P) += (V)) - * atomic_subtract_int(P, V) (*(u_int*)(P) -= (V)) - * atomic_readandclear_int(P) (return *(u_int*)P; *(u_int*)P = 0;) + * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) + * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) + * atomic_add_int(P, V) (*(u_int *)(P) += (V)) + * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) + * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * - * atomic_set_long(P, V) (*(u_long*)(P) |= (V)) - * atomic_clear_long(P, V) (*(u_long*)(P) &= ~(V)) - * atomic_add_long(P, V) (*(u_long*)(P) += (V)) - * atomic_subtract_long(P, V) (*(u_long*)(P) -= (V)) - * atomic_readandclear_long(P) (return *(u_long*)P; *(u_long*)P = 0;) + * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) + * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) + * atomic_add_long(P, V) (*(u_long *)(P) += (V)) + * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) + * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ /* @@ -69,12 +70,14 @@ * This allows kernel 
modules to be portable between UP and SMP systems. */ #if defined(KLD_MODULE) -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ +#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v); -int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src); -int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src); +int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src); +int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src); +u_int atomic_fetchadd_int(volatile u_int *p, u_int v); +u_long atomic_fetchadd_long(volatile u_long *p, u_long v); #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ @@ -85,20 +88,20 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #ifdef __GNUC__ /* - * For userland, assume the SMP case and use lock prefixes so that - * the binaries will run on both types of systems. + * For userland, always use lock prefixes so that the binaries will run + * on both SMP and !SMP systems. */ #if defined(SMP) || !defined(_KERNEL) -#define MPLOCKED "lock ; " +#define MPLOCKED "lock ; " #else -#define MPLOCKED +#define MPLOCKED #endif /* * The assembly is volatilized to demark potential before-and-after side * effects if an interrupt or SMP collision were to occur. */ -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ +#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ @@ -244,6 +247,43 @@ atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg) : "ax", "cx", "dx", "di"); /* XXX clobbers more regs */ } +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. 
+ */ +static __inline u_int +atomic_fetchadd_int(volatile u_int *p, u_int v) +{ + + __asm __volatile( + " " MPLOCKED " " + " xaddl %0, %1 ; " + "# atomic_fetchadd_int" + : "+r" (v), /* 0 (result) */ + "=m" (*p) /* 1 */ + : "m" (*p)); /* 2 */ + + return (v); +} + +/* + * Atomically add the value of v to the long integer pointed to by p and return + * the previous value of *p. + */ +static __inline u_long +atomic_fetchadd_long(volatile u_long *p, u_long v) +{ + + __asm __volatile( + " " MPLOCKED " " + " xaddq %0, %1 ; " + "# atomic_fetchadd_long" + : "+r" (v), /* 0 (result) */ + "=m" (*p) /* 1 */ + : "m" (*p)); /* 2 */ + + return (v); +} /* * Attempt to enter the interrupt condition variable. Returns zero on * success, 1 on failure. @@ -343,7 +383,7 @@ atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src) #if defined(__GNUC__) -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ +#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ @@ -410,8 +450,6 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_cmpset_32 atomic_cmpset_int -#if 0 - #undef ATOMIC_ASM #undef ATOMIC_STORE_LOAD @@ -441,8 +479,8 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_add_rel_int atomic_add_int #define atomic_subtract_acq_int atomic_subtract_int #define atomic_subtract_rel_int atomic_subtract_int -#define atomic_cmpset_acq_int atomic_cmpset_int -#define atomic_cmpset_rel_int atomic_cmpset_int +#define atomic_cmpset_acq_int atomic_cmpset_int +#define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_set_acq_long atomic_set_long #define atomic_set_rel_long atomic_set_long @@ -453,9 +491,6 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long -#define atomic_cmpset_acq_ptr atomic_cmpset_ptr -#define atomic_cmpset_rel_ptr atomic_cmpset_ptr - #define 
atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char @@ -471,6 +506,7 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char +/* Operations on 16-bit words. */ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short @@ -486,6 +522,7 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short +/* Operations on 32-bit double words. */ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int @@ -500,63 +537,38 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int +#define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_readandclear_32 atomic_readandclear_int - -#if !defined(WANT_FUNCTIONS) -static __inline int -atomic_cmpset_ptr(volatile void *dst, void *exp, void *src) -{ - - return (atomic_cmpset_long((volatile u_long *)dst, - (u_long)exp, (u_long)src)); -} - -static __inline void * -atomic_load_acq_ptr(volatile void *p) -{ - /* - * The apparently-bogus cast to intptr_t in the following is to - * avoid a warning from "gcc -Wbad-function-cast". 
- */ - return ((void *)(intptr_t)atomic_load_acq_long((volatile u_long *)p)); -} - -static __inline void -atomic_store_rel_ptr(volatile void *p, void *v) -{ - atomic_store_rel_long((volatile u_long *)p, (u_long)v); -} - -#define ATOMIC_PTR(NAME) \ -static __inline void \ -atomic_##NAME##_ptr(volatile void *p, uintptr_t v) \ -{ \ - atomic_##NAME##_long((volatile u_long *)p, v); \ -} \ - \ -static __inline void \ -atomic_##NAME##_acq_ptr(volatile void *p, uintptr_t v) \ -{ \ - atomic_##NAME##_acq_long((volatile u_long *)p, v);\ -} \ - \ -static __inline void \ -atomic_##NAME##_rel_ptr(volatile void *p, uintptr_t v) \ -{ \ - atomic_##NAME##_rel_long((volatile u_long *)p, v);\ -} - -ATOMIC_PTR(set) -ATOMIC_PTR(clear) -ATOMIC_PTR(add) -ATOMIC_PTR(subtract) - -#undef ATOMIC_PTR +#define atomic_fetchadd_32 atomic_fetchadd_int + +/* Operations on pointers. */ +#define atomic_set_ptr atomic_set_long +#define atomic_set_acq_ptr atomic_set_acq_long +#define atomic_set_rel_ptr atomic_set_rel_long +#define atomic_clear_ptr atomic_clear_long +#define atomic_clear_acq_ptr atomic_clear_acq_long +#define atomic_clear_rel_ptr atomic_clear_rel_long +#define atomic_add_ptr atomic_add_long +#define atomic_add_acq_ptr atomic_add_acq_long +#define atomic_add_rel_ptr atomic_add_rel_long +#define atomic_subtract_ptr atomic_subtract_long +#define atomic_subtract_acq_ptr atomic_subtract_acq_long +#define atomic_subtract_rel_ptr atomic_subtract_rel_long +#define atomic_load_acq_ptr atomic_load_acq_long +#define atomic_store_rel_ptr atomic_store_rel_long +#define atomic_cmpset_ptr atomic_cmpset_long +#define atomic_cmpset_acq_ptr atomic_cmpset_acq_long +#define atomic_cmpset_rel_ptr atomic_cmpset_rel_long +#define atomic_readandclear_ptr atomic_readandclear_long #if defined(__GNUC__) +#if defined(KLD_MODULE) +extern u_int atomic_readandclear_int(volatile u_int *addr); +extern u_long atomic_readandclear_long(volatile u_long *addr); +#else /* !KLD_MODULE */ static __inline u_int 
atomic_readandclear_int(volatile u_int *addr) { @@ -586,6 +598,7 @@ atomic_readandclear_long(volatile u_long *addr) return (result); } +#endif /* KLD_MODULE */ #else /* !defined(__GNUC__) */ @@ -594,7 +607,4 @@ extern u_int atomic_readandclear_int(volatile u_int *); #endif /* defined(__GNUC__) */ -#endif /* !defined(WANT_FUNCTIONS) */ -#endif /* 0 */ - #endif /* ! _CPU_ATOMIC_H_ */ diff --git a/sys/cpu/amd64/include/cpufunc.h b/sys/cpu/amd64/include/cpufunc.h index a0d21f80a1..6359332be5 100644 --- a/sys/cpu/amd64/include/cpufunc.h +++ b/sys/cpu/amd64/include/cpufunc.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +33,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/amd64/include/cpufunc.h,v 1.139 2004/01/28 23:53:04 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/cpufunc.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/cpufunc.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ /* @@ -112,7 +113,7 @@ bsrq(u_long mask) } static __inline void -disable_intr(void) +cpu_disable_intr(void) { __asm __volatile("cli" : : : "memory"); } @@ -125,6 +126,14 @@ do_cpuid(u_int ax, u_int *p) : "0" (ax)); } +static __inline void +cpuid_count(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); +} + static __inline void cpu_enable_intr(void) { @@ -345,6 +354,34 @@ invd(void) __asm __volatile("invd"); } +#if defined(_KERNEL) + +/* + * If we are not a true-SMP box then smp_invltlb() is a NOP. Note that this + * will cause the invl*() functions to be equivalent to the cpu_invl*() + * functions. 
+ */ +#ifdef SMP +void smp_invltlb(void); +#else +#define smp_invltlb() +#endif + +#ifndef _CPU_INVLPG_DEFINED + +/* + * Invalidate a patricular VA on this cpu only + */ +static __inline void +cpu_invlpg(void *addr) +{ + __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); +} + +#endif + +#endif /* _KERNEL */ + static __inline u_short inw(u_int port) { @@ -543,7 +580,7 @@ rcr4(void) * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void -invltlb(void) +cpu_invltlb(void) { load_cr3(rcr3()); @@ -618,7 +655,7 @@ load_gs(u_int sel) * being trashed happens to be the kernel gsbase at the time. */ gsbase = MSR_GSBASE; - __asm __volatile("pushfq; cli; rdmsr; movl %0,%%gs; wrmsr; popfq" + __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq" : : "rm" (sel), "c" (gsbase) : "eax", "edx"); } #else @@ -775,7 +812,7 @@ intr_disable(void) register_t rflags; rflags = read_rflags(); - disable_intr(); + cpu_disable_intr(); return (rflags); } @@ -791,11 +828,11 @@ int breakpoint(void); void cpu_pause(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); +void cpu_disable_intr(void); +void cpu_enable_intr(void); void cpu_invlpg(u_long addr); void cpu_invlpg_range(u_long start, u_long end); -void disable_intr(void); void do_cpuid(u_int ax, u_int *p); -void enable_intr(void); void halt(void); u_char inb(u_int port); u_int inl(u_int port); @@ -805,7 +842,7 @@ void insw(u_int port, void *addr, size_t cnt); void invd(void); void invlpg(u_int addr); void invlpg_range(u_int start, u_int end); -void invltlb(void); +void cpu_invltlb(void); u_short inw(u_int port); void load_cr0(u_int cr0); void load_cr3(u_int cr3); diff --git a/sys/cpu/amd64/include/frame.h b/sys/cpu/amd64/include/frame.h index 825e68a30d..ee389dd83d 100644 --- a/sys/cpu/amd64/include/frame.h +++ b/sys/cpu/amd64/include/frame.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. 
+ * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -36,12 +37,15 @@ * * from: @(#)frame.h 5.2 (Berkeley) 1/18/91 * $FreeBSD: src/sys/amd64/include/frame.h,v 1.26 2003/11/08 04:39:22 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/frame.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/frame.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_FRAME_H_ #define _CPU_FRAME_H_ +/* JG? */ +#include + /* * System stack frames. */ @@ -88,7 +92,6 @@ struct trapframe { struct intrframe { register_t if_vec; /* vec */ - /* ppl */ /* fs XXX */ /* es XXX */ /* ds XXX */ diff --git a/sys/cpu/amd64/include/npx.h b/sys/cpu/amd64/include/npx.h index d698eb9b42..4d74a9c6b3 100644 --- a/sys/cpu/amd64/include/npx.h +++ b/sys/cpu/amd64/include/npx.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +36,7 @@ * * from: @(#)npx.h 5.3 (Berkeley) 1/18/91 * $FreeBSD: src/sys/i386/include/npx.h,v 1.18.2.1 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/npx.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/npx.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ /* @@ -156,7 +157,7 @@ struct trapframe; void npxexit (void); void npxinit (u_short control); void npxsave (union savefpu *addr); -int npxdna(struct trapframe *); +int npxdna(void); #endif #endif /* !_CPU_NPX_H_ */ diff --git a/sys/cpu/amd64/include/param.h b/sys/cpu/amd64/include/param.h index ee4da86368..a640d841ba 100644 --- a/sys/cpu/amd64/include/param.h +++ b/sys/cpu/amd64/include/param.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -35,7 +36,7 @@ * * from: @(#)param.h 5.8 (Berkeley) 6/28/91 * $FreeBSD: src/sys/i386/include/param.h,v 1.54.2.8 2002/08/31 21:15:55 dillon Exp $ - * $DragonFly: src/sys/cpu/amd64/include/param.h,v 1.3 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/param.h,v 1.4 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_PARAM_H_ @@ -98,27 +99,33 @@ #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) +/* JG license? from fbsd/src/sys/amd64/include/param.h */ /* level 1 == page table */ +#define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */ #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ #define PAGE_SIZE (1<> PAGE_SHIFT) +#define atop(x) ((vm_pindex_t)((x) >> PAGE_SHIFT)) #endif -#define ptoa(x) ((x) << PAGE_SHIFT) +#define ptoa(x) ((vm_paddr_t)(x) << PAGE_SHIFT) -#define i386_btop(x) ((x) >> PAGE_SHIFT) -#define i386_ptob(x) ((x) << PAGE_SHIFT) +#define amd64_btop(x) ((vm_pindex_t)((x) >> PAGE_SHIFT)) +#define amd64_ptob(x) ((vm_paddr_t)(x) << PAGE_SHIFT) #define pgtok(x) ((x) * (PAGE_SIZE / 1024)) diff --git a/sys/cpu/amd64/include/pmap.h b/sys/cpu/amd64/include/pmap.h index 5946d823a6..cbed116ce7 100644 --- a/sys/cpu/amd64/include/pmap.h +++ b/sys/cpu/amd64/include/pmap.h @@ -1,7 +1,13 @@ -/* - * Copyright (c) 2003 Matthew Dillon +/*- + * Copyright (c) 1991 Regents of the University of California. + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -10,11 +16,14 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -23,61 +32,57 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/cpu/amd64/include/pmap.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD$ + * $DragonFly: src/sys/cpu/amd64/include/pmap.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ + #ifndef _CPU_PMAP_H_ -#define _CPU_PMAP_H_ +#define _CPU_PMAP_H_ /* - * A four level page table is implemented by the amd64 hardware. 
Each - * page table represents 9 address bits and eats 4KB of space. There are - * 512 8-byte entries in each table. The last page table contains PTE's - * representing 4K pages (12 bits of address space). - * - * The page tables are named: - * PML4 Represents 512GB per entry (256TB total) LEVEL4 - * PDP Represents 1GB per entry LEVEL3 - * PDE Represents 2MB per entry LEVEL2 - * PTE Represents 4KB per entry LEVEL1 - * - * PG_PAE PAE 2MB extension. In the PDE. If 0 there is another level - * of page table and PG_D and PG_G are ignored. If 1 this is - * the terminating page table and PG_D and PG_G apply. - * - * PG_PWT Page write through. If 1 caching is disabled for data - * represented by the page. - * PG_PCD Page Cache Disable. If 1 the page table entry will not - * be cached in the data cache. - * - * Each entry in the PML4 table represents a 512GB VA space. We use a fixed - * PML4 and adjust entries within it to switch user spaces. + * Page-directory and page-table entries follow this format, with a few + * of the fields not present here and there, depending on a lot of things. 
*/ - -#define PG_V 0x0001LL /* P Present */ -#define PG_RW 0x0002LL /* R/W Writable */ -#define PG_U 0x0004LL /* U/S User */ -#define PG_PWT 0x0008LL /* PWT Page Write Through */ -#define PG_PCD 0x0010LL /* PCD Page Cache Disable */ -#define PG_A 0x0020LL /* A Accessed */ -#define PG_D 0x0040LL /* D Dirty (pte only) */ -#define PG_PS 0x0080LL /* PAT (pte only) */ -#define PG_G 0x0100LL /* G Global (pte only) */ -#define PG_USR0 0x0200LL /* available to os */ -#define PG_USR1 0x0400LL /* available to os */ -#define PG_USR2 0x0800LL /* available to os */ -#define PG_PTE_PAT PG_PAE /* PAT bit for 4K pages */ -#define PG_PDE_PAT 0x1000LL /* PAT bit for 2M pages */ -#define PG_FRAME 0x000000FFFFFF0000LL /* 40 bit phys address */ -#define PG_PHYSRESERVED 0x000FFF0000000000LL /* reserved for future PA */ -#define PG_USR3 0x0010000000000000LL /* avilable to os */ + /* ---- Intel Nomenclature ---- */ +#define PG_V 0x001 /* P Valid */ +#define PG_RW 0x002 /* R/W Read/Write */ +#define PG_U 0x004 /* U/S User/Supervisor */ +#define PG_NC_PWT 0x008 /* PWT Write through */ +#define PG_NC_PCD 0x010 /* PCD Cache disable */ +#define PG_A 0x020 /* A Accessed */ +#define PG_M 0x040 /* D Dirty */ +#define PG_PS 0x080 /* PS Page size (0=4k,1=2M) */ +#define PG_PTE_PAT 0x080 /* PAT PAT index */ +#define PG_G 0x100 /* G Global */ +#define PG_AVAIL1 0x200 /* / Available for system */ +#define PG_AVAIL2 0x400 /* < programmers use */ +#define PG_AVAIL3 0x800 /* \ */ +#define PG_PDE_PAT 0x1000 /* PAT PAT index */ +#define PG_NX (1ul<<63) /* No-execute */ + + +/* Our various interpretations of the above */ +#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 +#define PG_FRAME (0x000ffffffffff000ul) +#define PG_PS_FRAME (0x000fffffffe00000ul) +#define PG_PROT (PG_RW|PG_U) /* all protection bits . 
*/ +#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* - * OS assignments + * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB + * (PTE) page mappings have identical settings for the following fields: */ -#define PG_W PG_USR0 /* Wired */ -#define PG_MANAGED PG_USR1 /* Managed */ -#define PG_PROT (PG_RW|PG_U) /* all protection bits . */ -#define PG_N (PG_PWT|PG_PCD) /* Non-cacheable */ +#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \ + PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V) /* * Page Protection Exception bits @@ -86,213 +91,7 @@ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ - -#define PGEX_MAILBOX 0x40 -#define PGEX_FPFAULT 0x80 - -/* - * User space is limited to one PML4 entry (512GB). Kernel space is also - * limited to one PML4 entry. Other PML4 entries are used to map foreign - * user spaces into KVM. Typically each cpu in the system reserves two - * PML4 entries for private use. - */ -#define UVA_MAXMEM (512LL*1024*1024*1024) -#define KVA_MAXMEM (512LL*1024*1024*1024) - -/* - * Pte related macros. This is complicated by having to deal with - * the sign extension of the 48th bit. 
- */ -#define KVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)-1 << 47) | \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - -#define UVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - - -#define NKPML4E 1 -#define NKPDPE 1 -#define NKPDE (NKPDPE*NPDEPG) - -#define NUPML4E (NPML4EPG/2) -#define NUPDPE (NUPML4E*NPDPEPG) -#define NUPDE (NUPDPE*NPDEPG) - - -/* - * The *PTDI values control the layout of virtual memory - * - * XXX This works for now, but I am not real happy with it, I'll fix it - * right after I fix locore.s and the magic 28K hole - * - * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff - */ -#define APTDPTDI (NPDEPG-1) /* alt ptd entry that points to APTD */ -#define MPPTDI (APTDPTDI-1) /* per cpu ptd entry */ -#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ -#define PTDPTDI (KPTDI-1) /* ptd entry that points to ptd! */ -#define UMAXPTDI (PTDPTDI-1) /* ptd entry for user space end */ -#define UMAXPTEOFF (NPTEPG) /* pte entry for user space end */ - -#define KPML4I (NPML4EPG-1) - -#define KPDPI (NPDPEPG-2) - -/* - * XXX doesn't really belong here I guess... - */ -#define ISA_HOLE_START 0xa0000 -#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) - -#ifndef LOCORE - -#include - -/* - * Address of current and alternate address space page table maps - * and directories. - */ -#ifdef _KERNEL -extern pt_entry_t PTmap[], APTmap[], Upte; -extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; - -extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */ -#endif - -#ifdef _KERNEL -/* - * virtual address to page table entry and - * to physical address. Likewise for alternate address space. 
- * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ -#define vtopte(va) (PTmap + i386_btop(va)) - -#define avtopte(va) (APTmap + i386_btop(va)) - -/* - * Routine: pmap_kextract - * Function: - * Extract the physical page address associated - * kernel virtual address. - */ -static __inline vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - vm_paddr_t pa; - - if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) { - pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1)); - } else { - pa = *(vm_offset_t *)vtopte(va); - pa = (pa & PG_FRAME) | (va & PAGE_MASK); - } - return pa; -} - -/* - * XXX - */ -#define vtophys(va) pmap_kextract(((vm_offset_t)(va))) -#define vtophys_pte(va) ((pt_entry_t)pmap_kextract(((vm_offset_t)(va)))) - -#define avtophys(va) (((vm_offset_t) (*avtopte(va))&PG_FRAME) | ((vm_offset_t)(va) & PAGE_MASK)) - -#endif - -/* - * Pmap stuff - */ -struct pv_entry; - -struct md_page { - int pv_list_count; - TAILQ_HEAD(,pv_entry) pv_list; -}; - -/* - * Each machine dependent implementation is expected to - * keep certain statistics. They may do this anyway they - * so choose, but are expected to return the statistics - * in the following structure. 
- */ -struct pmap_statistics { - long resident_count; /* # of pages mapped (total) */ - long wired_count; /* # of pages wired */ -}; -typedef struct pmap_statistics *pmap_statistics_t; - -struct vm_object; -struct vm_page; - -struct pmap { - pd_entry_t *pm_pdir; /* KVA of page directory */ - struct vm_object *pm_pteobj; /* Container for pte's */ - TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - int pm_count; /* reference count */ - cpumask_t pm_active; /* active on cpus */ - struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_page *pm_ptphint; /* pmap ptp hint */ -}; - -#define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count - -typedef struct pmap *pmap_t; - -#ifdef _KERNEL -extern struct pmap kernel_pmap; -#endif - -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry_t, the list is pv_list - */ -typedef struct pv_entry { - pmap_t pv_pmap; /* pmap where mapping lies */ - vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; - struct vm_page *pv_ptem; /* VM page for pte */ -} *pv_entry_t; - -#ifdef _KERNEL - -#define NPPROVMTRR 8 -#define PPRO_VMTRRphysBase0 0x200 -#define PPRO_VMTRRphysMask0 0x201 -struct ppro_vmtrr { - u_int64_t base, mask; -}; -extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR]; - -extern caddr_t CADDR1; -extern pt_entry_t *CMAP1; -extern vm_paddr_t avail_end; -extern vm_paddr_t avail_start; -extern vm_offset_t clean_eva; -extern vm_offset_t clean_sva; -extern char *ptvmmap; /* poor name! 
*/ -extern vm_offset_t virtual_avail; - -void pmap_bootstrap ( vm_paddr_t, vm_paddr_t); -pmap_t pmap_kernel (void); -void *pmap_mapdev (vm_paddr_t, vm_size_t); -void pmap_unmapdev (vm_offset_t, vm_size_t); -unsigned *pmap_pte (pmap_t, vm_offset_t) __pure2; -struct vm_page *pmap_use_pt (pmap_t, vm_offset_t); -#ifdef SMP -void pmap_set_opt (void); -#endif - -#endif /* _KERNEL */ - -#endif /* !LOCORE */ +#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ +#define PGEX_I 0x10 /* during an instruction fetch */ #endif /* !_CPU_PMAP_H_ */ diff --git a/sys/cpu/amd64/include/segments.h b/sys/cpu/amd64/include/segments.h index c1cdfc8902..86eae43ab4 100644 --- a/sys/cpu/amd64/include/segments.h +++ b/sys/cpu/amd64/include/segments.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -36,24 +37,24 @@ * * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/segments.h,v 1.24 1999/12/29 04:33:07 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/segments.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/segments.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_SEGMENTS_H_ #define _CPU_SEGMENTS_H_ /* - * 386 Segmentation Data Structures and definitions - * William F. 
Jolitz (william@ernie.berkeley.edu) 6/20/1989 + * AMD64 Segmentation Data Structures and definitions */ /* * Selectors */ -#define ISPL(s) ((s)&3) /* what is the priority level of a selector */ -#define SEL_KPL 0 /* kernel priority level */ -#define SEL_UPL 3 /* user priority level */ +#define SEL_RPL_MASK 3 /* requester priv level */ +#define ISPL(s) ((s)&3) /* what is the privilege level of a selector */ +#define SEL_KPL 0 /* kernel privilege level */ +#define SEL_UPL 3 /* user privilege level */ #define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ #define SEL_LDT 4 /* local descriptor table */ #define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ @@ -63,63 +64,68 @@ #ifndef LOCORE /* - * Memory and System segment descriptors + * User segment descriptors (%cs, %ds etc for compatability apps. 64 bit wide) + * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl, + * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p. */ -struct segment_descriptor { - unsigned sd_lolimit:16 ; /* segment extent (lsb) */ - unsigned sd_lobase:24 __attribute__ ((packed)); - /* segment base address (lsb) */ - unsigned sd_type:5 ; /* segment type */ - unsigned sd_dpl:2 ; /* segment descriptor priority level */ - unsigned sd_p:1 ; /* segment descriptor present */ - unsigned sd_hilimit:4 ; /* segment extent (msb) */ - unsigned sd_xx:2 ; /* unused */ - unsigned sd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/ - unsigned sd_hibase:8 ; /* segment base address (msb) */ -} ; +struct user_segment_descriptor { + u_int64_t sd_lolimit:16; /* segment extent (lsb) */ + u_int64_t sd_lobase:24; /* segment base address (lsb) */ + u_int64_t sd_type:5; /* segment type */ + u_int64_t sd_dpl:2; /* segment descriptor priority level */ + u_int64_t sd_p:1; /* segment descriptor present */ + u_int64_t sd_hilimit:4; /* segment extent (msb) */ + u_int64_t sd_xx:1; /* unused */ + u_int64_t sd_long:1; /* long 
mode (cs only) */ + u_int64_t sd_def32:1; /* default 32 vs 16 bit size */ + u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/ + u_int64_t sd_hibase:8; /* segment base address (msb) */ +} __packed; /* - * Gate descriptors (e.g. indirect descriptors) + * System segment descriptors (128 bit wide) */ -struct gate_descriptor { - unsigned gd_looffset:16 ; /* gate offset (lsb) */ - unsigned gd_selector:16 ; /* gate segment selector */ - unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */ - unsigned gd_xx:3 ; /* unused */ - unsigned gd_type:5 ; /* segment type */ - unsigned gd_dpl:2 ; /* segment descriptor priority level */ - unsigned gd_p:1 ; /* segment descriptor present */ - unsigned gd_hioffset:16 ; /* gate offset (msb) */ -} ; +struct system_segment_descriptor { + u_int64_t sd_lolimit:16; /* segment extent (lsb) */ + u_int64_t sd_lobase:24; /* segment base address (lsb) */ + u_int64_t sd_type:5; /* segment type */ + u_int64_t sd_dpl:2; /* segment descriptor priority level */ + u_int64_t sd_p:1; /* segment descriptor present */ + u_int64_t sd_hilimit:4; /* segment extent (msb) */ + u_int64_t sd_xx0:3; /* unused */ + u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/ + u_int64_t sd_hibase:40 __packed;/* segment base address (msb) */ + u_int64_t sd_xx1:8; + u_int64_t sd_mbz:5; /* MUST be zero */ + u_int64_t sd_xx2:19; +} __packed; /* - * Generic descriptor + * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit) + * Only interrupt and trap gates have gd_ist. 
*/ -union descriptor { - struct segment_descriptor sd; - struct gate_descriptor gd; -}; +struct gate_descriptor { + u_int64_t gd_looffset:16; /* gate offset (lsb) */ + u_int64_t gd_selector:16; /* gate segment selector */ + u_int64_t gd_ist:3; /* IST table index */ + u_int64_t gd_xx:5; /* unused */ + u_int64_t gd_type:5; /* segment type */ + u_int64_t gd_dpl:2; /* segment descriptor priority level */ + u_int64_t gd_p:1; /* segment descriptor present */ + u_int64_t gd_hioffset:48 __packed; /* gate offset (msb) */ + u_int64_t sd_xx1:32; +} __packed; #endif /* LOCORE */ /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ -#define SDT_SYS286TSS 1 /* system 286 TSS available */ -#define SDT_SYSLDT 2 /* system local descriptor table */ -#define SDT_SYS286BSY 3 /* system 286 TSS busy */ -#define SDT_SYS286CGT 4 /* system 286 call gate */ -#define SDT_SYSTASKGT 5 /* system task gate */ -#define SDT_SYS286IGT 6 /* system 286 interrupt gate */ -#define SDT_SYS286TGT 7 /* system 286 trap gate */ -#define SDT_SYSNULL2 8 /* system null again */ -#define SDT_SYS386TSS 9 /* system 386 TSS available */ -#define SDT_SYSNULL3 10 /* system null again */ -#define SDT_SYS386BSY 11 /* system 386 TSS busy */ -#define SDT_SYS386CGT 12 /* system 386 call gate */ -#define SDT_SYSNULL4 13 /* system null again */ -#define SDT_SYS386IGT 14 /* system 386 interrupt gate */ -#define SDT_SYS386TGT 15 /* system 386 trap gate */ +#define SDT_SYSLDT 2 /* system 64-bit local descriptor table */ +#define SDT_SYSTSS 9 /* system available 64-bit TSS */ +#define SDT_SYSBSY 11 /* system busy 64-bit TSS */ +#define SDT_SYSCGT 12 /* system 64-bit call gate */ +#define SDT_SYSIGT 14 /* system 64-bit interrupt gate */ +#define SDT_SYSTGT 15 /* system 64-bit trap gate */ /* memory segment types */ #define SDT_MEMRO 16 /* memory read only */ @@ -142,6 +148,14 @@ union descriptor { #ifndef LOCORE +#ifndef _SYS_TLS_H_ +#include +#endif + +struct savetls { + struct tls_info info[2]; +}; + 
/* is memory segment descriptor pointer ? */ #define ISMEMSDP(s) ((s->d_type) >= SDT_MEMRO && (s->d_type) <= SDT_MEMERAC) @@ -168,28 +182,24 @@ union descriptor { * when needed to be used by the 386 hardware */ -#if 0 - -struct soft_segment_descriptor { - unsigned ssd_base ; /* segment base address */ - unsigned ssd_limit ; /* segment extent */ - unsigned ssd_type:5 ; /* segment type */ - unsigned ssd_dpl:2 ; /* segment descriptor priority level */ - unsigned ssd_p:1 ; /* segment descriptor present */ - unsigned ssd_xx:4 ; /* unused */ - unsigned ssd_xx1:2 ; /* unused */ - unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/ -}; -#endif /* 0 */ +struct soft_segment_descriptor { + uint64_t ssd_base; /* segment base address */ + uint64_t ssd_limit; /* segment extent */ + uint64_t ssd_type:5; /* segment type */ + uint64_t ssd_dpl:2; /* segment descriptor priority level */ + uint64_t ssd_p:1; /* segment descriptor present */ + uint64_t ssd_long:1; /* long mode (for %cs) */ + uint64_t ssd_def32:1; /* default 32 vs 16 bit size */ + uint64_t ssd_gran:1; /* limit granularity (byte/page units)*/ +} __packed; /* * region descriptors, used to load gdt/idt tables before segments yet exist. */ struct region_descriptor { - unsigned rd_limit:16; /* segment extent */ - unsigned rd_base:32 __attribute__ ((packed)); /* base address */ -}; + uint64_t rd_limit:16; /* segment extent */ + uint64_t rd_base:64 __packed; /* base address */ +} __packed; #endif /* LOCORE */ @@ -209,6 +219,30 @@ struct region_descriptor { * as well just use all of them. 
*/ #define NIDT 256 /* we use them all */ +/* + * Entries in the Interrupt Descriptor Table (IDT) + */ +#define IDT_DE 0 /* #DE: Divide Error */ +#define IDT_DB 1 /* #DB: Debug */ +#define IDT_NMI 2 /* Nonmaskable External Interrupt */ +#define IDT_BP 3 /* #BP: Breakpoint */ +#define IDT_OF 4 /* #OF: Overflow */ +#define IDT_BR 5 /* #BR: Bound Range Exceeded */ +#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ +#define IDT_NM 7 /* #NM: No Math Coprocessor */ +#define IDT_DF 8 /* #DF: Double Fault */ +#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ +#define IDT_TS 10 /* #TS: Invalid TSS */ +#define IDT_NP 11 /* #NP: Segment Not Present */ +#define IDT_SS 12 /* #SS: Stack Segment Fault */ +#define IDT_GP 13 /* #GP: General Protection Fault */ +#define IDT_PF 14 /* #PF: Page Fault */ +#define IDT_MF 16 /* #MF: FPU Floating-Point Error */ +#define IDT_AC 17 /* #AC: Alignment Check */ +#define IDT_MC 18 /* #MC: Machine Check */ +#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ +#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ +#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) @@ -216,59 +250,28 @@ struct region_descriptor { #define GNULL_SEL 0 /* Null Descriptor */ #define GCODE_SEL 1 /* Kernel Code Descriptor */ #define GDATA_SEL 2 /* Kernel Data Descriptor */ -#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */ -#define GPROC0_SEL 4 /* Task state process slot zero and up */ -#define GLDT_SEL 5 /* LDT - eventually one per process */ -#define GUSERLDT_SEL 6 /* User LDT */ -#define GTGATE_SEL 7 /* Process task switch gate */ -#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ -#define GPANIC_SEL 9 /* Task state to consider panic from */ -#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */ -#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */ -#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */ -#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */ -#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */ -#define GTLS_START 15 /* Thread TLS Descriptor */ -#define GTLS_END 17 /* Thread TLS Descriptor */ - -#define NGTLS (GTLS_END - GTLS_START + 1) -#ifdef BDE_DEBUGGER -#define NGDT 18 /* some of 11-17 are reserved for debugger */ -#else -#define NGDT 15 -#endif - -/* - * Entries in the Local Descriptor Table (LDT) - */ -#define LSYS5CALLS_SEL 0 /* forced by intel BCS */ -#define LSYS5SIGR_SEL 1 -#define L43BSDCALLS_SEL 2 /* notyet */ -#define LUCODE_SEL 3 -#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ -#define LUDATA_SEL 5 -/* separate stack, es,fs,gs sels ? 
*/ -/* #define LPOSIXCALLS_SEL 5*/ /* notyet */ -#define LBSDICALLS_SEL 16 /* BSDI system call gate */ -#define NLDT (LBSDICALLS_SEL + 1) +#define GUCODE32_SEL 3 /* User 32 bit code Descriptor */ +#define GUDATA_SEL 4 /* User 32/64 bit Data Descriptor */ +#define GUCODE_SEL 5 /* User 64 bit Code Descriptor */ +#define GPROC0_SEL 6 /* TSS for entering kernel etc */ +/* slot 7 is second half of GPROC0_SEL */ +#define GUGS32_SEL 8 /* User 32 bit GS Descriptor */ +#define NGDT 9 #ifndef LOCORE -struct savetls { - struct segment_descriptor tls[NGTLS]; -}; #ifdef _KERNEL -extern int _default_ldt; -extern union descriptor gdt[]; +extern struct user_segment_descriptor gdt[]; extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; -extern union descriptor ldt[NLDT]; -void lgdt (struct region_descriptor *rdp); -void sdtossd (struct segment_descriptor *sdp, - struct soft_segment_descriptor *ssdp); -void ssdtosd (struct soft_segment_descriptor *ssdp, - struct segment_descriptor *sdp); +void lgdt(struct region_descriptor *rdp); +void sdtossd(struct user_segment_descriptor *sdp, + struct soft_segment_descriptor *ssdp); +void ssdtosd(struct soft_segment_descriptor *ssdp, + struct user_segment_descriptor *sdp); +void ssdtosyssd(struct soft_segment_descriptor *ssdp, + struct system_segment_descriptor *sdp); #endif /* _KERNEL */ #endif /* LOCORE */ diff --git a/sys/cpu/amd64/include/signal.h b/sys/cpu/amd64/include/signal.h index 49562692b5..5dbc973f05 100644 --- a/sys/cpu/amd64/include/signal.h +++ b/sys/cpu/amd64/include/signal.h @@ -1,6 +1,8 @@ /* * Copyright (c) 1986, 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -32,7 +34,7 @@ * * @(#)signal.h 8.1 (Berkeley) 6/11/93 * $FreeBSD: src/sys/i386/include/signal.h,v 1.12 1999/11/12 13:52:11 marcel Exp $ - * $DragonFly: src/sys/cpu/amd64/include/signal.h,v 1.2 2008/08/25 23:34:31 dillon Exp $ + * $DragonFly: src/sys/cpu/amd64/include/signal.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_SIGNAL_H_ @@ -99,12 +101,6 @@ typedef int sig_atomic_t; #if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE) -/* - * XXX temporarily use a path instead of "bla.h" so the - * XFree86-4-clients port, which uses -I-, builds. Use of -I- should - * be banned, or the option should be fixed to not screw up relative-path - * includes. - */ #include /* codes for SIGILL, SIGFPE */ /* @@ -118,40 +114,45 @@ typedef int sig_atomic_t; * those in mcontext_t. */ struct sigcontext { - sigset_t sc_mask; /* signal mask to restore */ - long sc_onstack; /* sigstack state to restore */ - long sc_rdi; - long sc_rsi; - long sc_rdx; - long sc_rcx; - long sc_r8; - long sc_r9; - long sc_rax; - long sc_rbx; - long sc_rbp; - long sc_r10; - long sc_r11; - long sc_r12; - long sc_r13; - long sc_r14; - long sc_r15; - long sc_trapno; - long sc_addr; - long sc_flags; - long sc_err; - long sc_rip; - long sc_cs; - long sc_rflags; - long sc_rsp; - long sc_ss; - long sc_len; - /* - * XXX - taken from freebsd - */ - long sc_fpformat; - long sc_ownedfp; - long sc_fpstate[64] __aligned(16); - long sc_spare[9]; + sigset_t sc_mask; /* signal mask to restore */ + + long sc_onstack; /* sigstack state to restore */ + long sc_rdi; + long sc_rsi; + long sc_rdx; + long sc_rcx; + long sc_r8; + long sc_r9; + long sc_rax; + long sc_rbx; + long sc_rbp; + long sc_r10; + long sc_r11; + long sc_r12; + long sc_r13; + long sc_r14; + long sc_r15; + long sc_trapno; + long sc_addr; + long sc_flags; + long sc_err; + long sc_rip; + long sc_cs; + long 
sc_rflags; + long sc_rsp; + long sc_ss; + + unsigned int sc_len; + unsigned int sc_fpformat; + unsigned int sc_ownedfp; + unsigned int sc_reserved; + unsigned int sc_unused01; + unsigned int sc_unused02; + + /* 16 byte aligned */ + + int sc_fpregs[128]; + int __spare__[16]; }; #endif /* !_ANSI_SOURCE && !_POSIX_SOURCE */ diff --git a/sys/cpu/amd64/include/specialreg.h b/sys/cpu/amd64/include/specialreg.h index e949e6b053..92a9bec2c1 100644 --- a/sys/cpu/amd64/include/specialreg.h +++ b/sys/cpu/amd64/include/specialreg.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ * * from: @(#)specialreg.h 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/amd64/include/specialreg.h,v 1.39 2007/05/31 11:26:44 des Exp $ - * $DragonFly: src/sys/cpu/amd64/include/specialreg.h,v 1.1 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/amd64/include/specialreg.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_SPECIALREG_H_ @@ -135,6 +136,7 @@ #define AMDID_NX 0x00100000 #define AMDID_EXT_MMX 0x00400000 #define AMDID_FFXSR 0x01000000 +#define AMDID_PAGE1GB 0x04000000 #define AMDID_RDTSCP 0x08000000 #define AMDID_LM 0x20000000 #define AMDID_EXT_3DNOW 0x40000000 diff --git a/sys/cpu/amd64/include/tls.h b/sys/cpu/amd64/include/tls.h index 9d1d565ebf..7f17f32f78 100644 --- a/sys/cpu/amd64/include/tls.h +++ b/sys/cpu/amd64/include/tls.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The DragonFly Project. All rights reserved. + * Copyright (c) 2005,2008 The DragonFly Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,7 +28,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/cpu/amd64/include/tls.h,v 1.1 2007/08/21 19:40:24 corecode Exp $ + * $DragonFly: src/sys/cpu/amd64/include/tls.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_TLS_H_ @@ -106,7 +106,7 @@ tls_set_tcb(struct tls_tcb *tcb) info.base = tcb; info.size = -1; seg = set_tls_area(0, &info, sizeof(info)); - __asm __volatile("movl %0, %%fs" : : "r" (seg)); + /*__asm __volatile("movl %0, %%fs" : : "r" (seg));*/ } struct tls_tcb *_rtld_allocate_tls(void); diff --git a/sys/cpu/amd64/include/tss.h b/sys/cpu/amd64/include/tss.h index 7053eb73ef..eefafdf37a 100644 --- a/sys/cpu/amd64/include/tss.h +++ b/sys/cpu/amd64/include/tss.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +36,7 @@ * * from: @(#)tss.h 5.4 (Berkeley) 1/18/91 * $FreeBSD: src/sys/amd64/include/tss.h,v 1.15 2003/11/17 08:58:14 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/tss.h,v 1.1 2007/08/21 19:40:24 corecode Exp $ + * $DragonFly: src/sys/cpu/amd64/include/tss.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_TSS_H_ @@ -55,7 +56,6 @@ struct amd64tss { u_int64_t tss_rsp2 __packed; /* kernel stack pointer ring 2 */ u_int32_t tss_rsvd1; u_int32_t tss_rsvd2; - u_int32_t tss_rsvd3; u_int64_t tss_ist1 __packed; /* Interrupt stack table 1 */ u_int64_t tss_ist2 __packed; /* Interrupt stack table 2 */ u_int64_t tss_ist3 __packed; /* Interrupt stack table 3 */ @@ -63,9 +63,9 @@ struct amd64tss { u_int64_t tss_ist5 __packed; /* Interrupt stack table 5 */ u_int64_t tss_ist6 __packed; /* Interrupt stack table 6 */ u_int64_t tss_ist7 __packed; /* Interrupt stack table 7 */ + u_int32_t tss_rsvd3; u_int32_t tss_rsvd4; - u_int32_t tss_rsvd5; - u_int16_t tss_rsvd6; + u_int16_t tss_rsvd5; u_int16_t tss_iobase; /* io bitmap offset */ }; diff --git a/sys/cpu/amd64/include/types.h 
b/sys/cpu/amd64/include/types.h index 11f7ae6c62..e234fe93ce 100644 --- a/sys/cpu/amd64/include/types.h +++ b/sys/cpu/amd64/include/types.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2008 The DragonFly Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -32,7 +33,7 @@ * * @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD: src/sys/i386/include/types.h,v 1.19.2.1 2001/03/21 10:50:58 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/types.h,v 1.1 2007/08/21 19:40:24 corecode Exp $ + * $DragonFly: src/sys/cpu/amd64/include/types.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_TYPES_H_ @@ -66,10 +67,13 @@ typedef __uint64_t uintfptr_t; /* * MMU page tables */ +#ifndef JG_defined_pml4_entry_t +#define JG_defined_pml4_entry_t typedef __uint64_t pml4_entry_t; typedef __uint64_t pdp_entry_t; typedef __uint64_t pd_entry_t; typedef __uint64_t pt_entry_t; +#endif typedef __uint32_t cpumask_t; /* mask representing a set of cpus */ #define PML4SIZE sizeof(pml4_entry_t) /* for assembly files */ diff --git a/sys/cpu/amd64/include/ucontext.h b/sys/cpu/amd64/include/ucontext.h index 139f794583..b53c769e8d 100644 --- a/sys/cpu/amd64/include/ucontext.h +++ b/sys/cpu/amd64/include/ucontext.h @@ -1,6 +1,7 @@ /*- * Copyright (c) 2003 Peter Wemm * Copyright (c) 1999 Marcel Moolenaar + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +28,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/amd64/include/ucontext.h,v 1.18 2003/11/08 04:39:22 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/ucontext.h,v 1.1 2007/08/21 19:40:24 corecode Exp $ + * $DragonFly: src/sys/cpu/amd64/include/ucontext.h,v 1.2 2008/08/29 17:07:06 dillon Exp $ */ #ifndef _CPU_UCONTEXT_H_ @@ -39,8 +40,8 @@ typedef struct __mcontext { * sigcontext. So that we can support sigcontext * and ucontext_t at the same time. */ - __register_t mc_onstack; /* XXX - sigcontext compat. */ - __register_t mc_rdi; /* machine state (struct trapframe) */ + __register_t mc_onstack; /* XXX - sigcontext compat. */ + __register_t mc_rdi; __register_t mc_rsi; __register_t mc_rdx; __register_t mc_rcx; @@ -62,22 +63,28 @@ typedef struct __mcontext { __register_t mc_rip; __register_t mc_cs; __register_t mc_rflags; - __register_t mc_rsp; + __register_t mc_rsp; /* machine state */ __register_t mc_ss; - long mc_len; /* sizeof(mcontext_t) */ -#define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ -#define _MC_FPFMT_XMM 0x10002 - long mc_fpformat; -#define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ -#define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ -#define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ - long mc_ownedfp; - /* - * See for the internals of mc_fpstate[]. 
- */ - long mc_fpstate[64] __aligned(16); - long mc_spare[8]; + unsigned int mc_len; /* sizeof(mcontext_t) */ + unsigned int mc_fpformat; + unsigned int mc_ownedfp; + unsigned int mc_reserved; + unsigned int mc_unused01; + unsigned int mc_unused02; + + /* 16 byte aligned */ + + int mc_fpregs[128]; + int __spare__[16]; } mcontext_t; +#define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ +#define _MC_FPFMT_387 0x10001 +#define _MC_FPFMT_XMM 0x10002 + +#define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ +#define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ +#define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ + #endif /* !_CPU_UCONTEXT_H_ */ diff --git a/sys/cpu/amd64/misc/amd64-gdbstub.c b/sys/cpu/amd64/misc/amd64-gdbstub.c new file mode 100644 index 0000000000..06ba3c11dd --- /dev/null +++ b/sys/cpu/amd64/misc/amd64-gdbstub.c @@ -0,0 +1,666 @@ +/**************************************************************************** + + THIS SOFTWARE IS NOT COPYRIGHTED + + HP offers the following for use in the public domain. HP makes no + warranty with regard to the software or its performance and the + user accepts the software "AS IS" with all faults. + + HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD + TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +****************************************************************************/ + +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for FreeBSD by Stu Grossman. 
+ * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing a trap #1. + * + * The external function exceptionHandler() is + * used to attach a specific handler to a specific 386 vector number. + * It should use the same privilege level it runs at. It should + * install it as an interrupt gate so that interrupts are masked + * while the handler runs. + * Also, need to assign exceptionHook and oldExceptionHook. + * + * Because gdb will sometimes write to the stack area to execute function + * calls, this program cannot rely on using the supervisor stack so it + * uses its own stack area reserved in the int array remcomStack. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * D detach OK + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. 
+ * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +/* + * $FreeBSD: src/sys/i386/i386/i386-gdbstub.c,v 1.13.2.1 2000/08/03 00:54:41 peter Exp $ + * $DragonFly: src/sys/cpu/amd64/misc/amd64-gdbstub.c,v 1.1 2008/08/29 17:07:09 dillon Exp $ + */ + +#include "opt_ddb.h" + +#include +#include +#include +#include + +#include + +#include + +void gdb_handle_exception (db_regs_t *, int, int); + +/************************************************************************/ + +extern jmp_buf db_jmpbuf; + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +#define BUFMAX 400 + +/* Create private copies of common functions used by the stub. This prevents + nasty interactions between app code and the stub (for instance if user steps + into strlen, etc..) 
*/ + +#define strlen gdb_strlen +#define strcpy gdb_strcpy + +static int +strlen (const char *s) +{ + const char *s1 = s; + + while (*s1++ != '\000'); + + return s1 - s; +} + +static char * +strcpy (char *dst, const char *src) +{ + char *retval = dst; + + while ((*dst++ = *src++) != '\000'); + + return retval; +} + +static int +putDebugChar (int c) /* write a single character */ +{ + if (gdb_tab == NULL) + return 0; + gdb_tab->cn_putc(gdb_tab->cn_gdbprivate, c); + return 1; +} + +static int +getDebugChar (void) /* read and return a single char */ +{ + if (gdb_tab == NULL) + return -1; + return gdb_tab->cn_getc(gdb_tab->cn_gdbprivate); +} + +static const char hexchars[]="0123456789abcdef"; + +static int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10); + if ((ch >= '0') && (ch <= '9')) return (ch-'0'); + if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10); + return (-1); +} + +/* scan for the sequence $# */ +static void +getpacket (char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + unsigned char ch; + + do + { + /* wait around for the start character, ignore all other characters */ + + while ((ch = (getDebugChar () & 0x7f)) != '$'); + + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + + while (count < BUFMAX) + { + ch = getDebugChar () & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') + { + xmitcsum = hex (getDebugChar () & 0x7f) << 4; + xmitcsum += hex (getDebugChar () & 0x7f); + + if (checksum != xmitcsum) + putDebugChar ('-'); /* failed checksum */ + else + { + putDebugChar ('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') + { + putDebugChar (buffer[0]); + putDebugChar (buffer[1]); + + /* remove sequence chars from buffer */ + + count = strlen (buffer); + for (i=3; i <= count; i++) + 
buffer[i-3] = buffer[i]; + } + } + } + } + while (checksum != xmitcsum); +} + +/* send the packet in buffer. */ + +static void +putpacket (char *buffer) +{ + unsigned char checksum; + int count; + unsigned char ch; + + /* $#. */ + do + { +/* + * This is a non-standard hack to allow use of the serial console for + * operation as well as debugging. Simply turn on 'remotechat' in gdb. + * + * This extension is not part of the Cygnus protocol, is kinda gross, + * but gets the job done. + */ +#ifdef GDB_REMOTE_CHAT + putDebugChar ('|'); + putDebugChar ('|'); + putDebugChar ('|'); + putDebugChar ('|'); +#endif + putDebugChar ('$'); + checksum = 0; + count = 0; + + while ((ch=buffer[count]) != 0) + { + putDebugChar (ch); + checksum += ch; + count += 1; + } + + putDebugChar ('#'); + putDebugChar (hexchars[checksum >> 4]); + putDebugChar (hexchars[checksum & 0xf]); + } + while ((getDebugChar () & 0x7f) != '+'); +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; + +static int +get_char (vm_offset_t addr) +{ + char data; + + if (setjmp (db_jmpbuf)) + return -1; + + db_read_bytes (addr, 1, &data); + + return data & 0xff; +} + +static int +set_char (vm_offset_t addr, int val) +{ + char data; + + if (setjmp (db_jmpbuf)) + return -1; + + data = val; + + db_write_bytes (addr, 1, &data); + return 0; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ + +static char * +mem2hex (vm_offset_t mem, char *buf, int count) +{ + int i; + int ch; + + for (i=0;i> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + return(buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +static char * +hex2mem (char *buf, vm_offset_t mem, int count) +{ + int i; + int ch; + int rv; + + for (i=0;i=0) + { + *intValue = (*intValue <<4) | hexValue; + numChars ++; + } + else + break; + + 
(*ptr)++; + } + + return (numChars); +} + +/* + * While we find nice hex chars, build a long. + * Return number of chars processed. + */ + +static long +hexToLong(char **ptr, long *longValue) +{ + int numChars = 0; + int hexValue; + + *longValue = 0; + + while (**ptr) + { + hexValue = hex(**ptr); + if (hexValue >=0) + { + *longValue = (*longValue <<4) | hexValue; + numChars ++; + } + else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define NUMREGBYTES (sizeof registers) +#define PC 16 +#define SP 7 +#define FP 6 +#define NUM_REGS 22 + +/* + * This function does all command procesing for interfacing to gdb. + */ +void +gdb_handle_exception (db_regs_t *raw_regs, int type, int code) +{ + int sigval; + long addr; + int length; + char * ptr; + struct amd64regs { + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rbx; + unsigned long rsi; + unsigned long rdi; + unsigned long rbp; + unsigned long rsp; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long rip; + unsigned long rflags; + unsigned int cs; + unsigned int ss; + }; + struct amd64regs registers; + + registers.rax = raw_regs->tf_rax; + registers.rbx = raw_regs->tf_rbx; + registers.rcx = raw_regs->tf_rcx; + registers.rdx = raw_regs->tf_rdx; + + registers.rsp = raw_regs->tf_rsp; + registers.rbp = raw_regs->tf_rbp; + registers.rsi = raw_regs->tf_rsi; + registers.rdi = raw_regs->tf_rdi; + + registers.r8 = raw_regs->tf_r8; + registers.r9 = raw_regs->tf_r9; + registers.r10 = raw_regs->tf_r10; + registers.r11 = raw_regs->tf_r11; + registers.r12 = raw_regs->tf_r12; + registers.r13 = raw_regs->tf_r13; + registers.r14 = raw_regs->tf_r14; + registers.r15 = raw_regs->tf_r15; + + registers.rip = raw_regs->tf_rip; + registers.rflags = raw_regs->tf_rflags; + + registers.cs = raw_regs->tf_cs; + registers.ss = raw_regs->tf_ss; + + /* reply to host that an exception 
has occurred */ + sigval = computeSignal (type); + ptr = remcomOutBuffer; + + *ptr++ = 'T'; + *ptr++ = hexchars[sigval >> 4]; + *ptr++ = hexchars[sigval & 0xf]; + + *ptr++ = hexchars[PC >> 4]; + *ptr++ = hexchars[PC & 0xf]; + *ptr++ = ':'; + ptr = mem2hex ((vm_offset_t)®isters.rip, ptr, 8); + *ptr++ = ';'; + + *ptr++ = hexchars[FP >> 4]; + *ptr++ = hexchars[FP & 0xf]; + *ptr++ = ':'; + ptr = mem2hex ((vm_offset_t)®isters.rbp, ptr, 8); + *ptr++ = ';'; + + *ptr++ = hexchars[SP >> 4]; + *ptr++ = hexchars[SP & 0xf]; + *ptr++ = ':'; + ptr = mem2hex ((vm_offset_t)®isters.rsp, ptr, 8); + *ptr++ = ';'; + + *ptr++ = 0; + + putpacket (remcomOutBuffer); + + while (1) + { + remcomOutBuffer[0] = 0; + + getpacket (remcomInBuffer); + switch (remcomInBuffer[0]) + { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[sigval >> 4]; + remcomOutBuffer[2] = hexchars[sigval % 16]; + remcomOutBuffer[3] = 0; + break; + + case 'D': /* detach; say OK and turn off gdb */ + putpacket(remcomOutBuffer); + boothowto &= ~RB_GDB; + return; + + case 'g': /* return the value of the CPU registers */ + mem2hex ((vm_offset_t)®isters, remcomOutBuffer, NUMREGBYTES); + break; + + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem (&remcomInBuffer[1], (vm_offset_t)®isters, NUMREGBYTES); + strcpy (remcomOutBuffer, "OK"); + break; + + case 'P': /* Set the value of one register */ + { + int regno; + + ptr = &remcomInBuffer[1]; + + if (hexToInt (&ptr, ®no) + && *ptr++ == '=' + && regno < NUM_REGS) + { + /* JG */ + hex2mem (ptr, (vm_offset_t)®isters + regno * 8, 8); + strcpy(remcomOutBuffer,"OK"); + } + else + strcpy (remcomOutBuffer, "P01"); + break; + } + case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + /* Try to read %x,%x. 
*/ + + ptr = &remcomInBuffer[1]; + + if (hexToLong (&ptr, &addr) + && *(ptr++) == ',' + && hexToInt (&ptr, &length)) + { + if (mem2hex((vm_offset_t) addr, remcomOutBuffer, length) == NULL) + strcpy (remcomOutBuffer, "E03"); + break; + } + else + strcpy (remcomOutBuffer, "E01"); + break; + + case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ + + /* Try to read '%x,%x:'. */ + + ptr = &remcomInBuffer[1]; + + if (hexToLong(&ptr,&addr) + && *(ptr++) == ',' + && hexToInt(&ptr, &length) + && *(ptr++) == ':') + { + if (hex2mem(ptr, (vm_offset_t) addr, length) == NULL) + strcpy (remcomOutBuffer, "E03"); + else + strcpy (remcomOutBuffer, "OK"); + } + else + strcpy (remcomOutBuffer, "E02"); + break; + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + case 'c' : + case 's' : + /* try to read optional parameter, pc unchanged if no parm */ + + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr,&addr)) + registers.rip = addr; + + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + registers.rflags |= PSL_T; + else + registers.rflags &= ~PSL_T; + + raw_regs->tf_rax = registers.rax; + raw_regs->tf_rbx = registers.rbx; + raw_regs->tf_rcx = registers.rcx; + raw_regs->tf_rdx = registers.rdx; + + raw_regs->tf_rsp = registers.rsp; + raw_regs->tf_rbp = registers.rbp; + raw_regs->tf_rsi = registers.rsi; + raw_regs->tf_rdi = registers.rdi; + + raw_regs->tf_r8 = registers.r8; + raw_regs->tf_r9 = registers.r9; + raw_regs->tf_r10 = registers.r10; + raw_regs->tf_r11 = registers.r11; + raw_regs->tf_r12 = registers.r12; + raw_regs->tf_r13 = registers.r13; + raw_regs->tf_r14 = registers.r14; + raw_regs->tf_r15 = registers.r15; + + raw_regs->tf_rip = registers.rip; + raw_regs->tf_rflags = registers.rflags; + + raw_regs->tf_cs = registers.cs; + raw_regs->tf_ss = registers.ss; + return; + + } /* switch */ + + /* reply to the request */ + putpacket (remcomOutBuffer); + } +} + diff --git 
a/sys/platform/pc64/amd64/autoconf.c b/sys/platform/pc64/amd64/autoconf.c index 7a51aa7e55..504ac81722 100644 --- a/sys/platform/pc64/amd64/autoconf.c +++ b/sys/platform/pc64/amd64/autoconf.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -35,7 +36,7 @@ * * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/autoconf.c,v 1.2 2008/03/08 07:50:49 sephe Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/autoconf.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ /* @@ -60,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -72,19 +72,14 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include -#if 0 #include #include -#include -#endif -#include #include -#include #if NISA > 0 #include @@ -92,10 +87,9 @@ device_t isa_bus_device = 0; #endif -static void cpu_startup (void *); -static void configure_first (void *); -static void configure (void *); -static void configure_final (void *); +static void configure_first (void *); +static void configure (void *); +static void configure_final (void *); #if defined(FFS) && defined(FFS_ROOT) static void setroot (void); @@ -107,7 +101,6 @@ static void pxe_setup_nfsdiskless(void); #endif #endif -SYSINIT(cpu, SI_BOOT2_SMP, SI_ORDER_FIRST, cpu_startup, NULL); SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL); /* SI_ORDER_SECOND is hookable */ SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL); @@ -117,77 +110,6 @@ SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); cdev_t rootdev = NULL; cdev_t dumpdev = NULL; -/* - * - */ -static void -cpu_startup(void *dummy) -{ - vm_offset_t buffer_sva; - vm_offset_t buffer_eva; - 
vm_offset_t pager_sva; - vm_offset_t pager_eva; - - kprintf("%s", version); - kprintf("real memory = %llu (%lluK bytes)\n", - ptoa(Maxmem), ptoa(Maxmem) / 1024); - - if (nbuf == 0) { - int factor = 4 * BKVASIZE / 1024; - int kbytes = Maxmem * (PAGE_SIZE / 1024); - - nbuf = 50; - if (kbytes > 4096) - nbuf += min((kbytes - 4096) / factor, 65536 / factor); - if (kbytes > 65536) - nbuf += (kbytes - 65536) * 2 / (factor * 5); - if (maxbcache && nbuf > maxbcache / BKVASIZE) - nbuf = maxbcache / BKVASIZE; - } - if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) { - nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2); - kprintf("Warning: nbufs capped at %d\n", nbuf); - } - - nswbuf = max(min(nbuf/4, 256), 16); -#ifdef NSWBUF_MIN - if (nswbuf < NSWBUF_MIN) - nswbuf = NSWBUF_MIN; -#endif - - /* - * Allocate memory for the buffer cache - */ - buf = (void *)kmem_alloc(&kernel_map, nbuf * sizeof(struct buf)); - swbuf = (void *)kmem_alloc(&kernel_map, nswbuf * sizeof(struct buf)); - - -#ifdef DIRECTIO - ffs_rawread_setup(); -#endif - kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva, - (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); - kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva, - (nbuf*BKVASIZE)); - buffer_map.system_map = 1; - kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva, - (nswbuf*MAXPHYS) + pager_map_size); - pager_map.system_map = 1; -#if defined(USERCONFIG) - userconfig(); - cninit(); /* the preferred console may have changed */ -#endif - kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count), - ptoa(vmstats.v_free_count) / 1024); - bufinit(); - vm_pager_bufferinit(); -#ifdef SMP - mp_start(); - mp_announce(); -#endif - cpu_setregs(); -} - /* * Determine i/o configuration for a machine. */ @@ -199,12 +121,6 @@ configure_first(void *dummy) static void configure(void *dummy) { - /* - * Final interrupt support acviation, then enable hardware interrupts. 
- */ - MachIntrABI.finalize(); - cpu_enable_intr(); - /* * This will configure all devices, generally starting with the * nexus (i386/i386/nexus.c). The nexus ISA code explicitly @@ -235,10 +151,49 @@ configure(void *dummy) static void configure_final(void *dummy) { + int i; + cninit_finish(); - if (bootverbose) + if (bootverbose) { +#ifdef APIC_IO + imen_dump(); +#endif /* APIC_IO */ + +#if JG + /* + * Print out the BIOS's idea of the disk geometries. + */ + kprintf("BIOS Geometries:\n"); + for (i = 0; i < N_BIOS_GEOM; i++) { + unsigned long bios_geom; + int max_cylinder, max_head, max_sector; + + bios_geom = bootinfo.bi_bios_geom[i]; + + /* + * XXX the bootstrap punts a 1200K floppy geometry + * when the get-disk-geometry interrupt fails. Skip + * drives that have this geometry. + */ + if (bios_geom == 0x4f010f) + continue; + + kprintf(" %x:%08lx ", i, bios_geom); + max_cylinder = bios_geom >> 16; + max_head = (bios_geom >> 8) & 0xff; + max_sector = bios_geom & 0xff; + kprintf( + "0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n", + max_cylinder, max_cylinder + 1, + max_head, max_head + 1, + max_sector, max_sector); + } + kprintf(" %d accounted for\n", bootinfo.bi_n_bios_used); + kprintf("Device configuration finished.\n"); +#endif + } } #ifdef BOOTP @@ -270,6 +225,8 @@ SYSINIT(cpu_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, cpu_rootconf, NULL) u_long bootdev = 0; /* not a cdev_t - encoding is different */ #if defined(FFS) && defined(FFS_ROOT) +#define FDMAJOR 2 +#define FDUNITSHIFT 6 /* * The boot code uses old block device major numbers to pass bootdev to @@ -465,7 +422,6 @@ pxe_setup_nfsdiskless(void) { struct nfs_diskless *nd = &nfs_diskless; struct ifnet *ifp; - struct ifaddr *ifa; struct sockaddr_dl *sdl, ourdl; struct sockaddr_in myaddr, netmask; char *cp; @@ -487,13 +443,12 @@ pxe_setup_nfsdiskless(void) kprintf("PXE: no hardware address\n"); return; } - ifa = NULL; ifp = TAILQ_FIRST(&ifnet); TAILQ_FOREACH(ifp, &ifnet, if_link) { struct 
ifaddr_container *ifac; TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { - ifa = ifac->ifa; + struct ifaddr *ifa = ifac->ifa; if ((ifa->ifa_addr->sa_family == AF_LINK) && (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) { diff --git a/sys/platform/pc64/amd64/console.c b/sys/platform/pc64/amd64/console.c index 6d194c352a..3b5813fb65 100644 --- a/sys/platform/pc64/amd64/console.c +++ b/sys/platform/pc64/amd64/console.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2006-2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,11 +31,12 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/console.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/console.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include +#if JG /* * Global console locking functions */ @@ -48,3 +49,4 @@ void cons_unlock(void) { } +#endif diff --git a/sys/platform/pc64/amd64/cpu_regs.c b/sys/platform/pc64/amd64/cpu_regs.c deleted file mode 100644 index 676a565ba7..0000000000 --- a/sys/platform/pc64/amd64/cpu_regs.c +++ /dev/null @@ -1,1264 +0,0 @@ -/*- - * Copyright (c) 1992 Terrence R. Lambert. - * Copyright (C) 1994, David Greenman - * Copyright (c) 1982, 1987, 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/Attic/cpu_regs.c,v 1.6 2008/04/21 15:47:56 dillon Exp $ - */ - -#include "use_ether.h" -#include "use_npx.h" -#include "use_isa.h" -#include "opt_atalk.h" -#include "opt_compat.h" -#include "opt_ddb.h" -#include "opt_directio.h" -#include "opt_inet.h" -#include "opt_ipx.h" -#include "opt_msgbuf.h" -#include "opt_swap.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include /* pcb.h included via sys/user.h */ -#include /* CPU_prvspace */ -#include -#ifdef PERFMON -#include -#endif -#include - -#include -/* #include */ -#include -#include -#include -#include /* umtx_* functions */ - -extern void dblfault_handler (void); - -#ifndef CPU_DISABLE_SSE -static void set_fpregs_xmm (struct save87 *, struct savexmm *); -static void fill_fpregs_xmm (struct savexmm *, struct save87 *); -#endif /* CPU_DISABLE_SSE */ -#ifdef DIRECTIO -extern void ffs_rawread_setup(void); -#endif /* DIRECTIO */ - -#ifdef SMP -int64_t tsc_offsets[MAXCPU]; -#else -int64_t tsc_offsets[1]; -#endif - -#if defined(SWTCH_OPTIM_STATS) -extern int swtch_optim_stats; -SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, - CTLFLAG_RD, &swtch_optim_stats, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, - CTLFLAG_RD, &tlb_flush_count, 0, ""); -#endif - -static int -sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) -{ - int error = sysctl_handle_int(oidp, 0, ctob((int)Maxmem), req); - return (error); -} - -SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, - 0, 0, sysctl_hw_physmem, "IU", ""); - 
-static int -sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) -{ - int error = sysctl_handle_int(oidp, 0, - ctob((int)Maxmem - vmstats.v_wire_count), req); - return (error); -} - -SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, - 0, 0, sysctl_hw_usermem, "IU", ""); - -SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &Maxmem, NULL, ""); - -#if 0 - -static int -sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) -{ - int error; - - /* Unwind the buffer, so that it's linear (possibly starting with - * some initial nulls). - */ - error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, - msgbufp->msg_size-msgbufp->msg_bufr,req); - if(error) return(error); - if(msgbufp->msg_bufr>0) { - error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, - msgbufp->msg_bufr,req); - } - return(error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, - 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); - -static int msgbuf_clear; - -static int -sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) -{ - int error; - error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, - req); - if (!error && req->newptr) { - /* Clear the buffer and reset write pointer */ - bzero(msgbufp->msg_ptr,msgbufp->msg_size); - msgbufp->msg_bufr=msgbufp->msg_bufx=0; - msgbuf_clear=0; - } - return (error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, - &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", - "Clear kernel message buffer"); - -#endif - -/* - * Send an interrupt to process. - * - * Stack is set up to allow sigcode stored - * at top to call routine, followed by kcall - * to sigreturn routine below. After sigreturn - * resets the signal mask, the stack, and the - * frame pointer, it returns to the user - * specified pc, psl. 
- */ - -extern int _ucodesel, _udatasel; -void -sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) -{ - struct lwp *lp = curthread->td_lwp; - struct proc *p = lp->lwp_proc; - struct trapframe *regs; - struct sigacts *psp = p->p_sigacts; - struct sigframe sf, *sfp; - int oonstack; - - regs = lp->lwp_md.md_regs; - oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; - - /* save user context */ - bzero(&sf, sizeof(struct sigframe)); - sf.sf_uc.uc_sigmask = *mask; - sf.sf_uc.uc_stack = lp->lwp_sigstk; - sf.sf_uc.uc_mcontext.mc_onstack = oonstack; - /* bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe)); */ - - /* make the size of the saved context visible to userland */ - sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); - - /* save mailbox pending state for syscall interlock semantics */ - if (p->p_flag & P_MAILBOX) - sf.sf_uc.uc_mcontext.mc_flags |= PGEX_MAILBOX; - - - /* Allocate and validate space for the signal handler context. */ - if ((lp->lwp_flag & LWP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp + - lp->lwp_sigstk.ss_size - sizeof(struct sigframe)); - lp->lwp_sigstk.ss_flags |= SS_ONSTACK; - } - else - sfp = (struct sigframe *)regs->tf_rsp - 1; - - /* Translate the signal is appropriate */ - if (p->p_sysent->sv_sigtbl) { - if (sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - } - - /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; - if (SIGISMEMBER(psp->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; - sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; - - /* fill siginfo structure */ - sf.sf_si.si_signo = sig; - sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void*)regs->tf_err; - } - else { - /* Old FreeBSD-style arguments. 
*/ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - -#if 0 - /* - * If we're a vm86 process, we want to save the segment registers. - * We also change eflags to be our emulated eflags, not the actual - * eflags. - */ - if (regs->tf_rflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; - - sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; - sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; - sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; - sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; - - if (vm86->vm86_has_vme == 0) - sf.sf_uc.uc_mcontext.mc_eflags = - (tf->tf_rflags & ~(PSL_VIF | PSL_VIP)) | - (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); - - /* - * Clear PSL_NT to inhibit T_TSSFLT faults on return from - * syscalls made by the signal handler. This just avoids - * wasting time for our lazy fixup of such faults. PSL_NT - * does nothing in vm86 mode, but vm86 programs can set it - * almost legitimately in probes for old cpu types. - */ - tf->tf_rflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); - } -#endif - - /* - * Save the FPU state and reinit the FP unit - */ - npxpush(&sf.sf_uc.uc_mcontext); - - /* - * Copy the sigframe out to the user's stack. - */ - if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { - /* - * Something is wrong with the stack pointer. - * ...Kill the process. 
- */ - sigexit(lp, SIGILL); - } - - regs->tf_rsp = (int)sfp; - regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); - - /* - * amd64 abi specifies that the direction flag must be cleared - * on function entry - */ - regs->tf_rflags &= ~(PSL_T|PSL_D); - - regs->tf_cs = _ucodesel; - /* regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; */ - if (regs->tf_trapno == T_PROTFLT) { - /* regs->tf_fs = _udatasel; - regs->tf_gs = _udatasel; */ - } - regs->tf_ss = _udatasel; -} - -/* - * Sanitize the trapframe for a virtual kernel passing control to a custom - * VM context. - * - * Allow userland to set or maintain PSL_RF, the resume flag. This flag - * basically controls whether the return PC should skip the first instruction - * (as in an explicit system call) or re-execute it (as in an exception). - */ -int -cpu_sanitize_frame(struct trapframe *frame) -{ - frame->tf_cs = _ucodesel; -#if 0 - frame->tf_ds = _udatasel; - frame->tf_es = _udatasel; - frame->tf_fs = _udatasel; - frame->tf_gs = _udatasel; -#endif - frame->tf_ss = _udatasel; - frame->tf_rflags &= (PSL_RF | PSL_USERCHANGE); - frame->tf_rflags |= PSL_RESERVED_DEFAULT | PSL_I; - return(0); -} - -int -cpu_sanitize_tls(struct savetls *tls) -{ - struct segment_descriptor *desc; - int i; - - for (i = 0; i < NGTLS; ++i) { - desc = &tls->tls[i]; - if (desc->sd_dpl == 0 && desc->sd_type == 0) - continue; - if (desc->sd_def32 == 0) - return(ENXIO); - if (desc->sd_type != SDT_MEMRWA) - return(ENXIO); - if (desc->sd_dpl != SEL_UPL) - return(ENXIO); - if (desc->sd_xx != 0 || desc->sd_p != 1) - return(ENXIO); - } - return(0); -} - -/* - * sigreturn(ucontext_t *sigcntxp) - * - * System call to cleanup state after a signal - * has been taken. Reset signal mask and - * stack state from context left by sendsig (above). - * Return to previous pc and psl as specified by - * context left by sendsig. Check carefully to - * make sure that the user has not modified the - * state to gain improper privileges. 
- */ -#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) -#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) - -int -sys_sigreturn(struct sigreturn_args *uap) -{ - struct lwp *lp = curthread->td_lwp; - struct proc *p = lp->lwp_proc; - struct trapframe *regs; - ucontext_t ucp; - int cs; - int rflags; - int error; - - error = copyin(uap->sigcntxp, &ucp, sizeof(ucp)); - if (error) - return (error); - - regs = lp->lwp_md.md_regs; - rflags = ucp.uc_mcontext.mc_rflags; - -#if 0 - if (eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86; - - /* - * if pcb_ext == 0 or vm86_inited == 0, the user hasn't - * set up the vm86 area, and we can't enter vm86 mode. - */ - if (lp->lwp_thread->td_pcb->pcb_ext == 0) - return (EINVAL); - vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; - if (vm86->vm86_inited == 0) - return (EINVAL); - - /* go back to user mode if both flags are set */ - if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) - trapsignal(lp->lwp_proc, SIGBUS, 0); - - if (vm86->vm86_has_vme) { - eflags = (tf->tf_eflags & ~VME_USERCHANGE) | - (eflags & VME_USERCHANGE) | PSL_VM; - } else { - vm86->vm86_eflags = eflags; /* save VIF, VIP */ - eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; - } - bcopy(&ucp.uc_mcontext.mc_gs, tf, sizeof(struct trapframe)); - tf->tf_eflags = eflags; - tf->tf_vm86_ds = tf->tf_ds; - tf->tf_vm86_es = tf->tf_es; - tf->tf_vm86_fs = tf->tf_fs; - tf->tf_vm86_gs = tf->tf_gs; - tf->tf_ds = _udatasel; - tf->tf_es = _udatasel; -#if 0 - tf->tf_fs = _udatasel; - tf->tf_gs = _udatasel; -#endif - } else -#endif - { - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. 
tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { - kprintf("sigreturn: eflags = 0x%x\n", rflags); - return(EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - cs = ucp.uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - kprintf("sigreturn: cs = 0x%x\n", cs); - trapsignal(lp, SIGBUS, T_PROTFLT); - return(EINVAL); - } - /* bcopy(&ucp.uc_mcontext.mc_gs, regs, sizeof(struct trapframe)); */ - } - - /* - * Restore the FPU state from the frame - */ - npxpop(&ucp.uc_mcontext); - - /* - * Merge saved signal mailbox pending flag to maintain interlock - * semantics against system calls. - */ - if (ucp.uc_mcontext.mc_flags & PGEX_MAILBOX) - p->p_flag |= P_MAILBOX; - - if (ucp.uc_mcontext.mc_onstack & 1) - lp->lwp_sigstk.ss_flags |= SS_ONSTACK; - else - lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK; - - lp->lwp_sigmask = ucp.uc_sigmask; - SIG_CANTMASK(lp->lwp_sigmask); - return(EJUSTRETURN); -} - -/* - * Stack frame on entry to function. %eax will contain the function vector, - * %ecx will contain the function data. flags, ecx, and eax will have - * already been pushed on the stack. 
- */ -struct upc_frame { - register_t eax; - register_t ecx; - register_t edx; - register_t flags; - register_t oldip; -}; - -void -sendupcall(struct vmupcall *vu, int morepending) -{ - struct lwp *lp = curthread->td_lwp; - struct trapframe *regs; - struct upcall upcall; - struct upc_frame upc_frame; - int crit_count = 0; - - /* - * If we are a virtual kernel running an emulated user process - * context, switch back to the virtual kernel context before - * trying to post the signal. - */ - if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { - lp->lwp_md.md_regs->tf_trapno = 0; - vkernel_trap(lp, lp->lwp_md.md_regs); - } - - /* - * Get the upcall data structure - */ - if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) || - copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)) - ) { - vu->vu_pending = 0; - kprintf("bad upcall address\n"); - return; - } - - /* - * If the data structure is already marked pending or has a critical - * section count, mark the data structure as pending and return - * without doing an upcall. vu_pending is left set. - */ - if (upcall.upc_pending || crit_count >= vu->vu_pending) { - if (upcall.upc_pending < vu->vu_pending) { - upcall.upc_pending = vu->vu_pending; - copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, - sizeof(upcall.upc_pending)); - } - return; - } - - /* - * We can run this upcall now, clear vu_pending. - * - * Bump our critical section count and set or clear the - * user pending flag depending on whether more upcalls are - * pending. The user will be responsible for calling - * upc_dispatch(-1) to process remaining upcalls. 
- */ - vu->vu_pending = 0; - upcall.upc_pending = morepending; - crit_count += TDPRI_CRIT; - copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, - sizeof(upcall.upc_pending)); - copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, - sizeof(int)); - - /* - * Construct a stack frame and issue the upcall - */ - regs = lp->lwp_md.md_regs; - upc_frame.eax = regs->tf_rax; - upc_frame.ecx = regs->tf_rcx; - upc_frame.edx = regs->tf_rdx; - upc_frame.flags = regs->tf_rflags; - upc_frame.oldip = regs->tf_rip; - if (copyout(&upc_frame, (void *)(regs->tf_rsp - sizeof(upc_frame)), - sizeof(upc_frame)) != 0) { - kprintf("bad stack on upcall\n"); - } else { - regs->tf_rax = (register_t)vu->vu_func; - regs->tf_rcx = (register_t)vu->vu_data; - regs->tf_rdx = (register_t)lp->lwp_upcall; - regs->tf_rip = (register_t)vu->vu_ctx; - regs->tf_rsp -= sizeof(upc_frame); - } -} - -/* - * fetchupcall occurs in the context of a system call, which means that - * we have to return EJUSTRETURN in order to prevent eax and edx from - * being overwritten by the syscall return value. - * - * if vu is not NULL we return the new context in %edx, the new data in %ecx, - * and the function pointer in %eax. - */ -int -fetchupcall (struct vmupcall *vu, int morepending, void *rsp) -{ - struct upc_frame upc_frame; - struct lwp *lp = curthread->td_lwp; - struct trapframe *regs; - int error; - struct upcall upcall; - int crit_count; - - regs = lp->lwp_md.md_regs; - - error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int)); - if (error == 0) { - if (vu) { - /* - * This jumps us to the next ready context. 
- */ - vu->vu_pending = 0; - error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall)); - crit_count = 0; - if (error == 0) - error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)); - crit_count += TDPRI_CRIT; - if (error == 0) - error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int)); - regs->tf_rax = (register_t)vu->vu_func; - regs->tf_rcx = (register_t)vu->vu_data; - regs->tf_rdx = (register_t)lp->lwp_upcall; - regs->tf_rip = (register_t)vu->vu_ctx; - regs->tf_rsp = (register_t)rsp; - } else { - /* - * This returns us to the originally interrupted code. - */ - error = copyin(rsp, &upc_frame, sizeof(upc_frame)); - regs->tf_rax = upc_frame.eax; - regs->tf_rcx = upc_frame.ecx; - regs->tf_rdx = upc_frame.edx; - regs->tf_rflags = (regs->tf_rflags & ~PSL_USERCHANGE) | - (upc_frame.flags & PSL_USERCHANGE); - regs->tf_rip = upc_frame.oldip; - regs->tf_rsp = (register_t)((char *)rsp + sizeof(upc_frame)); - } - } - if (error == 0) - error = EJUSTRETURN; - return(error); -} - -/* - * cpu_idle() represents the idle LWKT. You cannot return from this function - * (unless you want to blow things up!). Instead we look for runnable threads - * and loop or halt as appropriate. Giant is not held on entry to the thread. - * - * The main loop is entered with a critical section held, we must release - * the critical section before doing anything else. lwkt_switch() will - * check for pending interrupts due to entering and exiting its own - * critical section. - * - * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI - * to wake a HLTed cpu up. However, there are cases where the idlethread - * will be entered with the possibility that no IPI will occur and in such - * cases lwkt_switch() sets TDF_IDLE_NOHLT. 
- */ -static int cpu_idle_hlt = 1; -static int cpu_idle_hltcnt; -static int cpu_idle_spincnt; -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW, - &cpu_idle_hltcnt, 0, "Idle loop entry halts"); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW, - &cpu_idle_spincnt, 0, "Idle loop entry spins"); - -void -cpu_idle(void) -{ - struct thread *td = curthread; - struct mdglobaldata *gd = mdcpu; - - crit_exit(); - KKASSERT(td->td_pri < TDPRI_CRIT); - for (;;) { - /* - * See if there are any LWKTs ready to go. - */ - lwkt_switch(); - - /* - * The idle loop halts only if no threads are scheduleable - * and no signals have occured. - */ - if (cpu_idle_hlt && !lwkt_runnable() && - (td->td_flags & TDF_IDLE_NOHLT) == 0) { - splz(); - if (!lwkt_runnable()) { -#ifdef DEBUGIDLE - struct timeval tv1, tv2; - gettimeofday(&tv1, NULL); -#endif - /* umtx_sleep(&gd->mi.gd_runqmask, 0, 1000000); */ -#ifdef DEBUGIDLE - gettimeofday(&tv2, NULL); - if (tv2.tv_usec - tv1.tv_usec + - (tv2.tv_sec - tv1.tv_sec) * 1000000 - > 500000) { - kprintf("cpu %d idlelock %08x %08x\n", - gd->mi.gd_cpuid, - gd->mi.gd_runqmask, - gd->gd_fpending); - } -#endif - } -#ifdef SMP - else { - __asm __volatile("pause"); - } -#endif - ++cpu_idle_hltcnt; - } else { - td->td_flags &= ~TDF_IDLE_NOHLT; - splz(); -#ifdef SMP - /*__asm __volatile("sti; pause");*/ - __asm __volatile("pause"); -#else - /*__asm __volatile("sti");*/ -#endif - ++cpu_idle_spincnt; - } - } -} - -#ifdef SMP - -/* - * Called by the LWKT switch core with a critical section held if the only - * schedulable thread needs the MP lock and we couldn't get it. On - * a real cpu we just spin in the scheduler. In the virtual kernel - * we sleep for a bit. 
- */ -void -cpu_mplock_contested(void) -{ - usleep(1000); -} - -/* - * Called by the spinlock code with or without a critical section held - * when a spinlock is found to be seriously constested. - */ -void -cpu_spinlock_contested(void) -{ - usleep(1000); -} - -#endif - -/* - * Clear registers on exec - */ -void -exec_setregs(u_long entry, u_long stack, u_long ps_strings) -{ - struct thread *td = curthread; - struct lwp *lp = td->td_lwp; - struct trapframe *regs = lp->lwp_md.md_regs; - struct pcb *pcb = lp->lwp_thread->td_pcb; - - /* was i386_user_cleanup() in NetBSD */ - user_ldt_free(pcb); - - bzero((char *)regs, sizeof(struct trapframe)); - regs->tf_rip = entry; - regs->tf_rsp = stack; - regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); - regs->tf_ss = 0; - /* regs->tf_ds = 0; - regs->tf_es = 0; - regs->tf_fs = 0; - regs->tf_gs = 0; */ - regs->tf_cs = 0; - - /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ - regs->tf_rbx = ps_strings; - - /* - * Reset the hardware debug registers if they were in use. - * They won't have any meaning for the newly exec'd process. - */ - if (pcb->pcb_flags & PCB_DBREGS) { - pcb->pcb_dr0 = 0; - pcb->pcb_dr1 = 0; - pcb->pcb_dr2 = 0; - pcb->pcb_dr3 = 0; - pcb->pcb_dr6 = 0; - pcb->pcb_dr7 = 0; - if (pcb == td->td_pcb) { - /* - * Clear the debug registers on the running - * CPU, otherwise they will end up affecting - * the next process we switch to. - */ - reset_dbregs(); - } - pcb->pcb_flags &= ~PCB_DBREGS; - } - - /* - * Initialize the math emulator (if any) for the current process. - * Actually, just clear the bit that says that the emulator has - * been initialized. Initialization is delayed until the process - * traps to the emulator (if it is done at all) mainly because - * emulators don't provide an entry point for initialization. - */ - /* pcb->pcb_flags &= ~FP_SOFTFP; */ - - /* - * note: do not set CR0_TS here. npxinit() must do it after clearing - * gd_npxthread. 
Otherwise a preemptive interrupt thread may panic - * in npxdna(). - */ - crit_enter(); -#if 0 - load_cr0(rcr0() | CR0_MP); -#endif - -#if NNPX > 0 - /* Initialize the npx (if any) for the current process. */ - npxinit(__INITIAL_NPXCW__); -#endif - crit_exit(); - - /* - * note: linux emulator needs edx to be 0x0 on entry, which is - * handled in execve simply by setting the 64 bit syscall - * return value to 0. - */ -} - -void -cpu_setregs(void) -{ -#if 0 - unsigned int cr0; - - cr0 = rcr0(); - cr0 |= CR0_NE; /* Done by npxinit() */ - cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ - cr0 |= CR0_WP | CR0_AM; - load_cr0(cr0); - load_gs(_udatasel); -#endif -} - -static int -sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) -{ - int error; - error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, - req); - if (!error && req->newptr) - resettodr(); - return (error); -} - -SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, - &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); - -extern u_long bootdev; /* not a cdev_t - encoding is different */ -SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, - CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)"); - -/* - * Initialize 386 and configure to run kernel - */ - -/* - * Initialize segments & interrupt table - */ - -extern struct user *proc0paddr; - -#if 0 - -extern inthand_t - IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), - IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), - IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), - IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(syscall), - IDTVEC(rsvd0); -extern inthand_t - IDTVEC(int0x80_syscall); - -#endif - -#ifdef DEBUG_INTERRUPTS -extern inthand_t *Xrsvdary[256]; -#endif - -int -ptrace_set_pc(struct lwp *lp, unsigned long addr) -{ - lp->lwp_md.md_regs->tf_rip = addr; - return (0); -} - -int -ptrace_single_step(struct lwp *lp) -{ - lp->lwp_md.md_regs->tf_rflags |= PSL_T; - 
return (0); -} - -int -fill_regs(struct lwp *lp, struct reg *regs) -{ - struct trapframe *tp; - - tp = lp->lwp_md.md_regs; - /* regs->r_gs = tp->tf_gs; - regs->r_fs = tp->tf_fs; - regs->r_es = tp->tf_es; - regs->r_ds = tp->tf_ds; */ - regs->r_rdi = tp->tf_rdi; - regs->r_rsi = tp->tf_rsi; - regs->r_rbp = tp->tf_rbp; - regs->r_rbx = tp->tf_rbx; - regs->r_rdx = tp->tf_rdx; - regs->r_rcx = tp->tf_rcx; - regs->r_rax = tp->tf_rax; - regs->r_rip = tp->tf_rip; - regs->r_cs = tp->tf_cs; - regs->r_rflags = tp->tf_rflags; - regs->r_rsp = tp->tf_rsp; - regs->r_ss = tp->tf_ss; - return (0); -} - -int -set_regs(struct lwp *lp, struct reg *regs) -{ - struct trapframe *tp; - - tp = lp->lwp_md.md_regs; - if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) || - !CS_SECURE(regs->r_cs)) - return (EINVAL); - /* tp->tf_gs = regs->r_gs; - tp->tf_fs = regs->r_fs; - tp->tf_es = regs->r_es; - tp->tf_ds = regs->r_ds; */ - tp->tf_rdi = regs->r_rdi; - tp->tf_rsi = regs->r_rsi; - tp->tf_rbp = regs->r_rbp; - tp->tf_rbx = regs->r_rbx; - tp->tf_rdx = regs->r_rdx; - tp->tf_rcx = regs->r_rcx; - tp->tf_rax = regs->r_rax; - tp->tf_rip = regs->r_rip; - tp->tf_cs = regs->r_cs; - tp->tf_rflags = regs->r_rflags; - tp->tf_rsp = regs->r_rsp; - tp->tf_ss = regs->r_ss; - return (0); -} - -#ifndef CPU_DISABLE_SSE -static void -fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) -{ - struct env87 *penv_87 = &sv_87->sv_env; - struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - /* FPU control/status */ - penv_87->en_cw = penv_xmm->en_cw; - penv_87->en_sw = penv_xmm->en_sw; - penv_87->en_tw = penv_xmm->en_tw; - penv_87->en_fip = penv_xmm->en_fip; - penv_87->en_fcs = penv_xmm->en_fcs; - penv_87->en_opcode = penv_xmm->en_opcode; - penv_87->en_foo = penv_xmm->en_foo; - penv_87->en_fos = penv_xmm->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; - - sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; -} - -static void -set_fpregs_xmm(struct save87 *sv_87, struct savexmm 
*sv_xmm) -{ - struct env87 *penv_87 = &sv_87->sv_env; - struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - /* FPU control/status */ - penv_xmm->en_cw = penv_87->en_cw; - penv_xmm->en_sw = penv_87->en_sw; - penv_xmm->en_tw = penv_87->en_tw; - penv_xmm->en_fip = penv_87->en_fip; - penv_xmm->en_fcs = penv_87->en_fcs; - penv_xmm->en_opcode = penv_87->en_opcode; - penv_xmm->en_foo = penv_87->en_foo; - penv_xmm->en_fos = penv_87->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; - - sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; -} -#endif /* CPU_DISABLE_SSE */ - -int -fill_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ -#ifndef CPU_DISABLE_SSE - if (cpu_fxsr) { - fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm, - (struct save87 *)fpregs); - return (0); - } -#endif /* CPU_DISABLE_SSE */ - bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); - return (0); -} - -int -set_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ -#ifndef CPU_DISABLE_SSE - if (cpu_fxsr) { - set_fpregs_xmm((struct save87 *)fpregs, - &lp->lwp_thread->td_pcb->pcb_save.sv_xmm); - return (0); - } -#endif /* CPU_DISABLE_SSE */ - bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs); - return (0); -} - -int -fill_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - if (lp == NULL) { - dbregs->dr[0] = rdr0(); - dbregs->dr[1] = rdr1(); - dbregs->dr[2] = rdr2(); - dbregs->dr[3] = rdr3(); - dbregs->dr[4] = rdr4(); - dbregs->dr[5] = rdr5(); - dbregs->dr[6] = rdr6(); - dbregs->dr[7] = rdr7(); - } else { - struct pcb *pcb; - - pcb = lp->lwp_thread->td_pcb; - dbregs->dr[0] = pcb->pcb_dr0; - dbregs->dr[1] = pcb->pcb_dr1; - dbregs->dr[2] = pcb->pcb_dr2; - dbregs->dr[3] = pcb->pcb_dr3; - dbregs->dr[4] = 0; - dbregs->dr[5] = 0; - dbregs->dr[6] = pcb->pcb_dr6; - dbregs->dr[7] = pcb->pcb_dr7; - } - return (0); -} - -int -set_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - if (lp == NULL) { - load_dr0(dbregs->dr[0]); - 
load_dr1(dbregs->dr[1]); - load_dr2(dbregs->dr[2]); - load_dr3(dbregs->dr[3]); - load_dr4(dbregs->dr[4]); - load_dr5(dbregs->dr[5]); - load_dr6(dbregs->dr[6]); - load_dr7(dbregs->dr[7]); - } else { - struct pcb *pcb; - struct ucred *ucred; - int i; - uint32_t mask1, mask2; - - /* - * Don't let an illegal value for dr7 get set. Specifically, - * check for undefined settings. Setting these bit patterns - * result in undefined behaviour and can lead to an unexpected - * TRCTRAP. - */ - for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; - i++, mask1 <<= 2, mask2 <<= 2) - if ((dbregs->dr[7] & mask1) == mask2) - return (EINVAL); - - pcb = lp->lwp_thread->td_pcb; - ucred = lp->lwp_proc->p_ucred; - - /* - * Don't let a process set a breakpoint that is not within the - * process's address space. If a process could do this, it - * could halt the system by setting a breakpoint in the kernel - * (if ddb was enabled). Thus, we need to check to make sure - * that no breakpoints are being enabled for addresses outside - * process's address space, unless, perhaps, we were called by - * uid 0. - * - * XXX - what about when the watched area of the user's - * address space is written into from within the kernel - * ... wouldn't that still cause a breakpoint to be generated - * from within kernel mode? 
- */ - - if (suser_cred(ucred, 0) != 0) { - if (dbregs->dr[7] & 0x3) { - /* dr0 is enabled */ - if (dbregs->dr[0] >= VM_MAX_USER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<2)) { - /* dr1 is enabled */ - if (dbregs->dr[1] >= VM_MAX_USER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<4)) { - /* dr2 is enabled */ - if (dbregs->dr[2] >= VM_MAX_USER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<6)) { - /* dr3 is enabled */ - if (dbregs->dr[3] >= VM_MAX_USER_ADDRESS) - return (EINVAL); - } - } - - pcb->pcb_dr0 = dbregs->dr[0]; - pcb->pcb_dr1 = dbregs->dr[1]; - pcb->pcb_dr2 = dbregs->dr[2]; - pcb->pcb_dr3 = dbregs->dr[3]; - pcb->pcb_dr6 = dbregs->dr[6]; - pcb->pcb_dr7 = dbregs->dr[7]; - - pcb->pcb_flags |= PCB_DBREGS; - } - - return (0); -} - -#if 0 -/* - * Return > 0 if a hardware breakpoint has been hit, and the - * breakpoint was in user space. Return 0, otherwise. - */ -int -user_dbreg_trap(void) -{ - u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ - u_int32_t bp; /* breakpoint bits extracted from dr6 */ - int nbp; /* number of breakpoints that triggered */ - caddr_t addr[4]; /* breakpoint addresses */ - int i; - - dr7 = rdr7(); - if ((dr7 & 0x000000ff) == 0) { - /* - * all GE and LE bits in the dr7 register are zero, - * thus the trap couldn't have been caused by the - * hardware debug registers - */ - return 0; - } - - nbp = 0; - dr6 = rdr6(); - bp = dr6 & 0x0000000f; - - if (!bp) { - /* - * None of the breakpoint bits are set meaning this - * trap was not caused by any of the debug registers - */ - return 0; - } - - /* - * at least one of the breakpoints were hit, check to see - * which ones and if any of them are user space addresses - */ - - if (bp & 0x01) { - addr[nbp++] = (caddr_t)rdr0(); - } - if (bp & 0x02) { - addr[nbp++] = (caddr_t)rdr1(); - } - if (bp & 0x04) { - addr[nbp++] = (caddr_t)rdr2(); - } - if (bp & 0x08) { - addr[nbp++] = (caddr_t)rdr3(); - } - - for (i=0; i>6) -#define f_reg(byte) 
(((byte)>>3)&0x7) -#define f_rm(byte) ((byte)&0x7) +#define f_mod(rex, byte) ((byte)>>6) +#define f_reg(rex, byte) ((((byte)>>3)&0x7) | (rex & REX_R ? 0x8 : 0x0)) +#define f_rm(rex, byte) (((byte)&0x7) | (rex & REX_B ? 0x8 : 0x0)) -#define sib_ss(byte) ((byte)>>6) -#define sib_index(byte) (((byte)>>3)&0x7) -#define sib_base(byte) ((byte)&0x7) +#define sib_ss(rex, byte) ((byte)>>6) +#define sib_index(rex, byte) ((((byte)>>3)&0x7) | (rex & REX_X ? 0x8 : 0x0)) +#define sib_base(rex, byte) (((byte)&0x7) | (rex & REX_B ? 0x8 : 0x0)) struct i_addr { int is_reg; /* if reg, reg number is in 'disp' */ @@ -821,24 +882,27 @@ struct i_addr { const char * base; const char * index; int ss; - int defss; /* default stack segment */ -}; - -static const char * const db_index_reg_16[8] = { - "%bx,%si", - "%bx,%di", - "%bp,%si", - "%bp,%di", - "%si", - "%di", - "%bp", - "%bx" }; -static const char * const db_reg[3][8] = { - { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }, - { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }, - { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" } +static const char * const db_reg[2][4][16] = { + + {{"%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" }, + { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" }, + { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }, + { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }}, + + {{"%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" }, + { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" }, + { "%eax", "%ecx", 
"%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }, + { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }} }; static const char * const db_seg_reg[8] = { @@ -863,25 +927,29 @@ static const int db_lengths[] = { (loc) += (size); static db_addr_t - db_disasm_esc (db_addr_t loc, int inst, int short_addr, - int size, const char *seg); -static void db_print_address (const char *seg, int size, - struct i_addr *addrp); + db_disasm_esc(db_addr_t loc, int inst, int rex, int short_addr, + int size, const char *seg); +static void db_print_address(const char *seg, int size, int rex, + struct i_addr *addrp); static db_addr_t - db_read_address (db_addr_t loc, int short_addr, - int regmodrm, struct i_addr *addrp); + db_read_address(db_addr_t loc, int short_addr, int rex, int regmodrm, + struct i_addr *addrp); /* * Read address at location and return updated location. 
*/ static db_addr_t -db_read_address(db_addr_t loc, int short_addr, int regmodrm, - struct i_addr *addrp) +db_read_address(loc, short_addr, rex, regmodrm, addrp) + db_addr_t loc; + int short_addr; + int rex; + int regmodrm; + struct i_addr * addrp; /* out */ { - int mod, rm, sib, index, disp; + int mod, rm, sib, index, disp, size, have_sib; - mod = f_mod(regmodrm); - rm = f_rm(regmodrm); + mod = f_mod(rex, regmodrm); + rm = f_rm(rex, regmodrm); if (mod == 3) { addrp->is_reg = TRUE; @@ -890,104 +958,72 @@ db_read_address(db_addr_t loc, int short_addr, int regmodrm, } addrp->is_reg = FALSE; addrp->index = 0; - addrp->ss = 0; - addrp->defss = 0; - - if (short_addr) { - if (mod != 3) { - switch(rm) { - case 0: - case 1: - addrp->index = "%bx"; - break; - case 2: - case 3: - addrp->index = "%bp"; - addrp->defss = 1; - break; - case 6: - if (mod == 1 || mod == 2) - addrp->defss = 1; - break; - } - } - switch (mod) { - case 0: - if (rm == 6) { - get_value_inc(disp, loc, 2, FALSE); - addrp->disp = disp; - addrp->base = 0; - } - else { - addrp->disp = 0; - addrp->base = db_index_reg_16[rm]; - } - break; - case 1: - get_value_inc(disp, loc, 1, TRUE); - disp &= 0xFFFF; - addrp->disp = disp; - addrp->base = db_index_reg_16[rm]; - break; - case 2: - get_value_inc(disp, loc, 2, FALSE); - addrp->disp = disp; - addrp->base = db_index_reg_16[rm]; - break; - } - } else { - if (mod != 3 && rm == 4) { - get_value_inc(sib, loc, 1, FALSE); - rm = sib_base(sib); - index = sib_index(sib); - if (index != 4) - addrp->index = db_reg[LONG][index]; - addrp->ss = sib_ss(sib); - } - - switch (mod) { - case 0: - if (rm == 5) { - get_value_inc(addrp->disp, loc, 4, FALSE); - addrp->base = 0; - } - else { - addrp->disp = 0; - addrp->base = db_reg[LONG][rm]; - } - break; - case 1: - get_value_inc(disp, loc, 1, TRUE); - addrp->disp = disp; - addrp->base = db_reg[LONG][rm]; - break; + if (short_addr) + size = LONG; + else + size = QUAD; + + if ((rm & 0x7) == 4) { + get_value_inc(sib, loc, 1, FALSE); + 
rm = sib_base(rex, sib); + index = sib_index(rex, sib); + if (index != 4) + addrp->index = db_reg[1][size][index]; + addrp->ss = sib_ss(rex, sib); + have_sib = 1; + } else + have_sib = 0; - case 2: - get_value_inc(disp, loc, 4, FALSE); - addrp->disp = disp; - addrp->base = db_reg[LONG][rm]; - break; - } + switch (mod) { + case 0: + if (rm == 5) { + get_value_inc(addrp->disp, loc, 4, FALSE); + if (have_sib) + addrp->base = 0; + else if (short_addr) + addrp->base = "%eip"; + else + addrp->base = "%rip"; + } else { + addrp->disp = 0; + addrp->base = db_reg[1][size][rm]; + } + break; + + case 1: + get_value_inc(disp, loc, 1, TRUE); + addrp->disp = disp; + addrp->base = db_reg[1][size][rm]; + break; + + case 2: + get_value_inc(disp, loc, 4, FALSE); + addrp->disp = disp; + addrp->base = db_reg[1][size][rm]; + break; } return (loc); } static void -db_print_address(const char *seg, int size, struct i_addr *addrp) +db_print_address(seg, size, rex, addrp) + const char * seg; + int size; + int rex; + struct i_addr * addrp; { if (addrp->is_reg) { - db_printf("%s", db_reg[size][addrp->disp]); + db_printf("%s", db_reg[rex != 0 ? 1 : 0][(size == LONG && (rex & REX_W)) ? QUAD : size][addrp->disp]); return; } if (seg) { db_printf("%s:", seg); - } else if (addrp->defss) { - db_printf("%%ss:"); } - db_printsym((db_addr_t)addrp->disp, DB_STGY_ANY); + if (addrp->disp != 0 || (addrp->base == 0 && addrp->index == 0)) + db_printsym((db_addr_t)addrp->disp, DB_STGY_ANY); if (addrp->base != 0 || addrp->index != 0) { db_printf("("); if (addrp->base) @@ -1003,8 +1039,13 @@ db_print_address(const char *seg, int size, struct i_addr *addrp) * and return updated location. 
*/ static db_addr_t -db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, - const char *seg) +db_disasm_esc(loc, inst, rex, short_addr, size, seg) + db_addr_t loc; + int inst; + int rex; + int short_addr; + int size; + const char * seg; { int regmodrm; const struct finst * fp; @@ -1013,8 +1054,8 @@ db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, const char * name; get_value_inc(regmodrm, loc, 1, FALSE); - fp = &db_Esc_inst[inst - 0xd8][f_reg(regmodrm)]; - mod = f_mod(regmodrm); + fp = &db_Esc_inst[inst - 0xd8][f_reg(rex, regmodrm)]; + mod = f_mod(rex, regmodrm); if (mod != 3) { if (*fp->f_name == '\0') { db_printf(""); @@ -1023,7 +1064,7 @@ db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, /* * Normal address modes. */ - loc = db_read_address(loc, short_addr, regmodrm, &address); + loc = db_read_address(loc, short_addr, rex, regmodrm, &address); db_printf("%s", fp->f_name); switch(fp->f_size) { case SNGL: @@ -1048,7 +1089,7 @@ db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, break; } db_printf("\t"); - db_print_address(seg, BYTE, &address); + db_print_address(seg, BYTE, rex, &address); } else { /* @@ -1057,24 +1098,24 @@ db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, switch (fp->f_rrmode) { case op2(ST,STI): name = (fp->f_rrname) ? fp->f_rrname : fp->f_name; - db_printf("%s\t%%st,%%st(%d)",name,f_rm(regmodrm)); + db_printf("%s\t%%st,%%st(%d)",name,f_rm(rex, regmodrm)); break; case op2(STI,ST): name = (fp->f_rrname) ? fp->f_rrname : fp->f_name; - db_printf("%s\t%%st(%d),%%st",name, f_rm(regmodrm)); + db_printf("%s\t%%st(%d),%%st",name, f_rm(rex, regmodrm)); break; case op1(STI): name = (fp->f_rrname) ? 
fp->f_rrname : fp->f_name; - db_printf("%s\t%%st(%d)",name, f_rm(regmodrm)); + db_printf("%s\t%%st(%d)",name, f_rm(rex, regmodrm)); break; case op1(X): - name = ((const char * const *)fp->f_rrname)[f_rm(regmodrm)]; + name = ((const char * const *)fp->f_rrname)[f_rm(rex, regmodrm)]; if (*name == '\0') goto bad; db_printf("%s", name); break; case op1(XA): - name = ((const char * const *)fp->f_rrname)[f_rm(regmodrm)]; + name = ((const char * const *)fp->f_rrname)[f_rm(rex, regmodrm)]; if (*name == '\0') goto bad; db_printf("%s\t%%ax", name); @@ -1091,14 +1132,14 @@ db_disasm_esc(db_addr_t loc, int inst, int short_addr, int size, /* * Disassemble instruction at 'loc'. 'altfmt' specifies an - * (optional) alternate format. Return the address of the - * start of the next instruction. - * - * If regs is non-null it may be used to obtain context, such as - * whether we are in word or long mode. + * (optional) alternate format. Return address of start of + * next instruction. */ db_addr_t -db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) +db_disasm(loc, altfmt, dummy) + db_addr_t loc; + boolean_t altfmt; + db_regs_t *dummy; { int inst; int size; @@ -1108,29 +1149,22 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) const char * i_name; int i_size; int i_mode; + int rex = 0; int regmodrm = 0; boolean_t first; int displ; int prefix; int imm; int imm2; + long imm64; int len; struct i_addr address; get_value_inc(inst, loc, 1, FALSE); + short_addr = FALSE; + size = LONG; seg = 0; -#ifdef _GDT_ARRAY_PRESENT - if (regs && gdt[mycpu->gd_cpuid * NGDT + IDXSEL(regs->tf_cs & 0xFFFF)].sd.sd_def32 == 0) { - size = WORD; - short_addr = TRUE; - } else -#endif - { - size = LONG; - short_addr = FALSE; - } - /* * Get prefixes */ @@ -1174,13 +1208,17 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) prefix = FALSE; break; } + if (inst >= 0x40 && inst < 0x50) { + rex = inst; + prefix = TRUE; + } if (prefix) { get_value_inc(inst, loc, 1, FALSE); } } while 
(prefix); if (inst >= 0xd8 && inst <= 0xdf) { - loc = db_disasm_esc(loc, inst, short_addr, size, seg); + loc = db_disasm_esc(loc, inst, rex, short_addr, size, seg); db_printf("\n"); return (loc); } @@ -1200,7 +1238,7 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) if (ip->i_has_modrm) { get_value_inc(regmodrm, loc, 1, FALSE); - loc = db_read_address(loc, short_addr, regmodrm, &address); + loc = db_read_address(loc, short_addr, rex, regmodrm, &address); } i_name = ip->i_name; @@ -1210,17 +1248,17 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) if (ip->i_extra == db_Grp1 || ip->i_extra == db_Grp2 || ip->i_extra == db_Grp6 || ip->i_extra == db_Grp7 || ip->i_extra == db_Grp8 || ip->i_extra == db_Grp9) { - i_name = ((const char * const *)ip->i_extra)[f_reg(regmodrm)]; + i_name = ((const char * const *)ip->i_extra)[f_reg(rex, regmodrm)]; } else if (ip->i_extra == db_Grp3) { ip = ip->i_extra; - ip = &ip[f_reg(regmodrm)]; + ip = &ip[f_reg(rex, regmodrm)]; i_name = ip->i_name; i_mode = ip->i_mode; } else if (ip->i_extra == db_Grp4 || ip->i_extra == db_Grp5) { ip = ip->i_extra; - ip = &ip[f_reg(regmodrm)]; + ip = &ip[f_reg(rex, regmodrm)]; i_name = ip->i_name; i_mode = ip->i_mode; i_size = ip->i_size; @@ -1234,6 +1272,10 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) } else { db_printf("%s", i_name); + if ((inst >= 0x50 && inst <= 0x5f) || inst == 0x68 || inst == 0x6a) { + i_size = NONE; + db_printf("q"); + } if (i_size != NONE) { if (i_size == BYTE) { db_printf("b"); @@ -1245,8 +1287,12 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) } else if (size == WORD) db_printf("w"); - else - db_printf("l"); + else { + if (rex & REX_W) + db_printf("q"); + else + db_printf("l"); + } } } db_printf("\t"); @@ -1260,52 +1306,56 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) switch (i_mode & 0xFF) { case E: - db_print_address(seg, size, &address); + db_print_address(seg, size, rex, &address); break; case Eind: 
db_printf("*"); - db_print_address(seg, size, &address); + db_print_address(seg, size, rex, &address); break; case El: - db_print_address(seg, LONG, &address); + db_print_address(seg, (rex & REX_W) ? QUAD : LONG, rex, &address); + break; + + case EL: + db_print_address(seg, LONG, 0, &address); break; case Ew: - db_print_address(seg, WORD, &address); + db_print_address(seg, WORD, rex, &address); break; case Eb: - db_print_address(seg, BYTE, &address); + db_print_address(seg, BYTE, rex, &address); break; case R: - db_printf("%s", db_reg[size][f_reg(regmodrm)]); + db_printf("%s", db_reg[rex != 0 ? 1 : 0][(size == LONG && (rex & REX_W)) ? QUAD : size][f_reg(rex, regmodrm)]); break; case Rw: - db_printf("%s", db_reg[WORD][f_reg(regmodrm)]); + db_printf("%s", db_reg[rex != 0 ? 1 : 0][WORD][f_reg(rex, regmodrm)]); break; case Ri: - db_printf("%s", db_reg[size][f_rm(inst)]); + db_printf("%s", db_reg[0][QUAD][f_rm(rex, inst)]); break; case Ril: - db_printf("%s", db_reg[LONG][f_rm(inst)]); + db_printf("%s", db_reg[rex != 0 ? 1 : 0][(rex & REX_R) ? QUAD : LONG][f_rm(rex, inst)]); break; case S: - db_printf("%s", db_seg_reg[f_reg(regmodrm)]); + db_printf("%s", db_seg_reg[f_reg(rex, regmodrm)]); break; case Si: - db_printf("%s", db_seg_reg[f_reg(inst)]); + db_printf("%s", db_seg_reg[f_reg(rex, inst)]); break; case A: - db_printf("%s", db_reg[size][0]); /* acc */ + db_printf("%s", db_reg[rex != 0 ? 1 : 0][size][0]); /* acc */ break; case BX: @@ -1325,23 +1375,23 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) case SI: if (seg) db_printf("%s:", seg); - db_printf("(%s)", short_addr ? "%si" : "%esi"); + db_printf("(%s)", short_addr ? "%si" : "%rsi"); break; case DI: - db_printf("%%es:(%s)", short_addr ? "%di" : "%edi"); + db_printf("%%es:(%s)", short_addr ? 
"%di" : "%rdi"); break; case CR: - db_printf("%%cr%d", f_reg(regmodrm)); + db_printf("%%cr%d", f_reg(rex, regmodrm)); break; case DR: - db_printf("%%dr%d", f_reg(regmodrm)); + db_printf("%%dr%d", f_reg(rex, regmodrm)); break; case TR: - db_printf("%%tr%d", f_reg(regmodrm)); + db_printf("%%tr%d", f_reg(rex, regmodrm)); break; case I: @@ -1351,7 +1401,7 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) break; case Is: - len = db_lengths[size]; + len = db_lengths[(size == LONG && (rex & REX_W)) ? QUAD : size]; get_value_inc(imm, loc, len, FALSE); db_printf("$%+#r", imm); break; @@ -1379,6 +1429,12 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) db_printf("$%#r", imm); break; + case Ilq: + len = db_lengths[rex & REX_W ? QUAD : LONG]; + get_value_inc(imm64, loc, len, FALSE); + db_printf("$%#lr", imm64); + break; + case O: len = (short_addr ? 2 : 4); get_value_inc(displ, loc, len, FALSE); @@ -1397,7 +1453,7 @@ db_disasm(db_addr_t loc, boolean_t altfmt, db_regs_t *regs) break; case Dl: - len = db_lengths[size]; + len = db_lengths[(size == LONG && (rex & REX_W)) ? QUAD : size]; get_value_inc(displ, loc, len, FALSE); displ += loc; if (size == WORD) diff --git a/sys/platform/pc64/amd64/db_interface.c b/sys/platform/pc64/amd64/db_interface.c index c9dde76fa0..c8dde74136 100644 --- a/sys/platform/pc64/amd64/db_interface.c +++ b/sys/platform/pc64/amd64/db_interface.c @@ -1,4 +1,35 @@ /* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -- + * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. @@ -24,7 +55,7 @@ * rights to redistribute these changes. 
* * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/db_interface.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/db_interface.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ /* @@ -50,6 +81,8 @@ static jmp_buf *db_nofault = 0; extern jmp_buf db_jmpbuf; +extern void gdb_handle_exception (db_regs_t *, int, int); + int db_active; db_regs_t ddb_regs; @@ -121,14 +154,6 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) */ ddb_regs = *regs; - /* - * If in kernel mode, esp and ss are not saved, so dummy them up. - */ - if (ISPL(regs->tf_cs) == 0) { - ddb_regs.tf_rsp = (int)®s->tf_rsp; - ddb_regs.tf_ss = rss(); - } - #ifdef SMP db_printf("\nCPU%d stopping CPUs: 0x%08x\n", mycpu->gd_cpuid, mycpu->gd_other_cpus); @@ -148,7 +173,7 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) db_trap(type, code); cndbctl(FALSE); } else - /* gdb_handle_exception(&ddb_regs, type, code); */ + gdb_handle_exception(&ddb_regs, type, code); db_active--; /* vcons_set_mode(0); */ db_global_jmpbuf_valid = FALSE; @@ -175,17 +200,22 @@ kdb_trap(int type, int code, struct amd64_saved_state *regs) regs->tf_rdx = ddb_regs.tf_rdx; regs->tf_rbx = ddb_regs.tf_rbx; - /* - * If in user mode, the saved ESP and SS were valid, restore them. 
- */ - if (ISPL(regs->tf_cs)) { - regs->tf_rsp = ddb_regs.tf_rsp; - regs->tf_ss = ddb_regs.tf_ss & 0xffff; - } + regs->tf_rsp = ddb_regs.tf_rsp; + regs->tf_ss = ddb_regs.tf_ss & 0xffff; regs->tf_rbp = ddb_regs.tf_rbp; regs->tf_rsi = ddb_regs.tf_rsi; regs->tf_rdi = ddb_regs.tf_rdi; + + regs->tf_r8 = ddb_regs.tf_r8; + regs->tf_r9 = ddb_regs.tf_r9; + regs->tf_r10 = ddb_regs.tf_r10; + regs->tf_r11 = ddb_regs.tf_r11; + regs->tf_r12 = ddb_regs.tf_r12; + regs->tf_r13 = ddb_regs.tf_r13; + regs->tf_r14 = ddb_regs.tf_r14; + regs->tf_r15 = ddb_regs.tf_r15; + /* regs->tf_es = ddb_regs.tf_es & 0xffff; */ /* regs->tf_fs = ddb_regs.tf_fs & 0xffff; */ /* regs->tf_gs = ddb_regs.tf_gs & 0xffff; */ diff --git a/sys/platform/pc64/amd64/db_trace.c b/sys/platform/pc64/amd64/db_trace.c index 77ece5d3ed..d1077f2f65 100644 --- a/sys/platform/pc64/amd64/db_trace.c +++ b/sys/platform/pc64/amd64/db_trace.c @@ -1,4 +1,35 @@ /* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -- + * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. @@ -24,7 +55,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/db_trace.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/db_trace.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -79,6 +110,14 @@ struct db_variable db_regs[] = { { "rdi", &ddb_regs.tf_rdi, FCN_NULL }, { "rip", &ddb_regs.tf_rip, FCN_NULL }, { "rfl", &ddb_regs.tf_rflags, FCN_NULL }, + { "r8", &ddb_regs.tf_r8, FCN_NULL }, + { "r9", &ddb_regs.tf_r9, FCN_NULL }, + { "r10", &ddb_regs.tf_r10, FCN_NULL }, + { "r11", &ddb_regs.tf_r11, FCN_NULL }, + { "r12", &ddb_regs.tf_r12, FCN_NULL }, + { "r13", &ddb_regs.tf_r13, FCN_NULL }, + { "r14", &ddb_regs.tf_r14, FCN_NULL }, + { "r15", &ddb_regs.tf_r15, FCN_NULL }, { "dr0", NULL, db_dr0 }, { "dr1", NULL, db_dr1 }, { "dr2", NULL, db_dr2 }, @@ -108,8 +147,8 @@ struct amd64_frame { static void db_nextframe(struct amd64_frame **, db_addr_t *); static int db_numargs(struct amd64_frame *); -static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t); -static void dl_symbol_values(int callpc, const char **name); +static void db_print_stack_entry(const char *, int, char **, long *, db_addr_t); +static void dl_symbol_values(long callpc, const 
char **name); static char *watchtype_str(int type); @@ -127,6 +166,9 @@ void db_md_list_watchpoints(void); static int db_numargs(struct amd64_frame *fp) { +#if 1 + return (0); /* regparm, needs dwarf2 info */ +#else int args; #if 0 int *argp; @@ -152,21 +194,22 @@ db_numargs(struct amd64_frame *fp) #endif args = 5; return(args); +#endif } static void -db_print_stack_entry(const char *name, int narg, char **argnp, int *argp, +db_print_stack_entry(const char *name, int narg, char **argnp, long *argp, db_addr_t callpc) { db_printf("%s(", name); while (narg) { if (argnp) db_printf("%s=", *argnp++); - db_printf("%r", db_get_value((int)argp, 4, FALSE)); + db_printf("%r", db_get_value((long)argp, 8, FALSE)); argp++; if (--narg != 0) db_printf(","); - } + } db_printf(") at "); db_printsym(callpc, DB_STGY_PROC); db_printf("\n"); @@ -180,12 +223,12 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip) { struct trapframe *tf; int frame_type; - int eip, esp, ebp; + long rip, rsp, rbp; db_expr_t offset; const char *sym, *name; - eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE); - ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE); + rip = db_get_value((long) &(*fp)->f_retaddr, 8, FALSE); + rbp = db_get_value((long) &(*fp)->f_frame, 8, FALSE); /* * Figure out frame type. @@ -193,9 +236,9 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip) frame_type = NORMAL; - sym = db_search_symbol(eip, DB_STGY_ANY, &offset); + sym = db_search_symbol(rip, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); - dl_symbol_values(eip, &name); + dl_symbol_values(rip, &name); if (name != NULL) { if (!strcmp(name, "calltrap")) { frame_type = TRAP; @@ -210,59 +253,59 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip) * Normal frames need no special processing. 
*/ if (frame_type == NORMAL) { - *ip = (db_addr_t) eip; - *fp = (struct amd64_frame *) ebp; + *ip = (db_addr_t) rip; + *fp = (struct amd64_frame *) rbp; return; } - db_print_stack_entry(name, 0, 0, 0, eip); + db_print_stack_entry(name, 0, 0, 0, rip); /* * Point to base of trapframe which is just above the * current frame. */ - tf = (struct trapframe *) ((int)*fp + 8); + tf = (struct trapframe *)((long)*fp + 16); #if 0 - esp = (ISPL(tf->tf_cs) == SEL_UPL) ? tf->tf_rsp : (int)&tf->tf_rsp; + rsp = (ISPL(tf->tf_cs) == SEL_UPL) ? tf->tf_rsp : (long)&tf->tf_rsp; #endif - esp = (int)&tf->tf_rsp; + rsp = (long)&tf->tf_rsp; switch (frame_type) { case TRAP: { - eip = tf->tf_rip; - ebp = tf->tf_rbp; + rip = tf->tf_rip; + rbp = tf->tf_rbp; db_printf( - "--- trap %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", - tf->tf_trapno, eip, esp, ebp); + "--- trap %#r, rip = %#r, rsp = %#r, rbp = %#r ---\n", + tf->tf_trapno, rip, rsp, rbp); } break; case SYSCALL: { - eip = tf->tf_rip; - ebp = tf->tf_rbp; + rip = tf->tf_rip; + rbp = tf->tf_rbp; db_printf( - "--- syscall %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", - tf->tf_rax, eip, esp, ebp); + "--- syscall %#r, rip = %#r, rsp = %#r, rbp = %#r ---\n", + tf->tf_rax, rip, rsp, rbp); } break; case INTERRUPT: - tf = (struct trapframe *)((int)*fp + 16); + tf = (struct trapframe *)((long)*fp + 16); { - eip = tf->tf_rip; - ebp = tf->tf_rbp; + rip = tf->tf_rip; + rbp = tf->tf_rbp; db_printf( - "--- interrupt, eip = %#r, esp = %#r, ebp = %#r ---\n", - eip, esp, ebp); + "--- interrupt, rip = %#r, rsp = %#r, rbp = %#r ---\n", + rip, rsp, rbp); } break; default: break; } - *ip = (db_addr_t) eip; - *fp = (struct amd64_frame *) ebp; + *ip = (db_addr_t) rip; + *fp = (struct amd64_frame *) rbp; } void @@ -281,7 +324,7 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, if (!have_addr) { frame = (struct amd64_frame *)BP_REGS(&ddb_regs); if (frame == NULL) - frame = (struct amd64_frame *)(SP_REGS(&ddb_regs) - 4); + frame = (struct 
amd64_frame *)(SP_REGS(&ddb_regs) - 8); callpc = PC_REGS(&ddb_regs); } else { /* @@ -289,10 +332,10 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, * a convenience. */ frame = (struct amd64_frame *)addr; - for (i = 0; i < 4096; i += 4) { + for (i = 0; i < 4096; i += 8) { struct amd64_frame *check; - check = (struct amd64_frame *)db_get_value((int)((char *)&frame->f_frame + i), 4, FALSE); + check = (struct amd64_frame *)db_get_value((long)((char *)&frame->f_frame + i), 8, FALSE); if ((char *)check - (char *)frame >= 0 && (char *)check - (char *)frame < 4096 ) { @@ -306,7 +349,7 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, } frame = (void *)((char *)frame + i); db_printf("Trace beginning at frame %p\n", frame); - callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); + callpc = (db_addr_t)db_get_value((long)&frame->f_retaddr, 8, FALSE); } first = TRUE; @@ -340,26 +383,26 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, int instr; instr = db_get_value(callpc, 4, FALSE); - if ((instr & 0x00ffffff) == 0x00e58955) { - /* pushl %ebp; movl %esp, %ebp */ + if ((instr & 0xffffffff) == 0xe5894855) { + /* pushq %rbp; movq %rsp, %rbp */ actframe = (struct amd64_frame *) - (SP_REGS(&ddb_regs) - 4); - } else if ((instr & 0x0000ffff) == 0x0000e589) { - /* movl %esp, %ebp */ + (SP_REGS(&ddb_regs) - 8); + } else if ((instr & 0xffffff) == 0xe58948) { + /* movq %rsp, %rbp */ actframe = (struct amd64_frame *) SP_REGS(&ddb_regs); if (ddb_regs.tf_rbp == 0) { /* Fake caller's frame better. */ frame = actframe; } - } else if ((instr & 0x000000ff) == 0x000000c3) { + } else if ((instr & 0xff) == 0xc3) { /* ret */ actframe = (struct amd64_frame *) - (SP_REGS(&ddb_regs) - 4); + (SP_REGS(&ddb_regs) - 8); } else if (offset == 0) { /* Probably a symbol in assembler code. 
*/ actframe = (struct amd64_frame *) - (SP_REGS(&ddb_regs) - 4); + (SP_REGS(&ddb_regs) - 8); } } else if (name != NULL && strcmp(name, "fork_trampoline") == 0) { @@ -386,7 +429,7 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, if (actframe != frame) { /* `frame' belongs to caller. */ callpc = (db_addr_t) - db_get_value((int)&actframe->f_retaddr, 4, FALSE); + db_get_value((long)&actframe->f_retaddr, 8, FALSE); continue; } @@ -399,10 +442,10 @@ db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, void db_print_backtrace(void) { - register_t ebp; + register_t rbp; - /* __asm __volatile("movl %%ebp, %0" : "=r" (ebp)); */ - db_stack_trace_cmd(ebp, 1, -1, NULL); + __asm __volatile("movq %%rbp, %0" : "=r" (rbp)); + db_stack_trace_cmd(rbp, 1, -1, NULL); } #define DB_DRX_FUNC(reg) \ @@ -442,7 +485,7 @@ kamd64_set_watch(int watchnum, unsigned int watchaddr, int size, int access, else return(-1); } - + switch (access) { case DBREG_DR7_EXEC: size = 1; /* size must be 1 for an execution breakpoint */ @@ -455,7 +498,7 @@ kamd64_set_watch(int watchnum, unsigned int watchaddr, int size, int access, } /* - * we can watch a 1, 2, or 4 byte sized location + * we can watch a 1, 2, 4, or 8 byte sized location */ switch (size) { case 1: @@ -466,6 +509,8 @@ kamd64_set_watch(int watchnum, unsigned int watchaddr, int size, int access, break; case 4: mask = 0x03 << 2; + case 8: + mask = 0x02 << 2; break; default: return(-1); @@ -491,10 +536,10 @@ kamd64_clr_watch(int watchnum, struct dbreg *d) { if (watchnum < 0 || watchnum >= 4) return(-1); - + d->dr[7] &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16))); DBREG_DRX(d, watchnum) = 0; - + return(0); } @@ -507,19 +552,21 @@ db_md_set_watchpoint(db_expr_t addr, db_expr_t size) struct dbreg d; fill_dbregs(NULL, &d); - + avail = 0; - for(i=0; i < 4; i++) { + for (i = 0; i < 4; i++) { if ((d.dr[7] & (3 << (i * 2))) == 0) avail++; } - - if (avail * 4 < size) + + if (avail * 8 < size) 
return(-1); - + for (i=0; i < 4 && (size != 0); i++) { if ((d.dr[7] & (3 << (i * 2))) == 0) { - if (size > 4) + if (size >= 8 || (avail == 1 && size > 4)) + wsize = 8; + else if (size > 2) wsize = 4; else wsize = size; @@ -539,12 +586,12 @@ db_md_set_watchpoint(db_expr_t addr, db_expr_t size) int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) { - int i; struct dbreg d; + int i; fill_dbregs(NULL, &d); - for(i=0; i<4; i++) { + for(i = 0; i < 4; i++) { if (d.dr[7] & (3 << (i * 2))) { if ((DBREG_DRX((&d), i) >= addr) && (DBREG_DRX((&d), i) < addr + size)) @@ -583,7 +630,7 @@ db_md_list_watchpoints(void) db_printf("\nhardware watchpoints:\n"); db_printf(" watch status type len address\n" " ----- -------- ---------- --- ----------\n"); - for (i=0; i < 4; i++) { + for (i = 0; i < 4; i++) { if (d.dr[7] & (0x03 << (i * 2))) { unsigned type, len; type = (d.dr[7] >> (16 + (i * 4))) & 3; @@ -597,7 +644,7 @@ db_md_list_watchpoints(void) } db_printf("\ndebug register values:\n"); - for (i=0; i < 8; i++) + for (i = 0; i < 8; i++) db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d),i)); db_printf("\n"); } @@ -607,7 +654,7 @@ db_md_list_watchpoints(void) */ static void -dl_symbol_values(int callpc, const char **name) +dl_symbol_values(long callpc, const char **name) { Dl_info info; diff --git a/sys/platform/pc64/amd64/exception.S b/sys/platform/pc64/amd64/exception.S new file mode 100644 index 0000000000..ae9c540eeb --- /dev/null +++ b/sys/platform/pc64/amd64/exception.S @@ -0,0 +1,512 @@ +/*- + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2007 The FreeBSD Foundation + * Copyright (c) 2008 The DragonFly Project. + * Copyright (c) 2008 Jordan Gordeev. + * All rights reserved. + * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * $DragonFly: src/sys/platform/pc64/amd64/exception.S,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#if JG +#include "opt_atpic.h" +#endif +#include "opt_compat.h" + +#include +#include +#include + +#include "assym.s" + + .text + +/*****************************************************************************/ +/* Trap handling */ +/*****************************************************************************/ +/* + * Trap and fault vector routines. 
+ * + * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes + * state on the stack but also disables interrupts. This is important for + * us for the use of the swapgs instruction. We cannot be interrupted + * until the GS.base value is correct. For most traps, we automatically + * then enable interrupts if the interrupted context had them enabled. + * This is equivalent to the i386 port's use of SDT_SYS386TGT. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process. See amd64/include/frame.h. + * This includes the current RFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu. The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + */ + +MCOUNT_LABEL(user) +MCOUNT_LABEL(btrap) + +/* Traps that we leave interrupts disabled for.. */ +#define TRAP_NOEN(a) \ + subq $TF_RIP,%rsp; \ + movq $(a),TF_TRAPNO(%rsp) ; \ + movq $0,TF_ADDR(%rsp) ; \ + movq $0,TF_ERR(%rsp) ; \ + jmp alltraps_noen +IDTVEC(dbg) + TRAP_NOEN(T_TRCTRAP) +IDTVEC(bpt) + TRAP_NOEN(T_BPTFLT) + +/* Regular traps; The cpu does not supply tf_err for these. 
*/ +#define TRAP(a) \ + subq $TF_RIP,%rsp; \ + movq $(a),TF_TRAPNO(%rsp) ; \ + movq $0,TF_ADDR(%rsp) ; \ + movq $0,TF_ERR(%rsp) ; \ + jmp alltraps +IDTVEC(div) + TRAP(T_DIVIDE) +IDTVEC(ofl) + TRAP(T_OFLOW) +IDTVEC(bnd) + TRAP(T_BOUND) +IDTVEC(ill) + TRAP(T_PRIVINFLT) +IDTVEC(dna) + TRAP(T_DNA) +IDTVEC(fpusegm) + TRAP(T_FPOPFLT) +IDTVEC(mchk) + TRAP(T_MCHK) +IDTVEC(rsvd) + TRAP(T_RESERVED) +IDTVEC(fpu) + TRAP(T_ARITHTRAP) +IDTVEC(xmm) + TRAP(T_XMMFLT) + +/* This group of traps have tf_err already pushed by the cpu */ +#define TRAP_ERR(a) \ + subq $TF_ERR,%rsp; \ + movq $(a),TF_TRAPNO(%rsp) ; \ + movq $0,TF_ADDR(%rsp) ; \ + jmp alltraps +IDTVEC(tss) + TRAP_ERR(T_TSSFLT) +IDTVEC(missing) + TRAP_ERR(T_SEGNPFLT) +IDTVEC(stk) + TRAP_ERR(T_STKFLT) +IDTVEC(align) + TRAP_ERR(T_ALIGNFLT) + + /* + * alltraps entry point. Use swapgs if this is the first time in the + * kernel from userland. Reenable interrupts if they were enabled + * before the trap. This approximates SDT_SYS386TGT on the i386 port. + */ + + SUPERALIGN_TEXT + .globl alltraps + .type alltraps,@function +alltraps: + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz alltraps_testi /* already running with kernel GS.base */ + swapgs +alltraps_testi: + testq $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs + sti +alltraps_pushregs: + movq %rdi,TF_RDI(%rsp) +alltraps_pushregs_no_rdi: + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + FAKE_MCOUNT(TF_RIP(%rsp)) + .globl calltrap + .type calltrap,@function +calltrap: + movq %rsp, %rdi + call trap + MEXITCOUNT + jmp doreti /* Handle any pending ASTs */ + + /* + * alltraps_noen entry point. Unlike alltraps above, we want to + * leave the interrupts disabled. 
This corresponds to + * SDT_SYS386IGT on the i386 port. + */ + SUPERALIGN_TEXT + .globl alltraps_noen + .type alltraps_noen,@function +alltraps_noen: + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz alltraps_pushregs /* already running with kernel GS.base */ + swapgs + jmp alltraps_pushregs + +IDTVEC(dblfault) + subq $TF_ERR,%rsp + movq $T_DOUBLEFLT,TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* already running with kernel GS.base */ + swapgs +1: movq %rsp, %rdi + call dblfault_handler +2: hlt + jmp 2b + +IDTVEC(page) + subq $TF_ERR,%rsp + movq $T_PAGEFLT,TF_TRAPNO(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* already running with kernel GS.base */ + swapgs +1: + movq %rdi,TF_RDI(%rsp) /* free up a GP register */ + movq %cr2,%rdi /* preserve %cr2 before .. */ + movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */ + testq $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs_no_rdi + sti + jmp alltraps_pushregs_no_rdi + + /* + * We have to special-case this one. If we get a trap in doreti() at + * the iretq stage, we'll reenter with the wrong gs state. We'll have + * to do a special the swapgs in this case even coming from the kernel. + * XXX linux has a trap handler for their equivalent of load_gs(). + */ +IDTVEC(prot) + subq $TF_ERR,%rsp + movq $T_PROTFLT,TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq %rdi,TF_RDI(%rsp) /* free up a GP register */ + leaq doreti_iret(%rip),%rdi + cmpq %rdi,TF_RIP(%rsp) + je 2f /* kernel but with user gsbase!! 
*/ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* already running with kernel GS.base */ +2: + swapgs +1: + testq $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs_no_rdi + sti + jmp alltraps_pushregs_no_rdi + +/* + * Fast syscall entry point. We enter here with just our new %cs/%ss set, + * and the new privilige level. We are still running on the old user stack + * pointer. We have to juggle a few things around to find our stack etc. + * swapgs gives us access to our PCPU space only. + */ +IDTVEC(fast_syscall) + swapgs + movq %rsp,PCPU(scratch_rsp) + movq PCPU(rsp0),%rsp + /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */ + subq $TF_SIZE,%rsp + /* defer TF_RSP till we have a spare register */ + movq %r11,TF_RFLAGS(%rsp) + movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ + movq PCPU(scratch_rsp),%r11 /* %r11 already saved */ + movq %r11,TF_RSP(%rsp) /* user stack pointer */ + sti + movq $KUDSEL,TF_SS(%rsp) + movq $KUCSEL,TF_CS(%rsp) + movq $2,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) /* arg 1 */ + movq %rsi,TF_RSI(%rsp) /* arg 2 */ + movq %rdx,TF_RDX(%rsp) /* arg 3 */ + movq %r10,TF_RCX(%rsp) /* arg 4 */ + movq %r8,TF_R8(%rsp) /* arg 5 */ + movq %r9,TF_R9(%rsp) /* arg 6 */ + movq %rax,TF_RAX(%rsp) /* syscall number */ + movq %rbx,TF_RBX(%rsp) /* C preserved */ + movq %rbp,TF_RBP(%rsp) /* C preserved */ + movq %r12,TF_R12(%rsp) /* C preserved */ + movq %r13,TF_R13(%rsp) /* C preserved */ + movq %r14,TF_R14(%rsp) /* C preserved */ + movq %r15,TF_R15(%rsp) /* C preserved */ + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call syscall2 + /* JGXXX handle AST's? 
*/ + /* restore preserved registers */ + MEXITCOUNT + movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ + movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ + movq TF_RDX(%rsp),%rdx /* return value 2 */ + movq TF_RAX(%rsp),%rax /* return value 1 */ + movq TF_RBX(%rsp),%rbx /* C preserved */ + movq TF_RBP(%rsp),%rbp /* C preserved */ + movq TF_R12(%rsp),%r12 /* C preserved */ + movq TF_R13(%rsp),%r13 /* C preserved */ + movq TF_R14(%rsp),%r14 /* C preserved */ + movq TF_R15(%rsp),%r15 /* C preserved */ + movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ + movq TF_RIP(%rsp),%rcx /* original %rip */ + movq TF_RSP(%rsp),%r9 /* user stack pointer */ + movq %r9,%rsp /* original %rsp */ + swapgs + sysretq + MEXITCOUNT + jmp doreti + +/* + * Here for CYA insurance, in case a "syscall" instruction gets + * issued from 32 bit compatability mode. MSR_CSTAR has to point + * to *something* if EFER_SCE is enabled. + */ +IDTVEC(fast_syscall32) + sysret + +/* + * NMI handling is special. + * + * First, NMIs do not respect the state of the processor's RFLAGS.IF + * bit and the NMI handler may be invoked at any time, including when + * the processor is in a critical section with RFLAGS.IF == 0. In + * particular, this means that the processor's GS.base values could be + * inconsistent on entry to the handler, and so we need to read + * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a + * C-preserved register, to remember whether to swap GS back on the + * exit path. + * + * Second, the processor treats NMIs specially, blocking further NMIs + * until an 'iretq' instruction is executed. We therefore need to + * execute the NMI handler with interrupts disabled to prevent a + * nested interrupt from executing an 'iretq' instruction and + * inadvertently taking the processor out of NMI mode. + * + * Third, the NMI handler runs on its own stack (tss_ist1), shared + * with the double fault handler. 
+ */ + +IDTVEC(nmi) + subq $TF_RIP,%rsp + movq $(T_NMI),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + xorl %ebx,%ebx + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz nmi_needswapgs /* we came from userland */ + movl $MSR_GSBASE,%ecx + rdmsr + cmpl $VM_MAXUSER_ADDRESS >> 32,%edx + jae nmi_calltrap /* GS.base holds a kernel VA */ +nmi_needswapgs: + incl %ebx + swapgs +/* Note: this label is also used by ddb and gdb: */ +nmi_calltrap: + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call trap + MEXITCOUNT + testl %ebx,%ebx + jz nmi_restoreregs + swapgs +nmi_restoreregs: + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp + iretq + +/* + * This function is what cpu_heavy_restore jumps to after a new process + * is created. The LWKT subsystem switches while holding a critical + * section and we maintain that abstraction here (e.g. because + * cpu_heavy_restore needs it due to PCB_*() manipulation), then get out of + * it before calling the initial function (typically fork_return()) and/or + * returning to user mode. + * + * The MP lock is held on entry, but for processes fork_return(esi) + * releases it. 'doreti' always runs without the MP lock. 
+ */ +ENTRY(fork_trampoline) + movq PCPU(curthread),%rax + subl $TDPRI_CRIT,TD_PRI(%rax) + + /* + * cpu_set_fork_handler intercepts this function call to + * have this call a non-return function to stay in kernel mode. + * + * initproc has its own fork handler, start_init(), which DOES + * return. + * + * %rbx - chaining function (typically fork_return) + * %r12 -> %rdi (argument) + * frame-> %rsi (trap frame) + * + * void (func:rbx)(arg:rdi, trapframe:rsi) + */ + movq %rsp, %rsi /* pass trapframe by reference */ + movq %r12, %rdi /* arg1 */ + call *%rbx /* function */ + + /* cut from syscall */ + + sti + call splz + +#if defined(INVARIANTS) && defined(SMP) + movq PCPU(curthread),%rax + cmpl $0,TD_MPCOUNT(%rax) + je 1f + movq $pmsg4, %rdi + movl TD_MPCOUNT(%rax), %rsi + movq %rbx, %rdx + xorl %eax, %eax + call panic +pmsg4: .asciz "fork_trampoline mpcount %d after calling %p" + /* JG what's the purpose of this alignment and is it enough on amd64? */ + .p2align 2 +1: +#endif + /* + * Return via doreti to handle ASTs. + * + * trapframe is at the top of the stack. + */ +#if JG + pushl $0 /* cpl to restore */ +#endif + MEXITCOUNT + jmp doreti + +/* + * To efficiently implement classification of trap and interrupt handlers + * for profiling, there must be only trap handlers between the labels btrap + * and bintr, and only interrupt handlers between the labels bintr and + * eintr. This is implemented (partly) by including files that contain + * some of the handlers. Before including the files, set up a normal asm + * environment so that the included files doen't need to know that they are + * included. 
+ */ + +#ifdef COMPAT_IA32 + .data + .p2align 4 + .text + SUPERALIGN_TEXT + +#include +#endif + + .data + .p2align 4 + .text + SUPERALIGN_TEXT +MCOUNT_LABEL(bintr) + +#if JG +#include +#endif + +#ifdef DEV_ATPIC + .data + .p2align 4 + .text + SUPERALIGN_TEXT + +#include +#endif + + .text +MCOUNT_LABEL(eintr) + diff --git a/sys/platform/pc64/amd64/exception.c b/sys/platform/pc64/amd64/exception.c deleted file mode 100644 index fbe8865ef9..0000000000 --- a/sys/platform/pc64/amd64/exception.c +++ /dev/null @@ -1,122 +0,0 @@ - -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/pc64/amd64/Attic/exception.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ - */ - -#include "opt_ddb.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -int _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); -int _udatasel = LSEL(LUDATA_SEL, SEL_UPL); - -static void exc_segfault(int signo, siginfo_t *info, void *ctx); -#ifdef DDB -static void exc_debugger(int signo, siginfo_t *info, void *ctx); -#endif - -/* - * IPIs are 'fast' interrupts, so we deal with them directly from our - * signal handler. - */ - -#ifdef SMP - -static -void -ipisig(int nada, siginfo_t *info, void *ctxp) -{ - ++mycpu->gd_intr_nesting_level; - if (curthread->td_pri < TDPRI_CRIT) { - curthread->td_pri += TDPRI_CRIT; - lwkt_process_ipiq(); - curthread->td_pri -= TDPRI_CRIT; - } else { - need_ipiq(); - } - --mycpu->gd_intr_nesting_level; -} - -#endif - -void -init_exceptions(void) -{ -} - -/* - * This function handles a segmentation fault. - * - * XXX We assume that trapframe is a subset of ucontext. It is as of - * this writing. 
- */ -static void -exc_segfault(int signo, siginfo_t *info, void *ctxp) -{ - ucontext_t *ctx = ctxp; - -#if 0 - kprintf("CAUGHT SEGFAULT EIP %08x ERR %08x TRAPNO %d err %d\n", - ctx->uc_mcontext.mc_eip, - ctx->uc_mcontext.mc_err, - ctx->uc_mcontext.mc_trapno & 0xFFFF, - ctx->uc_mcontext.mc_trapno >> 16); - kern_trap((struct trapframe *)&ctx->uc_mcontext.mc_gs); -#endif - splz(); -} - -#ifdef DDB - -static void -exc_debugger(int signo, siginfo_t *info, void *ctx) -{ - Debugger("interrupt from console"); -} - -#endif diff --git a/sys/platform/pc64/amd64/fork_tramp.s b/sys/platform/pc64/amd64/fork_tramp.s deleted file mode 100644 index cdf93d2cfe..0000000000 --- a/sys/platform/pc64/amd64/fork_tramp.s +++ /dev/null @@ -1,108 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/Attic/fork_tramp.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $ - */ - -#include "use_npx.h" - -#include -#include -#include -#include -#include - -#include "assym.s" - - .text - -/* - * This function is what cpu_heavy_restore jumps to after a new process - * is created. The LWKT subsystem switches while holding a critical - * section and we maintain that abstraction here (e.g. because - * cpu_heavy_restore needs it due to PCB_*() manipulation), then get out of - * it before calling the initial function (typically fork_return()) and/or - * returning to user mode. - * - * The MP lock is held on entry, but for processes fork_return(esi) - * releases it. 'doreti' always runs without the MP lock. - */ -ENTRY(fork_trampoline) - movl PCPU(curthread),%eax - subl $TDPRI_CRIT,TD_PRI(%eax) - - /* - * cpu_set_fork_handler intercepts this function call to - * have this call a non-return function to stay in kernel mode. - * - * initproc has its own fork handler, start_init(), which DOES - * return. 
- * - * The function (set in pcb_esi) gets passed two arguments, - * the primary parameter set in pcb_ebx and a pointer to the - * trapframe. - * void (func)(int arg, struct trapframe *frame); - */ -#if 0 - pushl %esp /* pass frame by reference */ - pushl %ebx /* arg1 */ - call *%esi /* function */ - addl $8,%esp - /* cut from syscall */ -#endif - call splz - -#if defined(INVARIANTS) && defined(SMP) - movl PCPU(curthread),%eax - cmpl $0,TD_MPCOUNT(%eax) - je 1f - pushl %esi - pushl TD_MPCOUNT(%eax) - pushl $pmsg4 - call panic -pmsg4: .asciz "fork_trampoline mpcount %d after calling %p" - .p2align 2 -1: -#endif - /* - * Return via doreti to handle ASTs. - */ -#if 0 - MEXITCOUNT - pushl $0 /* if_ppl */ - pushl $0 /* if_vec */ - pushl %esp /* pass by reference */ - call go_user - /* NOT REACHED */ -#endif - diff --git a/sys/platform/pc64/amd64/genassym.c b/sys/platform/pc64/amd64/genassym.c index 1fe50051ca..64303b4636 100644 --- a/sys/platform/pc64/amd64/genassym.c +++ b/sys/platform/pc64/amd64/genassym.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 1982, 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -35,7 +36,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/genassym.c,v 1.1 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/genassym.c,v 1.2 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -68,12 +69,14 @@ #include #include #include +#include #include ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); +ASSYM(LWP_VMSPACE, offsetof(struct lwp, lwp_vmspace)); ASSYM(UPAGES, UPAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); @@ -86,20 +89,131 @@ ASSYM(PDRSHIFT, PDRSHIFT); ASSYM(USRSTACK, USRSTACK); ASSYM(KERNBASE, KERNBASE); +ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); +ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); +ASSYM(V_SENDSYS, offsetof(struct vmmeter, v_sendsys)); +ASSYM(V_WAITSYS, offsetof(struct vmmeter, v_waitsys)); +ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); +ASSYM(V_FORWARDED_INTS, offsetof(struct vmmeter, v_forwarded_ints)); +ASSYM(V_FORWARDED_HITS, offsetof(struct vmmeter, v_forwarded_hits)); +ASSYM(V_FORWARDED_MISSES, offsetof(struct vmmeter, v_forwarded_misses)); + ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); + ASSYM(GD_CURTHREAD, offsetof(struct mdglobaldata, mi.gd_curthread)); +ASSYM(GD_CNT, offsetof(struct mdglobaldata, mi.gd_cnt)); +ASSYM(GD_CPUID, offsetof(struct mdglobaldata, mi.gd_cpuid)); + +ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); +ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15)); +ASSYM(PCB_R14, offsetof(struct pcb, pcb_r14)); +ASSYM(PCB_R13, offsetof(struct pcb, pcb_r13)); +ASSYM(PCB_R12, offsetof(struct pcb, pcb_r12)); +ASSYM(PCB_RSI, offsetof(struct pcb, pcb_rsi)); +ASSYM(PCB_RBP, offsetof(struct 
pcb, pcb_rbp)); +ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp)); +ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx)); +ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip)); +ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); +ASSYM(PCB_FULLCTX, PCB_FULLCTX); +ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); +ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); +ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); + +ASSYM(TF_R15, offsetof(struct trapframe, tf_r15)); +ASSYM(TF_R14, offsetof(struct trapframe, tf_r14)); +ASSYM(TF_R13, offsetof(struct trapframe, tf_r13)); +ASSYM(TF_R12, offsetof(struct trapframe, tf_r12)); +ASSYM(TF_R11, offsetof(struct trapframe, tf_r11)); +ASSYM(TF_R10, offsetof(struct trapframe, tf_r10)); +ASSYM(TF_R9, offsetof(struct trapframe, tf_r9)); +ASSYM(TF_R8, offsetof(struct trapframe, tf_r8)); +ASSYM(TF_RDI, offsetof(struct trapframe, tf_rdi)); +ASSYM(TF_RSI, offsetof(struct trapframe, tf_rsi)); +ASSYM(TF_RBP, offsetof(struct trapframe, tf_rbp)); +ASSYM(TF_RBX, offsetof(struct trapframe, tf_rbx)); +ASSYM(TF_RDX, offsetof(struct trapframe, tf_rdx)); +ASSYM(TF_RCX, offsetof(struct trapframe, tf_rcx)); +ASSYM(TF_RAX, offsetof(struct trapframe, tf_rax)); + +ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); +ASSYM(TF_ADDR, offsetof(struct trapframe, tf_addr)); +ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); +ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags)); + +ASSYM(TF_RIP, offsetof(struct trapframe, tf_rip)); +ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); +ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags)); +ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp)); +ASSYM(TF_SS, offsetof(struct trapframe, tf_ss)); +ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); +ASSYM(TD_PROC, offsetof(struct thread, td_proc)); +ASSYM(TD_LWP, offsetof(struct thread, td_lwp)); +ASSYM(TD_PCB, 
offsetof(struct thread, td_pcb)); +ASSYM(TD_SP, offsetof(struct thread, td_sp)); +ASSYM(TD_PRI, offsetof(struct thread, td_pri)); +ASSYM(TD_MACH, offsetof(struct thread, td_mach)); +ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); +ASSYM(TD_NEST_COUNT, offsetof(struct thread, td_nest_count)); +#ifdef SMP +ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount)); +#endif +ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); +ASSYM(TDF_RUNNING, TDF_RUNNING); +ASSYM(TDF_USINGFP, TDF_USINGFP); +ASSYM(TDF_KERNELFP, TDF_KERNELFP); + +ASSYM(FIRST_SOFTINT, FIRST_SOFTINT); +ASSYM(MDGLOBALDATA_BASEALLOC_PAGES, MDGLOBALDATA_BASEALLOC_PAGES); + +ASSYM(GD_PRIVATE_TSS, offsetof(struct mdglobaldata, gd_private_tss)); +ASSYM(GD_SCRATCH_RSP, offsetof(struct mdglobaldata, gd_scratch_rsp)); +ASSYM(GD_RSP0, offsetof(struct mdglobaldata, gd_rsp0)); +ASSYM(GD_USER_FS, offsetof(struct mdglobaldata, gd_user_fs)); +ASSYM(GD_USER_GS, offsetof(struct mdglobaldata, gd_user_gs)); +ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct mdglobaldata, mi.gd_intr_nesting_level)); + +ASSYM(GD_FPENDING, offsetof(struct mdglobaldata, gd_fpending)); +ASSYM(GD_IPENDING, offsetof(struct mdglobaldata, gd_ipending)); +ASSYM(GD_SPENDING, offsetof(struct mdglobaldata, gd_spending)); +ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); +ASSYM(GD_COMMON_TSSD, offsetof(struct mdglobaldata, gd_common_tssd)); +ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); +ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); +ASSYM(GD_FPU_LOCK, offsetof(struct mdglobaldata, gd_fpu_lock)); +ASSYM(GD_SAVEFPU, offsetof(struct mdglobaldata, gd_savefpu)); +ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, gd_other_cpus)); +ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); +ASSYM(GD_REQFLAGS, offsetof(struct mdglobaldata, mi.gd_reqflags)); + +ASSYM(RQF_IPIQ, RQF_IPIQ); +ASSYM(RQF_INTPEND, RQF_INTPEND); +ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); +ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); 
+ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); +ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); +ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); +ASSYM(RQF_AST_MASK, RQF_AST_MASK); + ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); +ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); +ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); +ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); -ASSYM(TD_PRI, offsetof(struct thread, td_pri)); +ASSYM(MSR_GSBASE, MSR_GSBASE); +ASSYM(MSR_KGSBASE, MSR_KGSBASE); +ASSYM(MSR_FSBASE, MSR_FSBASE); + +ASSYM(MACHINTR_INTREN, offsetof(struct machintr_abi, intren)); ASSYM(TDPRI_CRIT, TDPRI_CRIT); ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT); diff --git a/sys/platform/pc64/amd64/global.s b/sys/platform/pc64/amd64/global.s index 2e9583a1a9..a6d594e390 100644 --- a/sys/platform/pc64/amd64/global.s +++ b/sys/platform/pc64/amd64/global.s @@ -1,5 +1,6 @@ /*- * Copyright (c) Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/global.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/global.s,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -41,6 +42,9 @@ * segment. 
*/ .data + .globl CPU_prvspace, lapic + .set CPU_prvspace,(MPPTDI << PDRSHIFT) + .set lapic,CPU_prvspace + (NPTEPG-1) * PAGE_SIZE .globl globaldata .set globaldata,0 @@ -75,9 +79,12 @@ .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 .globl gd_spending, gd_ipending, gd_fpending - .globl gd_cnt + .globl gd_cnt, gd_private_tss + .globl gd_scratch_rsp, gd_rsp0 + .globl gd_user_fs, gd_user_gs .set gd_cpuid,globaldata + GD_CPUID + .set gd_private_tss,globaldata + GD_PRIVATE_TSS .set gd_other_cpus,globaldata + GD_OTHER_CPUS .set gd_ss_eflags,globaldata + GD_SS_EFLAGS .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL @@ -93,4 +100,8 @@ .set gd_ipending,globaldata + GD_IPENDING .set gd_spending,globaldata + GD_SPENDING .set gd_cnt,globaldata + GD_CNT + .set gd_scratch_rsp,globaldata + GD_SCRATCH_RSP + .set gd_rsp0,globaldata + GD_RSP0 + .set gd_user_fs,globaldata + GD_USER_FS + .set gd_user_gs,globaldata + GD_USER_GS diff --git a/sys/platform/pc64/amd64/globaldata.c b/sys/platform/pc64/amd64/globaldata.c index 5a63261329..71a06ced66 100644 --- a/sys/platform/pc64/amd64/globaldata.c +++ b/sys/platform/pc64/amd64/globaldata.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/pc64/amd64/globaldata.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/globaldata.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -46,6 +46,7 @@ #include #include +#if JG struct globaldata * globaldata_find(int cpu) { @@ -60,4 +61,5 @@ int is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr) { } +#endif diff --git a/sys/platform/pc64/amd64/identcpu.c b/sys/platform/pc64/amd64/identcpu.c new file mode 100644 index 0000000000..22d953ed4e --- /dev/null +++ b/sys/platform/pc64/amd64/identcpu.c @@ -0,0 +1,557 @@ +/*- + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. + * Copyright (c) 1997 KATO Takenori. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp + * $DragonFly: src/sys/platform/pc64/amd64/identcpu.c,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#include "opt_cpu.h" + +#include +#include +#if JG +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#if JG +#include +#endif + +/* XXX - should be in header file: */ +void printcpuinfo(void); +void identify_cpu(void); +void earlysetcpuclass(void); +void panicifcpuunsupported(void); + +static void print_AMD_info(void); +static void print_AMD_assoc(int i); + +int cpu_class; +char machine[] = "amd64"; +SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, + machine, 0, "Machine class"); + +static char cpu_model[128]; +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, + cpu_model, 0, "Machine model"); + +static int hw_clockrate; +SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, + &hw_clockrate, 0, "CPU instruction clock rate"); + +static char cpu_brand[48]; + +static struct { + char *cpu_name; + int cpu_class; +} amd64_cpus[] = { + { "Clawhammer", CPUCLASS_K8 }, /* CPU_CLAWHAMMER */ + { "Sledgehammer", CPUCLASS_K8 }, /* CPU_SLEDGEHAMMER 
*/ +}; + +int cpu_cores; +int cpu_logical; + + +extern int pq_l2size; +extern int pq_l2nways; + +void +printcpuinfo(void) +{ + u_int regs[4], i; + char *brand; + + cpu_class = amd64_cpus[cpu].cpu_class; + kprintf("CPU: "); + strncpy(cpu_model, amd64_cpus[cpu].cpu_name, sizeof (cpu_model)); + + /* Check for extended CPUID information and a processor name. */ + if (cpu_exthigh >= 0x80000004) { + brand = cpu_brand; + for (i = 0x80000002; i < 0x80000005; i++) { + do_cpuid(i, regs); + memcpy(brand, regs, sizeof(regs)); + brand += sizeof(regs); + } + } + + if (strcmp(cpu_vendor, "GenuineIntel") == 0) { + /* Please make up your mind folks! */ + strcat(cpu_model, "EM64T"); + } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { + /* + * Values taken from AMD Processor Recognition + * http://www.amd.com/K6/k6docs/pdf/20734g.pdf + * (also describes ``Features'' encodings. + */ + strcpy(cpu_model, "AMD "); + switch (cpu_id & 0xF00) { + case 0xf00: + strcat(cpu_model, "AMD64 Processor"); + break; + default: + strcat(cpu_model, "Unknown"); + break; + } + } + + /* + * Replace cpu_model with cpu_brand minus leading spaces if + * we have one. + */ + brand = cpu_brand; + while (*brand == ' ') + ++brand; + if (*brand != '\0') + strcpy(cpu_model, brand); + + kprintf("%s (", cpu_model); + switch(cpu_class) { + case CPUCLASS_K8: +#if JG + hw_clockrate = (tsc_freq + 5000) / 1000000; + kprintf("%jd.%02d-MHz ", + (intmax_t)(tsc_freq + 4999) / 1000000, + (u_int)((tsc_freq + 4999) / 10000) % 100); +#endif + kprintf("K8"); + break; + default: + kprintf("Unknown"); /* will panic below... 
*/ + } + kprintf("-class CPU)\n"); + if(*cpu_vendor) + kprintf(" Origin = \"%s\"",cpu_vendor); + if(cpu_id) + kprintf(" Id = 0x%x", cpu_id); + + if (strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0) { + kprintf(" Stepping = %u", cpu_id & 0xf); + if (cpu_high > 0) { + u_int cmp = 1, htt = 1; + + /* + * Here we should probably set up flags indicating + * whether or not various features are available. + * The interesting ones are probably VME, PSE, PAE, + * and PGE. The code already assumes without bothering + * to check that all CPUs >= Pentium have a TSC and + * MSRs. + */ + kprintf("\n Features=0x%b", cpu_feature, + "\020" + "\001FPU" /* Integral FPU */ + "\002VME" /* Extended VM86 mode support */ + "\003DE" /* Debugging Extensions (CR4.DE) */ + "\004PSE" /* 4MByte page tables */ + "\005TSC" /* Timestamp counter */ + "\006MSR" /* Machine specific registers */ + "\007PAE" /* Physical address extension */ + "\010MCE" /* Machine Check support */ + "\011CX8" /* CMPEXCH8 instruction */ + "\012APIC" /* SMP local APIC */ + "\013oldMTRR" /* Previous implementation of MTRR */ + "\014SEP" /* Fast System Call */ + "\015MTRR" /* Memory Type Range Registers */ + "\016PGE" /* PG_G (global bit) support */ + "\017MCA" /* Machine Check Architecture */ + "\020CMOV" /* CMOV instruction */ + "\021PAT" /* Page attributes table */ + "\022PSE36" /* 36 bit address space support */ + "\023PN" /* Processor Serial number */ + "\024CLFLUSH" /* Has the CLFLUSH instruction */ + "\025" + "\026DTS" /* Debug Trace Store */ + "\027ACPI" /* ACPI support */ + "\030MMX" /* MMX instructions */ + "\031FXSR" /* FXSAVE/FXRSTOR */ + "\032SSE" /* Streaming SIMD Extensions */ + "\033SSE2" /* Streaming SIMD Extensions #2 */ + "\034SS" /* Self snoop */ + "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ + "\036TM" /* Thermal Monitor clock slowdown */ + "\037IA64" /* CPU can execute IA64 instructions */ + "\040PBE" /* Pending Break Enable */ + ); + + if (cpu_feature2 != 0) { 
+ kprintf("\n Features2=0x%b", cpu_feature2, + "\020" + "\001SSE3" /* SSE3 */ + "\002" + "\003DTES64" /* 64-bit Debug Trace */ + "\004MON" /* MONITOR/MWAIT Instructions */ + "\005DS_CPL" /* CPL Qualified Debug Store */ + "\006VMX" /* Virtual Machine Extensions */ + "\007SMX" /* Safer Mode Extensions */ + "\010EST" /* Enhanced SpeedStep */ + "\011TM2" /* Thermal Monitor 2 */ + "\012SSSE3" /* SSSE3 */ + "\013CNXT-ID" /* L1 context ID available */ + "\014" + "\015" + "\016CX16" /* CMPXCHG16B Instruction */ + "\017xTPR" /* Send Task Priority Messages*/ + "\020PDCM" /* Perf/Debug Capability MSR */ + "\021" + "\022" + "\023DCA" /* Direct Cache Access */ + "\024SSE4.1" + "\025SSE4.2" + "\026x2APIC" /* xAPIC Extensions */ + "\027" + "\030POPCNT" + "\031" + "\032" + "\033" + "\034" + "\035" + "\036" + "\037" + "\040" + ); + } + + /* + * AMD64 Architecture Programmer's Manual Volume 3: + * General-Purpose and System Instructions + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24594.pdf + * + * IA-32 Intel Architecture Software Developer's Manual, + * Volume 2A: Instruction Set Reference, A-M + * ftp://download.intel.com/design/Pentium4/manuals/25366617.pdf + */ + if (amd_feature != 0) { + kprintf("\n AMD Features=0x%b", amd_feature, + "\020" /* in hex */ + "\001" /* Same */ + "\002" /* Same */ + "\003" /* Same */ + "\004" /* Same */ + "\005" /* Same */ + "\006" /* Same */ + "\007" /* Same */ + "\010" /* Same */ + "\011" /* Same */ + "\012" /* Same */ + "\013" /* Undefined */ + "\014SYSCALL" /* Have SYSCALL/SYSRET */ + "\015" /* Same */ + "\016" /* Same */ + "\017" /* Same */ + "\020" /* Same */ + "\021" /* Same */ + "\022" /* Same */ + "\023" /* Reserved, unknown */ + "\024MP" /* Multiprocessor Capable */ + "\025NX" /* Has EFER.NXE, NX */ + "\026" /* Undefined */ + "\027MMX+" /* AMD MMX Extensions */ + "\030" /* Same */ + "\031" /* Same */ + "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ + "\033Page1GB" /* 1-GB large page support */ + "\034RDTSCP" /* 
RDTSCP */ + "\035" /* Undefined */ + "\036LM" /* 64 bit long mode */ + "\0373DNow!+" /* AMD 3DNow! Extensions */ + "\0403DNow!" /* AMD 3DNow! */ + ); + } + + if (amd_feature2 != 0) { + kprintf("\n AMD Features2=0x%b", amd_feature2, + "\020" + "\001LAHF" /* LAHF/SAHF in long mode */ + "\002CMP" /* CMP legacy */ + "\003SVM" /* Secure Virtual Mode */ + "\004ExtAPIC" /* Extended APIC register */ + "\005CR8" /* CR8 in legacy mode */ + "\006" + "\007" + "\010" + "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ + "\012" + "\013" + "\014" + "\015" + "\016" + "\017" + "\020" + "\021" + "\022" + "\023" + "\024" + "\025" + "\026" + "\027" + "\030" + "\031" + "\032" + "\033" + "\034" + "\035" + "\036" + "\037" + "\040" + ); + } + + if (cpu_feature & CPUID_HTT && strcmp(cpu_vendor, + "AuthenticAMD") == 0) + cpu_feature &= ~CPUID_HTT; + + /* + * If this CPU supports HTT or CMP then mention the + * number of physical/logical cores it contains. + */ + if (cpu_feature & CPUID_HTT) + htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + if (strcmp(cpu_vendor, "AuthenticAMD") == 0 && + (amd_feature2 & AMDID2_CMP)) + cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; + else if (strcmp(cpu_vendor, "GenuineIntel") == 0 && + (cpu_high >= 4)) { + cpuid_count(4, 0, regs); + if ((regs[0] & 0x1f) != 0) + cmp = ((regs[0] >> 26) & 0x3f) + 1; + } + cpu_cores = cmp; + cpu_logical = htt / cmp; + if (cmp > 1) + kprintf("\n Cores per package: %d", cmp); + if ((htt / cmp) > 1) + kprintf("\n Logical CPUs per core: %d", + cpu_logical); + } + } + /* Avoid ugly blank lines: only print newline when we have to. */ + if (*cpu_vendor || cpu_id) + kprintf("\n"); + + if (!bootverbose) + return; + + if (strcmp(cpu_vendor, "AuthenticAMD") == 0) + print_AMD_info(); +} + +void +panicifcpuunsupported(void) +{ + +#ifndef HAMMER_CPU +#error "You need to specify a cpu type" +#endif + /* + * Now that we have told the user what they have, + * let them know if that machine type isn't configured. 
+ */ + switch (cpu_class) { + case CPUCLASS_X86: +#ifndef HAMMER_CPU + case CPUCLASS_K8: +#endif + panic("CPU class not configured"); + default: + break; + } +} + + +#if JG +/* Update TSC freq with the value indicated by the caller. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. */ + hw_clockrate = level->total_set.freq; +} + +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); +#endif + +/* + * Final stage of CPU identification. -- Should I check TI? + */ +void +identify_cpu(void) +{ + u_int regs[4]; + + do_cpuid(0, regs); + cpu_high = regs[0]; + ((u_int *)&cpu_vendor)[0] = regs[1]; + ((u_int *)&cpu_vendor)[1] = regs[3]; + ((u_int *)&cpu_vendor)[2] = regs[2]; + cpu_vendor[12] = '\0'; + + do_cpuid(1, regs); + cpu_id = regs[0]; + cpu_procinfo = regs[1]; + cpu_feature = regs[3]; + cpu_feature2 = regs[2]; + + if (strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0) { + do_cpuid(0x80000000, regs); + cpu_exthigh = regs[0]; + } + if (cpu_exthigh >= 0x80000001) { + do_cpuid(0x80000001, regs); + amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); + amd_feature2 = regs[2]; + } + if (cpu_exthigh >= 0x80000008) { + do_cpuid(0x80000008, regs); + cpu_procinfo2 = regs[2]; + } + + /* XXX */ + cpu = CPU_CLAWHAMMER; +} + +static void +print_AMD_assoc(int i) +{ + if (i == 255) + kprintf(", fully associative\n"); + else + kprintf(", %d-way associative\n", i); +} + +static void +print_AMD_l2_assoc(int i) +{ + switch (i & 0x0f) { + case 0: kprintf(", disabled/not present\n"); break; + case 1: kprintf(", direct mapped\n"); break; + case 2: kprintf(", 2-way associative\n"); break; + case 4: kprintf(", 4-way associative\n"); break; + case 6: kprintf(", 8-way associative\n"); break; + case 8: kprintf(", 16-way 
associative\n"); break; + case 15: kprintf(", fully associative\n"); break; + default: kprintf(", reserved configuration\n"); break; + } +} + +static void +print_AMD_info(void) +{ + u_int regs[4]; + + if (cpu_exthigh < 0x80000005) + return; + + do_cpuid(0x80000005, regs); + kprintf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); + print_AMD_assoc(regs[0] >> 24); + + kprintf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); + print_AMD_assoc((regs[0] >> 8) & 0xff); + + kprintf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); + print_AMD_assoc(regs[1] >> 24); + + kprintf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); + print_AMD_assoc((regs[1] >> 8) & 0xff); + + kprintf("L1 data cache: %d kbytes", regs[2] >> 24); + kprintf(", %d bytes/line", regs[2] & 0xff); + kprintf(", %d lines/tag", (regs[2] >> 8) & 0xff); + print_AMD_assoc((regs[2] >> 16) & 0xff); + + kprintf("L1 instruction cache: %d kbytes", regs[3] >> 24); + kprintf(", %d bytes/line", regs[3] & 0xff); + kprintf(", %d lines/tag", (regs[3] >> 8) & 0xff); + print_AMD_assoc((regs[3] >> 16) & 0xff); + + if (cpu_exthigh >= 0x80000006) { + do_cpuid(0x80000006, regs); + if ((regs[0] >> 16) != 0) { + kprintf("L2 2MB data TLB: %d entries", + (regs[0] >> 16) & 0xfff); + print_AMD_l2_assoc(regs[0] >> 28); + kprintf("L2 2MB instruction TLB: %d entries", + regs[0] & 0xfff); + print_AMD_l2_assoc((regs[0] >> 28) & 0xf); + } else { + kprintf("L2 2MB unified TLB: %d entries", + regs[0] & 0xfff); + print_AMD_l2_assoc((regs[0] >> 28) & 0xf); + } + if ((regs[1] >> 16) != 0) { + kprintf("L2 4KB data TLB: %d entries", + (regs[1] >> 16) & 0xfff); + print_AMD_l2_assoc(regs[1] >> 28); + + kprintf("L2 4KB instruction TLB: %d entries", + (regs[1] >> 16) & 0xfff); + print_AMD_l2_assoc((regs[1] >> 28) & 0xf); + } else { + kprintf("L2 4KB unified TLB: %d entries", + (regs[1] >> 16) & 0xfff); + print_AMD_l2_assoc((regs[1] >> 28) & 0xf); + } + kprintf("L2 unified cache: %d kbytes", regs[2] >> 16); + kprintf(", %d 
bytes/line", regs[2] & 0xff); + kprintf(", %d lines/tag", (regs[2] >> 8) & 0x0f); + print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); + } +} diff --git a/sys/platform/pc64/amd64/in_cksum2.s b/sys/platform/pc64/amd64/in_cksum2.s index d7492cc922..2378b07d64 100644 --- a/sys/platform/pc64/amd64/in_cksum2.s +++ b/sys/platform/pc64/amd64/in_cksum2.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/in_cksum2.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/in_cksum2.s,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include /* miscellaneous asm macros */ @@ -58,29 +58,29 @@ */ .p2align 4 ENTRY(asm_ones32) - movl 4(%esp),%edx /* %edx = buffer pointer */ - movl 8(%esp),%ecx /* %ecx = counter */ - subl %eax,%eax /* %eax = checksum */ + movq %rdi,%rdx /* %rdx = buffer pointer */ + movl %esi,%ecx /* %ecx = counter */ + xorl %eax,%eax /* %eax = checksum */ cmpl $5,%ecx jl 2f 1: subl $5,%ecx - addl (%edx),%eax - adcl 4(%edx),%eax - adcl 8(%edx),%eax - adcl 12(%edx),%eax - adcl 16(%edx),%eax + addl (%rdx),%eax + adcl 4(%rdx),%eax + adcl 8(%rdx),%eax + adcl 12(%rdx),%eax + adcl 16(%rdx),%eax adcl $0,%eax - addl $20,%edx + addq $20,%rdx cmpl $5,%ecx jge 1b 2: testl %ecx,%ecx je 4f 3: - addl (%edx),%eax + addl (%rdx),%eax adcl $0,%eax - addl $4,%edx + addq $4,%rdx decl %ecx jnz 3b 4: diff --git a/sys/platform/pc64/amd64/init.c b/sys/platform/pc64/amd64/init.c index 9ce29ab553..89d78daefd 100644 --- a/sys/platform/pc64/amd64/init.c +++ b/sys/platform/pc64/amd64/init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2006-2008 The DragonFly Project. 
All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/init.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/init.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -64,27 +64,31 @@ #include #include +#if JG struct privatespace CPU_prvspace[]; +#endif -vm_paddr_t phys_avail[16]; -vm_paddr_t Maxmem; +extern vm_paddr_t phys_avail[16]; +extern vm_paddr_t Maxmem; vm_paddr_t Maxmem_bytes; int MemImageFd = -1; int DiskNum; int NetifNum; char *pid_file; -struct msgbuf *msgbufp; -caddr_t ptvmmap; +extern struct msgbuf *msgbufp; +extern caddr_t ptvmmap; +#if JG u_int tsc_present; vm_offset_t KvaStart; vm_offset_t KvaEnd; vm_offset_t KvaSize; vm_offset_t virtual_start; -vm_offset_t virtual_end; -vm_offset_t kernel_vm_end; +#endif +extern vm_offset_t virtual_end; +extern vm_offset_t kernel_vm_end; vm_offset_t crashdumpmap; -vm_offset_t clean_sva; -vm_offset_t clean_eva; +extern vm_offset_t clean_sva; +extern vm_offset_t clean_eva; static void init_sys_memory(char *imageFile); static void init_kern_memory(void); @@ -166,6 +170,7 @@ usage(const char *ctl, ...) { } +#if JG void cpu_reset(void) { @@ -175,3 +180,4 @@ void cpu_halt(void) { } +#endif diff --git a/sys/platform/pc64/amd64/initcpu.c b/sys/platform/pc64/amd64/initcpu.c new file mode 100644 index 0000000000..1c4ce0b778 --- /dev/null +++ b/sys/platform/pc64/amd64/initcpu.c @@ -0,0 +1,84 @@ +/*- + * Copyright (c) KATO Takenori, 1997, 1998. + * Copyright (c) 2008 The DragonFly Project. + * + * All rights reserved. Unpublished rights reserved under the copyright + * laws of Japan. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer as + * the first lines of this file unmodified. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/pc64/amd64/initcpu.c,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#include "opt_cpu.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +static int hw_instruction_sse; +SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, + &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); + +int cpu; /* Are we 386, 386sx, 486, etc? 
*/ +u_int cpu_feature; /* Feature flags */ +u_int cpu_feature2; /* Feature flags */ +u_int amd_feature; /* AMD feature flags */ +u_int amd_feature2; /* AMD feature flags */ +u_int cpu_high; /* Highest arg to CPUID */ +u_int cpu_exthigh; /* Highest arg to extended CPUID */ +u_int cpu_id; /* Stepping ID */ +u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ +u_int cpu_procinfo2; /* Multicore info */ +char cpu_vendor[20]; /* CPU Origin code */ +u_int cpu_fxsr; /* SSE enabled */ +u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ + +/* + * Initialize CPU control registers + */ +void +initializecpu(void) +{ + uint64_t msr; + + if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { + load_cr4(rcr4() | CR4_FXSR | CR4_XMM); + cpu_fxsr = hw_instruction_sse = 1; + } + if ((amd_feature & AMDID_NX) != 0) { + msr = rdmsr(MSR_EFER) | EFER_NXE; + wrmsr(MSR_EFER, msr); +#if JG + pg_nx = PG_NX; +#endif + } +} diff --git a/sys/platform/pc64/amd64/ipl.s b/sys/platform/pc64/amd64/ipl.s new file mode 100644 index 0000000000..839dbf1a12 --- /dev/null +++ b/sys/platform/pc64/amd64/ipl.s @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * --- + * + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ipl.s + * + * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/ipl.s,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include "assym.s" + +/* + * AT/386 + * Vector interrupt control section + * + * ipending - Pending interrupts (set when a masked interrupt occurs) + * spending - Pending software interrupts + */ + .data + ALIGN_DATA + + .globl fastunpend_count +fastunpend_count: .long 0 + + .text + SUPERALIGN_TEXT + + /* + * GENERAL NOTES + * + * - fast interrupts are always called with a critical section + * held + * + * - we release our critical section when scheduling interrupt + * or softinterrupt threads in order so they can preempt + * (unless we are called manually from a critical section, in + * which case there will still be a critical section and + * they won't preempt anyway). + * + * - TD_NEST_COUNT prevents splz from nesting too deeply within + * itself. It is *not* actually an interrupt nesting count. + * PCPU(intr_nesting_level) is an interrupt nesting count. + * + * - We have to be careful in regards to local interrupts + * occurring simultaneously with our doreti and splz + * processing. + */ + + /* + * DORETI + * + * Handle return from interrupts, traps and syscalls. This function + * checks the cpl for unmasked pending interrupts (fast, normal, or + * soft) and schedules them if appropriate, then irets. + * + * If we are in a critical section we cannot run any pending ints + * nor can we play with mp_lock. + * + * The stack contains a trapframe at the start of doreti. 
+ */ + SUPERALIGN_TEXT + .globl doreti + .type doreti,@function +doreti: + FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */ + movq $0,%rax /* irq mask unavailable due to BGL */ + movq PCPU(curthread),%rbx + cli /* interlock with TDPRI_CRIT */ + cmpl $0,PCPU(reqflags) /* short cut if nothing to do */ + je 5f + cmpl $TDPRI_CRIT,TD_PRI(%rbx) /* can't unpend if in critical sec */ + jge 5f + addl $TDPRI_CRIT,TD_PRI(%rbx) /* force all ints to pending */ +doreti_next: + sti /* allow new interrupts */ + movl %eax,%ecx /* irq mask unavailable due to BGL */ + notl %ecx + cli /* disallow YYY remove */ +#ifdef SMP + testl $RQF_IPIQ,PCPU(reqflags) + jnz doreti_ipiq +#endif + testl PCPU(fpending),%ecx /* check for an unmasked fast int */ + jnz doreti_fast + + testl PCPU(ipending),%ecx /* check for an unmasked slow int */ + jnz doreti_intr + + movl PCPU(spending),%ecx /* check for a pending software int */ + cmpl $0,%ecx + jnz doreti_soft + + testl $RQF_AST_MASK,PCPU(reqflags) /* any pending ASTs? */ + jz 2f + + /* ASTs are only applicable when returning to userland */ + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz doreti_ast +2: + /* + * Nothing left to do, finish up. Interrupts are still disabled. + * %eax contains the mask of IRQ's that are not available due to + * BGL requirements. We can only clear RQF_INTPEND if *ALL* pending + * interrupts have been processed. + */ + subl $TDPRI_CRIT,TD_PRI(%rbx) /* interlocked with cli */ + testl %eax,%eax + jnz 5f + andl $~RQF_INTPEND,PCPU(reqflags) +5: + MEXITCOUNT + + /* + * Restore register and iret. iret can fault on %rip (which is + * really stupid). If this occurs we re-fault and vector to + * doreti_iret_fault(). + * + * ... + * can be set from user mode, this can result in a kernel mode + * exception. The trap code will revector to the *_fault code + * which then sets up a T_PROTFLT signal. If the signal is + * sent to userland, sendsig() will automatically clean up all + * the segment registers to avoid a loop. 
+ */ + .globl doreti_iret + .globl doreti_syscall_ret +doreti_syscall_ret: + POP_FRAME /* registers and %gs (+cli) */ + /* special global also used by exception.S */ +doreti_iret: + iretq + + /* + * doreti_iret_fault. Alternative return code for + * the case where we get a fault in the doreti_exit code + * above. trap() (sys/platform/pc64/amd64/trap.c) catches this specific + * case, sends the process a signal and continues in the + * corresponding place in the code below. + */ + ALIGN_TEXT + .globl doreti_iret_fault +doreti_iret_fault: + PUSH_FRAME + testq $PSL_I,TF_RFLAGS(%rsp) + jz 2f + sti +2: + movq $T_PROTFLT,TF_TRAPNO(%rsp) + movq $0,TF_ERR(%rsp) /* XXX should be the error code */ + movq $0,TF_ADDR(%rsp) + FAKE_MCOUNT(TF_RIP(%rsp)) + jmp calltrap + + /* + * FAST interrupt pending. NOTE: stack context holds frame structure + * for fast interrupt procedure, do not do random pushes or pops! + */ + ALIGN_TEXT +doreti_fast: + andl PCPU(fpending),%ecx /* only check fast ints */ + bsfl %ecx, %ecx /* locate the next dispatchable int */ + btrl %ecx, PCPU(fpending) /* is it really still pending? */ + jnc doreti_next + pushq %rax /* save IRQ mask unavailable for BGL */ + /* NOTE: is also CPL in frame */ +#if 0 +#ifdef SMP + pushq %rcx /* save ecx */ + call try_mplock + popq %rcx + testl %eax,%eax + jz 1f + /* MP lock successful */ +#endif +#endif + incl PCPU(intr_nesting_level) + call dofastunpend /* unpend fast intr %ecx */ + decl PCPU(intr_nesting_level) +#if 0 +#ifdef SMP + call rel_mplock +#endif +#endif + popq %rax + jmp doreti_next +1: + btsl %ecx, PCPU(fpending) /* oops, couldn't get the MP lock */ + popq %rax /* add to temp. cpl mask to ignore */ + orl PCPU(fpending),%eax + jmp doreti_next + + /* + * INTR interrupt pending + * + * Temporarily back-out our critical section to allow an interrupt + * preempt us when we schedule it. Bump intr_nesting_level to + * prevent the switch code from recursing via splz too deeply. 
+ */ + ALIGN_TEXT +doreti_intr: + andl PCPU(ipending),%ecx /* only check normal ints */ + bsfl %ecx, %ecx /* locate the next dispatchable int */ + btrl %ecx, PCPU(ipending) /* is it really still pending? */ + jnc doreti_next + pushq %rax + movl %ecx,%edi /* argument to C function */ + incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + subl $TDPRI_CRIT,TD_PRI(%rbx) /* so we can preempt */ + call sched_ithd /* YYY must pull in imasks */ + addl $TDPRI_CRIT,TD_PRI(%rbx) + decl TD_NEST_COUNT(%rbx) + popq %rax + jmp doreti_next + + /* + * SOFT interrupt pending + * + * Temporarily back-out our critical section to allow an interrupt + * preempt us when we schedule it. Bump intr_nesting_level to + * prevent the switch code from recursing via splz too deeply. + */ + ALIGN_TEXT +doreti_soft: + bsfl %ecx,%ecx /* locate the next pending softint */ + btrl %ecx,PCPU(spending) /* make sure its still pending */ + jnc doreti_next + addl $FIRST_SOFTINT,%ecx /* actual intr number */ + pushq %rax + movl %ecx,%edi /* argument to C call */ + incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + subl $TDPRI_CRIT,TD_PRI(%rbx) /* so we can preempt */ + call sched_ithd /* YYY must pull in imasks */ + addl $TDPRI_CRIT,TD_PRI(%rbx) + decl TD_NEST_COUNT(%rbx) + popq %rax + jmp doreti_next + + /* + * AST pending. We clear RQF_AST_SIGNAL automatically, the others + * are cleared by the trap as they are processed. + * + * Temporarily back-out our critical section because trap() can be + * a long-winded call, and we want to be more syscall-like. + * + * YYY theoretically we can call lwkt_switch directly if all we need + * to do is a reschedule. 
+ */ +doreti_ast: + andl $~(RQF_AST_SIGNAL|RQF_AST_UPCALL),PCPU(reqflags) + sti + movl %eax,%esi /* save cpl (can't use stack) */ + movl $T_ASTFLT,TF_TRAPNO(%rsp) + movq %rsp,%rdi /* pass frame by ref (%edi = C arg) */ + subl $TDPRI_CRIT,TD_PRI(%rbx) + call trap + addl $TDPRI_CRIT,TD_PRI(%rbx) + movl %esi,%eax /* restore cpl for loop */ + jmp doreti_next + +#ifdef SMP + /* + * IPIQ message pending. We clear RQF_IPIQ automatically. + */ +doreti_ipiq: + movl %eax,%esi /* save cpl (can't use stack) */ + incl PCPU(intr_nesting_level) + andl $~RQF_IPIQ,PCPU(reqflags) + subl $16,%rsp /* add dummy vec and ppl */ + movq %rsp,%rdi /* pass frame by ref (C arg) */ + call lwkt_process_ipiq_frame + addl $16,%rsp + decl PCPU(intr_nesting_level) + movl %esi,%eax /* restore cpl for loop */ + jmp doreti_next + +#endif + + /* + * SPLZ() a C callable procedure to dispatch any unmasked pending + * interrupts regardless of critical section nesting. ASTs + * are not dispatched. + * + * Use %eax to track those IRQs that could not be processed + * due to BGL requirements. + */ + SUPERALIGN_TEXT + +ENTRY(splz) + pushfq + pushq %rbx + movq PCPU(curthread),%rbx + addl $TDPRI_CRIT,TD_PRI(%rbx) + movl $0,%eax + +splz_next: + cli + movl %eax,%ecx /* ecx = ~CPL */ + notl %ecx +#ifdef SMP + testl $RQF_IPIQ,PCPU(reqflags) + jnz splz_ipiq +#endif + testl PCPU(fpending),%ecx /* check for an unmasked fast int */ + jnz splz_fast + + testl PCPU(ipending),%ecx + jnz splz_intr + + movl PCPU(spending),%ecx + cmpl $0,%ecx + jnz splz_soft + + subl $TDPRI_CRIT,TD_PRI(%rbx) + + /* + * Nothing left to do, finish up. Interrupts are still disabled. + * If our mask of IRQs we couldn't process due to BGL requirements + * is 0 then there are no pending interrupt sources left and we + * can clear RQF_INTPEND. 
+ */ + testl %eax,%eax + jnz 5f + andl $~RQF_INTPEND,PCPU(reqflags) +5: + popq %rbx + popfq + ret + + /* + * FAST interrupt pending + */ + ALIGN_TEXT +splz_fast: + andl PCPU(fpending),%ecx /* only check fast ints */ + bsfl %ecx, %ecx /* locate the next dispatchable int */ + btrl %ecx, PCPU(fpending) /* is it really still pending? */ + jnc splz_next + pushq %rax +#if 0 +#ifdef SMP + movl %ecx,%edi /* argument to try_mplock */ + call try_mplock + testl %eax,%eax + jz 1f +#endif +#endif + incl PCPU(intr_nesting_level) + call dofastunpend /* unpend fast intr %ecx */ + decl PCPU(intr_nesting_level) +#if 0 +#ifdef SMP + call rel_mplock +#endif +#endif + popq %rax + jmp splz_next +1: + btsl %ecx, PCPU(fpending) /* oops, couldn't get the MP lock */ + popq %rax + orl PCPU(fpending),%eax + jmp splz_next + + /* + * INTR interrupt pending + * + * Temporarily back-out our critical section to allow the interrupt + * preempt us. + */ + ALIGN_TEXT +splz_intr: + andl PCPU(ipending),%ecx /* only check normal ints */ + bsfl %ecx, %ecx /* locate the next dispatchable int */ + btrl %ecx, PCPU(ipending) /* is it really still pending? */ + jnc splz_next + sti + pushq %rax + movl %ecx,%edi /* C argument */ + subl $TDPRI_CRIT,TD_PRI(%rbx) + incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + call sched_ithd /* YYY must pull in imasks */ + addl $TDPRI_CRIT,TD_PRI(%rbx) + decl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + popq %rax + jmp splz_next + + /* + * SOFT interrupt pending + * + * Temporarily back-out our critical section to allow the interrupt + * preempt us. 
+ */ + ALIGN_TEXT +splz_soft: + bsfl %ecx,%ecx /* locate the next pending softint */ + btrl %ecx,PCPU(spending) /* make sure its still pending */ + jnc splz_next + addl $FIRST_SOFTINT,%ecx /* actual intr number */ + sti + pushq %rax + movl %ecx,%edi /* C argument */ + subl $TDPRI_CRIT,TD_PRI(%rbx) + incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + call sched_ithd /* YYY must pull in imasks */ + addl $TDPRI_CRIT,TD_PRI(%rbx) + decl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */ + popq %rax + jmp splz_next + +#ifdef SMP +splz_ipiq: + andl $~RQF_IPIQ,PCPU(reqflags) + pushq %rax + call lwkt_process_ipiq + popq %rax + jmp splz_next +#endif + + /* + * dofastunpend(%ecx:intr) + * + * A FAST interrupt previously made pending can now be run, + * execute it by pushing a dummy interrupt frame and + * calling ithread_fast_handler to execute or schedule it. + * + * ithread_fast_handler() returns 0 if it wants us to unmask + * further interrupts. + */ +#define PUSH_DUMMY \ + pushfq ; /* phys int frame / flags */ \ + movl %cs,%eax ; \ + pushq %rax ; /* phys int frame / cs */ \ + pushq 3*8(%rsp) ; /* original caller eip */ \ + subq $TF_RIP,%rsp ; /* trap frame */ \ + movq $0,TF_TRAPNO(%rsp) ; /* extras */ \ + movq $0,TF_ADDR(%rsp) ; /* extras */ \ + movq $0,TF_FLAGS(%rsp) ; /* extras */ \ + movq $0,TF_ERR(%rsp) ; /* extras */ \ + +#define POP_DUMMY \ + addq $TF_RIP+(3*8),%rsp ; \ + +dofastunpend: + pushq %rbp /* frame for backtrace */ + movq %rsp,%rbp + PUSH_DUMMY + pushq %rcx /* last part of intrframe = intr */ + incl fastunpend_count + movq %rsp,%rdi /* pass frame by reference C arg */ + call ithread_fast_handler /* returns 0 to unmask */ + popq %rdi /* intrframe->trapframe */ + /* + also rdi C arg to next call */ + cmpl $0,%eax + jnz 1f + movq MachIntrABI + MACHINTR_INTREN, %rax + callq *%rax /* MachIntrABI.intren(intr) */ +1: + POP_DUMMY + popq %rbp + ret + diff --git a/sys/platform/pc64/amd64/locore.s b/sys/platform/pc64/amd64/locore.s index 
5c4ca0257e..d38027cddf 100644 --- a/sys/platform/pc64/amd64/locore.s +++ b/sys/platform/pc64/amd64/locore.s @@ -1,5 +1,6 @@ /*- * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/amd64/amd64/locore.S,v 1.175 2003/05/31 06:54:28 peter Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/locore.s,v 1.1 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/locore.s,v 1.2 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -38,9 +39,19 @@ * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ - .globl PTmap,PTD + .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) - .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) + .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) + .set PTDpde,PTD + (PTDPTDI * PDESIZE) + +/* + * APTmap, APTD is the alternate recursive pagemap. + * It's used when modifying another process's page tables. + */ + .globl APTmap,APTD,APTDpde + .set APTmap,APTDPTDI << PDRSHIFT + .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) + .set APTDpde,PTD + (APTDPTDI * PDESIZE) /* * Compiled KERNBASE location @@ -56,6 +67,7 @@ .set dmapend,DMAP_MAX_ADDRESS .text + /********************************************************************** * * This is where the loader trampoline start us, set the ball rolling... 
@@ -85,7 +97,7 @@ NON_GPROF_ENTRY(btext) movq $bootstack,%rsp xorl %ebp, %ebp - /* call hammer_time*/ /* set up cpu for unix operation */ + call hammer_time /* set up cpu for unix operation */ movq %rax,%rsp /* set up kstack for mi_startup() */ call mi_startup /* autoconfiguration, mountroot etc */ 0: hlt diff --git a/sys/platform/pc64/amd64/machdep.c b/sys/platform/pc64/amd64/machdep.c new file mode 100644 index 0000000000..6065f0bf89 --- /dev/null +++ b/sys/platform/pc64/amd64/machdep.c @@ -0,0 +1,2471 @@ +/*- + * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 + * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/machdep.c,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#include "use_ether.h" +//#include "use_npx.h" +#include "use_isa.h" +#include "opt_atalk.h" +#include "opt_compat.h" +#include "opt_cpu.h" +#include "opt_ddb.h" +#include "opt_directio.h" +#include "opt_inet.h" +#include "opt_ipx.h" +#include "opt_msgbuf.h" +#include "opt_swap.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#if JG +#include +#endif +#include /* for inthand_t */ +#include +#include +#include +#include /* pcb.h included via sys/user.h */ +#include /* CPU_prvspace */ +#include +#ifdef PERFMON +#include +#endif +#include + +#ifdef OLD_BUS_ARCH +#include +#endif +#include +#include +#include +#include +#include + +#define 
PHYSMAP_ENTRIES 10 + +extern void init386(int first); +extern void dblfault_handler(void); +extern u_int64_t hammer_time(u_int64_t, u_int64_t); + +extern void printcpuinfo(void); /* XXX header file */ +extern void identify_cpu(void); +#if JG +extern void finishidentcpu(void); +#endif +extern void panicifcpuunsupported(void); +extern void initializecpu(void); + +extern void init_paging(vm_paddr_t *); + +static void cpu_startup(void *); +#ifndef CPU_DISABLE_SSE +static void set_fpregs_xmm(struct save87 *, struct savexmm *); +static void fill_fpregs_xmm(struct savexmm *, struct save87 *); +#endif /* CPU_DISABLE_SSE */ +#ifdef DIRECTIO +extern void ffs_rawread_setup(void); +#endif /* DIRECTIO */ +static void init_locks(void); + +SYSINIT(cpu, SI_BOOT2_SMP, SI_ORDER_FIRST, cpu_startup, NULL) + +#ifdef DDB +extern vm_offset_t ksym_start, ksym_end; +#endif + +uint64_t common_lvl4_phys; +uint64_t common_lvl3_phys; +uint64_t IdlePTD; +uint64_t KPTphys; +uint64_t SMPptpa; +pt_entry_t *SMPpt; +pdp_entry_t *link_pdpe; + + +int _udatasel, _ucodesel, _ucode32sel; +u_long atdevbase; +#ifdef SMP +int64_t tsc_offsets[MAXCPU]; +#else +int64_t tsc_offsets[1]; +#endif + +#if defined(SWTCH_OPTIM_STATS) +extern int swtch_optim_stats; +SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, + CTLFLAG_RD, &swtch_optim_stats, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, + CTLFLAG_RD, &tlb_flush_count, 0, ""); +#endif + +int physmem = 0; + +static int +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_physmem, "IU", ""); + +static int +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, + ctob(physmem - vmstats.v_wire_count), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_usermem, "IU", ""); + +static int 
+sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) +{ +#if JG + int error = sysctl_handle_int(oidp, 0, + i386_btop(avail_end - avail_start), req); + return (error); +#else + return -1; +#endif +} + +SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_availpages, "I", ""); + +static int +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) +{ + int error; + + /* Unwind the buffer, so that it's linear (possibly starting with + * some initial nulls). + */ + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, + msgbufp->msg_size-msgbufp->msg_bufr,req); + if(error) return(error); + if(msgbufp->msg_bufr>0) { + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, + msgbufp->msg_bufr,req); + } + return(error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, + 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); + +static int msgbuf_clear; + +static int +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) { + /* Clear the buffer and reset write pointer */ + bzero(msgbufp->msg_ptr,msgbufp->msg_size); + msgbufp->msg_bufr=msgbufp->msg_bufx=0; + msgbuf_clear=0; + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, + &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", + "Clear kernel message buffer"); + +vm_paddr_t Maxmem = 0; + +/* + * The number of PHYSMAP entries must be one less than the number of + * PHYSSEG entries because the PHYSMAP entry that spans the largest + * physical address that is accessible by ISA DMA is split into two + * PHYSSEG entries. 
+ */
+#define PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))
+
+vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
+vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
+
+/* must be 2 less so 0 0 can signal end of chunks */
+#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
+#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
+
+static vm_offset_t buffer_sva, buffer_eva;
+vm_offset_t clean_sva, clean_eva;
+static vm_offset_t pager_sva, pager_eva;
+static struct trapframe proc0_tf;
+
+/*
+ * Machine-dependent startup: print CPU/memory info, size and allocate
+ * the kernel's buffer-cache tables, carve out the clean/buffer/pager
+ * submaps, and (on SMP) start the application processors.
+ */
+static void
+cpu_startup(void *dummy)
+{
+	caddr_t v;
+	vm_size_t size = 0;
+	vm_offset_t firstaddr;
+
+	if (boothowto & RB_VERBOSE)
+		bootverbose++;
+
+	/*
+	 * Good {morning,afternoon,evening,night}.
+	 */
+	kprintf("%s", version);
+	startrtclock();
+	printcpuinfo();
+	panicifcpuunsupported();
+#ifdef PERFMON
+	perfmon_init();
+#endif
+	kprintf("real memory = %llu (%lluK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
+	/*
+	 * Display any holes after the first chunk of extended memory.
+	 */
+	if (bootverbose) {
+		int indx;
+
+		kprintf("Physical memory chunk(s):\n");
+		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
+			vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx];
+
+			kprintf("0x%08llx - 0x%08llx, %llu bytes (%llu pages)\n",
+			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
+			    size1 / PAGE_SIZE);
+		}
+	}
+
+	/*
+	 * Allocate space for system data structures.
+	 * The first available kernel virtual address is in "v".
+	 * As pages of kernel virtual memory are allocated, "v" is incremented.
+	 * As pages of memory are allocated and cleared,
+	 * "firstaddr" is incremented.
+	 * An index into the kernel page table corresponding to the
+	 * virtual memory address maintained in "v" is kept in "mapaddr".
+	 */
+
+	/*
+	 * Make two passes.  The first pass calculates how much memory is
+	 * needed and allocates it.  The second pass assigns virtual
+	 * addresses to the various data structures.
+	 */
+	firstaddr = 0;
+again:
+	v = (caddr_t)firstaddr;
+
+#define	valloc(name, type, num) \
+	(name) = (type *)v; v = (caddr_t)((name)+(num))
+#define	valloclim(name, type, num, lim) \
+	(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
+
+	/*
+	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
+	 * For the first 64MB of ram nominally allocate sufficient buffers to
+	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
+	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
+	 * the buffer cache we limit the eventual kva reservation to
+	 * maxbcache bytes.
+	 *
+	 * factor represents the 1/4 x ram conversion.
+	 */
+	if (nbuf == 0) {
+		int factor = 4 * BKVASIZE / 1024;
+		int kbytes = physmem * (PAGE_SIZE / 1024);
+
+		nbuf = 50;
+		if (kbytes > 4096)
+			nbuf += min((kbytes - 4096) / factor, 65536 / factor);
+		if (kbytes > 65536)
+			nbuf += (kbytes - 65536) * 2 / (factor * 5);
+		if (maxbcache && nbuf > maxbcache / BKVASIZE)
+			nbuf = maxbcache / BKVASIZE;
+	}
+
+	/*
+	 * Do not allow the buffer_map to be more than 1/2 the size of the
+	 * kernel_map.
+	 */
+	if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) {
+		nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2);
+		kprintf("Warning: nbufs capped at %d\n", nbuf);
+	}
+
+	nswbuf = max(min(nbuf/4, 256), 16);
+#ifdef NSWBUF_MIN
+	if (nswbuf < NSWBUF_MIN)
+		nswbuf = NSWBUF_MIN;
+#endif
+#ifdef DIRECTIO
+	ffs_rawread_setup();
+#endif
+
+	valloc(swbuf, struct buf, nswbuf);
+	valloc(buf, struct buf, nbuf);
+
+	/*
+	 * End of first pass, size has been calculated so allocate memory
+	 */
+	if (firstaddr == 0) {
+		size = (vm_size_t)(v - firstaddr);
+		firstaddr = kmem_alloc(&kernel_map, round_page(size));
+		if (firstaddr == 0)
+			panic("startup: no room for tables");
+		goto again;
+	}
+
+	/*
+	 * End of second pass, addresses have been assigned
+	 */
+	if ((vm_size_t)(v - firstaddr) != size)
+		panic("startup: table size inconsistency");
+
+	kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva,
+		      (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
+	kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva,
+		      (nbuf*BKVASIZE));
+	buffer_map.system_map = 1;
+	kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva,
+		      (nswbuf*MAXPHYS) + pager_map_size);
+	pager_map.system_map = 1;
+
+#if defined(USERCONFIG)
+	userconfig();
+	cninit();		/* the preferred console may have changed */
+#endif
+
+	kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count),
+	    ptoa(vmstats.v_free_count) / 1024);
+
+	/*
+	 * Set up buffers, so they can be used to read disk labels.
+	 */
+	bufinit();
+	vm_pager_bufferinit();
+
+#ifdef SMP
+	/*
+	 * OK, enough kmem_alloc/malloc state should be up, let's get on with it!
+	 */
+	mp_start();			/* fire up the APs and APICs */
+	mp_announce();
+#endif  /* SMP */
+	cpu_setregs();
+}
+
+/*
+ * Send an interrupt to process.
+ *
+ * Stack is set up to allow sigcode stored
+ * at top to call routine, followed by kcall
+ * to sigreturn routine below.
After sigreturn
+ * resets the signal mask, the stack, and the
+ * frame pointer, it returns to the user
+ * specified pc, psl.
+ */
+void
+sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
+{
+	kprintf0("sendsig\n");
+	struct lwp *lp = curthread->td_lwp;
+	struct proc *p = lp->lwp_proc;
+	struct trapframe *regs;
+	struct sigacts *psp = p->p_sigacts;
+	struct sigframe sf, *sfp;
+	int oonstack;
+
+	regs = lp->lwp_md.md_regs;
+	oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;
+
+	/* save user context */
+	bzero(&sf, sizeof(struct sigframe));
+	sf.sf_uc.uc_sigmask = *mask;
+	sf.sf_uc.uc_stack = lp->lwp_sigstk;
+	sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
+#if JG
+	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe));
+#endif
+
+	/* make the size of the saved context visible to userland */
+	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext);
+
+	/* save mailbox pending state for syscall interlock semantics */
+#if JG
+	if (p->p_flag & P_MAILBOX)
+		sf.sf_uc.uc_mcontext.mc_xflags |= PGEX_MAILBOX;
+#endif
+
+	/* Allocate and validate space for the signal handler context. */
+	if ((lp->lwp_flag & LWP_ALTSTACK) != 0 && !oonstack &&
+	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
+		sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp +
+		    lp->lwp_sigstk.ss_size - sizeof(struct sigframe));
+		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
+	} else {
+#if JG
+		sfp = (struct sigframe *)regs->tf_esp - 1;
+#endif
+		/* NOTE(review): with the JG code disabled, sfp is left
+		 * unassigned on this path -- placeholder pending the
+		 * amd64 trapframe conversion. */
+	}
+
+	/* Translate the signal if appropriate */
+	if (p->p_sysent->sv_sigtbl) {
+		if (sig <= p->p_sysent->sv_sigsize)
+			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+	}
+
+	/* Build the argument list for the signal handler. */
+	sf.sf_signum = sig;
+	sf.sf_ucontext = (register_t)&sfp->sf_uc;
+	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+		/* Signal handler installed with SA_SIGINFO. */
+		sf.sf_siginfo = (register_t)&sfp->sf_si;
+		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
+
+		/* fill siginfo structure */
+		sf.sf_si.si_signo = sig;
+		sf.sf_si.si_code = code;
+		sf.sf_si.si_addr = (void*)regs->tf_err;
+	}
+	else {
+		/* Old FreeBSD-style arguments. */
+		sf.sf_siginfo = code;
+		sf.sf_addr = regs->tf_err;
+		sf.sf_ahu.sf_handler = catcher;
+	}
+
+	/*
+	 * If we're a vm86 process, we want to save the segment registers.
+	 * We also change eflags to be our emulated eflags, not the actual
+	 * eflags.
+	 */
+#if JG
+	if (regs->tf_eflags & PSL_VM) {
+		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
+		struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
+
+		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
+		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
+		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
+		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
+
+		if (vm86->vm86_has_vme == 0)
+			sf.sf_uc.uc_mcontext.mc_eflags =
+			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
+			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
+
+		/*
+		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
+		 * syscalls made by the signal handler.  This just avoids
+		 * wasting time for our lazy fixup of such faults.  PSL_NT
+		 * does nothing in vm86 mode, but vm86 programs can set it
+		 * almost legitimately in probes for old cpu types.
+		 */
+		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
+	}
+#endif
+
+	/*
+	 * Save the FPU state and reinit the FP unit
+	 */
+#if JG
+	npxpush(&sf.sf_uc.uc_mcontext);
+#endif
+
+	/*
+	 * Copy the sigframe out to the user's stack.
+	 */
+	if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
+		/*
+		 * Something is wrong with the stack pointer.
+		 * ...Kill the process.
+		 */
+		sigexit(lp, SIGILL);
+	}
+
+#if JG
+	regs->tf_esp = (int)sfp;
+	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
+#endif
+
+	/*
+	 * i386 abi specifies that the direction flag must be cleared
+	 * on function entry
+	 */
+#if JG
+	regs->tf_eflags &= ~(PSL_T|PSL_D);
+#endif
+
+	regs->tf_cs = _ucodesel;
+#if JG
+	regs->tf_ds = _udatasel;
+	regs->tf_es = _udatasel;
+#endif
+
+	/*
+	 * Allow the signal handler to inherit %fs in addition to %gs as
+	 * the userland program might be using both.
+	 *
+	 * However, if a T_PROTFLT occurred the segment registers could be
+	 * totally broken.  They must be reset in order to be able to
+	 * return to userland.
+	 */
+	if (regs->tf_trapno == T_PROTFLT) {
+#if JG
+		regs->tf_fs = _udatasel;
+		regs->tf_gs = _udatasel;
+#endif
+	}
+	regs->tf_ss = _udatasel;
+}
+
+/*
+ * Sanitize the trapframe for a virtual kernel passing control to a custom
+ * VM context.  Remove any items that would otherwise create a privilege
+ * issue.
+ *
+ * XXX at the moment we allow userland to set the resume flag.  Is this a
+ * bad idea?
+ */
+int
+cpu_sanitize_frame(struct trapframe *frame)
+{
+	kprintf0("cpu_sanitize_frame\n");
+	frame->tf_cs = _ucodesel;
+#if JG
+	frame->tf_ds = _udatasel;
+	frame->tf_es = _udatasel;	/* XXX allow userland this one too? */
+#endif
+#if 0
+	frame->tf_fs = _udatasel;
+	frame->tf_gs = _udatasel;
+#endif
+	frame->tf_ss = _udatasel;
+#if JG
+	frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE);
+	frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I;
+#endif
+	return(0);
+}
+
+/*
+ * Sanitize the tls so loading the descriptor does not blow up
+ * on us.  For AMD64 we don't have to do anything.
+ */
+int
+cpu_sanitize_tls(struct savetls *tls)
+{
+	return(0);
+}
+
+/*
+ * sigreturn(ucontext_t *sigcntxp)
+ *
+ * System call to cleanup state after a signal
+ * has been taken.  Reset signal mask and
+ * stack state from context left by sendsig (above).
+ * Return to previous pc and psl as specified by
+ * context left by sendsig.
Check carefully to
+ * make sure that the user has not modified the
+ * state to gain improper privileges.
+ */
+#define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+#define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
+
+int
+sys_sigreturn(struct sigreturn_args *uap)
+{
+	struct lwp *lp = curthread->td_lwp;
+	struct proc *p = lp->lwp_proc;
+	struct trapframe *regs;
+	ucontext_t uc;
+	ucontext_t *ucp;
+	int cs;
+	int eflags;
+	int error;
+
+	/*
+	 * We have to copy the information into kernel space so userland
+	 * can't modify it while we are sniffing it.
+	 */
+	regs = lp->lwp_md.md_regs;
+	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
+	if (error)
+		return (error);
+	ucp = &uc;
+#if JG
+	eflags = ucp->uc_mcontext.mc_eflags;
+#endif
+
+	/* NOTE(review): the entire eflags/vm86 validation below is still
+	 * disabled pending the amd64 trapframe conversion (JG). */
+#if JG
+	if (eflags & PSL_VM) {
+		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
+		struct vm86_kernel *vm86;
+
+		/*
+		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
+		 * set up the vm86 area, and we can't enter vm86 mode.
+		 */
+		if (lp->lwp_thread->td_pcb->pcb_ext == 0)
+			return (EINVAL);
+		vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
+		if (vm86->vm86_inited == 0)
+			return (EINVAL);
+
+		/* go back to user mode if both flags are set */
+		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
+			trapsignal(lp, SIGBUS, 0);
+
+		if (vm86->vm86_has_vme) {
+#if JG
+			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
+			    (eflags & VME_USERCHANGE) | PSL_VM;
+#endif
+		} else {
+#if JG
+			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
+			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
+			    (eflags & VM_USERCHANGE) | PSL_VM;
+#endif
+		}
+#if JG
+		bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe));
+		tf->tf_eflags = eflags;
+#endif
+		tf->tf_vm86_ds = tf->tf_ds;
+		tf->tf_vm86_es = tf->tf_es;
+		tf->tf_vm86_fs = tf->tf_fs;
+		tf->tf_vm86_gs = tf->tf_gs;
+		tf->tf_ds = _udatasel;
+		tf->tf_es = _udatasel;
+#if 0
+		tf->tf_fs = _udatasel;
+		tf->tf_gs = _udatasel;
+#endif
+	} else {
+		/*
+		 * Don't allow users to change privileged or reserved flags.
+		 */
+		/*
+		 * XXX do allow users to change the privileged flag PSL_RF.
+		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
+		 * should sometimes set it there too.  tf_eflags is kept in
+		 * the signal context during signal handling and there is no
+		 * other place to remember it, so the PSL_RF bit may be
+		 * corrupted by the signal handler without us knowing.
+		 * Corruption of the PSL_RF bit at worst causes one more or
+		 * one less debugger trap, so allowing it is fairly harmless.
+		 */
+#if JG
+		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+			kprintf("sigreturn: eflags = 0x%x\n", eflags);
+			return(EINVAL);
+		}
+#endif
+
+		/*
+		 * Don't allow users to load a valid privileged %cs.  Let the
+		 * hardware check for invalid selectors, excess privilege in
+		 * other selectors, invalid %eip's and invalid %esp's.
+		 */
+		cs = ucp->uc_mcontext.mc_cs;
+		if (!CS_SECURE(cs)) {
+			kprintf("sigreturn: cs = 0x%x\n", cs);
+			trapsignal(lp, SIGBUS, T_PROTFLT);
+			return(EINVAL);
+		}
+#if JG
+		bcopy(&ucp->uc_mcontext.mc_gs, regs, sizeof(struct trapframe));
+#endif
+	}
+#endif
+
+	/*
+	 * Restore the FPU state from the frame
+	 */
+#if JG
+	npxpop(&ucp->uc_mcontext);
+#endif
+
+	/*
+	 * Merge saved signal mailbox pending flag to maintain interlock
+	 * semantics against system calls.
+	 */
+#if JG
+	if (ucp->uc_mcontext.mc_xflags & PGEX_MAILBOX)
+		p->p_flag |= P_MAILBOX;
+#endif
+
+	if (ucp->uc_mcontext.mc_onstack & 1)
+		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
+	else
+		lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
+
+	lp->lwp_sigmask = ucp->uc_sigmask;
+	SIG_CANTMASK(lp->lwp_sigmask);
+	return(EJUSTRETURN);
+}
+
+/*
+ * Stack frame on entry to function.  %eax will contain the function vector,
+ * %ecx will contain the function data.  flags, ecx, and eax will have
+ * already been pushed on the stack.
+ */
+struct upc_frame {
+	register_t	eax;
+	register_t	ecx;
+	register_t	edx;
+	register_t	flags;
+	register_t	oldip;
+};
+
+/*
+ * Post an upcall to the current lwp: bump the userland critical section
+ * count, write the pending state back out, and (once the JG register
+ * frame code is enabled) redirect userland to the upcall context.
+ */
+void
+sendupcall(struct vmupcall *vu, int morepending)
+{
+	struct lwp *lp = curthread->td_lwp;
+	struct trapframe *regs;
+	struct upcall upcall;
+	struct upc_frame upc_frame;
+	int	crit_count = 0;
+
+	/*
+	 * If we are a virtual kernel running an emulated user process
+	 * context, switch back to the virtual kernel context before
+	 * trying to post the signal.
+	 */
+	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
+		lp->lwp_md.md_regs->tf_trapno = 0;
+		vkernel_trap(lp, lp->lwp_md.md_regs);
+	}
+
+	/*
+	 * Get the upcall data structure
+	 */
+	if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) ||
+	    copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int))
+	) {
+		vu->vu_pending = 0;
+		kprintf("bad upcall address\n");
+		return;
+	}
+
+	/*
+	 * If the data structure is already marked pending or has a critical
+	 * section count, mark the data structure as pending and return
+	 * without doing an upcall.  vu_pending is left set.
+	 */
+	if (upcall.upc_pending || crit_count >= vu->vu_pending) {
+		if (upcall.upc_pending < vu->vu_pending) {
+			upcall.upc_pending = vu->vu_pending;
+			copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
+				sizeof(upcall.upc_pending));
+		}
+		return;
+	}
+
+	/*
+	 * We can run this upcall now, clear vu_pending.
+	 *
+	 * Bump our critical section count and set or clear the
+	 * user pending flag depending on whether more upcalls are
+	 * pending.  The user will be responsible for calling
+	 * upc_dispatch(-1) to process remaining upcalls.
+	 */
+	vu->vu_pending = 0;
+	upcall.upc_pending = morepending;
+	crit_count += TDPRI_CRIT;
+	copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
+		sizeof(upcall.upc_pending));
+	copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
+		sizeof(int));
+
+	/*
+	 * Construct a stack frame and issue the upcall
+	 */
+	regs = lp->lwp_md.md_regs;
+#if JG
+	upc_frame.eax = regs->tf_eax;
+	upc_frame.ecx = regs->tf_ecx;
+	upc_frame.edx = regs->tf_edx;
+	upc_frame.flags = regs->tf_eflags;
+	upc_frame.oldip = regs->tf_eip;
+	if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)),
+		    sizeof(upc_frame)) != 0) {
+		kprintf("bad stack on upcall\n");
+	} else {
+		regs->tf_eax = (register_t)vu->vu_func;
+		regs->tf_ecx = (register_t)vu->vu_data;
+		regs->tf_edx = (register_t)lp->lwp_upcall;
+		regs->tf_eip = (register_t)vu->vu_ctx;
+		regs->tf_esp -= sizeof(upc_frame);
+	}
+#endif
+}
+
+/*
+ * fetchupcall occurs in the context of a system call, which means that
+ * we have to return EJUSTRETURN in order to prevent eax and edx from
+ * being overwritten by the syscall return value.
+ *
+ * if vu is not NULL we return the new context in %edx, the new data in %ecx,
+ * and the function pointer in %eax.
+ */
+int
+fetchupcall(struct vmupcall *vu, int morepending, void *rsp)
+{
+	struct upc_frame upc_frame;
+	struct lwp *lp = curthread->td_lwp;
+	struct trapframe *regs;
+	int error;
+	struct upcall upcall;
+	int crit_count;
+
+	regs = lp->lwp_md.md_regs;
+
+	error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int));
+	if (error == 0) {
+	    if (vu) {
+		/*
+		 * This jumps us to the next ready context.
+		 */
+		vu->vu_pending = 0;
+		error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall));
+		crit_count = 0;
+		if (error == 0)
+			error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
+		crit_count += TDPRI_CRIT;
+		if (error == 0)
+			error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
+#if JG
+		regs->tf_eax = (register_t)vu->vu_func;
+		regs->tf_ecx = (register_t)vu->vu_data;
+		regs->tf_edx = (register_t)lp->lwp_upcall;
+		regs->tf_eip = (register_t)vu->vu_ctx;
+		regs->tf_esp = (register_t)rsp;
+#endif
+	    } else {
+		/*
+		 * This returns us to the originally interrupted code.
+		 */
+		error = copyin(rsp, &upc_frame, sizeof(upc_frame));
+#if JG
+		regs->tf_eax = upc_frame.eax;
+		regs->tf_ecx = upc_frame.ecx;
+		regs->tf_edx = upc_frame.edx;
+		regs->tf_eflags = (regs->tf_eflags & ~PSL_USERCHANGE) |
+				(upc_frame.flags & PSL_USERCHANGE);
+		regs->tf_eip = upc_frame.oldip;
+		regs->tf_esp = (register_t)((char *)rsp + sizeof(upc_frame));
+#endif
+	    }
+	}
+	if (error == 0)
+		error = EJUSTRETURN;
+	return(error);
+}
+
+/*
+ * Machine dependent boot() routine
+ *
+ * I haven't seen anything to put here yet
+ * Possibly some stuff might be grafted back here from boot()
+ */
+void
+cpu_boot(int howto)
+{
+}
+
+/*
+ * Shutdown the CPU as much as possible
+ */
+void
+cpu_halt(void)
+{
+	/* spin forever executing hlt; never returns */
+	for (;;)
+		__asm__ __volatile("hlt");
+}
+
+/*
+ * cpu_idle() represents the idle LWKT.  You cannot return from this function
+ * (unless you want to blow things up!).  Instead we look for runnable threads
+ * and loop or halt as appropriate.  Giant is not held on entry to the thread.
+ *
+ * The main loop is entered with a critical section held, we must release
+ * the critical section before doing anything else.  lwkt_switch() will
+ * check for pending interrupts due to entering and exiting its own
+ * critical section.
+ *
+ * Note on cpu_idle_hlt:  On an SMP system we rely on a scheduler IPI
+ * to wake a HLTed cpu up.
However, there are cases where the idlethread
+ * will be entered with the possibility that no IPI will occur and in such
+ * cases lwkt_switch() sets TDF_IDLE_NOHLT.
+ */
+static int	cpu_idle_hlt = 1;
+static int	cpu_idle_hltcnt;
+static int	cpu_idle_spincnt;
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
+    &cpu_idle_hlt, 0, "Idle loop HLT enable");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
+    &cpu_idle_hltcnt, 0, "Idle loop entry halts");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
+    &cpu_idle_spincnt, 0, "Idle loop entry spins");
+
+static void
+cpu_idle_default_hook(void)
+{
+	/*
+	 * We must guarantee that hlt is exactly the instruction
+	 * following the sti.
+	 */
+	__asm __volatile("sti; hlt");
+}
+
+/* Other subsystems (e.g., ACPI) can hook this later. */
+void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
+
+void
+cpu_idle(void)
+{
+	struct thread *td = curthread;
+
+	crit_exit();
+	KKASSERT(td->td_pri < TDPRI_CRIT);
+	/* infinite idle loop; this function never returns */
+	for (;;) {
+		/*
+		 * See if there are any LWKTs ready to go.
+		 */
+		lwkt_switch();
+
+		/*
+		 * If we are going to halt call splz unconditionally after
+		 * CLIing to catch any interrupt races.  Note that we are
+		 * at SPL0 and interrupts are enabled.
+		 */
+		if (cpu_idle_hlt && !lwkt_runnable() &&
+		    (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+			__asm __volatile("cli");
+			splz();
+			if (!lwkt_runnable())
+				cpu_idle_hook();
+#ifdef SMP
+			else
+				__asm __volatile("pause");
+#endif
+			++cpu_idle_hltcnt;
+		} else {
+			td->td_flags &= ~TDF_IDLE_NOHLT;
+			splz();
+#ifdef SMP
+			__asm __volatile("sti; pause");
+#else
+			__asm __volatile("sti");
+#endif
+			++cpu_idle_spincnt;
+		}
+	}
+}
+
+/*
+ * This routine is called when the only runnable threads require
+ * the MP lock, and the scheduler couldn't get it.  On a real cpu
+ * we let the scheduler spin.
+ */
+void
+cpu_mplock_contested(void)
+{
+	cpu_pause();
+}
+
+/*
+ * This routine is called if a spinlock has been held through the
+ * exponential backoff period and is seriously contested.  On a real cpu
+ * we let it spin.
+ */
+void
+cpu_spinlock_contested(void)
+{
+	cpu_pause();
+}
+
+/*
+ * Clear registers on exec
+ */
+void
+exec_setregs(u_long entry, u_long stack, u_long ps_strings)
+{
+	struct thread *td = curthread;
+	struct lwp *lp = td->td_lwp;
+	struct pcb *pcb = td->td_pcb;
+	struct trapframe *regs = lp->lwp_md.md_regs;
+
+	kprintf0("exec_setregs\n");
+
+	/* was i386_user_cleanup() in NetBSD */
+	user_ldt_free(pcb);
+
+	bzero((char *)regs, sizeof(struct trapframe));
+	regs->tf_rip = entry;
+	regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; /* align the stack */
+	regs->tf_rdi = stack;		/* argv */
+	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+	regs->tf_ss = _udatasel;
+	regs->tf_cs = _ucodesel;
+	regs->tf_rbx = ps_strings;
+
+	/*
+	 * Reset the hardware debug registers if they were in use.
+	 * They won't have any meaning for the newly exec'd process.
+	 */
+	if (pcb->pcb_flags & PCB_DBREGS) {
+		pcb->pcb_dr0 = 0;
+		pcb->pcb_dr1 = 0;
+		pcb->pcb_dr2 = 0;
+		pcb->pcb_dr3 = 0;
+		pcb->pcb_dr6 = 0;
+		pcb->pcb_dr7 = 0;
+		if (pcb == td->td_pcb) {
+			/*
+			 * Clear the debug registers on the running
+			 * CPU, otherwise they will end up affecting
+			 * the next process we switch to.
+			 */
+			reset_dbregs();
+		}
+		pcb->pcb_flags &= ~PCB_DBREGS;
+	}
+
+	/*
+	 * Initialize the math emulator (if any) for the current process.
+	 * Actually, just clear the bit that says that the emulator has
+	 * been initialized.  Initialization is delayed until the process
+	 * traps to the emulator (if it is done at all) mainly because
+	 * emulators don't provide an entry point for initialization.
+	 */
+#if JG
+	pcb->pcb_flags &= ~FP_SOFTFP;
+#endif
+
+	/*
+	 * note: do not set CR0_TS here.  npxinit() must do it after clearing
+	 * gd_npxthread. Otherwise a preemptive interrupt thread may panic
+	 * in npxdna().
+	 */
+	crit_enter();
+	load_cr0(rcr0() | CR0_MP);
+
+	wrmsr(MSR_FSBASE, 0);
+	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
+	pcb->pcb_fsbase = 0;
+	pcb->pcb_gsbase = 0;
+
+#if NNPX > 0
+	/* Initialize the npx (if any) for the current process. */
+	npxinit(__INITIAL_NPXCW__);
+#endif
+	crit_exit();
+
+	pcb->pcb_ds = _udatasel;
+	pcb->pcb_es = _udatasel;
+	pcb->pcb_fs = _udatasel;
+	pcb->pcb_gs = _udatasel;
+}
+
+/*
+ * Set up the global control-register state for this cpu.
+ */
+void
+cpu_setregs(void)
+{
+	register_t cr0;
+
+	cr0 = rcr0();
+	cr0 |= CR0_NE;			/* Done by npxinit() */
+	cr0 |= CR0_MP | CR0_TS;		/* Done at every execve() too. */
+	cr0 |= CR0_WP | CR0_AM;
+	load_cr0(cr0);
+	load_gs(_udatasel);
+}
+
+/*
+ * machdep.adjkerntz handler: writing a new value resets the RTC.
+ */
+static int
+sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
+		req);
+	if (!error && req->newptr)
+		resettodr();
+	return (error);
+}
+
+SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
+	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
+
+#if JG
+SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
+	CTLFLAG_RW, &disable_rtc_set, 0, "");
+#endif
+
+#if JG
+SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
+	CTLFLAG_RD, &bootinfo, bootinfo, "");
+#endif
+
+SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
+	CTLFLAG_RW, &wall_cmos_clock, 0, "");
+
+extern u_long bootdev;		/* not a cdev_t - encoding is different */
+SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
+	CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
+
+/*
+ * Initialize 386 and configure to run kernel
+ */
+
+/*
+ * Initialize segments & interrupt table
+ */
+
+int _default_ldt;
+struct user_segment_descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
+static struct gate_descriptor idt0[NIDT];
+struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
+#if JG
+union descriptor ldt[NLDT];		/* local descriptor table */
+#endif
+
+/* table
descriptors - used to load tables by cpu */
+struct region_descriptor r_gdt, r_idt;
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+extern int has_f00f_bug;
+#endif
+
+static char dblfault_stack[PAGE_SIZE] __aligned(16);
+
+/* JG proc0paddr is a virtual address */
+void *proc0paddr;
+/* JG alignment? */
+char proc0paddr_buff[LWKT_THREAD_STACK];
+
+
+/* software prototypes -- in more palatable form */
+struct soft_segment_descriptor gdt_segs[] = {
+/* GNULL_SEL	0 Null Descriptor */
+{	0x0,			/* segment base address  */
+	0x0,			/* length */
+	0,			/* segment type */
+	0,			/* segment descriptor priority level */
+	0,			/* segment descriptor present */
+	0,			/* long */
+	0,			/* default 32 vs 16 bit size */
+	0			/* limit granularity (byte/page units)*/ },
+/* GCODE_SEL	1 Code Descriptor for kernel */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMERA,		/* segment type */
+	SEL_KPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	1,			/* long */
+	0,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+/* GDATA_SEL	2 Data Descriptor for kernel */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMRWA,		/* segment type */
+	SEL_KPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	1,			/* long */
+	0,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+/* GUCODE32_SEL	3 32 bit Code Descriptor for user */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMERA,		/* segment type */
+	SEL_UPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	0,			/* long */
+	1,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+/* GUDATA_SEL	4 32/64 bit Data Descriptor for user */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMRWA,		/* segment type */
+	SEL_UPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	0,			/* long */
+	1,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+/* GUCODE_SEL	5 64 bit Code Descriptor for user */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMERA,		/* segment type */
+	SEL_UPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	1,			/* long */
+	0,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
+{
+	0x0,			/* segment base address */
+	sizeof(struct amd64tss)-1,/* length - all address space */
+	SDT_SYSTSS,		/* segment type */
+	SEL_KPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	0,			/* long */
+	0,			/* unused - default 32 vs 16 bit size */
+	0			/* limit granularity (byte/page units)*/ },
+/* Actually, the TSS is a system descriptor which is double size */
+{	0x0,			/* segment base address  */
+	0x0,			/* length */
+	0,			/* segment type */
+	0,			/* segment descriptor priority level */
+	0,			/* segment descriptor present */
+	0,			/* long */
+	0,			/* default 32 vs 16 bit size */
+	0			/* limit granularity (byte/page units)*/ },
+/* GUGS32_SEL	8 32 bit GS Descriptor for user */
+{	0x0,			/* segment base address  */
+	0xfffff,		/* length - all address space */
+	SDT_MEMRWA,		/* segment type */
+	SEL_UPL,		/* segment descriptor priority level */
+	1,			/* segment descriptor present */
+	0,			/* long */
+	1,			/* default 32 vs 16 bit size */
+	1			/* limit granularity (byte/page units)*/ },
+};
+
+/*
+ * Install an interrupt gate in the IDT: handler func, gate type typ,
+ * descriptor privilege level dpl, and interrupt-stack-table index ist.
+ */
+void
+setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
+{
+	struct gate_descriptor *ip;
+
+	ip = idt + idx;
+	ip->gd_looffset = (uintptr_t)func;
+	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
+	ip->gd_ist = ist;
+	ip->gd_xx = 0;
+	ip->gd_type = typ;
+	ip->gd_dpl = dpl;
+	ip->gd_p = 1;
+	ip->gd_hioffset = ((uintptr_t)func)>>16 ;
+}
+
+#define	IDTVEC(name)	__CONCAT(X,name)
+
+extern inthand_t
+	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
+	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
+	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
+	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
+	IDTVEC(xmm), IDTVEC(dblfault),
+	IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+
+#ifdef DEBUG_INTERRUPTS
+extern inthand_t *Xrsvdary[256];
+#endif
+
+/*
+ * Unpack a hardware user segment descriptor into soft (struct) form.
+ */
+void
+sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
+{
+	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
+	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
+	ssd->ssd_type  = sd->sd_type;
+	ssd->ssd_dpl   = sd->sd_dpl;
+	ssd->ssd_p     = sd->sd_p;
+	ssd->ssd_def32 = sd->sd_def32;
+	ssd->ssd_gran  = sd->sd_gran;
+}
+
+/*
+ * Pack a soft segment descriptor into hardware user-descriptor form.
+ */
+void
+ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
+{
+
+	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
+	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
+	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
+	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
+	sd->sd_type  = ssd->ssd_type;
+	sd->sd_dpl   = ssd->ssd_dpl;
+	sd->sd_p     = ssd->ssd_p;
+	sd->sd_long  = ssd->ssd_long;
+	sd->sd_def32 = ssd->ssd_def32;
+	sd->sd_gran  = ssd->ssd_gran;
+}
+
+/*
+ * Pack a soft segment descriptor into hardware system-descriptor form
+ * (system descriptors carry a wider base field).
+ */
+void
+ssdtosyssd(struct soft_segment_descriptor *ssd,
+    struct system_segment_descriptor *sd)
+{
+
+	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
+	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
+	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
+	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
+	sd->sd_type  = ssd->ssd_type;
+	sd->sd_dpl   = ssd->ssd_dpl;
+	sd->sd_p     = ssd->ssd_p;
+	sd->sd_gran  = ssd->ssd_gran;
+}
+
+u_int basemem;
+
+/*
+ * Populate the (physmap) array with base/bound pairs describing the
+ * available physical memory in the system, then test this memory and
+ * build the phys_avail array describing the actually-available memory.
+ * + * If we cannot accurately determine the physical memory map, then use + * value from the 0xE801 call, and failing that, the RTC. + * + * Total memory size may be set by the kernel environment variable + * hw.physmem or the compile-time define MAXMEM. + * + * XXX first should be vm_paddr_t. + */ +static void +getmemsize(caddr_t kmdp, u_int64_t first) +{ + int i, off, physmap_idx, pa_indx, da_indx; + vm_paddr_t pa, physmap[PHYSMAP_SIZE]; + u_long physmem_tunable; + pt_entry_t *pte; + struct bios_smap *smapbase, *smap, *smapend; + u_int32_t smapsize; + quad_t dcons_addr, dcons_size; + + bzero(physmap, sizeof(physmap)); + basemem = 0; + physmap_idx = 0; + + /* + * get memory map from INT 15:E820, kindly supplied by the loader. + * + * subr_module.c says: + * "Consumer may safely assume that size value precedes data." + * ie: an int32_t immediately precedes smap. + */ + smapbase = (struct bios_smap *)preload_search_info(kmdp, + MODINFO_METADATA | MODINFOMD_SMAP); + if (smapbase == NULL) + panic("No BIOS smap info from loader!"); + + smapsize = *((u_int32_t *)smapbase - 1); + smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); + + for (smap = smapbase; smap < smapend; smap++) { + if (boothowto & RB_VERBOSE) + kprintf("SMAP type=%02x base=%016lx len=%016lx\n", + smap->type, smap->base, smap->length); + + if (smap->type != SMAP_TYPE_MEMORY) + continue; + + if (smap->length == 0) + continue; + + for (i = 0; i <= physmap_idx; i += 2) { + if (smap->base < physmap[i + 1]) { + if (boothowto & RB_VERBOSE) + kprintf( + "Overlapping or non-monotonic memory region, ignoring second region\n"); + continue; + } + } + + if (smap->base == physmap[physmap_idx + 1]) { + physmap[physmap_idx + 1] += smap->length; + continue; + } + + physmap_idx += 2; + if (physmap_idx == PHYSMAP_SIZE) { + kprintf( + "Too many segments in the physical address map, giving up\n"); + break; + } + physmap[physmap_idx] = smap->base; + physmap[physmap_idx + 1] = smap->base + smap->length; + } + + 
/* + * Find the 'base memory' segment for SMP + */ + basemem = 0; + for (i = 0; i <= physmap_idx; i += 2) { + if (physmap[i] == 0x00000000) { + basemem = physmap[i + 1] / 1024; + break; + } + } + if (basemem == 0) + panic("BIOS smap did not include a basemem segment!"); + +#ifdef SMP + /* make hole for AP bootstrap code */ + physmap[1] = mp_bootaddress(physmap[1] / 1024); +#endif + + /* + * Maxmem isn't the "maximum memory", it's one larger than the + * highest page of the physical address space. It should be + * called something like "Maxphyspage". We may adjust this + * based on ``hw.physmem'' and the results of the memory test. + */ + Maxmem = atop(physmap[physmap_idx + 1]); + +#ifdef MAXMEM + Maxmem = MAXMEM / 4; +#endif + + if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) + Maxmem = atop(physmem_tunable); + + /* + * Don't allow MAXMEM or hw.physmem to extend the amount of memory + * in the system. + */ + if (Maxmem > atop(physmap[physmap_idx + 1])) + Maxmem = atop(physmap[physmap_idx + 1]); + + if (atop(physmap[physmap_idx + 1]) != Maxmem && + (boothowto & RB_VERBOSE)) + kprintf("Physical memory use set to %ldK\n", Maxmem * 4); + + /* call pmap initialization to make new kernel address space */ + pmap_bootstrap(&first, 0); + + /* + * Size up each available chunk of physical memory. + */ + physmap[0] = PAGE_SIZE; /* mask off page 0 */ + pa_indx = 0; + da_indx = 1; + phys_avail[pa_indx++] = physmap[0]; + phys_avail[pa_indx] = physmap[0]; + dump_avail[da_indx] = physmap[0]; + pte = CMAP1; + + /* + * Get dcons buffer address + */ + if (kgetenv_quad("dcons.addr", &dcons_addr) == 0 || + kgetenv_quad("dcons.size", &dcons_size) == 0) + dcons_addr = 0; + + /* + * physmap is in bytes, so when converting to page boundaries, + * round up the start address and round down the end address. 
+ */ + for (i = 0; i <= physmap_idx; i += 2) { + vm_paddr_t end; + + end = ptoa((vm_paddr_t)Maxmem); + if (physmap[i + 1] < end) + end = trunc_page(physmap[i + 1]); + for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { + int tmp, page_bad, full; + int *ptr = (int *)CADDR1; + + full = FALSE; + /* + * block out kernel memory as not available. + */ + if (pa >= 0x100000 && pa < first) + goto do_dump_avail; + + /* + * block out dcons buffer + */ + if (dcons_addr > 0 + && pa >= trunc_page(dcons_addr) + && pa < dcons_addr + dcons_size) + goto do_dump_avail; + + page_bad = FALSE; + + /* + * map page into kernel: valid, read/write,non-cacheable + */ + *pte = pa | PG_V | PG_RW | PG_N; + cpu_invltlb(); + + tmp = *(int *)ptr; + /* + * Test for alternating 1's and 0's + */ + *(volatile int *)ptr = 0xaaaaaaaa; + if (*(volatile int *)ptr != 0xaaaaaaaa) + page_bad = TRUE; + /* + * Test for alternating 0's and 1's + */ + *(volatile int *)ptr = 0x55555555; + if (*(volatile int *)ptr != 0x55555555) + page_bad = TRUE; + /* + * Test for all 1's + */ + *(volatile int *)ptr = 0xffffffff; + if (*(volatile int *)ptr != 0xffffffff) + page_bad = TRUE; + /* + * Test for all 0's + */ + *(volatile int *)ptr = 0x0; + if (*(volatile int *)ptr != 0x0) + page_bad = TRUE; + /* + * Restore original value. + */ + *(int *)ptr = tmp; + + /* + * Adjust array of valid/good pages. + */ + if (page_bad == TRUE) + continue; + /* + * If this good page is a continuation of the + * previous set of good pages, then just increase + * the end pointer. Otherwise start a new chunk. + * Note that "end" points one higher than end, + * making the range >= start and < end. + * If we're also doing a speculative memory + * test and we at or past the end, bump up Maxmem + * so that we keep going. The first bad page + * will terminate the loop. 
+ */ + if (phys_avail[pa_indx] == pa) { + phys_avail[pa_indx] += PAGE_SIZE; + } else { + pa_indx++; + if (pa_indx == PHYS_AVAIL_ARRAY_END) { + kprintf( + "Too many holes in the physical address space, giving up\n"); + pa_indx--; + full = TRUE; + goto do_dump_avail; + } + phys_avail[pa_indx++] = pa; /* start */ + phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ + } + physmem++; +do_dump_avail: + if (dump_avail[da_indx] == pa) { + dump_avail[da_indx] += PAGE_SIZE; + } else { + da_indx++; + if (da_indx == DUMP_AVAIL_ARRAY_END) { + da_indx--; + goto do_next; + } + dump_avail[da_indx++] = pa; /* start */ + dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ + } +do_next: + if (full) + break; + } + } + *pte = 0; + cpu_invltlb(); + + /* + * XXX + * The last chunk must contain at least one page plus the message + * buffer to avoid complicating other code (message buffer address + * calculation, etc.). + */ + while (phys_avail[pa_indx - 1] + PAGE_SIZE + + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { + physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); + phys_avail[pa_indx--] = 0; + phys_avail[pa_indx--] = 0; + } + + Maxmem = atop(phys_avail[pa_indx]); + + /* Trim off space for the message buffer. */ + phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); + + /* Map the message buffer. 
*/ + for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) + pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + + off); +} + +/* + * IDT VECTORS: + * 0 Divide by zero + * 1 Debug + * 2 NMI + * 3 BreakPoint + * 4 OverFlow + * 5 Bound-Range + * 6 Invalid OpCode + * 7 Device Not Available (x87) + * 8 Double-Fault + * 9 Coprocessor Segment overrun (unsupported, reserved) + * 10 Invalid-TSS + * 11 Segment not present + * 12 Stack + * 13 General Protection + * 14 Page Fault + * 15 Reserved + * 16 x87 FP Exception pending + * 17 Alignment Check + * 18 Machine Check + * 19 SIMD floating point + * 20-31 reserved + * 32-255 INTn/external sources + */ +u_int64_t +hammer_time(u_int64_t modulep, u_int64_t physfree) +{ + caddr_t kmdp; + int gsel_tss, metadata_missing, off, x; + struct mdglobaldata *gd; + u_int64_t msr; + char *env; + + /* + * This must be done before the first references + * to CPU_prvspace[0] are made. + */ + init_paging(&physfree); + + /* + * Prevent lowering of the ipl if we call tsleep() early. + */ + gd = &CPU_prvspace[0].mdglobaldata; + bzero(gd, sizeof(*gd)); + + /* + * Note: on both UP and SMP curthread must be set non-NULL + * early in the boot sequence because the system assumes + * that 'curthread' is never NULL. 
+ */ + + gd->mi.gd_curthread = &thread0; + thread0.td_gd = &gd->mi; + + atdevbase = ISA_HOLE_START + PTOV_OFFSET; + +#if JG + metadata_missing = 0; + if (bootinfo.bi_modulep) { + preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; + preload_bootstrap_relocate(KERNBASE); + } else { + metadata_missing = 1; + } + if (bootinfo.bi_envp) + kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; +#endif + + preload_metadata = (caddr_t)(uintptr_t)(modulep + PTOV_OFFSET); + preload_bootstrap_relocate(PTOV_OFFSET); + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type("elf64 kernel"); + boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); + kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + PTOV_OFFSET; +#ifdef DDB + ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); + ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); +#endif + + /* + * start with one cpu. Note: with one cpu, ncpus2_shift, ncpus2_mask, + * and ncpus_fit_mask remain 0. + */ + ncpus = 1; + ncpus2 = 1; + ncpus_fit = 1; + /* Init basic tunables, hz etc */ + init_param1(); + + /* + * make gdt memory segments + */ + gdt_segs[GPROC0_SEL].ssd_base = + (uintptr_t) &CPU_prvspace[0].mdglobaldata.gd_common_tss; + + gd->mi.gd_prvspace = &CPU_prvspace[0]; + + for (x = 0; x < NGDT; x++) { + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + ssdtosd(&gdt_segs[x], &gdt[x]); + } + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); + r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; + r_gdt.rd_base = (long) gdt; + lgdt(&r_gdt); + + wrmsr(MSR_FSBASE, 0); /* User value */ + wrmsr(MSR_GSBASE, (u_int64_t)&gd->mi); + wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ + + mi_gdinit(&gd->mi, 0); + cpu_gdinit(gd, 0); + proc0paddr = proc0paddr_buff; + mi_proc0init(&gd->mi, proc0paddr); + safepri = TDPRI_MAX; + + /* spinlocks and the BGL */ + init_locks(); + + /* exceptions */ + for (x = 0; x < NIDT; x++) + setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, 
SEL_KPL, 0); + setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1); + setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); + setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); + setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); + + r_idt.rd_limit = sizeof(idt0) - 1; + r_idt.rd_base = (long) idt; + lidt(&r_idt); + + /* + * Initialize the console before we print anything out. + */ + cninit(); + +#if JG + if (metadata_missing) + kprintf("WARNING: loader(8) metadata is missing!\n"); +#endif + +#if NISA >0 + isa_defaultirq(); +#endif + rand_initialize(); + +#ifdef DDB + kdb_init(); + if (boothowto & RB_KDB) + Debugger("Boot flags requested debugger"); +#endif + +#if JG + finishidentcpu(); /* Final stage of CPU initialization */ + setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + identify_cpu(); /* Final stage of CPU initialization */ + initializecpu(); /* Initialize CPU registers */ + + /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ + gd->gd_common_tss.tss_rsp0 = thread0.td_kstack + \ + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); + /* Ensure the stack is aligned to 16 bytes */ + gd->gd_common_tss.tss_rsp0 &= ~0xFul; + gd->gd_rsp0 = gd->gd_common_tss.tss_rsp0; + + /* doublefault stack space, runs on ist1 */ + gd->gd_common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + + /* Set the IO permission bitmap (empty due to tss seg limit) */ + gd->gd_common_tss.tss_iobase = sizeof(struct amd64tss); + + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + gd->gd_tss_gdt = &gdt[GPROC0_SEL]; + gd->gd_common_tssd = *gd->gd_tss_gdt; + ltr(gsel_tss); + + /* Set up the fast syscall stuff */ + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + + getmemsize(kmdp, physfree); + init_param2(physmem); + + /* now running on new page tables, configured,and u/iom is accessible */ + + /* Map the message buffer. 
*/ +#if JG + for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) + pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); +#endif + + msgbufinit(msgbufp, MSGBUF_SIZE); + + + /* transfer to user mode */ + + _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); + _udatasel = GSEL(GUDATA_SEL, SEL_UPL); + _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); + + load_ds(_udatasel); + load_es(_udatasel); + load_fs(_udatasel); + + /* setup proc 0's pcb */ + thread0.td_pcb->pcb_flags = 0; +#if JG + thread0.td_pcb->pcb_cr3 = KPML4phys; +#else + thread0.td_pcb->pcb_cr3 = IdlePTD; +#endif + thread0.td_pcb->pcb_ext = 0; + lwp0.lwp_md.md_regs = &proc0_tf; + env = kgetenv("kernelname"); + if (env != NULL) + strlcpy(kernelname, env, sizeof(kernelname)); + + /* Location of kernel stack for locore */ + return ((u_int64_t)thread0.td_pcb); +} + +/* + * Initialize machine-dependant portions of the global data structure. + * Note that the global data area and cpu0's idlestack in the private + * data space were allocated in locore. + * + * Note: the idlethread's cpl is 0 + * + * WARNING! Called from early boot, 'mycpu' may not work yet. 
+ */ +void +cpu_gdinit(struct mdglobaldata *gd, int cpu) +{ + if (cpu) + gd->mi.gd_curthread = &gd->mi.gd_idlethread; + + lwkt_init_thread(&gd->mi.gd_idlethread, + gd->mi.gd_prvspace->idlestack, + sizeof(gd->mi.gd_prvspace->idlestack), + TDF_MPSAFE, &gd->mi); + lwkt_set_comm(&gd->mi.gd_idlethread, "idle_%d", cpu); + gd->mi.gd_idlethread.td_switch = cpu_lwkt_switch; + gd->mi.gd_idlethread.td_sp -= sizeof(void *); + *(void **)gd->mi.gd_idlethread.td_sp = cpu_idle_restore; +} + +int +is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr) +{ + if (saddr >= (vm_offset_t)&CPU_prvspace[0] && + eaddr <= (vm_offset_t)&CPU_prvspace[MAXCPU]) { + return (TRUE); + } + return (FALSE); +} + +struct globaldata * +globaldata_find(int cpu) +{ + KKASSERT(cpu >= 0 && cpu < ncpus); + return(&CPU_prvspace[cpu].mdglobaldata.mi); +} + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) +static void f00f_hack(void *unused); +SYSINIT(f00f_hack, SI_BOOT2_BIOS, SI_ORDER_ANY, f00f_hack, NULL); + +static void +f00f_hack(void *unused) +{ + struct gate_descriptor *new_idt; + vm_offset_t tmp; + + if (!has_f00f_bug) + return; + + kprintf("Intel Pentium detected, installing workaround for F00F bug\n"); + + r_idt.rd_limit = sizeof(idt0) - 1; + + tmp = kmem_alloc(&kernel_map, PAGE_SIZE * 2); + if (tmp == 0) + panic("kmem_alloc returned 0"); + if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) + panic("kmem_alloc returned non-page-aligned memory"); + /* Put the first seven entries in the lower page */ + new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); + bcopy(idt, new_idt, sizeof(idt0)); + r_idt.rd_base = (int)new_idt; + lidt(&r_idt); + idt = new_idt; + if (vm_map_protect(&kernel_map, tmp, tmp + PAGE_SIZE, + VM_PROT_READ, FALSE) != KERN_SUCCESS) + panic("vm_map_protect failed"); + return; +} +#endif /* defined(I586_CPU) && !NO_F00F_HACK */ + +int +ptrace_set_pc(struct lwp *lp, unsigned long addr) +{ +#if JG + lp->lwp_md.md_regs->tf_eip = addr; +#endif + return (0); +} + +int 
+ptrace_single_step(struct lwp *lp) +{ +#if JG + lp->lwp_md.md_regs->tf_eflags |= PSL_T; +#endif + return (0); +} + +int +fill_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; +#if JG + regs->r_gs = tp->tf_gs; + regs->r_fs = tp->tf_fs; + regs->r_es = tp->tf_es; + regs->r_ds = tp->tf_ds; + regs->r_edi = tp->tf_edi; + regs->r_esi = tp->tf_esi; + regs->r_ebp = tp->tf_ebp; + regs->r_ebx = tp->tf_ebx; + regs->r_edx = tp->tf_edx; + regs->r_ecx = tp->tf_ecx; + regs->r_eax = tp->tf_eax; + regs->r_eip = tp->tf_eip; +#endif + regs->r_cs = tp->tf_cs; +#if JG + regs->r_eflags = tp->tf_eflags; + regs->r_esp = tp->tf_esp; +#endif + regs->r_ss = tp->tf_ss; + pcb = lp->lwp_thread->td_pcb; + return (0); +} + +int +set_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; +#if JG + if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || + !CS_SECURE(regs->r_cs)) + return (EINVAL); + tp->tf_gs = regs->r_gs; + tp->tf_fs = regs->r_fs; + tp->tf_es = regs->r_es; + tp->tf_ds = regs->r_ds; + tp->tf_edi = regs->r_edi; + tp->tf_esi = regs->r_esi; + tp->tf_ebp = regs->r_ebp; + tp->tf_ebx = regs->r_ebx; + tp->tf_edx = regs->r_edx; + tp->tf_ecx = regs->r_ecx; + tp->tf_eax = regs->r_eax; + tp->tf_eip = regs->r_eip; +#endif + tp->tf_cs = regs->r_cs; +#if JG + tp->tf_eflags = regs->r_eflags; + tp->tf_esp = regs->r_esp; +#endif + tp->tf_ss = regs->r_ss; + pcb = lp->lwp_thread->td_pcb; + return (0); +} + +#ifndef CPU_DISABLE_SSE +static void +fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_87->en_cw = penv_xmm->en_cw; + penv_87->en_sw = penv_xmm->en_sw; + penv_87->en_tw = penv_xmm->en_tw; + penv_87->en_fip = penv_xmm->en_fip; + penv_87->en_fcs = penv_xmm->en_fcs; + penv_87->en_opcode = penv_xmm->en_opcode; + penv_87->en_foo = 
penv_xmm->en_foo; + penv_87->en_fos = penv_xmm->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; + + sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; +} + +static void +set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_xmm->en_cw = penv_87->en_cw; + penv_xmm->en_sw = penv_87->en_sw; + penv_xmm->en_tw = penv_87->en_tw; + penv_xmm->en_fip = penv_87->en_fip; + penv_xmm->en_fcs = penv_87->en_fcs; + penv_xmm->en_opcode = penv_87->en_opcode; + penv_xmm->en_foo = penv_87->en_foo; + penv_xmm->en_fos = penv_87->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; + + sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; +} +#endif /* CPU_DISABLE_SSE */ + +int +fill_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm, + (struct save87 *)fpregs); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + return (0); +} + +int +set_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + set_fpregs_xmm((struct save87 *)fpregs, + &lp->lwp_thread->td_pcb->pcb_save.sv_xmm); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs); + return (0); +} + +int +fill_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { +#if JG + dbregs->dr0 = rdr0(); + dbregs->dr1 = rdr1(); + dbregs->dr2 = rdr2(); + dbregs->dr3 = rdr3(); + dbregs->dr4 = rdr4(); + dbregs->dr5 = rdr5(); + dbregs->dr6 = rdr6(); + dbregs->dr7 = rdr7(); +#endif + } else { + struct pcb *pcb; + + pcb = lp->lwp_thread->td_pcb; +#if JG + dbregs->dr0 = pcb->pcb_dr0; + dbregs->dr1 = pcb->pcb_dr1; + dbregs->dr2 = pcb->pcb_dr2; + dbregs->dr3 = 
pcb->pcb_dr3; + dbregs->dr4 = 0; + dbregs->dr5 = 0; + dbregs->dr6 = pcb->pcb_dr6; + dbregs->dr7 = pcb->pcb_dr7; +#endif + } + return (0); +} + +int +set_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { +#if JG + load_dr0(dbregs->dr0); + load_dr1(dbregs->dr1); + load_dr2(dbregs->dr2); + load_dr3(dbregs->dr3); + load_dr4(dbregs->dr4); + load_dr5(dbregs->dr5); + load_dr6(dbregs->dr6); + load_dr7(dbregs->dr7); +#endif + } else { + struct pcb *pcb; + struct ucred *ucred; + int i; + uint32_t mask1, mask2; + + /* + * Don't let an illegal value for dr7 get set. Specifically, + * check for undefined settings. Setting these bit patterns + * result in undefined behaviour and can lead to an unexpected + * TRCTRAP. + */ + for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; + i++, mask1 <<= 2, mask2 <<= 2) +#if JG + if ((dbregs->dr7 & mask1) == mask2) + return (EINVAL); +#endif + + pcb = lp->lwp_thread->td_pcb; + ucred = lp->lwp_proc->p_ucred; + + /* + * Don't let a process set a breakpoint that is not within the + * process's address space. If a process could do this, it + * could halt the system by setting a breakpoint in the kernel + * (if ddb was enabled). Thus, we need to check to make sure + * that no breakpoints are being enabled for addresses outside + * process's address space, unless, perhaps, we were called by + * uid 0. + * + * XXX - what about when the watched area of the user's + * address space is written into from within the kernel + * ... wouldn't that still cause a breakpoint to be generated + * from within kernel mode? 
+ */ + + if (suser_cred(ucred, 0) != 0) { +#if JG + if (dbregs->dr7 & 0x3) { + /* dr0 is enabled */ + if (dbregs->dr0 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<2)) { + /* dr1 is enabled */ + if (dbregs->dr1 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<4)) { + /* dr2 is enabled */ + if (dbregs->dr2 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<6)) { + /* dr3 is enabled */ + if (dbregs->dr3 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } +#endif + } + +#if JG + pcb->pcb_dr0 = dbregs->dr0; + pcb->pcb_dr1 = dbregs->dr1; + pcb->pcb_dr2 = dbregs->dr2; + pcb->pcb_dr3 = dbregs->dr3; + pcb->pcb_dr6 = dbregs->dr6; + pcb->pcb_dr7 = dbregs->dr7; +#endif + + pcb->pcb_flags |= PCB_DBREGS; + } + + return (0); +} + +/* + * Return > 0 if a hardware breakpoint has been hit, and the + * breakpoint was in user space. Return 0, otherwise. + */ +int +user_dbreg_trap(void) +{ + u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ + u_int32_t bp; /* breakpoint bits extracted from dr6 */ + int nbp; /* number of breakpoints that triggered */ + caddr_t addr[4]; /* breakpoint addresses */ + int i; + + dr7 = rdr7(); + if ((dr7 & 0x000000ff) == 0) { + /* + * all GE and LE bits in the dr7 register are zero, + * thus the trap couldn't have been caused by the + * hardware debug registers + */ + return 0; + } + + nbp = 0; + dr6 = rdr6(); + bp = dr6 & 0x0000000f; + + if (!bp) { + /* + * None of the breakpoint bits are set meaning this + * trap was not caused by any of the debug registers + */ + return 0; + } + + /* + * at least one of the breakpoints were hit, check to see + * which ones and if any of them are user space addresses + */ + + if (bp & 0x01) { + addr[nbp++] = (caddr_t)rdr0(); + } + if (bp & 0x02) { + addr[nbp++] = (caddr_t)rdr1(); + } + if (bp & 0x04) { + addr[nbp++] = (caddr_t)rdr2(); + } + if (bp & 0x08) { + addr[nbp++] = (caddr_t)rdr3(); + } + + for (i=0; i, and de-inlined. 
+ */ + +#undef inb +#undef outb + +/* silence compiler warnings */ +u_char inb(u_int); +void outb(u_int, u_char); + +u_char +inb(u_int port) +{ + u_char data; + /* + * We use %%dx and not %1 here because i/o is done at %dx and not at + * %edx, while gcc generates inferior code (movw instead of movl) + * if we tell it to load (u_short) port. + */ + __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); + return (data); +} + +void +outb(u_int port, u_char data) +{ + u_char al; + /* + * Use an unnecessary assignment to help gcc's register allocator. + * This make a large difference for gcc-1.40 and a tiny difference + * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for + * best results. gcc-2.6.0 can't handle this. + */ + al = data; + __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); +} + +#endif /* DDB */ + + + +#include "opt_cpu.h" + + +/* + * initialize all the SMP locks + */ + +/* critical region when masking or unmasking interupts */ +struct spinlock_deprecated imen_spinlock; + +/* Make FAST_INTR() routines sequential */ +struct spinlock_deprecated fast_intr_spinlock; + +/* critical region for old style disable_intr/enable_intr */ +struct spinlock_deprecated mpintr_spinlock; + +/* critical region around INTR() routines */ +struct spinlock_deprecated intr_spinlock; + +/* lock region used by kernel profiling */ +struct spinlock_deprecated mcount_spinlock; + +/* locks com (tty) data/hardware accesses: a FASTINTR() */ +struct spinlock_deprecated com_spinlock; + +/* locks kernel kprintfs */ +struct spinlock_deprecated cons_spinlock; + +/* lock regions around the clock hardware */ +struct spinlock_deprecated clock_spinlock; + +/* lock around the MP rendezvous */ +struct spinlock_deprecated smp_rv_spinlock; + +static void +init_locks(void) +{ + /* + * mp_lock = 0; BSP already owns the MP lock + */ + /* + * Get the initial mp_lock with a count of 1 for the BSP. + * This uses a LOGICAL cpu ID, ie BSP == 0. 
+ */ +#ifdef SMP + cpu_get_initial_mplock(); +#endif + /* DEPRECATED */ + spin_lock_init(&mcount_spinlock); + spin_lock_init(&fast_intr_spinlock); + spin_lock_init(&intr_spinlock); + spin_lock_init(&mpintr_spinlock); + spin_lock_init(&imen_spinlock); + spin_lock_init(&smp_rv_spinlock); + spin_lock_init(&com_spinlock); + spin_lock_init(&clock_spinlock); + spin_lock_init(&cons_spinlock); + + /* our token pool needs to work early */ + lwkt_token_pool_init(); +} + diff --git a/sys/platform/pc64/amd64/machintr.c b/sys/platform/pc64/amd64/machintr.c deleted file mode 100644 index ab657e5334..0000000000 --- a/sys/platform/pc64/amd64/machintr.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/pc64/amd64/Attic/machintr.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Interrupt Subsystem ABI - */ - -static void dummy_intrdis(int); -static void dummy_intren(int); -static int dummy_vectorctl(int, int, int); -static int dummy_setvar(int, const void *); -static int dummy_getvar(int, void *); -static void dummy_finalize(void); -static void dummy_intrcleanup(void); - -struct machintr_abi MachIntrABI = { - MACHINTR_GENERIC, - .intrdis = dummy_intrdis, - .intren = dummy_intren, - .vectorctl = dummy_vectorctl, - .setvar = dummy_setvar, - .getvar = dummy_getvar, - .finalize = dummy_finalize, - .cleanup = dummy_intrcleanup -}; - -static void -dummy_intrdis(int intr) -{ -} - -static void -dummy_intren(int intr) -{ -} - -static int -dummy_vectorctl(int op, int intr, int flags) -{ - return (0); - /* return (EOPNOTSUPP); */ -} - -static int -dummy_setvar(int varid, const void *buf) -{ - return (ENOENT); -} - -static int -dummy_getvar(int varid, void *buf) -{ - return (ENOENT); -} - -static void -dummy_finalize(void) -{ -} - -static void -dummy_intrcleanup(void) -{ -} - -/* - * Process pending interrupts - */ -void -splz(void) -{ -} - -/* - * Allows an unprotected signal handler or mailbox to signal an interrupt - */ -void -signalintr(int intr) -{ -} - -void 
-cpu_disable_intr(void) -{ -} - -void -cpu_invlpg(void *addr) -{ -} - -void -cpu_invltlb(void) -{ -} - diff --git a/sys/platform/pc64/amd64/nexus.c b/sys/platform/pc64/amd64/nexus.c new file mode 100644 index 0000000000..9dba3d15d1 --- /dev/null +++ b/sys/platform/pc64/amd64/nexus.c @@ -0,0 +1,596 @@ +/* + * Copyright 1998 Massachusetts Institute of Technology + * Copyright (c) 2008 The DragonFly Project. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/i386/nexus.c,v 1.26.2.10 2003/02/22 13:16:45 imp Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/nexus.c,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +/* + * This code implements a `root nexus' for Intel Architecture + * machines. The function of the root nexus is to serve as an + * attachment point for both processors and buses, and to manage + * resources which are common to all of them. In particular, + * this code implements the core resource managers for interrupt + * requests, DMA requests (which rightfully should be a part of the + * ISA code but it's easier to do it here for now), I/O port addresses, + * and I/O memory address space. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define I386_BUS_SPACE_IO 0 /* space is i/o space */ +#define I386_BUS_SPACE_MEM 1 /* space is mem space */ + +static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); +struct nexus_device { + struct resource_list nx_resources; + int nx_pcibus; +}; + +#define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) + +static struct rman irq_rman, drq_rman, port_rman, mem_rman; + +static int nexus_probe(device_t); +static int nexus_attach(device_t); +static int nexus_print_all_resources(device_t dev); +static int nexus_print_child(device_t, device_t); +static device_t nexus_add_child(device_t bus, device_t parent, int order, + const char *name, int unit); +static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, + u_long, u_long, u_long, u_int); +static int nexus_read_ivar(device_t, device_t, int, uintptr_t *); +static int nexus_write_ivar(device_t, device_t, int, uintptr_t); +static int nexus_activate_resource(device_t, device_t, int, int, + struct resource *); +static int nexus_deactivate_resource(device_t, device_t, int, int, + struct resource *); +static int nexus_release_resource(device_t, device_t, 
int, int, + struct resource *); +static int nexus_setup_intr(device_t, device_t, struct resource *, int flags, + void (*)(void *), void *, + void **, lwkt_serialize_t); +static int nexus_teardown_intr(device_t, device_t, struct resource *, + void *); +static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); +static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); +static void nexus_delete_resource(device_t, device_t, int, int); + +/* + * The device_identify method will cause nexus to automatically associate + * and attach to the root bus. + */ +static device_method_t nexus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, bus_generic_identify), + DEVMETHOD(device_probe, nexus_probe), + DEVMETHOD(device_attach, nexus_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, nexus_print_child), + DEVMETHOD(bus_add_child, nexus_add_child), + DEVMETHOD(bus_read_ivar, nexus_read_ivar), + DEVMETHOD(bus_write_ivar, nexus_write_ivar), + DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), + DEVMETHOD(bus_release_resource, nexus_release_resource), + DEVMETHOD(bus_activate_resource, nexus_activate_resource), + DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), + DEVMETHOD(bus_setup_intr, nexus_setup_intr), + DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), + DEVMETHOD(bus_set_resource, nexus_set_resource), + DEVMETHOD(bus_get_resource, nexus_get_resource), + DEVMETHOD(bus_delete_resource, nexus_delete_resource), + + { 0, 0 } +}; + +static driver_t nexus_driver = { + "nexus", + nexus_methods, + 1, /* no softc */ +}; +static devclass_t nexus_devclass; + +DRIVER_MODULE(nexus, root, nexus_driver, nexus_devclass, 0, 0); + +static int +nexus_probe(device_t dev) +{ + device_quiet(dev); /* suppress attach 
message for neatness */ + + /* + * IRQ's are on the mainboard on old systems, but on the ISA part + * of PCI->ISA bridges. There would be multiple sets of IRQs on + * multi-ISA-bus systems. PCI interrupts are routed to the ISA + * component, so in a way, PCI can be a partial child of an ISA bus(!). + * APIC interrupts are global though. + * In the non-APIC case, disallow the use of IRQ 2. + */ + irq_rman.rm_start = 0; + irq_rman.rm_type = RMAN_ARRAY; + irq_rman.rm_descr = "Interrupt request lines"; +#ifdef APIC_IO + irq_rman.rm_end = APIC_INTMAPSIZE - 1; + if (rman_init(&irq_rman) + || rman_manage_region(&irq_rman, + irq_rman.rm_start, irq_rman.rm_end)) + panic("nexus_probe irq_rman"); +#else + irq_rman.rm_end = 15; + if (rman_init(&irq_rman) + || rman_manage_region(&irq_rman, irq_rman.rm_start, 1) + || rman_manage_region(&irq_rman, 3, irq_rman.rm_end)) + panic("nexus_probe irq_rman"); +#endif + + /* + * ISA DMA on PCI systems is implemented in the ISA part of each + * PCI->ISA bridge and the channels can be duplicated if there are + * multiple bridges. (eg: laptops with docking stations) + */ + drq_rman.rm_start = 0; + drq_rman.rm_end = 7; + drq_rman.rm_type = RMAN_ARRAY; + drq_rman.rm_descr = "DMA request lines"; + /* XXX drq 0 not available on some machines */ + if (rman_init(&drq_rman) + || rman_manage_region(&drq_rman, + drq_rman.rm_start, drq_rman.rm_end)) + panic("nexus_probe drq_rman"); + + /* + * However, IO ports and Memory truely are global at this level, + * as are APIC interrupts (however many IO APICS there turn out + * to be on large systems..) 
+ */ + port_rman.rm_start = 0; + port_rman.rm_end = 0xffff; + port_rman.rm_type = RMAN_ARRAY; + port_rman.rm_descr = "I/O ports"; + if (rman_init(&port_rman) + || rman_manage_region(&port_rman, 0, 0xffff)) + panic("nexus_probe port_rman"); + + mem_rman.rm_start = 0; + mem_rman.rm_end = ~0u; + mem_rman.rm_type = RMAN_ARRAY; + mem_rman.rm_descr = "I/O memory addresses"; + if (rman_init(&mem_rman) + || rman_manage_region(&mem_rman, 0, ~0)) + panic("nexus_probe mem_rman"); + + return bus_generic_probe(dev); +} + +static int +nexus_attach(device_t dev) +{ + device_t child; + + /* + * First, let our child driver's identify any child devices that + * they can find. Once that is done attach any devices that we + * found. + */ +#if 0 /* FUTURE */ + bus_generic_probe(dev); +#endif + bus_generic_attach(dev); + + /* + * And if we didn't see EISA or ISA on a pci bridge, create some + * connection points now so they show up "on motherboard". + */ + if (!devclass_get_device(devclass_find("eisa"), 0)) { + child = BUS_ADD_CHILD(dev, dev, 0, "eisa", 0); + if (child == NULL) + panic("nexus_attach eisa"); + device_probe_and_attach(child); + } + if (!devclass_get_device(devclass_find("isa"), 0)) { + child = BUS_ADD_CHILD(dev, dev, 0, "isa", 0); + if (child == NULL) + panic("nexus_attach isa"); + device_probe_and_attach(child); + } + + return 0; +} + +static int +nexus_print_all_resources(device_t dev) +{ + struct nexus_device *ndev = DEVTONX(dev); + struct resource_list *rl = &ndev->nx_resources; + int retval = 0; + + if (SLIST_FIRST(rl) || ndev->nx_pcibus != -1) + retval += kprintf(" at"); + + retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx"); + retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx"); + retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); + + return retval; +} + +static int +nexus_print_child(device_t bus, device_t child) +{ + struct nexus_device *ndev = DEVTONX(child); + int retval = 0; + + retval += 
bus_print_child_header(bus, child); + retval += nexus_print_all_resources(child); + if (ndev->nx_pcibus != -1) + retval += kprintf(" pcibus %d", ndev->nx_pcibus); + retval += kprintf(" on motherboard\n"); + + return (retval); +} + +static device_t +nexus_add_child(device_t bus, device_t parent, int order, + const char *name, int unit) +{ + device_t child; + struct nexus_device *ndev; + + ndev = kmalloc(sizeof(struct nexus_device), M_NEXUSDEV, M_INTWAIT|M_ZERO); + if (!ndev) + return(0); + resource_list_init(&ndev->nx_resources); + ndev->nx_pcibus = -1; + + child = device_add_child_ordered(parent, order, name, unit); + + /* should we free this in nexus_child_detached? */ + device_set_ivars(child, ndev); + + return(child); +} + +static int +nexus_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) +{ + struct nexus_device *ndev = DEVTONX(child); + + switch (which) { + case NEXUS_IVAR_PCIBUS: + *result = ndev->nx_pcibus; + break; + default: + return ENOENT; + } + return 0; +} + +static int +nexus_write_ivar(device_t dev, device_t child, int which, uintptr_t value) +{ + struct nexus_device *ndev = DEVTONX(child); + + switch (which) { + case NEXUS_IVAR_PCIBUS: + ndev->nx_pcibus = value; + break; + default: + return ENOENT; + } + return 0; +} + +/* + * Allocate a resource on behalf of child. NB: child is usually going to be a + * child of one of our descendants, not a direct child of nexus0. + * (Exceptions include npx.) + */ +static struct resource * +nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource *rv; + struct resource_list_entry *rle; + struct rman *rm; + int needactivate = flags & RF_ACTIVE; + + /* + * If this is an allocation of the "default" range for a given RID, and + * we know what the resources for this device are (ie. they aren't maintained + * by a child bus), then work out the start/end values. 
+ */ + if ((start == 0UL) && (end == ~0UL) && (count == 1)) { + if (ndev == NULL) + return(NULL); + rle = resource_list_find(&ndev->nx_resources, type, *rid); + if (rle == NULL) + return(NULL); + start = rle->start; + end = rle->end; + count = rle->count; + } + + flags &= ~RF_ACTIVE; + + switch (type) { + case SYS_RES_IRQ: + rm = &irq_rman; + break; + + case SYS_RES_DRQ: + rm = &drq_rman; + break; + + case SYS_RES_IOPORT: + rm = &port_rman; + break; + + case SYS_RES_MEMORY: + rm = &mem_rman; + break; + + default: + return 0; + } + + rv = rman_reserve_resource(rm, start, end, count, flags, child); + if (rv == 0) + return 0; + + if (type == SYS_RES_MEMORY) { + rman_set_bustag(rv, I386_BUS_SPACE_MEM); + } else if (type == SYS_RES_IOPORT) { + rman_set_bustag(rv, I386_BUS_SPACE_IO); + rman_set_bushandle(rv, rv->r_start); + } + + if (needactivate) { + if (bus_activate_resource(child, type, *rid, rv)) { + rman_release_resource(rv); + return 0; + } + } + + return rv; +} + +static int +nexus_activate_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + /* + * If this is a memory resource, map it into the kernel. + */ + if (rman_get_bustag(r) == I386_BUS_SPACE_MEM) { + caddr_t vaddr = 0; + + if (rman_get_end(r) < 1024 * 1024) { + /* + * The first 1Mb is mapped at KERNBASE. + */ + vaddr = (caddr_t)(uintptr_t)(KERNBASE + rman_get_start(r)); + } else { + u_int64_t paddr; + u_int64_t psize; + u_int32_t poffs; + + paddr = rman_get_start(r); + psize = rman_get_size(r); + + poffs = paddr - trunc_page(paddr); + vaddr = (caddr_t) pmap_mapdev(paddr-poffs, psize+poffs) + poffs; + } + rman_set_virtual(r, vaddr); + /* IBM-PC: the type of bus_space_handle_t is u_int */ + rman_set_bushandle(r, (bus_space_handle_t) vaddr); + } + return (rman_activate_resource(r)); +} + +static int +nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + /* + * If this is a memory resource, unmap it. 
+ */ + if ((rman_get_bustag(r) == I386_BUS_SPACE_MEM) && + (rman_get_end(r) >= 1024 * 1024)) { + u_int32_t psize; + + psize = rman_get_size(r); + pmap_unmapdev((vm_offset_t)rman_get_virtual(r), psize); + } + + return (rman_deactivate_resource(r)); +} + +static int +nexus_release_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + if (rman_get_flags(r) & RF_ACTIVE) { + int error = bus_deactivate_resource(child, type, rid, r); + if (error) + return error; + } + return (rman_release_resource(r)); +} + +/* + * Currently this uses the really grody interface from kern/kern_intr.c + * (which really doesn't belong in kern/anything.c). Eventually, all of + * the code in kern_intr.c and machdep_intr.c should get moved here, since + * this is going to be the official interface. + */ +static int +nexus_setup_intr(device_t bus, device_t child, struct resource *irq, + int flags, void (*ihand)(void *), void *arg, + void **cookiep, lwkt_serialize_t serializer) +{ + driver_t *driver; + int error, icflags; + + /* somebody tried to setup an irq that failed to allocate! */ + if (irq == NULL) + panic("nexus_setup_intr: NULL irq resource!"); + + *cookiep = 0; + icflags = flags; + if ((irq->r_flags & RF_SHAREABLE) == 0) + icflags |= INTR_EXCL; + + driver = device_get_driver(child); + + /* + * We depend here on rman_activate_resource() being idempotent. + */ + error = rman_activate_resource(irq); + if (error) + return (error); + + /* + * XXX cast the interrupt handler function to an inthand2_t. The + * difference is that an additional frame argument is passed which + * we do not currently want to expose the BUS subsystem to. 
+ */ + *cookiep = register_int(irq->r_start, (inthand2_t *)ihand, arg, + device_get_nameunit(child), serializer, + icflags); + if (*cookiep == NULL) + error = EINVAL; + return (error); +} + +static int +nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) +{ + if (ih) { + unregister_int(ih); + return (0); + } + return(-1); +} + +static int +nexus_set_resource(device_t dev, device_t child, int type, int rid, u_long start, u_long count) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource_list *rl = &ndev->nx_resources; + + /* XXX this should return a success/failure indicator */ + resource_list_add(rl, type, rid, start, start + count - 1, count); + return(0); +} + +static int +nexus_get_resource(device_t dev, device_t child, int type, int rid, u_long *startp, u_long *countp) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource_list *rl = &ndev->nx_resources; + struct resource_list_entry *rle; + + rle = resource_list_find(rl, type, rid); + device_printf(child, "type %d rid %d startp %p countp %p - got %p\n", + type, rid, startp, countp, rle); + if (!rle) + return(ENOENT); + if (startp) + *startp = rle->start; + if (countp) + *countp = rle->count; + return(0); +} + +static void +nexus_delete_resource(device_t dev, device_t child, int type, int rid) +{ + struct nexus_device *ndev = DEVTONX(child); + struct resource_list *rl = &ndev->nx_resources; + + resource_list_delete(rl, type, rid); +} + +/* + * Temporary Debugging + */ + +static void PCHAR_(int); + +int +kprintf0(const char *fmt, ...) 
+{ + __va_list ap; + int retval; + + __va_start(ap, fmt); + retval = kvcprintf(fmt, PCHAR_, NULL, 10, ap); + __va_end(ap); + return (retval); +} + +static void +PCHAR_(int c) +{ + const int COMC_TXWAIT = 0x40000; + const int COMPORT = 0x3f8; + const int LSR_TXRDY = 0x20; + const int com_lsr = 5; + const int com_data = 0; + int wait; + + for (wait = COMC_TXWAIT; wait > 0; wait--) { + if (inb(COMPORT + com_lsr) & LSR_TXRDY) { + outb(COMPORT + com_data, (u_char)c); + break; + } + } +} + diff --git a/sys/platform/pc64/amd64/npx.c b/sys/platform/pc64/amd64/npx.c index 9baf8b4bb6..ef547b12ec 100644 --- a/sys/platform/pc64/amd64/npx.c +++ b/sys/platform/pc64/amd64/npx.c @@ -1,12 +1,10 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2006 The DragonFly Project. + * Copyright (c) 2006 Matthew Dillon. * All rights reserved. * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -36,7 +34,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/npx.c,v 1.3 2007/12/12 23:49:22 dillon Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/npx.c,v 1.4 2008/08/29 17:07:10 dillon Exp $ */ #include "opt_debug_npx.h" @@ -83,6 +81,9 @@ #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #endif +#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ + : : "n" (CR0_TS) : "ax") +#define stop_emulating() __asm("clts") #ifndef CPU_DISABLE_SSE #define GET_FPU_EXSW_PTR(td) \ @@ -99,18 +100,12 @@ typedef u_char bool_t; static void fpu_clean_state(void); 
#endif -u_int cpu_fxsr = 0; +static struct krate badfprate = { 1 }; static int npx_attach (device_t dev); static void fpusave (union savefpu *); static void fpurstor (union savefpu *); -#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(CPU_DISABLE_SSE) -int mmxopt = 1; -SYSCTL_INT(_kern, OID_AUTO, mmxopt, CTLFLAG_RD, &mmxopt, 0, - "MMX/XMM optimized bcopy/copyin/copyout support"); -#endif - /* * Attach routine - announce which it is, and wire into system */ @@ -136,11 +131,11 @@ npxinit(u_short control) */ npxsave(&dummy); crit_enter(); - /*stop_emulating();*/ + stop_emulating(); fldcw(&control); fpusave(curthread->td_savefpu); mdcpu->gd_npxthread = NULL; - /*start_emulating();*/ + start_emulating(); crit_exit(); } @@ -453,9 +448,11 @@ npx_intr(void *dummy) * section to stabilize the FP state. */ int -npxdna(struct trapframe *frame) +npxdna(void) { + thread_t td = curthread; u_long *exstat; + int didinit = 0; if (mdcpu->gd_npxthread != NULL) { kprintf("npxdna: npxthread = %p, curthread = %p\n", @@ -468,9 +465,10 @@ npxdna(struct trapframe *frame) * used the FP unit. This also occurs when a thread pushes a * signal handler and uses FP in the handler. */ - if ((curthread->td_flags & TDF_USINGFP) == 0) { - curthread->td_flags |= TDF_USINGFP; + if ((td->td_flags & (TDF_USINGFP | TDF_KERNELFP)) == 0) { + td->td_flags |= TDF_USINGFP; npxinit(__INITIAL_NPXCW__); + didinit = 1; } /* @@ -481,12 +479,12 @@ npxdna(struct trapframe *frame) * fpstate. */ crit_enter(); - /*stop_emulating();*/ + stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. */ - mdcpu->gd_npxthread = curthread; - exstat = GET_FPU_EXSW_PTR(curthread); + mdcpu->gd_npxthread = td; + exstat = GET_FPU_EXSW_PTR(td); *exstat = 0; /* * The following frstor may cause an IRQ13 when the state being @@ -500,7 +498,18 @@ npxdna(struct trapframe *frame) * fnsave are broken, so our treatment breaks fnclex if it is the * first FPU instruction after a context switch. 
*/ - fpurstor(curthread->td_savefpu); + if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) +#ifndef CPU_DISABLE_SSE + && cpu_fxsr +#endif + ) { + krateprintf(&badfprate, + "FXRSTR: illegal FP MXCSR %08x didinit = %d\n", + td->td_savefpu->sv_xmm.sv_env.en_mxcsr, didinit); + td->td_savefpu->sv_xmm.sv_env.en_mxcsr &= 0xFFBF; + lwpsignal(curproc, curthread->td_lwp, SIGFPE); + } + fpurstor(td->td_savefpu); crit_exit(); return (1); @@ -530,20 +539,22 @@ void npxsave(union savefpu *addr) { crit_enter(); - /*stop_emulating();*/ + stop_emulating(); fpusave(addr); mdcpu->gd_npxthread = NULL; fninit(); - /*start_emulating();*/ + start_emulating(); crit_exit(); } static void fpusave(union savefpu *addr) { +#ifndef CPU_DISABLE_SSE if (cpu_fxsr) fxsave(addr); else +#endif fnsave(addr); } @@ -558,6 +569,8 @@ npxpush(mcontext_t *mctx) { thread_t td = curthread; + KKASSERT((td->td_flags & TDF_KERNELFP) == 0); + if (td->td_flags & TDF_USINGFP) { if (mdcpu->gd_npxthread == td) { /* @@ -572,8 +585,14 @@ npxpush(mcontext_t *mctx) } bcopy(td->td_savefpu, mctx->mc_fpregs, sizeof(mctx->mc_fpregs)); td->td_flags &= ~TDF_USINGFP; + mctx->mc_fpformat = +#ifndef CPU_DISABLE_SSE + (cpu_fxsr) ? _MC_FPFMT_XMM : +#endif + _MC_FPFMT_387; } else { mctx->mc_ownedfp = _MC_FPOWNED_NONE; + mctx->mc_fpformat = _MC_FPFMT_NODEV; } } @@ -610,10 +629,25 @@ npxpop(mcontext_t *mctx) * XXX: This is bit inefficient, if the code being returned * to is actively using the FP this results in multiple * kernel faults. + * + * WARNING: The saved state was exposed to userland and may + * have to be sanitized to avoid a GP fault in the kernel. 
*/ if (td == mdcpu->gd_npxthread) npxsave(td->td_savefpu); bcopy(mctx->mc_fpregs, td->td_savefpu, sizeof(*td->td_savefpu)); + if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) +#ifndef CPU_DISABLE_SSE + && cpu_fxsr +#endif + ) { + krateprintf(&badfprate, + "pid %d (%s) signal return from user: " + "illegal FP MXCSR %08x\n", + td->td_proc->p_pid, + td->td_proc->p_comm, + td->td_savefpu->sv_xmm.sv_env.en_mxcsr); + } td->td_flags |= TDF_USINGFP; break; } diff --git a/sys/platform/pc64/amd64/pmap.c b/sys/platform/pc64/amd64/pmap.c index c24e8aa321..fb1c753f37 100644 --- a/sys/platform/pc64/amd64/pmap.c +++ b/sys/platform/pc64/amd64/pmap.c @@ -1,534 +1,1606 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. * Copyright (c) 1994 John S. Dyson - * All rights reserved. * Copyright (c) 1994 David Greenman + * Copyright (c) 2008 The DragonFly Project. + * Copyright (c) 2008 Jordan Gordeev. * All rights reserved. - * Copyright (c) 2004-2006 Matthew Dillon - * All rights reserved. - * + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. 
Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/pmap.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/pmap.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ + /* - * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting - * the PTE in the page table, because a cpu synchronization might be required. - * The actual invalidation is delayed until the following call or flush. In - * the VKERNEL build this function is called prior to adjusting the PTE and - * invalidates the table synchronously (not delayed), and is not SMP safe - * as a consequence. - */ + * Manages physical address maps. + * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. 
+ * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. + */ + +#if JG +#include "opt_disable_pse.h" +#include "opt_pmap.h" +#endif +#include "opt_msgbuf.h" -#include +#include #include #include -#include -#include #include -#include -#include -#include +#include +#include +#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include #include -#include +#include #include +#include +#include + +#include +#include +#include +#include #include -#include -#include +#include +#include +#include #include +#include +#include + +#include + +#define PMAP_KEEP_PDIRS +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +#if defined(DIAGNOSTIC) +#define PMAP_DIAGNOSTIC +#endif + +#define MINPV 2048 + +#if !defined(PMAP_DIAGNOSTIC) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + +/* + * Get PDEs and PTEs for user/kernel address space + */ +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) + +#define pmap_pde_v(pte) ((*(pd_entry_t *)pte & PG_V) != 0) +#define pmap_pte_w(pte) ((*(pt_entry_t *)pte & PG_W) != 0) +#define pmap_pte_m(pte) ((*(pt_entry_t *)pte & PG_M) != 0) +#define pmap_pte_u(pte) ((*(pt_entry_t *)pte & PG_A) != 0) +#define pmap_pte_v(pte) ((*(pt_entry_t *)pte & PG_V) != 0) + + +/* + * Given a map and a machine independent protection code, + * convert to a vax protection code. 
+ */ +#define pte_prot(m, p) \ + (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) +static int protection_codes[8]; struct pmap kernel_pmap; +static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list); -void -pmap_init(void) -{ -} +vm_paddr_t avail_start; /* PA of first available physical page */ +vm_paddr_t avail_end; /* PA of last available physical page */ +vm_offset_t virtual_start; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ +vm_offset_t KvaStart; /* VA start of KVA space */ +vm_offset_t KvaEnd; /* VA end of KVA space (non-inclusive) */ +vm_offset_t KvaSize; /* max size of kernel virtual address space */ +static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ +static int pgeflag; /* PG_G or-in */ +static int pseflag; /* PG_PS or-in */ -void -pmap_init2(void) -{ -} +static vm_object_t kptobj; + +static int nkpt; +vm_offset_t kernel_vm_end; /* - * Bootstrap the kernel_pmap so it can be used with pmap_enter(). - * - * NOTE! pm_pdir for the kernel pmap is offset so VA's translate - * directly into PTD indexes (PTA is also offset for the same reason). - * This is necessary because, for now, KVA is not mapped at address 0. - * - * Page table pages are not managed like they are in normal pmaps, so - * no pteobj is needed. + * Data for the pv entry allocation mechanism */ -void -pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) -{ -} +static vm_zone_t pvzone; +static struct vm_zone pvzone_store; +static struct vm_object pvzone_obj; +static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; +static int pmap_pagedaemon_waken = 0; +static struct pv_entry *pvinit; /* - * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we - * just dummy it up so it works well enough for fork(). - * - * In DragonFly, process pmaps may only be used to manipulate user address - * space, never kernel address space. 
+ * All those kernel PT submaps that BSD is so fond of */ -void -pmap_pinit0(struct pmap *pmap) -{ -} +pt_entry_t *CMAP1 = 0, *ptmmap; +caddr_t CADDR1 = 0, ptvmmap = 0; +static pt_entry_t *msgbufmap; +struct msgbuf *msgbufp=0; -/************************************************************************ - * Procedures to manage whole physical maps * - ************************************************************************ - * - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. +/* + * Crashdump maps. */ -void -pmap_pinit(struct pmap *pmap) -{ -} +static pt_entry_t *pt_crashdumpmap; +static caddr_t crashdumpmap; + +extern uint64_t KPTphys; +extern pt_entry_t *SMPpt; +extern uint64_t SMPptpa; + +#define DISABLE_PSE + +static PMAP_INLINE void free_pv_entry (pv_entry_t pv); +static pt_entry_t * get_ptbase (pmap_t pmap); +static pv_entry_t get_pv_entry (void); +static void i386_protection_init (void); +static __inline void pmap_clearbit (vm_page_t m, int bit); + +static void pmap_remove_all (vm_page_t m); +static void pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m); +static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq, + vm_offset_t sva, pmap_inval_info_t info); +static void pmap_remove_page (struct pmap *pmap, + vm_offset_t va, pmap_inval_info_t info); +static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, + vm_offset_t va, pmap_inval_info_t info); +static boolean_t pmap_testbit (vm_page_t m, int bit); +static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, + vm_page_t mpte, vm_page_t m); + +static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va); + +static int pmap_release_free_page (pmap_t pmap, vm_page_t p); +static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex); +static pt_entry_t * pmap_pte_quick (pmap_t pmap, vm_offset_t va); +static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); +static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, 
pmap_inval_info_t); +static vm_offset_t pmap_kmem_choose(vm_offset_t addr); + +static unsigned pdir4mb; /* - * Clean up a pmap structure so it can be physically freed + * Move the kernel virtual free pointer to the next + * 4MB. This is used to help improve performance + * by using a large (4MB) page for much of the kernel + * (.text, .data, .bss) */ -void -pmap_puninit(pmap_t pmap) +static vm_offset_t +pmap_kmem_choose(vm_offset_t addr) { + vm_offset_t newaddr = addr; +#ifndef DISABLE_PSE + if (cpu_feature & CPUID_PSE) { + newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); + } +#endif + return newaddr; } - /* - * Wire in kernel global address entries. To avoid a race condition - * between pmap initialization and pmap_growkernel, this procedure - * adds the pmap to the master list (which growkernel scans to update), - * then copies the template. + * pmap_pte: + * + * Extract the page table entry associated with the given map/virtual + * pair. * - * In a virtual kernel there are no kernel global address entries. + * This function may NOT be called from an interrupt. */ -void -pmap_pinit2(struct pmap *pmap) +PMAP_INLINE pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) { + pd_entry_t *pdeaddr; + + if (pmap) { + pdeaddr = pmap_pde(pmap, va); + if (*pdeaddr & PG_PS) + return pdeaddr; + if (*pdeaddr) { + return get_ptbase(pmap) + amd64_btop(va); + } + } + return (0); } /* - * Release all resources held by the given physical map. + * pmap_pte_quick: * - * Should only be called if the map contains no valid mappings. - */ -static int pmap_release_callback(struct vm_page *p, void *data); + * Super fast pmap_pte routine best used when scanning the pv lists. + * This eliminates many course-grained invltlb calls. Note that many of + * the pv list scans are across different pmaps and it is very wasteful + * to do an entire invltlb when checking a single mapping. + * + * Should only be called while in a critical section. 
+ */ +static pt_entry_t * +pmap_pte_quick(pmap_t pmap, vm_offset_t va) +{ + struct mdglobaldata *gd = mdcpu; + pd_entry_t pde, newpf; + + if ((pde = pmap->pm_pdir[va >> PDRSHIFT]) != 0) { + pd_entry_t frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME; + vm_pindex_t index = amd64_btop(va); + /* are we current address space or kernel? */ + if ((pmap == &kernel_pmap) || + (frame == (PTDpde & PG_FRAME))) { + return (pt_entry_t *) PTmap + index; + } + newpf = pde & PG_FRAME; + if ( ((* (pt_entry_t *) gd->gd_PMAP1) & PG_FRAME) != newpf) { + * (pt_entry_t *) gd->gd_PMAP1 = newpf | PG_RW | PG_V; + cpu_invlpg(gd->gd_PADDR1); + } + return gd->gd_PADDR1 + (index & (NPTEPG - 1)); + } + return (0); +} -void -pmap_release(struct pmap *pmap) + +static u_int64_t +allocpages(vm_paddr_t *firstaddr, int n) { + u_int64_t ret; + + ret = *firstaddr; + bzero((void *)ret, n * PAGE_SIZE); + *firstaddr += n * PAGE_SIZE; + return (ret); } -static int -pmap_release_callback(struct vm_page *p, void *data) -{ - return(0); +void +create_pagetables(vm_paddr_t *firstaddr) +{ + int i; + int count; + uint64_t cpu0pp, cpu0idlestk; + int idlestk_page_offset = offsetof(struct privatespace, idlestack) / PAGE_SIZE; + + /* we are running (mostly) V=P at this point */ + + common_lvl4_phys = allocpages(firstaddr, 1); /* 512 512G mappings */ + common_lvl3_phys = allocpages(firstaddr, 1); /* 512 1G mappings */ + KPTphys = allocpages(firstaddr, NKPT); /* kernel page table */ + IdlePTD = allocpages(firstaddr, 1); /* kernel page dir */ + cpu0pp = allocpages(firstaddr, MDGLOBALDATA_BASEALLOC_PAGES); + cpu0idlestk = allocpages(firstaddr, UPAGES); + SMPptpa = allocpages(firstaddr, 1); + SMPpt = (void *)(SMPptpa + KERNBASE); + + + /* + * Load kernel page table with kernel memory mappings + */ + for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) { + ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; + ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V; + } + +#ifndef JG + for (i = 0; i < NKPT; i++) { + ((pd_entry_t *)IdlePTD)[i] = KPTphys 
+ (i << PAGE_SHIFT); + ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V; + } +#endif + + /* + * Set up the kernel page table itself. + */ + for (i = 0; i < NKPT; i++) { + ((pd_entry_t *)IdlePTD)[KPTDI + i] = KPTphys + (i << PAGE_SHIFT); + ((pd_entry_t *)IdlePTD)[KPTDI + i] |= PG_RW | PG_V; + } + +#ifndef JG + count = ISA_HOLE_LENGTH >> PAGE_SHIFT; + for (i = 0; i < count; i++) { + ((pt_entry_t *)KPTphys)[amd64_btop(ISA_HOLE_START) + i] = \ + (ISA_HOLE_START + i * PAGE_SIZE) | PG_RW | PG_V; + } +#endif + + /* + * Self-mapping + */ + ((pd_entry_t *)IdlePTD)[PTDPTDI] = (pd_entry_t)IdlePTD | PG_RW | PG_V; + + /* + * Map CPU_prvspace[0].mdglobaldata + */ + for (i = 0; i < MDGLOBALDATA_BASEALLOC_PAGES; i++) { + ((pt_entry_t *)SMPptpa)[i] = \ + (cpu0pp + i * PAGE_SIZE) | PG_RW | PG_V; + } + + /* + * Map CPU_prvspace[0].idlestack + */ + for (i = 0; i < UPAGES; i++) { + ((pt_entry_t *)SMPptpa)[idlestk_page_offset + i] = \ + (cpu0idlestk + i * PAGE_SIZE) | PG_RW | PG_V; + } + + /* + * Link SMPpt. + */ + ((pd_entry_t *)IdlePTD)[MPPTDI] = SMPptpa | PG_RW | PG_V; + + /* + * PML4 maps level 3 + */ + ((pml4_entry_t *)common_lvl4_phys)[LINKPML4I] = common_lvl3_phys | PG_RW | PG_V | PG_U; + + /* + * location of "virtual CR3" - a PDP entry that is loaded + * with a PD physical address (+ page attributes). + * Matt: location of user page directory entry (representing 1G) + */ + link_pdpe = &((pdp_entry_t *)common_lvl3_phys)[LINKPDPI]; +} + +void +init_paging(vm_paddr_t *firstaddr) { + create_pagetables(firstaddr); + + /* switch to the newly created page table */ + *link_pdpe = IdlePTD | PG_RW | PG_V | PG_U; + load_cr3(common_lvl4_phys); + link_pdpe = (void *)((char *)link_pdpe + KERNBASE); + + KvaStart = (vm_offset_t)VADDR(PTDPTDI, 0); + KvaEnd = (vm_offset_t)VADDR(APTDPTDI, 0); + KvaSize = KvaEnd - KvaStart; } /* - * Retire the given physical map from service. Should only be called if - * the map contains no valid mappings. + * Bootstrap the system enough to run with virtual memory. 
+ * + * On the i386 this is called after mapping has already been enabled + * and just syncs the pmap module with what has already been done. + * [We can't call it easily with mapping off since the kernel is not + * mapped with PA == VA, hence we would have to relocate every address + * from the linked base (virtual) address "KERNBASE" to the actual + * (physical) address starting relative to 0] */ void -pmap_destroy(pmap_t pmap) -{ +pmap_bootstrap(vm_paddr_t *firstaddr, vm_paddr_t loadaddr) +{ + vm_offset_t va; + pt_entry_t *pte; + struct mdglobaldata *gd; + int i; + int pg; + + avail_start = *firstaddr; + + /* + * XXX The calculation of virtual_start is wrong. It's NKPT*PAGE_SIZE + * too large. It should instead be correctly calculated in locore.s and + * not based on 'first' (which is a physical address, not a virtual + * address, for the start of unused physical memory). The kernel + * page tables are NOT double mapped and thus should not be included + * in this calculation. + */ + virtual_start = (vm_offset_t) PTOV_OFFSET + *firstaddr; + virtual_start = pmap_kmem_choose(virtual_start); + virtual_end = VADDR(KPTDI+NKPDE-1, NPTEPG-1); + + /* + * Initialize protection array. + */ + i386_protection_init(); + + /* + * The kernel's pmap is statically allocated so we don't have to use + * pmap_create, which is unlikely to work correctly at this part of + * the boot sequence (XXX and which no longer exists). + */ + kernel_pmap.pm_pdir = (pd_entry_t *)(PTOV_OFFSET + (uint64_t)IdlePTD); + kernel_pmap.pm_count = 1; + kernel_pmap.pm_active = (cpumask_t)-1; /* don't allow deactivation */ + TAILQ_INIT(&kernel_pmap.pm_pvlist); + nkpt = NKPT; + + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. + */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); + + va = virtual_start; + pte = (pt_entry_t *) pmap_pte(&kernel_pmap, va); + + /* + * CMAP1/CMAP2 are used for zeroing and copying pages. 
+ */ + SYSMAP(caddr_t, CMAP1, CADDR1, 1) + + /* + * Crashdump maps. + */ + SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); + + /* + * ptvmmap is used for reading arbitrary physical pages via + * /dev/mem. + */ + SYSMAP(caddr_t, ptmmap, ptvmmap, 1) + + /* + * msgbufp is used to map the system message buffer. + * XXX msgbufmap is not used. + */ + SYSMAP(struct msgbuf *, msgbufmap, msgbufp, + atop(round_page(MSGBUF_SIZE))) + + virtual_start = va; + + *CMAP1 = 0; + for (i = 0; i < NKPT; i++) + PTD[i] = 0; + + /* + * PG_G is terribly broken on SMP because we IPI invltlb's in some + * cases rather then invl1pg. Actually, I don't even know why it + * works under UP because self-referential page table mappings + */ +#ifdef SMP + pgeflag = 0; +#else + if (cpu_feature & CPUID_PGE) + pgeflag = PG_G; +#endif + +/* + * Initialize the 4MB page size flag + */ + pseflag = 0; +/* + * The 4MB page version of the initial + * kernel page mapping. + */ + pdir4mb = 0; + +#if !defined(DISABLE_PSE) + if (cpu_feature & CPUID_PSE) { + pt_entry_t ptditmp; + /* + * Note that we have enabled PSE mode + */ + pseflag = PG_PS; + ptditmp = *(PTmap + amd64_btop(KERNBASE)); + ptditmp &= ~(NBPDR - 1); + ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; + pdir4mb = ptditmp; + +#ifndef SMP + /* + * Enable the PSE mode. If we are SMP we can't do this + * now because the APs will not be able to use it when + * they boot up. + */ + load_cr4(rcr4() | CR4_PSE); + + /* + * We can do the mapping here for the single processor + * case. We simply ignore the old page table page from + * now on. + */ + /* + * For SMP, we still need 4K pages to bootstrap APs, + * PSE will be enabled as soon as all APs are up. 
+ */ + PTD[KPTDI] = (pd_entry_t)ptditmp; + kernel_pmap.pm_pdir[KPTDI] = (pd_entry_t)ptditmp; + cpu_invltlb(); +#endif + } +#endif +#ifdef SMP + if (cpu_apic_address == 0) + panic("pmap_bootstrap: no local apic!"); + + /* local apic is mapped on last page */ + SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | + (cpu_apic_address & PG_FRAME)); +#endif + + /* + * We need to finish setting up the globaldata page for the BSP. + * locore has already populated the page table for the mdglobaldata + * portion. + */ + pg = MDGLOBALDATA_BASEALLOC_PAGES; + gd = &CPU_prvspace[0].mdglobaldata; + gd->gd_CMAP1 = &SMPpt[pg + 0]; + gd->gd_CMAP2 = &SMPpt[pg + 1]; + gd->gd_CMAP3 = &SMPpt[pg + 2]; + gd->gd_PMAP1 = &SMPpt[pg + 3]; + gd->gd_CADDR1 = CPU_prvspace[0].CPAGE1; + gd->gd_CADDR2 = CPU_prvspace[0].CPAGE2; + gd->gd_CADDR3 = CPU_prvspace[0].CPAGE3; + gd->gd_PADDR1 = (pt_entry_t *)CPU_prvspace[0].PPAGE1; + + cpu_invltlb(); } +#ifdef SMP /* - * Add a reference to the specified pmap. + * Set 4mb pdir for mp startup */ void -pmap_reference(pmap_t pmap) -{ +pmap_set_opt(void) +{ + if (pseflag && (cpu_feature & CPUID_PSE)) { + load_cr4(rcr4() | CR4_PSE); + if (pdir4mb && mycpu->gd_cpuid == 0) { /* only on BSP */ + kernel_pmap.pm_pdir[KPTDI] = + PTD[KPTDI] = (pd_entry_t)pdir4mb; + cpu_invltlb(); + } + } } +#endif -/************************************************************************ - * VMSPACE MANAGEMENT * - ************************************************************************ - * - * The VMSPACE management we do in our virtual kernel must be reflected - * in the real kernel. This is accomplished by making vmspace system - * calls to the real kernel. +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + * pmap_init has been enhanced to support in a fairly consistant + * way, discontiguous physical memory. 
*/ void -cpu_vmspace_alloc(struct vmspace *vm) +pmap_init(void) { + int i; + int initial_pvs; + + /* + * object for kernel page table pages + */ + kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); + + /* + * Allocate memory for random pmap data structures. Includes the + * pv_head_table. + */ + + for(i = 0; i < vm_page_array_size; i++) { + vm_page_t m; + + m = &vm_page_array[i]; + TAILQ_INIT(&m->md.pv_list); + m->md.pv_list_count = 0; + } + + /* + * init the pv free list + */ + initial_pvs = vm_page_array_size; + if (initial_pvs < MINPV) + initial_pvs = MINPV; + pvzone = &pvzone_store; + pvinit = (struct pv_entry *) kmem_alloc(&kernel_map, + initial_pvs * sizeof (struct pv_entry)); + zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, + initial_pvs); + + /* + * Now it is safe to enable pv_table recording. + */ + pmap_initialized = TRUE; } +/* + * Initialize the address space (zone) for the pv_entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. + */ void -cpu_vmspace_free(struct vmspace *vm) +pmap_init2(void) { + int shpgperproc = PMAP_SHPGPERPROC; + + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + vm_page_array_size; + TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); + pv_entry_high_water = 9 * (pv_entry_max / 10); + zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); } -/************************************************************************ - * Procedures which operate directly on the kernel PMAP * - ************************************************************************/ + +/*************************************************** + * Low level helper routines..... + ***************************************************/ + +#if defined(PMAP_DIAGNOSTIC) /* - * This maps the requested page table and gives us access to it. + * This code checks for non-writeable/modified pages. + * This should be an invalid condition. 
*/ -static vpte_t * -get_ptbase(struct pmap *pmap, vm_offset_t va) +static int +pmap_nw_modified(pt_entry_t ptea) { - return NULL; + int pte; + + pte = (int) ptea; + + if ((pte & (PG_M|PG_RW)) == PG_M) + return 1; + else + return 0; } +#endif + -static vpte_t * -get_ptbase1(struct pmap *pmap, vm_offset_t va) +/* + * this routine defines the region(s) of memory that should + * not be tested for the modified bit. + */ +static PMAP_INLINE int +pmap_track_modified(vm_offset_t va) { - return NULL; + if ((va < clean_sva) || (va >= clean_eva)) + return 1; + else + return 0; } -static vpte_t * -get_ptbase2(struct pmap *pmap, vm_offset_t va) +static pt_entry_t * +get_ptbase(pmap_t pmap) { - return NULL; + pd_entry_t frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME; + struct globaldata *gd = mycpu; + + /* are we current address space or kernel? */ + if (pmap == &kernel_pmap || frame == (PTDpde & PG_FRAME)) { + return (pt_entry_t *) PTmap; + } + + /* otherwise, we are alternate address space */ + KKASSERT(gd->gd_intr_nesting_level == 0 && + (gd->gd_curthread->td_flags & TDF_INTTHREAD) == 0); + + if (frame != (((pd_entry_t) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t)(frame | PG_RW | PG_V); + /* The page directory is not shared between CPUs */ + cpu_invltlb(); + } + return (pt_entry_t *) APTmap; } /* - * When removing a page directory the related VA range in the self-mapping - * of the page table must be invalidated. + * pmap_extract: + * + * Extract the physical page address associated with the map/VA pair. + * + * This function may not be called from an interrupt if the pmap is + * not kernel_pmap. 
*/ -static void -inval_ptbase_pagedir(pmap_t pmap, vm_pindex_t pindex) +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) { + vm_offset_t rtval; + vm_offset_t pdirindex; + + pdirindex = va >> PDRSHIFT; + if (pmap && (rtval = pmap->pm_pdir[pdirindex])) { + pt_entry_t *pte; + if ((rtval & PG_PS) != 0) { + rtval &= ~(NBPDR - 1); + rtval |= va & (NBPDR - 1); + return rtval; + } + pte = get_ptbase(pmap) + amd64_btop(va); + rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); + return rtval; + } + return 0; } +/*************************************************** + * Low level mapping routines..... + ***************************************************/ + /* - * Enter a mapping into kernel_pmap. Mappings created in this fashion - * are not managed. Mappings must be immediately accessible on all cpus. - * - * Call pmap_inval_pte() to invalidate the virtual pte and clean out the - * real pmap and handle related races before storing the new vpte. + * Routine: pmap_kenter + * Function: + * Add a wired page to the KVA + * NOTE! note that in order for the mapping to take effect -- you + * should do an invltlb after doing the pmap_kenter(). */ -void +void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { + pt_entry_t *pte; + pt_entry_t npte; + pmap_inval_info info; + + pmap_inval_init(&info); + npte = pa | PG_RW | PG_V | pgeflag; + pte = vtopte(va); + pmap_inval_add(&info, &kernel_pmap, va); + *pte = npte; + pmap_inval_flush(&info); } /* - * Synchronize a kvm mapping originally made for the private use on - * some other cpu so it can be used on all cpus. - * - * XXX add MADV_RESYNC to improve performance. + * Routine: pmap_kenter_quick + * Function: + * Similar to pmap_kenter(), except we only invalidate the + * mapping on the current CPU. 
*/ +void +pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) +{ + pt_entry_t *pte; + pt_entry_t npte; + + npte = pa | PG_RW | PG_V | pgeflag; + pte = vtopte(va); + *pte = npte; + cpu_invlpg((void *)va); +} + void pmap_kenter_sync(vm_offset_t va) { + pmap_inval_info info; + + pmap_inval_init(&info); + pmap_inval_add(&info, &kernel_pmap, va); + pmap_inval_flush(&info); } -/* - * Synchronize a kvm mapping originally made for the private use on - * some other cpu so it can be used on our cpu. Turns out to be the - * same madvise() call, because we have to sync the real pmaps anyway. - * - * XXX add MADV_RESYNC to improve performance. - */ void pmap_kenter_sync_quick(vm_offset_t va) { + cpu_invlpg((void *)va); } -#if 0 /* - * Make a previously read-only kernel mapping R+W (not implemented by - * virtual kernels). + * remove a page from the kernel pagetables */ void -pmap_kmodify_rw(vm_offset_t va) +pmap_kremove(vm_offset_t va) { - *pmap_kpte(va) |= VPTE_R | VPTE_W; - madvise((void *)va, PAGE_SIZE, MADV_INVAL); + pt_entry_t *pte; + pmap_inval_info info; + + pmap_inval_init(&info); + pte = vtopte(va); + pmap_inval_add(&info, &kernel_pmap, va); + *pte = 0; + pmap_inval_flush(&info); +} + +void +pmap_kremove_quick(vm_offset_t va) +{ + pt_entry_t *pte; + pte = vtopte(va); + *pte = 0; + cpu_invlpg((void *)va); } /* - * Make a kernel mapping non-cacheable (not applicable to virtual kernels) + * XXX these need to be recoded. They are not used in any critical path. */ void -pmap_kmodify_nc(vm_offset_t va) +pmap_kmodify_rw(vm_offset_t va) { - *pmap_kpte(va) |= VPTE_N; - madvise((void *)va, PAGE_SIZE, MADV_INVAL); + *vtopte(va) |= PG_RW; + cpu_invlpg((void *)va); } -#endif +void +pmap_kmodify_nc(vm_offset_t va) +{ + *vtopte(va) |= PG_N; + cpu_invlpg((void *)va); +} /* - * Map a contiguous range of physical memory to a KVM + * Used to map a range of physical addresses into kernel + * virtual address space. 
+ * + * For now, VM is already on, we only need to map the + * specified memory. */ vm_offset_t pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot) { - return (NULL); + while (start < end) { + pmap_kenter(virt, start); + virt += PAGE_SIZE; + start += PAGE_SIZE; + } + return (virt); } + /* - * Enter an unmanaged KVA mapping for the private use of the current - * cpu only. pmap_kenter_sync() may be called to make the mapping usable - * by other cpus. - * - * It is illegal for the mapping to be accessed by other cpus unleess - * pmap_kenter_sync*() is called. + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. */ void -pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) +pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { + vm_offset_t end_va; + + end_va = va + count * PAGE_SIZE; + + while (va < end_va) { + pt_entry_t *pte; + + pte = vtopte(va); + *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; + cpu_invlpg((void *)va); + va += PAGE_SIZE; + m++; + } +#ifdef SMP + smp_invltlb(); /* XXX */ +#endif +} + +void +pmap_qenter2(vm_offset_t va, vm_page_t *m, int count, cpumask_t *mask) +{ + vm_offset_t end_va; + cpumask_t cmask = mycpu->gd_cpumask; + + end_va = va + count * PAGE_SIZE; + + while (va < end_va) { + pt_entry_t *pte; + pt_entry_t pteval; + + /* + * Install the new PTE. If the pte changed from the prior + * mapping we must reset the cpu mask and invalidate the page. + * If the pte is the same but we have not seen it on the + * current cpu, invlpg the existing mapping. Otherwise the + * entry is optimal and no invalidation is required. 
+ */ + pte = vtopte(va); + pteval = VM_PAGE_TO_PHYS(*m) | PG_A | PG_RW | PG_V | pgeflag; + if (*pte != pteval) { + *mask = 0; + *pte = pteval; + cpu_invlpg((void *)va); + } else if ((*mask & cmask) == 0) { + cpu_invlpg((void *)va); + } + va += PAGE_SIZE; + m++; + } + *mask |= cmask; } /* - * Make a temporary mapping for a physical address. This is only intended - * to be used for panic dumps. + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. */ -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) +void +pmap_qremove(vm_offset_t va, int count) { - return (NULL); + vm_offset_t end_va; + + end_va = va + count*PAGE_SIZE; + + while (va < end_va) { + pt_entry_t *pte; + + pte = vtopte(va); + *pte = 0; + cpu_invlpg((void *)va); + va += PAGE_SIZE; + } +#ifdef SMP + smp_invltlb(); +#endif } /* - * Remove an unmanaged mapping created with pmap_kenter*(). + * This routine works like vm_page_lookup() but also blocks as long as the + * page is busy. This routine does not busy the page it returns. + * + * Unless the caller is managing objects whos pages are in a known state, + * the call should be made with a critical section held so the page's object + * association remains valid on return. */ -void -pmap_kremove(vm_offset_t va) +static vm_page_t +pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { + vm_page_t m; + + do { + m = vm_page_lookup(object, pindex); + } while (m && vm_page_sleep_busy(m, FALSE, "pplookp")); + + return(m); } /* - * Remove an unmanaged mapping created with pmap_kenter*() but synchronize - * only with this cpu. - * - * Unfortunately because we optimize new entries by testing VPTE_V later - * on, we actually still have to synchronize with all the cpus. XXX maybe - * store a junk value and test against 0 in the other places instead? + * Create a new thread and optionally associate it with a (new) process. + * NOTE! the new thread's cpu may not equal the current cpu. 
*/ void -pmap_kremove_quick(vm_offset_t va) +pmap_init_thread(thread_t td) { + /* enforce pcb placement */ + td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; + td->td_savefpu = &td->td_pcb->pcb_save; + td->td_sp = (char *)td->td_pcb - 16; } /* - * Map a set of unmanaged VM pages into KVM. + * This routine directly affects the fork perf for a process. */ void -pmap_qenter(vm_offset_t va, struct vm_page **m, int count) +pmap_init_proc(struct proc *p) { } /* - * Map a set of VM pages to kernel virtual memory. If a mapping changes - * clear the supplied mask. The caller handles any SMP interactions. - * The mask is used to provide the caller with hints on what SMP interactions - * might be needed. + * Dispose the UPAGES for a process that has exited. + * This routine directly impacts the exit perf of a process. */ void -pmap_qenter2(vm_offset_t va, struct vm_page **m, int count, cpumask_t *mask) +pmap_dispose_proc(struct proc *p) { + KASSERT(p->p_lock == 0, ("attempt to dispose referenced proc! %p", p)); } +/*************************************************** + * Page table page management routines..... + ***************************************************/ + /* - * Undo the effects of pmap_qenter*(). + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. */ -void -pmap_qremove(vm_offset_t va, int count) +static int +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) +{ + /* + * Wait until we can busy the page ourselves. We cannot have + * any active flushes if we block. 
+ */ + if (m->flags & PG_BUSY) { + pmap_inval_flush(info); + while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) + ; + } + KASSERT(m->queue == PQ_NONE, + ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m)); + + if (m->hold_count == 1) { + /* + * Unmap the page table page + */ + vm_page_busy(m); + pmap_inval_add(info, pmap, -1); + pmap->pm_pdir[m->pindex] = 0; + + KKASSERT(pmap->pm_stats.resident_count > 0); + --pmap->pm_stats.resident_count; + + if (pmap->pm_ptphint == m) + pmap->pm_ptphint = NULL; + + /* + * This was our last hold, the page had better be unwired + * after we decrement wire_count. + * + * FUTURE NOTE: shared page directory page could result in + * multiple wire counts. + */ + vm_page_unhold(m); + --m->wire_count; + KKASSERT(m->wire_count == 0); + --vmstats.v_wire_count; + vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); + vm_page_flash(m); + vm_page_free_zero(m); + return 1; + } else { + KKASSERT(m->hold_count > 1); + vm_page_unhold(m); + return 0; + } +} + +static PMAP_INLINE int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) { + KKASSERT(m->hold_count > 0); + if (m->hold_count > 1) { + vm_page_unhold(m); + return 0; + } else { + return _pmap_unwire_pte_hold(pmap, m, info); + } } -/************************************************************************ - * Misc support glue called by machine independant code * - ************************************************************************ - * - * These routines are called by machine independant code to operate on - * certain machine-dependant aspects of processes, threads, and pmaps. +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. 
*/ +static int +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte, + pmap_inval_info_t info) +{ + vm_pindex_t ptepindex; + if (va >= UPT_MIN_ADDRESS) + return 0; + + if (mpte == NULL) { + ptepindex = (va >> PDRSHIFT); + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + mpte = pmap->pm_ptphint; + } else { + pmap_inval_flush(info); + mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + } + + return pmap_unwire_pte_hold(pmap, mpte, info); +} /* - * Initialize MD portions of the thread structure. + * Initialize pmap0/vmspace0. This pmap is not added to pmap_list because + * it, and IdlePTD, represents the template used to update all other pmaps. + * + * On architectures where the kernel pmap is not integrated into the user + * process pmap, this pmap represents the process pmap, not the kernel pmap. + * kernel_pmap should be used to directly access the kernel_pmap. */ void -pmap_init_thread(thread_t td) +pmap_pinit0(struct pmap *pmap) { + pmap->pm_pdir = + (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); + pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t) IdlePTD); + pmap->pm_count = 1; + pmap->pm_active = 0; + pmap->pm_ptphint = NULL; + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* - * This routine directly affects the fork perf for a process. + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. */ void -pmap_init_proc(struct proc *p) +pmap_pinit(struct pmap *pmap) { + vm_page_t ptdpg; + + /* + * No need to allocate page table space yet but we do need a valid + * page directory table. 
+ */ + if (pmap->pm_pdir == NULL) { + pmap->pm_pdir = + (pd_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); + } + + /* + * Allocate an object for the ptes + */ + if (pmap->pm_pteobj == NULL) + pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1); + + /* + * Allocate the page directory page, unless we already have + * one cached. If we used the cached page the wire_count will + * already be set appropriately. + */ + if ((ptdpg = pmap->pm_pdirm) == NULL) { + ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI, + VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + pmap->pm_pdirm = ptdpg; + vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); + ptdpg->valid = VM_PAGE_BITS_ALL; + ptdpg->wire_count = 1; + ++vmstats.v_wire_count; + pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); + } + if ((ptdpg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdir, PAGE_SIZE); + + pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; + + /* install self-referential address mapping entry */ + *(pd_entry_t *) (pmap->pm_pdir + PTDPTDI) = + VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; + + pmap->pm_count = 1; + pmap->pm_active = 0; + pmap->pm_ptphint = NULL; + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + pmap->pm_stats.resident_count = 1; } /* - * Destroy the UPAGES for a process that has exited and disassociate - * the process from its thread. + * Clean up a pmap structure so it can be physically freed. This routine + * is called by the vmspace dtor function. A great deal of pmap data is + * left passively mapped to improve vmspace management so we have a bit + * of cleanup work to do here. 
*/ void -pmap_dispose_proc(struct proc *p) +pmap_puninit(pmap_t pmap) { + vm_page_t p; + + KKASSERT(pmap->pm_active == 0); + if ((p = pmap->pm_pdirm) != NULL) { + KKASSERT(pmap->pm_pdir != NULL); + pmap_kremove((vm_offset_t)pmap->pm_pdir); + p->wire_count--; + vmstats.v_wire_count--; + KKASSERT((p->flags & PG_BUSY) == 0); + vm_page_busy(p); + vm_page_free_zero(p); + pmap->pm_pdirm = NULL; + } + if (pmap->pm_pdir) { + kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE); + pmap->pm_pdir = NULL; + } + if (pmap->pm_pteobj) { + vm_object_deallocate(pmap->pm_pteobj); + pmap->pm_pteobj = NULL; + } } /* - * We pre-allocate all page table pages for kernel virtual memory so - * this routine will only be called if KVM has been exhausted. + * Wire in kernel global address entries. To avoid a race condition + * between pmap initialization and pmap_growkernel, this procedure + * adds the pmap to the master list (which growkernel scans to update), + * then copies the template. */ void -pmap_growkernel(vm_offset_t addr) +pmap_pinit2(struct pmap *pmap) { + crit_enter(); + TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); + /* XXX copies current process, does not fill in MPPTDI */ + bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); + crit_exit(); } /* - * The modification bit is not tracked for any pages in this range. XXX - * such pages in this maps should always use pmap_k*() functions and not - * be managed anyhow. + * Attempt to release and free a vm_page in a pmap. Returns 1 on success, + * 0 on failure (if the procedure had to sleep). * - * XXX User and kernel address spaces are independant for virtual kernels, - * this function only applies to the kernel pmap. + * When asked to remove the page directory page itself, we actually just + * leave it cached so we do not have to incur the SMP inval overhead of + * removing the kernel mapping. pmap_puninit() will take care of it. 
*/ static int -pmap_track_modified(pmap_t pmap, vm_offset_t va) +pmap_release_free_page(struct pmap *pmap, vm_page_t p) { + pd_entry_t *pde = (pd_entry_t *) pmap->pm_pdir; + /* + * This code optimizes the case of freeing non-busy + * page-table pages. Those pages are zero now, and + * might as well be placed directly into the zero queue. + */ + if (vm_page_sleep_busy(p, FALSE, "pmaprl")) return 0; -} -/************************************************************************ - * Procedures supporting managed page table pages * - ************************************************************************ - * - * These procedures are used to track managed page table pages. These pages - * use the page table page's vm_page_t to track PTEs in the page. The - * page table pages themselves are arranged in a VM object, pmap->pm_pteobj. - * - * This allows the system to throw away page table pages for user processes - * at will and reinstantiate them on demand. - */ + vm_page_busy(p); + + /* + * Remove the page table page from the processes address space. + */ + pde[p->pindex] = 0; + KKASSERT(pmap->pm_stats.resident_count > 0); + --pmap->pm_stats.resident_count; + + if (p->hold_count) { + panic("pmap_release: freeing held page table page"); + } + if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) + pmap->pm_ptphint = NULL; + + /* + * We leave the page directory page cached, wired, and mapped in + * the pmap until the dtor function (pmap_puninit()) gets called. + * However, still clean it up so we can set PG_ZERO. + */ + if (p->pindex == PTDPTDI) { + bzero(pde + KPTDI, nkpt * PTESIZE); + pde[MPPTDI] = 0; + pde[APTDPTDI] = 0; + vm_page_flag_set(p, PG_ZERO); + vm_page_wakeup(p); + } else { + p->wire_count--; + vmstats.v_wire_count--; + vm_page_free_zero(p); + } + return 1; +} /* - * This routine works like vm_page_lookup() but also blocks as long as the - * page is busy. This routine does not busy the page it returns. 
- * - * Unless the caller is managing objects whos pages are in a known state, - * the call should be made with a critical section held so the page's object - * association remains valid on return. + * this routine is called if the page table page is not + * mapped correctly. */ static vm_page_t -pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) +_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) +{ + vm_offset_t pteva, ptepa; + vm_page_t m; + + /* + * Find or fabricate a new pagetable page + */ + m = vm_page_grab(pmap->pm_pteobj, ptepindex, + VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); + + KASSERT(m->queue == PQ_NONE, + ("_pmap_allocpte: %p->queue != PQ_NONE", m)); + + /* + * Increment the hold count for the page we will be returning to + * the caller. + */ + m->hold_count++; + + /* + * It is possible that someone else got in and mapped by the page + * directory page while we were blocked, if so just unbusy and + * return the held page. + */ + if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) { + KKASSERT((ptepa & PG_FRAME) == VM_PAGE_TO_PHYS(m)); + vm_page_wakeup(m); + return(m); + } + + if (m->wire_count == 0) + vmstats.v_wire_count++; + m->wire_count++; + + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + + ++pmap->pm_stats.resident_count; + + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); + + /* + * Set the page table hint + */ + pmap->pm_ptphint = m; + + /* + * Try to use the new mapping, but if we cannot, then + * do it with the routine that maps the page explicitly. 
+ */ + if ((m->flags & PG_ZERO) == 0) { + if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == + (((pd_entry_t) PTDpde) & PG_FRAME)) { + pteva = UPT_MIN_ADDRESS + amd64_ptob(ptepindex); + bzero((caddr_t) pteva, PAGE_SIZE); + } else { + pmap_zero_page(ptepa); + } + } + + m->valid = VM_PAGE_BITS_ALL; + vm_page_flag_clear(m, PG_ZERO); + vm_page_flag_set(m, PG_MAPPED); + vm_page_wakeup(m); + + return m; +} + +static vm_page_t +pmap_allocpte(pmap_t pmap, vm_offset_t va) { - return(NULL); + vm_pindex_t ptepindex; + vm_offset_t ptepa; + vm_page_t m; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * This supports switching from a 4MB page to a + * normal 4K page. + */ + if (ptepa & PG_PS) { + pmap->pm_pdir[ptepindex] = 0; + ptepa = 0; + cpu_invltlb(); + smp_invltlb(); + } + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if (ptepa) { + /* + * In order to get the page table page, try the + * hint first. + */ + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + m = pmap->pm_ptphint; + } else { + m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = m; + } + m->hold_count++; + return m; + } + /* + * Here if the pte page isn't mapped, or if it has been deallocated. + */ + return _pmap_allocpte(pmap, ptepindex); } + +/*************************************************** + * Pmap allocation/deallocation routines. + ***************************************************/ + /* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. 
*/ -static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) -{ - return 0; -} +static int pmap_release_callback(struct vm_page *p, void *data); -static __inline int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) +void +pmap_release(struct pmap *pmap) { - return 0; + vm_object_t object = pmap->pm_pteobj; + struct rb_vm_page_scan_info info; + + KASSERT(pmap->pm_active == 0, ("pmap still active! %08x", pmap->pm_active)); +#if defined(DIAGNOSTIC) + if (object->ref_count != 1) + panic("pmap_release: pteobj reference count != 1"); +#endif + + info.pmap = pmap; + info.object = object; + crit_enter(); + TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); + crit_exit(); + + do { + crit_enter(); + info.error = 0; + info.mpte = NULL; + info.limit = object->generation; + + vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, + pmap_release_callback, &info); + if (info.error == 0 && info.mpte) { + if (!pmap_release_free_page(pmap, info.mpte)) + info.error = 1; + } + crit_exit(); + } while (info.error); } -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +pmap_release_callback(struct vm_page *p, void *data) { - return 0; + struct rb_vm_page_scan_info *info = data; + + if (p->pindex == PTDPTDI) { + info->mpte = p; + return(0); + } + if (!pmap_release_free_page(info->pmap, p)) { + info->error = 1; + return(-1); + } + if (info->object->generation != info->limit) { + info->error = 1; + return(-1); + } + return(0); } /* - * Attempt to release and free an vm_page in a pmap. Returns 1 on success, - * 0 on failure (if the procedure had to sleep). + * Grow the number of kernel page table entries, if needed. 
*/ -static int -pmap_release_free_page(struct pmap *pmap, vm_page_t p) + +void +pmap_growkernel(vm_offset_t addr) { - return 1; + struct pmap *pmap; + vm_offset_t ptppaddr; + vm_page_t nkpg; + pd_entry_t newpdir; + + crit_enter(); + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + while (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + nkpt++; + } + } + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + while (kernel_vm_end < addr) { + if (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + continue; + } + + /* + * This index is bogus, but out of the way + */ + nkpg = vm_page_alloc(kptobj, nkpt, + VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT); + if (nkpg == NULL) + panic("pmap_growkernel: no memory to grow kernel"); + + vm_page_wire(nkpg); + ptppaddr = VM_PAGE_TO_PHYS(nkpg); + pmap_zero_page(ptppaddr); + newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); + pdir_pde(PTD, kernel_vm_end) = newpdir; + *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir; + nkpt++; + + /* + * This update must be interlocked with pmap_pinit2. + */ + TAILQ_FOREACH(pmap, &pmap_list, pm_pmnode) { + *pmap_pde(pmap, kernel_vm_end) = newpdir; + } + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & + ~(PAGE_SIZE * NPTEPG - 1); + } + crit_exit(); } /* - * This routine is called if the page table page is not mapped in the page - * table directory. - * - * The routine is broken up into two parts for readability. + * Retire the given physical map from service. + * Should only be called if the map contains + * no valid mappings. 
*/ -static vm_page_t -_pmap_allocpte(pmap_t pmap, unsigned ptepindex) +void +pmap_destroy(pmap_t pmap) { - return (NULL); + int count; + + if (pmap == NULL) + return; + + count = --pmap->pm_count; + if (count == 0) { + pmap_release(pmap); + panic("destroying a pmap is not yet implemented"); + } } /* - * Determine the page table page required to access the VA in the pmap - * and allocate it if necessary. Return a held vm_page_t for the page. - * - * Only used with user pmaps. + * Add a reference to the specified pmap. */ -static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va) +void +pmap_reference(pmap_t pmap) { - return NULL; + if (pmap != NULL) { + pmap->pm_count++; + } } -/************************************************************************ - * Managed pages in pmaps * - ************************************************************************ - * - * All pages entered into user pmaps and some pages entered into the kernel - * pmap are managed, meaning that pmap_protect() and other related management - * functions work on these pages. - */ +/*************************************************** +* page management routines. + ***************************************************/ /* * free the pv_entry back to the free list. This function may be * called from an interrupt. 
*/ -static __inline void +static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { + pv_entry_count--; + zfree(pvzone, pv); } /* @@ -538,7 +1610,14 @@ free_pv_entry(pv_entry_t pv) static pv_entry_t get_pv_entry(void) { - return NULL; + pv_entry_count++; + if (pv_entry_high_water && + (pv_entry_count > pv_entry_high_water) && + (pmap_pagedaemon_waken == 0)) { + pmap_pagedaemon_waken = 1; + wakeup (&vm_pages_needed); + } + return zalloc(pvzone); } /* @@ -548,8 +1627,29 @@ get_pv_entry(void) void pmap_collect(void) { + int i; + vm_page_t m; + static int warningdone=0; + + if (pmap_pagedaemon_waken == 0) + return; + + if (warningdone < 5) { + kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); + warningdone++; + } + + for(i = 0; i < vm_page_array_size; i++) { + m = &vm_page_array[i]; + if (m->wire_count || m->hold_count || m->busy || + (m->flags & PG_BUSY)) + continue; + pmap_remove_all(m); + } + pmap_pagedaemon_waken = 0; } + /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up @@ -557,26 +1657,105 @@ pmap_collect(void) * the entry. In either case we free the now unused entry. 
*/ static int -pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) -{ - return 0; +pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va, pmap_inval_info_t info) +{ + pv_entry_t pv; + int rtval; + + crit_enter(); + if (m->md.pv_list_count < pmap->pm_stats.resident_count) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { + if (va == pv->pv_va) + break; + } + } + + rtval = 0; + if (pv) { + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + ++pmap->pm_generation; + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem, info); + free_pv_entry(pv); + } + crit_exit(); + return rtval; } /* - * Create a pv entry for page at pa for (pmap, va). If the page table page - * holding the VA is managed, mpte will be non-NULL. + * Create a pv entry for page at pa for + * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { + pv_entry_t pv; + + crit_enter(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; + + crit_exit(); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int -pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va) -{ +pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, + pmap_inval_info_t info) +{ + pt_entry_t oldpte; + vm_page_t m; + + pmap_inval_add(info, pmap, va); + oldpte = pte_load_clear(ptq); + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + /* + * Machines that don't support invlpg, also don't support + * PG_G. XXX PG_G is disabled for SMP so don't worry about + * the SMP case. 
+ */ + if (oldpte & PG_G) + cpu_invlpg((void *)va); + KKASSERT(pmap->pm_stats.resident_count > 0); + --pmap->pm_stats.resident_count; + if (oldpte & PG_MANAGED) { + m = PHYS_TO_VM_PAGE(oldpte); + if (oldpte & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) oldpte)) { + kprintf( + "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, oldpte); + } +#endif + if (pmap_track_modified(va)) + vm_page_dirty(m); + } + if (oldpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + return pmap_remove_entry(pmap, m, va, info); + } else { + return pmap_unuse_pt(pmap, va, NULL, info); + } + return 0; } @@ -589,8 +1768,20 @@ pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va) * not kernel_pmap. */ static void -pmap_remove_page(struct pmap *pmap, vm_offset_t va) -{ +pmap_remove_page(struct pmap *pmap, vm_offset_t va, pmap_inval_info_t info) +{ + pt_entry_t *ptq; + + /* + * if there is no pte for this address, just skip it!!! Otherwise + * get a local va for mappings for this pmap and remove the entry. + */ + if (*pmap_pde(pmap, va) != 0) { + ptq = get_ptbase(pmap) + amd64_btop(va); + if (*ptq) { + pmap_remove_pte(pmap, ptq, va, info); + } + } } /* @@ -607,19 +1798,151 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va) void pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) { + pt_entry_t *ptbase; + vm_offset_t pdnxt; + vm_offset_t ptpaddr; + vm_offset_t sindex, eindex; + struct pmap_inval_info info; + + if (pmap == NULL) + return; + + if (pmap->pm_stats.resident_count == 0) + return; + + pmap_inval_init(&info); + + /* + * special handling of removing one page. a very + * common operation and easy to short circuit some + * code. + */ + if (((sva + PAGE_SIZE) == eva) && + ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { + pmap_remove_page(pmap, sva, &info); + pmap_inval_flush(&info); + return; + } + + /* + * Get a local virtual address for the mappings that are being + * worked with. 
+ */ + sindex = amd64_btop(sva); + eindex = amd64_btop(eva); + + for (; sindex < eindex; sindex = pdnxt) { + vm_pindex_t pdirindex; + + /* + * Calculate index for next page table. + */ + pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); + if (pmap->pm_stats.resident_count == 0) + break; + + pdirindex = sindex / NPDEPG; + if (((ptpaddr = pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); + pmap->pm_pdir[pdirindex] = 0; + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + continue; + } + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + /* + * Limit our scan to either the end of the va represented + * by the current page table page, or to the end of the + * range being removed. + */ + if (pdnxt > eindex) { + pdnxt = eindex; + } + + /* + * NOTE: pmap_remove_pte() can block. + */ + for (; sindex != pdnxt; sindex++) { + vm_offset_t va; + + ptbase = get_ptbase(pmap); + if (ptbase[sindex] == 0) + continue; + va = amd64_ptob(sindex); + if (pmap_remove_pte(pmap, ptbase + sindex, va, &info)) + break; + } + } + pmap_inval_flush(&info); } /* * pmap_remove_all: * - * Removes this physical page from all physical maps in which it resides. - * Reflects back modify bits to the pager. + * Removes this physical page from all physical maps in which it resides. + * Reflects back modify bits to the pager. * - * This routine may not be called from an interrupt. + * This routine may not be called from an interrupt. 
*/ + static void pmap_remove_all(vm_page_t m) { + struct pmap_inval_info info; + pt_entry_t *pte, tpte; + pv_entry_t pv; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return; + + pmap_inval_init(&info); + crit_enter(); + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); + --pv->pv_pmap->pm_stats.resident_count; + + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); + tpte = pte_load_clear(pte); + + if (tpte & PG_W) + pv->pv_pmap->pm_stats.wired_count--; + + if (tpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) tpte)) { + kprintf( + "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", + pv->pv_va, tpte); + } +#endif + if (pmap_track_modified(pv->pv_va)) + vm_page_dirty(m); + } + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + ++pv->pv_pmap->pm_generation; + m->md.pv_list_count--; + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info); + free_pv_entry(pv); + } + crit_exit(); + KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); + pmap_inval_flush(&info); } /* @@ -634,55 +1957,402 @@ pmap_remove_all(vm_page_t m) void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { + pt_entry_t *ptbase; + vm_offset_t pdnxt, ptpaddr; + vm_pindex_t sindex, eindex; + pmap_inval_info info; + + if (pmap == NULL) + return; + + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) + return; + + pmap_inval_init(&info); + + ptbase = get_ptbase(pmap); + + sindex = amd64_btop(sva); + eindex = amd64_btop(eva); + + for (; sindex < eindex; sindex = pdnxt) { + + vm_pindex_t pdirindex; + 
+ pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); + + pdirindex = sindex / NPDEPG; + if (((ptpaddr = pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { + pmap_inval_add(&info, pmap, -1); + pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + continue; + } + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + if (pdnxt > eindex) { + pdnxt = eindex; + } + + for (; sindex != pdnxt; sindex++) { + + pt_entry_t pbits; + vm_page_t m; + + /* + * XXX non-optimal. Note also that there can be + * no pmap_inval_flush() calls until after we modify + * ptbase[sindex] (or otherwise we have to do another + * pmap_inval_add() call). + */ + pmap_inval_add(&info, pmap, amd64_ptob(sindex)); + pbits = ptbase[sindex]; + + if (pbits & PG_MANAGED) { + m = NULL; + if (pbits & PG_A) { + m = PHYS_TO_VM_PAGE(pbits); + vm_page_flag_set(m, PG_REFERENCED); + pbits &= ~PG_A; + } + if (pbits & PG_M) { + if (pmap_track_modified(amd64_ptob(sindex))) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pbits); + vm_page_dirty(m); + pbits &= ~PG_M; + } + } + } + + pbits &= ~PG_RW; + + if (pbits != ptbase[sindex]) { + ptbase[sindex] = pbits; + } + } + } + pmap_inval_flush(&info); } /* - * Enter a managed page into a pmap. If the page is not wired related pmap - * data can be destroyed at any time for later demand-operation. + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. * - * Insert the vm_page (m) at virtual address (v) in (pmap), with the - * specified protection, and wire the mapping if requested. + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. * - * NOTE: This routine may not lazy-evaluate or lose information. The - * page must actually be inserted into the given map NOW. 
- * - * NOTE: When entering a page at a KVA address, the pmap must be the - * kernel_pmap. + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { + vm_paddr_t pa; + pt_entry_t *pte; + vm_paddr_t opa; + vm_offset_t origpte, newpte; + vm_page_t mpte; + pmap_inval_info info; + + if (pmap == NULL) + return; + + va &= PG_FRAME; +#ifdef PMAP_DIAGNOSTIC + if (va >= KvaEnd) + panic("pmap_enter: toobig"); + if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) + panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); +#endif + if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) { + kprintf("Warning: pmap_enter called on UVA with kernel_pmap\n"); +#ifdef DDB + db_print_backtrace(); +#endif + } + if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) { + kprintf("Warning: pmap_enter called on KVA without kernel_pmap\n"); +#ifdef DDB + db_print_backtrace(); +#endif + } + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + else + mpte = NULL; + + pmap_inval_init(&info); + pte = pmap_pte(pmap, va); + + /* + * Page Directory table entry not valid, we need a new PT page + */ + if (pte == NULL) { + panic("pmap_enter: invalid page directory pdir=%x, va=0x%x\n", + pmap->pm_pdir[PTDPTDI], va); + } + + pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; + origpte = *(vm_offset_t *)pte; + opa = origpte & PG_FRAME; + + if (origpte & PG_PS) + panic("pmap_enter: attempted pmap_enter on 4MB page"); + + /* + * Mapping has not changed, must be protection or wiring change. + */ + if (origpte && (opa == pa)) { + /* + * Wiring change, just update stats. We don't worry about + * wiring PT pages as they remain resident as long as there + * are valid mappings in them. 
Hence, if a user page is wired, + * the PT page will be also. + */ + if (wired && ((origpte & PG_W) == 0)) + pmap->pm_stats.wired_count++; + else if (!wired && (origpte & PG_W)) + pmap->pm_stats.wired_count--; + +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) origpte)) { + kprintf( + "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, origpte); + } +#endif + + /* + * Remove the extra pte reference. Note that we cannot + * optimize the RO->RW case because we have adjusted the + * wiring count above and may need to adjust the wiring + * bits below. + */ + if (mpte) + mpte->hold_count--; + + /* + * We might be turning off write access to the page, + * so we go ahead and sense modify status. + */ + if (origpte & PG_MANAGED) { + if ((origpte & PG_M) && pmap_track_modified(va)) { + vm_page_t om; + om = PHYS_TO_VM_PAGE(opa); + vm_page_dirty(om); + } + pa |= PG_MANAGED; + KKASSERT(m->flags & PG_MAPPED); + } + goto validate; + } + /* + * Mapping has changed, invalidate old range and fall through to + * handle validating new mapping. + */ + if (opa) { + int err; + err = pmap_remove_pte(pmap, pte, va, &info); + if (err) + panic("pmap_enter: pte vanished, va: 0x%x", va); + } + + /* + * Enter on the PV list if part of our managed memory. Note that we + * raise IPL while manipulating pv_table since pmap_enter can be + * called at interrupt time. + */ + if (pmap_initialized && + (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { + pmap_insert_entry(pmap, va, mpte, m); + pa |= PG_MANAGED; + vm_page_flag_set(m, PG_MAPPED); + } + + /* + * Increment counters + */ + ++pmap->pm_stats.resident_count; + if (wired) + pmap->pm_stats.wired_count++; + +validate: + /* + * Now validate mapping with desired protection/wiring. 
+ */ + newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); + + if (wired) + newpte |= PG_W; + if (va < UPT_MIN_ADDRESS) + newpte |= PG_U; + if (pmap == &kernel_pmap) + newpte |= pgeflag; + + /* + * if the mapping or permission bits are different, we need + * to update the pte. + */ + if ((origpte & ~(PG_M|PG_A)) != newpte) { + pmap_inval_add(&info, pmap, va); + *pte = newpte | PG_A; + if (newpte & PG_RW) + vm_page_flag_set(m, PG_WRITEABLE); + } + KKASSERT((newpte & PG_MANAGED) == 0 || (m->flags & PG_MAPPED)); + pmap_inval_flush(&info); } /* - * This is a quick version of pmap_enter(). It is used only under the - * following conditions: + * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. + * This code also assumes that the pmap has no pre-existing entry for this + * VA. * - * (1) The pmap is not the kernel_pmap - * (2) The page is not to be wired into the map - * (3) The page is to mapped read-only in the pmap (initially that is) - * (4) The calling procedure is responsible for flushing the TLB - * (5) The page is always managed - * (6) There is no prior mapping at the VA + * This code currently may only be used on user pmaps, not kernel_pmap. 
*/ - -static vm_page_t -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) +static void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) { - return NULL; + pt_entry_t *pte; + vm_paddr_t pa; + vm_page_t mpte; + vm_pindex_t ptepindex; + vm_offset_t ptepa; + pmap_inval_info info; + + pmap_inval_init(&info); + + if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) { + kprintf("Warning: pmap_enter_quick called on UVA with kernel_pmap\n"); +#ifdef DDB + db_print_backtrace(); +#endif + } + if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) { + kprintf("Warning: pmap_enter_quick called on KVA without kernel_pmap\n"); +#ifdef DDB + db_print_backtrace(); +#endif + } + + KKASSERT(va < UPT_MIN_ADDRESS); /* assert used on user pmaps only */ + + /* + * Calculate the page table page (mpte), allocating it if necessary. + * + * A held page table page (mpte), or NULL, is passed onto the + * section following. + */ + if (va < UPT_MIN_ADDRESS) { + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + do { + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. + */ + if (ptepa) { + if (ptepa & PG_PS) + panic("pmap_enter_quick: unexpected mapping into 4MB page"); + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + mpte = pmap->pm_ptphint; + } else { + mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + if (mpte) + mpte->hold_count++; + } else { + mpte = _pmap_allocpte(pmap, ptepindex); + } + } while (mpte == NULL); + } else { + mpte = NULL; + /* this code path is not yet used */ + } + + /* + * With a valid (and held) page directory page, we can just use + * vtopte() to get to the pte. If the pte is already present + * we do not disturb it. 
+ */ + pte = vtopte(va); + if (*pte & PG_V) { + if (mpte) + pmap_unwire_pte_hold(pmap, mpte, &info); + pa = VM_PAGE_TO_PHYS(m); + KKASSERT(((*pte ^ pa) & PG_FRAME) == 0); + return; + } + + /* + * Enter on the PV list if part of our managed memory + */ + if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { + pmap_insert_entry(pmap, va, mpte, m); + vm_page_flag_set(m, PG_MAPPED); + } + + /* + * Increment counters + */ + ++pmap->pm_stats.resident_count; + + pa = VM_PAGE_TO_PHYS(m); + + /* + * Now validate mapping with RO protection + */ + if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) + *pte = pa | PG_V | PG_U; + else + *pte = pa | PG_V | PG_U | PG_MANAGED; +/* pmap_inval_add(&info, pmap, va); shouldn't be needed inval->valid */ + pmap_inval_flush(&info); } /* - * Extract the physical address for the translation at the specified - * virtual address in the pmap. + * Make a temporary mapping for a physical address. This is only intended + * to be used for panic dumps. */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) { - return(0); + pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); + return ((void *)crashdumpmap); } +#define MAX_INIT_PT (96) + /* * This routine preloads the ptes for a given object into the specified pmap. * This eliminates the blast of soft faults on process startup and @@ -695,12 +2365,85 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { + struct rb_vm_page_scan_info info; + struct lwp *lp; + int psize; + + /* + * We can't preinit if read access isn't set or there is no pmap + * or object. 
+ */ + if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) + return; + + /* + * We can't preinit if the pmap is not the current pmap + */ + lp = curthread->td_lwp; + if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) + return; + + psize = amd64_btop(size); + + if ((object->type != OBJT_VNODE) || + ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && + (object->resident_page_count > MAX_INIT_PT))) { + return; + } + + if (psize + pindex > object->size) { + if (object->size < pindex) + return; + psize = object->size - pindex; + } + + if (psize == 0) + return; + + /* + * Use a red-black scan to traverse the requested range and load + * any valid pages found into the pmap. + * + * We cannot safely scan the object's memq unless we are in a + * critical section since interrupts can remove pages from objects. + */ + info.start_pindex = pindex; + info.end_pindex = pindex + psize - 1; + info.limit = limit; + info.mpte = NULL; + info.addr = addr; + info.pmap = pmap; + + crit_enter(); + vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, + pmap_object_init_pt_callback, &info); + crit_exit(); } static int pmap_object_init_pt_callback(vm_page_t p, void *data) { + struct rb_vm_page_scan_info *info = data; + vm_pindex_t rel_index; + /* + * don't allow an madvise to blow away our really + * free pages allocating pv entries. 
+ */ + if ((info->limit & MAP_PREFAULT_MADVISE) && + vmstats.v_free_count < vmstats.v_free_reserved) { + return(-1); + } + if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + if ((p->queue - p->pc) == PQ_CACHE) + vm_page_deactivate(p); + vm_page_busy(p); + rel_index = p->pindex - info->start_pindex; + pmap_enter_quick(info->pmap, + info->addr + amd64_ptob(rel_index), p); + vm_page_wakeup(p); + } return(0); } @@ -723,6 +2466,89 @@ static int pmap_prefault_pageorder[] = { void pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) { + int i; + vm_offset_t starta; + vm_offset_t addr; + vm_pindex_t pindex; + vm_page_t m; + vm_object_t object; + struct lwp *lp; + + /* + * We do not currently prefault mappings that use virtual page + * tables. We do not prefault foreign pmaps. + */ + if (entry->maptype == VM_MAPTYPE_VPAGETABLE) + return; + lp = curthread->td_lwp; + if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace))) + return; + + object = entry->object.vm_object; + + starta = addra - PFBAK * PAGE_SIZE; + if (starta < entry->start) + starta = entry->start; + else if (starta > addra) + starta = 0; + + /* + * critical section protection is required to maintain the + * page/object association, interrupts can free pages and remove + * them from their objects. 
+ */ + crit_enter(); + for (i = 0; i < PAGEORDER_SIZE; i++) { + vm_object_t lobject; + pt_entry_t *pte; + + addr = addra + pmap_prefault_pageorder[i]; + if (addr > addra + (PFFOR * PAGE_SIZE)) + addr = 0; + + if (addr < starta || addr >= entry->end) + continue; + + if ((*pmap_pde(pmap, addr)) == 0) + continue; + + pte = vtopte(addr); + if (*pte) + continue; + + pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; + lobject = object; + + for (m = vm_page_lookup(lobject, pindex); + (!m && (lobject->type == OBJT_DEFAULT) && + (lobject->backing_object)); + lobject = lobject->backing_object + ) { + if (lobject->backing_object_offset & PAGE_MASK) + break; + pindex += (lobject->backing_object_offset >> PAGE_SHIFT); + m = vm_page_lookup(lobject->backing_object, pindex); + } + + /* + * give-up when a page is not in memory + */ + if (m == NULL) + break; + + if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (m->busy == 0) && + (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + + if ((m->queue - m->pc) == PQ_CACHE) { + vm_page_deactivate(m); + } + vm_page_busy(m); + pmap_enter_quick(pmap, addr, m); + vm_page_wakeup(m); + } + } + crit_exit(); } /* @@ -735,8 +2561,40 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) void pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) { + pt_entry_t *pte; + + if (pmap == NULL) + return; + + pte = pmap_pte(pmap, va); + + if (wired && !pmap_pte_w(pte)) + pmap->pm_stats.wired_count++; + else if (!wired && pmap_pte_w(pte)) + pmap->pm_stats.wired_count--; + + /* + * Wiring is not a hardware characteristic so there is no need to + * invalidate TLB. However, in an SMP environment we must use + * a locked bus cycle to update the pte (if we are not using + * the pmap_inval_*() API that is)... it's ok to do this for simple + * wiring changes. 
+ */ +#ifdef SMP + if (wired) + atomic_set_int(pte, PG_W); + else + atomic_clear_int(pte, PG_W); +#else + if (wired) + atomic_set_int_nonlocked(pte, PG_W); + else + atomic_clear_int_nonlocked(pte, PG_W); +#endif } + + /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len @@ -748,6 +2606,143 @@ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { + pmap_inval_info info; + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + pd_entry_t src_frame, dst_frame; + vm_page_t m; + + if (dst_addr != src_addr) + return; + /* + * XXX BUGGY. Amoung other things srcmpte is assumed to remain + * valid through blocking calls, and that's just not going to + * be the case. + * + * FIXME! + */ + return; + + src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME; + if (src_frame != (PTDpde & PG_FRAME)) { + return; + } + + dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; + if (dst_frame != (APTDpde & PG_FRAME)) { + APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); + /* The page directory is not shared between CPUs */ + cpu_invltlb(); + } + pmap_inval_init(&info); + pmap_inval_add(&info, dst_pmap, -1); + pmap_inval_add(&info, src_pmap, -1); + + /* + * critical section protection is required to maintain the page/object + * association, interrupts can free pages and remove them from + * their objects. + */ + crit_enter(); + for (addr = src_addr; addr < end_addr; addr = pdnxt) { + pt_entry_t *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + vm_offset_t srcptepaddr; + vm_pindex_t ptepindex; + + if (addr >= UPT_MIN_ADDRESS) + panic("pmap_copy: invalid to pmap_copy page tables\n"); + + /* + * Don't let optional prefaulting of pages make us go + * way below the low water mark of free pages or way + * above high water mark of used pv entries. 
+ */ + if (vmstats.v_free_count < vmstats.v_free_reserved || + pv_entry_count > pv_entry_high_water) + break; + + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + ptepindex = addr >> PDRSHIFT; + + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; + if (srcptepaddr == 0) + continue; + + if (srcptepaddr & PG_PS) { + if (dst_pmap->pm_pdir[ptepindex] == 0) { + dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr; + dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; + } + continue; + } + + srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); + if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || + (srcmpte->flags & PG_BUSY)) { + continue; + } + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = vtopte(addr); + dst_pte = avtopte(addr); + while (addr < pdnxt) { + pt_entry_t ptetemp; + + ptetemp = *src_pte; + /* + * we only virtual copy managed pages + */ + if ((ptetemp & PG_MANAGED) != 0) { + /* + * We have to check after allocpte for the + * pte still being around... allocpte can + * block. + * + * pmap_allocpte() can block. If we lose + * our page directory mappings we stop. + */ + dstmpte = pmap_allocpte(dst_pmap, addr); + + if (src_frame != (PTDpde & PG_FRAME) || + dst_frame != (APTDpde & PG_FRAME) + ) { + kprintf("WARNING: pmap_copy: detected and corrected race\n"); + pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); + goto failed; + } else if ((*dst_pte == 0) && + (ptetemp = *src_pte) != 0 && + (ptetemp & PG_MANAGED)) { + /* + * Clear the modified and + * accessed (referenced) bits + * during the copy. 
+ */ + m = PHYS_TO_VM_PAGE(ptetemp); + *dst_pte = ptetemp & ~(PG_M | PG_A); + ++dst_pmap->pm_stats.resident_count; + pmap_insert_entry(dst_pmap, addr, + dstmpte, m); + KKASSERT(m->flags & PG_MAPPED); + } else { + kprintf("WARNING: pmap_copy: dst_pte race detected and corrected\n"); + pmap_unwire_pte_hold(dst_pmap, dstmpte, &info); + goto failed; + } + if (dstmpte->hold_count >= srcmpte->hold_count) + break; + } + addr += PAGE_SIZE; + src_pte++; + dst_pte++; + } + } +failed: + crit_exit(); + pmap_inval_flush(&info); } /* @@ -762,6 +2757,23 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, void pmap_zero_page(vm_paddr_t phys) { + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*gd->gd_CMAP3) + panic("pmap_zero_page: CMAP3 busy"); + *gd->gd_CMAP3 = + PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; + cpu_invlpg(gd->gd_CADDR3); + +#if defined(I686_CPU) + if (cpu_class == CPUCLASS_686) + i686_pagezero(gd->gd_CADDR3); + else +#endif + bzero(gd->gd_CADDR3, PAGE_SIZE); + *gd->gd_CMAP3 = 0; + crit_exit(); } /* @@ -772,6 +2784,23 @@ pmap_zero_page(vm_paddr_t phys) void pmap_page_assertzero(vm_paddr_t phys) { + struct mdglobaldata *gd = mdcpu; + int i; + + crit_enter(); + if (*gd->gd_CMAP3) + panic("pmap_zero_page: CMAP3 busy"); + *gd->gd_CMAP3 = + PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; + cpu_invlpg(gd->gd_CADDR3); + for (i = 0; i < PAGE_SIZE; i += sizeof(int)) { + if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { + panic("pmap_page_assertzero() @ %p not zero!\n", + (void *)gd->gd_CADDR3); + } + } + *gd->gd_CMAP3 = 0; + crit_exit(); } /* @@ -785,6 +2814,22 @@ pmap_page_assertzero(vm_paddr_t phys) void pmap_zero_page_area(vm_paddr_t phys, int off, int size) { + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*gd->gd_CMAP3) + panic("pmap_zero_page: CMAP3 busy"); + *gd->gd_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; + cpu_invlpg(gd->gd_CADDR3); + +#if defined(I686_CPU) + if (cpu_class == CPUCLASS_686 && off == 0 && size == 
PAGE_SIZE) + i686_pagezero(gd->gd_CADDR3); + else +#endif + bzero((char *)gd->gd_CADDR3 + off, size); + *gd->gd_CMAP3 = 0; + crit_exit(); } /* @@ -797,6 +2842,25 @@ pmap_zero_page_area(vm_paddr_t phys, int off, int size) void pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) { + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*gd->gd_CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*gd->gd_CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + + *gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; + *gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; + + cpu_invlpg(gd->gd_CADDR1); + cpu_invlpg(gd->gd_CADDR2); + + bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); + + *gd->gd_CMAP1 = 0; + *gd->gd_CMAP2 = 0; + crit_exit(); } /* @@ -809,6 +2873,27 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) void pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) { + struct mdglobaldata *gd = mdcpu; + + crit_enter(); + if (*gd->gd_CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*gd->gd_CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + + *gd->gd_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; + *gd->gd_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; + + cpu_invlpg(gd->gd_CADDR1); + cpu_invlpg(gd->gd_CADDR2); + + bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), + (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), + bytes); + + *gd->gd_CMAP1 = 0; + *gd->gd_CMAP2 = 0; + crit_exit(); } /* @@ -821,6 +2906,24 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + pv_entry_t pv; + int loops = 0; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return FALSE; + + crit_enter(); + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pv->pv_pmap == pmap) { + crit_exit(); + return TRUE; + } + loops++; + if (loops >= 16) + break; + } + crit_exit(); return (FALSE); } @@ -835,26 +2938,217 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) void pmap_remove_pages(pmap_t pmap, vm_offset_t sva, 
vm_offset_t eva) { + struct lwp *lp; + pt_entry_t *pte, tpte; + pv_entry_t pv, npv; + vm_page_t m; + pmap_inval_info info; + int iscurrentpmap; + int32_t save_generation; + + lp = curthread->td_lwp; + if (lp && pmap == vmspace_pmap(lp->lwp_vmspace)) + iscurrentpmap = 1; + else + iscurrentpmap = 0; + + pmap_inval_init(&info); + crit_enter(); + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + + KKASSERT(pmap == pv->pv_pmap); + + if (iscurrentpmap) + pte = vtopte(pv->pv_va); + else + pte = pmap_pte_quick(pmap, pv->pv_va); + if (pmap->pm_active) + pmap_inval_add(&info, pmap, pv->pv_va); + + /* + * We cannot remove wired pages from a process' mapping + * at this time + */ + if (*pte & PG_W) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + tpte = pte_load_clear(pte); + + m = PHYS_TO_VM_PAGE(tpte); + + KASSERT(m < &vm_page_array[vm_page_array_size], + ("pmap_remove_pages: bad tpte %x", tpte)); + + KKASSERT(pmap->pm_stats.resident_count > 0); + --pmap->pm_stats.resident_count; + + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { + vm_page_dirty(m); + } + + npv = TAILQ_NEXT(pv, pv_plist); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + save_generation = ++pmap->pm_generation; + + m->md.pv_list_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); + + pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info); + free_pv_entry(pv); + + /* + * Restart the scan if we blocked during the unuse or free + * calls and other removals were made. + */ + if (save_generation != pmap->pm_generation) { + kprintf("Warning: pmap_remove_pages race-A avoided\n"); + pv = TAILQ_FIRST(&pmap->pm_pvlist); + } + } + pmap_inval_flush(&info); + crit_exit(); } /* - * pmap_testbit tests bits in active mappings of a VM page. 
+ * pmap_testbit tests bits in pte's + * note that the testbit/clearbit routines are inline, + * and a lot of things compile-time evaluate. */ static boolean_t pmap_testbit(vm_page_t m, int bit) { + pv_entry_t pv; + pt_entry_t *pte; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return FALSE; + + if (TAILQ_FIRST(&m->md.pv_list) == NULL) + return FALSE; + + crit_enter(); + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if (!pmap_track_modified(pv->pv_va)) + continue; + } + +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (*pte & bit) { + crit_exit(); + return TRUE; + } + } + crit_exit(); return (FALSE); } /* - * This routine is used to clear bits in ptes. Certain bits require special - * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. - * - * This routine is only called with certain VPTE_* bit combinations. + * this routine is used to modify bits in ptes */ static __inline void pmap_clearbit(vm_page_t m, int bit) { + struct pmap_inval_info info; + pv_entry_t pv; + pt_entry_t *pte; + pt_entry_t pbits; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return; + + pmap_inval_init(&info); + crit_enter(); + + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? + */ + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + /* + * don't write protect pager mappings + */ + if (bit == PG_RW) { + if (!pmap_track_modified(pv->pv_va)) + continue; + } + +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + + /* + * Careful here. 
We can use a locked bus instruction to + * clear PG_A or PG_M safely but we need to synchronize + * with the target cpus when we mess with PG_RW. + * + * We do not have to force synchronization when clearing + * PG_M even for PTEs generated via virtual memory maps, + * because the virtual kernel will invalidate the pmap + * entry when/if it needs to resynchronize the Modify bit. + */ + if (bit & PG_RW) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); +again: + pbits = *pte; + if (pbits & bit) { + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_dirty(m); + atomic_clear_int(pte, PG_M|PG_RW); + } else { + /* + * The cpu may be trying to set PG_M + * simultaniously with our clearing + * of PG_RW. + */ + if (!atomic_cmpset_int(pte, pbits, + pbits & ~PG_RW)) + goto again; + } + } else if (bit == PG_M) { + /* + * We could also clear PG_RW here to force + * a fault on write to redetect PG_M for + * virtual kernels, but it isn't necessary + * since virtual kernels invalidate the pte + * when they clear the VPTE_M bit in their + * virtual page tables. 
+ */ + atomic_clear_int(pte, PG_M); + } else { + atomic_clear_int(pte, bit); + } + } + } + pmap_inval_flush(&info); + crit_exit(); } /* @@ -865,12 +3159,20 @@ pmap_clearbit(vm_page_t m, int bit) void pmap_page_protect(vm_page_t m, vm_prot_t prot) { + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + pmap_clearbit(m, PG_RW); + vm_page_flag_clear(m, PG_WRITEABLE); + } else { + pmap_remove_all(m); + } + } } vm_paddr_t -pmap_phys_address(int ppn) +pmap_phys_address(vm_pindex_t ppn) { - return NULL; + return (amd64_ptob(ppn)); } /* @@ -888,7 +3190,47 @@ pmap_phys_address(int ppn) int pmap_ts_referenced(vm_page_t m) { - return (0); + pv_entry_t pv, pvf, pvn; + pt_entry_t *pte; + int rtval = 0; + + if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) + return (rtval); + + crit_enter(); + + if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + + pvf = pv; + + do { + pvn = TAILQ_NEXT(pv, pv_list); + + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + + if (!pmap_track_modified(pv->pv_va)) + continue; + + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + + if (pte && (*pte & PG_A)) { +#ifdef SMP + atomic_clear_int(pte, PG_A); +#else + atomic_clear_int_nonlocked(pte, PG_A); +#endif + rtval++; + if (rtval > 4) { + break; + } + } + } while ((pv = pvn) != NULL && pv != pvf); + } + crit_exit(); + + return (rtval); } /* @@ -900,7 +3242,7 @@ pmap_ts_referenced(vm_page_t m) boolean_t pmap_is_modified(vm_page_t m) { - return NULL; + return pmap_testbit(m, PG_M); } /* @@ -909,6 +3251,7 @@ pmap_is_modified(vm_page_t m) void pmap_clear_modify(vm_page_t m) { + pmap_clearbit(m, PG_M); } /* @@ -919,9 +3262,9 @@ pmap_clear_modify(vm_page_t m) void pmap_clear_reference(vm_page_t m) { + pmap_clearbit(m, PG_A); } -#if 0 /* * Miscellaneous support routines follow */ @@ -933,13 +3276,24 @@ i386_protection_init(void) kp = protection_codes; for (prot = 0; prot < 8; prot++) { - if (prot & VM_PROT_READ) - *kp |= VPTE_R; - 
if (prot & VM_PROT_WRITE) - *kp |= VPTE_W; - if (prot & VM_PROT_EXECUTE) - *kp |= VPTE_X; - ++kp; + switch (prot) { + case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: + /* + * Read access is also 0. There isn't any execute bit, + * so just make it readable. + */ + case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: + case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: + case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: + *kp++ = 0; + break; + case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: + case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: + case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: + case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: + *kp++ = PG_RW; + break; + } } } @@ -956,7 +3310,7 @@ void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { vm_offset_t va, tmpva, offset; - vpte_t *pte; + pt_entry_t *pte; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); @@ -965,10 +3319,10 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size) if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); - pa = pa & VPTE_FRAME; + pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { - pte = KernelPTA + (tmpva >> PAGE_SHIFT); - *pte = pa | VPTE_R | VPTE_W | VPTE_V; /* | pgeflag; */ + pte = vtopte(tmpva); + *pte = pa | PG_RW | PG_V; /* | pgeflag; */ size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; @@ -984,38 +3338,199 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; - base = va & VPTE_FRAME; + base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); pmap_qremove(va, size >> PAGE_SHIFT); kmem_free(&kernel_map, base, size); } -#endif - /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr) { - return 0; -} + pt_entry_t *ptep, pte; + vm_page_t m; + int val = 0; + + ptep = pmap_pte(pmap, addr); + if (ptep == 0) { + return 0; + } + if ((pte = *ptep) != 0) { + vm_offset_t pa; + + val = MINCORE_INCORE; + if ((pte & PG_MANAGED) == 0) + return val; + + pa = pte & 
PG_FRAME; + + m = PHYS_TO_VM_PAGE(pa); + + /* + * Modified by us + */ + if (pte & PG_M) + val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; + /* + * Modified by someone + */ + else if (m->dirty || pmap_is_modified(m)) + val |= MINCORE_MODIFIED_OTHER; + /* + * Referenced by us + */ + if (pte & PG_A) + val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; + + /* + * Referenced by someone + */ + else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { + val |= MINCORE_REFERENCED_OTHER; + vm_page_flag_set(m, PG_REFERENCED); + } + } + return val; +} + +/* + * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new + * vmspace will be ref'd and the old one will be deref'd. + * + * The vmspace for all lwps associated with the process will be adjusted + * and cr3 will be reloaded if any lwp is the current lwp. + */ void pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) { + struct vmspace *oldvm; + struct lwp *lp; + + crit_enter(); + oldvm = p->p_vmspace; + if (oldvm != newvm) { + p->p_vmspace = newvm; + KKASSERT(p->p_nthreads == 1); + lp = RB_ROOT(&p->p_lwp_tree); + pmap_setlwpvm(lp, newvm); + if (adjrefs) { + sysref_get(&newvm->vm_sysref); + sysref_put(&oldvm->vm_sysref); + } + } + crit_exit(); } +/* + * Set the vmspace for a LWP. The vmspace is almost universally set the + * same as the process vmspace, but virtual kernels need to swap out contexts + * on a per-lwp basis. 
+ */ void pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) { -} + struct vmspace *oldvm; + struct pmap *pmap; + crit_enter(); + oldvm = lp->lwp_vmspace; + + if (oldvm != newvm) { + lp->lwp_vmspace = newvm; + if (curthread->td_lwp == lp) { + pmap = vmspace_pmap(newvm); +#if defined(SMP) + atomic_set_int(&pmap->pm_active, 1 << mycpu->gd_cpuid); +#else + pmap->pm_active |= 1; +#endif +#if defined(SWTCH_OPTIM_STATS) + tlb_flush_count++; +#endif + curthread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir); + curthread->td_pcb->pcb_cr3 |= PG_RW | PG_U | PG_V; + *link_pdpe = curthread->td_pcb->pcb_cr3 | PG_RW | PG_U | PG_V; + load_cr3(common_lvl4_phys); + pmap = vmspace_pmap(oldvm); +#if defined(SMP) + atomic_clear_int(&pmap->pm_active, + 1 << mycpu->gd_cpuid); +#else + pmap->pm_active &= ~1; +#endif + } + } + crit_exit(); +} vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { - return NULL; + + if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { + return addr; + } + + addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); + return addr; +} + + +#if defined(DEBUG) + +static void pads (pmap_t pm); +void pmap_pvdump (vm_paddr_t pa); + +/* print address space of pmap*/ +static void +pads(pmap_t pm) +{ + vm_offset_t va; + unsigned i, j; + pt_entry_t *ptep; + + if (pm == &kernel_pmap) + return; + crit_enter(); + for (i = 0; i < NPDEPG; i++) { + if (pm->pm_pdir[i]) { + for (j = 0; j < NPTEPG; j++) { + va = (i << PDRSHIFT) + (j << PAGE_SHIFT); + if (pm == &kernel_pmap && va < KERNBASE) + continue; + if (pm != &kernel_pmap && va > UPT_MAX_ADDRESS) + continue; + ptep = pmap_pte_quick(pm, va); + if (pmap_pte_v(ptep)) + kprintf("%lx:%lx ", va, *ptep); + }; + } + } + crit_exit(); + } +void +pmap_pvdump(vm_paddr_t pa) +{ + pv_entry_t pv; + vm_page_t m; + + kprintf("pa %08llx", (long long)pa); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { +#ifdef used_to_be + kprintf(" -> pmap %p, va %x, flags %x", + (void *)pv->pv_pmap, 
pv->pv_va, pv->pv_flags); +#endif + kprintf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); + pads(pv->pv_pmap); + } + kprintf(" "); +} +#endif diff --git a/sys/platform/pc64/amd64/pmap_inval.c b/sys/platform/pc64/amd64/pmap_inval.c new file mode 100644 index 0000000000..7d65fa62e3 --- /dev/null +++ b/sys/platform/pc64/amd64/pmap_inval.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/pc64/amd64/pmap_inval.c,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +/* + * pmap invalidation support code. Certain hardware requirements must + * be dealt with when manipulating page table entries and page directory + * entries within a pmap. In particular, we cannot safely manipulate + * page tables which are in active use by another cpu (even if it is + * running in userland) for two reasons: First, TLB writebacks will + * race against our own modifications and tests. Second, even if we + * were to use bus-locked instruction we can still screw up the + * target cpu's instruction pipeline due to Intel cpu errata. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef SMP + +static void +_cpu_invltlb(void *dummy) +{ + cpu_invltlb(); +} + +static void +_cpu_invl1pg(void *data) +{ + cpu_invlpg(data); +} + +#endif + +/* + * Initialize for add or flush + */ +void +pmap_inval_init(pmap_inval_info_t info) +{ + info->pir_flags = 0; +} + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. 
+ */ +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ +#ifdef SMP + if ((info->pir_flags & PIRF_CPUSYNC) == 0) { + info->pir_flags |= PIRF_CPUSYNC; + info->pir_cpusync.cs_run_func = NULL; + info->pir_cpusync.cs_fin1_func = NULL; + info->pir_cpusync.cs_fin2_func = NULL; + lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync); + } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { + lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); + } +#else + if (pmap->pm_active == 0) + return; +#endif + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +/* + * Synchronize changes with target cpus. + */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ +#ifdef SMP + if (info->pir_flags & PIRF_CPUSYNC) + lwkt_cpusync_finish(&info->pir_cpusync); +#else + if (info->pir_flags & PIRF_INVLTLB) + cpu_invltlb(); + else if (info->pir_flags & PIRF_INVL1PG) + cpu_invlpg(info->pir_cpusync.cs_data); +#endif + info->pir_flags = 0; +} + diff --git a/sys/platform/pc64/amd64/spinlock.s b/sys/platform/pc64/amd64/spinlock.s new file mode 100644 index 0000000000..d7410966c9 --- /dev/null +++ b/sys/platform/pc64/amd64/spinlock.s @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2003 Matthew Dillon. + * Copyright (c) 2008 The DragonFly Project. + * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/simplelock.s,v 1.11.2.2 2003/02/04 20:55:28 jhb Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/spinlock.s,v 1.1 2008/08/29 17:07:10 dillon Exp $ + */ + +#include /* miscellaneous macros */ +#include + +/* + * The spinlock routines may only be used for low level debugging, like + * kernel printfs, and when no other option is available such as situations + * relating to hardware interrupt masks. Spinlock routines should not be + * used in interrupt service routines or in any other situation. 
+ * + * NOTE: for UP the spinlock routines still disable/restore interrupts + */ +ENTRY(spin_lock_deprecated) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +ENTRY(spin_unlock_deprecated) + movl 4(%esp),%edx + SPIN_UNLOCK((%edx)) /* note: %eax, %ecx tromped */ + ret + +NON_GPROF_ENTRY(spin_lock_np) + movl 4(%esp),%edx + SPIN_LOCK((%edx)) /* note: %eax, %ecx tromped */ + NON_GPROF_RET + +NON_GPROF_ENTRY(spin_unlock_np) + movl 4(%esp), %edx /* get the address of the lock */ + SPIN_UNLOCK((%edx)) + NON_GPROF_RET + +/* + * Auxillary convenience routines. Note that these functions disable and + * restore interrupts as well, on SMP, as performing spin locking functions. + */ +NON_GPROF_ENTRY(imen_lock) + SPIN_LOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(imen_unlock) + SPIN_UNLOCK(imen_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_lock) + SPIN_LOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(intr_unlock) + SPIN_UNLOCK(intr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_lock) + SPIN_LOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(mpintr_unlock) + SPIN_UNLOCK(mpintr_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_lock) + SPIN_LOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(clock_unlock) + SPIN_UNLOCK(clock_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_lock) + SPIN_LOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(com_unlock) + SPIN_UNLOCK(com_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_lock) + SPIN_LOCK(cons_spinlock) + NON_GPROF_RET + +NON_GPROF_ENTRY(cons_unlock) + SPIN_UNLOCK(cons_spinlock) + NON_GPROF_RET + diff --git a/sys/platform/pc64/amd64/support.s b/sys/platform/pc64/amd64/support.s index 8098ae010b..3bb5f99f7e 100644 --- a/sys/platform/pc64/amd64/support.s +++ b/sys/platform/pc64/amd64/support.s @@ -1,6 +1,7 @@ /*- - * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. + * Copyright (c) 2003 Peter Wemm. 
+ * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/amd64/amd64/support.S,v 1.127 2007/05/23 08:33:04 kib Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/support.s,v 1.1 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/support.s,v 1.2 2008/08/29 17:07:10 dillon Exp $ */ #include "opt_ddb.h" @@ -71,9 +72,6 @@ ENTRY(bzero) stosb ret -ENTRY(memset) - ret - /* Address: %rdi */ ENTRY(pagezero) movq $-PAGE_SIZE,%rdx @@ -149,8 +147,10 @@ ENTRY(bcopy) cld ret ENTRY(ovbcopy) + ud2 ret ENTRY(reset_dbregs) + ud2 ret /* @@ -223,6 +223,7 @@ ENTRY(fillw) */ ENTRY(copyout) movq PCPU(curthread),%rax + movq TD_PCB(%rax), %rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout @@ -266,12 +267,14 @@ ENTRY(copyout) done_copyout: xorl %eax,%eax movq PCPU(curthread),%rdx + movq TD_PCB(%rdx), %rdx movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyout_fault: movq PCPU(curthread),%rdx + movq TD_PCB(%rdx), %rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax ret @@ -282,6 +285,7 @@ copyout_fault: */ ENTRY(copyin) movq PCPU(curthread),%rax + movq TD_PCB(%rax), %rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? 
*/ jz done_copyin @@ -311,12 +315,14 @@ ENTRY(copyin) done_copyin: xorl %eax,%eax movq PCPU(curthread),%rdx + movq TD_PCB(%rdx), %rdx movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyin_fault: movq PCPU(curthread),%rdx + movq TD_PCB(%rdx), %rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax ret @@ -327,6 +333,7 @@ copyin_fault: */ ENTRY(casuword32) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax @@ -346,6 +353,7 @@ ENTRY(casuword32) */ movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $0,PCB_ONFAULT(%rcx) ret @@ -355,6 +363,7 @@ ENTRY(casuword32) */ ENTRY(casuword) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax @@ -374,6 +383,7 @@ ENTRY(casuword) */ movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $0,PCB_ONFAULT(%rcx) ret @@ -387,6 +397,7 @@ ENTRY(casuword) ALTENTRY(fuword64) ENTRY(fuword) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax @@ -399,6 +410,7 @@ ENTRY(fuword) ENTRY(fuword32) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax @@ -423,6 +435,7 @@ ENTRY(fuswintr) ENTRY(fuword16) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax @@ -435,6 +448,7 @@ ENTRY(fuword16) ENTRY(fubyte) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax @@ -449,6 +463,7 @@ ENTRY(fubyte) fusufault: movq PCPU(curthread),%rcx xorl %eax,%eax + movq TD_PCB(%rcx), %rcx movq %rax,PCB_ONFAULT(%rcx) decq %rax ret @@ -461,6 +476,7 @@ fusufault: ALTENTRY(suword64) ENTRY(suword) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax @@ -470,11 +486,13 @@ ENTRY(suword) movq %rsi,(%rdi) 
xorl %eax,%eax movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq %rax,PCB_ONFAULT(%rcx) ret ENTRY(suword32) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax @@ -484,11 +502,13 @@ ENTRY(suword32) movl %esi,(%rdi) xorl %eax,%eax movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq %rax,PCB_ONFAULT(%rcx) ret ENTRY(suword16) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax @@ -498,11 +518,13 @@ ENTRY(suword16) movw %si,(%rdi) xorl %eax,%eax movq PCPU(curthread),%rcx /* restore trashed register */ + movq TD_PCB(%rcx), %rcx movq %rax,PCB_ONFAULT(%rcx) ret ENTRY(subyte) movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax @@ -513,6 +535,7 @@ ENTRY(subyte) movb %al,(%rdi) xorl %eax,%eax movq PCPU(curthread),%rcx /* restore trashed register */ + movq TD_PCB(%rcx), %rcx movq %rax,PCB_ONFAULT(%rcx) ret @@ -530,6 +553,7 @@ ENTRY(copyinstr) movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax @@ -575,6 +599,7 @@ cpystrflt: cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(curthread),%rcx + movq TD_PCB(%rcx), %rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 diff --git a/sys/platform/pc64/amd64/swtch.s b/sys/platform/pc64/amd64/swtch.s index ae18442c93..f966c882c4 100644 --- a/sys/platform/pc64/amd64/swtch.s +++ b/sys/platform/pc64/amd64/swtch.s @@ -1,5 +1,6 @@ /* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. + * Copyright (c) 2008 Jordan Gordeev. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -66,10 +67,10 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/swtch.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/swtch.s,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ -#include "use_npx.h" +//#include "use_npx.h" #include @@ -77,8 +78,14 @@ #include #include +#if JG +#include +#endif #include +#define CHECKNZ(expr, scratch_reg) \ + movq expr, scratch_reg; testq scratch_reg, scratch_reg; jnz 7f; int $3; 7: + #include "assym.s" #if defined(SMP) @@ -101,7 +108,7 @@ tlb_flush_count: .long 0 /* - * cpu_heavy_switch(next_thread) + * cpu_heavy_switch(struct thread *next_thread) * * Switch from the current thread to a new thread. This entry * is normally called via the thread->td_switch function, and will @@ -112,9 +119,100 @@ tlb_flush_count: .long 0 * YYY disable interrupts once giant is removed. */ ENTRY(cpu_heavy_switch) + /* + * Save RIP, RSP and callee-saved registers (RBX, RBP, R12-R15). + */ + movq PCPU(curthread),%rcx + /* On top of the stack is the return adress. */ + movq (%rsp),%rax /* (reorder optimization) */ + movq TD_PCB(%rcx),%rdx /* RDX = PCB */ + movq %rax,PCB_RIP(%rdx) /* return PC may be modified */ + movq %rbx,PCB_RBX(%rdx) + movq %rsp,PCB_RSP(%rdx) + movq %rbp,PCB_RBP(%rdx) + movq %r12,PCB_R12(%rdx) + movq %r13,PCB_R13(%rdx) + movq %r14,PCB_R14(%rdx) + movq %r15,PCB_R15(%rdx) + + movq %rcx,%rbx /* RBX = curthread */ + movq TD_LWP(%rcx),%rcx + movl PCPU(cpuid), %eax + movq LWP_VMSPACE(%rcx), %rcx /* RCX = vmspace */ + MPLOCKED btrl %eax, VM_PMAP+PM_ACTIVE(%rcx) + + /* + * Push the LWKT switch restore function, which resumes a heavy + * weight process. Note that the LWKT switcher is based on + * TD_SP, while the heavy weight process switcher is based on + * PCB_RSP. TD_SP is usually two ints pushed relative to + * PCB_RSP. We push the flags for later restore by cpu_heavy_restore. 
+ */ + pushfq + movq $cpu_heavy_restore, %rax + pushq %rax + movq %rsp,TD_SP(%rbx) + +#if JG + /* + * Save debug regs if necessary + */ + movb PCB_FLAGS(%edx),%al + andb $PCB_DBREGS,%al + jz 1f /* no, skip over */ + movl %dr7,%eax /* yes, do the save */ + movl %eax,PCB_DR7(%edx) + andl $0x0000fc00, %eax /* disable all watchpoints */ + movl %eax,%dr7 + movl %dr6,%eax + movl %eax,PCB_DR6(%edx) + movl %dr3,%eax + movl %eax,PCB_DR3(%edx) + movl %dr2,%eax + movl %eax,PCB_DR2(%edx) + movl %dr1,%eax + movl %eax,PCB_DR1(%edx) + movl %dr0,%eax + movl %eax,PCB_DR0(%edx) +1: +#endif + +#if JG +#if NNPX > 0 + /* + * Save the FP state if we have used the FP. Note that calling + * npxsave will NULL out PCPU(npxthread). + */ + cmpl %ebx,PCPU(npxthread) + jne 1f + pushl TD_SAVEFPU(%ebx) + call npxsave /* do it in a big C function */ + addl $4,%esp /* EAX, ECX, EDX trashed */ +1: +#endif +#endif /* NNPX > 0 */ + + /* + * Switch to the next thread, which was passed as an argument + * to cpu_heavy_switch(). The argument is in %rdi. + * Set the current thread, load the stack pointer, + * and 'ret' into the switch-restore function. + * + * The switch restore function expects the new thread to be in %rax + * and the old one to be in %rbx. + * + * There is a one-instruction window where curthread is the new + * thread but %rsp still points to the old thread's stack, but + * we are protected by a critical section so it is ok. + */ + movq %rdi,%rax /* RAX = newtd, RBX = oldtd */ + movq %rax,PCPU(curthread) + movq TD_SP(%rax),%rsp + CHECKNZ((%rsp), %r9) + ret /* - * cpu_exit_switch() + * cpu_exit_switch(struct thread *next) * * The switch function is changed to this when a thread is going away * for good. We have to ensure that the MMU state is not cached, and @@ -125,9 +223,53 @@ ENTRY(cpu_heavy_switch) * complete. */ ENTRY(cpu_exit_switch) + /* + * Get us out of the vmspace + */ +#if JG + movq %cr3,%rax + cmpq %rcx,%rax + je 1f + /* JG no increment of statistics counters? 
see cpu_heavy_restore */ + movq %rcx,%cr3 +1: +#else + movq IdlePTD, %rcx + orq $(PG_RW|PG_V), %rcx + movq link_pdpe,%r12 + movq %rcx, (%r12) + movq %cr3, %rcx + movq %rcx, %cr3 +#endif + movq PCPU(curthread),%rbx + + /* + * If this is a process/lwp, deactivate the pmap after we've + * switched it out. + */ + movq TD_LWP(%rbx),%rcx + testq %rcx,%rcx + jz 2f + movl PCPU(cpuid), %eax + movq LWP_VMSPACE(%rcx), %rcx /* RCX = vmspace */ + MPLOCKED btrl %eax, VM_PMAP+PM_ACTIVE(%rcx) +2: + /* + * Switch to the next thread. RET into the restore function, which + * expects the new thread in RAX and the old in RBX. + * + * There is a one-instruction window where curthread is the new + * thread but %rsp still points to the old thread's stack, but + * we are protected by a critical section so it is ok. + */ + movq %rdi,%rax + movq %rax,PCPU(curthread) + movq TD_SP(%rax),%rsp + CHECKNZ((%rsp), %r9) + ret /* - * cpu_heavy_restore() (current thread in %eax on entry) + * cpu_heavy_restore() (current thread in %rax on entry) * * Restore the thread after an LWKT switch. This entry is normally * called via the LWKT switch restore function, which was pulled @@ -154,18 +296,284 @@ ENTRY(cpu_exit_switch) */ ENTRY(cpu_heavy_restore) + popfq + movq TD_PCB(%rax),%rdx /* RDX = PCB */ + movq TD_LWP(%rax),%rcx + +#if defined(SWTCH_OPTIM_STATS) + incl _swtch_optim_stats +#endif + /* + * Tell the pmap that our cpu is using the VMSPACE now. We cannot + * safely test/reload %cr3 until after we have set the bit in the + * pmap (remember, we do not hold the MP lock in the switch code). + */ + movq LWP_VMSPACE(%rcx), %rcx /* RCX = vmspace */ + movl PCPU(cpuid), %esi + MPLOCKED btsl %esi, VM_PMAP+PM_ACTIVE(%rcx) + + /* + * Restore the MMU address space. If it is the same as the last + * thread we don't have to invalidate the tlb (i.e. reload cr3). + * YYY which naturally also means that the PM_ACTIVE bit had better + * already have been set before we set it above, check? 
YYY + */ +#if JG + movq %cr3,%rsi + movq PCB_CR3(%rdx),%rcx + cmpq %rsi,%rcx + je 4f +#if defined(SWTCH_OPTIM_STATS) + decl _swtch_optim_stats + incl _tlb_flush_count +#endif + movq %rcx,%cr3 +4: +#else + movq PCB_CR3(%rdx),%rcx + orq $(PG_RW|PG_U|PG_V), %rcx + /*XXX*/ + movq link_pdpe,%r12 + movq %rcx, (%r12) + movq %cr3, %rcx + movq %rcx, %cr3 +#endif + /* + * Clear TDF_RUNNING flag in old thread only after cleaning up + * %cr3. The target thread is already protected by being TDF_RUNQ + * so setting TDF_RUNNING isn't as big a deal. + */ + andl $~TDF_RUNNING,TD_FLAGS(%rbx) + orl $TDF_RUNNING,TD_FLAGS(%rax) + + /* + * Deal with the PCB extension, restore the private tss + */ + movq PCB_EXT(%rdx),%rdi /* check for a PCB extension */ + /* JG cheaper than "movq $1,%rbx", right? */ + /* JG what's that magic value $1? */ + movl $1,%ebx /* maybe mark use of a private tss */ + testq %rdi,%rdi +#if JG + jnz 2f +#endif + + /* JG + * Going back to the common_tss. We may need to update TSS_ESP0 + * which sets the top of the supervisor stack when entering from + * usermode. The PCB is at the top of the stack but we need another + * 16 bytes to take vm86 into account. + */ +#if JG + leal -16(%edx),%ebx + movl %ebx, PCPU(common_tss) + TSS_ESP0 + + cmpl $0,PCPU(private_tss) /* don't have to reload if */ + je 3f /* already using the common TSS */ + + /* JG? */ + subl %ebx,%ebx /* unmark use of private tss */ + + /* + * Get the address of the common TSS descriptor for the ltr. + * There is no way to get the address of a segment-accessed variable + * so we store a self-referential pointer at the base of the per-cpu + * data area and add the appropriate offset. + */ + /* JG movl? */ + movq $gd_common_tssd, %rdi + /* JG name for "%gs:0"? */ + addq %gs:0, %rdi + + /* + * Move the correct TSS descriptor into the GDT slot, then reload + * ltr. 
+ */ +2: + /* JG */ + movl %ebx,PCPU(private_tss) /* mark/unmark private tss */ + movq PCPU(tss_gdt), %rbx /* entry in GDT */ + movq 0(%rdi), %rax + movq %rax, 0(%rbx) + movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ + ltr %si +#endif + +3: + /* + * Restore the user %gs and %fs + */ + movq PCB_FSBASE(%rdx),%r9 + cmpq PCPU(user_fs),%r9 + je 4f + movq %rdx,%r10 + movq %r9,PCPU(user_fs) + movl $MSR_FSBASE,%ecx + movl PCB_FSBASE(%r10),%eax + movl PCB_FSBASE+4(%r10),%edx + wrmsr + movq %r10,%rdx +4: + movq PCB_GSBASE(%rdx),%r9 + cmpq PCPU(user_gs),%r9 + je 5f + movq %rdx,%r10 + movq %r9,PCPU(user_gs) + movl $MSR_KGSBASE,%ecx /* later swapgs moves it to GSBASE */ + movl PCB_GSBASE(%r10),%eax + movl PCB_GSBASE+4(%r10),%edx + wrmsr + movq %r10,%rdx +5: + + /* + * Restore general registers. + */ + movq PCB_RBX(%rdx), %rbx + movq PCB_RSP(%rdx), %rsp + movq PCB_RBP(%rdx), %rbp + movq PCB_R12(%rdx), %r12 + movq PCB_R13(%rdx), %r13 + movq PCB_R14(%rdx), %r14 + movq PCB_R15(%rdx), %r15 + movq PCB_RIP(%rdx), %rax + movq %rax, (%rsp) + +#if JG + /* + * Restore the user LDT if we have one + */ + cmpl $0, PCB_USERLDT(%edx) + jnz 1f + movl _default_ldt,%eax + cmpl PCPU(currentldt),%eax + je 2f + lldt _default_ldt + movl %eax,PCPU(currentldt) + jmp 2f +1: pushl %edx + call set_user_ldt + popl %edx +2: +#endif +#if JG + /* + * Restore the user TLS if we have one + */ + pushl %edx + call set_user_TLS + popl %edx +#endif + +#if JG + /* + * Restore the DEBUG register state if necessary. 
+ */ + movb PCB_FLAGS(%edx),%al + andb $PCB_DBREGS,%al + jz 1f /* no, skip over */ + movl PCB_DR6(%edx),%eax /* yes, do the restore */ + movl %eax,%dr6 + movl PCB_DR3(%edx),%eax + movl %eax,%dr3 + movl PCB_DR2(%edx),%eax + movl %eax,%dr2 + movl PCB_DR1(%edx),%eax + movl %eax,%dr1 + movl PCB_DR0(%edx),%eax + movl %eax,%dr0 + movl %dr7,%eax /* load dr7 so as not to disturb */ + andl $0x0000fc00,%eax /* reserved bits */ + pushl %ebx + movl PCB_DR7(%edx),%ebx + andl $~0x0000fc00,%ebx + orl %ebx,%eax + popl %ebx + movl %eax,%dr7 +1: +#endif + + CHECKNZ((%rsp), %r9) + ret /* - * savectx(pcb) + * savectx(struct pcb *pcb) * * Update pcb, saving current processor state. */ ENTRY(savectx) + /* fetch PCB */ + /* JG use %rdi instead of %rcx everywhere? */ + movq %rdi,%rcx + + /* caller's return address - child won't execute this routine */ + movq (%rsp),%rax + movq %rax,PCB_RIP(%rcx) + + movq %cr3,%rax +#ifndef JG + movq (%rax), %rax + movq $0x000ffffffffff000, %rcx + andq %rcx, %rax + movq (%rax), %rax + andq %rcx, %rax +#endif + movq %rax,PCB_CR3(%rcx) + + movq %rbx,PCB_RBX(%rcx) + movq %rsp,PCB_RSP(%rcx) + movq %rbp,PCB_RBP(%rcx) + movq %r12,PCB_R12(%rcx) + movq %r13,PCB_R13(%rcx) + movq %r14,PCB_R14(%rcx) + movq %r15,PCB_R15(%rcx) + +#if JG +#if NNPX > 0 + /* + * If npxthread == NULL, then the npx h/w state is irrelevant and the + * state had better already be in the pcb. This is true for forks + * but not for dumps (the old book-keeping with FP flags in the pcb + * always lost for dumps because the dump pcb has 0 flags). + * + * If npxthread != NULL, then we have to save the npx h/w state to + * npxthread's pcb and copy it to the requested pcb, or save to the + * requested pcb and reload. Copying is easier because we would + * have to handle h/w bugs for reloading. We used to lose the + * parent's npx state for forks by forgetting to reload. 
+ */ + movl PCPU(npxthread),%eax + testl %eax,%eax + je 1f + + pushl %ecx /* target pcb */ + movl TD_SAVEFPU(%eax),%eax /* originating savefpu area */ + pushl %eax + + pushl %eax + call npxsave + addl $4,%esp + + popl %eax + popl %ecx + + pushl $PCB_SAVEFPU_SIZE + leal PCB_SAVEFPU(%ecx),%ecx + pushl %ecx + pushl %eax + call bcopy + addl $12,%esp +#endif /* NNPX > 0 */ + +1: +#endif + CHECKNZ((%rsp), %r9) + ret /* - * cpu_idle_restore() (current thread in %eax on entry) (one-time execution) + * cpu_idle_restore() (current thread in %rax on entry) (one-time execution) * - * Don't bother setting up any regs other then %ebp so backtraces + * Don't bother setting up any regs other than %rbp so backtraces * don't die. This restore function is used to bootstrap into the * cpu_idle() LWKT only, after that cpu_lwkt_*() will be used for * switching. @@ -179,11 +587,36 @@ ENTRY(savectx) * cpus. */ ENTRY(cpu_idle_restore) + /* cli */ + movq IdlePTD,%rcx + /* JG xor? */ + movl $0,%ebp + /* JG push RBP? */ + pushq $0 + orq $(PG_RW|PG_V), %rcx + movq link_pdpe,%r12 + movq %rcx, (%r12) + movq %cr3, %rcx + movq %rcx,%cr3 + andl $~TDF_RUNNING,TD_FLAGS(%rbx) + orl $TDF_RUNNING,TD_FLAGS(%rax) +#ifdef SMP + cmpl $0,PCPU(cpuid) + je 1f + call ap_init +1: +#endif + /* + * ap_init can decide to enable interrupts early, but otherwise, or if + * we are UP, do it here. + */ + sti + jmp cpu_idle /* - * cpu_kthread_restore() (current thread is %eax on entry) (one-time execution) + * cpu_kthread_restore() (current thread is %rax on entry) (one-time execution) * - * Don't bother setting up any regs other then %ebp so backtraces + * Don't bother setting up any regs other then %rbp so backtraces * don't die. This restore function is used to bootstrap into an * LWKT based kernel thread only. cpu_lwkt_switch() will be used * after this. @@ -192,9 +625,28 @@ ENTRY(cpu_idle_restore) * we can release our critical section and enable interrupts early. 
*/ ENTRY(cpu_kthread_restore) + sti + movq IdlePTD,%rcx + movq TD_PCB(%rax),%rdx + /* JG "movq $0, %rbp"? "xorq %rbp, %rbp"? */ + movl $0,%ebp + orq $(PG_RW|PG_V), %rcx + movq link_pdpe,%r12 + movq %rcx, (%r12) + movq %cr3, %rcx + movq %rcx,%cr3 + /* rax and rbx come from the switchout code */ + andl $~TDF_RUNNING,TD_FLAGS(%rbx) + orl $TDF_RUNNING,TD_FLAGS(%rax) + subl $TDPRI_CRIT,TD_PRI(%rax) + movq PCB_R12(%rdx),%rdi /* argument to RBX function */ + movq PCB_RBX(%rdx),%rax /* thread function */ + /* note: top of stack return address inherited by function */ + CHECKNZ(%rax, %r9) + jmp *%rax /* - * cpu_lwkt_switch() + * cpu_lwkt_switch(struct thread *) * * Standard LWKT switching function. Only non-scratch registers are * saved and we don't bother with the MMU state or anything else. @@ -202,15 +654,55 @@ ENTRY(cpu_kthread_restore) * This function is always called while in a critical section. * * There is a one-instruction window where curthread is the new - * thread but %esp still points to the old thread's stack, but + * thread but %rsp still points to the old thread's stack, but * we are protected by a critical section so it is ok. * * YYY BGL, SPL */ ENTRY(cpu_lwkt_switch) + pushq %rbp /* JG note: GDB hacked to locate ebp relative to td_sp */ + /* JG we've got more registers on AMD64 */ + pushq %rbx + movq PCPU(curthread),%rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + +#if JG +#if NNPX > 0 + /* + * Save the FP state if we have used the FP. Note that calling + * npxsave will NULL out PCPU(npxthread). + * + * We have to deal with the FP state for LWKT threads in case they + * happen to get preempted or block while doing an optimized + * bzero/bcopy/memcpy. 
+ */ + cmpl %ebx,PCPU(npxthread) + jne 1f + pushl TD_SAVEFPU(%ebx) + call npxsave /* do it in a big C function */ + addl $4,%esp /* EAX, ECX, EDX trashed */ +1: +#endif /* NNPX > 0 */ +#endif + + movq %rdi,%rax /* switch to this thread */ + pushq $cpu_lwkt_restore + movq %rsp,TD_SP(%rbx) + movq %rax,PCPU(curthread) + movq TD_SP(%rax),%rsp + + /* + * %rax contains new thread, %rbx contains old thread. + */ + CHECKNZ((%rsp), %r9) + ret /* - * cpu_lwkt_restore() (current thread in %eax on entry) + * cpu_lwkt_restore() (current thread in %rax on entry) * * Standard LWKT restore function. This function is always called * while in a critical section. @@ -218,12 +710,35 @@ ENTRY(cpu_lwkt_switch) * Warning: due to preemption the restore function can be used to * 'return' to the original thread. Interrupt disablement must be * protected through the switch so we cannot run splz here. - */ -ENTRY(cpu_lwkt_restore) - -/* - * bootstrap_idle() * - * Make AP become the idle loop. + * YYY we theoretically do not need to load KPML4phys into cr3, but if + * so we need a way to detect when the PTD we are using is being + * deleted due to a process exiting. */ -ENTRY(bootstrap_idle) +ENTRY(cpu_lwkt_restore) +#if JG + movq common_lvl4_phys,%rcx /* YYY borrow but beware desched/cpuchg/exit */ +#endif + movq IdlePTD, %rcx + orq $(PG_RW|PG_V), %rcx + movq link_pdpe,%r12 + movq %rcx, (%r12) + movq %cr3, %rcx + movq %rcx, %cr3 +#if JG + movq %cr3,%rdx + cmpq %rcx,%rdx + je 1f + movq %rcx,%cr3 +1: +#endif + andl $~TDF_RUNNING,TD_FLAGS(%rbx) + orl $TDF_RUNNING,TD_FLAGS(%rax) + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + ret diff --git a/sys/platform/pc64/amd64/systimer.c b/sys/platform/pc64/amd64/systimer.c index 3ea13405f6..e4c49ceaf8 100644 --- a/sys/platform/pc64/amd64/systimer.c +++ b/sys/platform/pc64/amd64/systimer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2006,2008 The DragonFly Project. 
All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/systimer.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/systimer.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -46,12 +46,15 @@ #include #include +#if JG int adjkerntz; int wall_cmos_clock = 0; +#endif /* * SYSTIMER IMPLEMENTATION */ +#if JG /* * Initialize the systimer subsystem, called from MI code in early boot. */ @@ -105,4 +108,4 @@ void DRIVERSLEEP(int usec) { } - +#endif diff --git a/sys/platform/pc64/amd64/tls.c b/sys/platform/pc64/amd64/tls.c index 3331ed8479..e7bdc46051 100644 --- a/sys/platform/pc64/amd64/tls.c +++ b/sys/platform/pc64/amd64/tls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by David Xu and Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/tls.c,v 1.3 2008/06/29 19:04:02 dillon Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/tls.c,v 1.4 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -43,21 +43,24 @@ #include #include #include +#include + #include #include #include #include +#include #include #include /* pcb.h included via sys/user.h */ #include /* CPU_prvspace */ #include +#include /* - * set a TLS descriptor and resync the GDT. A descriptor may be cleared - * by passing info=NULL and infosize=0. Note that hardware limitations may - * cause the size passed in tls_info to be approximated. - * + * set a TLS descriptor. 
For AMD64 descriptor 0 identifies %fs and + * descriptor 1 identifies %gs, and 0 is returned in sysmsg_result. + * * Returns the value userland needs to load into %gs representing the * TLS descriptor or -1 on error. * @@ -67,15 +70,16 @@ int sys_set_tls_area(struct set_tls_area_args *uap) { struct tls_info info; - struct segment_descriptor *desc; int error; int i; /* * Sanity checks + * + * which 0 == %fs, which 1 == %gs */ i = uap->which; - if (i < 0 || i >= NGTLS) + if (i < 0 || i > 1) return (ERANGE); if (uap->infosize < 0) return (EINVAL); @@ -94,56 +98,13 @@ sys_set_tls_area(struct set_tls_area_args *uap) return (error); if (info.size < -1) return (EINVAL); - if (info.size > (1 << 20)) - info.size = (info.size + PAGE_MASK) & ~PAGE_MASK; /* - * Load the descriptor. A critical section is required in case - * an interrupt thread comes along and switches us out and then back - * in. + * For AMD64 we can only adjust FSBASE and GSBASE */ - desc = &curthread->td_tls.tls[i]; - crit_enter(); - if (info.size == 0) { - bzero(desc, sizeof(*desc)); - } else { - desc->sd_lobase = (intptr_t)info.base; - desc->sd_hibase = (intptr_t)info.base >> 24; - desc->sd_def32 = 1; - desc->sd_type = SDT_MEMRWA; - desc->sd_dpl = SEL_UPL; - desc->sd_xx = 0; - desc->sd_p = 1; - if (info.size == -1) { - /* - * A descriptor size of -1 is a hack to map the - * whole address space. This type of mapping is - * required for direct-tls accesses of variable - * data, e.g. %gs:OFFSET where OFFSET is negative. - */ - desc->sd_lolimit = -1; - desc->sd_hilimit = -1; - desc->sd_gran = 1; - } else if (info.size >= (1 << 20)) { - /* - * A descriptor size greater then 1MB requires page - * granularity (the lo+hilimit field is only 20 bits) - */ - desc->sd_lolimit = info.size >> PAGE_SHIFT; - desc->sd_hilimit = info.size >> (PAGE_SHIFT + 16); - desc->sd_gran = 1; - } else { - /* - * Otherwise a byte-granular size is supported. 
- */ - desc->sd_lolimit = info.size; - desc->sd_hilimit = info.size >> 16; - desc->sd_gran = 0; - } - } - crit_exit(); - uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); + curthread->td_tls.info[i] = info; set_user_TLS(); + uap->sysmsg_result = 0; /* segment descriptor $0 */ return(0); } @@ -159,7 +120,6 @@ int sys_get_tls_area(struct get_tls_area_args *uap) { struct tls_info info; - struct segment_descriptor *desc; int error; int i; @@ -167,41 +127,35 @@ sys_get_tls_area(struct get_tls_area_args *uap) * Sanity checks */ i = uap->which; - if (i < 0 || i >= NGTLS) + if (i < 0 || i > 1) return (ERANGE); if (uap->infosize < 0) return (EINVAL); - /* - * unpack the descriptor, ENOENT is returned for any descriptor - * which has not been loaded. uap->info may be NULL. - */ - desc = &curthread->td_tls.tls[i]; - if (desc->sd_p) { - if (uap->info && uap->infosize > 0) { - bzero(&info, sizeof(info)); - info.base = (void *)(intptr_t) - ((desc->sd_hibase << 24) | desc->sd_lobase); - info.size = (desc->sd_hilimit << 16) | desc->sd_lolimit; - if (desc->sd_gran) - info.size <<= PAGE_SHIFT; - error = copyout(&info, uap->info, - min(sizeof(info), uap->infosize)); - } else { - error = 0; - } - uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); - } else { - error = ENOENT; - } + info = curthread->td_tls.info[i]; + + error = copyout(&info, uap->info, min(sizeof(info), uap->infosize)); return(error); } /* - * This function is a NOP because the TLS segments are proactively copied - * by vmspace_ctl() when we switch to the (emulated) user process. 
+ * Install the TLS */ void set_user_TLS(void) { + struct mdglobaldata *gd = mdcpu; + thread_t td = gd->mi.gd_curthread; + + td->td_pcb->pcb_fsbase = (register_t)td->td_tls.info[0].base; + td->td_pcb->pcb_gsbase = (register_t)td->td_tls.info[1].base; + if (gd->gd_user_fs != td->td_pcb->pcb_fsbase) { + gd->gd_user_fs = td->td_pcb->pcb_fsbase; + wrmsr(MSR_FSBASE, gd->gd_user_fs); + } + if (gd->gd_user_gs != td->td_pcb->pcb_gsbase) { + gd->gd_user_gs = td->td_pcb->pcb_gsbase; + wrmsr(MSR_KGSBASE, gd->gd_user_gs); + } } + diff --git a/sys/platform/pc64/amd64/trap.c b/sys/platform/pc64/amd64/trap.c index acb338db4b..db29f39c01 100644 --- a/sys/platform/pc64/amd64/trap.c +++ b/sys/platform/pc64/amd64/trap.c @@ -1,7 +1,9 @@ /*- - * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (C) 1994, David Greenman + * Copyright (c) 2008 The DragonFly Project. + * Copyright (c) 2008 Jordan Gordeev. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. @@ -34,39 +36,1316 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/trap.c,v 1.1 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/trap.c,v 1.2 2008/08/29 17:07:10 dillon Exp $ */ -#include +/* + * AMD64 Trap and System call handling + */ + +#include "opt_ddb.h" +#include "opt_ktrace.h" #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTRACE +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef SMP + +#define MAKEMPSAFE(have_mplock) \ + if (have_mplock == 0) { \ + get_mplock(); \ + have_mplock = 1; \ + } + +#else + +#define MAKEMPSAFE(have_mplock) + +#endif + +extern void trap(struct trapframe *frame); +extern void syscall2(struct trapframe *frame); + +static int trap_pfault(struct trapframe *, int); +static void trap_fatal(struct trapframe *, vm_offset_t); +void dblfault_handler(struct trapframe *frame); + +#define PCPU_GET(member) ((mycpu)->gd_##member) +#define PCPU_INC(member) ((mycpu)->gd_##member)++ + +#define MAX_TRAP_MSG 30 +static char *trap_msg[] = { + "", /* 0 unused */ + "privileged instruction fault", /* 1 T_PRIVINFLT */ + "", /* 2 unused */ + "breakpoint instruction fault", /* 3 T_BPTFLT */ + "", /* 4 unused */ + "", /* 5 unused */ + "arithmetic trap", /* 6 T_ARITHTRAP */ + "system forced exception", /* 7 T_ASTFLT */ + "", /* 8 unused */ + "general protection fault", /* 9 T_PROTFLT */ + "trace trap", /* 10 T_TRCTRAP */ + "", /* 11 unused */ + "page fault", /* 12 T_PAGEFLT */ + "", /* 13 unused */ + "alignment fault", /* 14 T_ALIGNFLT */ + "", /* 15 unused */ + "", /* 16 unused */ + "", /* 17 unused */ + "integer divide fault", /* 18 T_DIVIDE */ + "non-maskable interrupt trap", 
/* 19 T_NMI */ + "overflow trap", /* 20 T_OFLOW */ + "FPU bounds check fault", /* 21 T_BOUND */ + "FPU device not available", /* 22 T_DNA */ + "double fault", /* 23 T_DOUBLEFLT */ + "FPU operand fetch fault", /* 24 T_FPOPFLT */ + "invalid TSS fault", /* 25 T_TSSFLT */ + "segment not present fault", /* 26 T_SEGNPFLT */ + "stack fault", /* 27 T_STKFLT */ + "machine check trap", /* 28 T_MCHK */ + "SIMD floating-point exception", /* 29 T_XMMFLT */ + "reserved (unknown) fault", /* 30 T_RESERVED */ +}; + +#ifdef DDB +static int ddb_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, + &ddb_on_nmi, 0, "Go to DDB on NMI"); +#endif +static int panic_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, + &panic_on_nmi, 0, "Panic on NMI"); +static int fast_release; +SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, + &fast_release, 0, "Passive Release was optimal"); +static int slow_release; +SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, + &slow_release, 0, "Passive Release was nonoptimal"); +#ifdef SMP +static int syscall_mpsafe = 1; +SYSCTL_INT(_kern, OID_AUTO, syscall_mpsafe, CTLFLAG_RW, + &syscall_mpsafe, 0, "Allow MPSAFE marked syscalls to run without BGL"); +TUNABLE_INT("kern.syscall_mpsafe", &syscall_mpsafe); +static int trap_mpsafe = 1; +SYSCTL_INT(_kern, OID_AUTO, trap_mpsafe, CTLFLAG_RW, + &trap_mpsafe, 0, "Allow traps to mostly run without the BGL"); +TUNABLE_INT("kern.trap_mpsafe", &trap_mpsafe); +#endif + + + +/* + * Passive USER->KERNEL transition. This only occurs if we block in the + * kernel while still holding our userland priority. We have to fixup our + * priority in order to avoid potential deadlocks before we allow the system + * to switch us to another thread. 
+ */ +static void +passive_release(struct thread *td) +{ + struct lwp *lp = td->td_lwp; + + td->td_release = NULL; + lwkt_setpri_self(TDPRI_KERN_USER); + lp->lwp_proc->p_usched->release_curproc(lp); +} + +/* + * userenter() passively intercepts the thread switch function to increase + * the thread priority from a user priority to a kernel priority, reducing + * syscall and trap overhead for the case where no switch occurs. + */ + +static __inline void +userenter(struct thread *curtd) +{ + curtd->td_release = passive_release; +} + +/* + * Handle signals, upcalls, profiling, and other AST's and/or tasks that + * must be completed before we can return to or try to return to userland. + * + * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 + * arithmatic on the delta calculation so the absolute tick values are + * truncated to an integer. + */ +static void +userret(struct lwp *lp, struct trapframe *frame, int sticks) +{ + struct proc *p = lp->lwp_proc; + int sig; + + /* + * Charge system time if profiling. Note: times are in microseconds. + * This may do a copyout and block, so do it first even though it + * means some system time will be charged as user time. + */ + if (p->p_flag & P_PROFIL) { + addupc_task(p, frame->tf_rip, + (u_int)((int)lp->lwp_thread->td_sticks - sticks)); + } + +recheck: + /* + * If the jungle wants us dead, so be it. + */ + if (lp->lwp_flag & LWP_WEXIT) { + get_mplock(); + lwp_exit(0); + rel_mplock(); /* NOT REACHED */ + } + + /* + * Block here if we are in a stopped state. + */ + if (p->p_stat == SSTOP) { + get_mplock(); + tstop(); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending upcalls. If running a virtual kernel be sure + * to restore the virtual kernel's vmspace before posting the upcall. + */ + if (p->p_flag & P_UPCALLPEND) { + p->p_flag &= ~P_UPCALLPEND; + get_mplock(); + postupcall(lp); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending signals. 
If running a virtual kernel be sure + * to restore the virtual kernel's vmspace before posting the signal. + */ + if ((sig = CURSIG(lp)) != 0) { + get_mplock(); + postsig(sig); + rel_mplock(); + goto recheck; + } + + /* + * block here if we are swapped out, but still process signals + * (such as SIGKILL). proc0 (the swapin scheduler) is already + * aware of our situation, we do not have to wake it up. + */ + if (p->p_flag & P_SWAPPEDOUT) { + get_mplock(); + p->p_flag |= P_SWAPWAIT; + swapin_request(); + if (p->p_flag & P_SWAPWAIT) + tsleep(p, PCATCH, "SWOUT", 0); + p->p_flag &= ~P_SWAPWAIT; + rel_mplock(); + goto recheck; + } + + /* + * Make sure postsig() handled request to restore old signal mask after + * running signal handler. + */ + KKASSERT((lp->lwp_flag & LWP_OLDMASK) == 0); +} + +/* + * Cleanup from userenter and any passive release that might have occured. + * We must reclaim the current-process designation before we can return + * to usermode. We also handle both LWKT and USER reschedule requests. + */ +static __inline void +userexit(struct lwp *lp) +{ + struct thread *td = lp->lwp_thread; + globaldata_t gd = td->td_gd; + +#if 0 + /* + * If a user reschedule is requested force a new process to be + * chosen by releasing the current process. Our process will only + * be chosen again if it has a considerably better priority. + */ + if (user_resched_wanted()) + lp->lwp_proc->p_usched->release_curproc(lp); +#endif + + /* + * Handle a LWKT reschedule request first. Since our passive release + * is still in place we do not have to do anything special. + */ + while (lwkt_resched_wanted()) { + lwkt_switch(); + + /* + * The thread that preempted us may have stopped our process. + */ + while (lp->lwp_proc->p_stat == SSTOP) { + get_mplock(); + tstop(); + rel_mplock(); + } + } + + /* + * Acquire the current process designation for this user scheduler + * on this cpu. This will also handle any user-reschedule requests. 
+ */ + lp->lwp_proc->p_usched->acquire_curproc(lp); + /* We may have switched cpus on acquisition */ + gd = td->td_gd; + + /* + * Reduce our priority in preparation for a return to userland. If + * our passive release function was still in place, our priority was + * never raised and does not need to be reduced. + */ + if (td->td_release == NULL) + lwkt_setpri_self(TDPRI_USER_NORM); + td->td_release = NULL; + + /* + * After reducing our priority there might be other kernel-level + * LWKTs that now have a greater priority. Run them as necessary. + * We don't have to worry about losing cpu to userland because + * we still control the current-process designation and we no longer + * have a passive release function installed. + */ + if (lwkt_checkpri_self()) + lwkt_switch(); +} + + +/* + * Exception, fault, and trap interface to the kernel. + * This common code is called from assembly language IDT gate entry + * routines that prepare a suitable stack frame, and restore this + * frame after the exception has been processed. + * + * This function is also called from doreti in an interlock to handle ASTs. + * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap + * + * NOTE! We have to retrieve the fault address prior to obtaining the + * MP lock because get_mplock() may switch out. YYY cr2 really ought + * to be retrieved by the assembly code, not here. + * + * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing + * if an attempt is made to switch from a fast interrupt or IPI. This is + * necessary to properly take fatal kernel traps on SMP machines if + * get_mplock() has to block. 
+ */ + +void +trap(struct trapframe *frame) +{ + struct globaldata *gd = mycpu; + struct thread *td = gd->gd_curthread; + struct lwp *lp = td->td_lwp; + struct proc *p; + int sticks = 0; + int i = 0, ucode = 0, type, code; +#ifdef SMP + int have_mplock = 0; +#endif +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif + vm_offset_t eva; + + p = td->td_proc; + +#ifndef JG + kprintf0("TRAP "); + kprintf0("\"%s\" type=%ld\n", + trap_msg[frame->tf_trapno], frame->tf_trapno); + kprintf0(" rip=%lx rsp=%lx\n", frame->tf_rip, frame->tf_rsp); + kprintf0(" err=%lx addr=%lx\n", frame->tf_err, frame->tf_addr); + kprintf0(" cs=%lx ss=%lx rflags=%lx\n", (unsigned long)frame->tf_cs, (unsigned long)frame->tf_ss, frame->tf_rflags); +#endif + +#ifdef DDB + if (db_active) { + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + trap_fatal(frame, frame->tf_addr); + --gd->gd_trap_nesting_level; + goto out2; + } +#endif +#ifdef DDB + if (db_active) { + eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0); + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + trap_fatal(frame, eva); + --gd->gd_trap_nesting_level; + goto out2; + } +#endif + + eva = 0; + +#ifdef SMP + if (trap_mpsafe == 0) { + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + --gd->gd_trap_nesting_level; + } +#endif + + if ((frame->tf_rflags & PSL_I) == 0) { + /* + * Buggy application or kernel code has disabled interrupts + * and then trapped. Enabling interrupts now is wrong, but + * it is better than running with interrupts disabled until + * they are accidentally enabled later. + */ + type = frame->tf_trapno; + if (ISPL(frame->tf_cs) == SEL_UPL) { + MAKEMPSAFE(have_mplock); + /* JG curproc can be NULL */ + kprintf( + "pid %ld (%s): trap %d with interrupts disabled\n", + (long)curproc->p_pid, curproc->p_comm, type); + } else if (type != T_NMI && type != T_BPTFLT && + type != T_TRCTRAP) { + /* + * XXX not quite right, since this may be for a + * multiple fault in user mode. 
+ */ + MAKEMPSAFE(have_mplock); + kprintf("kernel trap %d with interrupts disabled\n", + type); + } + cpu_enable_intr(); + } + + type = frame->tf_trapno; + code = frame->tf_err; + + if (ISPL(frame->tf_cs) == SEL_UPL) { + /* user trap */ + + KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid, + frame->tf_trapno, eva); + + userenter(td); + + sticks = (int)td->td_sticks; + lp->lwp_md.md_regs = frame; + + switch (type) { + case T_PRIVINFLT: /* privileged instruction fault */ + ucode = ILL_PRVOPC; + i = SIGILL; + break; + + case T_BPTFLT: /* bpt instruction fault */ + case T_TRCTRAP: /* trace trap */ + frame->tf_rflags &= ~PSL_T; + i = SIGTRAP; + break; + + case T_ARITHTRAP: /* arithmetic trap */ + ucode = code; + i = SIGFPE; +#if 0 +#if JG + ucode = fputrap(); +#else + ucode = code; +#endif + i = SIGFPE; +#endif + break; + + case T_ASTFLT: /* Allow process switch */ + mycpu->gd_cnt.v_soft++; + if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, + RQF_AST_OWEUPC); + addupc_task(p, p->p_prof.pr_addr, + p->p_prof.pr_ticks); + } + goto out; + + case T_PROTFLT: /* general protection fault */ + case T_SEGNPFLT: /* segment not present fault */ + case T_TSSFLT: /* invalid TSS fault */ + case T_DOUBLEFLT: /* double fault */ + default: + ucode = code + BUS_SEGM_FAULT ; + i = SIGBUS; + break; + + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + i = trap_pfault(frame, TRUE); + kprintf("TRAP_PFAULT %d\n", i); + if (frame->tf_rip == 0) + Debugger("debug"); + if (i == -1) + goto out; + if (i == 0) + goto out; + + ucode = T_PAGEFLT; + break; + + case T_DIVIDE: /* integer divide fault */ + ucode = FPE_INTDIV; + i = SIGFPE; + break; + + case T_NMI: + MAKEMPSAFE(have_mplock); + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... 
going to debugger\n"); + kdb_trap(type, 0, frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi) + panic("NMI indicates hardware failure"); + break; + + case T_OFLOW: /* integer overflow fault */ + ucode = FPE_INTOVF; + i = SIGFPE; + break; + + case T_BOUND: /* bounds check fault */ + ucode = FPE_FLTSUB; + i = SIGFPE; + break; + + case T_DNA: + /* + * Virtual kernel intercept - pass the DNA exception + * to the virtual kernel if it asked to handle it. + * This occurs when the virtual kernel is holding + * onto the FP context for a different emulated + * process then the one currently running. + * + * We must still call npxdna() since we may have + * saved FP state that the virtual kernel needs + * to hand over to a different emulated process. + */ + if (lp->lwp_vkernel && lp->lwp_vkernel->ve && + (td->td_pcb->pcb_flags & FP_VIRTFP) + ) { + npxdna(); + break; + } + + /* + * The kernel may have switched out the FP unit's + * state, causing the user process to take a fault + * when it tries to use the FP unit. Restore the + * state here + */ + if (npxdna()) + goto out; + i = SIGFPE; + ucode = FPE_FPU_NP_TRAP; + break; + + case T_FPOPFLT: /* FPU operand fetch fault */ + ucode = T_FPOPFLT; + i = SIGILL; + break; + + case T_XMMFLT: /* SIMD floating-point exception */ + ucode = 0; /* XXX */ + i = SIGFPE; + break; + } + } else { + /* kernel trap */ + + switch (type) { + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + trap_pfault(frame, FALSE); + goto out2; + + case T_DNA: + /* + * The kernel is apparently using fpu for copying. + * XXX this should be fatal unless the kernel has + * registered such use. + */ + if (npxdna()) + goto out2; + break; + + case T_STKFLT: /* stack fault */ + break; + + case T_PROTFLT: /* general protection fault */ + case T_SEGNPFLT: /* segment not present fault */ + /* + * Invalid segment selectors and out of bounds + * %rip's and %rsp's can be set up in user mode. 
+ * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. + */ + kprintf0("trap.c line %d\n", __LINE__); + if (mycpu->gd_intr_nesting_level == 0) { + if (td->td_pcb->pcb_onfault) { + frame->tf_rip = (register_t) + td->td_pcb->pcb_onfault; + goto out2; + } + } + break; + + case T_TSSFLT: + /* + * PSL_NT can be set in user mode and isn't cleared + * automatically when the kernel is entered. This + * causes a TSS fault when the kernel attempts to + * `iret' because the TSS link is uninitialized. We + * want to get this fault so that we can fix the + * problem here and not every time the kernel is + * entered. + */ + if (frame->tf_rflags & PSL_NT) { + frame->tf_rflags &= ~PSL_NT; + goto out2; + } + break; + + case T_TRCTRAP: /* trace trap */ +#if 0 + if (frame->tf_rip == (int)IDTVEC(syscall)) { + /* + * We've just entered system mode via the + * syscall lcall. Continue single stepping + * silently until the syscall handler has + * saved the flags. + */ + goto out2; + } + if (frame->tf_rip == (int)IDTVEC(syscall) + 1) { + /* + * The syscall handler has now saved the + * flags. Stop single stepping it. + */ + frame->tf_rflags &= ~PSL_T; + goto out2; + } +#endif + + /* + * Ignore debug register trace traps due to + * accesses in the user's address space, which + * can happen under several conditions such as + * if a user sets a watchpoint on a buffer and + * then passes that buffer to a system call. + * We still want to get TRCTRAPS for addresses + * in kernel space because that is useful when + * debugging the kernel. 
+ */ +#if JG + if (user_dbreg_trap()) { + /* + * Reset breakpoint bits because the + * processor doesn't + */ + /* XXX check upper bits here */ + load_dr6(rdr6() & 0xfffffff0); + goto out2; + } +#endif + /* + * FALLTHROUGH (TRCTRAP kernel mode, kernel address) + */ + case T_BPTFLT: + /* + * If DDB is enabled, let it handle the debugger trap. + * Otherwise, debugger traps "can't happen". + */ +#ifdef DDB + MAKEMPSAFE(have_mplock); + if (kdb_trap(type, 0, frame)) + goto out2; +#endif + break; + + case T_NMI: + MAKEMPSAFE(have_mplock); + /* machine/parity/power fail/"kitchen sink" faults */ +#if NISA > 0 + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... going to debugger\n"); + kdb_trap(type, 0, frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi == 0) + goto out2; + /* FALL THROUGH */ +#endif /* NISA > 0 */ + } + MAKEMPSAFE(have_mplock); + trap_fatal(frame, 0); + goto out2; + } + + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { + vkernel_trap(lp, frame); + goto out2; + } + + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { + vkernel_trap(lp, frame); + goto out; + } + + /* + * Translate fault for emulators (e.g. 
Linux) + */ + if (*p->p_sysent->sv_transtrap) + i = (*p->p_sysent->sv_transtrap)(i, type); + + MAKEMPSAFE(have_mplock); + trapsignal(lp, i, ucode); + +#ifdef DEBUG + if (type <= MAX_TRAP_MSG) { + uprintf("fatal process exception: %s", + trap_msg[type]); + if ((type == T_PAGEFLT) || (type == T_PROTFLT)) + uprintf(", fault VA = 0x%lx", frame->tf_addr); + uprintf("\n"); + } +#endif + +out: +#ifdef SMP + if (ISPL(frame->tf_cs) == SEL_UPL) + KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_rip)); +#endif + userret(lp, frame, sticks); + userexit(lp); +out2: ; +#ifdef SMP + if (have_mplock) + rel_mplock(); +#endif + if (p != NULL && lp != NULL) + KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid); +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! %d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} + +static int +trap_pfault(struct trapframe *frame, int usermode) +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map; + int rv = 0; + vm_prot_t ftype; + thread_t td = curthread; + struct lwp *lp = td->td_lwp; + + va = trunc_page(frame->tf_addr); + if (va >= VM_MIN_KERNEL_ADDRESS) { + /* + * Don't allow user-mode faults in kernel address space. + */ + if (usermode) + goto nogo; + + map = &kernel_map; + } else { + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + if (lp != NULL) + vm = lp->lwp_vmspace; + + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + } + + /* + * PGEX_I is defined only if the execute disable bit capability is + * supported and enabled. 
+ */ + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; +#if JG + else if ((frame->tf_err & PGEX_I) && pg_nx != 0) + ftype = VM_PROT_EXECUTE; +#endif + else + ftype = VM_PROT_READ; + + if (map != &kernel_map) { + /* + * Keep swapout from messing with us during this + * critical time. + */ + PHOLD(lp->lwp_proc); + + /* + * Grow the stack if necessary + */ + /* grow_stack returns false only if va falls into + * a growable stack region and the stack growth + * fails. It returns true if va was not within + * a growable stack region, or if the stack + * growth succeeded. + */ + if (!grow_stack(lp->lwp_proc, va)) { + rv = KERN_FAILURE; + PRELE(lp->lwp_proc); + goto nogo; + } + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + PRELE(lp->lwp_proc); + } else { + /* + * Don't have to worry about process locking or stacks + * in the kernel. + */ + rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (td->td_gd->gd_intr_nesting_level == 0 && + td->td_pcb->pcb_onfault) { + frame->tf_rip = (register_t)td->td_pcb->pcb_onfault; + return (0); + } + trap_fatal(frame, frame->tf_addr); + return (-1); + } + + /* + * NOTE: on amd64 we have a tf_addr field in the trapframe, no + * kludge is needed to pass the fault address to signal handlers. + */ + + return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); +} + +static void +trap_fatal(struct trapframe *frame, vm_offset_t eva) +{ + int code, ss; + u_int type; + long rsp; + struct soft_segment_descriptor softseg; + char *msg; + + code = frame->tf_err; + type = frame->tf_trapno; + sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); + + if (type <= MAX_TRAP_MSG) + msg = trap_msg[type]; + else + msg = "UNKNOWN"; + kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, + ISPL(frame->tf_cs) == SEL_UPL ? 
"user" : "kernel"); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + kprintf("cpuid = %d; ", PCPU_GET(cpuid)); + kprintf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif + if (type == T_PAGEFLT) { + kprintf("fault virtual address = 0x%lx\n", eva); + kprintf("fault code = %s %s %s, %s\n", + code & PGEX_U ? "user" : "supervisor", + code & PGEX_W ? "write" : "read", + code & PGEX_I ? "instruction" : "data", + code & PGEX_P ? "protection violation" : "page not present"); + } + kprintf("instruction pointer = 0x%lx:0x%lx\n", + frame->tf_cs & 0xffff, frame->tf_rip); + if (ISPL(frame->tf_cs) == SEL_UPL) { + ss = frame->tf_ss & 0xffff; + rsp = frame->tf_rsp; + } else { + ss = GSEL(GDATA_SEL, SEL_KPL); + rsp = (long)&frame->tf_rsp; + } + kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); + kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); + kprintf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", + softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); + kprintf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", + softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, + softseg.ssd_gran); + kprintf("processor eflags = "); + if (frame->tf_rflags & PSL_T) + kprintf("trace trap, "); + if (frame->tf_rflags & PSL_I) + kprintf("interrupt enabled, "); + if (frame->tf_rflags & PSL_NT) + kprintf("nested task, "); + if (frame->tf_rflags & PSL_RF) + kprintf("resume, "); + kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); + kprintf("current process = "); + if (curproc) { + kprintf("%lu\n", + (u_long)curproc->p_pid); + } else { + kprintf("Idle\n"); + } + kprintf("current thread = pri %d ", curthread->td_pri); + if (curthread->td_pri >= TDPRI_CRIT) + kprintf("(CRIT)"); + kprintf("\n"); + +#ifdef DDB + if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) + return; +#endif + kprintf("trap number = %d\n", type); + if (type <= MAX_TRAP_MSG) + panic("%s", trap_msg[type]); + else + panic("unknown/reserved 
trap"); +} + +/* + * Double fault handler. Called when a fault occurs while writing + * a frame for a trap/exception onto the stack. This usually occurs + * when the stack overflows (such is the case with infinite recursion, + * for example). + */ +void +dblfault_handler(struct trapframe *frame) +{ + kprintf0("DOUBLE FAULT\n"); + kprintf("\nFatal double fault\n"); + kprintf("rip = 0x%lx\n", frame->tf_rip); + kprintf("rsp = 0x%lx\n", frame->tf_rsp); + kprintf("rbp = 0x%lx\n", frame->tf_rbp); +#ifdef SMP + /* two separate prints in case of a trap on an unmapped page */ + kprintf("cpuid = %d; ", PCPU_GET(cpuid)); + kprintf("apic id = %02x\n", PCPU_GET(apic_id)); +#endif + panic("double fault"); +} + +/* + * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold MP lock. + * + * MPSAFE - note that large sections of this routine are run without + * the MP lock. 
+ */ +void +syscall2(struct trapframe *frame) +{ + struct thread *td = curthread; + struct proc *p = td->td_proc; + struct lwp *lp = td->td_lwp; + caddr_t params; + struct sysent *callp; + register_t orig_tf_rflags; + int sticks; + int error; + int narg; +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif +#ifdef SMP + int have_mplock = 0; +#endif + register_t *argp; + u_int code; + int reg, regcnt; + union sysunion args; + register_t *argsdst; + kprintf0("SYSCALL rip = %016llx\n", frame->tf_rip); + + PCPU_INC(cnt.v_syscall); + + kprintf0("\033[31mSYSCALL %ld\033[39m\n", frame->tf_rax); +#ifdef DIAGNOSTIC + if (ISPL(frame->tf_cs) != SEL_UPL) { + get_mplock(); + panic("syscall"); + /* NOT REACHED */ + } +#endif + + KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid, + frame->tf_eax); + +#ifdef SMP + KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_eip)); + if (syscall_mpsafe == 0) + MAKEMPSAFE(have_mplock); +#endif + userenter(td); /* lazy raise our priority */ + + reg = 0; + regcnt = 6; + /* + * Misc + */ + sticks = (int)td->td_sticks; + orig_tf_rflags = frame->tf_rflags; + + /* + * Virtual kernel intercept - if a VM context managed by a virtual + * kernel issues a system call the virtual kernel handles it, not us. + * Restore the virtual kernel context and return from its system + * call. The current frame is copied out to the virtual kernel. 
+ */ + if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { + error = vkernel_trap(lp, frame); + frame->tf_rax = error; + if (error) + frame->tf_rflags |= PSL_C; + error = EJUSTRETURN; + goto out; + } + + /* + * Get the system call parameters and account for time + */ + lp->lwp_md.md_regs = frame; + params = (caddr_t)frame->tf_rsp + sizeof(register_t); + code = frame->tf_rax; + + if (p->p_sysent->sv_prepsyscall) { + (*p->p_sysent->sv_prepsyscall)( + frame, (int *)(&args.nosys.sysmsg + 1), + &code, ¶ms); + } else { + if (code == SYS_syscall || code == SYS___syscall) { + code = frame->tf_rdi; + reg++; + regcnt--; + } + } + + if (p->p_sysent->sv_mask) + code &= p->p_sysent->sv_mask; + + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * On amd64 we get up to six arguments in registers. The rest are + * on the stack. The first six members of 'struct trampframe' happen + * to be the registers used to pass arguments, in exactly the right + * order. + */ + argp = &frame->tf_rdi; + argp += reg; + argsdst = (register_t *)(&args.nosys.sysmsg + 1); + /* + * JG can we overflow the space pointed to by 'argsdst' + * either with 'bcopy' or with 'copyin'? + */ + bcopy(argp, argsdst, sizeof(register_t) * regcnt); + /* + * copyin is MP aware, but the tracing code is not + */ + if (narg > regcnt) { + KASSERT(params != NULL, ("copyin args with no params!")); + error = copyin(params, &argsdst[regcnt], + (narg - regcnt) * sizeof(register_t)); + if (error) { +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + + ktrsyscall(lp, code, narg, + (void *)(&args.nosys.sysmsg + 1)); + } +#endif + goto bad; + } + } + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); + } +#endif + + /* + * Default return value is 0 (will be copied to %rax). 
Double-value + * returns use %rax and %rdx. %rdx is left unchanged for system + * calls which return only one result. + */ + args.sysmsg_fds[0] = 0; + args.sysmsg_fds[1] = frame->tf_rdx; + + /* + * The syscall might manipulate the trap frame. If it does it + * will probably return EJUSTRETURN. + */ + args.sysmsg_frame = frame; + + STOPEVENT(p, S_SCE, narg); /* MP aware */ + +#ifdef SMP + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe. We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + MAKEMPSAFE(have_mplock); +#endif + + error = (*callp->sy_call)(&args); + +out: + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ + kprintf("SYSMSG %d ", error); + switch (error) { + case 0: + /* + * Reinitialize proc pointer `p' as it may be different + * if this is a child returning from fork syscall. + */ + p = curproc; + lp = curthread->td_lwp; + frame->tf_rax = args.sysmsg_fds[0]; + frame->tf_rdx = args.sysmsg_fds[1]; + kprintf0("RESULT %lld %lld\n", frame->tf_rax, frame->tf_rdx); + frame->tf_rflags &= ~PSL_C; + break; + case ERESTART: + /* + * Reconstruct pc, we know that 'syscall' is 2 bytes. + * We have to do a full context restore so that %r10 + * (which was holding the value of %rcx) is restored for + * the next iteration. + */ + frame->tf_rip -= frame->tf_err; + frame->tf_r10 = frame->tf_rcx; + td->td_pcb->pcb_flags |= PCB_FULLCTX; + break; + case EJUSTRETURN: + break; + case EASYNC: + panic("Unexpected EASYNC return value (for now)"); + default: +bad: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + kprintf0("ERROR %d\n", error); + frame->tf_rax = error; + frame->tf_rflags |= PSL_C; + break; + } + + /* + * Traced syscall. trapsignal() is not MP aware. 
+ */ + if (orig_tf_rflags & PSL_T) { + MAKEMPSAFE(have_mplock); + frame->tf_rflags &= ~PSL_T; + trapsignal(lp, SIGTRAP, 0); + } + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(lp, frame, sticks); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) { + MAKEMPSAFE(have_mplock); + ktrsysret(lp, code, error, args.sysmsg_result); + } +#endif + + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, code); + + userexit(lp); +#ifdef SMP + /* + * Release the MP lock if we had to get it + */ + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount syscall2/end from %p", (void *)frame->tf_eip)); + if (have_mplock) + rel_mplock(); +#endif + KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error); +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! %d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} void fork_return(struct lwp *lp, struct trapframe *frame) { + kprintf0("fork return\n"); + frame->tf_rax = 0; /* Child returns zero */ + frame->tf_rflags &= ~PSL_C; /* success */ + frame->tf_rdx = 1; + + generic_lwp_return(lp, frame); + KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); } /* * Simplified back end of syscall(), used when returning from fork() - * or lwp_create() directly into user mode. MP lock is held on entry and - * should be released on return. This code will return back into the fork + * directly into user mode. MP lock is held on entry and should be + * released on return. This code will return back into the fork * trampoline code which then runs doreti. */ void generic_lwp_return(struct lwp *lp, struct trapframe *frame) { + kprintf0("generic_lwp_return\n"); + struct proc *p = lp->lwp_proc; + + /* + * Newly forked processes are given a kernel priority. 
We have to + * adjust the priority to a normal user priority and fake entry + * into the kernel (call userenter()) to install a passive release + * function just in case userret() decides to stop the process. This + * can occur when ^Z races a fork. If we do not install the passive + * release function the current process designation will not be + * released when the thread goes to sleep. + */ + lwkt_setpri_self(TDPRI_USER_NORM); + userenter(lp->lwp_thread); + userret(lp, frame, 0); +#ifdef KTRACE + if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) + ktrsysret(lp, SYS_fork, 0, 0); +#endif + p->p_flag |= P_PASSIVE_ACQ; + userexit(lp); + p->p_flag &= ~P_PASSIVE_ACQ; +#ifdef SMP + KKASSERT(lp->lwp_thread->td_mpcount == 1); + rel_mplock(); +#endif } /* * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA * fault (which is then passed back to the virtual kernel) if an attempt is * made to use the FP unit. - * + * * XXX this is a fairly big hack. */ void set_vkernel_fp(struct trapframe *frame) { + /* JGXXX */ } diff --git a/sys/platform/pc64/amd64/vm_machdep.c b/sys/platform/pc64/amd64/vm_machdep.c index 4602aa5cce..f9621f3184 100644 --- a/sys/platform/pc64/amd64/vm_machdep.c +++ b/sys/platform/pc64/amd64/vm_machdep.c @@ -2,6 +2,7 @@ * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -39,7 +40,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/vm_machdep.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/amd64/vm_machdep.c,v 1.3 2008/08/29 17:07:10 dillon Exp $ */ #include @@ -73,11 +74,9 @@ #include -char machine[] = MACHINE; - -char cpu_vendor[] = "DragonFly"; /* XXX */ -u_int cpu_id = 0x80000000; /* XXX */ +#include +static void cpu_reset_real (void); /* * Finish a fork operation, with lwp lp2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -86,6 +85,109 @@ u_int cpu_id = 0x80000000; /* XXX */ void cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) { + struct pcb *pcb2; + + if ((flags & RFPROC) == 0) { + if ((flags & RFMEM) == 0) { + /* unshare user LDT */ + struct pcb *pcb1 = lp1->lwp_thread->td_pcb; + struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt; + if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) { + pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len); + user_ldt_free(pcb1); + pcb1->pcb_ldt = pcb_ldt; + set_user_ldt(pcb1); + } + } + return; + } + +#if NNPX > 0 + /* Ensure that lp1's pcb is up to date. */ + if (mdcpu->gd_npxthread == lp1->lwp_thread) + npxsave(lp1->lwp_thread->td_savefpu); +#endif + + /* + * Copy lp1's PCB. This really only applies to the + * debug registers and FP state, but its faster to just copy the + * whole thing. Because we only save the PCB at switchout time, + * the register state may not be current. + */ + pcb2 = lp2->lwp_thread->td_pcb; + *pcb2 = *lp1->lwp_thread->td_pcb; + + /* + * Create a new fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a + * syscall. This copies the user mode register values. 
+ * + * pcb_rsp must allocate an additional call-return pointer below + * the trap frame which will be restored by cpu_heavy_restore from + * PCB_RIP, and the thread's td_sp pointer must allocate an + * additonal two quadwords below the pcb_rsp call-return pointer to + * hold the LWKT restore function pointer and rflags. + * + * The LWKT restore function pointer must be set to cpu_heavy_restore, + * which is our standard heavy-weight process switch-in function. + * YYY eventually we should shortcut fork_return and fork_trampoline + * to use the LWKT restore function directly so we can get rid of + * all the extra crap we are setting up. + */ + lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1; + bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs)); + + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir); + pcb2->pcb_cr3 |= PG_RW | PG_U | PG_V; + pcb2->pcb_rbx = (unsigned long)fork_return; /* fork_trampoline argument */ + pcb2->pcb_rbp = 0; + pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *); + pcb2->pcb_r12 = (unsigned long)lp2; /* fork_trampoline argument */ + pcb2->pcb_r13 = 0; + pcb2->pcb_r14 = 0; + pcb2->pcb_r15 = 0; + pcb2->pcb_rip = (unsigned long)fork_trampoline; + lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *)); + *(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER; + lp2->lwp_thread->td_sp -= sizeof(void *); + *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; + + /* + * pcb2->pcb_ldt: duplicated below, if necessary. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above (always 0 here?). + * pcb2->pcb_onfault: cloned above (always NULL here?). + */ + + /* + * XXX don't copy the i/o pages. this should probably be fixed. + */ + pcb2->pcb_ext = 0; + + /* Copy the LDT, if necessary. 
*/ + if (pcb2->pcb_ldt != 0) { + if (flags & RFMEM) { + pcb2->pcb_ldt->ldt_refcnt++; + } else { + pcb2->pcb_ldt = user_ldt_alloc(pcb2, + pcb2->pcb_ldt->ldt_len); + } + } + bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls, + sizeof(lp2->lwp_thread->td_tls)); + /* + * Now, cpu_switch() can schedule the new lwp. + * pcb_rsp is loaded pointing to the cpu_switch() stack frame + * containing the return address when exiting cpu_switch. + * This will normally be to fork_trampoline(), which will have + * %rbx loaded with the new lwp's pointer. fork_trampoline() + * will set up a stack to call fork_return(lp, frame); to complete + * the return to user-mode. + */ } /* @@ -94,6 +196,7 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) int cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params) { + panic("dummy called in vm_machdep.c: line: %d", __LINE__); return (0); } @@ -107,16 +210,30 @@ void cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *), void *arg) { + /* + * Note that the trap frame follows the args, so the function + * is really called like this: func(arg, frame); + */ + lp->lwp_thread->td_pcb->pcb_rbx = (long)func; /* function */ + lp->lwp_thread->td_pcb->pcb_r12 = (long)arg; /* first arg */ } void cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg) { + td->td_pcb->pcb_rbx = (long)func; + td->td_pcb->pcb_r12 = (long)arg; + td->td_switch = cpu_lwkt_switch; + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = rfunc; /* exit function on return */ + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = cpu_kthread_restore; } void cpu_lwp_exit(void) { + panic("dummy called in vm_machdep.c: line: %d", __LINE__); } /* @@ -131,6 +248,7 @@ cpu_lwp_exit(void) void cpu_thread_exit(void) { + panic("dummy called in vm_machdep.c: line: %d", __LINE__); } /* @@ -140,11 +258,94 @@ cpu_thread_exit(void) void cpu_proc_wait(struct proc *p) { + panic("dummy called in vm_machdep.c: line: %d", __LINE__); +} + +void 
+cpu_reset(void) +{ + cpu_reset_real(); +} + +static void +cpu_reset_real(void) +{ + /* + * Attempt to do a CPU reset via the keyboard controller, + * do not turn of the GateA20, as any machine that fails + * to do the reset here would then end up in no man's land. + */ + +#if !defined(BROKEN_KEYBOARD_RESET) + outb(IO_KBD + 4, 0xFE); + DELAY(500000); /* wait 0.5 sec to see if that did it */ + kprintf("Keyboard reset did not work, attempting CPU shutdown\n"); + DELAY(1000000); /* wait 1 sec for kprintf to complete */ +#endif +#if JG + /* force a shutdown by unmapping entire address space ! */ + bzero((caddr_t) PTD, PAGE_SIZE); +#endif + + /* "good night, sweet prince .... " */ + cpu_invltlb(); + /* NOTREACHED */ + while(1); +} + +int +grow_stack(struct proc *p, u_long sp) +{ + int rv; + + rv = vm_map_growstack (p, sp); + if (rv != KERN_SUCCESS) + return (0); + + return (1); +} + +/* + * Tell whether this address is in some physical memory region. + * Currently used by the kernel coredump code in order to avoid + * dumping the ``ISA memory hole'' which could cause indefinite hangs, + * or other unpredictable behaviour. + */ + +int +is_physical_memory(vm_offset_t addr) +{ +#if NISA > 0 + /* The ISA ``memory hole''. */ + if (addr >= 0xa0000 && addr < 0x100000) + return 0; +#endif + /* + * stuff other tests for known memory-mapped devices (PCI?) 
+ * here + */ + + return 1; +} + +/* + * platform-specific vmspace initialization (nothing for amd64) + */ +void +cpu_vmspace_alloc(struct vmspace *vm __unused) +{ +} + +void +cpu_vmspace_free(struct vmspace *vm __unused) +{ + panic("dummy called in vm_machdep.c: line: %d", __LINE__); } int kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot) { + panic("dummy called in vm_machdep.c: line: %d", __LINE__); return 0; } diff --git a/sys/platform/pc64/apic/apic_abi.c b/sys/platform/pc64/apic/apic_abi.c new file mode 100644 index 0000000000..5477663771 --- /dev/null +++ b/sys/platform/pc64/apic/apic_abi.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 1996, by Steve Passe. All rights reserved. + * Copyright (c) 2005,2008 The DragonFly Project. All rights reserved. + * All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/pc64/apic/apic_abi.c,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include /* apic_8254_intr */ +#include +#include +#include + +#include + +#include "apic_ipl.h" + +#ifdef APIC_IO + +extern void APIC_INTREN(int); +extern void APIC_INTRDIS(int); + +extern inthand_t + IDTVEC(apic_fastintr0), IDTVEC(apic_fastintr1), + IDTVEC(apic_fastintr2), IDTVEC(apic_fastintr3), + IDTVEC(apic_fastintr4), IDTVEC(apic_fastintr5), + IDTVEC(apic_fastintr6), IDTVEC(apic_fastintr7), + IDTVEC(apic_fastintr8), IDTVEC(apic_fastintr9), + IDTVEC(apic_fastintr10), IDTVEC(apic_fastintr11), + IDTVEC(apic_fastintr12), IDTVEC(apic_fastintr13), + IDTVEC(apic_fastintr14), IDTVEC(apic_fastintr15), + IDTVEC(apic_fastintr16), IDTVEC(apic_fastintr17), + IDTVEC(apic_fastintr18), IDTVEC(apic_fastintr19), + IDTVEC(apic_fastintr20), IDTVEC(apic_fastintr21), + IDTVEC(apic_fastintr22), IDTVEC(apic_fastintr23); + +extern inthand_t + IDTVEC(apic_slowintr0), IDTVEC(apic_slowintr1), + IDTVEC(apic_slowintr2), IDTVEC(apic_slowintr3), + IDTVEC(apic_slowintr4), IDTVEC(apic_slowintr5), + 
IDTVEC(apic_slowintr6), IDTVEC(apic_slowintr7), + IDTVEC(apic_slowintr8), IDTVEC(apic_slowintr9), + IDTVEC(apic_slowintr10), IDTVEC(apic_slowintr11), + IDTVEC(apic_slowintr12), IDTVEC(apic_slowintr13), + IDTVEC(apic_slowintr14), IDTVEC(apic_slowintr15), + IDTVEC(apic_slowintr16), IDTVEC(apic_slowintr17), + IDTVEC(apic_slowintr18), IDTVEC(apic_slowintr19), + IDTVEC(apic_slowintr20), IDTVEC(apic_slowintr21), + IDTVEC(apic_slowintr22), IDTVEC(apic_slowintr23); + +extern inthand_t + IDTVEC(apic_wrongintr0), IDTVEC(apic_wrongintr1), + IDTVEC(apic_wrongintr2), IDTVEC(apic_wrongintr3), + IDTVEC(apic_wrongintr4), IDTVEC(apic_wrongintr5), + IDTVEC(apic_wrongintr6), IDTVEC(apic_wrongintr7), + IDTVEC(apic_wrongintr8), IDTVEC(apic_wrongintr9), + IDTVEC(apic_wrongintr10), IDTVEC(apic_wrongintr11), + IDTVEC(apic_wrongintr12), IDTVEC(apic_wrongintr13), + IDTVEC(apic_wrongintr14), IDTVEC(apic_wrongintr15), + IDTVEC(apic_wrongintr16), IDTVEC(apic_wrongintr17), + IDTVEC(apic_wrongintr18), IDTVEC(apic_wrongintr19), + IDTVEC(apic_wrongintr20), IDTVEC(apic_wrongintr21), + IDTVEC(apic_wrongintr22), IDTVEC(apic_wrongintr23); + +static int apic_setvar(int, const void *); +static int apic_getvar(int, void *); +static int apic_vectorctl(int, int, int); +static void apic_finalize(void); +static void apic_cleanup(void); + +static inthand_t *apic_fastintr[APIC_HWI_VECTORS] = { + &IDTVEC(apic_fastintr0), &IDTVEC(apic_fastintr1), + &IDTVEC(apic_fastintr2), &IDTVEC(apic_fastintr3), + &IDTVEC(apic_fastintr4), &IDTVEC(apic_fastintr5), + &IDTVEC(apic_fastintr6), &IDTVEC(apic_fastintr7), + &IDTVEC(apic_fastintr8), &IDTVEC(apic_fastintr9), + &IDTVEC(apic_fastintr10), &IDTVEC(apic_fastintr11), + &IDTVEC(apic_fastintr12), &IDTVEC(apic_fastintr13), + &IDTVEC(apic_fastintr14), &IDTVEC(apic_fastintr15), + &IDTVEC(apic_fastintr16), &IDTVEC(apic_fastintr17), + &IDTVEC(apic_fastintr18), &IDTVEC(apic_fastintr19), + &IDTVEC(apic_fastintr20), &IDTVEC(apic_fastintr21), + &IDTVEC(apic_fastintr22), 
&IDTVEC(apic_fastintr23) +}; + +static inthand_t *apic_slowintr[APIC_HWI_VECTORS] = { + &IDTVEC(apic_slowintr0), &IDTVEC(apic_slowintr1), + &IDTVEC(apic_slowintr2), &IDTVEC(apic_slowintr3), + &IDTVEC(apic_slowintr4), &IDTVEC(apic_slowintr5), + &IDTVEC(apic_slowintr6), &IDTVEC(apic_slowintr7), + &IDTVEC(apic_slowintr8), &IDTVEC(apic_slowintr9), + &IDTVEC(apic_slowintr10), &IDTVEC(apic_slowintr11), + &IDTVEC(apic_slowintr12), &IDTVEC(apic_slowintr13), + &IDTVEC(apic_slowintr14), &IDTVEC(apic_slowintr15), + &IDTVEC(apic_slowintr16), &IDTVEC(apic_slowintr17), + &IDTVEC(apic_slowintr18), &IDTVEC(apic_slowintr19), + &IDTVEC(apic_slowintr20), &IDTVEC(apic_slowintr21), + &IDTVEC(apic_slowintr22), &IDTVEC(apic_slowintr23) +}; + +static inthand_t *apic_wrongintr[APIC_HWI_VECTORS] = { + &IDTVEC(apic_wrongintr0), &IDTVEC(apic_wrongintr1), + &IDTVEC(apic_wrongintr2), &IDTVEC(apic_wrongintr3), + &IDTVEC(apic_wrongintr4), &IDTVEC(apic_wrongintr5), + &IDTVEC(apic_wrongintr6), &IDTVEC(apic_wrongintr7), + &IDTVEC(apic_wrongintr8), &IDTVEC(apic_wrongintr9), + &IDTVEC(apic_wrongintr10), &IDTVEC(apic_wrongintr11), + &IDTVEC(apic_wrongintr12), &IDTVEC(apic_wrongintr13), + &IDTVEC(apic_wrongintr14), &IDTVEC(apic_wrongintr15), + &IDTVEC(apic_wrongintr16), &IDTVEC(apic_wrongintr17), + &IDTVEC(apic_wrongintr18), &IDTVEC(apic_wrongintr19), + &IDTVEC(apic_wrongintr20), &IDTVEC(apic_wrongintr21), + &IDTVEC(apic_wrongintr22), &IDTVEC(apic_wrongintr23) +}; + +static int apic_imcr_present; + +struct machintr_abi MachIntrABI = { + MACHINTR_APIC, + .intrdis = APIC_INTRDIS, + .intren = APIC_INTREN, + .vectorctl = apic_vectorctl, + .setvar = apic_setvar, + .getvar = apic_getvar, + .finalize = apic_finalize, + .cleanup = apic_cleanup +}; + +static int +apic_setvar(int varid, const void *buf) +{ + int error = 0; + + switch(varid) { + case MACHINTR_VAR_IMCR_PRESENT: + apic_imcr_present = *(const int *)buf; + break; + default: + error = ENOENT; + break; + } + return (error); +} + +static int 
+apic_getvar(int varid, void *buf) +{ + int error = 0; + + switch(varid) { + case MACHINTR_VAR_IMCR_PRESENT: + *(int *)buf = apic_imcr_present; + break; + default: + error = ENOENT; + break; + } + return (error); +} + +/* + * Called before interrupts are physically enabled, this routine does the + * final configuration of the BSP's local APIC: + * + * - disable 'pic mode'. + * - disable 'virtual wire mode'. + * - enable NMI. + */ +static void +apic_finalize(void) +{ + u_int32_t temp; + + /* + * If an IMCR is present, program bit 0 to disconnect the 8259 + * from the BSP. The 8259 may still be connected to LINT0 on + * the BSP's LAPIC. + */ + if (apic_imcr_present) { + outb(0x22, 0x70); /* select IMCR */ + outb(0x23, 0x01); /* disconnect 8259 */ + } + + /* + * Setup lint0 (the 8259 'virtual wire' connection). We + * mask the interrupt, completing the disconnection of the + * 8259. + */ + temp = lapic.lvt_lint0; + temp |= APIC_LVT_MASKED; + lapic.lvt_lint0 = temp; + + /* + * setup lint1 to handle an NMI + */ + temp = lapic.lvt_lint1; + temp &= ~APIC_LVT_MASKED; + lapic.lvt_lint1 = temp; + + if (bootverbose) + apic_dump("bsp_apic_configure()"); +} + +/* + * This routine is called after physical interrupts are enabled but before + * the critical section is released. We need to clean out any interrupts + * that had already been posted to the cpu. + */ +static void +apic_cleanup(void) +{ + mdcpu->gd_fpending = 0; + mdcpu->gd_ipending = 0; +} + +static +int +apic_vectorctl(int op, int intr, int flags) +{ + int error; + int vector; + int select; + u_int32_t value; + u_long ef; + + if (intr < 0 || intr >= APIC_HWI_VECTORS) + return (EINVAL); + + ef = read_eflags(); + cpu_disable_intr(); + error = 0; + + switch(op) { + case MACHINTR_VECTOR_SETUP: + /* + * Setup an interrupt vector. First install the vector in the + * cpu's Interrupt Descriptor Table (IDT). 
+ */ + if (flags & INTR_FAST) { + vector = TPR_SLOW_INTS + intr; + setidt(vector, apic_wrongintr[intr], + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + vector = TPR_FAST_INTS + intr; + setidt(vector, apic_fastintr[intr], + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + } else { + vector = TPR_SLOW_INTS + intr; + + /* + * This is probably not needed any more. XXX + */ + if (intr == apic_8254_intr || intr == 8) { + vector = TPR_FAST_INTS + intr; + } + setidt(vector, apic_slowintr[intr], + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + } + + /* + * Now reprogram the vector in the IO APIC. In order to avoid + * losing an EOI for a level interrupt, which is vector based, + * make sure that the IO APIC is programmed for edge-triggering + * first, then reprogrammed with the new vector. This should + * clear the IRR bit. + */ + if (int_to_apicintpin[intr].ioapic >= 0) { + imen_lock(); + select = int_to_apicintpin[intr].redirindex; + value = io_apic_read(int_to_apicintpin[intr].ioapic, select); + io_apic_write(int_to_apicintpin[intr].ioapic, + select, (value & ~APIC_TRIGMOD_MASK)); + io_apic_write(int_to_apicintpin[intr].ioapic, + select, (value & ~IOART_INTVEC) | vector); + imen_unlock(); + } + machintr_intren(intr); + break; + case MACHINTR_VECTOR_TEARDOWN: + /* + * Teardown an interrupt vector. The vector should already be + * installed in the cpu's IDT, but make sure. + */ + machintr_intrdis(intr); + vector = TPR_SLOW_INTS + intr; + setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + + /* + * And then reprogram the IO APIC to point to the SLOW vector (it may + * have previously been pointed to the FAST version of the vector). + * This will allow us to keep track of spurious interrupts. + * + * In order to avoid losing an EOI for a level interrupt, which is + * vector based, make sure that the IO APIC is programmed for + * edge-triggering first, then reprogrammed with the new vector. 
+ * This should clear the IRR bit. + */ + if (int_to_apicintpin[intr].ioapic >= 0) { + imen_lock(); + select = int_to_apicintpin[intr].redirindex; + value = io_apic_read(int_to_apicintpin[intr].ioapic, select); + io_apic_write(int_to_apicintpin[intr].ioapic, + select, (value & ~APIC_TRIGMOD_MASK)); + io_apic_write(int_to_apicintpin[intr].ioapic, + select, (value & ~IOART_INTVEC) | vector); + imen_unlock(); + } + break; + case MACHINTR_VECTOR_SETDEFAULT: + /* + * This is a just-in-case an int pin is running through the 8259 + * when we don't expect it to, or an IO APIC pin somehow wound + * up getting enabled without us specifically programming it in + * this ABI. Note that IO APIC pins are by default programmed + * to IDT_OFFSET + intr. + */ + vector = IDT_OFFSET + intr; + setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + break; + default: + error = EOPNOTSUPP; + break; + } + + write_eflags(ef); + return (error); +} + +#endif + diff --git a/sys/platform/pc64/apic/apic_ipl.h b/sys/platform/pc64/apic/apic_ipl.h new file mode 100644 index 0000000000..f3d73582c9 --- /dev/null +++ b/sys/platform/pc64/apic/apic_ipl.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 1997, by Steve Passe + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/apic_ipl.h,v 1.3 1999/08/28 00:44:36 peter Exp $ + * $DragonFly: src/sys/platform/pc64/apic/apic_ipl.h,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#ifndef _ARCH_APIC_IPL_H_ +#define _ARCH_APIC_IPL_H_ + +#ifdef APIC_IO + +/* IDT vector base for regular (aka. slow) and fast interrupts */ +#define TPR_SLOW_INTS 0x20 +#define TPR_FAST_INTS 0x60 + +#define APIC_HWI_VECTORS 24 +#define APIC_HWI_MASK ((1 << APIC_HWI_VECTORS) - 1) + +#endif + +#ifdef LOCORE + +/* + * Interrupts may or may not be disabled when using these functions. + */ +#define APIC_IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ + +#define APIC_IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ + +#endif + +#endif /* !_ARCH_APIC_IPL_H_ */ diff --git a/sys/platform/pc64/apic/apic_ipl.s b/sys/platform/pc64/apic/apic_ipl.s new file mode 100644 index 0000000000..ce155366af --- /dev/null +++ b/sys/platform/pc64/apic/apic_ipl.s @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1997, by Steve Passe, All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/apic_ipl.s,v 1.27.2.2 2000/09/30 02:49:35 ps Exp $ + * $DragonFly: src/sys/platform/pc64/apic/apic_ipl.s,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#include "use_npx.h" + +#include +#include +#include +#include +#include + +#include "apicreg.h" +#include "apic_ipl.h" +#include "assym.s" + +#ifdef APIC_IO + + .data + ALIGN_DATA + + /* + * Interrupt mask for APIC interrupts, defaults to all hardware + * interrupts turned off. + */ + + .p2align 2 /* MUST be 32bit aligned */ + + .globl apic_imen +apic_imen: + .long APIC_HWI_MASK + + .text + SUPERALIGN_TEXT + + /* + * Functions to enable and disable a hardware interrupt. The + * IRQ number is passed as an argument. 
+ */ +ENTRY(APIC_INTRDIS) + APIC_IMASK_LOCK /* enter critical reg */ + movl 4(%esp),%eax +1: + btsl %eax, apic_imen + shll $4, %eax + movl CNAME(int_to_apicintpin) + 8(%eax), %edx + movl CNAME(int_to_apicintpin) + 12(%eax), %ecx + testl %edx, %edx + jz 2f + movl %ecx, (%edx) /* target register index */ + orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ +2: + APIC_IMASK_UNLOCK /* exit critical reg */ + ret + +ENTRY(APIC_INTREN) + APIC_IMASK_LOCK /* enter critical reg */ + movl 4(%esp), %eax /* mask into %eax */ +1: + btrl %eax, apic_imen /* update apic_imen */ + shll $4, %eax + movl CNAME(int_to_apicintpin) + 8(%eax), %edx + movl CNAME(int_to_apicintpin) + 12(%eax), %ecx + testl %edx, %edx + jz 2f + movl %ecx, (%edx) /* write the target register index */ + andl $~IOART_INTMASK, 16(%edx) /* clear mask bit */ +2: + APIC_IMASK_UNLOCK /* exit critical reg */ + ret + +/****************************************************************************** + * + */ + +/* + * u_int io_apic_read(int apic, int select); + */ +ENTRY(io_apic_read) + movl 4(%esp), %ecx /* APIC # */ + movl ioapic, %eax + movl (%eax,%ecx,4), %edx /* APIC base register address */ + movl 8(%esp), %eax /* target register index */ + movl %eax, (%edx) /* write the target register index */ + movl 16(%edx), %eax /* read the APIC register data */ + ret /* %eax = register value */ + +/* + * void io_apic_write(int apic, int select, int value); + */ +ENTRY(io_apic_write) + movl 4(%esp), %ecx /* APIC # */ + movl ioapic, %eax + movl (%eax,%ecx,4), %edx /* APIC base register address */ + movl 8(%esp), %eax /* target register index */ + movl %eax, (%edx) /* write the target register index */ + movl 12(%esp), %eax /* target register value */ + movl %eax, 16(%edx) /* write the APIC register data */ + ret /* %eax = void */ + +/* + * Send an EOI to the local APIC. 
+ */ +ENTRY(apic_eoi) + movl $0, lapic+0xb0 + ret + +#endif diff --git a/sys/platform/pc64/apic/apic_vector.s b/sys/platform/pc64/apic/apic_vector.s new file mode 100644 index 0000000000..52e2ff1c68 --- /dev/null +++ b/sys/platform/pc64/apic/apic_vector.s @@ -0,0 +1,536 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: vector.s, 386BSD 0.1 unknown origin + * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ + * $DragonFly: src/sys/platform/pc64/apic/apic_vector.s,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#include "use_npx.h" +#include "opt_auto_eoi.h" + +#include +#include +#include +#include + +#include +#include + +#include "assym.s" + +#include "apicreg.h" +#include "apic_ipl.h" +#include +#include + +/* convert an absolute IRQ# into a bitmask */ +#define IRQ_LBIT(irq_num) (1 << (irq_num)) + +/* make an index into the IO APIC from the IRQ# */ +#define REDTBL_IDX(irq_num) (0x10 + ((irq_num) * 2)) + +#ifdef SMP +#define MPLOCKED lock ; +#else +#define MPLOCKED +#endif + +/* + * Push an interrupt frame in a format acceptable to doreti, reload + * the segment registers for the kernel. + */ +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushl $0 ; /* dummy xflags type */ \ + pushal ; \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs ; \ + pushl %gs ; \ + cld ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov %ax,%gs ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + +#define PUSH_DUMMY \ + pushfl ; /* phys int frame / flags */ \ + pushl %cs ; /* phys int frame / cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushl $0 ; /* dummy xflags type */ \ + subl $13*4,%esp ; /* pushal + 4 seg regs (dummy) + CPL */ \ + +/* + * Warning: POP_FRAME can only be used if there is no chance of a + * segment register being changed (e.g. by procfs), which is why syscalls + * have to use doreti. 
+ */ +#define POP_FRAME \ + popl %gs ; \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $3*4,%esp ; /* dummy xflags, trap & error codes */ \ + +#define POP_DUMMY \ + addl $19*4,%esp ; \ + +#define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 +#define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 + +#define MASK_IRQ(irq_num) \ + APIC_IMASK_LOCK ; /* into critical reg */ \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ + jne 7f ; /* masked, don't mask */ \ + orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ + movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ + movl REDIRIDX(irq_num), %eax ; /* get the index */ \ + movl %eax, (%ecx) ; /* write the index */ \ + movl IOAPIC_WINDOW(%ecx), %eax ; /* current value */ \ + orl $IOART_INTMASK, %eax ; /* set the mask */ \ + movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ +7: ; /* already masked */ \ + APIC_IMASK_UNLOCK ; \ + +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs must still be masked as we don't clear the source, + * and the EOI cycle would cause redundant INTs to occur. + */ +#define MASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_LBIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't mask */ \ + MASK_IRQ(irq_num) ; \ +9: ; \ + +/* + * Test to see if the source is currently masked, clear if so. 
+ */ +#define UNMASK_IRQ(irq_num) \ + cmpl $0,%eax ; \ + jnz 8f ; \ + APIC_IMASK_LOCK ; /* into critical reg */ \ + testl $IRQ_LBIT(irq_num), apic_imen ; \ + je 7f ; /* bit clear, not masked */ \ + andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ + movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ + movl REDIRIDX(irq_num), %eax ; /* get the index */ \ + movl %eax,(%ecx) ; /* write the index */ \ + movl IOAPIC_WINDOW(%ecx),%eax ; /* current value */ \ + andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ + movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ +7: ; \ + APIC_IMASK_UNLOCK ; \ +8: ; \ + +#ifdef APIC_IO + +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti + * - Mask the interrupt and reenable its source + * - If we cannot take the interrupt set its fpending bit and + * doreti. Note that we cannot mess with mp_lock at all + * if we entered from a critical section! + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask and doreti. + * + * YYY can cache gd base pointer instead of using hidden %fs prefixes. 
+ */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic_eoi ; \ + movl PCPU(curthread),%ebx ; \ + movl $0,%eax ; /* CURRENT CPL IN FRAME (REMOVED) */ \ + pushl %eax ; \ + testl $-1,TD_NEST_COUNT(%ebx) ; \ + jne 1f ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jl 2f ; \ +1: ; \ + /* in critical section, make interrupt pending */ \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + orl $RQF_INTPEND,PCPU(reqflags) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushl $irq_num ; \ + pushl %esp ; /* pass frame by reference */ \ + call ithread_fast_handler ; /* returns 0 to unmask */ \ + addl $8, %esp ; \ + UNMASK_IRQ(irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. Note that we cannot mess with mp_lock at all + * if we entered from a critical section! + * - If we can take the interrupt clear its ipending bit + * and schedule the thread. Leave interrupts masked and doreti. + * + * Note that calls to sched_ithd() are made with interrupts enabled + * and outside a critical section. YYY sched_ithd may preempt us + * synchronously (fix interrupt stacking). + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. 
+ */ + +#define SLOW_INTR(irq_num, vec_name, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + maybe_extra_ipending ; \ +; \ + MASK_LEVEL_IRQ(irq_num) ; \ + incl PCPU(cnt) + V_INTR ; \ + movl $0, lapic_eoi ; \ + movl PCPU(curthread),%ebx ; \ + movl $0,%eax ; /* CURRENT CPL IN FRAME (REMOVED) */ \ + pushl %eax ; /* cpl do restore */ \ + testl $-1,TD_NEST_COUNT(%ebx) ; \ + jne 1f ; \ + cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + jl 2f ; \ +1: ; \ + /* set the pending bit and return, leave the interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + orl $RQF_INTPEND,PCPU(reqflags) ; \ + jmp 5f ; \ +2: ; \ + /* set running bit, clear pending bit, run handler */ \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ + incl TD_NEST_COUNT(%ebx) ; \ + sti ; \ + pushl $irq_num ; \ + call sched_ithd ; \ + addl $4,%esp ; \ + cli ; \ + decl TD_NEST_COUNT(%ebx) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ + +/* + * Wrong interrupt call handlers. We program these into APIC vectors + * that should otherwise never occur. For example, we program the SLOW + * vector for irq N with this when we program the FAST vector with the + * real interrupt. + * + * XXX for now all we can do is EOI it. We can't call do_wrongintr + * (yet) because we could be in a critical section. + */ +#define WRONGINTR(irq_num,vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $0, lapic_eoi ; /* End Of Interrupt to APIC */ \ + /*pushl $irq_num ;*/ \ + /*call do_wrongintr ;*/ \ + /*addl $4,%esp ;*/ \ + POP_FRAME ; \ + iret ; \ + +#endif + +/* + * Handle "spurious INTerrupts". + * Notes: + * This is different than the "spurious INTerrupt" generated by an + * 8259 PIC for missing INTs. See the APIC documentation for details. + * This routine should NOT do an 'EOI' cycle. + */ + .text + SUPERALIGN_TEXT + .globl Xspuriousint +Xspuriousint: + + /* No EOI cycle used here */ + + iret + + +/* + * Handle TLB shootdowns. 
+ */ + .text + SUPERALIGN_TEXT + .globl Xinvltlb +Xinvltlb: + pushl %eax + + movl %cr3, %eax /* invalidate the TLB */ + movl %eax, %cr3 + + ss /* stack segment, avoid %ds load */ + movl $0, lapic_eoi /* End Of Interrupt to APIC */ + + popl %eax + iret + + +/* + * Executed by a CPU when it receives an Xcpustop IPI from another CPU, + * + * - Signals its receipt. + * - Waits for permission to restart. + * - Processing pending IPIQ events while waiting. + * - Signals its restart. + */ + + .text + SUPERALIGN_TEXT + .globl Xcpustop +Xcpustop: + pushl %ebp + movl %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + pushl %ds /* save current data segment */ + pushl %fs + + movl $KDSEL, %eax + mov %ax, %ds /* use KERNEL data segment */ + movl $KPSEL, %eax + mov %ax, %fs + + movl $0, lapic_eoi /* End Of Interrupt to APIC */ + + movl PCPU(cpuid), %eax + imull $PCB_SIZE, %eax + leal CNAME(stoppcbs)(%eax), %eax + pushl %eax + call CNAME(savectx) /* Save process context */ + addl $4, %esp + + + movl PCPU(cpuid), %eax + + /* + * Indicate that we have stopped and loop waiting for permission + * to start again. We must still process IPI events while in a + * stopped state. + */ + MPLOCKED + btsl %eax, stopped_cpus /* stopped_cpus |= (1< + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1996, by Peter Wemm and Steve Passe, All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/apic.h,v 1.14.2.2 2003/03/21 21:46:15 jhb Exp $ + * $DragonFly: src/sys/platform/pc64/apic/apicreg.h,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#ifndef _MACHINE_APICREG_H_ +#define _MACHINE_APICREG_H_ + +/* + * Local && I/O APIC definitions for Pentium P54C+ Built-in APIC. + * + * A per-cpu APIC resides in memory location 0xFEE00000. + * + * 31 ... 24 23 ... 16 15 ... 8 7 ... 0 + * +-----------+-----------+-----------+-----------+ + * 0000 | | | | | + * 0010 | | | | | + * +-----------+-----------+-----------+-----------+ + * + * +-----------+-----------+-----------+-----------+ + * 0020 ID | | ID | | | | RW + * +-----------+-----------+-----------+-----------+ + * + * The physical APIC ID is used with physical interrupt + * delivery modes. + * + * +-----------+-----------+-----------+-----------+ + * 0030 VER | | | | | + * +-----------+-----------+-----------+-----------+ + * 0040 | | | | | + * 0050 | | | | | + * 0060 | | | | | + * 0070 | | | | | + * +-----------+-----------+-----------+-----------+ + * 0080 TPR | | | | PRIO SUBC | + * 0090 APR | | | | | + * 00A0 PPR | | | | | + * +-----------+-----------+-----------+-----------+ + * + * The Task Priority Register provides a priority threshold + * mechanism for interrupting the processor. Only interrupts + * with a higher priority then that specified in the TPR will + * be served. 
Other interrupts are recorded and serviced + * as soon as the TPR value decreases enough to allow that + * (unless EOId by another APIC). + * + * PRIO (7:4). Main priority. If 15 the APIC will not + * accept any interrupts. + * SUBC (3:0) Sub priority. See APR/PPR. + * + * + * The Processor Priority Register determines whether a + * pending interrupt can be dispensed to the processor. ISRV + * Is the vector of the highest priority ISR bit set or + * zero if no ISR bit is set. + * + * IF TPR[7:4] >= ISRV[7:4] + * PPR[7:0] = TPR[7:0] + * ELSE + * PPR[7:0] = ISRV[7:4].000 + * + * The Arbitration Priority Register holds the current + * lowest priority of the processor, a value used during + * lowest-priority arbitration. + * + * IF (TPR[7:4] >= IRRV[7:4] AND TPR[7:4] > ISRV[7:4]) + * APR[7:0] = TPR[7:0] + * ELSE + * APR[7:4] = max((TPR[7:4]&ISRV[7:4]),IRRV[7:4]).000 + * + * +-----------+-----------+-----------+-----------+ + * 00B0 EOI | | | | | + * +-----------+-----------+-----------+-----------+ + * 00C0 | | | | | + * +-----------+-----------+-----------+-----------+ + * 00D0 LDR |LOG APICID | | | | + * +-----------+-----------+-----------+-----------+ + * 00E0 DFR |MODEL| | | | | + * +-----------+-----------+-----------+-----------+ + * + * The logical APIC ID is used with logical interrupt + * delivery modes. Interpretation of logical destination + * information depends on the MODEL bits in the Destination + * Format Register. + * + * MODEL=1111 FLAT MODEL - The MDA is interpreted as + * a decoded address. By setting + * one bit in the LDR for each + * local apic 8 APICs can coexist. + * + * MODEL=0000 CLUSTER MODEL - + * + * 31 ... 24 23 ... 16 15 ... 8 7 ... 0 + * +-----------+-----------+-----------+-----------+ + * 00F0 SVR | | | FE | vvvvvvvv | + * +-----------+-----------+-----------+-----------+ + * + * Spurious interrupt vector register. The 4 low + * vector bits must be programmed to 1111, e.g. + * vvvv1111. 
+ * + * E - LAPIC disable (1 = disable, 0 = enable) + * + * F - Focus processor disable (1 = disable, 0 = enable) + * + * NOTE: The handler for the spurious interrupt vector + * should *NOT* issue an EOI because the spurious + * interrupt does not effect the ISR. + * + * +-----------+-----------+-----------+-----------+ + * 0100-0170 ISR| | | | | + * 0180-01F0 TMR| | | | | + * 0200-0270 IRR| | | | | + * +-----------+-----------+-----------+-----------+ + * + * These registers represent 256 bits, one bit for each + * possible interrupt. Interrupts 0-15 are reserved so + * bits 0-15 are also reserved. + * + * TMR - Trigger mode register. Upon acceptance of an int + * the corresponding bit is cleared for edge-trig and + * set for level-trig. If the TMR bit is set (level), + * the local APIC sends an EOI to all I/O APICs as + * a result of software issuing an EOI command. + * + * IRR - Interrupt Request Register. Contains active + * interrupt requests that have been accepted but not + * yet dispensed by the current local APIC. The bit is + * cleared and the corresponding ISR bit is set when + * the INTA cycle is issued. + * + * ISR - Interrupt In-Service register. Interrupt has been + * delivered but not yet fully serviced. Cleared when + * an EOI is issued from the processor. An EOI will + * also send an EOI to all I/O APICs if TMR was set. + * + * +-----------+-----------+-----------+-----------+ + * 0280 ESR | | | | | + * 0290-02F0 | | | | | + * +--FEDCBA98-+--76543210-+--FEDCBA98-+-----------+ + * 0300 ICR_LO | | XX | TL SDMMM | vector | + * 0310 ICR_HI | DEST FIELD| | | | + * +-----------+-----------+-----------+-----------+ + * + * The interrupt command register. Generally speaking + * writing to ICR_LO initiates a command. All fields + * are R/W except the 'S' (delivery status) field, which + * is read-only. When + * + * + * XX: Destination Shorthand field: + * + * 00 Use Destination field + * 01 Self only. Dest field ignored. 
+ * 10 All including self (uses a + * destination field of 0x0F) + * 11 All excluding self (uses a + * destination field of 0x0F) + * + * T: 1 = Level 0 = Edge Trigger modde, used for + * the INIT level de-assert delivery mode only + * to de-assert a request. + * + * L: 0 = De-Assert, 1 = Assert. Always write as + * 1 when initiating a new command. Can only + * write as 0 for INIT mode de-assertion of + * command. + * + * S: 1 = Send Pending. Interrupt has been injected + * but APIC has not yet accepted it. + * + * D: 0=physical 1=logical. In physical mode + * only 24-27 of DEST FIELD is used from ICR_HI. + * + * MMM: 000 Fixed. Deliver to all processors according + * to the ICR. Always treated as edge trig. + * + * 001 Lowest Priority. Deliver to just the + * processor running at the lowest priority. + * + * 010 SMI. The vector must be 00B. Only edge + * triggered is allowed. The vector field + * must be programmed to zero (huh?). + * + * 011 + * + * 100 NMI. Deliver as an NMI to all processors + * listed in the destination field. The + * vector is ignored. Alawys treated as + * edge triggered. + * + * 101 INIT. Deliver as an INIT signal to all + * processors (like FIXED). Vector is ignored + * and it is always edge-triggered. + * + * 110 Start Up. Sends a special message between + * cpus. the vector contains a start-up + * address for MP boot protocol. + * Always edge triggered. Note: a startup + * int is not automatically tried in case of + * failure. 
+ * + * 111 + * + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0320 LTIMER | | TM | ---S---- | vector | + * 0330 | | | | | + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0340 LVPCINT | | -M | ---S-MMM | vector | + * 0350 LVINT0 | | -M | LRPS-MMM | vector | + * 0360 LVINT1 | | -M | LRPS-MMM | vector | + * 0370 LVERROR | | -M | -------- | vector | + * +-----------+-----------+-----------+-----------+ + * + * T: 1 = periodic, 0 = one-shot + * + * M: 1 = masked + * + * L: 1 = level, 0 = edge + * + * R: For level triggered only, set to 1 when a + * level int is accepted, cleared by EOI. + * + * P: Pin Polarity 0 = Active High, 1 = Active Low + * + * S: 1 = Send Pending. Interrupt has been injected + * but APIC has not yet accepted it. + * + * MMM 000 = Fixed deliver to cpu according to LVT + * + * MMM 100 = NMI deliver as an NMI. Always edge + * + * MMM 111 = ExtInt deliver from 8259, routes INTA + * bus cycle to external + * controller. Controller is + * expected to supply vector. + * Always level. + * + * +-----------+-----------+-----------+-----------+ + * 0380 TMR_ICR | | | | | + * 0390 TMR_CCR | | | | | + * +-----------+-----------+-----------+-----------+ + * + * The timer initial count register and current count + * register (32 bits) + * + * +-----------+-----------+-----------+-----------+ + * 03A0 | | | | | + * 03B0 | | | | | + * 03C0 | | | | | + * 03D0 | | | | | + * +-----------+-----------+-----------+-----------+ + * 03E0 TMR_DCR | | | | d-dd | + * +-----------+-----------+-----------+-----------+ + * + * The timer divide configuration register. d-dd is: + * + * 0000 - divide by 2 + * 0001 - divide by 4 + * 0010 - divide by 8 + * 0011 - divide by 16 + * 1000 - divide by 32 + * 1001 - divide by 64 + * 1010 - divide by 128 + * 1011 - divide by 1 + * + * NOTE ON EOI: Upon receiving an EOI the APIC clears the highest priority + * interrupt in the ISR and selects the next highest priority interrupt + * for posting to the CPU. 
If the interrupt being EOId was level + * triggered the APIC will send an EOI to all I/O APICs. For the moment + * you can write garbage to the EOI register but for future compatibility + * 0 should be written. + */ + +#ifndef LOCORE +#include + +#define PAD3 int : 32; int : 32; int : 32 +#define PAD4 int : 32; int : 32; int : 32; int : 32 + +struct LAPIC { + /* reserved */ PAD4; + /* reserved */ PAD4; + u_int32_t id; PAD3; /* 0020 R/W */ + u_int32_t version; PAD3; /* 0030 RO */ + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + u_int32_t tpr; PAD3; + u_int32_t apr; PAD3; + u_int32_t ppr; PAD3; + u_int32_t eoi; PAD3; + /* reserved */ PAD4; + u_int32_t ldr; PAD3; + u_int32_t dfr; PAD3; + u_int32_t svr; PAD3; + u_int32_t isr0; PAD3; + u_int32_t isr1; PAD3; + u_int32_t isr2; PAD3; + u_int32_t isr3; PAD3; + u_int32_t isr4; PAD3; + u_int32_t isr5; PAD3; + u_int32_t isr6; PAD3; + u_int32_t isr7; PAD3; + u_int32_t tmr0; PAD3; + u_int32_t tmr1; PAD3; + u_int32_t tmr2; PAD3; + u_int32_t tmr3; PAD3; + u_int32_t tmr4; PAD3; + u_int32_t tmr5; PAD3; + u_int32_t tmr6; PAD3; + u_int32_t tmr7; PAD3; + u_int32_t irr0; PAD3; + u_int32_t irr1; PAD3; + u_int32_t irr2; PAD3; + u_int32_t irr3; PAD3; + u_int32_t irr4; PAD3; + u_int32_t irr5; PAD3; + u_int32_t irr6; PAD3; + u_int32_t irr7; PAD3; + u_int32_t esr; PAD3; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + u_int32_t icr_lo; PAD3; + u_int32_t icr_hi; PAD3; + u_int32_t lvt_timer; PAD3; + /* reserved */ PAD4; + u_int32_t lvt_pcint; PAD3; + u_int32_t lvt_lint0; PAD3; + u_int32_t lvt_lint1; PAD3; + u_int32_t lvt_error; PAD3; + u_int32_t icr_timer; PAD3; + u_int32_t ccr_timer; PAD3; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + /* reserved */ PAD4; + u_int32_t dcr_timer; PAD3; + /* reserved */ PAD4; +}; + +typedef struct LAPIC lapic_t; + 
/******************************************************************************
 * I/O APIC structure
 *
 * The I/O APIC is accessed indirectly: write a register index into
 * ioregsel, then read/write the selected register's contents through
 * iowin (the IOAPIC_WINDOW offset, see the I/O APIC defines below).
 */

struct IOAPIC {
	u_int32_t ioregsel;	PAD3;	/* 0000 register select (index) */
	u_int32_t iowin;	PAD3;	/* 0010 register data window */
};

typedef struct IOAPIC ioapic_t;

#undef PAD4
#undef PAD3

#endif /* !LOCORE */


/******************************************************************************
 * various code 'logical' values
 */

/*
 * TPR loads to prioritize which cpu grabs an interrupt
 *
 * (note: some fields of the TPR are reserved)
 */
#define LOPRIO_LEVEL		0x00000010	/* TPR of CPU accepting INTs */
#define ALLHWI_LEVEL		0x00000000	/* TPR of CPU grabbing INTs */

/******************************************************************************
 * LOCAL APIC defines
 */

/*
 * default physical location for the LOCAL (CPU) APIC
 */
#define DEFAULT_APIC_BASE	0xfee00000

/*
 * lapic.id (rw)
 *
 * The physical APIC id lives in the top byte; the cluster encodings
 * below apply when logical cluster addressing is in use.
 */
#define APIC_ID_MASK		0xff000000
#define APIC_ID_SHIFT		24
#define APIC_ID_CLUSTER		0xf0
#define APIC_ID_CLUSTER_ID	0x0f
#define APIC_MAX_CLUSTER	0xe
#define APIC_MAX_INTRACLUSTER_ID 3
#define APIC_ID_CLUSTER_SHIFT	4

/*
 * lapic.ver (ro)
 *
 * Low byte is the APIC version; bits 16-23 give the index of the
 * highest implemented LVT entry.
 */
#define APIC_VER_VERSION	0x000000ff
#define APIC_VER_MAXLVT		0x00ff0000
#define MAXLVTSHIFT		16

/*
 * lapic.ldr (rw)
 */
#define APIC_LDR_RESERVED	0x00ffffff

/*
 * lapic.dfr (rw)
 *
 * The logical APIC ID is used with logical interrupt
 * delivery modes.  Interpretation of logical destination
 * information depends on the MODEL bits in the Destination
 * Format Register.
 *
 * MODEL=1111 FLAT MODEL - The MDA is interpreted as
 *			   a decoded address.  By setting
 *			   one bit in the LDR for each
 *			   local apic 8 APICs can coexist.
+ * + * MODEL=0000 CLUSTER MODEL - + */ +#define APIC_DFR_RESERVED 0x0fffffff +#define APIC_DFR_MODEL_MASK 0xf0000000 +#define APIC_DFR_MODEL_FLAT 0xf0000000 +#define APIC_DFR_MODEL_CLUSTER 0x00000000 + +/* + * lapic.svr + * + * Contains the spurious interrupt vector and bits to enable/disable + * the local apic and focus processor. + */ +#define APIC_SVR_VECTOR 0x000000ff +#define APIC_SVR_ENABLE 0x00000100 +#define APIC_SVR_FOCUS_DISABLE 0x00000200 + +/* + * lapic.tpr + * + * PRIO (7:4). Main priority. If 15 the APIC will not + * accept any interrupts. + * SUBC (3:0) Sub priority. See APR/PPR. + */ +#define APIC_TPR_PRIO 0x000000ff +#define APIC_TPR_INT 0x000000f0 +#define APIC_TPR_SUB 0x0000000f + +/* + * lapic.icr_lo -------- ----XXRR TL-SDMMM vvvvvvvv + * + * The interrupt command register. Generally speaking + * writing to ICR_LO initiates a command. All fields + * are R/W except the 'S' (delivery status) field, which + * is read-only. When + * + * XX: Destination Shorthand field: + * + * 00 - Use Destination field + * 01 - Self only. Dest field ignored. + * 10 - All including self (uses a + * destination field of 0x0F) + * 11 - All excluding self (uses a + * destination field of 0x0F) + * + * RR: RR mode (? needs documentation) + * + * T: 1 = Level 0 = Edge Trigger modde, used for + * the INIT level de-assert delivery mode only + * to de-assert a request. + * + * L: 0 = De-Assert, 1 = Assert. Always write as + * 1 when initiating a new command. Can only + * write as 0 for INIT mode de-assertion of + * command. + * + * S: 1 = Send Pending. Interrupt has been injected but the APIC + * has not yet accepted it. + * + * D: 0 = physical 1 = logical. In physical mode only bits 24-27 + * of the DEST field is used from ICR_HI. + * + * MMM: Delivery mode + * + * 000 - Fixed. Deliver to all processors according to the + * ICR. Always treated as edge triggered. + * + * 001 - Lowest Priority. Deliver to just the processor + * running at the lowest priority. 
+ * + * 010 - SMI. The vector must be 00B. Only edge triggered + * is allowed. The vector field must be programmed to + * 0 (huh?) + * + * 011 - RR Delivery mode (?? needs documentation). + * + * 100 - NMI. Deliver as an NMI to all processors listed in + * the destination field. The vector is ignored. Always + * treated as edge triggered. + * + * 101 - INIT. Deliver as an INIT signal to all processors + * (like FIXED) according to the ICR. The vector is + * ignored and delivery is always edge-triggered. + * + * 110 - Startup. Send a special message between cpus. The + * vector contains a startup address for the MP boot + * protocol. Always edge triggered. Note: a startup + * interrupt is not automatically tried in case of failure. + * + * 111 - + */ +#define APIC_VECTOR_MASK 0x000000ff + +#define APIC_DELMODE_MASK 0x00000700 +#define APIC_DELMODE_FIXED 0x00000000 +#define APIC_DELMODE_LOWPRIO 0x00000100 +#define APIC_DELMODE_SMI 0x00000200 +#define APIC_DELMODE_RR 0x00000300 +#define APIC_DELMODE_NMI 0x00000400 +#define APIC_DELMODE_INIT 0x00000500 +#define APIC_DELMODE_STARTUP 0x00000600 +#define APIC_DELMODE_RESV7 0x00000700 + +#define APIC_DESTMODE_MASK 0x00000800 +#define APIC_DESTMODE_PHY 0x00000000 +#define APIC_DESTMODE_LOG 0x00000800 + +#define APIC_DELSTAT_MASK 0x00001000 +#define APIC_DELSTAT_IDLE 0x00000000 +#define APIC_DELSTAT_PEND 0x00001000 + +#define APIC_LEVEL_MASK 0x00004000 +#define APIC_LEVEL_DEASSERT 0x00000000 +#define APIC_LEVEL_ASSERT 0x00004000 + +#define APIC_TRIGMOD_MASK 0x00008000 +#define APIC_TRIGMOD_EDGE 0x00000000 +#define APIC_TRIGMOD_LEVEL 0x00008000 + +#define APIC_RRSTAT_MASK 0x00030000 +#define APIC_RRSTAT_INVALID 0x00000000 +#define APIC_RRSTAT_INPROG 0x00010000 +#define APIC_RRSTAT_VALID 0x00020000 +#define APIC_RRSTAT_RESV 0x00030000 + +#define APIC_DEST_MASK 0x000c0000 +#define APIC_DEST_DESTFLD 0x00000000 +#define APIC_DEST_SELF 0x00040000 +#define APIC_DEST_ALLISELF 0x00080000 +#define APIC_DEST_ALLESELF 0x000c0000 + 
+#define APIC_ICRLO_RESV_MASK 0xfff02000 + +/* + * lapic.icr_hi + */ +#define APIC_ICRH_ID_MASK APIC_ID_MASK + +/* + * lapic.lvt_timer + * lapic.lvt_pcint + * lapic.lvt_lint0 + * lapic.lvt_lint1 + * lapic.lvt_error + * + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0320 LTIMER | | TM | ---S---- | vector | + * 0330 | | | | | + * +-----------+--------10-+--FEDCBA98-+-----------+ + * 0340 LVPCINT | | -M | ---S-MMM | vector | + * 0350 LVINT0 | | -M | LRPS-MMM | vector | + * 0360 LVINT1 | | -M | LRPS-MMM | vector | + * 0370 LVERROR | | -M | -------- | vector | + * +-----------+-----------+-----------+-----------+ + * + * T: 1 = periodic, 0 = one-shot + * (LTIMER only) + * + * M: 1 = masked + * + * L: 1 = level, 0 = edge + * (LVINT0/1 only) + * + * R: For level triggered only, set to 1 when a + * level int is accepted, cleared by EOI. + * (LVINT0/1 only) + * + * P: Pin Polarity 0 = Active High, 1 = Active Low + * (LVINT0/1 only) + * + * S: 1 = Send Pending. Interrupt has been injected + * but APIC has not yet accepted it. + * + * MMM 000 = Fixed deliver to cpu according to LVT + * + * MMM 100 = NMI deliver as an NMI. Always edge + * + * MMM 111 = ExtInt deliver from 8259, routes INTA + * bus cycle to external + * controller. Controller is + * expected to supply vector. + * Always level. 
+ */ +#define APIC_LVT_VECTOR 0x000000ff + +#define APIC_LVT_DM_MASK 0x00000700 +#define APIC_LVT_DM_FIXED 0x00000000 +#define APIC_LVT_DM_NMI 0x00000400 +#define APIC_LVT_DM_EXTINT 0x00000700 + +#define APIC_LVT_DS 0x00001000 /* (S) Send Pending */ +#define APIC_LVT_POLARITY_MASK 0x00002000 +#define APIC_LVT_POLARITY_LO 0x00002000 /* (P) Pin Polarity */ +#define APIC_LVT_POLARITY_HI 0x00000000 +#define APIC_LVT_LEVELSTATUS 0x00004000 /* (R) level trig status */ +#define APIC_LVT_TRIG_MASK 0x00008000 +#define APIC_LVT_LEVELTRIG 0x00008000 /* (L) 1 = level, 0 = edge */ +#define APIC_LVT_MASKED 0x00010000 /* (M) 1 = masked */ + +/* + * lapic.lvt_timer + */ +#define APIC_LVTT_VECTOR APIC_LVT_VECTOR +#define APIC_LVTT_DS APIC_LVT_DS +#define APIC_LVTT_MASKED APIC_LVT_MASKED +#define APIC_LVTT_PERIODIC 0x00020000 + +#define APIC_TIMER_MAX_COUNT 0xffffffff + +/* + * lapic.icr_timer - initial count register (32 bits) + * lapic.ccr_timer - current count register (32 bits) + */ + +/* + * lapic.dcr_timer - timer divider register + * + * d0dd + * + * 0000 - divide by 2 + * 0001 - divide by 4 + * 0010 - divide by 8 + * 0011 - divide by 16 + * 1000 - divide by 32 + * 1001 - divide by 64 + * 1010 - divide by 128 + * 1011 - divide by 1 + */ +#define APIC_TDCR_2 0x00 +#define APIC_TDCR_4 0x01 +#define APIC_TDCR_8 0x02 +#define APIC_TDCR_16 0x03 +#define APIC_TDCR_32 0x08 +#define APIC_TDCR_64 0x09 +#define APIC_TDCR_128 0x0a +#define APIC_TDCR_1 0x0b + +/* + * fields in IRR + * ISA INTerrupts are in bits 16-31 of the 1st IRR register. + * these masks DON'T EQUAL the isa IRQs of the same name. 
+ */ +#define APIC_IRQ0 0 +#define APIC_IRQ1 1 +#define APIC_IRQ2 2 +#define APIC_IRQ3 3 +#define APIC_IRQ4 4 +#define APIC_IRQ5 5 +#define APIC_IRQ6 6 +#define APIC_IRQ7 7 +#define APIC_IRQ8 8 +#define APIC_IRQ9 9 +#define APIC_IRQ10 10 +#define APIC_IRQ11 11 +#define APIC_IRQ12 12 +#define APIC_IRQ13 13 +#define APIC_IRQ14 14 +#define APIC_IRQ15 15 +#define APIC_IRQ16 16 +#define APIC_IRQ17 17 +#define APIC_IRQ18 18 +#define APIC_IRQ19 19 +#define APIC_IRQ20 20 +#define APIC_IRQ21 21 +#define APIC_IRQ22 22 +#define APIC_IRQ23 23 + +/****************************************************************************** + * I/O APIC defines + */ + +/* default physical locations of an IO APIC */ +#define DEFAULT_IO_APIC_BASE 0xfec00000 + +/* window register offset */ +#define IOAPIC_WINDOW 0x10 + +/* + * indexes into IO APIC (index into array of 32 bit entities) + */ +#define IOAPIC_ID 0x00 +#define IOAPIC_VER 0x01 +#define IOAPIC_ARB 0x02 +#define IOAPIC_REDTBL 0x10 +#define IOAPIC_REDTBL0 IOAPIC_REDTBL +#define IOAPIC_REDTBL1 (IOAPIC_REDTBL+0x02) +#define IOAPIC_REDTBL2 (IOAPIC_REDTBL+0x04) +#define IOAPIC_REDTBL3 (IOAPIC_REDTBL+0x06) +#define IOAPIC_REDTBL4 (IOAPIC_REDTBL+0x08) +#define IOAPIC_REDTBL5 (IOAPIC_REDTBL+0x0a) +#define IOAPIC_REDTBL6 (IOAPIC_REDTBL+0x0c) +#define IOAPIC_REDTBL7 (IOAPIC_REDTBL+0x0e) +#define IOAPIC_REDTBL8 (IOAPIC_REDTBL+0x10) +#define IOAPIC_REDTBL9 (IOAPIC_REDTBL+0x12) +#define IOAPIC_REDTBL10 (IOAPIC_REDTBL+0x14) +#define IOAPIC_REDTBL11 (IOAPIC_REDTBL+0x16) +#define IOAPIC_REDTBL12 (IOAPIC_REDTBL+0x18) +#define IOAPIC_REDTBL13 (IOAPIC_REDTBL+0x1a) +#define IOAPIC_REDTBL14 (IOAPIC_REDTBL+0x1c) +#define IOAPIC_REDTBL15 (IOAPIC_REDTBL+0x1e) +#define IOAPIC_REDTBL16 (IOAPIC_REDTBL+0x20) +#define IOAPIC_REDTBL17 (IOAPIC_REDTBL+0x22) +#define IOAPIC_REDTBL18 (IOAPIC_REDTBL+0x24) +#define IOAPIC_REDTBL19 (IOAPIC_REDTBL+0x26) +#define IOAPIC_REDTBL20 (IOAPIC_REDTBL+0x28) +#define IOAPIC_REDTBL21 (IOAPIC_REDTBL+0x2a) +#define IOAPIC_REDTBL22 
(IOAPIC_REDTBL+0x2c) +#define IOAPIC_REDTBL23 (IOAPIC_REDTBL+0x2e) + +/* fields in VER */ +#define IOART_VER_VERSION 0x000000ff +#define IOART_VER_MAXREDIR 0x00ff0000 +#define MAXREDIRSHIFT 16 + +/* + * fields in the IO APIC's redirection table entries + */ + +/* + * High 32 bit word. The high 8 bits contain the destination field. + * + * If this entry is set up for Physical Mode, bits 59:56 (the low 4 bits + * of the 8 bit destination field) contain an APIC ID. + * + * If this entry is set up for Logical Mode, the destination field potentially + * defines a set of processors. Bits 63:56 (all 8 bits) specify the logical + * destination address. + * + * Current we use IOART_HI_DEST_BROADCAST to broadcast to all LAPICs + */ +#define IOART_HI_DEST_MASK APIC_ID_MASK +#define IOART_HI_DEST_RESV ~APIC_ID_MASK +#define IOART_HI_DEST_BROADCAST IOART_HI_DEST_MASK + +/* + * Low 32 bit word + */ +#define IOART_RESV 0x00fe0000 /* reserved */ + +/* + * Interrupt mask bit. If 1 the interrupt is masked. An edge sensitive + * interrupt which is masked will be lost. + */ +#define IOART_INTMASK 0x00010000 /* R/W: INTerrupt mask */ +#define IOART_INTMCLR 0x00000000 /* clear, allow INTs */ +#define IOART_INTMSET 0x00010000 /* set, inhibit INTs */ + +/* + * Select trigger mode. + */ +#define IOART_TRGRMOD 0x00008000 /* R/W: trigger mode */ +#define IOART_TRGREDG 0x00000000 /* edge */ +#define IOART_TRGRLVL 0x00008000 /* level */ + +/* + * Remote IRR. Only applies to level triggered interrupts, this bit + * is set to 1 when the IOAPIC has delivered a level triggered interrupt + * to a local APIC. It is cleared when the LAPIC EOI's the interrupt. + * This field is read-only. + */ +#define IOART_REM_IRR 0x00004000 /* RO: remote IRR */ + +/* + * Select interrupt pin polarity + */ +#define IOART_INTPOL 0x00002000 /* R/W: INT input pin polarity */ +#define IOART_INTAHI 0x00000000 /* active high */ +#define IOART_INTALO 0x00002000 /* active low */ + +/* + * Delivery Status (read only). 
0 = no interrupt pending, 1 = interrupt + * pending for tranmission to an LAPIC. Note that this bit does not + * indicate whether the interrupt has been processed or is undergoing + * processing by a cpu. + */ +#define IOART_DELIVS 0x00001000 /* RO: delivery status */ + +/* + * Destination mode. + * + * In physical mode the destination APIC is identified by its ID. + * Bits 56-59 specify the 4 bit APIC ID. + * + * In logical mode destinations are identified by matching on the logical + * destination under the control of the destination format register and + * logical destination register in each local APIC. + * + */ +#define IOART_DESTMOD 0x00000800 /* R/W: destination mode */ +#define IOART_DESTPHY 0x00000000 /* physical */ +#define IOART_DESTLOG 0x00000800 /* logical */ + +/* + * Delivery mode. + * + * 000 Fixed Deliver the signal on the INTR signal for + * all processor core's LAPICs listed in the + * destination. The trigger mode may be + * edge or level. + * + * 001 Lowest Pri Deliver to the processor core whos LAPIC + * is operating at the lowest priority (TPR). + * The trigger mode may be edge or level. + * + * 010 SMI System management interrupt. the vector + * information is ignored but must be programmed + * to all zero's for future compatibility. + * Must be edge triggered. + * + * 011 Reserved + * + * 100 NMI Deliver on the NMI signal for all cpu cores + * listed in the destination. Vector information + * is ignored. NMIs are treated as edge triggered + * interrupts even if programmed as level + * triggered. For proper operation the pin must + * be programmed as an edge trigger. + * + * 101 INIT Deliver to all processor cores listed in + * the destination by asserting their INIT signal. + * All addressed LAPICs will assume their INIT + * state. Always treated as edge-triggered even + * if programmed as level. For proper operation + * the pin must be programed as an edge trigger. 
+ * + * 110 Reserved + * + * 111 ExINT Deliver as an INTR signal to all processor + * cores listed in the destination as an + * interrupt originating in an externally + * connected interrupt controller. + * The INTA cycle corresponding to this ExINT + * will be routed to the external controller + * that is expected to supply the vector. + * Must be edge triggered. + * + */ +#define IOART_DELMOD 0x00000700 /* R/W: delivery mode */ +#define IOART_DELFIXED 0x00000000 /* fixed */ +#define IOART_DELLOPRI 0x00000100 /* lowest priority */ +#define IOART_DELSMI 0x00000200 /* System Management INT */ +#define IOART_DELRSV1 0x00000300 /* reserved */ +#define IOART_DELNMI 0x00000400 /* NMI signal */ +#define IOART_DELINIT 0x00000500 /* INIT signal */ +#define IOART_DELRSV2 0x00000600 /* reserved */ +#define IOART_DELEXINT 0x00000700 /* External INTerrupt */ + +/* + * The interrupt vector. Valid values range from 0x10 to 0xFE. + */ +#define IOART_INTVEC 0x000000ff /* R/W: INTerrupt vector field */ + +#endif /* _MACHINE_APIC_H_ */ diff --git a/sys/platform/pc64/apic/apicvar.h b/sys/platform/pc64/apic/apicvar.h new file mode 100644 index 0000000000..5140b84375 --- /dev/null +++ b/sys/platform/pc64/apic/apicvar.h @@ -0,0 +1,154 @@ +/*- + * Copyright (c) 2003 John Baldwin + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/apicvar.h,v 1.5 2003/11/14 22:21:30 peter Exp $ + * $DragonFly: src/sys/platform/pc64/apic/apicvar.h,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#ifndef _MACHINE_APICVAR_H_ +#define _MACHINE_APICVAR_H_ + +/* + * Local && I/O APIC variable definitions. 
+ */ + +/* + * Layout of local APIC interrupt vectors: + * + * 0xff (255) +-------------+ + * | | 15 (Spurious / IPIs / Local Interrupts) + * 0xf0 (240) +-------------+ + * | | 14 (I/O Interrupts) + * 0xe0 (224) +-------------+ + * | | 13 (I/O Interrupts) + * 0xd0 (208) +-------------+ + * | | 12 (I/O Interrupts) + * 0xc0 (192) +-------------+ + * | | 11 (I/O Interrupts) + * 0xb0 (176) +-------------+ + * | | 10 (I/O Interrupts) + * 0xa0 (160) +-------------+ + * | | 9 (I/O Interrupts) + * 0x90 (144) +-------------+ + * | | 8 (I/O Interrupts / System Calls) + * 0x80 (128) +-------------+ + * | | 7 (I/O Interrupts) + * 0x70 (112) +-------------+ + * | | 6 (I/O Interrupts) + * 0x60 (96) +-------------+ + * | | 5 (I/O Interrupts) + * 0x50 (80) +-------------+ + * | | 4 (I/O Interrupts) + * 0x40 (64) +-------------+ + * | | 3 (I/O Interrupts) + * 0x30 (48) +-------------+ + * | | 2 (ATPIC Interrupts) + * 0x20 (32) +-------------+ + * | | 1 (Exceptions, traps, faults, etc.) + * 0x10 (16) +-------------+ + * | | 0 (Exceptions, traps, faults, etc.) + * 0x00 (0) +-------------+ + * + * Note: 0x80 needs to be handled specially and not allocated to an + * I/O device! + */ + +#define APIC_ID_ALL 0xff +#define APIC_IO_INTS (IDT_IO_INTS + 16) +#define APIC_NUM_IOINTS 192 + +#define APIC_LOCAL_INTS 240 + +/* XXX put APIC interrupt and IPI assignments here */ + +#define APIC_SPURIOUS_INT 255 + +#define LVT_LINT0 0 +#define LVT_LINT1 1 +#define LVT_TIMER 2 +#define LVT_ERROR 3 +#define LVT_PMC 4 +#define LVT_THERMAL 5 +#define LVT_MAX LVT_THERMAL + +#ifndef LOCORE + +#define APIC_IPI_DEST_SELF -1 +#define APIC_IPI_DEST_ALL -2 +#define APIC_IPI_DEST_OTHERS -3 + +/* + * An APIC enumerator is a psuedo bus driver that enumerates APIC's including + * CPU's and I/O APIC's. 
 */
struct apic_enumerator {
	const char *apic_name;			/* name for probe messages */
	int (*apic_probe)(void);		/* probe for this flavor */
	int (*apic_probe_cpus)(void);		/* enumerate CPU local APICs */
	int (*apic_setup_local)(void);		/* configure the local APIC */
	int (*apic_setup_io)(void);		/* configure the I/O APICs */
	SLIST_ENTRY(apic_enumerator) apic_next;	/* registration list link */
};

/* local APIC IDT vector mapping */
u_int apic_irq_to_idt(u_int irq);
u_int apic_idt_to_irq(u_int vector);
void apic_register_enumerator(struct apic_enumerator *enumerator);

/* I/O APIC management (cookie is the opaque handle from ioapic_create) */
void *ioapic_create(uintptr_t addr, int32_t id, int intbase);
int ioapic_disable_pin(void *cookie, u_int pin);
void ioapic_enable_mixed_mode(void);
int ioapic_get_vector(void *cookie, u_int pin);
int ioapic_next_logical_cluster(void);
void ioapic_register(void *cookie);
int ioapic_remap_vector(void *cookie, u_int pin, int vector);
int ioapic_set_extint(void *cookie, u_int pin);
int ioapic_set_nmi(void *cookie, u_int pin);
int ioapic_set_polarity(void *cookie, u_int pin, char activehi);
int ioapic_set_triggermode(void *cookie, u_int pin, char edgetrigger);
int ioapic_set_smi(void *cookie, u_int pin);

/* local APIC management */
void lapic_create(u_int apic_id, int boot_cpu);
void lapic_disable(void);
void lapic_dump(const char *str);
void lapic_enable_intr(u_int vector);
void lapic_eoi(void);
int lapic_id(void);
void lapic_init(uintptr_t addr);
int lapic_intr_pending(u_int vector);
void lapic_ipi_raw(register_t icrlo, u_int dest);
void lapic_ipi_vectored(u_int vector, int dest);
int lapic_ipi_wait(int delay);
void lapic_handle_intr(struct intrframe *frame);
void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id);
int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked);
int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode);
int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, u_char activehi);
int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, u_char edgetrigger);
void lapic_setup(void);

#endif /* !LOCORE */
#endif /* _MACHINE_APICVAR_H_ */
diff --git a/sys/platform/pc64/apic/mpapic.c b/sys/platform/pc64/apic/mpapic.c
new file mode 100644
index
0000000000..173d7b1c54 --- /dev/null +++ b/sys/platform/pc64/apic/mpapic.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 1996, by Steve Passe + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $ + * $DragonFly: src/sys/platform/pc64/apic/mpapic.c,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include /* Xspuriousint() */ + +/* EISA Edge/Level trigger control registers */ +#define ELCR0 0x4d0 /* eisa irq 0-7 */ +#define ELCR1 0x4d1 /* eisa irq 8-15 */ + +/* + * pointers to pmapped apic hardware. + */ + +volatile ioapic_t **ioapic; + +/* + * Enable APIC, configure interrupts. 
 */
void
apic_initialize(void)
{
	u_int temp;

	/*
	 * setup LVT1 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LVT1 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic.lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
	    APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (mycpu->gd_cpuid == 0)
		temp |= APIC_LVT_DM_EXTINT;
	else
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	lapic.lvt_lint0 = temp;

	/*
	 * setup LVT2 as NMI, masked till later.  Edge trigger, active high.
	 */
	temp = lapic.lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
	    APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	lapic.lvt_lint1 = temp;

	/*
	 * Mask the apic error interrupt, apic performance counter
	 * interrupt, and the apic timer interrupt.
	 */
	lapic.lvt_error = lapic.lvt_error | APIC_LVT_MASKED;
	lapic.lvt_pcint = lapic.lvt_pcint | APIC_LVT_MASKED;
	lapic.lvt_timer = lapic.lvt_timer | APIC_LVT_MASKED;

	/*
	 * Set the Task Priority Register as needed.  At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).  We could disable all but IPIs by setting
	 * temp |= TPR_IPI_ONLY for cpu != 0.
	 */
	temp = lapic.tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
#ifndef APIC_IO
	/*
	 * If we are NOT running the IO APICs, the LAPIC will only be used
	 * for IPIs.  Set the TPR to prevent any unintentional interrupts.
	 */
	temp |= TPR_IPI_ONLY;
#endif

	lapic.tpr = temp;

	/*
	 * enable the local APIC
	 */
	temp = lapic.svr;
	temp |= APIC_SVR_ENABLE;	/* enable the APIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111 (hardware requirement, see lapic.svr notes above).
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic.svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic.eoi = 0;
	lapic.eoi = 0;
	lapic.eoi = 0;

	if (bootverbose)
		apic_dump("apic_initialize()");
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		lapic.lvt_lint0, lapic.lvt_lint1, lapic.tpr, lapic.svr);
}


#if defined(APIC_IO)

/*
 * IO APIC code,
 */

#define IOAPIC_ISA_INTS		16
/* number of redirection entries: max-redir field of the version reg + 1 */
#define REDIRCNT_IOAPIC(A) \
	((int)((io_apic_versions[(A)] & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1)

static int trigger (int apic, int pin, u_int32_t * flags);
static void polarity (int apic, int pin, u_int32_t * flags, int level);

/* masked, physical destination, lowest-priority delivery */
#define DEFAULT_FLAGS		\
	((u_int32_t)		\
	 (IOART_INTMSET |	\
	  IOART_DESTPHY |	\
	  IOART_DELLOPRI))

/* as above, plus edge trigger / active high for ISA interrupts */
#define DEFAULT_ISA_FLAGS	\
	((u_int32_t)		\
	 (IOART_INTMSET |	\
	  IOART_TRGREDG |	\
	  IOART_INTAHI |	\
	  IOART_DESTPHY |	\
	  IOART_DELLOPRI))

/*
 * Program the hardware APIC ID of I/O APIC 'apic' to 'id' if it does
 * not already match.  (NOTE(review): truncated at the end of this
 * chunk; the remainder continues in the next hunk.)
 */
void
io_apic_set_id(int apic, int id)
{
	u_int32_t ux;

	ux = io_apic_read(apic, IOAPIC_ID);	/* get current contents */
	if (((ux & APIC_ID_MASK) >> 24) != id) {
		kprintf("Changing APIC ID for IO APIC #%d"
		       " from %d to %d on chip\n",
		       apic, ((ux & APIC_ID_MASK) >> 24), id);
		ux &= ~APIC_ID_MASK;	/* clear the ID field */
		ux |= (id << 24);
		io_apic_write(apic, IOAPIC_ID, ux);	/* write
new value */ + ux = io_apic_read(apic, IOAPIC_ID); /* re-read && test */ + if (((ux & APIC_ID_MASK) >> 24) != id) + panic("can't control IO APIC #%d ID, reg: 0x%08x", + apic, ux); + } +} + + +int +io_apic_get_id(int apic) +{ + return (io_apic_read(apic, IOAPIC_ID) & APIC_ID_MASK) >> 24; +} + + + +/* + * Setup the IO APIC. + */ + +extern int apic_pin_trigger; /* 'opaque' */ + +void +io_apic_setup_intpin(int apic, int pin) +{ + int bus, bustype, irq; + u_char select; /* the select register is 8 bits */ + u_int32_t flags; /* the window register is 32 bits */ + u_int32_t target; /* the window register is 32 bits */ + u_int32_t vector; /* the window register is 32 bits */ + int level; + + select = pin * 2 + IOAPIC_REDTBL0; /* register */ + + /* + * Always clear an IO APIC pin before [re]programming it. This is + * particularly important if the pin is set up for a level interrupt + * as the IOART_REM_IRR bit might be set. When we reprogram the + * vector any EOI from pending ints on this pin could be lost and + * IRR might never get reset. + * + * To fix this problem, clear the vector and make sure it is + * programmed as an edge interrupt. This should theoretically + * clear IRR so we can later, safely program it as a level + * interrupt. + */ + imen_lock(); + + flags = io_apic_read(apic, select) & IOART_RESV; + flags |= IOART_INTMSET | IOART_TRGREDG | IOART_INTAHI; + flags |= IOART_DESTPHY | IOART_DELFIXED; + + target = io_apic_read(apic, select + 1) & IOART_HI_DEST_RESV; + target |= 0; /* fixed mode cpu mask of 0 - don't deliver anywhere */ + + vector = 0; + + io_apic_write(apic, select, flags | vector); + io_apic_write(apic, select + 1, target); + + imen_unlock(); + + /* + * We only deal with vectored interrupts here. ? documentation is + * lacking, I'm guessing an interrupt type of 0 is the 'INT' type, + * vs ExTINT, etc. + * + * This test also catches unconfigured pins. 
+ */ + if (apic_int_type(apic, pin) != 0) + return; + + /* + * Leave the pin unprogrammed if it does not correspond to + * an IRQ. + */ + irq = apic_irq(apic, pin); + if (irq < 0) + return; + + /* determine the bus type for this pin */ + bus = apic_src_bus_id(apic, pin); + if (bus < 0) + return; + bustype = apic_bus_type(bus); + + if ((bustype == ISA) && + (pin < IOAPIC_ISA_INTS) && + (irq == pin) && + (apic_polarity(apic, pin) == 0x1) && + (apic_trigger(apic, pin) == 0x3)) { + /* + * A broken BIOS might describe some ISA + * interrupts as active-high level-triggered. + * Use default ISA flags for those interrupts. + */ + flags = DEFAULT_ISA_FLAGS; + } else { + /* + * Program polarity and trigger mode according to + * interrupt entry. + */ + flags = DEFAULT_FLAGS; + level = trigger(apic, pin, &flags); + if (level == 1) + apic_pin_trigger |= (1 << irq); + polarity(apic, pin, &flags, level); + } + + if (bootverbose) { + kprintf("IOAPIC #%d intpin %d -> irq %d\n", + apic, pin, irq); + } + + /* + * Program the appropriate registers. This routing may be + * overridden when an interrupt handler for a device is + * actually added (see register_int(), which calls through + * the MACHINTR ABI to set up an interrupt handler/vector). + * + * The order in which we must program the two registers for + * safety is unclear! 
XXX + */ + imen_lock(); + + vector = IDT_OFFSET + irq; /* IDT vec */ + target = io_apic_read(apic, select + 1) & IOART_HI_DEST_RESV; + target |= IOART_HI_DEST_BROADCAST; + flags |= io_apic_read(apic, select) & IOART_RESV; + io_apic_write(apic, select, flags | vector); + io_apic_write(apic, select + 1, target); + + imen_unlock(); +} + +int +io_apic_setup(int apic) +{ + int maxpin; + int pin; + + if (apic == 0) + apic_pin_trigger = 0; /* default to edge-triggered */ + + maxpin = REDIRCNT_IOAPIC(apic); /* pins in APIC */ + kprintf("Programming %d pins in IOAPIC #%d\n", maxpin, apic); + + for (pin = 0; pin < maxpin; ++pin) { + io_apic_setup_intpin(apic, pin); + } + while (pin < 32) { + if (apic_int_type(apic, pin) >= 0) { + kprintf("Warning: IOAPIC #%d pin %d does not exist," + " cannot program!\n", apic, pin); + } + ++pin; + } + + /* return GOOD status */ + return 0; +} +#undef DEFAULT_ISA_FLAGS +#undef DEFAULT_FLAGS + + +#define DEFAULT_EXTINT_FLAGS \ + ((u_int32_t) \ + (IOART_INTMSET | \ + IOART_TRGREDG | \ + IOART_INTAHI | \ + IOART_DESTPHY | \ + IOART_DELLOPRI)) + +/* + * Setup the source of External INTerrupts. + */ +int +ext_int_setup(int apic, int intr) +{ + u_char select; /* the select register is 8 bits */ + u_int32_t flags; /* the window register is 32 bits */ + u_int32_t target; /* the window register is 32 bits */ + u_int32_t vector; /* the window register is 32 bits */ + + if (apic_int_type(apic, intr) != 3) + return -1; + + target = IOART_HI_DEST_BROADCAST; + select = IOAPIC_REDTBL0 + (2 * intr); + vector = IDT_OFFSET + intr; + flags = DEFAULT_EXTINT_FLAGS; + + io_apic_write(apic, select, flags | vector); + io_apic_write(apic, select + 1, target); + + return 0; +} +#undef DEFAULT_EXTINT_FLAGS + + +/* + * Set the trigger level for an IO APIC pin. 
+ */ +static int +trigger(int apic, int pin, u_int32_t * flags) +{ + int id; + int eirq; + int level; + static int intcontrol = -1; + + switch (apic_trigger(apic, pin)) { + + case 0x00: + break; + + case 0x01: + *flags &= ~IOART_TRGRLVL; /* *flags |= IOART_TRGREDG */ + return 0; + + case 0x03: + *flags |= IOART_TRGRLVL; + return 1; + + case -1: + default: + goto bad; + } + + if ((id = apic_src_bus_id(apic, pin)) == -1) + goto bad; + + switch (apic_bus_type(id)) { + case ISA: + *flags &= ~IOART_TRGRLVL; /* *flags |= IOART_TRGREDG; */ + return 0; + + case EISA: + eirq = apic_src_bus_irq(apic, pin); + + if (eirq < 0 || eirq > 15) { + kprintf("EISA IRQ %d?!?!\n", eirq); + goto bad; + } + + if (intcontrol == -1) { + intcontrol = inb(ELCR1) << 8; + intcontrol |= inb(ELCR0); + kprintf("EISA INTCONTROL = %08x\n", intcontrol); + } + + /* Use ELCR settings to determine level or edge mode */ + level = (intcontrol >> eirq) & 1; + + /* + * Note that on older Neptune chipset based systems, any + * pci interrupts often show up here and in the ELCR as well + * as level sensitive interrupts attributed to the EISA bus. + */ + + if (level) + *flags |= IOART_TRGRLVL; + else + *flags &= ~IOART_TRGRLVL; + + return level; + + case PCI: + *flags |= IOART_TRGRLVL; + return 1; + + case -1: + default: + goto bad; + } + +bad: + panic("bad APIC IO INT flags"); +} + + +/* + * Set the polarity value for an IO APIC pin. 
+ */ +static void +polarity(int apic, int pin, u_int32_t * flags, int level) +{ + int id; + + switch (apic_polarity(apic, pin)) { + + case 0x00: + break; + + case 0x01: + *flags &= ~IOART_INTALO; /* *flags |= IOART_INTAHI */ + return; + + case 0x03: + *flags |= IOART_INTALO; + return; + + case -1: + default: + goto bad; + } + + if ((id = apic_src_bus_id(apic, pin)) == -1) + goto bad; + + switch (apic_bus_type(id)) { + case ISA: + *flags &= ~IOART_INTALO; /* *flags |= IOART_INTAHI */ + return; + + case EISA: + /* polarity converter always gives active high */ + *flags &= ~IOART_INTALO; + return; + + case PCI: + *flags |= IOART_INTALO; + return; + + case -1: + default: + goto bad; + } + +bad: + panic("bad APIC IO INT flags"); +} + + +/* + * Print contents of apic_imen. + */ +extern u_int apic_imen; /* keep apic_imen 'opaque' */ +void +imen_dump(void) +{ + int x; + + kprintf("SMP: enabled INTs: "); + for (x = 0; x < 24; ++x) + if ((apic_imen & (1 << x)) == 0) + kprintf("%d, ", x); + kprintf("apic_imen: 0x%08x\n", apic_imen); +} + + +/* + * Inter Processor Interrupt functions. + */ + +#endif /* APIC_IO */ + +/* + * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'. + * + * destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF + * vector is any valid SYSTEM INT vector + * delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO + * + * A backlog of requests can create a deadlock between cpus. To avoid this + * we have to be able to accept IPIs at the same time we are trying to send + * them. The critical section prevents us from attempting to send additional + * IPIs reentrantly, but also prevents IPIQ processing so we have to call + * lwkt_process_ipiq() manually. It's rather messy and expensive for this + * to occur but fortunately it does not happen too often. 
+ */ +int +apic_ipi(int dest_type, int vector, int delivery_mode) +{ + u_long icr_lo; + + crit_enter(); + if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + unsigned int eflags = read_eflags(); + cpu_enable_intr(); + while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + lwkt_process_ipiq(); + } + write_eflags(eflags); + } + + icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK) | dest_type | + delivery_mode | vector; + lapic.icr_lo = icr_lo; + crit_exit(); + return 0; +} + +void +single_apic_ipi(int cpu, int vector, int delivery_mode) +{ + u_long icr_lo; + u_long icr_hi; + + crit_enter(); + if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + unsigned int eflags = read_eflags(); + cpu_enable_intr(); + while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + lwkt_process_ipiq(); + } + write_eflags(eflags); + } + icr_hi = lapic.icr_hi & ~APIC_ID_MASK; + icr_hi |= (CPU_TO_ID(cpu) << 24); + lapic.icr_hi = icr_hi; + + /* build IRC_LOW */ + icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK) + | APIC_DEST_DESTFLD | delivery_mode | vector; + + /* write APIC ICR */ + lapic.icr_lo = icr_lo; + crit_exit(); +} + +#if 0 + +/* + * Returns 0 if the apic is busy, 1 if we were able to queue the request. + * + * NOT WORKING YET! The code as-is may end up not queueing an IPI at all + * to the target, and the scheduler does not 'poll' for IPI messages. + */ +int +single_apic_ipi_passive(int cpu, int vector, int delivery_mode) +{ + u_long icr_lo; + u_long icr_hi; + + crit_enter(); + if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + crit_exit(); + return(0); + } + icr_hi = lapic.icr_hi & ~APIC_ID_MASK; + icr_hi |= (CPU_TO_ID(cpu) << 24); + lapic.icr_hi = icr_hi; + + /* build IRC_LOW */ + icr_lo = (lapic.icr_lo & APIC_RESV2_MASK) + | APIC_DEST_DESTFLD | delivery_mode | vector; + + /* write APIC ICR */ + lapic.icr_lo = icr_lo; + crit_exit(); + return(1); +} + +#endif + +/* + * Send APIC IPI 'vector' to 'target's via 'delivery_mode'. + * + * target is a bitmask of destination cpus. 
Vector is any + * valid system INT vector. Delivery mode may be either + * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO. + */ +void +selected_apic_ipi(u_int target, int vector, int delivery_mode) +{ + crit_enter(); + while (target) { + int n = bsfl(target); + target &= ~(1 << n); + single_apic_ipi(n, vector, delivery_mode); + } + crit_exit(); +} + +/* + * Timer code, in development... + * - suggested by rgrimes@gndrsh.aac.dev.com + */ + +/** XXX FIXME: temp hack till we can determin bus clock */ +#ifndef BUS_CLOCK +#define BUS_CLOCK 66000000 +#define bus_clock() 66000000 +#endif + +#if defined(READY) +int acquire_apic_timer (void); +int release_apic_timer (void); + +/* + * Acquire the APIC timer for exclusive use. + */ +int +acquire_apic_timer(void) +{ +#if 1 + return 0; +#else + /** XXX FIXME: make this really do something */ + panic("APIC timer in use when attempting to acquire"); +#endif +} + + +/* + * Return the APIC timer. + */ +int +release_apic_timer(void) +{ +#if 1 + return 0; +#else + /** XXX FIXME: make this really do something */ + panic("APIC timer was already released"); +#endif +} +#endif /* READY */ + + +/* + * Load a 'downcount time' in uSeconds. + */ +void +set_apic_timer(int value) +{ + u_long lvtt; + long ticks_per_microsec; + + /* + * Calculate divisor and count from value: + * + * timeBase == CPU bus clock divisor == [1,2,4,8,16,32,64,128] + * value == time in uS + */ + lapic.dcr_timer = APIC_TDCR_1; + ticks_per_microsec = bus_clock() / 1000000; + + /* configure timer as one-shot */ + lvtt = lapic.lvt_timer; + lvtt &= ~(APIC_LVTT_VECTOR | APIC_LVTT_DS); + lvtt &= ~(APIC_LVTT_PERIODIC); + lvtt |= APIC_LVTT_MASKED; /* no INT, one-shot */ + lapic.lvt_timer = lvtt; + + /* */ + lapic.icr_timer = value * ticks_per_microsec; +} + + +/* + * Read remaining time in timer. + */ +int +read_apic_timer(void) +{ +#if 0 + /** XXX FIXME: we need to return the actual remaining time, + * for now we just return the remaining count. 
+ */ +#else + return lapic.ccr_timer; +#endif +} + + +/* + * Spin-style delay, set delay time in uS, spin till it drains. + */ +void +u_sleep(int count) +{ + set_apic_timer(count); + while (read_apic_timer()) + /* spin */ ; +} diff --git a/sys/platform/pc64/apic/mpapic.h b/sys/platform/pc64/apic/mpapic.h new file mode 100644 index 0000000000..b50a4040bf --- /dev/null +++ b/sys/platform/pc64/apic/mpapic.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 1996, by Steve Passe + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/include/mpapic.h,v 1.14.2.2 2000/09/30 02:49:34 ps Exp $ + * $DragonFly: src/sys/platform/pc64/apic/mpapic.h,v 1.1 2008/08/29 17:07:12 dillon Exp $ + */ + +#ifndef _MACHINE_MPAPIC_H_ +#define _MACHINE_MPAPIC_H_ + +#include "apicreg.h" + +#include + +/* + * Size of APIC ID list. + * Also used a MAX size of various other arrays. + */ +#define NAPICID 16 + +/* these don't really belong in here... */ +enum busTypes { + CBUS = 1, + CBUSII = 2, + EISA = 3, + MCA = 4, + ISA = 6, + PCI = 13, + XPRESS = 18, + MAX_BUSTYPE = 18, + UNKNOWN_BUSTYPE = 0xff +}; + + +/* + * the physical/logical APIC ID management macros + */ +#define CPU_TO_ID(CPU) (cpu_num_to_apic_id[CPU]) +#define ID_TO_CPU(ID) (apic_id_to_logical[ID]) +#ifdef APIC_IO +#define IO_TO_ID(IO) (io_num_to_apic_id[IO]) +#define ID_TO_IO(ID) (apic_id_to_logical[ID]) +#endif + +#ifdef SMP + +/* + * send an IPI INTerrupt containing 'vector' to all CPUs EXCEPT myself + */ +static __inline int +all_but_self_ipi(int vector) +{ + if (smp_active_mask == 1) + return 0; + return apic_ipi(APIC_DEST_ALLESELF, vector, APIC_DELMODE_FIXED); +} + +#endif + +#endif /* _MACHINE_MPAPIC_H */ diff --git a/sys/platform/pc64/conf/files b/sys/platform/pc64/conf/files index 7b24d8bde1..9223b0827d 100644 --- a/sys/platform/pc64/conf/files +++ b/sys/platform/pc64/conf/files @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. 
# -# $DragonFly: src/sys/platform/pc64/conf/files,v 1.2 2007/09/24 03:24:45 yanyh Exp $ +# $DragonFly: src/sys/platform/pc64/conf/files,v 1.3 2008/08/29 17:07:15 dillon Exp $ # linux32_genassym.o optional compat_linux32 \ @@ -16,6 +16,22 @@ linux32_assym.h optional compat_linux32 \ no-obj no-implicit-rule before-depend \ clean "linux32_assym.h" # +font.h optional sc_dflt_font \ + compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ + no-obj no-implicit-rule before-depend \ + clean "font.h" +# +atkbdmap.h optional atkbd_dflt_keymap \ + compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ + no-obj no-implicit-rule before-depend \ + clean "atkbdmap.h" +# +ukbdmap.h optional ukbd_dflt_keymap \ + compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ + no-obj no-implicit-rule before-depend \ + clean "ukbdmap.h" +# +# emulation/linux/linux_file.c optional compat_linux32 emulation/linux/linux_getcwd.c optional compat_linux32 emulation/linux/linux_ioctl.c optional compat_linux32 @@ -35,6 +51,25 @@ emulation/linux/amd64/linux32_machdep.c optional compat_linux32 emulation/linux/amd64/linux32_sysent.c optional compat_linux32 emulation/linux/amd64/linux32_sysvec.c optional compat_linux32 +dev/video/fb/fb.c optional vga +dev/video/fb/splash.c optional splash +dev/video/fb/vga.c 
optional vga +dev/misc/kbd/atkbd.c optional atkbd +dev/misc/kbd/atkbdc.c optional atkbdc +dev/misc/kbd/kbd.c optional atkbd +dev/misc/kbd/kbd.c optional kbd +dev/misc/kbd/kbd.c optional sc +dev/misc/kbd/kbd.c optional ukbd +dev/misc/syscons/schistory.c optional sc +dev/misc/syscons/scmouse.c optional sc +dev/misc/syscons/scterm.c optional sc +dev/misc/syscons/scterm-dumb.c optional sc +dev/misc/syscons/scterm-sc.c optional sc +dev/misc/syscons/scvgarndr.c optional sc vga +dev/misc/syscons/scvidctl.c optional sc +dev/misc/syscons/scvtb.c optional sc +dev/misc/syscons/syscons.c optional sc +dev/misc/syscons/sysmouse.c optional sc vfs/smbfs/smbfs_io.c optional smbfs vfs/smbfs/smbfs_node.c optional smbfs vfs/smbfs/smbfs_smb.c optional smbfs @@ -50,16 +85,17 @@ platform/pc64/amd64/mp.c optional smp \ platform/pc64/amd64/mplock.s optional smp # DDB XXX +cpu/amd64/misc/amd64-gdbstub.c optional ddb platform/pc64/amd64/elf_machdep.c standard platform/pc64/amd64/in_cksum2.s optional inet platform/pc64/amd64/ktr.c optional ktr platform/pc64/amd64/db_disasm.c optional ddb -# platform/pc64/amd64/i386-gdbstub.c optional ddb # # DOS mbr and gpt kern/subr_diskmbr.c standard kern/subr_diskgpt.c standard +libkern/memset.c standard libkern/stack_protector.c standard # DEVICES @@ -67,31 +103,52 @@ libkern/stack_protector.c standard # PLATFORM FILES # +platform/pc64/amd64/exception.S standard +platform/pc64/amd64/ipl.s standard platform/pc64/amd64/global.s standard platform/pc64/amd64/support.s standard platform/pc64/amd64/sigtramp.s standard platform/pc64/amd64/swtch.s standard -platform/pc64/amd64/npx.c mandatory npx +platform/pc64/amd64/npx.c standard platform/pc64/amd64/db_interface.c standard platform/pc64/amd64/db_trace.c standard platform/pc64/amd64/vm_machdep.c standard -platform/pc64/amd64/cpu_regs.c standard +platform/pc64/amd64/machdep.c standard platform/pc64/amd64/userldt.c standard platform/pc64/amd64/tls.c standard platform/pc64/amd64/trap.c standard 
-platform/pc64/amd64/exception.c standard platform/pc64/amd64/procfs_machdep.c standard -platform/pc64/amd64/fork_tramp.s standard -#platform/pc64/amd64/machdep.c standard -#platform/pc64/amd64/initcpu.c standard -#platform/pc64/amd64/identcpu.c standard +platform/pc64/amd64/initcpu.c standard +platform/pc64/amd64/identcpu.c standard + +bus/isa/amd64/isa.c optional isa +bus/isa/amd64/isa_compat.c optional isa compat_oldisa +bus/isa/amd64/isa_dma.c optional isa + +platform/pc64/amd64/nexus.c standard +bus/pci/amd64/pci_cfgreg.c optional pci +bus/pci/amd64/pcibus.c optional pci + +platform/pc64/icu/icu_abi.c standard +platform/pc64/icu/icu_ipl.s standard +platform/pc64/icu/icu_vector.s standard platform/pc64/amd64/init.c standard platform/pc64/amd64/globaldata.c standard -platform/pc64/amd64/machintr.c standard platform/pc64/amd64/pmap.c standard +platform/pc64/amd64/pmap_inval.c standard platform/pc64/amd64/busdma_machdep.c standard platform/pc64/amd64/sysarch.c standard platform/pc64/amd64/systimer.c standard platform/pc64/amd64/console.c standard platform/pc64/amd64/ipl_funcs.c standard +kern/syscalls.c standard +dev/misc/atkbd/atkbd_isa.c optional atkbd +dev/misc/atkbdc_layer/atkbdc_isa.c optional atkbdc +dev/misc/psm/psm.c optional psm +dev/serial/sio/sio.c optional sio +bus/isa/syscons_isa.c optional sc +bus/isa/vga_isa.c optional vga +platform/pc64/isa/clock.c standard +platform/pc64/isa/intr_machdep.c standard +platform/pc64/amd64/spinlock.s standard diff --git a/sys/platform/pc64/conf/kern.mk b/sys/platform/pc64/conf/kern.mk index 168811e630..6cae74924c 100644 --- a/sys/platform/pc64/conf/kern.mk +++ b/sys/platform/pc64/conf/kern.mk @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/kern.mk,v 1.52 2007/05/24 21:53:42 obrien Exp $ -# $DragonFly: src/sys/platform/pc64/conf/kern.mk,v 1.1 2007/09/23 04:29:31 yanyh Exp $ +# $DragonFly: src/sys/platform/pc64/conf/kern.mk,v 1.2 2008/08/29 17:07:15 dillon Exp $ # # Warning flags for compiling the kernel and components of the 
kernel. @@ -9,7 +9,8 @@ # reserved for user applications. # CFLAGS+= -mpreferred-stack-boundary=4 -CFLAGS+= -mcmodel=kernel -mno-red-zone \ - -mfpmath=387 -mno-sse -mno-sse2 -mno-mmx -mno-3dnow \ - -msoft-float -fno-asynchronous-unwind-tables +CFLAGS+= -mcmodel=small -mno-red-zone \ + -mfpmath=387 -mno-sse -mno-sse2 -mno-sse3 -mno-mmx -mno-3dnow \ + -msoft-float -fno-asynchronous-unwind-tables \ + -fno-omit-frame-pointer INLINE_LIMIT?= 8000 diff --git a/sys/platform/pc64/conf/options b/sys/platform/pc64/conf/options index 0be3530797..fd2e575880 100644 --- a/sys/platform/pc64/conf/options +++ b/sys/platform/pc64/conf/options @@ -1,13 +1,54 @@ # -# $DragonFly: src/sys/platform/pc64/conf/options,v 1.3 2008/01/31 11:48:55 swildner Exp $ +# $DragonFly: src/sys/platform/pc64/conf/options,v 1.4 2008/08/29 17:07:15 dillon Exp $ # # The cpu type # HAMMER_CPU opt_cpu.h +MAXCONS opt_syscons.h +SC_ALT_MOUSE_IMAGE opt_syscons.h +SC_DEBUG_LEVEL opt_syscons.h +SC_DFLT_FONT opt_syscons.h +SC_DISABLE_DDBKEY opt_syscons.h +SC_DISABLE_REBOOT opt_syscons.h +SC_HISTORY_SIZE opt_syscons.h +SC_KERNEL_CONS_ATTR opt_syscons.h +SC_KERNEL_CONS_REV_ATTR opt_syscons.h +SC_MOUSE_CHAR opt_syscons.h +SC_NO_CUTPASTE opt_syscons.h +SC_NO_FONT_LOADING opt_syscons.h +SC_NO_HISTORY opt_syscons.h +SC_NO_SYSMOUSE opt_syscons.h +SC_NORM_ATTR opt_syscons.h +SC_NORM_REV_ATTR opt_syscons.h +SC_PIXEL_MODE opt_syscons.h +SC_TWOBUTTON_MOUSE opt_syscons.h + +VGA_ALT_SEQACCESS opt_vga.h +VGA_DEBUG opt_vga.h +VGA_NO_FONT_LOADING opt_vga.h +VGA_NO_MODE_CHANGE opt_vga.h +VGA_SLOW_IOACCESS opt_vga.h +VGA_WIDTH90 opt_vga.h + +PSM_HOOKRESUME opt_psm.h +PSM_RESETAFTERSUSPEND opt_psm.h +PSM_DEBUG opt_psm.h + +ATKBD_DFLT_KEYMAP opt_atkbd.h + +KBD_DISABLE_KEYMAP_LOAD opt_kbd.h +KBD_INSTALL_CDEV opt_kbd.h +KBD_MAXRETRY opt_kbd.h +KBD_MAXWAIT opt_kbd.h +KBD_RESETDELAY opt_kbd.h +KBDIO_DEBUG opt_kbd.h # # SHOW_BUSYBUFS PANIC_REBOOT_WAIT_TIME opt_panic.h +AUTO_EOI_1 opt_auto_eoi.h +AUTO_EOI_2 opt_auto_eoi.h + diff --git 
a/sys/cpu/amd64/include/tss.h b/sys/platform/pc64/icu/icu.h similarity index 59% copy from sys/cpu/amd64/include/tss.h copy to sys/platform/pc64/icu/icu.h index 7053eb73ef..0aa6a77e9c 100644 --- a/sys/cpu/amd64/include/tss.h +++ b/sys/platform/pc64/icu/icu.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -33,40 +34,47 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)tss.h 5.4 (Berkeley) 1/18/91 - * $FreeBSD: src/sys/amd64/include/tss.h,v 1.15 2003/11/17 08:58:14 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/tss.h,v 1.1 2007/08/21 19:40:24 corecode Exp $ + * from: @(#)icu.h 5.6 (Berkeley) 5/9/91 + * $FreeBSD: src/sys/i386/isa/icu.h,v 1.18 1999/12/26 12:43:47 bde Exp $ + * $DragonFly: src/sys/platform/pc64/icu/icu.h,v 1.1 2008/08/29 17:07:16 dillon Exp $ */ -#ifndef _CPU_TSS_H_ -#define _CPU_TSS_H_ +/* + * AT/386 Interrupt Control constants + * W. Jolitz 8/89 + */ + +#ifndef _ARCH_ICU_ICU_H_ +#define _ARCH_ICU_ICU_H_ /* - * amd64 Context Data Type - * - * The alignment is pretty messed up here due to reuse of the original 32 bit - * fields. It might be worth trying to set the tss on a +4 byte offset to - * make the 64 bit fields aligned in practice. + * Note: The APIC uses different values for IRQxxx. + * Unfortunately many drivers use the 8259 values as indexes + * into tables, etc. The APIC equivilants are kept as APIC_IRQxxx. + * The 8259 versions have to be used in SMP for legacy operation + * of the drivers. 
+ */ + +/* + * Interrupt enable bit numbers - in normal order of priority + * (which we change) */ -struct amd64tss { - u_int32_t tss_rsvd0; - u_int64_t tss_rsp0 __packed; /* kernel stack pointer ring 0 */ - u_int64_t tss_rsp1 __packed; /* kernel stack pointer ring 1 */ - u_int64_t tss_rsp2 __packed; /* kernel stack pointer ring 2 */ - u_int32_t tss_rsvd1; - u_int32_t tss_rsvd2; - u_int32_t tss_rsvd3; - u_int64_t tss_ist1 __packed; /* Interrupt stack table 1 */ - u_int64_t tss_ist2 __packed; /* Interrupt stack table 2 */ - u_int64_t tss_ist3 __packed; /* Interrupt stack table 3 */ - u_int64_t tss_ist4 __packed; /* Interrupt stack table 4 */ - u_int64_t tss_ist5 __packed; /* Interrupt stack table 5 */ - u_int64_t tss_ist6 __packed; /* Interrupt stack table 6 */ - u_int64_t tss_ist7 __packed; /* Interrupt stack table 7 */ - u_int32_t tss_rsvd4; - u_int32_t tss_rsvd5; - u_int16_t tss_rsvd6; - u_int16_t tss_iobase; /* io bitmap offset */ -}; +#define ICU_IRQ0 0 /* highest priority - timer */ +#define ICU_IRQ1 1 +#define ICU_IRQ_SLAVE 2 +#define ICU_IRQ8 8 +#define ICU_IRQ9 9 +#define ICU_IRQ2 ICU_IRQ9 +#define ICU_IRQ10 10 +#define ICU_IRQ11 11 +#define ICU_IRQ12 12 +#define ICU_IRQ13 13 +#define ICU_IRQ14 14 +#define ICU_IRQ15 15 +#define ICU_IRQ3 3 /* this is highest after rotation */ +#define ICU_IRQ4 4 +#define ICU_IRQ5 5 +#define ICU_IRQ6 6 +#define ICU_IRQ7 7 /* lowest - parallel printer */ -#endif /* _CPU_TSS_H_ */ +#endif /* !_ARCH_ICU_ICU_H_ */ diff --git a/sys/platform/pc64/icu/icu_abi.c b/sys/platform/pc64/icu/icu_abi.c new file mode 100644 index 0000000000..961276f638 --- /dev/null +++ b/sys/platform/pc64/icu/icu_abi.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2005,2008 The DragonFly Project. + * All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/platform/pc64/icu/icu_abi.c,v 1.1 2008/08/29 17:07:16 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "icu.h" +#include "icu_ipl.h" + +#ifndef APIC_IO + +extern void ICU_INTREN(int); +extern void ICU_INTRDIS(int); + +extern inthand_t + IDTVEC(icu_fastintr0), IDTVEC(icu_fastintr1), + IDTVEC(icu_fastintr2), IDTVEC(icu_fastintr3), + IDTVEC(icu_fastintr4), IDTVEC(icu_fastintr5), + IDTVEC(icu_fastintr6), IDTVEC(icu_fastintr7), + IDTVEC(icu_fastintr8), IDTVEC(icu_fastintr9), + IDTVEC(icu_fastintr10), IDTVEC(icu_fastintr11), + IDTVEC(icu_fastintr12), IDTVEC(icu_fastintr13), + IDTVEC(icu_fastintr14), IDTVEC(icu_fastintr15); + +extern inthand_t + IDTVEC(icu_slowintr0), IDTVEC(icu_slowintr1), + IDTVEC(icu_slowintr2), IDTVEC(icu_slowintr3), + IDTVEC(icu_slowintr4), IDTVEC(icu_slowintr5), + IDTVEC(icu_slowintr6), IDTVEC(icu_slowintr7), + IDTVEC(icu_slowintr8), IDTVEC(icu_slowintr9), + IDTVEC(icu_slowintr10), IDTVEC(icu_slowintr11), + IDTVEC(icu_slowintr12), IDTVEC(icu_slowintr13), + IDTVEC(icu_slowintr14), IDTVEC(icu_slowintr15); + +static int icu_vectorctl(int, int, int); +static int icu_setvar(int, const void *); +static int icu_getvar(int, void *); +static void icu_finalize(void); +static void icu_cleanup(void); + +static inthand_t *icu_fastintr[ICU_HWI_VECTORS] = { + &IDTVEC(icu_fastintr0), &IDTVEC(icu_fastintr1), + &IDTVEC(icu_fastintr2), &IDTVEC(icu_fastintr3), + &IDTVEC(icu_fastintr4), &IDTVEC(icu_fastintr5), + &IDTVEC(icu_fastintr6), &IDTVEC(icu_fastintr7), + &IDTVEC(icu_fastintr8), &IDTVEC(icu_fastintr9), + &IDTVEC(icu_fastintr10), &IDTVEC(icu_fastintr11), + &IDTVEC(icu_fastintr12), &IDTVEC(icu_fastintr13), + &IDTVEC(icu_fastintr14), &IDTVEC(icu_fastintr15) +}; + +static inthand_t *icu_slowintr[ICU_HWI_VECTORS] = { + &IDTVEC(icu_slowintr0), &IDTVEC(icu_slowintr1), + &IDTVEC(icu_slowintr2), &IDTVEC(icu_slowintr3), + &IDTVEC(icu_slowintr4), 
&IDTVEC(icu_slowintr5), + &IDTVEC(icu_slowintr6), &IDTVEC(icu_slowintr7), + &IDTVEC(icu_slowintr8), &IDTVEC(icu_slowintr9), + &IDTVEC(icu_slowintr10), &IDTVEC(icu_slowintr11), + &IDTVEC(icu_slowintr12), &IDTVEC(icu_slowintr13), + &IDTVEC(icu_slowintr14), &IDTVEC(icu_slowintr15) +}; + +struct machintr_abi MachIntrABI = { + MACHINTR_ICU, + .intrdis = ICU_INTRDIS, + .intren = ICU_INTREN, + .vectorctl =icu_vectorctl, + .setvar = icu_setvar, + .getvar = icu_getvar, + .finalize = icu_finalize, + .cleanup = icu_cleanup +}; + +static int icu_imcr_present; + +/* + * WARNING! SMP builds can use the ICU now so this code must be MP safe. + */ +static +int +icu_setvar(int varid, const void *buf) +{ + int error = 0; + + switch(varid) { + case MACHINTR_VAR_IMCR_PRESENT: + icu_imcr_present = *(const int *)buf; + break; + default: + error = ENOENT; + break; + } + return (error); +} + +static +int +icu_getvar(int varid, void *buf) +{ + int error = 0; + + switch(varid) { + case MACHINTR_VAR_IMCR_PRESENT: + *(int *)buf = icu_imcr_present; + break; + default: + error = ENOENT; + break; + } + return (error); +} + +/* + * Called before interrupts are physically enabled + */ +static void +icu_finalize(void) +{ + int intr; + + for (intr = 0; intr < ICU_HWI_VECTORS; ++intr) { + machintr_intrdis(intr); + } + machintr_intren(ICU_IRQ_SLAVE); + + /* + * If an IMCR is present, programming bit 0 disconnects the 8259 + * from the BSP. The 8259 may still be connected to LINT0 on the BSP's + * LAPIC. + * + * If we are running SMP the LAPIC is active, try to use virtual wire + * mode so we can use other interrupt sources within the LAPIC in + * addition to the 8259. + */ + if (icu_imcr_present) { +#if defined(SMP) + outb(0x22, 0x70); + outb(0x23, 0x01); +#endif + } +} + +/* + * Called after interrupts physically enabled but before the + * critical section is released. 
+ */ +static +void +icu_cleanup(void) +{ + mdcpu->gd_fpending = 0; + mdcpu->gd_ipending = 0; +} + + +static +int +icu_vectorctl(int op, int intr, int flags) +{ + int error; + register_t ef; + + if (intr < 0 || intr >= ICU_HWI_VECTORS || intr == ICU_IRQ_SLAVE) + return (EINVAL); + + ef = read_rflags(); + cpu_disable_intr(); + error = 0; + + switch(op) { + case MACHINTR_VECTOR_SETUP: + setidt(IDT_OFFSET + intr, + flags & INTR_FAST ? icu_fastintr[intr] : icu_slowintr[intr], + SDT_SYSIGT, SEL_KPL, 0); + machintr_intren(intr); + break; + case MACHINTR_VECTOR_TEARDOWN: + case MACHINTR_VECTOR_SETDEFAULT: + setidt(IDT_OFFSET + intr, icu_slowintr[intr], + SDT_SYSIGT, SEL_KPL, 0); + machintr_intrdis(intr); + break; + default: + error = EOPNOTSUPP; + break; + } + write_rflags(ef); + return (error); +} + +#endif diff --git a/sys/platform/pc64/include/pcb_ext.h b/sys/platform/pc64/icu/icu_ipl.h similarity index 56% copy from sys/platform/pc64/include/pcb_ext.h copy to sys/platform/pc64/icu/icu_ipl.h index ed044ea65b..1c3362089b 100644 --- a/sys/platform/pc64/include/pcb_ext.h +++ b/sys/platform/pc64/icu/icu_ipl.h @@ -1,5 +1,6 @@ /*- - * Copyright (c) 1997 Jonathan Lemon + * Copyright (c) 1997 Bruce Evans. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,50 +24,32 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/pcb_ext.h,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $FreeBSD: src/sys/i386/isa/icu_ipl.h,v 1.3 1999/08/28 00:44:42 peter Exp $ + * $DragonFly: src/sys/platform/pc64/icu/icu_ipl.h,v 1.1 2008/08/29 17:07:16 dillon Exp $ */ -#ifndef _MACHINE_PCB_EXT_H_ -#define _MACHINE_PCB_EXT_H_ +#ifndef _ARCH_ICU_ICU_IPL_H_ +#define _ARCH_ICU_ICU_IPL_H_ -#ifndef _SYS_TYPES_H_ -#include -#endif +#define ICU_HWI_VECTORS 16 +#define ICU_HWI_MASK ((1 << ICU_HWI_VECTORS) - 1) + +#ifdef LOCORE /* - * Extension to the 386 process control block + * SMP interrupt mask protection. The first version is used + * when interrupts might not be disabled, the second version is + * used when interrupts are disabled. */ -#ifndef _MACHINE_TSS_H_ -#include -#endif -#ifndef _MACHINE_SEGMENTS_H_ -#include -#endif - -struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ - struct amd64tss ext_tss; /* per-process amd64tss */ - caddr_t ext_iomap; /* i/o permission bitmap */ - /* struct vm86_kernel ext_vm86; */ /* vm86 area */ -}; -struct pcb_ldt { - caddr_t ldt_base; - int ldt_len; - int ldt_refcnt; - u_long ldt_active; - struct segment_descriptor ldt_sd; -}; +#define ICU_IMASK_LOCK \ + SPIN_LOCK(imen_spinlock) ; \ -#ifdef _KERNEL +#define ICU_IMASK_UNLOCK \ + SPIN_UNLOCK(imen_spinlock) ; \ -struct pcb; +#endif /* LOCORE */ -void set_user_ldt (struct pcb *); -struct pcb_ldt *user_ldt_alloc (struct pcb *, int); -void user_ldt_free (struct pcb *); -#endif +#endif /* !_ARCH_ICU_ICU_IPL_H_ */ -#endif /* _MACHINE_PCB_EXT_H_ */ diff --git a/sys/platform/pc64/icu/icu_ipl.s b/sys/platform/pc64/icu/icu_ipl.s new file mode 100644 index 0000000000..16c300534f --- /dev/null +++ b/sys/platform/pc64/icu/icu_ipl.s @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003,2004,2008 The DragonFly Project. All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/isa/icu_ipl.s,v 1.6 1999/08/28 00:44:42 peter Exp $ + * $DragonFly: src/sys/platform/pc64/icu/icu_ipl.s,v 1.1 2008/08/29 17:07:16 dillon Exp $ + */ + +#include +#include +#include +#include +#include + +#include +#include "assym.s" +#include "icu_ipl.h" + +/* + * WARNING! SMP builds can use the ICU now so this code must be MP safe. + */ + +#ifndef APIC_IO + + .data + ALIGN_DATA + + /* + * Interrupt mask for ICU interrupts, defaults to all hardware + * interrupts turned off. + */ + .p2align 2 /* MUST be 32bit aligned */ + + .globl icu_imen +icu_imen: + .long ICU_HWI_MASK + + .text + SUPERALIGN_TEXT + + /* + * Functions to enable and disable a hardware interrupt. Only + * 16 ICU interrupts exist. + * + * INTREN(irq:%edi) + * INTRDIS(irq:%edi) + */ +ENTRY(ICU_INTRDIS) + ICU_IMASK_LOCK + movl %edi,%eax /* C argument */ + btsl %eax,icu_imen + movl icu_imen,%eax + outb %al,$IO_ICU1+ICU_IMR_OFFSET + mov %ah,%al + outb %al,$IO_ICU2+ICU_IMR_OFFSET + ICU_IMASK_UNLOCK + ret + +ENTRY(ICU_INTREN) + ICU_IMASK_LOCK + movl %edi,%eax /* C argument */ + btrl %eax,icu_imen + movl icu_imen,%eax + outb %al,$IO_ICU1+ICU_IMR_OFFSET + mov %ah,%al + outb %al,$IO_ICU2+ICU_IMR_OFFSET + ICU_IMASK_UNLOCK + ret + +#endif diff --git a/sys/platform/pc64/icu/icu_vector.s b/sys/platform/pc64/icu/icu_vector.s new file mode 100644 index 0000000000..b0b18e1223 --- /dev/null +++ b/sys/platform/pc64/icu/icu_vector.s @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: vector.s, 386BSD 0.1 unknown origin + * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $ + * $DragonFly: src/sys/platform/pc64/icu/icu_vector.s,v 1.1 2008/08/29 17:07:16 dillon Exp $ + */ +/* + * WARNING! SMP builds can use the ICU now so this code must be MP safe. 
+ */ + +#include "opt_auto_eoi.h" + +#include +#include +#include +#include + +#include +#include + +#include "assym.s" +#include "icu_ipl.h" + +#ifndef APIC_IO + +#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ + +#define ICU_EOI 0x20 /* XXX - define elsewhere */ + +#define IRQ_LBIT(irq_num) (1 << (irq_num)) +#define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_BYTE(irq_num) ((irq_num) >> 3) + +#ifdef AUTO_EOI_1 +#define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ +#define OUTB_ICU1 +#else +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + OUTB_ICU1 ; /* ... to clear in service bit */ \ + +#define OUTB_ICU1 \ + outb %al,$IO_ICU1 ; \ + +#endif + +#ifdef AUTO_EOI_2 +/* + * The data sheet says no auto-EOI on slave, but it sometimes works. + */ +#define ENABLE_ICU1_AND_2 ENABLE_ICU1 +#else +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + OUTB_ICU1 ; /* ... then first icu (if !AUTO_EOI_1) */ \ + +#endif + +/* + * Macro helpers + */ +#define ICU_PUSH_FRAME \ + PUSH_FRAME ; /* 15 regs + space for 4 extras */ \ + movl $0,TF_TRAPNO(%rsp) ; \ + movl $0,TF_ADDR(%rsp) ; \ + movl $0,TF_FLAGS(%rsp) ; \ + movl $0,TF_ERR(%rsp) ; \ + cld ; \ + +#define MASK_IRQ(icu, irq_num) \ + ICU_IMASK_LOCK ; \ + movb icu_imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,icu_imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET ; \ + ICU_IMASK_UNLOCK ; \ + +#define UNMASK_IRQ(icu, irq_num) \ + cmpl $0,%eax ; \ + jnz 8f ; \ + ICU_IMASK_LOCK ; \ + movb icu_imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,icu_imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET ; \ + ICU_IMASK_UNLOCK ; \ +8: ; \ + +/* + * Fast interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. 
+ * - If we cannot take the interrupt set its fpending bit and + * doreti. + * - If we can take the interrupt clear its fpending bit, + * call the handler, then unmask the interrupt and doreti. + * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + ICU_PUSH_FRAME ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + movq PCPU(curthread),%rbx ; \ + testl $-1,TD_NEST_COUNT(%rbx) ; \ + jne 1f ; \ + cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \ + jl 2f ; \ +1: ; \ + /* set pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \ + orl $RQF_INTPEND, PCPU(reqflags) ; \ + jmp 5f ; \ +2: ; \ + /* clear pending bit, run handler */ \ + andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ + pushq $irq_num ; \ + movq %rsp,%rdi ; /* rdi = call argument */ \ + call ithread_fast_handler ; /* returns 0 to unmask int */ \ + addq $8,%rsp ; /* intr frame -> trap frame */ \ + UNMASK_IRQ(icu, irq_num) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ + +/* + * Slow interrupt call handlers run in the following sequence: + * + * - Push the trap frame required by doreti. + * - Mask the interrupt and reenable its source. + * - If we cannot take the interrupt set its ipending bit and + * doreti. In addition to checking for a critical section + * and cpl mask we also check to see if the thread is still + * running. + * - If we can take the interrupt clear its ipending bit + * and schedule its thread. Leave interrupts masked and doreti. + * + * sched_ithd() is called with interrupts enabled and outside of a + * critical section (so it can preempt us). + * + * YYY sched_ithd may preempt us synchronously (fix interrupt stacking) + * + * Note that intr_nesting_level is not bumped during sched_ithd because + * blocking allocations are allowed in the preemption case. 
+ * + * YYY can cache gd base pointer instead of using hidden %fs + * prefixes. + */ + +#define SLOW_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + ICU_PUSH_FRAME ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ + MASK_IRQ(icu, irq_num) ; \ + incl PCPU(cnt) + V_INTR ; \ + enable_icus ; \ + movq PCPU(curthread),%rbx ; \ + testl $-1,TD_NEST_COUNT(%rbx) ; \ + jne 1f ; \ + cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \ + jl 2f ; \ +1: ; \ + /* set the pending bit and return, leave interrupt masked */ \ + orl $IRQ_LBIT(irq_num), PCPU(ipending) ; \ + orl $RQF_INTPEND, PCPU(reqflags) ; \ + jmp 5f ; \ +2: ; \ + /* set running bit, clear pending bit, run handler */ \ + andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ + incl TD_NEST_COUNT(%rbx) ; \ + sti ; \ + movq $irq_num,%rdi ; /* %rdi = argument to call */ \ + call sched_ithd ; \ + cli ; \ + decl TD_NEST_COUNT(%rbx) ; \ +5: ; \ + MEXITCOUNT ; \ + jmp doreti ; \ + +/* + * Unmask a slow interrupt. This function is used by interrupt threads + * after they have descheduled themselves to reenable interrupts and + * possibly cause a reschedule to occur. 
+ */ + +#define INTR_UNMASK(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + pushq %rbp ; /* frame for ddb backtrace */ \ + movq %rsp, %rbp ; \ + subq %rax, %rax ; \ + UNMASK_IRQ(icu, irq_num) ; \ + popq %rbp ; \ + ret ; \ + +MCOUNT_LABEL(bintr) + FAST_INTR(0,icu_fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,icu_fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,icu_fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,icu_fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,icu_fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,icu_fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,icu_fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,icu_fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,icu_fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,icu_fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,icu_fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,icu_fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,icu_fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,icu_fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,icu_fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,icu_fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) + + SLOW_INTR(0,icu_slowintr0, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(1,icu_slowintr1, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(2,icu_slowintr2, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(3,icu_slowintr3, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(4,icu_slowintr4, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(5,icu_slowintr5, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(6,icu_slowintr6, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(7,icu_slowintr7, IO_ICU1, ENABLE_ICU1) + SLOW_INTR(8,icu_slowintr8, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(9,icu_slowintr9, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(10,icu_slowintr10, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(11,icu_slowintr11, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(12,icu_slowintr12, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(13,icu_slowintr13, IO_ICU2, ENABLE_ICU1_AND_2) + SLOW_INTR(14,icu_slowintr14, IO_ICU2, ENABLE_ICU1_AND_2) + 
SLOW_INTR(15,icu_slowintr15, IO_ICU2, ENABLE_ICU1_AND_2) + +MCOUNT_LABEL(eintr) + + .data + + .text + +#endif diff --git a/sys/platform/pc64/include/globaldata.h b/sys/platform/pc64/include/globaldata.h index 9f5bb2c6be..302117fae0 100644 --- a/sys/platform/pc64/include/globaldata.h +++ b/sys/platform/pc64/include/globaldata.h @@ -1,5 +1,6 @@ /*- * Copyright (c) Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ * should not include this file. * * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/pc64/include/globaldata.h,v 1.1 2007/08/21 19:45:45 corecode Exp $ + * $DragonFly: src/sys/platform/pc64/include/globaldata.h,v 1.2 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_GLOBALDATA_H_ @@ -43,10 +44,13 @@ #include /* struct thread */ #endif #ifndef _MACHINE_SEGMENTS_H_ -#include /* struct segment_descriptor */ +#include /* struct user_segment_descriptor */ #endif #ifndef _MACHINE_TSS_H_ -#include /* struct i386tss */ +#include /* struct amd64tss */ +#endif +#ifndef _MACHINE_NPX_H_ +#include #endif /* @@ -63,10 +67,12 @@ */ struct mdglobaldata { struct globaldata mi; - struct segment_descriptor gd_common_tssd; - struct segment_descriptor *gd_tss_gdt; + struct user_segment_descriptor gd_common_tssd; + struct user_segment_descriptor *gd_tss_gdt; struct thread *gd_npxthread; struct amd64tss gd_common_tss; + union savefpu gd_savefpu; /* fast bcopy/zero temp fpu save area */ + int gd_fpu_lock; /* fast bcopy/zero cpu lock */ int gd_fpending; /* fast interrupt pending */ int gd_ipending; /* normal interrupt pending */ int gd_spending; /* software interrupt pending */ @@ -83,15 +89,26 @@ struct mdglobaldata { caddr_t gd_CADDR1; caddr_t gd_CADDR2; caddr_t gd_CADDR3; - unsigned *gd_PADDR1; + pt_entry_t *gd_PADDR1; + register_t gd_scratch_rsp; + register_t gd_rsp0; + register_t 
gd_user_fs; /* current user fs in MSR */ + register_t gd_user_gs; /* current user gs in MSR */ }; +#define MDGLOBALDATA_BASEALLOC_SIZE \ + ((sizeof(struct mdglobaldata) + PAGE_MASK) & ~PAGE_MASK) +#define MDGLOBALDATA_BASEALLOC_PAGES \ + (MDGLOBALDATA_BASEALLOC_SIZE / PAGE_SIZE) +#define MDGLOBALDATA_PAD \ + (MDGLOBALDATA_BASEALLOC_SIZE - sizeof(struct mdglobaldata)) + /* * This is the upper (0xff800000) address space layout that is per-cpu. * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for * each AP. genassym helps export this to the assembler code. * - * WARNING! page-bounded fields are hardwired for SMPpt[] setup in + * JG WARNING! page-bounded fields are hardwired for SMPpt[] setup in * i386/i386/mp_machdep.c and locore.s. */ struct privatespace { diff --git a/sys/platform/pc64/include/intr_machdep.h b/sys/platform/pc64/include/intr_machdep.h index 9742c1559b..d826013fb1 100644 --- a/sys/platform/pc64/include/intr_machdep.h +++ b/sys/platform/pc64/include/intr_machdep.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2003 John Baldwin + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/amd64/include/intr_machdep.h,v 1.18 2007/05/08 21:29:13 jhb Exp $ - * $DragonFly: src/sys/platform/pc64/include/intr_machdep.h,v 1.1 2007/09/23 04:42:07 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/intr_machdep.h,v 1.2 2008/08/29 17:07:17 dillon Exp $ */ #ifndef __MACHINE_INTR_MACHDEP_H__ @@ -69,5 +70,13 @@ #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif +#ifndef LOCORE + +#ifndef JG_defined_inthand_t +#define JG_defined_inthand_t +typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +#endif + +#endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/platform/pc64/amd64/console.c b/sys/platform/pc64/include/ipl.h similarity index 84% copy from sys/platform/pc64/amd64/console.c copy to sys/platform/pc64/include/ipl.h index 6d194c352a..a358cc3f36 100644 --- a/sys/platform/pc64/amd64/console.c +++ b/sys/platform/pc64/include/ipl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2006-2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,20 +31,13 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc64/amd64/console.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/ipl.h,v 1.1 2008/08/29 17:07:17 dillon Exp $ */ -#include +#ifndef _MACHINE_IPL_H_ +#define _MACHINE_IPL_H_ -/* - * Global console locking functions - */ -void -cons_lock(void) -{ -} +#include +#include -void -cons_unlock(void) -{ -} +#endif diff --git a/sys/platform/pc64/include/lock.h b/sys/platform/pc64/include/lock.h index 430f3f672d..40522086f0 100644 --- a/sys/platform/pc64/include/lock.h +++ b/sys/platform/pc64/include/lock.h @@ -1,5 +1,7 @@ /* - * Copyright (c) 2003 Matthew Dillon, All rights reserved. 
+ * Copyright (c) 2003,2008 The DragonFly Project. + * Copyright (c) 2003 Matthew Dillon. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -22,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/platform/pc64/include/lock.h,v 1.3 2008/06/19 21:32:55 aggelos Exp $ + * $DragonFly: src/sys/platform/pc64/include/lock.h,v 1.4 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_LOCK_H_ @@ -42,7 +44,7 @@ #ifdef LOCORE /* - * Spinlock assembly support. Note: eax and ecx can be tromped. No + * Spinlock assembly support. Note: rax and rcx can be tromped. No * other register will be. Note that these routines are sometimes * called with (%edx) as the mem argument. * @@ -54,30 +56,30 @@ #ifdef SMP #define SPIN_INIT(mem) \ - movl $0,mem ; \ + movq $0,mem ; \ #define SPIN_INIT_NOREG(mem) \ SPIN_INIT(mem) ; \ #define SPIN_LOCK(mem) \ - pushfl ; \ - popl %ecx ; /* flags */ \ + pushfq ; \ + popq %rcx ; /* flags */ \ cli ; \ - orl $PSL_C,%ecx ; /* make sure non-zero */ \ + orl $PSL_C,%rcx ; /* make sure non-zero */ \ 7: ; \ - movl $0,%eax ; /* expected contents of lock */ \ - lock cmpxchgl %ecx,mem ; /* Z=1 (jz) on success */ \ + movq $0,%rax ; /* expected contents of lock */ \ + lock cmpxchgq %rcx,mem ; /* Z=1 (jz) on success */ \ jnz 7b ; \ #define SPIN_LOCK_PUSH_REGS \ - subl $8,%esp ; \ - movl %ecx,(%esp) ; \ - movl %eax,4(%esp) ; \ + subq $2*8,%rsp ; \ + movq %rcx,(%rsp) ; \ + movq %rax,8(%rsp) ; \ #define SPIN_LOCK_POP_REGS \ - movl (%esp),%ecx ; \ - movl 4(%esp),%eax ; \ - addl $8,%esp ; \ + movq (%rsp),%rcx ; \ + movq 8(%rsp),%rax ; \ + addq $2*8,%rsp ; \ #define SPIN_LOCK_FRAME_SIZE 8 @@ -87,9 +89,9 @@ SPIN_LOCK_POP_REGS ; \ #define SPIN_UNLOCK(mem) \ - pushl mem ; \ - movl $0,mem ; \ - popfl ; \ + pushq mem ; \ + movq $0,mem ; \ + popfq ; \ #define SPIN_UNLOCK_PUSH_REGS #define 
SPIN_UNLOCK_POP_REGS @@ -98,22 +100,22 @@ #define SPIN_UNLOCK_NOREG(mem) \ SPIN_UNLOCK(mem) ; \ -#else +#else /* !SMP */ #define SPIN_LOCK(mem) \ - pushfl ; \ + pushfq ; \ cli ; \ - orl $PSL_C,(%esp) ; \ - popl mem ; \ + orq $PSL_C,(%rsp) ; \ + popq mem ; \ #define SPIN_LOCK_PUSH_RESG #define SPIN_LOCK_POP_REGS #define SPIN_LOCK_FRAME_SIZE 0 #define SPIN_UNLOCK(mem) \ - pushl mem ; \ - movl $0,mem ; \ - popfl ; \ + pushq mem ; \ + movq $0,mem ; \ + popfq ; \ #define SPIN_UNLOCK_PUSH_REGS #define SPIN_UNLOCK_POP_REGS @@ -130,7 +132,7 @@ * to disable/restore interrupts even if it doesn't spin. */ struct spinlock_deprecated { - volatile int opaque; + volatile long opaque; }; typedef struct spinlock_deprecated *spinlock_t; diff --git a/sys/platform/pc64/include/md_var.h b/sys/platform/pc64/include/md_var.h index 1ae1145233..288c1eae41 100644 --- a/sys/platform/pc64/include/md_var.h +++ b/sys/platform/pc64/include/md_var.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1995 Bruce D. Evans. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,12 +28,14 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/amd64/include/md_var.h,v 1.71 2004/01/29 00:05:03 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/md_var.h,v 1.4 2007/12/12 23:49:23 dillon Exp $ + * $DragonFly: src/sys/platform/pc64/include/md_var.h,v 1.5 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ +#include + /* * Miscellaneous machine-dependent declarations. 
*/ @@ -41,10 +44,13 @@ extern u_long atdevbase; /* offset in virtual memory of ISA io mem */ extern u_int basemem; extern int busdma_swi_pending; extern u_int cpu_exthigh; +extern u_int amd_feature; +extern u_int amd_feature2; extern u_int cpu_fxsr; extern u_int cpu_high; extern u_int cpu_id; extern u_int cpu_procinfo; +extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern char kstack[]; extern char sigcode[]; @@ -58,6 +64,8 @@ struct dbreg; struct __mcontext; void busdma_swi(void); +void cpu_gdinit (struct mdglobaldata *gd, int cpu); +void cpu_idle_restore (void); /* cannot be called from C */ void cpu_setregs(void); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); @@ -65,6 +73,7 @@ void enable_sse(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); void pagezero(void *addr); int isa_nmi(int cd); +void pagecopy(void *from, void *to); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); int user_dbreg_trap(void); void fpstate_drop(struct thread *td); diff --git a/sys/platform/pc64/include/pcb_ext.h b/sys/platform/pc64/include/metadata.h similarity index 55% copy from sys/platform/pc64/include/pcb_ext.h copy to sys/platform/pc64/include/metadata.h index ed044ea65b..96d4d2475c 100644 --- a/sys/platform/pc64/include/pcb_ext.h +++ b/sys/platform/pc64/include/metadata.h @@ -1,5 +1,6 @@ /*- - * Copyright (c) 1997 Jonathan Lemon + * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,50 +24,13 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/pcb_ext.h,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $FreeBSD$ + * $DragonFly: src/sys/platform/pc64/include/metadata.h,v 1.1 2008/08/29 17:07:17 dillon Exp $ */ -#ifndef _MACHINE_PCB_EXT_H_ -#define _MACHINE_PCB_EXT_H_ +#ifndef _MACHINE_METADATA_H_ +#define _MACHINE_METADATA_H_ -#ifndef _SYS_TYPES_H_ -#include -#endif +#define MODINFOMD_SMAP 0x1001 -/* - * Extension to the 386 process control block - */ -#ifndef _MACHINE_TSS_H_ -#include -#endif -#ifndef _MACHINE_SEGMENTS_H_ -#include -#endif - -struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ - struct amd64tss ext_tss; /* per-process amd64tss */ - caddr_t ext_iomap; /* i/o permission bitmap */ - /* struct vm86_kernel ext_vm86; */ /* vm86 area */ -}; - -struct pcb_ldt { - caddr_t ldt_base; - int ldt_len; - int ldt_refcnt; - u_long ldt_active; - struct segment_descriptor ldt_sd; -}; - -#ifdef _KERNEL - -struct pcb; - -void set_user_ldt (struct pcb *); -struct pcb_ldt *user_ldt_alloc (struct pcb *, int); -void user_ldt_free (struct pcb *); - -#endif - -#endif /* _MACHINE_PCB_EXT_H_ */ +#endif /* !_MACHINE_METADATA_H_ */ diff --git a/sys/platform/pc64/include/pcb_ext.h b/sys/platform/pc64/include/nexusvar.h similarity index 56% copy from sys/platform/pc64/include/pcb_ext.h copy to sys/platform/pc64/include/nexusvar.h index ed044ea65b..3eeea71b34 100644 --- a/sys/platform/pc64/include/pcb_ext.h +++ b/sys/platform/pc64/include/nexusvar.h @@ -1,5 +1,6 @@ /*- - * Copyright (c) 1997 Jonathan Lemon + * Copyright (c) 2000 Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,50 +24,46 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/pcb_ext.h,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $FreeBSD: src/sys/i386/include/nexusvar.h,v 1.1 2000/09/28 00:37:31 peter Exp $ + * $DragonFly: src/sys/platform/pc64/include/nexusvar.h,v 1.1 2008/08/29 17:07:17 dillon Exp $ */ -#ifndef _MACHINE_PCB_EXT_H_ -#define _MACHINE_PCB_EXT_H_ +#ifndef _MACHINE_NEXUSVAR_H_ +#define _MACHINE_NEXUSVAR_H_ + +#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) #ifndef _SYS_TYPES_H_ #include #endif - -/* - * Extension to the 386 process control block - */ -#ifndef _MACHINE_TSS_H_ -#include -#endif -#ifndef _MACHINE_SEGMENTS_H_ -#include +#ifndef _SYS_CONF_H_ +#include #endif -struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ - struct amd64tss ext_tss; /* per-process amd64tss */ - caddr_t ext_iomap; /* i/o permission bitmap */ - /* struct vm86_kernel ext_vm86; */ /* vm86 area */ +enum nexus_device_ivars { + NEXUS_IVAR_PCIBUS }; -struct pcb_ldt { - caddr_t ldt_base; - int ldt_len; - int ldt_refcnt; - u_long ldt_active; - struct segment_descriptor ldt_sd; -}; +#define NEXUS_ACCESSOR(A, B, T) \ + \ +static __inline T nexus_get_ ## A(device_t dev) \ +{ \ + uintptr_t v; \ + BUS_READ_IVAR(device_get_parent(dev), dev, NEXUS_IVAR_ ## B, &v); \ + return (T) v; \ +} \ + \ +static __inline void nexus_set_ ## A(device_t dev, T t) \ +{ \ + uintptr_t v = (uintptr_t) t; \ + BUS_WRITE_IVAR(device_get_parent(dev), dev, NEXUS_IVAR_ ## B, v); \ +} #ifdef _KERNEL - -struct pcb; - -void set_user_ldt (struct pcb *); -struct pcb_ldt *user_ldt_alloc (struct pcb *, int); -void user_ldt_free (struct pcb *); - +NEXUS_ACCESSOR(pcibus, PCIBUS, u_int32_t) #endif -#endif /* _MACHINE_PCB_EXT_H_ */ +#undef NEXUS_ACCESSOR + +#endif /* _KERNEL || _KERNEL_STRUCTURES */ +#endif /* !_MACHINE_NEXUSVAR_H_ */ diff --git a/sys/platform/pc64/include/param.h b/sys/platform/pc64/include/param.h index 
2e0ee72e51..c832e947ee 100644 --- a/sys/platform/pc64/include/param.h +++ b/sys/platform/pc64/include/param.h @@ -1,5 +1,5 @@ /* - * $DragonFly: src/sys/platform/pc64/include/param.h,v 1.1 2007/09/23 04:42:07 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/param.h,v 1.2 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_PARAM_H_ @@ -22,5 +22,10 @@ #include +/* JG from fbsd/sys/amd64/include/param.h */ +#ifndef KSTACK_PAGES +#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */ +#endif + #endif diff --git a/sys/platform/pc64/include/pc/bios.h b/sys/platform/pc64/include/pc/bios.h new file mode 100644 index 0000000000..86801857b8 --- /dev/null +++ b/sys/platform/pc64/include/pc/bios.h @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 1997 Michael Smith + * Copyright (c) 1998 Jonathan Lemon + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * $DragonFly: src/sys/platform/pc64/include/pc/bios.h,v 1.1 2008/08/29 17:07:18 dillon Exp $ + */ + +#ifndef _MACHINE_PC_BIOS_H_ +#define _MACHINE_PC_BIOS_H_ + +extern u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, + int paralen, int sigofs); + +#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE) +#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE) + +/* + * Int 15:E820 'SMAP' structure + */ + +#define SMAP_SIG 0x534D4150 /* 'SMAP' */ + +#define SMAP_TYPE_MEMORY 1 +#define SMAP_TYPE_RESERVED 2 +#define SMAP_TYPE_ACPI_RECLAIM 3 +#define SMAP_TYPE_ACPI_NVS 4 +#define SMAP_TYPE_ACPI_ERROR 5 + +struct bios_smap { + u_int64_t base; + u_int64_t length; + u_int32_t type; +} __packed; + +struct bios_oem_signature { + char * anchor; /* search anchor string in BIOS memory */ + size_t offset; /* offset from anchor (may be negative) */ + size_t totlen; /* total length of BIOS string to copy */ +} __packed; +struct bios_oem_range { + u_int from; /* shouldn't be below 0xe0000 */ + u_int to; /* shouldn't be above 0xfffff */ +} __packed; +struct bios_oem { + struct bios_oem_range range; + struct bios_oem_signature signature[]; +} __packed; + +extern int +bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen); + + +#endif /* _MACHINE_PC_BIOS_H_ */ diff --git a/sys/platform/pc64/include/pc/display.h b/sys/platform/pc64/include/pc/display.h new file mode 100644 index 0000000000..14c129708b --- /dev/null +++ 
b/sys/platform/pc64/include/pc/display.h @@ -0,0 +1,46 @@ +/* + * IBM PC display definitions + * + * $FreeBSD: src/sys/i386/include/pc/display.h,v 1.5.2.1 2001/12/17 10:31:05 nyan Exp $ + * $DragonFly: src/sys/platform/pc64/include/pc/display.h,v 1.1 2008/08/29 17:07:18 dillon Exp $ + */ + +/* Color attributes for foreground text */ + +#define FG_BLACK 0 +#define FG_BLUE 1 +#define FG_GREEN 2 +#define FG_CYAN 3 +#define FG_RED 4 +#define FG_MAGENTA 5 +#define FG_BROWN 6 +#define FG_LIGHTGREY 7 +#define FG_DARKGREY 8 +#define FG_LIGHTBLUE 9 +#define FG_LIGHTGREEN 10 +#define FG_LIGHTCYAN 11 +#define FG_LIGHTRED 12 +#define FG_LIGHTMAGENTA 13 +#define FG_YELLOW 14 +#define FG_WHITE 15 +#define FG_BLINK 0x80 + +/* Color attributes for text background */ + +#define BG_BLACK 0x00 +#define BG_BLUE 0x10 +#define BG_GREEN 0x20 +#define BG_CYAN 0x30 +#define BG_RED 0x40 +#define BG_MAGENTA 0x50 +#define BG_BROWN 0x60 +#define BG_LIGHTGREY 0x70 + +/* Monochrome attributes for foreground text */ + +#define FG_UNDERLINE 0x01 +#define FG_INTENSE 0x08 + +/* Monochrome attributes for text background */ + +#define BG_INTENSE 0x10 diff --git a/sys/platform/pc64/include/pcb.h b/sys/platform/pc64/include/pcb.h index 7866630d5d..4656a9d604 100644 --- a/sys/platform/pc64/include/pcb.h +++ b/sys/platform/pc64/include/pcb.h @@ -1,6 +1,7 @@ /*- - * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -36,7 +37,7 @@ * * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 * $FreeBSD: src/sys/amd64/include/pcb.h,v 1.57 2004/01/28 23:54:31 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/pcb.h,v 1.2 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/pcb.h,v 1.3 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_PCB_H_ @@ -63,6 +64,7 @@ struct pcb { register_t pcb_rflags; register_t pcb_fsbase; register_t pcb_gsbase; + u_long pcb_flags; u_int32_t pcb_ds; u_int32_t pcb_es; u_int32_t pcb_fs; @@ -76,7 +78,6 @@ struct pcb { struct pcb_ldt *pcb_ldt; union savefpu pcb_save; - u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ diff --git a/sys/platform/pc64/include/pcb_ext.h b/sys/platform/pc64/include/pcb_ext.h index ed044ea65b..71effadd64 100644 --- a/sys/platform/pc64/include/pcb_ext.h +++ b/sys/platform/pc64/include/pcb_ext.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 1997 Jonathan Lemon + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,7 +25,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/pc64/include/pcb_ext.h,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/pcb_ext.h,v 1.3 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_PCB_EXT_H_ @@ -45,18 +46,18 @@ #endif struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ + struct user_segment_descriptor ext_tssd; /* tss descriptor */ struct amd64tss ext_tss; /* per-process amd64tss */ caddr_t ext_iomap; /* i/o permission bitmap */ - /* struct vm86_kernel ext_vm86; */ /* vm86 area */ }; +/* JG remove this structure? 
*/ struct pcb_ldt { caddr_t ldt_base; int ldt_len; int ldt_refcnt; u_long ldt_active; - struct segment_descriptor ldt_sd; + struct user_segment_descriptor ldt_sd; }; #ifdef _KERNEL diff --git a/sys/cpu/amd64/include/pmap.h b/sys/platform/pc64/include/pmap.h similarity index 54% copy from sys/cpu/amd64/include/pmap.h copy to sys/platform/pc64/include/pmap.h index 5946d823a6..36e8971009 100644 --- a/sys/cpu/amd64/include/pmap.h +++ b/sys/platform/pc64/include/pmap.h @@ -1,7 +1,12 @@ /* - * Copyright (c) 2003 Matthew Dillon + * Copyright (c) 1991 Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -10,11 +15,18 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -23,108 +35,46 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/cpu/amd64/include/pmap.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ - */ -#ifndef _CPU_PMAP_H_ -#define _CPU_PMAP_H_ - -/* - * A four level page table is implemented by the amd64 hardware. Each - * page table represents 9 address bits and eats 4KB of space. There are - * 512 8-byte entries in each table. The last page table contains PTE's - * representing 4K pages (12 bits of address space). - * - * The page tables are named: - * PML4 Represents 512GB per entry (256TB total) LEVEL4 - * PDP Represents 1GB per entry LEVEL3 - * PDE Represents 2MB per entry LEVEL2 - * PTE Represents 4KB per entry LEVEL1 + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. * - * PG_PAE PAE 2MB extension. In the PDE. If 0 there is another level - * of page table and PG_D and PG_G are ignored. If 1 this is - * the terminating page table and PG_D and PG_G apply. - * - * PG_PWT Page write through. 
If 1 caching is disabled for data - * represented by the page. - * PG_PCD Page Cache Disable. If 1 the page table entry will not - * be cached in the data cache. - * - * Each entry in the PML4 table represents a 512GB VA space. We use a fixed - * PML4 and adjust entries within it to switch user spaces. - */ - -#define PG_V 0x0001LL /* P Present */ -#define PG_RW 0x0002LL /* R/W Writable */ -#define PG_U 0x0004LL /* U/S User */ -#define PG_PWT 0x0008LL /* PWT Page Write Through */ -#define PG_PCD 0x0010LL /* PCD Page Cache Disable */ -#define PG_A 0x0020LL /* A Accessed */ -#define PG_D 0x0040LL /* D Dirty (pte only) */ -#define PG_PS 0x0080LL /* PAT (pte only) */ -#define PG_G 0x0100LL /* G Global (pte only) */ -#define PG_USR0 0x0200LL /* available to os */ -#define PG_USR1 0x0400LL /* available to os */ -#define PG_USR2 0x0800LL /* available to os */ -#define PG_PTE_PAT PG_PAE /* PAT bit for 4K pages */ -#define PG_PDE_PAT 0x1000LL /* PAT bit for 2M pages */ -#define PG_FRAME 0x000000FFFFFF0000LL /* 40 bit phys address */ -#define PG_PHYSRESERVED 0x000FFF0000000000LL /* reserved for future PA */ -#define PG_USR3 0x0010000000000000LL /* avilable to os */ - -/* - * OS assignments - */ -#define PG_W PG_USR0 /* Wired */ -#define PG_MANAGED PG_USR1 /* Managed */ -#define PG_PROT (PG_RW|PG_U) /* all protection bits . */ -#define PG_N (PG_PWT|PG_PCD) /* Non-cacheable */ - -/* - * Page Protection Exception bits + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD: src/sys/i386/include/pmap.h,v 1.65.2.3 2001/10/03 07:15:37 peter Exp $ + * $DragonFly: src/sys/platform/pc64/include/pmap.h,v 1.1 2008/08/29 17:07:17 dillon Exp $ */ -#define PGEX_P 0x01 /* Protection violation vs. 
not present */ -#define PGEX_W 0x02 /* during a Write cycle */ -#define PGEX_U 0x04 /* access from User mode (UPL) */ +#ifndef _MACHINE_PMAP_H_ +#define _MACHINE_PMAP_H_ -#define PGEX_MAILBOX 0x40 -#define PGEX_FPFAULT 0x80 +#include /* - * User space is limited to one PML4 entry (512GB). Kernel space is also - * limited to one PML4 entry. Other PML4 entries are used to map foreign - * user spaces into KVM. Typically each cpu in the system reserves two - * PML4 entries for private use. + * Size of Kernel address space. This is the number of page table pages + * (2MB each) to use for the kernel. 256 pages == 512 Megabyte. + * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). */ -#define UVA_MAXMEM (512LL*1024*1024*1024) -#define KVA_MAXMEM (512LL*1024*1024*1024) +#ifndef KVA_PAGES +#define KVA_PAGES 256 +#endif /* - * Pte related macros. This is complicated by having to deal with - * the sign extension of the 48th bit. + * Pte related macros */ -#define KVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)-1 << 47) | \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - -#define UVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - - -#define NKPML4E 1 -#define NKPDPE 1 -#define NKPDE (NKPDPE*NPDEPG) - -#define NUPML4E (NPML4EPG/2) -#define NUPDPE (NUPML4E*NPDPEPG) -#define NUPDE (NUPDPE*NPDEPG) +#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< KVA_PAGES - 2 +#error "Maximum NKPDE is KVA_PAGES - 2" +#endif /* * The *PTDI values control the layout of virtual memory @@ -141,9 +91,8 @@ #define UMAXPTDI (PTDPTDI-1) /* ptd entry for user space end */ #define UMAXPTEOFF (NPTEPG) /* pte entry for user space end */ -#define KPML4I (NPML4EPG-1) - -#define KPDPI (NPDPEPG-2) +#define LINKPML4I 0 +#define LINKPDPI 0 /* * XXX doesn't 
really belong here I guess... @@ -153,7 +102,18 @@ #ifndef LOCORE +#ifndef _SYS_TYPES_H_ +#include +#endif +#ifndef _SYS_QUEUE_H_ #include +#endif +#ifndef _MACHINE_TYPES_H_ +#include +#endif +#ifndef _MACHINE_PARAM_H_ +#include +#endif /* * Address of current and alternate address space page table maps @@ -163,7 +123,11 @@ extern pt_entry_t PTmap[], APTmap[], Upte; extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; -extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */ +extern uint64_t IdlePTD; /* physical address of "Idle" state directory */ + +extern uint64_t common_lvl4_phys; +extern uint64_t common_lvl3_phys; +extern pdp_entry_t *link_pdpe; #endif #ifdef _KERNEL @@ -173,9 +137,9 @@ extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */ * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ -#define vtopte(va) (PTmap + i386_btop(va)) +#define vtopte(va) (PTmap + amd64_btop(va)) -#define avtopte(va) (APTmap + i386_btop(va)) +#define avtopte(va) (APTmap + amd64_btop(va)) /* * Routine: pmap_kextract @@ -203,14 +167,16 @@ pmap_kextract(vm_offset_t va) #define vtophys(va) pmap_kextract(((vm_offset_t)(va))) #define vtophys_pte(va) ((pt_entry_t)pmap_kextract(((vm_offset_t)(va)))) -#define avtophys(va) (((vm_offset_t) (*avtopte(va))&PG_FRAME) | ((vm_offset_t)(va) & PAGE_MASK)) - #endif +#define pte_load_clear(pte) atomic_readandclear_long(pte) + /* * Pmap stuff */ -struct pv_entry; +struct pv_entry; +struct vm_page; +struct vm_object; struct md_page { int pv_list_count; @@ -222,6 +188,9 @@ struct md_page { * keep certain statistics. They may do this anyway they * so choose, but are expected to return the statistics * in the following structure. + * + * NOTE: We try to match the size of the pc32 pmap with the vkernel pmap + * so the same utilities (like 'ps') can be used on both. 
*/ struct pmap_statistics { long resident_count; /* # of pages mapped (total) */ @@ -229,17 +198,18 @@ struct pmap_statistics { }; typedef struct pmap_statistics *pmap_statistics_t; -struct vm_object; -struct vm_page; - struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ + struct vm_page *pm_pdirm; /* VM page for pg directory */ struct vm_object *pm_pteobj; /* Container for pte's */ + TAILQ_ENTRY(pmap) pm_pmnode; /* list of pmaps */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ int pm_count; /* reference count */ cpumask_t pm_active; /* active on cpus */ + int pm_filler02; /* (filler sync w/vkernel) */ struct pmap_statistics pm_stats; /* pmap statistics */ struct vm_page *pm_ptphint; /* pmap ptp hint */ + int pm_generation; /* detect pvlist deletions */ }; #define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count @@ -247,12 +217,12 @@ struct pmap { typedef struct pmap *pmap_t; #ifdef _KERNEL -extern struct pmap kernel_pmap; +extern struct pmap kernel_pmap; #endif /* * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry_t, the list is pv_list + * mappings of that page. An entry is a pv_entry_t, the list is pv_table. */ typedef struct pv_entry { pmap_t pv_pmap; /* pmap where mapping lies */ @@ -279,13 +249,11 @@ extern vm_paddr_t avail_start; extern vm_offset_t clean_eva; extern vm_offset_t clean_sva; extern char *ptvmmap; /* poor name! 
*/ -extern vm_offset_t virtual_avail; -void pmap_bootstrap ( vm_paddr_t, vm_paddr_t); -pmap_t pmap_kernel (void); +void pmap_bootstrap ( vm_paddr_t *, vm_paddr_t); void *pmap_mapdev (vm_paddr_t, vm_size_t); void pmap_unmapdev (vm_offset_t, vm_size_t); -unsigned *pmap_pte (pmap_t, vm_offset_t) __pure2; +pt_entry_t *pmap_pte (pmap_t, vm_offset_t) __pure2; struct vm_page *pmap_use_pt (pmap_t, vm_offset_t); #ifdef SMP void pmap_set_opt (void); @@ -295,4 +263,4 @@ void pmap_set_opt (void); #endif /* !LOCORE */ -#endif /* !_CPU_PMAP_H_ */ +#endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/platform/pc64/include/thread.h b/sys/platform/pc64/include/thread.h index 4c31b5c4c5..cae597152e 100644 --- a/sys/platform/pc64/include/thread.h +++ b/sys/platform/pc64/include/thread.h @@ -1,5 +1,7 @@ /* - * Copyright (c) 2003 Matt Dillon , All rights reserved. + * Copyright (c) 2003 Matt Dillon + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -24,7 +26,7 @@ * * Machine independant code should not directly include this file. 
* - * $DragonFly: src/sys/platform/pc64/include/thread.h,v 1.2 2007/09/23 04:29:31 yanyh Exp $ + * $DragonFly: src/sys/platform/pc64/include/thread.h,v 1.3 2008/08/29 17:07:17 dillon Exp $ */ #ifndef _MACHINE_THREAD_H_ @@ -37,7 +39,7 @@ struct md_thread { unsigned int mtd_cpl; union savefpu *mtd_savefpu; - struct savetls mtd_savetls; + struct savetls mtd_savetls; }; #ifdef _KERNEL @@ -68,7 +70,7 @@ _get_mycpu(void) { struct globaldata *gd; - __asm ("movq %%fs:globaldata,%0" : "=r" (gd) : "m"(__mycpu__dummy)); + __asm ("movq %%gs:globaldata,%0" : "=r" (gd) : "m"(__mycpu__dummy)); return(gd); } diff --git a/sys/platform/pc64/include/vmparam.h b/sys/platform/pc64/include/vmparam.h index 35c1d9ec36..98c11740b1 100644 --- a/sys/platform/pc64/include/vmparam.h +++ b/sys/platform/pc64/include/vmparam.h @@ -1,9 +1,8 @@ /*- * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. * Copyright (c) 1994 John S. Dyson - * All rights reserved. * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -39,7 +38,7 @@ * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * $FreeBSD: src/sys/amd64/include/vmparam.h,v 1.44 2003/12/07 04:51:04 alc Exp $ - * $DragonFly: src/sys/platform/pc64/include/vmparam.h,v 1.1 2007/08/21 19:45:45 corecode Exp $ + * $DragonFly: src/sys/platform/pc64/include/vmparam.h,v 1.2 2008/08/29 17:07:17 dillon Exp $ */ @@ -58,13 +57,13 @@ #define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ -#define MAXDSIZ (256UL*1024*1024*1024) /* max data size */ +#define MAXDSIZ (256UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ -#define MAXSSIZ (128UL*1024*1024*1024) /* max stack size */ +#define MAXSSIZ (128UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ @@ -88,6 +87,14 @@ */ #define UMA_MD_SMALL_ALLOC +/* + * The number of PHYSSEG entries must be one greater than the number + * of phys_avail entries because the phys_avail entry that spans the + * largest physical address that is accessible by ISA DMA is split + * into two PHYSSEG entries. + */ +#define VM_PHYSSEG_MAX 31 + /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. 
@@ -95,18 +102,22 @@ * messy at times, but hey, we'll do anything to save a page :-) */ -#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1) -#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, KPDPI, 0, 0) +#define VM_MAX_KERNEL_ADDRESS (1024UL * 1024 * 1024) +#define VM_MIN_KERNEL_ADDRESS (512UL * 1024 * 1024) #define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0) #define DMAP_MAX_ADDRESS KVADDR(DMPML4I+1, 0, 0, 0) -#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0) +#define KERNBASE (512 * 1024 * 1024) +#define PTOV_OFFSET KERNBASE + +#define KPT_MAX_ADDRESS VADDR(PTDPTDI, KPTDI+NKPT) +#define KPT_MIN_ADDRESS VADDR(PTDPTDI, KPTDI) -#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I) -#define UPT_MIN_ADDRESS KVADDR(PML4PML4I, 0, 0, 0) +#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) +#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) -#define VM_MAXUSER_ADDRESS UVADDR(NUPML4E, 0, 0, 0) +#define VM_MAXUSER_ADDRESS UPT_MIN_ADDRESS #define USRSTACK VM_MAXUSER_ADDRESS @@ -114,7 +125,7 @@ #define VM_MIN_ADDRESS (0) #define VM_MIN_USER_ADDRESS ((vm_offset_t)0) -#define VM_MAX_USER_ADDRESS UVADDR(PTDPTDI, 0, 0, 0) +#define VM_MAX_USER_ADDRESS VM_MAXUSER_ADDRESS #define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS) #define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS) diff --git a/sys/platform/pc64/isa/README.le b/sys/platform/pc64/isa/README.le new file mode 100644 index 0000000000..95d664be73 --- /dev/null +++ b/sys/platform/pc64/isa/README.le @@ -0,0 +1,69 @@ +$FreeBSD: src/sys/i386/isa/README.le,v 1.6 1999/08/28 00:44:35 peter Exp $ +$DragonFly: src/sys/platform/pc64/isa/README.le,v 1.1 2008/08/29 17:07:19 dillon Exp $ + +---------------- + +This driver is in no way supported by Digital Equipment. See the +disclaimers in the sources for more. + +This driver supports all the DEC EtherWORKS III NICs (DE203, DE204, +and DE205) and the later DEC EtherWORKS II NICs (DE200, DE201, DE202, +DE422). 
DEPCA-style boards prior to the DE200 have not been tested +and may not work. + +This driver is not EISA aware. If you are using a DE422 or have +configured the EtherWORKS III in EISA mode, make sure you specify +the I/O port properly as this driver will *not* probe for it. +(The I/O port should be 0xNc00 where N is the EISA slot number). + +This driver does not yet use the full 128KB allowed by the DE422. +Someday, it might. For EtherWORKS III NICs, the driver will override +the EEPROM setting of MemoryMode and *always* use the 2K for best +results. + +The driver includes full support for both BPF and IP Multicast. + +[All paths are relative to the top of sys source area, usually +/usr/src/sys.] + +The following files need to be moved into their respective +directories: + + if_le.c --> i386/isa + am7990.h --> i386/isa/ic + lemac.h --> i386/isa/ic + +You will need to apply the patch provided in pat.files.i386 to +i386/conf/files.i386 file. + +After that is done you will need to edit your config file (in +i386/conf) and a line similar to: + +device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 + +[The above line assumes the board is still at the factory defaults.] +Change the port, irq, and iomem value if needed to your configuration. + +Now you are ready to rebuild your kernel, reboot, and see if the +driver can configure your board. When the system boots, you will +hopefully something close to: + + EtherWORKS II: + + le0 at 0x300-0x30f irq 5 maddr 0xd0000 msize 65536 on isa + le0: DE202 ethernet address 08:00:2b:2d:c8:45 + bpf: le0 attached + + EtherWORKS III: + + le0 at 0x320-0x33f irq 5 maddr 0xd0000 msize 2048 on isa + le0: DE205-AB ethernet address 08:00:2b:bb:23:e0 + le0 attached + +in the startup log. If so, the board configured properly and +should be ready to use. 
+ +-- +Mail: thomas@lkg.dec.com +URL: http://ftp.digital.com/~thomas/ + diff --git a/sys/platform/pc64/isa/README.stl b/sys/platform/pc64/isa/README.stl new file mode 100644 index 0000000000..cded0f24c4 --- /dev/null +++ b/sys/platform/pc64/isa/README.stl @@ -0,0 +1,530 @@ +$FreeBSD: src/sys/i386/isa/README.stl,v 1.3.6.2 2001/08/30 12:29:57 murray Exp $ +$DragonFly: src/sys/platform/pc64/isa/README.stl,v 1.1 2008/08/29 17:07:19 dillon Exp $ + +Stallion Multiport Serial Driver Readme +--------------------------------------- + +Version: 2.0.0 +Date: 22JAN98 +Author: Greg Ungerer (gerg@stallion.com) + + + +1. INTRODUCTION + +This is a set of FreeBSD drivers for most of the Stallion Technologies +range of multiport serial boards. + +This driver has not been developed by Stallion Technologies. I developed it +in my spare time in the hope that it would be useful. As such there is no +warranty or support of any form. What this means is that this driver is not +officially supported by Stallion Technologies, so don't ring their support +if you can't get it working. They will probably not be able to help you. +Instead email me if you have problems or bug reports and I will do what I +can... (Sorry to sound so heavy handed, but I need to stress that this driver +is not officially supported in any way.) + +This package actually contains two drivers. One is for the true Stallion +intelligent multiport boards, and the other is for the smart range of boards. + +All host driver source is included in this package, and is copyrighted under +a BSD style copyright. The board "firmware" code in this package is copyright +Stallion Technologies (the files cdk.sys and 2681.sys). + + +1.1 SMART MULTIPORT BOARD DRIVER + +This driver supports the EasyIO, EasyConnection 8/32 and EasyConnection +8/64-PCI range of boards. 
These boards are not classic intelligent multiport +boards, but are host based multiport boards that use Cirrus Logic CL-CD1400 +UART's, or on newer versions of the hardware use the Signetics 26C198 UART. +Both of these are high performance UART's with built in FIFO's, automatic +flow control and a host of other features. + +The EasyIO range of cards comes in 4 forms, the EasyIO-4, EasyIO-8, +EasyIO-8M and EasyIO-8-PCI. The first three are ISA based boards while +the last is a PCI bus board. All of these are non-expandable, low cost, +multiport boards with 4 or 8 RS-232C ports. Each ISA EasyIO board requires 8 +bytes of I/O address space and 1 interrupt. The PCI EasyIO board uses 64 +bytes of I/O address space and 1 interrupt. On EISA and PCI systems it is +possible to share 1 interrupt between multiple boards. The EasyIO-4 has 10 +pin RJ connectors, and the EasyIO-8 comes with a dongle cable with either 10 +pin RJ connectors or DB-25 connectors. The EasyIO-8M has 6 pin RJ connectors. + +The EasyConnection 8/32 family of boards is a relatively low cost modular +range of multiport serial boards. The EasyConnection 8/32 boards can be +configured to have from 8 to 32 serial ports by plugging in external serial +port modules that contain either 8 or 16 ports each. There is a wide range +of external modules available that offer: DB-25 connectors, RJ-45 connectors +(both with RS-232 D and E compatible drivers), and also RS-422 and RS-485 +ports. The EasyConnection 8/32 boards come in ISA, PCI and MCA bus versions. +The board takes the form of a host adapter card, with an external connector +cable that plugs into the external modules. The external modules just clip +together to add ports (BTW, they are NOT hot pluggable). Each ISA +EasyConnection 8/32 board requires two separate I/O address ranges, one two +bytes in size and a secondary region of 32 bytes. 
Each PCI EasyConnection +8/32 requires two regions of I/O address space, normally these will be +automatically allocated by the system BIOS at power on time. Each MCA +EasyConnection board requires one I/O address region 64 bytes in size. All +board types also require one interrupt. On EISA systems multiple boards can +share one interrupt. The secondary I/O range of the ISA board (the 32 byte +range) can be shared between multiple boards on any bus type. + +The EasyConnection 8/64-PCI family is similar to the EasyConnection 8/32-PCI +board, and uses the same external modules. It is supported by the smart +board driver - not the intelligent board driver. It uses 2 regions of I/O +address space, both 64 bytes in size, and 1 interrupt. + + +1.2 INTELLIGENT MULTIPORT BOARD DRIVER + +This driver is for Stallion's range of true intelligent multiport boards. +It supports the EasyConnection 8/64, ONboard and Brumby families of multiport +boards. The EasyConnection 8/64 and ONboard boards come in ISA, EISA and +Microchannel bus versions. The Brumby boards are only available in ISA +versions. This driver can also work with the original Stallion board, but +these are no longer supported by Stallion Technologies. + +The EasyConnection 8/64 family of boards is a medium cost, high performance, +modular range of intelligent multiport serial boards. The EasyConnection 8/64 +boards can be configured to have from 8 to 64 serial ports by plugging in +external serial port modules that contain either 8 or 16 ports each (these +modules are the same used by the EasyConnection 8/32 board). There is a wide +range of external modules available that offer: DB-25 connectors, RJ-45 +connectors (both with RS-232 D and E compatible drivers), and also RS-422 and +RS-485 ports. The board takes the form of a host adapter card, with an external +connector cable that plugs into the external modules. The external modules +just clip together to add ports (BTW, they are NOT hot pluggable). 
Each +EasyConnection 8/64 board requires 4 bytes of I/O address space and a region +of memory space. The size of the memory region required depends on the exact +board type. The EISA version requires 64 Kbytes of address space (that can +reside anywhere in the 4 Gigabyte physical address space). The ISA and MCA +boards require 4 Kbytes of address space (which must reside in the lower +1 Mbyte of physical address space - typically in the c8000 to e0000 range). +No interrupts are required. The physical memory region of multiple +EasyConnection 8/64 boards can be shared, but each board must have a separate +I/O address. + +The ONboard family of boards are traditional intelligent multiport serial +boards. They are Stallion's older range of boards with a limited expansion +capability. They come in 4, 8, 12, 16 and 32 port versions. The board uses +the same base card (which has 4 ports on it) and is expanded to more ports via +a mezzanine board that attaches directly onto the base card. External panels +plug into the ONboard providing RS-232C ports with DB-25 plugs. An RS-422 +DB-25 dual interface panel is also available. The ISA and microchannel +ONboards require 16 bytes of I/O address space and 64K bytes of memory +space. The memory space can be anywhere in the 16 Mbyte ISA bus address +range. No interrupt is required. The EISA ONboard requires 64 Kbytes of +memory space that can be anywhere in the 4 Gigabyte physical address space. +All ONboard boards can share their memory region with other ONboards (or +EasyConnection 8/64 boards). + +The Brumby family of boards are traditional, low cost intelligent multiport +serial boards. They are non-expandable and come in 4, 8 and 16 port versions. +They are only available for the ISA bus. The serial ports are all on DB-25 +"dongle" cables that attach to the rear of the board. Each Brumby board +requires 16 bytes of I/O address space and 16 Kbytes of memory space. No +interrupts are required. 
+ +The original Stallion boards are old. They went out of production some years +back and are no longer supported. They offer limited expandability and are +available in 8 or 16 port configurations. An external panel houses 16 RS-232C +ports with DB-9 connectors. They require 16 bytes of I/O address space, and +either 64K or 128K of memory space. No interrupt is required. + +That's the boards supported by the second driver. The ONboard, Brumby and +Stallion boards are Stallion's older range of intelligent multiports - so +there are lots of them around. They only support a maximum baud rate of +38400. The EasyConnection 8/64 is a true high performance intelligent +multiport board, having much greater throughput than any of Stallion's +older boards. It also supports speeds up to 460800 baud. + + +1.3 HOW TO GET BOARDS + +Stallion Technologies has offices all over the world, as well as many more +distributors and resellers. To find out about local availability please +contact the nearest Stallion office and they can give you all the information +you need. Look in the "Offices" file in the driver package for a current list +of Stallion Technologies offices. + +Another good source of information about the Stallion range of boards and +local availability is on the Stallion Web page. Check it out at +http://www.stallion.com. + + + +2. INSTALLATION + +This driver, as is, will work on a FreeBSD 2.2.5 system. It will run on any +FreeBSD system version 2.0.5 and up, including -current version systems. +For systems other than 2.2.0 and 2.2.5 you will need to change the version +define in the driver source. Look for the symbol name VFREEBSD, then change +it to match the version number of your FreeBSD system (for example 2.2.5 is +225, 3.0.0 would be 300, etc). + +Recent versions of FreeBSD 2.1.5 and above include the Stallion drivers +in the distribution. You may still need to use this source for these systems. 
+If the code in this package is newer than the version enclosed on your +FreeBSD installation then you should use this source instead. + +You will need to build a new kernel to use this driver. So the first thing +you need is to have the full kernel source. Most people will have this +(I hope!). The following assumes that the kernel source is in /usr/src/sys. + +The drivers can support up to 8 boards. For the smart board driver any +combination of EasyIO, EasyConnection 8/32 and EasyConnection 8/64-PCI +boards can be installed. For the intelligent any combination of +EasyConnection 8/64 (ISA and EISA), ONboard, Brumby or original Stallion. +So there is a theoretical maximum of 512 ports. + +2.1 Instructions to install: + +1. Copy the driver source files into the kernel source tree. + + cp stallion.c istallion.c /usr/src/sys/i386/isa + cp cdk.h comstats.h /usr/src/sys/i386/include + cp scd1400.h sc26198.h /usr/src/sys/i386/isa/ic + + Note: if you are NOT using FreeBSD 2.2.5 then you will need to edit the + stallion.c and istallion.c files and change the VFREEBSD define to match + your version. This define is near the top of the file, and should be + easy to find. + +2. If you are using FreeBSD version 2.2.0 or above then jump to step 4 now. + + Add a character device switch table entry for the driver that you wish + to use into the cdevsw table structure. This involves adding some code + into the kernel conf.c file. + + If you are using an EasyIO, EasyConnection 8/32 or EasyConnection 8/64-PCI + then you need to use the stallion.c driver. All other board types + (EasyConnection 8/64 (ISA and EISA), ONboard, Brumby, Stallion) use the + istallion.c driver. You can also have a mix of boards using both drivers. + You will need to use a different major device number for the second driver + though (not the default 72 - see below for more details on this). + +2.1. 
If using the stallion.c driver then do: + + cd /usr/src/sys/i386/i386 + vi conf.c + - add the following lines (in 2.1.0 I put them at line 729): + +/* Stallion Multiport Serial Driver */ +#include "use_stl.h" +#if NSTL > 0 +d_open_t stlopen; +d_close_t stlclose; +d_read_t stlread; +d_write_t stlwrite; +d_ioctl_t stlioctl; +d_stop_t stlstop; +d_ttycv_t stldevtotty; +#define stlreset nxreset +#define stlmmap nxmmap +#define stlstrategy nxstrategy +#else +#define stlopen nxopen +#define stlclose nxclose +#define stlread nxread +#define stlwrite nxwrite +#define stlioctl nxioctl +#define stlstop nxstop +#define stlreset nxreset +#define stlmmap nxmmap +#define stlstrategy nxstrategy +#define stldevtotty nxdevtotty +#endif + + + - and then inside the actual cdevsw structure definition, at the + last entry add (this is now line 1384 in the 2.1 conf.c): + + { stlopen, stlclose, stlread, stlwrite, /*72*/ + stlioctl, stlstop, stlreset, stldevtotty,/*stallion*/ + ttselect, stlmmap, stlstrategy }, + + - the line above used major number 72, but this may be different + on your system. Take note of what major number you are using. + + - save the file and exit vi. + + +2.2. 
If using the istallion.c driver then do: + + cd /usr/src/sys/i386/i386 + vi conf.c + - add the following lines (in 2.1.0 I put them at line 729): + +/* Stallion Intelligent Multiport Serial Driver */ +#include "use_stl.h" +#if NSTL > 0 +d_open_t stliopen; +d_close_t stliclose; +d_read_t stliread; +d_write_t stliwrite; +d_ioctl_t stliioctl; +d_stop_t stlistop; +d_ttycv_t stlidevtotty; +#define stlireset nxreset +#define stlimmap nxmmap +#define stlistrategy nxstrategy +#else +#define stliopen nxopen +#define stliclose nxclose +#define stliread nxread +#define stliwrite nxwrite +#define stliioctl nxioctl +#define stlistop nxstop +#define stlireset nxreset +#define stlimmap nxmmap +#define stlistrategy nxstrategy +#define stlidevtotty nxdevtotty +#endif + + + - and then inside the actual cdevsw structure definition, at the + last entry add (this is now line 1384 in the 2.1 conf.c): + + { stliopen, stliclose, stliread, stliwrite, /*72*/ + stliioctl, stlistop, stlireset, stlidevtotty,/*istallion*/ + ttselect, stlimmap, stlistrategy }, + + - the line above used major number 72, but this may be different + on your system. Take note of what major number you are using. + + - save the file and exit vi. + +3. Add the driver source files to the kernel files list: + + cd /usr/src/sys/i386/conf + vi files.i386 + - add the following definition lines into the list (it is stored + alphabetically, so insert them appropriately): + +i386/isa/istallion.c optional stli device-driver + +i386/isa/stallion.c optional stl device-driver + + - save the file and exit vi. + +4. Add board probe entries into the kernel configuration file: + + cd /usr/src/sys/i386/conf + cp GENERIC MYKERNEL + - if you already have a kernel config that you use then you + could just use that (instead of MYKERNEL) + vi MYKERNEL + - if only using PCI boards then you don't need to enter a + configuration line, the kernel will automatically detect + the board at boot up, so skip to step 5. 
+ - enter a line for each board that you want to use. For stallion.c + boards entries should look like: + +device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr + + For istallion.c boards, the entries should look like: + +device stli0 at isa? port 0x2a0 tty iomem 0xcc000 iosiz 0x1000 flags 23 + + (I suggest you put them after the sio entries) + (Don't enter lines for PCI boards) + - change the entry resources as required. For the stallion.c + entries this may involve changing the port address or irq. + For the istallion.c entries this may involve changing the port + address, iomem address, iosiz value and the flags. Select from + the following table for appropriate flags and iosiz values for + your board type: + + EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 + EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 + EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 + ONboard ISA: flags 4 iosiz 0x10000 + ONboard EISA: flags 7 iosiz 0x10000 + ONboard MCA: flags 3 iosiz 0x10000 + Brumby: flags 2 iosiz 0x4000 + Stallion: flags 1 iosiz 0x10000 + + - save the file and exit + - more detailed information about board configuration and + some helpful examples are contained in the driver manual + pages. + +5. Build a new kernel using this configuration. + + cd /usr/src/sys/i386/conf + config MYKERNEL + cd ../../compile/MYKERNEL + make depend + make all + make install + + +And there you have it! + +Once you have a new kernel built reboot to start it up. On startup the +Stallion board probes will report on whether the boards were found or not. +For each board found the driver will print out the type of board found, +and how many panels and ports it has. + +If a board is not found by the driver but is actually in the system then the +most likely problem is that the IO address is incorrect. The easiest thing to +do is change the DIP switches on the board to the desired address and reboot. 
+
+On EasyIO and EasyConnection 8/32 boards the IRQ is software programmable,
+so if there is a conflict you may need to change the IRQ used for a board in
+the MYKERNEL configuration file and rebuild the kernel.
+
+Note that the secondary IO address of the EasyConnection 8/32 boards is hard
+coded into the stallion.c driver code. It is currently set to IO address
+0x280. If you need to use a different address then you will need to edit this
+file and change the variable named stl_ioshared.
+
+On intelligent boards it is possible that the board shared memory region is
+clashing with that of some other device. Check for this and change the device
+or kernel configuration as required.
+
+
+2.2 INTELLIGENT DRIVER OPERATION
+
+The intelligent boards also need to have their "firmware" code downloaded
+to them. This is done via a user level application supplied in the driver
+package called "stlload". Compile this program wherever you dropped the
+package files, by typing "make". In its simplest form you can then type
+	./stlload -i cdk.sys
+in this directory and that will download board 0 (assuming board 0 is an
+EasyConnection 8/64 board). To download to an ONboard, Brumby or Stallion do:
+	./stlload -i 2681.sys
+
+Normally you would want all boards to be downloaded as part of the standard
+system startup. To achieve this, add one of the lines above into the
+/etc/rc.serial file. To download each board just add the "-b <brdnbr>"
+option to the line. You will need to download code for every board. You should
+probably move the stlload program into a system directory, such as /usr/sbin.
+Also, the default location of the cdk.sys image file in the stlload
+down-loader is /usr/libdata/stallion. Create that directory and put the
+cdk.sys and 2681.sys files in it. (It's a convenient place to put them
+anyway).
As an example your /etc/rc.serial file might have the following +lines added to it (if you had 3 boards): + /usr/sbin/stlload -b 0 -i /usr/libdata/stallion/cdk.sys + /usr/sbin/stlload -b 1 -i /usr/libdata/stallion/2681.sys + /usr/sbin/stlload -b 2 -i /usr/libdata/stallion/2681.sys + +The image files cdk.sys and 2681.sys are specific to the board types. The +cdk.sys will only function correctly on an EasyConnection 8/64 (ISA and EISA) +board. Similarly the 2681.sys image will only operate on ONboard, Brumby and +Stallion boards. If you load the wrong image file into a board it will fail +to start up, and of course the ports will not be operational! + + + +3. USING THE DRIVER + +Once the driver is installed you will need to setup some device nodes to +access the serial ports. Use the supplied "mkdevnods" script to automatically +create all required device entries for your boards. To make device nodes for +more than 1 board then just supply the number of boards you are using as a +command line parameter to mkdevnods and it will create nodes for that number +of boards. By default it will create device nodes for 1 board only. + +Note that if the driver is not installed at character major number 72 then +you will need to edit the mkdevnods script and modify the STL_SERIALMAJOR +variable to the major number you are using. + +Device nodes created for the normal serial port devices are named /dev/ttyEX +where X is the port number. (The second boards ports will start from ttyE64, +the third boards from ttyE128, etc). It will also create a set of modem call +out devices named cueX where again X is the port number. + +For the most part the Stallion driver tries to emulate the standard PC system +com ports and the standard sio serial driver. The idea is that you should +be able to use Stallion board ports and com ports inter-changeably without +modifying anything but the device name. Anything that doesn't work like that +should be considered a bug in this driver! 
+ +Since this driver tries to emulate the standard serial ports as much as +possible then most system utilities should work as they do for the standard +com ports. Most importantly "stty" works as expected and "comcontrol" can be +used just like for the serial ports. + +This driver should work with anything that works on standard com serial ports. +Having said that, I have used it on at least the following types of "things" +under FreeBSD: + a) standard dumb terminals (using getty) + b) modems (using cu, etc) + c) ppp (through pppd, kernel ppp) + + + +4. NOTES + +Please email me any feedback on bugs, problems, or even good experiences +with these drivers! + +You can use both drivers at once if you have a mix of board types installed +in a system. On FreeBSD 2.1.5 and later systems each driver has been +assigned a different major number. On earlier FreeBSD versions you may +need to manually assign each driver a different major number. You will +also need to adjust the names of the device nodes for each board. To do +this modify the mkdevnods script to make device nodes based on those new +major numbers. For example, you could change the istallion.c driver to use +major number 75. You will also need to create device nodes with different +names for the ports, for eg ttyFXXX. + +Currently the intelligent board driver (istallion.c) does not have the +ability to share a boards memory region with other boards (you can only do +this on EasyConnection 8/64 and ONboards normally anyway). It also does +not currently support any memory address ranges above the low 1Mb region. +These will be fixed in a future release of the driver. + +Finding a free physical memory address range can be a problem. The older +boards like the Stallion and ONboard need large areas (64K or even 128K), so +they can be very difficult to get into a system. If you have 16 Mb of RAM +then you have no choice but to put them somewhere in the 640K -> 1Mb range. 
+ONboards require 64K, so typically 0xd0000 is good, or 0xe0000 on some +systems. If you have an original Stallion board, "V4.0" or Rev.O, then you +need a 64K memory address space, so again 0xd0000 and 0xe0000 are good. Older +Stallion boards are a much bigger problem. They need 128K of address space and +must be on a 128K boundary. If you don't have a VGA card then 0xc0000 might be +usable - there is really no other place you can put them below 1Mb. + +Both the ONboard and old Stallion boards can use higher memory addresses as +well, but you must have less than 16Mb of RAM to be able to use them. Usual +high memory addresses used include 0xec0000 and 0xf00000. + +The Brumby boards only require 16Kb of address space, so you can usually +squeeze them in somewhere. Common addresses are 0xc8000, 0xcc000, or in +the 0xd0000 range. EasyConnection 8/64 boards are even better, they only +require 4Kb of address space, again usually 0xc8000, 0xcc000 or 0xd0000 +are good. + +If you are using an EasyConnection 8/64-EI or ONboard/E then usually the +0xd0000 or 0xe0000 ranges are the best options below 1Mb. If neither of +them can be used then the high memory support to use the really high address +ranges is the best option. Typically the 2Gb range is convenient for them, +and gets them well out of the way. + +The ports of the EasyIO-8M board do not have DCD or DTR signals. So these +ports cannot be used as real modem devices. Generally when using these +ports you should only use the cueX devices. + +There is a utility in this package that reports statistics on the serial +ports. You will need to have the ncurses library installed on your system +to build it. + +To build the statistics display program type: + make stlstats +Once compiled simply run it (you will need to be root) and it will display +a port summary for the first board and panel installed. Use the digits to +select different board numbers, or 'n' to cycle through the panels on a +board. 
To look at detailed port information then hit 'p', that will display +detailed port 0 information. Use the digits and letters 'a' through 'f' to +select the different ports (on this board and panel). + + + +5. ACKNOWLEDGEMENTS + +This driver is loosely based on the code of the FreeBSD sio serial driver. +A big thanks to Stallion Technologies for the use of their equipment. + diff --git a/sys/platform/pc64/isa/asc.c b/sys/platform/pc64/isa/asc.c new file mode 100644 index 0000000000..50b8299d75 --- /dev/null +++ b/sys/platform/pc64/isa/asc.c @@ -0,0 +1,872 @@ +/* asc.c - device driver for hand scanners + * + * Current version supports: + * + * - AmiScan (Mustek) Color and BW hand scanners (GI1904 chipset) + * + * Copyright (c) 1995 Gunther Schadow. All rights reserved. + * Copyright (c) 1995,1996,1997 Luigi Rizzo. All rights reserved. + * Copyright (c) 2008 The DragonFly Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Gunther Schadow + * and Luigi Rizzo. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * $FreeBSD: src/sys/i386/isa/asc.c,v 1.42.2.2 2001/03/01 03:22:39 jlemon Exp $
+ * $DragonFly: src/sys/platform/pc64/isa/asc.c,v 1.1 2008/08/29 17:07:19 dillon Exp $
+ */
+
+#include "use_asc.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/poll.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/thread2.h>
+
+#include <machine/asc_ioctl.h>
+
+#include <bus/isa/isa.h>
+#include <bus/isa/isa_device.h>
+#include "ascreg.h"
+
+/***
+ *** CONSTANTS & DEFINES
+ ***
+ ***/
+
+#define PROBE_FAIL 0
+#define PROBE_SUCCESS IO_ASCSIZE
+#define ATTACH_FAIL 0
+#define ATTACH_SUCCESS 1
+#define SUCCESS 0
+#define FAIL -1
+#define INVALID FAIL
+
+#define DMA1_READY 0x08
+#define ASCDEBUG
+#ifdef ASCDEBUG
+# define lprintf if(scu->flags & FLAG_DEBUG) kprintf
+#else
+# define lprintf (void)
+#endif
+
+#define TIMEOUT (hz*15) /* timeout while reading a buffer - default value */
+
+/***
+ *** LAYOUT OF THE MINOR NUMBER
+ ***/
+
+#define UNIT_MASK 0xc0 /* unit asc0 .. asc3 */
+#define UNIT(x) (x >> 6)
+#define DBUG_MASK 0x20
+#define FRMT_MASK 0x18 /* output format */
+#define FRMT_RAW 0x00 /* output bits as read from scanner */
+#define FRMT_GRAY 0x1 /* output gray mode for color scanner */
+#define FRMT_PBM 0x08 /* output pbm format */
+#define FRMT_PGM 0x18
+
+/***
+ *** THE GEMOMETRY TABLE
+ ***/
+
+#define GREY_LINE 826 /* 825, or 826 , or 550 ??? 
*/ +static const struct asc_geom { + int dpi; /* dots per inch */ + int dpl; /* dots per line */ + int bpl; /* bytes per line */ + int g_res; /* get resolution value (ASC_STAT) */ +} geomtab[] = { + { 800, 3312, 414, ASC_RES_800}, + { 700, 2896, 362, ASC_RES_700}, + { 600, 2480, 310, ASC_RES_600}, + { 500, 1656, 258, ASC_RES_500}, + { 400, 1656, 207, ASC_RES_400}, + { 300, 1240, 155, ASC_RES_300}, + { 200, 832, 104, ASC_RES_200}, + { 100, 416, 52, ASC_RES_100}, + { 200, 3*GREY_LINE, 3*GREY_LINE, 0 /* returned by color scanner */}, + { 200, GREY_LINE, GREY_LINE, 0 /* color scanner, grey mode */}, + { INVALID, 416, 52, INVALID } /* terminator */ +}; + +/*** + *** THE TABLE OF UNITS + ***/ + +struct _sbuf { + size_t size; + size_t rptr; + size_t wptr; /* only changed in ascintr */ + size_t count; + char *base; +}; + +struct asc_unit { + long thedev; /* XXX */ + int base; /* base address */ + int dma_num; /* dma number */ + char dma_byte; /* mask of byte for setting DMA value */ + char int_byte; /* mask of byte for setting int value */ + char cfg_byte; /* mirror of byte written to config reg (ASC_CFG). */ + char cmd_byte; /* mirror of byte written to cmd port (ASC_CMD)*/ + char portf_byte; + int flags; +#define ATTACHED 0x01 +#define OPEN 0x02 +#define READING 0x04 +#define DMA_ACTIVE 0x08 +#define SLEEPING 0x10 +#define SEL_COLL 0x20 +#define PBM_MODE 0x40 +#define FLAG_DEBUG 0x80 + int geometry; /* resolution as geomtab index */ + int linesize; /* length of one scan line (from geom.table) */ + int blen; /* length of buffer in lines */ + int btime; /* timeout of buffer in seconds/hz */ + struct _sbuf sbuf; + long icnt; /* interrupt count XXX for debugging */ + struct selinfo selp; + int height; /* height, for pnm modes */ + size_t bcount; /* bytes to read, for pnm modes */ +}; + +static struct asc_unit unittab[NASC]; + +/*** I could not find a reasonable buffer size limit other than by + *** experiments. 
MAXPHYS is obviously too much, while DEV_BSIZE and + *** PAGE_SIZE are really too small. There must be something wrong + *** with isa_dmastart/isa_dmarangecheck HELP!!! + *** + *** Note, must be DEFAULT_BLEN * samples_per_line <= MAX_BUFSIZE + ***/ +#define MAX_BUFSIZE 0xb000 /* XXX was 0x3000 */ +#define DEFAULT_BLEN 16 + +/*** + *** THE PER-DRIVER RECORD FOR ISA.C + ***/ +static int ascprobe (struct isa_device *isdp); +static int ascattach(struct isa_device *isdp); +struct isa_driver ascdriver = { ascprobe, ascattach, "asc" }; + +static void ascintr(void *); + +static d_open_t ascopen; +static d_close_t ascclose; +static d_read_t ascread; +static d_ioctl_t ascioctl; +static d_poll_t ascpoll; + +#define CDEV_MAJOR 71 + +static struct dev_ops asc_ops = { + { "asc", CDEV_MAJOR, 0 }, + .d_open = ascopen, + .d_close = ascclose, + .d_read = ascread, + .d_ioctl = ascioctl, + .d_poll = ascpoll, +}; + +#define STATIC static + +/*** + *** LOCALLY USED SUBROUTINES + *** + ***/ + +/*** + *** get_resolution + *** read resolution from the scanner + ***/ +static void +get_resolution(struct asc_unit *scu) +{ + int res, i, delay; + + res=0; + scu->cmd_byte = ASC_STANDBY; + outb(ASC_CMD, scu->cmd_byte); + tsleep((caddr_t)scu, PCATCH, "ascres", hz/10); + for(delay= 100; (res=inb(ASC_STAT)) & ASC_RDY_FLAG; delay--) + { + i = tsleep((caddr_t)scu, PCATCH, "ascres0", 1); + if ( ( i == 0 ) || ( i == EWOULDBLOCK ) ) + i = SUCCESS; + else + break; + } + if (delay==0) { + lprintf("asc.get_resolution: timeout completing command\n"); + return /* -1 */; + } + /* ... actual read resolution... 
*/ + res &= ASC_RES_MASK; + for (i=0; geomtab[i].dpi != INVALID; i++) { + if (geomtab[i].g_res == res) break; + } + if (geomtab[i].dpi==INVALID) { + scu->geometry= i; /* INVALID; */ + lprintf("asc.get_resolution: wrong resolution\n"); + } else { + lprintf("asc.get_resolution: %d dpi\n",geomtab[i].dpi); + scu->geometry = i; + } + scu->portf_byte=0; /* default */ + if (geomtab[scu->geometry].g_res==0 && !(scu->thedev&FRMT_GRAY)) { + /* color scanner seems to require this */ + scu->portf_byte=2; + /* scu->geometry++; */ + } + scu->linesize = geomtab[scu->geometry].bpl; + scu->height = geomtab[scu->geometry].dpl; /* default... */ +} + +/*** + *** buffer_allocate + *** allocate/reallocate a buffer + *** Now just checks that the preallocated buffer is large enough. + ***/ + +static int +buffer_allocate(struct asc_unit *scu) +{ + size_t size, size1; + + size = scu->blen * scu->linesize; + + lprintf("asc.buffer_allocate: need 0x%x bytes\n", size); + + if ( size > MAX_BUFSIZE ) { + size1=size; + size= ( (MAX_BUFSIZE+scu->linesize-1) / scu->linesize)*scu->linesize; + lprintf("asc.buffer_allocate: 0x%x bytes are too much, try 0x%x\n", + size1, size); + return ENOMEM; + } + + scu->sbuf.size = size; + scu->sbuf.rptr = 0; + scu->sbuf.wptr = 0; + scu->sbuf.count = 0; /* available data for reading */ + + lprintf("asc.buffer_allocate: ok\n"); + + return SUCCESS; +} + +/*** dma_restart + *** invoked locally to start dma. Must run in a critical section + ***/ +static void +dma_restart(struct asc_unit *scu) +{ + unsigned char al=scu->cmd_byte; + + if (geomtab[scu->geometry].g_res==0) {/* color */ + isa_dmastart(BUF_CMD_READ, 0, scu->sbuf.base+scu->sbuf.wptr, + scu->linesize + 90 /* XXX */ , scu->dma_num); + /* + * looks like we have to set and then clear this + * bit to enable the scanner to send interrupts + */ + outb( ASC_CMD, al |= 4 ); /* seems to disable interrupts */ +#if 0 + outb( ASC_CMD, al |= 8 ); /* ??? 
seems useless */ +#endif + outb( ASC_CMD, al &= 0xfb ); + scu->cmd_byte = al; + } else { /* normal */ + isa_dmastart(BUF_CMD_READ, 0, scu->sbuf.base+scu->sbuf.wptr, + scu->linesize, scu->dma_num); + /*** this is done in sub_20, after dmastart ? ***/ +#if 0 + outb( ASC_CMD, al |= 4 ); + outb( ASC_CMD, al |= 8 ); /* ??? seems useless */ + outb( ASC_CMD, al &= 0xfb ); + scu->cmd_byte = al; +#else + outb( ASC_CMD, ASC_OPERATE); +#endif + } + scu->flags |= DMA_ACTIVE; +} + +/*** + *** the main functions + ***/ + +/*** asc_reset + *** resets the scanner and the config bytes... + ***/ +static void +asc_reset(struct asc_unit *scu) +{ + scu->cfg_byte = 0 ; /* clear... */ + scu->cmd_byte = 0 ; /* clear... */ + + outb(ASC_CFG,scu->cfg_byte); /* for safety, do this here */ + outb(ASC_CMD,scu->cmd_byte); /* probably not needed */ + tsleep((caddr_t)scu, PCATCH, "ascres", hz/10); /* sleep .1 sec */ + + scu->blen = DEFAULT_BLEN; + scu->btime = TIMEOUT; + scu->height = 0 ; /* don't know better... */ +} +/************************************************************************** + *** + *** ascprobe + *** read status port and check for proper configuration: + *** - if address group matches (status byte has reasonable value) + *** cannot check interrupt/dma, only clear the config byte. + ***/ +static int +ascprobe (struct isa_device *isdp) +{ + int unit = isdp->id_unit; + struct asc_unit *scu = unittab + unit; + int stb; + + scu->base = isdp->id_iobase; /*** needed by the following macros ***/ + scu->flags = FLAG_DEBUG; + + if ( isdp->id_iobase < 0 ) { + lprintf("asc%d.probe: no iobase given\n", unit); + return PROBE_FAIL; + } + + if ((stb=inb(ASC_PROBE)) != ASC_PROBE_VALUE) { + lprintf("asc%d.probe: failed, got 0x%02x instead of 0x%02x\n", + unit, stb, ASC_PROBE_VALUE); + return PROBE_FAIL; + } + +/* + * NOTE NOTE NOTE + * the new AmiScan Color board uses int 10,11,12 instead of 3,5,10 + * respectively. This means that the driver must act accordingly. 
+ * Unfortunately there is no easy way of telling which board one has, + * other than trying to get an interrupt and noticing that it is + * missing. use "option ASC_NEW_BOARD" if you have a new board. + * + */ + +#if ASC_NEW_BOARD +#define ASC_IRQ_A 10 +#define ASC_IRQ_B 11 +#define ASC_IRQ_C 12 +#else +#define ASC_IRQ_A 3 +#define ASC_IRQ_B 5 +#define ASC_IRQ_C 10 +#endif + + switch(ffs(isdp->id_irq) - 1) { + case ASC_IRQ_A : + scu->int_byte = ASC_CNF_IRQ3; + break; + case ASC_IRQ_B : + scu->int_byte = ASC_CNF_IRQ5; + break; + case ASC_IRQ_C : + scu->int_byte = ASC_CNF_IRQ10; + break; +#if 0 + case -1: + scu->int_byte = 0; + lprintf("asc%d.probe: warning - going interruptless\n", unit); + break; +#endif + default: + lprintf("asc%d.probe: unsupported INT %d (only 3, 5, 10)\n", + unit, ffs(isdp->id_irq) - 1 ); + return PROBE_FAIL; + } + scu->dma_num = isdp->id_drq; + switch(scu->dma_num) { + case 1: + scu->dma_byte = ASC_CNF_DMA1; + break; + case 3: + scu->dma_byte = ASC_CNF_DMA3; + break; + default: + lprintf("asc%d.probe: unsupported DMA %d (only 1 or 3)\n", + unit, scu->dma_num); + return PROBE_FAIL; + } + asc_reset(scu); +/* lprintf("asc%d.probe: ok\n", unit); */ + + scu->flags &= ~FLAG_DEBUG; + scu->icnt = 0; + return PROBE_SUCCESS; +} + +/************************************************************************** + *** + *** ascattach + *** finish initialization of unit structure, get geometry value (?) + ***/ + +static int +ascattach(struct isa_device *isdp) +{ + int unit = isdp->id_unit; + struct asc_unit *scu = unittab + unit; + + isdp->id_intr = (inthand2_t *)ascintr; + scu->flags |= FLAG_DEBUG; + kprintf("asc%d: [GI1904/Trust Ami-Scan Grey/Color]\n", unit); + + /* + * Initialize buffer structure. + * XXX this must be done early to give a good chance of getting a + * contiguous buffer. This wastes memory. 
+ */ + scu->sbuf.base = contigmalloc((unsigned long)MAX_BUFSIZE, M_DEVBUF, M_NOWAIT, + 0ul, 0xfffffful, 1ul, 0x10000ul); + if ( scu->sbuf.base == NULL ) + { + lprintf("asc%d.attach: buffer allocation failed\n", unit); + return ATTACH_FAIL; /* XXX attach must not fail */ + } + scu->sbuf.size = INVALID; + scu->sbuf.rptr = INVALID; + + scu->flags |= ATTACHED; +/* lprintf("asc%d.attach: ok\n", unit); */ + scu->flags &= ~FLAG_DEBUG; + + scu->selp.si_flags=0; + scu->selp.si_pid=(pid_t)0; +#define ASC_UID 0 +#define ASC_GID 13 + dev_ops_add(&asc_ops, 0xc0, unit << 6); + make_dev(&asc_ops, unit<<6, ASC_UID, ASC_GID, 0666, "asc%d", unit); + make_dev(&asc_ops, ((unit<<6) + FRMT_PBM), + ASC_UID, ASC_GID, 0666, "asc%dp", unit); + make_dev(&asc_ops, ((unit<<6) + DBUG_MASK), + ASC_UID, ASC_GID, 0666, "asc%dd", unit); + make_dev(&asc_ops, ((unit<<6) + DBUG_MASK+FRMT_PBM), + ASC_UID, ASC_GID, 0666, "asc%dpd", unit); + return ATTACH_SUCCESS; +} + +/************************************************************************** + *** + *** ascintr + *** the interrupt routine, at the end of DMA... + ***/ +static void +ascintr(void *arg) +{ + int unit = (int)arg; + struct asc_unit *scu = unittab + unit; + int chan_bit = 0x01 << scu->dma_num; + + scu->icnt++; + /* ignore stray interrupts... */ + if ((scu->flags & (OPEN |READING)) != (OPEN | READING) ) { + /* must be after closing... */ + scu->flags &= ~(OPEN | READING | DMA_ACTIVE | SLEEPING | SEL_COLL); + return; + } + if ( (scu->flags & DMA_ACTIVE) && (inb(DMA1_READY) & chan_bit) != 0) { + outb( ASC_CMD, ASC_STANDBY); + scu->flags &= ~DMA_ACTIVE; + /* bounce buffers... 
*/ + isa_dmadone(BUF_CMD_READ, 0, scu->sbuf.base+scu->sbuf.wptr, + scu->linesize, scu->dma_num); + scu->sbuf.wptr += scu->linesize; + if (scu->sbuf.wptr >= scu->sbuf.size) scu->sbuf.wptr=0; + scu->sbuf.count += scu->linesize; + if (scu->flags & SLEEPING) { + scu->flags &= ~SLEEPING; + wakeup((caddr_t)scu); + } + if (scu->sbuf.size - scu->sbuf.count >= scu->linesize) { + dma_restart(scu); + } + if (scu->selp.si_pid) { + selwakeup(&scu->selp); + scu->selp.si_pid=(pid_t)0; + scu->selp.si_flags = 0; + } + } +} + +/************************************************************************** + *** + *** ascopen + *** set open flag, set modes according to minor number + *** FOR RELEASE: + *** don't switch scanner on, wait until first read or ioctls go before + ***/ + +STATIC int +ascopen(struct dev_open_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct asc_unit *scu; + int unit; + + unit = UNIT(minor(dev)) & UNIT_MASK; + if ( unit >= NASC ) + { +#ifdef ASCDEBUG + /* XXX lprintf isn't valid here since there is no scu. 
*/ + kprintf("asc%d.open: unconfigured unit number (max %d)\n", unit, NASC); +#endif + return ENXIO; + } + scu = unittab + unit; + if ( !( scu->flags & ATTACHED ) ) + { + lprintf("asc%d.open: unit was not attached successfully 0x%04x\n", + unit, scu->flags); + return ENXIO; + } + + if ( minor(dev) & DBUG_MASK ) + scu->flags |= FLAG_DEBUG; + else + scu->flags &= ~FLAG_DEBUG; + + switch(minor(dev) & FRMT_MASK) { + case FRMT_PBM: + scu->flags |= PBM_MODE; + lprintf("asc%d.open: pbm mode\n", unit); + break; + case FRMT_RAW: + lprintf("asc%d.open: raw mode\n", unit); + scu->flags &= ~PBM_MODE; + break; + default: + lprintf("asc%d.open: gray maps are not yet supported", unit); + return ENXIO; + } + + lprintf("asc%d.open: minor %d icnt %ld\n", unit, minor(dev), scu->icnt); + + if ( scu->flags & OPEN ) { + lprintf("asc%d.open: already open", unit); + return EBUSY; + } + if (isa_dma_acquire(scu->dma_num)) + return(EBUSY); + + scu->flags = ATTACHED | OPEN; + + asc_reset(scu); + get_resolution(scu); + return SUCCESS; +} + +static int +asc_startread(struct asc_unit *scu) +{ + /*** from here on, things can be delayed to the first read/ioctl ***/ + /*** this was done in sub_12... ***/ + scu->cfg_byte= scu->cmd_byte=0; /* init scanner */ + outb(ASC_CMD, scu->cmd_byte); + /*** this was done in sub_16, set scan len... ***/ + outb(ASC_BOH, scu->portf_byte ); + if (geomtab[scu->geometry].g_res==0) { /* color */ + scu->cmd_byte = 0x00 ; + } else { + scu->cmd_byte = 0x90 ; + } + outb(ASC_CMD, scu->cmd_byte); + outb(ASC_LEN_L, scu->linesize & 0xff /* len_low */); + outb(ASC_LEN_H, (scu->linesize >>8) & 0xff /* len_high */); + /*** this was done in sub_21, config DMA ... 
***/ + scu->cfg_byte |= scu->dma_byte; + outb(ASC_CFG, scu->cfg_byte); + /*** sub_22: enable int on the scanner ***/ + scu->cfg_byte |= scu->int_byte; + outb(ASC_CFG, scu->cfg_byte); + /*** sub_28: light on etc...***/ + scu->cmd_byte = ASC_STANDBY; + outb(ASC_CMD, scu->cmd_byte); + tsleep((caddr_t)scu, PCATCH, "ascstrd", hz/10); /* sleep .1 sec */ + return SUCCESS; +} + +/************************************************************************** + *** + *** ascclose + *** turn off scanner, release the buffer + *** should probably terminate dma ops, release int and dma. lr 12mar95 + ***/ + +STATIC int +ascclose(struct dev_close_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + int unit = UNIT(minor(dev)); + struct asc_unit *scu = unittab + unit; + + lprintf("asc%d.close: minor %d\n", + unit, minor(dev)); + + if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { + lprintf("asc%d.close: unit was not attached successfully 0x%04x\n", + unit, scu->flags); + return ENXIO; + } + /* all this is in sub_29... */ + /* cli(); */ + outb(ASC_CFG, 0 ); /* don't save in CFG byte!!! */ + scu->cmd_byte &= ~ASC_LIGHT_ON; + outb(ASC_CMD, scu->cmd_byte);/* light off */ + tsleep((caddr_t)scu, PCATCH, "ascclo", hz/2); /* sleep 1/2 sec */ + scu->cfg_byte &= ~ scu->dma_byte ; /* disable scanner dma */ + scu->cfg_byte &= ~ scu->int_byte ; /* disable scanner int */ + outb(ASC_CFG, scu->cfg_byte); + /* --- disable dma controller ? 
--- */ + isa_dma_release(scu->dma_num); + /* --- disable interrupts on the controller (sub_24) --- */ + + scu->sbuf.size = INVALID; + scu->sbuf.rptr = INVALID; + + scu->flags &= ~(FLAG_DEBUG | OPEN | READING); + + return SUCCESS; +} + +static void +pbm_init(struct asc_unit *scu) +{ + int width = geomtab[scu->geometry].dpl; + int l= ksprintf(scu->sbuf.base,"P4 %d %d\n", width, scu->height); + char *p; + + scu->bcount = scu->height * width / 8 + l; + + /* move header to end of sbuf */ + scu->sbuf.rptr=scu->sbuf.size-l; + bcopy(scu->sbuf.base, scu->sbuf.base+scu->sbuf.rptr,l); + scu->sbuf.count = l; + if (geomtab[scu->geometry].g_res!=0) { /* BW scanner */ + for(p = scu->sbuf.base + scu->sbuf.rptr; l; p++, l--) + *p = ~*p; +} +} +/************************************************************************** + *** + *** ascread + ***/ + +STATIC int +ascread(struct dev_read_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct uio *uio = ap->a_uio; + int unit = UNIT(minor(dev)); + struct asc_unit *scu = unittab + unit; + size_t nbytes; + int res; + unsigned char *p; + + lprintf("asc%d.read: minor %d icnt %ld\n", unit, minor(dev), scu->icnt); + + if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { + lprintf("asc%d.read: unit was not attached successfully 0x%04x\n", + unit, scu->flags); + return ENXIO; + } + + if ( !(scu->flags & READING) ) { /*** first read... 
***/ + /* allocate a buffer for reading data and init things */ + if ( (res = buffer_allocate(scu)) == SUCCESS ) scu->flags |= READING; + else return res; + asc_startread(scu); + if ( scu->flags & PBM_MODE ) { /* initialize for pbm mode */ + pbm_init(scu); + } + } + + lprintf("asc%d.read(before): " + "sz 0x%x, rptr 0x%x, wptr 0x%x, cnt 0x%x bcnt 0x%x flags 0x%x icnt %ld\n", + unit, scu->sbuf.size, scu->sbuf.rptr, + scu->sbuf.wptr, scu->sbuf.count, scu->bcount,scu->flags, + scu->icnt); + + crit_enter(); + if ( scu->sbuf.count == 0 ) { /* no data avail., must wait */ + if (!(scu->flags & DMA_ACTIVE)) dma_restart(scu); + scu->flags |= SLEEPING; + res = tsleep((caddr_t)scu, PCATCH, "ascread", 0); + scu->flags &= ~SLEEPING; + if ( res == 0 ) res = SUCCESS; + } + crit_exit(); + if (scu->flags & FLAG_DEBUG) + tsleep((caddr_t)scu, PCATCH, "ascdly",hz); + lprintf("asc%d.read(after): " + "sz 0x%x, rptr 0x%x, wptr 0x%x, cnt 0x%x bcnt 0x%x flags 0x%x icnt %ld\n", + unit, scu->sbuf.size, scu->sbuf.rptr, + scu->sbuf.wptr, scu->sbuf.count, scu->bcount,scu->flags,scu->icnt); + + /* first, not more than available... */ + nbytes = min( uio->uio_resid, scu->sbuf.count ); + /* second, contiguous data... 
*/ + nbytes = min( nbytes, (scu->sbuf.size - scu->sbuf.rptr) ); + /* third, one line (will remove this later, XXX) */ + nbytes = min( nbytes, scu->linesize ); + if ( (scu->flags & PBM_MODE) ) + nbytes = min( nbytes, scu->bcount ); + lprintf("asc%d.read: transferring 0x%x bytes\n", unit, nbytes); + if (geomtab[scu->geometry].g_res!=0) { /* BW scanner */ + lprintf("asc%d.read: invert buffer\n",unit); + for(p = scu->sbuf.base + scu->sbuf.rptr, res=nbytes; res; p++, res--) + *p = ~*p; + } + res = uiomove(scu->sbuf.base + scu->sbuf.rptr, nbytes, uio); + if ( res != SUCCESS ) { + lprintf("asc%d.read: uiomove failed %d", unit, res); + return res; + } + + crit_enter(); + scu->sbuf.rptr += nbytes; + if (scu->sbuf.rptr >= scu->sbuf.size) scu->sbuf.rptr=0; + scu->sbuf.count -= nbytes; + /* having moved some data, can read mode */ + if (!(scu->flags & DMA_ACTIVE)) dma_restart(scu); + crit_exit(); + if ( scu->flags & PBM_MODE ) scu->bcount -= nbytes; + + lprintf("asc%d.read: size 0x%x, pointer 0x%x, bcount 0x%x, ok\n", + unit, scu->sbuf.size, scu->sbuf.rptr, scu->bcount); + + return SUCCESS; +} + +/************************************************************************** + *** + *** ascioctl + ***/ + +STATIC int +ascioctl(struct dev_ioctl_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + caddr_t data = ap->a_data; + int unit = UNIT(minor(dev)); + struct asc_unit *scu = unittab + unit; + + lprintf("asc%d.ioctl: minor %d\n", + unit, minor(dev)); + + if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { + lprintf("asc%d.ioctl: unit was not attached successfully 0x%04x\n", + unit, scu->flags); + return ENXIO; + } + switch(ap->a_cmd) { + case ASC_GRES: + asc_reset(scu); + get_resolution(scu); + *(int *)data=geomtab[scu->geometry].dpi; + lprintf("asc%d.ioctl:ASC_GRES %ddpi\n", unit, *(int *)data); + return SUCCESS; + case ASC_GWIDTH: + *(int *)data=geomtab[scu->geometry].dpl; + lprintf("asc%d.ioctl:ASC_GWIDTH %d\n", unit, *(int *)data); + return SUCCESS; + case ASC_GHEIGHT: + *(int 
*)data=scu->height; + lprintf("asc%d.ioctl:ASC_GHEIGHT %d\n", unit, *(int *)data); + return SUCCESS; + case ASC_SHEIGHT: + lprintf("asc%d.ioctl:ASC_SHEIGHT %d\n", unit, *(int *)data); + if ( scu->flags & READING ) { + lprintf("asc%d:ioctl on already reading unit\n", unit); + return EBUSY; + } + scu->height=*(int *)data; + return SUCCESS; +#if 0 + case ASC_GBLEN: + *(int *)data=scu->blen; + lprintf("asc%d.ioctl:ASC_GBLEN %d\n", unit, *(int *)data); + return SUCCESS; + case ASC_SBLEN: + lprintf("asc%d.ioctl:ASC_SBLEN %d\n", unit, *(int *)data); + if (*(int *)data * geomtab[scu->geometry].dpl / 8 > MAX_BUFSIZE) + { + lprintf("asc%d:ioctl buffer size too high\n", unit); + return ENOMEM; + } + scu->blen=*(int *)data; + return SUCCESS; + case ASC_GBTIME: + *(int *)data = scu->btime / hz; + lprintf("asc%d.ioctl:ASC_GBTIME %d\n", unit, *(int *)data); + return SUCCESS; + case ASC_SBTIME: + scu->btime = *(int *)data * hz; + lprintf("asc%d.ioctl:ASC_SBTIME %d\n", unit, *(int *)data); + return SUCCESS; +#endif + default: return ENOTTY; + } + return SUCCESS; +} + +STATIC int +ascpoll(struct dev_poll_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + int unit = UNIT(minor(dev)); + struct asc_unit *scu = unittab + unit; + int revents = 0; + + crit_enter(); + + if (ap->a_events & (POLLIN | POLLRDNORM)) { + if (scu->sbuf.count >0) + revents |= ap->a_events & (POLLIN | POLLRDNORM); + else { + if (!(scu->flags & DMA_ACTIVE)) + dma_restart(scu); + + selrecord(curthread, &scu->selp); + } + } + crit_exit(); + ap->a_events = revents; + return (0); +} diff --git a/sys/platform/pc64/isa/ascreg.h b/sys/platform/pc64/isa/ascreg.h new file mode 100644 index 0000000000..199ad7c3e6 --- /dev/null +++ b/sys/platform/pc64/isa/ascreg.h @@ -0,0 +1,98 @@ +/* ascreg.h - port and bit definitions for the GI-1904 interface + * + * Copyright (c) 1995 Gunther Schadow. All rights reserved. + * Copyright (c) 1995 Luigi Rizzo. All rights reserved. + * Copyright (c) 2008 The DragonFly Project. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Gunther Schadow. + * and Luigi Rizzo + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: src/sys/i386/isa/ascreg.h,v 1.5 1999/08/28 00:44:37 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ascreg.h,v 1.1 2008/08/29 17:07:19 dillon Exp $ + */ + + /*** Registers (base=3EB): ************/ +#define ASC_CFG (scu->base) + /*** ASC_CFG 3EB: configuration register. 
Write only, mirror in RAM + *** 7 6 5 4 3 2 1 0 + *** - - I_5 I_3 I10 D_3 - D_1 + ***/ + /*** #define ASC_CNF_MASK 0x3D */ /* was 0x5a */ +#define ASC_CNF_DMA1 0x01 /* was (~0x02 & ASC_CNF_MASK) */ +#define ASC_CNF_DMA3 0x04 /* was (~0x08 & ASC_CNF_MASK) */ +#define ASC_CNF_IRQ3 0x10 /* was (~0x10 & ASC_CNF_MASK) */ +#define ASC_CNF_IRQ5 0x20 /* was (~0x40 & ASC_CNF_MASK) */ +#define ASC_CNF_IRQ10 0x08 /* was (~0x40 & ASC_CNF_MASK) */ + + /*** ASC_STAT 3EC: command/status; rw, mirror in ram + *** 7 6 5 4 3 2 1 0 + *** BSY - - - - - - - + *** [<-- Resolution -->] 13h,10h,0eh,0ch,09h, 07h, 04h, 02h + ***/ +#define ASC_STAT (scu->base + 1) + +#define ASC_RDY_FLAG 0x80 +#define ASC_RES_MASK 0x3f +#define ASC_RES_800 0x13 +#define ASC_RES_700 0x10 +#define ASC_RES_600 0x0e +#define ASC_RES_500 0x0c +#define ASC_RES_400 0x09 /* 0x00 */ +#define ASC_RES_300 0x07 /* 0x04 */ +#define ASC_RES_200 0x04 /* 0x20 */ +#define ASC_RES_100 0x02 /* 0x24 */ + + /*** ASC_CMD 3EC: command/status; rw, mirror in ram + *** W: 7 6 5 4 3 2 1 0 + *** . - - . . . . . + *** b0: 1: light on & get resolution, 0: light off + *** b1: 0: load scan len (sub_16, with b4=1, b7=1) + *** b2: 1/0 : dma stuff + *** b3: 0/1 : dma stuff + *** b4: 1 : load scan len (sub_16, with b1=0, b7=1) + *** b5: ? + *** b6: ? + *** b7: ? : set at beginning of sub_16 + ***/ +#define ASC_CMD (scu->base + 1) + +#define ASC_LIGHT_ON 0x01 +#define ASC_SET_B2 0x04 +#define ASC_OPERATE 0x91 /* from linux driver... */ +#define ASC_STANDBY 0x05 /* from linux driver... */ + + /*** ASC_LEN_L, ASC_LEN_H 3ED, 3EE: transfer length, lsb first ***/ +#define ASC_LEN_L ((scu->base)+2) +#define ASC_LEN_H ((scu->base)+3) + + /*** 3EE ASC_PROBE (must read ASC_PROBE_VALUE) ***/ +#define ASC_PROBE ((scu->base)+3) +#define ASC_PROBE_VALUE 0xA5 + + /*** ASC_BOH 3EF: always write 0 at the moment, read some values ? 
***/ +#define ASC_BOH ((scu->base)+4) diff --git a/sys/platform/pc64/isa/clock.c b/sys/platform/pc64/isa/clock.c new file mode 100644 index 0000000000..6265c31ca0 --- /dev/null +++ b/sys/platform/pc64/isa/clock.c @@ -0,0 +1,1221 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz and Don Ahn. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 + * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ + * $DragonFly: src/sys/platform/pc64/isa/clock.c,v 1.1 2008/08/29 17:07:19 dillon Exp $ + */ + +/* + * Routines to handle clock hardware. + */ + +/* + * inittodr, settodr and support routines written + * by Christoph Robitschko + * + * reintroduced and updated by Chris Stenton 8/10/94 + */ + +//#include "use_apm.h" +//#include "opt_clock.h" + +#include +#include +#include +#include +#include +#include +#ifndef SMP +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef CLK_CALIBRATION_LOOP +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef APIC_IO +/* The interrupt triggered by the 8254 (timer) chip */ +int apic_8254_intr; +static void setup_8254_mixed_mode (void); +#endif +static void i8254_restore(void); +static void resettodr_on_shutdown(void *arg __unused); + +/* + * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we + * can use a simple formula for leap years. 
+ */ +#define LEAPYEAR(y) ((u_int)(y) % 4 == 0) +#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) + +#ifndef TIMER_FREQ +#define TIMER_FREQ 1193182 +#endif + +static uint8_t i8254_walltimer_sel; +static uint16_t i8254_walltimer_cntr; + +int adjkerntz; /* local offset from GMT in seconds */ +int disable_rtc_set; /* disable resettodr() if != 0 */ +int statclock_disable = 1; /* we don't use the statclock right now */ +int tsc_present; +int64_t tsc_frequency; +int tsc_is_broken; +int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ +int timer0_running; +enum tstate { RELEASED, ACQUIRED }; +enum tstate timer0_state; +enum tstate timer1_state; +enum tstate timer2_state; + +static int beeping = 0; +static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; +static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; +static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; +static int rtc_loaded; + +static int i8254_cputimer_div; + +static struct callout sysbeepstop_ch; + +static sysclock_t i8254_cputimer_count(void); +static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last); +static void i8254_cputimer_destruct(struct cputimer *cputimer); + +static struct cputimer i8254_cputimer = { + SLIST_ENTRY_INITIALIZER, + "i8254", + CPUTIMER_PRI_8254, + 0, + i8254_cputimer_count, + cputimer_default_fromhz, + cputimer_default_fromus, + i8254_cputimer_construct, + i8254_cputimer_destruct, + TIMER_FREQ, + 0, 0, 0 +}; + +/* + * timer0 clock interrupt. Timer0 is in one-shot mode and has stopped + * counting as of this interrupt. We use timer1 in free-running mode (not + * generating any interrupts) as our main counter. Each cpu has timeouts + * pending. + * + * This code is INTR_MPSAFE and may be called without the BGL held. + */ +static void +clkintr(void *dummy, void *frame_arg) +{ + static sysclock_t sysclock_count; /* NOTE! 
Must be static */ + struct globaldata *gd = mycpu; +#ifdef SMP + struct globaldata *gscan; + int n; +#endif + + /* + * SWSTROBE mode is a one-shot, the timer is no longer running + */ + timer0_running = 0; + + /* + * XXX the dispatcher needs work. right now we call systimer_intr() + * directly or via IPI for any cpu with systimers queued, which is + * usually *ALL* of them. We need to use the LAPIC timer for this. + */ + sysclock_count = sys_cputimer->count(); +#ifdef SMP + for (n = 0; n < ncpus; ++n) { + gscan = globaldata_find(n); + if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL) + continue; + if (gscan != gd) { + lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, + &sysclock_count, 0); + } else { + systimer_intr(&sysclock_count, 0, frame_arg); + } + } +#else + if (TAILQ_FIRST(&gd->gd_systimerq) != NULL) + systimer_intr(&sysclock_count, 0, frame_arg); +#endif +} + + +/* + * NOTE! not MP safe. + */ +int +acquire_timer2(int mode) +{ + if (timer2_state != RELEASED) + return (-1); + timer2_state = ACQUIRED; + + /* + * This access to the timer registers is as atomic as possible + * because it is a single instruction. We could do better if we + * knew the rate. + */ + outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); + return (0); +} + +int +release_timer2(void) +{ + if (timer2_state != ACQUIRED) + return (-1); + outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); + timer2_state = RELEASED; + return (0); +} + +/* + * This routine receives statistical clock interrupts from the RTC. + * As explained above, these occur at 128 interrupts per second. + * When profiling, we receive interrupts at a rate of 1024 Hz. + * + * This does not actually add as much overhead as it sounds, because + * when the statistical clock is active, the hardclock driver no longer + * needs to keep (inaccurate) statistics on its own. This decouples + * statistics gathering from scheduling interrupts. 
+ * + * The RTC chip requires that we read status register C (RTC_INTR) + * to acknowledge an interrupt, before it will generate the next one. + * Under high interrupt load, rtcintr() can be indefinitely delayed and + * the clock can tick immediately after the read from RTC_INTR. In this + * case, the mc146818A interrupt signal will not drop for long enough + * to register with the 8259 PIC. If an interrupt is missed, the stat + * clock will halt, considerably degrading system performance. This is + * why we use 'while' rather than a more straightforward 'if' below. + * Stat clock ticks can still be lost, causing minor loss of accuracy + * in the statistics, but the stat clock will no longer stop. + */ +static void +rtcintr(void *dummy, void *frame) +{ + while (rtcin(RTC_INTR) & RTCIR_PERIOD) + ; + /* statclock(frame); no longer used */ +} + +#include "opt_ddb.h" +#ifdef DDB +#include + +DB_SHOW_COMMAND(rtc, rtc) +{ + kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", + rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), + rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), + rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); +} +#endif /* DDB */ + +/* + * Return the current cpu timer count as a 32 bit integer. + */ +static +sysclock_t +i8254_cputimer_count(void) +{ + static __uint16_t cputimer_last; + __uint16_t count; + sysclock_t ret; + + clock_lock(); + outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH); + count = (__uint8_t)inb(i8254_walltimer_cntr); /* get countdown */ + count |= ((__uint8_t)inb(i8254_walltimer_cntr) << 8); + count = -count; /* -> countup */ + if (count < cputimer_last) /* rollover */ + i8254_cputimer.base += 0x00010000; + ret = i8254_cputimer.base | count; + cputimer_last = count; + clock_unlock(); + return(ret); +} + +/* + * This function is called whenever the system timebase changes, allowing + * us to calculate what is needed to convert a system timebase tick + * into an 8254 tick for the interrupt timer. 
If we can convert to a + * simple shift, multiplication, or division, we do so. Otherwise 64 + * bit arithmatic is required every time the interrupt timer is reloaded. + */ +void +cputimer_intr_config(struct cputimer *timer) +{ + int freq; + int div; + + /* + * Will a simple divide do the trick? + */ + div = (timer->freq + (i8254_cputimer.freq / 2)) / i8254_cputimer.freq; + freq = i8254_cputimer.freq * div; + + if (freq >= timer->freq - 1 && freq <= timer->freq + 1) + i8254_cputimer_div = div; + else + i8254_cputimer_div = 0; +} + +/* + * Reload for the next timeout. It is possible for the reload value + * to be 0 or negative, indicating that an immediate timer interrupt + * is desired. For now make the minimum 2 ticks. + * + * We may have to convert from the system timebase to the 8254 timebase. + */ +void +cputimer_intr_reload(sysclock_t reload) +{ + __uint16_t count; + + if (i8254_cputimer_div) + reload /= i8254_cputimer_div; + else + reload = (int64_t)reload * i8254_cputimer.freq / sys_cputimer->freq; + + if ((int)reload < 2) + reload = 2; + + clock_lock(); + if (timer0_running) { + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); /* count-down timer */ + count = (__uint8_t)inb(TIMER_CNTR0); /* lsb */ + count |= ((__uint8_t)inb(TIMER_CNTR0) << 8); /* msb */ + if (reload < count) { + outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); + outb(TIMER_CNTR0, (__uint8_t)reload); /* lsb */ + outb(TIMER_CNTR0, (__uint8_t)(reload >> 8)); /* msb */ + } + } else { + timer0_running = 1; + if (reload > 0xFFFF) + reload = 0; /* full count */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); + outb(TIMER_CNTR0, (__uint8_t)reload); /* lsb */ + outb(TIMER_CNTR0, (__uint8_t)(reload >> 8)); /* msb */ + } + clock_unlock(); +} + +/* + * DELAY(usec) - Spin for the specified number of microseconds. 
+ * DRIVERSLEEP(usec) - Spin for the specified number of microseconds, + * but do a thread switch in the loop + * + * Relies on timer 1 counting down from (cputimer_freq / hz) + * Note: timer had better have been programmed before this is first used! + */ +static void +DODELAY(int n, int doswitch) +{ + int delta, prev_tick, tick, ticks_left; + +#ifdef DELAYDEBUG + int getit_calls = 1; + int n1; + static int state = 0; + + if (state == 0) { + state = 1; + for (n1 = 1; n1 <= 10000000; n1 *= 10) + DELAY(n1); + state = 2; + } + if (state == 1) + kprintf("DELAY(%d)...", n); +#endif + /* + * Guard against the timer being uninitialized if we are called + * early for console i/o. + */ + if (timer0_state == RELEASED) + i8254_restore(); + + /* + * Read the counter first, so that the rest of the setup overhead is + * counted. Then calculate the number of hardware timer ticks + * required, rounding up to be sure we delay at least the requested + * number of microseconds. + */ + prev_tick = sys_cputimer->count(); + ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) / + 1000000; + + /* + * Loop until done. 
+ */ + while (ticks_left > 0) { + tick = sys_cputimer->count(); +#ifdef DELAYDEBUG + ++getit_calls; +#endif + delta = tick - prev_tick; + prev_tick = tick; + if (delta < 0) + delta = 0; + ticks_left -= delta; + if (doswitch && ticks_left > 0) + lwkt_switch(); + } +#ifdef DELAYDEBUG + if (state == 1) + kprintf(" %d calls to getit() at %d usec each\n", + getit_calls, (n + 5) / getit_calls); +#endif +} + +void +DELAY(int n) +{ + DODELAY(n, 0); +} + +void +DRIVERSLEEP(int usec) +{ + globaldata_t gd = mycpu; + + if (gd->gd_intr_nesting_level || + gd->gd_spinlock_rd || + gd->gd_spinlocks_wr) { + DODELAY(usec, 0); + } else { + DODELAY(usec, 1); + } +} + +static void +sysbeepstop(void *chan) +{ + outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ + beeping = 0; + release_timer2(); +} + +int +sysbeep(int pitch, int period) +{ + if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) + return(-1); + /* + * Nobody else is using timer2, we do not need the clock lock + */ + outb(TIMER_CNTR2, pitch); + outb(TIMER_CNTR2, (pitch>>8)); + if (!beeping) { + /* enable counter2 output to speaker */ + outb(IO_PPI, inb(IO_PPI) | 3); + beeping = period; + callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); + } + return (0); +} + +/* + * RTC support routines + */ + +int +rtcin(int reg) +{ + u_char val; + + crit_enter(); + outb(IO_RTC, reg); + inb(0x84); + val = inb(IO_RTC + 1); + inb(0x84); + crit_exit(); + return (val); +} + +static __inline void +writertc(u_char reg, u_char val) +{ + crit_enter(); + inb(0x84); + outb(IO_RTC, reg); + inb(0x84); + outb(IO_RTC + 1, val); + inb(0x84); /* XXX work around wrong order in rtcin() */ + crit_exit(); +} + +static __inline int +readrtc(int port) +{ + return(bcd2bin(rtcin(port))); +} + +static u_int +calibrate_clocks(void) +{ + u_int64_t old_tsc; + u_int count, prev_count, tot_count; + int sec, start_sec, timeout; + + if (bootverbose) + kprintf("Calibrating clock(s) ... 
"); + if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) + goto fail; + timeout = 100000000; + + /* Read the mc146818A seconds counter. */ + for (;;) { + if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { + sec = rtcin(RTC_SEC); + break; + } + if (--timeout == 0) + goto fail; + } + + /* Wait for the mC146818A seconds counter to change. */ + start_sec = sec; + for (;;) { + if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { + sec = rtcin(RTC_SEC); + if (sec != start_sec) + break; + } + if (--timeout == 0) + goto fail; + } + + /* Start keeping track of the i8254 counter. */ + prev_count = sys_cputimer->count(); + tot_count = 0; + + if (tsc_present) + old_tsc = rdtsc(); + else + old_tsc = 0; /* shut up gcc */ + + /* + * Wait for the mc146818A seconds counter to change. Read the i8254 + * counter for each iteration since this is convenient and only + * costs a few usec of inaccuracy. The timing of the final reads + * of the counters almost matches the timing of the initial reads, + * so the main cause of inaccuracy is the varying latency from + * inside getit() or rtcin(RTC_STATUSA) to the beginning of the + * rtcin(RTC_SEC) that returns a changed seconds count. The + * maximum inaccuracy from this cause is < 10 usec on 486's. + */ + start_sec = sec; + for (;;) { + if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) + sec = rtcin(RTC_SEC); + count = sys_cputimer->count(); + tot_count += (int)(count - prev_count); + prev_count = count; + if (sec != start_sec) + break; + if (--timeout == 0) + goto fail; + } + + /* + * Read the cpu cycle counter. The timing considerations are + * similar to those for the i8254 clock. 
+ */ + if (tsc_present) { + tsc_frequency = rdtsc() - old_tsc; + } + + if (tsc_present) + kprintf("TSC clock: %llu Hz, ", tsc_frequency); + kprintf("i8254 clock: %u Hz\n", tot_count); + return (tot_count); + +fail: + kprintf("failed, using default i8254 clock of %u Hz\n", + i8254_cputimer.freq); + return (i8254_cputimer.freq); +} + +static void +i8254_restore(void) +{ + timer0_state = ACQUIRED; + + clock_lock(); + + /* + * Timer0 is our fine-grained variable clock interrupt + */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); + outb(TIMER_CNTR0, 2); /* lsb */ + outb(TIMER_CNTR0, 0); /* msb */ + clock_unlock(); + + /* + * Timer1 or timer2 is our free-running clock, but only if another + * has not been selected. + */ + cputimer_register(&i8254_cputimer); + cputimer_select(&i8254_cputimer, 0); +} + +static void +i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) +{ + int which; + + /* + * Should we use timer 1 or timer 2 ? + */ + which = 0; + TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); + if (which != 1 && which != 2) + which = 2; + + switch(which) { + case 1: + timer->name = "i8254_timer1"; + timer->type = CPUTIMER_8254_SEL1; + i8254_walltimer_sel = TIMER_SEL1; + i8254_walltimer_cntr = TIMER_CNTR1; + timer1_state = ACQUIRED; + break; + case 2: + timer->name = "i8254_timer2"; + timer->type = CPUTIMER_8254_SEL2; + i8254_walltimer_sel = TIMER_SEL2; + i8254_walltimer_cntr = TIMER_CNTR2; + timer2_state = ACQUIRED; + break; + } + + timer->base = (oldclock + 0xFFFF) & ~0xFFFF; + + clock_lock(); + outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); + outb(i8254_walltimer_cntr, 0); /* lsb */ + outb(i8254_walltimer_cntr, 0); /* msb */ + outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ + clock_unlock(); +} + +static void +i8254_cputimer_destruct(struct cputimer *timer) +{ + switch(timer->type) { + case CPUTIMER_8254_SEL1: + timer1_state = RELEASED; + break; + case CPUTIMER_8254_SEL2: + timer2_state = 
RELEASED; + break; + default: + break; + } + timer->type = 0; +} + +static void +rtc_restore(void) +{ + /* Restore all of the RTC's "status" (actually, control) registers. */ + writertc(RTC_STATUSB, RTCSB_24HR); + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, rtc_statusb); +} + +/* + * Restore all the timers. + * + * This function is called to resynchronize our core timekeeping after a + * long halt, e.g. from apm_default_resume() and friends. It is also + * called if after a BIOS call we have detected munging of the 8254. + * It is necessary because cputimer_count() counter's delta may have grown + * too large for nanouptime() and friends to handle, or (in the case of 8254 + * munging) might cause the SYSTIMER code to prematurely trigger. + */ +void +timer_restore(void) +{ + crit_enter(); + i8254_restore(); /* restore timer_freq and hz */ + rtc_restore(); /* reenable RTC interrupts */ + crit_exit(); +} + +/* + * Initialize 8254 timer 0 early so that it can be used in DELAY(). + */ +void +startrtclock(void) +{ + u_int delta, freq; + + /* + * Can we use the TSC? + */ + if (cpu_feature & CPUID_TSC) + tsc_present = 1; + else + tsc_present = 0; + + /* + * Initial RTC state, don't do anything unexpected + */ + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + /* + * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to + * generate an interrupt, which we will ignore for now. + * + * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000 + * (so it counts a full 2^16 and repeats). We will use this timer + * for our counting. + */ + i8254_restore(); + freq = calibrate_clocks(); +#ifdef CLK_CALIBRATION_LOOP + if (bootverbose) { + kprintf( + "Press a key on the console to abort clock calibration\n"); + while (cncheckc() == -1) + calibrate_clocks(); + } +#endif + + /* + * Use the calibrated i8254 frequency if it seems reasonable. + * Otherwise use the default, and don't use the calibrated i586 + * frequency. 
+ */ + delta = freq > i8254_cputimer.freq ? + freq - i8254_cputimer.freq : i8254_cputimer.freq - freq; + if (delta < i8254_cputimer.freq / 100) { +#ifndef CLK_USE_I8254_CALIBRATION + if (bootverbose) + kprintf( +"CLK_USE_I8254_CALIBRATION not specified - using default frequency\n"); + freq = i8254_cputimer.freq; +#endif + cputimer_set_frequency(&i8254_cputimer, freq); + } else { + if (bootverbose) + kprintf( + "%d Hz differs from default of %d Hz by more than 1%%\n", + freq, i8254_cputimer.freq); + tsc_frequency = 0; + } + +#ifndef CLK_USE_TSC_CALIBRATION + if (tsc_frequency != 0) { + if (bootverbose) + kprintf( +"CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n"); + tsc_frequency = 0; + } +#endif + if (tsc_present && tsc_frequency == 0) { + /* + * Calibration of the i586 clock relative to the mc146818A + * clock failed. Do a less accurate calibration relative + * to the i8254 clock. + */ + u_int64_t old_tsc = rdtsc(); + + DELAY(1000000); + tsc_frequency = rdtsc() - old_tsc; +#ifdef CLK_USE_TSC_CALIBRATION + if (bootverbose) { + kprintf("TSC clock: %llu Hz (Method B)\n", + tsc_frequency); + } +#endif + } + + EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST); + +#if !defined(SMP) + /* + * We can not use the TSC in SMP mode, until we figure out a + * cheap (impossible), reliable and precise (yeah right!) way + * to synchronize the TSCs of all the CPUs. + * Curse Intel for leaving the counter out of the I/O APIC. + */ + +#if NAPM > 0 + /* + * We can not use the TSC if we support APM. Precise timekeeping + * on an APM'ed machine is at best a fools pursuit, since + * any and all of the time spent in various SMM code can't + * be reliably accounted for. Reading the RTC is your only + * source of reliable time info. The i8254 looses too of course + * but we need to have some kind of time... + * We don't know at this point whether APM is going to be used + * or not, nor when it might be activated. Play it safe. 
+ */ + return; +#endif /* NAPM > 0 */ + +#endif /* !defined(SMP) */ +} + +/* + * Sync the time of day back to the RTC on shutdown, but only if + * we have already loaded it and have not crashed. + */ +static void +resettodr_on_shutdown(void *arg __unused) +{ + if (rtc_loaded && panicstr == NULL) { + resettodr(); + } +} + +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + */ +void +inittodr(time_t base) +{ + unsigned long sec, days; + int yd; + int year, month; + int y, m; + struct timespec ts; + + if (base) { + ts.tv_sec = base; + ts.tv_nsec = 0; + set_timeofday(&ts); + } + + /* Look if we have a RTC present and the time is valid */ + if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) + goto wrong_time; + + /* wait for time update to complete */ + /* If RTCSA_TUP is zero, we have at least 244us before next update */ + crit_enter(); + while (rtcin(RTC_STATUSA) & RTCSA_TUP) { + crit_exit(); + crit_enter(); + } + + days = 0; +#ifdef USE_RTC_CENTURY + year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; +#else + year = readrtc(RTC_YEAR) + 1900; + if (year < 1970) + year += 100; +#endif + if (year < 1970) { + crit_exit(); + goto wrong_time; + } + month = readrtc(RTC_MONTH); + for (m = 1; m < month; m++) + days += daysinmonth[m-1]; + if ((month > 2) && LEAPYEAR(year)) + days ++; + days += readrtc(RTC_DAY) - 1; + yd = days; + for (y = 1970; y < year; y++) + days += DAYSPERYEAR + LEAPYEAR(y); + sec = ((( days * 24 + + readrtc(RTC_HRS)) * 60 + + readrtc(RTC_MIN)) * 60 + + readrtc(RTC_SEC)); + /* sec now contains the number of seconds, since Jan 1 1970, + in the local time zone */ + + sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? 
adjkerntz : 0); + + y = time_second - sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + ts.tv_sec = sec; + ts.tv_nsec = 0; + set_timeofday(&ts); + } + rtc_loaded = 1; + crit_exit(); + return; + +wrong_time: + kprintf("Invalid time in real time clock.\n"); + kprintf("Check and reset the date immediately!\n"); +} + +/* + * Write system time back to RTC + */ +void +resettodr(void) +{ + struct timeval tv; + unsigned long tm; + int m; + int y; + + if (disable_rtc_set) + return; + + microtime(&tv); + tm = tv.tv_sec; + + crit_enter(); + /* Disable RTC updates and interrupts. */ + writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); + + /* Calculate local time to put in RTC */ + + tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); + + writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ + writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ + writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ + + /* We have now the days since 01-01-1970 in tm */ + writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ + for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); + tm >= m; + y++, m = DAYSPERYEAR + LEAPYEAR(y)) + tm -= m; + + /* Now we have the years in y and the day-of-the-year in tm */ + writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ +#ifdef USE_RTC_CENTURY + writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ +#endif + for (m = 0; ; m++) { + int ml; + + ml = daysinmonth[m]; + if (m == 1 && LEAPYEAR(y)) + ml++; + if (tm < ml) + break; + tm -= ml; + } + + writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ + writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ + + /* Reenable RTC updates and interrupts. */ + writertc(RTC_STATUSB, rtc_statusb); + crit_exit(); +} + + +/* + * Start both clocks running. DragonFly note: the stat clock is no longer + * used. Instead, 8254 based systimers are used for all major clock + * interrupts. statclock_disable is set by default. 
+ */ +void +cpu_initclocks(void *arg __unused) +{ + int diag; +#ifdef APIC_IO + int apic_8254_trial; + void *clkdesc; +#endif /* APIC_IO */ + + if (statclock_disable) { + /* + * The stat interrupt mask is different without the + * statistics clock. Also, don't set the interrupt + * flag which would normally cause the RTC to generate + * interrupts. + */ + rtc_statusb = RTCSB_24HR; + } else { + /* Setting stathz to nonzero early helps avoid races. */ + stathz = RTC_NOPROFRATE; + profhz = RTC_PROFRATE; + } + + /* Finish initializing 8253 timer 0. */ +#ifdef APIC_IO + + apic_8254_intr = isa_apic_irq(0); + apic_8254_trial = 0; + if (apic_8254_intr >= 0 ) { + if (apic_int_type(0, 0) == 3) + apic_8254_trial = 1; + } else { + /* look for ExtInt on pin 0 */ + if (apic_int_type(0, 0) == 3) { + apic_8254_intr = apic_irq(0, 0); + setup_8254_mixed_mode(); + } else + panic("APIC_IO: Cannot route 8254 interrupt to CPU"); + } + + clkdesc = register_int(apic_8254_intr, clkintr, NULL, "clk", + NULL, + INTR_EXCL | INTR_FAST | + INTR_NOPOLL | INTR_MPSAFE | + INTR_NOENTROPY); + machintr_intren(apic_8254_intr); + +#else /* APIC_IO */ + + register_int(0, clkintr, NULL, "clk", NULL, + INTR_EXCL | INTR_FAST | + INTR_NOPOLL | INTR_MPSAFE | + INTR_NOENTROPY); + machintr_intren(ICU_IRQ0); + +#endif /* APIC_IO */ + + /* Initialize RTC. 
*/ + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + if (statclock_disable == 0) { + diag = rtcin(RTC_DIAG); + if (diag != 0) + kprintf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); + +#ifdef APIC_IO + if (isa_apic_irq(8) != 8) + panic("APIC RTC != 8"); +#endif /* APIC_IO */ + + register_int(8, (inthand2_t *)rtcintr, NULL, "rtc", NULL, + INTR_EXCL | INTR_FAST | INTR_NOPOLL | + INTR_NOENTROPY); + machintr_intren(8); + + writertc(RTC_STATUSB, rtc_statusb); + } + +#ifdef APIC_IO + if (apic_8254_trial) { + sysclock_t base; + long lastcnt; + + lastcnt = get_interrupt_counter(apic_8254_intr); + + /* + * XXX this assumes the 8254 is the cpu timer. Force an + * 8254 Timer0 interrupt and wait 1/100s for it to happen, + * then see if we got it. + */ + kprintf("APIC_IO: Testing 8254 interrupt delivery\n"); + cputimer_intr_reload(2); /* XXX assumes 8254 */ + base = sys_cputimer->count(); + while (sys_cputimer->count() - base < sys_cputimer->freq / 100) + ; /* nothing */ + if (get_interrupt_counter(apic_8254_intr) - lastcnt == 0) { + /* + * The MP table is broken. + * The 8254 was not connected to the specified pin + * on the IO APIC. + * Workaround: Limited variant of mixed mode. + */ + machintr_intrdis(apic_8254_intr); + unregister_int(clkdesc); + kprintf("APIC_IO: Broken MP table detected: " + "8254 is not connected to " + "IOAPIC #%d intpin %d\n", + int_to_apicintpin[apic_8254_intr].ioapic, + int_to_apicintpin[apic_8254_intr].int_pin); + /* + * Revoke current ISA IRQ 0 assignment and + * configure a fallback interrupt routing from + * the 8254 Timer via the 8259 PIC to the + * an ExtInt interrupt line on IOAPIC #0 intpin 0. + * We reuse the low level interrupt handler number. 
+ */ + if (apic_irq(0, 0) < 0) { + revoke_apic_irq(apic_8254_intr); + assign_apic_irq(0, 0, apic_8254_intr); + } + apic_8254_intr = apic_irq(0, 0); + setup_8254_mixed_mode(); + register_int(apic_8254_intr, clkintr, NULL, "clk", + NULL, + INTR_EXCL | INTR_FAST | + INTR_NOPOLL | INTR_MPSAFE | + INTR_NOENTROPY); + machintr_intren(apic_8254_intr); + } + + } + if (apic_int_type(0, 0) != 3 || + int_to_apicintpin[apic_8254_intr].ioapic != 0 || + int_to_apicintpin[apic_8254_intr].int_pin != 0) { + kprintf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", + int_to_apicintpin[apic_8254_intr].ioapic, + int_to_apicintpin[apic_8254_intr].int_pin); + } else { + kprintf("APIC_IO: " + "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); + } +#endif + callout_init(&sysbeepstop_ch); +} +SYSINIT(clocks8254, SI_BOOT2_CLOCKREG, SI_ORDER_FIRST, cpu_initclocks, NULL) + +#ifdef APIC_IO + +static void +setup_8254_mixed_mode(void) +{ + /* + * Allow 8254 timer to INTerrupt 8259: + * re-initialize master 8259: + * reset; prog 4 bytes, single ICU, edge triggered + */ + outb(IO_ICU1, 0x13); + outb(IO_ICU1 + 1, IDT_OFFSET); /* start vector (unused) */ + outb(IO_ICU1 + 1, 0x00); /* ignore slave */ + outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ + outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ + + /* program IO APIC for type 3 INT on INT0 */ + if (ext_int_setup(0, 0) < 0) + panic("8254 redirect via APIC pin0 impossible!"); +} +#endif + +void +setstatclockrate(int newhz) +{ + if (newhz == RTC_PROFRATE) + rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; + else + rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; + writertc(RTC_STATUSA, rtc_statusa); +} + +#if 0 +static unsigned +tsc_get_timecount(struct timecounter *tc) +{ + return (rdtsc()); +} +#endif + +#ifdef KERN_TIMESTAMP +#define KERN_TIMESTAMP_SIZE 16384 +static u_long tsc[KERN_TIMESTAMP_SIZE] ; +SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, + sizeof(tsc), "LU", "Kernel timestamps"); +void +_TSTMP(u_int32_t x) +{ + static int i; + + tsc[i] = 
(u_int32_t)rdtsc(); + tsc[i+1] = x; + i = i + 2; + if (i >= KERN_TIMESTAMP_SIZE) + i = 0; + tsc[i] = 0; /* mark last entry */ +} +#endif /* KERN_TIMESTAMP */ + +/* + * + */ + +static int +hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) +{ + sysclock_t count; + __uint64_t tscval; + char buf[32]; + + crit_enter(); + if (sys_cputimer == &i8254_cputimer) + count = sys_cputimer->count(); + else + count = 0; + if (tsc_present) + tscval = rdtsc(); + else + tscval = 0; + crit_exit(); + ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval); + return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); +} + +SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); +SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, + "frequency"); +SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, + 0, 0, hw_i8254_timestamp, "A", ""); + +SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, + &tsc_present, 0, "TSC Available"); +SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, + &tsc_frequency, 0, "TSC Frequency"); + diff --git a/sys/platform/pc64/isa/ic/Am7990.h b/sys/platform/pc64/isa/ic/Am7990.h new file mode 100644 index 0000000000..920a13f950 --- /dev/null +++ b/sys/platform/pc64/isa/ic/Am7990.h @@ -0,0 +1,173 @@ +/* + * Am7990, Local Area Network Controller for Ethernet (LANCE) + * + * Copyright (c) 1994, Paul Richards. This software may be used, + * modified, copied, distributed, and sold, in both source and binary + * form provided that the above copyright and these terms are retained. + * Under no circumstances is the author responsible for the proper + * functioning of this software, nor does the author assume any + * responsibility for damages incurred with its use. 
+ * + * $DragonFly: src/sys/platform/pc64/isa/ic/Attic/Am7990.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/* + * The LANCE has four Control and Status Registers(CSRs) which are accessed + * through two bus addressable ports, the address port (RAP) and the data + * port (RDP). + * + */ + +#define CSR0 0 +#define CSR1 1 +#define CSR2 2 +#define CSR3 3 +#define CSR88 88 +#define CSR89 89 + +#define BCR49 49 +#define BCR32 32 +#define BCR33 33 +#define BCR34 34 + + +/* Control and Status Register Masks */ + +/* CSR0 */ + +#define ERR 0x8000 +#define BABL 0x4000 +#define CERR 0x2000 +#define MISS 0x1000 +#define MERR 0x0800 +#define RINT 0x0400 +#define TINT 0x0200 +#define IDON 0x0100 +#define INTR 0x0080 +#define INEA 0x0040 +#define RXON 0x0020 +#define TXON 0x0010 +#define TDMD 0x0008 +#define STOP 0x0004 +#define STRT 0x0002 +#define INIT 0x0001 + +/* + * CSR3 + * + * Bits 3-15 are reserved. + * + */ + +#define BSWP 0x0004 +#define ACON 0x0002 +#define BCON 0x0001 + +/* Initialisation block */ + +struct init_block { + u_short mode; /* Mode register */ + u_char padr[6]; /* Ethernet address */ + u_char ladrf[8]; /* Logical address filter (multicast) */ + u_short rdra; /* Low order pointer to receive ring */ + u_short rlen; /* High order pointer and no. 
rings */ + u_short tdra; /* Low order pointer to transmit ring */ + u_short tlen; /* High order pointer and no rings */ +}; + +/* Initialisation Block Mode Register Masks */ + +#define PROM 0x8000 /* Promiscuous Mode */ +#define DRCVBC 0x4000 /* Disable Receive Broadcast */ +#define DRCVPA 0x2000 /* Disable Receive Physical Address */ +#define DLNKTST 0x1000 /* Disable Link Status */ +#define DAPC 0x0800 /* Disable Automatic Polarity Correction */ +#define MENDECL 0x0400 /* MENDEC Loopback Mode */ +#define LRT 0x0200 /* Low Receive Threshold (T-MAU mode only) */ +#define TSEL 0x0200 /* Transmit Mode Select (AUI mode only) */ +#define PORTSEL 0x0180 /* Port Select bits */ +#define INTL 0x0040 /* Internal Loopback */ +#define DRTY 0x0020 /* Disable Retry */ +#define FCOLL 0x0010 /* Force Collision */ +#define DXMTFCS 0x0008 /* Disable transmit CRC (FCS) */ +#define LOOP 0x0004 /* Loopback Enabl */ +#define DTX 0x0002 /* Disable the transmitter */ +#define DRX 0x0001 /* Disable the receiver */ + +/* + * Message Descriptor Structure + * + * Each transmit or receive descriptor ring entry (RDRE's and TDRE's) + * is composed of 4, 16-bit, message descriptors. They contain the following + * information. + * + * 1. The address of the actual message data buffer in user (host) memory. + * 2. The length of that message buffer. + * 3. The status information for that particular buffer. The eight most + * significant bits of md1 are collectively termed the STATUS of the + * descriptor. + * + * Descriptor md0 contains LADR 0-15, the low order 16 bits of the 24-bit + * address of the actual data buffer. Bits 0-7 of descriptor md1 contain + * HADR, the high order 8-bits of the 24-bit data buffer address. Bits 8-15 + * of md1 contain the status flags of the buffer. Descriptor md2 contains the + * buffer byte count in bits 0-11 as a two's complement number and must have + * 1's written to bits 12-15. 
For the receive entry md3 has the Message Byte + * Count in bits 0-11, this is the length of the received message and is valid + * only when ERR is cleared and ENP is set. For the transmit entry it contains + * more status information. + * + */ + +struct mds { + u_short md0; + u_short md1; + short md2; + u_short md3; +}; + +/* Receive STATUS flags for md1 */ + +#define OWN 0x8000 /* Owner bit, 0=host, 1=Lance */ +#define MDERR 0x4000 /* Error */ +#define FRAM 0x2000 /* Framing error error */ +#define OFLO 0x1000 /* Silo overflow */ +#define CRC 0x0800 /* CRC error */ +#define RBUFF 0x0400 /* Buffer error */ +#define STP 0x0200 /* Start of packet */ +#define ENP 0x0100 /* End of packet */ +#define HADR 0x00FF /* High order address bits */ + +/* Receive STATUS flags for md2 */ + +#define BCNT 0x0FFF /* Size of data buffer as 2's comp. no. */ + +/* Receive STATUS flags for md3 */ + +#define MCNT 0x0FFF /* Total size of data for received packet */ + +/* Transmit STATUS flags for md1 */ + +#define ADD_FCS 0x2000 /* Controls generation of FCS */ +#define MORE 0x1000 /* Indicates more than one retry was needed */ +#define ONE 0x0800 /* Exactly one retry was needed */ +#define DEF 0x0400 /* Packet transmit deferred -- channel busy */ + +/* + * Transmit status flags for md2 + * + * Same as for receive descriptor. + * + * BCNT 0x0FFF Size of data buffer as 2's complement number. 
+ * + */ + +/* Transmit status flags for md3 */ + +#define TBUFF 0x8000 /* Buffer error */ +#define UFLO 0x4000 /* Silo underflow */ +#define LCOL 0x1000 /* Late collision */ +#define LCAR 0x0800 /* Loss of carrier */ +#define RTRY 0x0400 /* Tried 16 times */ +#define TDR 0x03FF /* Time domain reflectometry */ + diff --git a/sys/platform/pc64/isa/ic/am7990.h b/sys/platform/pc64/isa/ic/am7990.h new file mode 100644 index 0000000000..798f627784 --- /dev/null +++ b/sys/platform/pc64/isa/ic/am7990.h @@ -0,0 +1,110 @@ +/* + * AMD 7990 (LANCE) definitions + * + * $FreeBSD: src/sys/i386/isa/ic/am7990.h,v 1.3.6.1 2000/08/03 01:01:25 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/am7990.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +#if defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN +#define LN_BITFIELD2(a, b) b, a +#define LN_BITFIELD3(a, b, c) c, b, a +#define LN_BITFIELD4(a, b, c, d) d, c, b, a +#else +#define LN_BITFIELD2(a, b) a, b +#define LN_BITFIELD3(a, b, c) a, b, c +#define LN_BITFIELD4(a, b, c, d) a, b, c, d +#endif + +#define LN_ADDR_LO(addr) ((addr) & 0xFFFF) +#define LN_ADDR_HI(addr) (((addr) >> 16) & 0xFF) + +typedef struct { + unsigned short r_addr_lo; + unsigned int LN_BITFIELD3(r_addr_hi : 8, + : 5, + r_log2_size : 3); +} ln_ring_t; + +#define LN_MC_MASK 0x3F /* Only 6 bits of the CRC */ + +typedef struct { + unsigned short ln_mode; +#define LN_MODE_RXD 0x0001 /* ( W) Receiver Disabled */ +#define LN_MODE_TXD 0x0002 /* ( W) Transmitter Disabled */ +#define LN_MODE_LOOP 0x0004 /* ( W) Enable Loopback */ +#define LN_MODE_NOTXCRC 0x0008 /* ( W) Don't Calculate TX CRCs */ +#define LN_MODE_FRCCOLL 0x0010 /* ( W) Force Collision */ +#define LN_MODE_NORETRY 0x0020 /* ( W) No Transmit Retries */ +#define LN_MODE_INTLOOP 0x0040 /* ( W) Internal Loopback */ +#define LN_MODE_PROMISC 0x8000 /* ( W) Promiscious Mode */ + unsigned short ln_physaddr[3]; + unsigned short ln_multi_mask[4]; + ln_ring_t ln_rxring; + ln_ring_t ln_txring; +} ln_initb_t; + +typedef 
struct { + unsigned short d_addr_lo; + unsigned char d_addr_hi; + unsigned char d_flag; +#define LN_DFLAG_EOP 0x0001 /* (RW) End Of Packet */ +#define LN_DFLAG_SOP 0x0002 /* (RW) Start Of Packet */ +#define LN_DFLAG_RxBUFERROR 0x0004 /* (R ) Receive - Buffer Error */ +#define LN_DFLAG_TxDEFERRED 0x0004 /* (R ) Transmit - Initially Deferred */ +#define LN_DFLAG_RxBADCRC 0x0008 /* (R ) Receive - Bad Checksum */ +#define LN_DFLAG_TxONECOLL 0x0008 /* (R ) Transmit - Single Collision */ +#define LN_DFLAG_RxOVERFLOW 0x0010 /* (R ) Receive - Overflow Error */ +#define LN_DFLAG_TxMULTCOLL 0x0010 /* (R ) Transmit - Multiple Collisions */ +#define LN_DFLAG_RxFRAMING 0x0020 /* (R ) Receive - Framing Error */ +#define LN_DFLAG_RxERRSUM 0x0040 /* (R ) Receive - Error Summary */ +#define LN_DFLAG_TxERRSUM 0x0040 /* (R ) Transmit - Error Summary */ +#define LN_DFLAG_OWNER 0x0080 /* (RW) Owner (1=Am7990, 0=host) */ + signed short d_buflen; /* ( W) Two's complement */ + unsigned short d_status; +#define LN_DSTS_RxLENMASK 0x0FFF /* (R ) Recieve Length */ +#define LN_DSTS_TxTDRMASK 0x03FF /* (R ) Transmit - Time Domain Reflectometer */ +#define LN_DSTS_TxEXCCOLL 0x0400 /* (R ) Transmit - Excessive Collisions */ +#define LN_DSTS_TxCARRLOSS 0x0800 /* (R ) Transmit - Carrier Loss */ +#define LN_DSTS_TxLATECOLL 0x1000 /* (R ) Transmit - Late Collision */ +#define LN_DSTS_TxUNDERFLOW 0x4000 /* (R ) Transmit - Underflow */ +#define LN_DSTS_TxBUFERROR 0x8000 /* (R ) Transmit - Buffer Error */ +} ln_desc_t; + + + + +#define LN_CSR0 0x0000 + +#define LN_CSR0_INIT 0x0001 /* (RS) Initialize Am 7990 */ +#define LN_CSR0_START 0x0002 /* (RS) Start Am7990 */ +#define LN_CSR0_STOP 0x0004 /* (RS) Reset Am7990 */ +#define LN_CSR0_TXDEMAND 0x0008 /* (RS) Transmit On Demand */ +#define LN_CSR0_TXON 0x0010 /* (R ) Transmitter Enabled */ +#define LN_CSR0_RXON 0x0020 /* (R ) Receiver Enabled */ +#define LN_CSR0_ENABINTR 0x0040 /* (RW) Interrupt Enabled */ +#define LN_CSR0_PENDINTR 0x0080 /* (R ) Interrupt 
Pending */ +#define LN_CSR0_INITDONE 0x0100 /* (RC) Initialization Done */ +#define LN_CSR0_TXINT 0x0200 /* (RC) Transmit Interrupt */ +#define LN_CSR0_RXINT 0x0400 /* (RC) Receive Interrupt */ +#define LN_CSR0_MEMERROR 0x0800 /* (RC) Memory Error */ +#define LN_CSR0_MISS 0x1000 /* (RC) No Available Receive Buffers */ +#define LN_CSR0_CERR 0x2000 /* (RC) SQE failed */ +#define LN_CSR0_BABL 0x4000 /* (RC) Transmit Babble */ +#define LN_CSR0_ERRSUM 0x8000 /* (R ) Error Summary (last 4) */ +#define LN_CSR0_CLEAR 0x7F00 /* Clear Status Bit */ + +/* + * CSR1 -- Init Block Address (Low 16 Bits -- Must be Word Aligned) + * CSR2 -- Init Block Address (High 8 Bits) + */ +#define LN_CSR1 0x0001 +#define LN_CSR2 0x0002 + +/* + * CSR3 -- Hardware Control + */ + +#define LN_CSR3 0x0003 +#define LN_CSR3_BCON 0x0001 /* (RW) BM/HOLD Control */ +#define LN_CSR3_ALE 0x0002 /* (RW) ALE Control */ +#define LN_CSR3_BSWP 0x0004 /* (RW) Byte Swap */ diff --git a/sys/platform/pc64/isa/ic/cd1400.h b/sys/platform/pc64/isa/ic/cd1400.h new file mode 100644 index 0000000000..ffa7236a55 --- /dev/null +++ b/sys/platform/pc64/isa/ic/cd1400.h @@ -0,0 +1,204 @@ +/*- + * cyclades cyclom-y serial driver + * Andrew Herbert , 17 August 1993 + * + * Copyright (c) 1993 Andrew Herbert. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name Andrew Herbert may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL I BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/cd1400.h,v 1.6 1999/08/28 00:45:12 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/cd1400.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/* + * Definitions for Cirrus Logic CD1400 serial/parallel chips. + */ + +#define CD1400_NO_OF_CHANNELS 4 /* 4 serial channels per chip */ +#define CD1400_RX_FIFO_SIZE 12 +#define CD1400_TX_FIFO_SIZE 12 + +/* + * Global registers. 
+ */ +#define CD1400_GFRCR 0x40 /* global firmware revision code */ +#define CD1400_CAR 0x68 /* channel access */ +#define CD1400_CAR_CHAN (3<<0) /* channel select */ +#define CD1400_GCR 0x4B /* global configuration */ +#define CD1400_GCR_PARALLEL (1<<7) /* channel 0 is parallel */ +#define CD1400_SVRR 0x67 /* service request */ +#define CD1400_SVRR_MDMCH (1<<2) +#define CD1400_SVRR_TXRDY (1<<1) +#define CD1400_SVRR_RXRDY (1<<0) +#define CD1400_RICR 0x44 /* receive interrupting channel */ +#define CD1400_TICR 0x45 /* transmit interrupting channel */ +#define CD1400_MICR 0x46 /* modem interrupting channel */ +#define CD1400_RIR 0x6B /* receive interrupt status */ +#define CD1400_RIR_RDIREQ (1<<7) /* rx service required */ +#define CD1400_RIR_RBUSY (1<<6) /* rx service in progress */ +#define CD1400_RIR_CHAN (3<<0) /* channel select */ +#define CD1400_TIR 0x6A /* transmit interrupt status */ +#define CD1400_TIR_RDIREQ (1<<7) /* tx service required */ +#define CD1400_TIR_RBUSY (1<<6) /* tx service in progress */ +#define CD1400_TIR_CHAN (3<<0) /* channel select */ +#define CD1400_MIR 0x69 /* modem interrupt status */ +#define CD1400_MIR_RDIREQ (1<<7) /* modem service required */ +#define CD1400_MIR_RBUSY (1<<6) /* modem service in progress */ +#define CD1400_MIR_CHAN (3<<0) /* channel select */ +#define CD1400_PPR 0x7E /* prescaler period */ +#define CD1400_PPR_PRESCALER 512 + +/* + * Virtual registers. 
+ */ +#define CD1400_RIVR 0x43 /* receive interrupt vector */ +#define CD1400_RIVR_EXCEPTION (1<<2) /* receive exception bit */ +#define CD1400_TIVR 0x42 /* transmit interrupt vector */ +#define CD1400_MIVR 0x41 /* modem interrupt vector */ +#define CD1400_TDR 0x63 /* transmit data */ +#define CD1400_RDSR 0x62 /* receive data/status */ +#define CD1400_RDSR_TIMEOUT (1<<7) /* rx timeout */ +#define CD1400_RDSR_SPECIAL_SHIFT 4 /* rx special char shift */ +#define CD1400_RDSR_SPECIAL (7<<4) /* rx special char */ +#define CD1400_RDSR_BREAK (1<<3) /* rx break */ +#define CD1400_RDSR_PE (1<<2) /* rx parity error */ +#define CD1400_RDSR_FE (1<<1) /* rx framing error */ +#define CD1400_RDSR_OE (1<<0) /* rx overrun error */ +#define CD1400_MISR 0x4C /* modem interrupt status */ +#define CD1400_MISR_DSRd (1<<7) /* DSR delta */ +#define CD1400_MISR_CTSd (1<<6) /* CTS delta */ +#define CD1400_MISR_RId (1<<5) /* RI delta */ +#define CD1400_MISR_CDd (1<<4) /* CD delta */ +#define CD1400_EOSRR 0x60 /* end of service request */ + +/* + * Channel registers. 
+ */ +#define CD1400_LIVR 0x18 /* local interrupt vector */ +#define CD1400_CCR 0x05 /* channel control */ +#define CD1400_CCR_CMDRESET (1<<7) /* enables following: */ +#define CD1400_CCR_FTF (1<<1) /* flush tx fifo */ +#define CD1400_CCR_FULLRESET (1<<0) /* full reset */ +#define CD1400_CCR_CHANRESET 0 /* current channel */ +#define CD1400_CCR_CMDCORCHG (1<<6) /* enables following: */ +#define CD1400_CCR_COR3 (1<<3) /* COR3 changed */ +#define CD1400_CCR_COR2 (1<<2) /* COR2 changed */ +#define CD1400_CCR_COR1 (1<<1) /* COR1 changed */ +#define CD1400_CCR_CMDSENDSC (1<<5) /* enables following: */ +#define CD1400_CCR_SC (7<<0) /* special char 1-4 */ +#define CD1400_CCR_CMDCHANCTL (1<<4) /* enables following: */ +#define CD1400_CCR_XMTEN (1<<3) /* tx enable */ +#define CD1400_CCR_XMTDIS (1<<2) /* tx disable */ +#define CD1400_CCR_RCVEN (1<<1) /* rx enable */ +#define CD1400_CCR_RCVDIS (1<<0) /* rx disable */ +#define CD1400_SRER 0x06 /* service request enable */ +#define CD1400_SRER_MDMCH (1<<7) /* modem change */ +#define CD1400_SRER_RXDATA (1<<4) /* rx data */ +#define CD1400_SRER_TXRDY (1<<2) /* tx fifo empty */ +#define CD1400_SRER_TXMPTY (1<<1) /* tx shift reg empty */ +#define CD1400_SRER_NNDT (1<<0) /* no new data */ +#define CD1400_COR1 0x08 /* channel option 1 */ +#define CD1400_COR1_PARODD (1<<7) +#define CD1400_COR1_PARNORMAL (2<<5) +#define CD1400_COR1_PARFORCE (1<<5) /* odd/even = force 1/0 */ +#define CD1400_COR1_PARNONE (0<<5) +#define CD1400_COR1_NOINPCK (1<<4) +#define CD1400_COR1_STOP2 (2<<2) +#define CD1400_COR1_STOP15 (1<<2) /* 1.5 stop bits */ +#define CD1400_COR1_STOP1 (0<<2) +#define CD1400_COR1_CS8 (3<<0) +#define CD1400_COR1_CS7 (2<<0) +#define CD1400_COR1_CS6 (1<<0) +#define CD1400_COR1_CS5 (0<<0) +#define CD1400_COR2 0x09 /* channel option 2 */ +#define CD1400_COR2_IXANY (1<<7) /* implied XON mode */ +#define CD1400_COR2_IXOFF (1<<6) /* in-band tx flow control */ +#define CD1400_COR2_ETC (1<<5) /* embedded tx command */ +#define 
CD1400_ETC_CMD 0x00 /* start an ETC */ +#define CD1400_ETC_SENDBREAK 0x81 +#define CD1400_ETC_INSERTDELAY 0x82 +#define CD1400_ETC_STOPBREAK 0x83 +#define CD1400_COR2_LLM (1<<4) /* local loopback mode */ +#define CD1400_COR2_RLM (1<<3) /* remote loopback mode */ +#define CD1400_COR2_RTSAO (1<<2) /* RTS auto output */ +#define CD1400_COR2_CCTS_OFLOW (1<<1) /* CTS auto enable */ +#define CD1400_COR2_CDSR_OFLOW (1<<0) /* DSR auto enable */ +#define CD1400_COR3 0x0A /* channel option 3 */ +#define CD1400_COR3_SCDRNG (1<<7) /* special char detect range */ +#define CD1400_COR3_SCD34 (1<<6) /* special char detect 3-4 */ +#define CD1400_COR3_FTC (1<<5) /* flow control transparency */ +#define CD1400_COR3_SCD12 (1<<4) /* special char detect 1-2 */ +#define CD1400_COR3_RXTH (15<<0) /* rx fifo threshold */ +#define CD1400_COR4 0x1E /* channel option 4 */ +#define CD1400_COR4_IGNCR (1<<7) +#define CD1400_COR4_ICRNL (1<<6) +#define CD1400_COR4_INLCR (1<<5) +#define CD1400_COR4_IGNBRK (1<<4) +#define CD1400_COR4_NOBRKINT (1<<3) +#define CD1400_COR4_PFO_ESC (4<<0) /* parity/framing/overrun... 
*/ +#define CD1400_COR4_PFO_NUL (3<<0) +#define CD1400_COR4_PFO_DISCARD (2<<0) +#define CD1400_COR4_PFO_GOOD (1<<0) +#define CD1400_COR4_PFO_EXCEPTION (0<<0) +#define CD1400_COR5 0x1F /* channel option 5 */ +#define CD1400_COR5_ISTRIP (1<<7) +#define CD1400_COR5_LNEXT (1<<6) +#define CD1400_COR5_CMOE (1<<5) /* char matching on error */ +#define CD1400_COR5_EBD (1<<2) /* end of break detected */ +#define CD1400_COR5_ONLCR (1<<1) +#define CD1400_COR5_OCRNL (1<<0) +#define CD1400_CCSR 0x0B /* channel control status */ +#define CD1400_RDCR 0x0E /* received data count */ +#define CD1400_SCHR1 0x1A /* special character 1 */ +#define CD1400_SCHR2 0x1B /* special character 2 */ +#define CD1400_SCHR3 0x1C /* special character 3 */ +#define CD1400_SCHR4 0x1D /* special character 4 */ +#define CD1400_SCRL 0x22 /* special character range, low */ +#define CD1400_SCRH 0x23 /* special character range, high */ +#define CD1400_LNC 0x24 /* lnext character */ +#define CD1400_MCOR1 0x15 /* modem change option 1 */ +#define CD1400_MCOR1_DSRzd (1<<7) /* DSR one-to-zero delta */ +#define CD1400_MCOR1_CTSzd (1<<6) +#define CD1400_MCOR1_RIzd (1<<5) +#define CD1400_MCOR1_CDzd (1<<4) +#define CD1400_MCOR1_DTRth (15<<0) /* dtrflow threshold */ +#define CD1400_MCOR2 0x16 /* modem change option 2 */ +#define CD1400_MCOR2_DSRod (1<<7) /* DSR zero-to-one delta */ +#define CD1400_MCOR2_CTSod (1<<6) +#define CD1400_MCOR2_RIod (1<<5) +#define CD1400_MCOR2_CDod (1<<4) +#define CD1400_RTPR 0x21 /* receive timeout period */ +#define CD1400_MSVR1 0x6C /* modem signal value 1 */ +#define CD1400_MSVR1_RTS (1<<0) /* RTS line (r/w) */ +#define CD1400_MSVR2 0x6D /* modem signal value 2 */ +#define CD1400_MSVR2_DSR (1<<7) /* !DSR line (r) */ +#define CD1400_MSVR2_CTS (1<<6) /* !CTS line (r) */ +#define CD1400_MSVR2_RI (1<<5) /* !RI line (r) */ +#define CD1400_MSVR2_CD (1<<4) /* !CD line (r) */ +#define CD1400_MSVR2_DTR (1<<1) /* DTR line (r/w) */ +#define CD1400_PSVR 0x6F /* printer signal value */ +#define 
CD1400_RBPR 0x78 /* receive baud rate period */ +#define CD1400_RCOR 0x7C /* receive clock option */ +#define CD1400_TBPR 0x72 /* transmit baud rate period */ +#define CD1400_TCOR 0x76 /* transmit clock option */ diff --git a/sys/platform/pc64/isa/ic/cd180.h b/sys/platform/pc64/isa/ic/cd180.h new file mode 100644 index 0000000000..05ad5b18b7 --- /dev/null +++ b/sys/platform/pc64/isa/ic/cd180.h @@ -0,0 +1,199 @@ +/* + * Copyright (C) 1995 by Pavel Antonov, Moscow, Russia. + * Copyright (C) 1995 by Andrey A. Chernov, Moscow, Russia. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/isa/ic/cd180.h,v 1.2.12.1 2000/08/03 01:01:25 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/cd180.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/* + * Cirrus Logic CD180 registers + */ + +/* Global registers */ +#define CD180_GIVR 0x40 /* Global Interrupt Vector Register */ +#define CD180_GICR 0x41 /* Global Interrupting Channel Register */ +#define CD180_PILR1 0x61 /* Priority Interrupt Level Register 1 */ +#define CD180_PILR2 0x62 /* Priority Interrupt Level Register 2 */ +#define CD180_PILR3 0x63 /* Priority Interrupt Level Register 3 */ +#define CD180_CAR 0x64 /* Channel Access Register */ +#define CD180_GFRCR 0x6B /* Global Firmware Revision Code Register */ +#define CD180_PPRH 0x70 /* Prescaler Period Register MSB */ +#define CD180_PPRL 0x71 /* Prescaler Period Register LSB */ +#define CD180_RDR 0x78 /* Receiver Data Register */ +#define CD180_RCSR 0x7A /* Receiver Character Status Register */ +#define CD180_TDR 0x7B /* Transmit Data Register */ +#define CD180_EOIR 0x7F /* End of Interrupt Register */ + +/* Channel Registers */ +#define CD180_CCR 0x01 /* Channel Command Register */ +#define CD180_IER 0x02 /* Interrupt Enable Register */ +#define CD180_COR1 0x03 /* Channel Option Register 1 */ +#define CD180_COR2 0x04 /* Channel Option Register 2 */ +#define CD180_COR3 0x05 /* Channel Option Register 3 */ +#define CD180_CCSR 0x06 /* Channel Control Status Register */ +#define CD180_RDCR 0x07 /* Receive Data Count Register */ +#define CD180_SCHR1 0x09 /* Special Character Register 1 */ +#define CD180_SCHR2 0x0A /* Special Character Register 2 */ +#define CD180_SCHR3 0x0B /* Special Character Register 3 */ +#define CD180_SCHR4 0x0C /* Special Character Register 4 */ +#define CD180_MCOR1 0x10 /* Modem Change Option 1 Register */ +#define CD180_MCOR2 0x11 /* Modem Change Option 2 Register */ +#define CD180_MCR 0x12 /* Modem Change Register */ +#define CD180_RTPR 0x18 /* Receive Timeout Period Register */ +#define 
CD180_MSVR 0x28 /* Modem Signal Value Register */ +#define CD180_RBPRH 0x31 /* Receive Baud Rate Period Register MSB */ +#define CD180_RBPRL 0x32 /* Receive Baud Rate Period Register LSB */ +#define CD180_TBPRH 0x39 /* Transmit Baud Rate Period Register MSB */ +#define CD180_TBPRL 0x3A /* Transmit Baud Rate Period Register LSB */ + +/** Register descriptions **/ + +/* Global Interrupt Vector Register */ +#define GIVR_IT_MSCI 0x01 /* Modem Signal Change Interrupt */ +#define GIVR_IT_TDI 0x02 /* Transmit Data Interrupt */ +#define GIVR_IT_RGDI 0x03 /* Receive Good Data Interrupt */ +#define GIVR_IT_REI 0x07 /* Receive Exception Interrupt */ + +/* Global Interrupt Channel Register */ +#define GICR_CHAN 0x1C /* Channel Number Mask */ +#define GICR_LSH 2 /* Channel Number Shift */ + +/* Channel Address Register */ +#define CAR_CHAN 0x07 /* Channel Number Mask */ +#define CAR_A7 0x08 /* Address bit 7 (unused) */ + +/* Receive Character Status Register */ +#define RCSR_OE 0x01 /* Overrun Error */ +#define RCSR_FE 0x02 /* Frame Error */ +#define RCSR_PE 0x04 /* Parity Error */ +#define RCSR_Break 0x08 /* Break detected */ +#define RCSR_Timeout 0x80 /* Rx Timeout */ +#define RCSR_SCMASK 0x70 /* Special Character Detected Mask */ +#define RCSR_SC1 0x10 /* Special Char 1 (or 1 & 3 seq matched) */ +#define RCSR_SC2 0x20 /* Special Char 2 (or 2 & 4 seq matched) */ +#define RCSR_SC3 0x30 /* Special Char 3 */ +#define RCSR_SC4 0x40 /* Special Char 4 */ + +/* Channel Command Register */ +#define CCR_ResetChan 0x80 /* Reset Channel */ +#define CCR_HWRESET 0x81 /* Hardware Reset (all channels) */ +#define CCR_CORCHG1 0x42 /* Channel Option Register 1 Changed */ +#define CCR_CORCHG2 0x44 /* Channel Option Register 2 Changed */ +#define CCR_CORCHG3 0x48 /* Channel Option Register 3 Changed */ +#define CCR_SENDSPCH1 0x21 /* Send Special Character 1 */ +#define CCR_SENDSPCH2 0x22 /* Send Special Character 2 */ +#define CCR_SENDSPCH3 0x23 /* Send Special Character 3 */ +#define 
CCR_SENDSPCH4 0x24 /* Send Special Character 4 */ +#define CCR_RCVRDIS 0x11 /* Receiver Disable */ +#define CCR_RCVREN 0x12 /* Receiver Enable */ +#define CCR_XMTRDIS 0x14 /* Transmitter Disable */ +#define CCR_XMTREN 0x18 /* Transmitter Enable */ + +/* Interrupt Enable Register */ +#define IER_DSR 0x80 /* Enable interrupt on DSR change */ +#define IER_CD 0x40 /* Enable interrupt on CD change */ +#define IER_CTS 0x20 /* Enable interrupt on CTS change */ +#define IER_RxData 0x10 /* Enable interrupt on Receive Data */ +#define IER_RxSC 0x08 /* Enable interrupt on Receive Spec. Char */ +#define IER_TxRdy 0x04 /* Enable interrupt on TX FIFO empty */ +#define IER_TxMpty 0x02 /* Enable interrupt on TX completely empty*/ +#define IER_RET 0x01 /* Enable interrupt on RX Except. Timeout */ + +/* Channel Option Register 1 */ +#define COR1_ODDP 0x80 /* Odd Parity */ +#define COR1_ParMMASK 0x60 /* Parity Mode mask */ +#define COR1_NOPAR 0x02 /* No Parity */ +#define COR1_FORCEPAR 0x20 /* Force Parity */ +#define COR1_NORMPAR 0x40 /* Normal Parity */ +#define COR1_Ignore 0x10 /* Ignore Parity on RX */ +#define COR1_StopMASK 0x0C /* Stop Bits mode mask */ +#define COR1_1SB 0x00 /* 1 Stop Bit */ +#define COR1_15SB 0x04 /* 1.5 Stop Bits */ +#define COR1_2SB 0x08 /* 2 Stop Bits */ +#define COR1_CHLMASK 0x03 /* Character Length mask */ +#define COR1_5BITS 0x00 /* 5 bits */ +#define COR1_6BITS 0x01 /* 6 bits */ +#define COR1_7BITS 0x02 /* 7 bits */ +#define COR1_8BITS 0x03 /* 8 bits */ + +/* Channel Option Register 2 */ +#define COR2_IXM 0x80 /* Implied XON mode */ +#define COR2_TxIBE 0x40 /* Enable In-Band XON/XOFF Flow Control */ +#define COR2_ETC 0x20 /* Embedded Tx Commands Enable */ +#define COR2_LLM 0x10 /* Local Loopback Mode */ +#define COR2_RLM 0x08 /* Remote Loopback Mode */ +#define COR2_RtsAO 0x04 /* RTS Automatic Output Enable */ +#define COR2_CtsAE 0x02 /* CTS Automatic Enable */ +#define COR2_DsrAE 0x01 /* DSR Automatic Enable */ + +/* Channel Option Register 3 */ 
+#define COR3_XonCH 0x80 /* XON is a double seq (1 & 3) */ +#define COR3_XoffCH 0x40 /* XOFF is a double seq (1 & 3) */ +#define COR3_FCT 0x20 /* Flow-Control Transparency Mode */ +#define COR3_SCDE 0x10 /* Special Character Detection Enable */ +#define COR3_RxTHMASK 0x0F /* RX FIFO Threshold value (1-8) */ + +/* Channel Control Status Register */ +#define CCSR_RxEn 0x80 /* Receiver Enabled */ +#define CCSR_RxFloff 0x40 /* Receive Flow Off (XOFF sent) */ +#define CCSR_RxFlon 0x20 /* Receive Flow On (XON sent) */ +#define CCSR_TxEn 0x08 /* Transmitter Enabled */ +#define CCSR_TxFloff 0x04 /* Transmit Flow Off (got XOFF) */ +#define CCSR_TxFlon 0x02 /* Transmit Flow On (got XON) */ + +/* Modem Change Option Register 1 */ +#define MCOR1_DSRzd 0x80 /* Detect 0->1 transition of DSR */ +#define MCOR1_CDzd 0x40 /* Detect 0->1 transition of CD */ +#define MCOR1_CTSzd 0x20 /* Detect 0->1 transition of CTS */ +#define MCOR1_DTRthMASK 0x0F /* Automatic DTR FC Threshold (1-8) chars */ + +/* Modem Change Option Register 2 */ +#define MCOR2_DSRod 0x80 /* Detect 1->0 transition of DSR */ +#define MCOR2_CDod 0x40 /* Detect 1->0 transition of CD */ +#define MCOR2_CTSod 0x20 /* Detect 1->0 transition of CTS */ + +/* Modem Change Register */ +#define MCR_DSRchg 0x80 /* DSR Changed */ +#define MCR_CDchg 0x40 /* CD Changed */ +#define MCR_CTSchg 0x20 /* CTS Changed */ + +/* Modem Signal Value Register */ +#define MSVR_DSR 0x80 /* Current state of DSR input */ +#define MSVR_CD 0x40 /* Current state of CD input */ +#define MSVR_CTS 0x20 /* Current state of CTS input */ +#define MSVR_DTR 0x02 /* Current state of DTR output */ +#define MSVR_RTS 0x01 /* Current state of RTS output */ + +/* Escape characters */ +#define CD180_C_ESC 0x00 /* Escape character */ +#define CD180_C_SBRK 0x81 /* Start sending BREAK */ +#define CD180_C_DELAY 0x82 /* Delay output */ +#define CD180_C_EBRK 0x83 /* Stop sending BREAK */ + +/* Miscellaneous */ +#define CD180_NCHAN 8 /* 8 channels per chip */ +#define 
CD180_CTICKS 16 /* 16 ticks for character processing */ +#define CD180_NFIFO 8 /* 8 bytes in FIFO */ diff --git a/sys/platform/pc64/isa/ic/hd64570.h b/sys/platform/pc64/isa/ic/hd64570.h new file mode 100644 index 0000000000..c417998a33 --- /dev/null +++ b/sys/platform/pc64/isa/ic/hd64570.h @@ -0,0 +1,373 @@ +/* + * Copyright (c) 1995 John Hay. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by [your name] + * and [any other names deserving credit ] + * 4. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY [your name] AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/hd64570.h,v 1.6 1999/08/28 00:45:13 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/hd64570.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ +#ifndef _HD64570_H_ +#define _HD64570_H_ + +typedef struct msci_channel + { + union + { + unsigned short us_trb; /* rw */ + struct + { + unsigned char uc_trbl; + unsigned char uc_trbh; + }uc_trb; + }u_trb; + unsigned char st0; /* ro */ + unsigned char st1; /* rw */ + unsigned char st2; /* rw */ + unsigned char st3; /* ro */ + unsigned char fst; /* rw */ + unsigned char unused0; + unsigned char ie0; /* rw */ + unsigned char ie1; /* rw */ + unsigned char ie2; /* rw */ + unsigned char fie; /* rw */ + unsigned char cmd; /* wo */ + unsigned char unused1; + unsigned char md0; /* rw */ + unsigned char md1; /* rw */ + unsigned char md2; /* rw */ + unsigned char ctl; /* rw */ + unsigned char sa0; /* rw */ + unsigned char sa1; /* rw */ + unsigned char idl; /* rw */ + unsigned char tmc; /* rw */ + unsigned char rxs; /* rw */ + unsigned char txs; /* rw */ + unsigned char trc0; /* rw */ + unsigned char trc1; /* rw */ + unsigned char rrc; /* rw */ + unsigned char unused2; + unsigned char cst0; /* rw */ + unsigned char cst1; /* rw */ + unsigned char unused3[2]; + }msci_channel; + +#define trb u_trb.us_trb +#define trbl u_trb.uc_trb.uc_trbl +#define trbh u_trb.uc_trb.uc_trbh + +typedef struct timer_channel + { + unsigned short tcnt; /* rw */ + unsigned short tconr; /* wo */ 
+ unsigned char tcsr; /* rw */ + unsigned char tepr; /* rw */ + unsigned char unused[2]; + }timer_channel; + +typedef struct dmac_channel + { + unsigned short dar; /* rw */ + unsigned char darb; /* rw */ + unsigned char unused0; + unsigned short sar; /* rw On odd numbered dmacs (tx) only */ + unsigned char sarb; /* rw */ +#define cpb sarb + unsigned char unused1; + unsigned short cda; /* rw */ + unsigned short eda; /* rw */ + unsigned short bfl; /* rw On even numbered dmacs (rx) only */ + unsigned short bcr; /* rw */ + unsigned char dsr; /* rw */ + unsigned char dmr; /* rw */ + unsigned char unused2; + unsigned char fct; /* rw */ + unsigned char dir; /* rw */ + unsigned char dcr; /* rw */ + unsigned char unused3[10]; + }dmac_channel; + +/* x is the channel number. rx channels are even numbered and tx, odd. */ +#define DMAC_RXCH(x) ((x*2) + 0) +#define DMAC_TXCH(x) ((x*2) + 1) + +typedef struct sca_regs + { + unsigned char lpr; /* rw */ + unsigned char unused0; /* -- */ + /* Wait system */ + unsigned char pabr0; /* rw */ + unsigned char pabr1; /* rw */ + unsigned char wcrl; /* rw */ + unsigned char wcrm; /* rw */ + unsigned char wcrh; /* rw */ + unsigned char unused1; + /* DMAC */ + unsigned char pcr; /* rw */ + unsigned char dmer; /* rw */ + unsigned char unused2[6]; + /* Interrupt */ + unsigned char isr0; /* ro */ + unsigned char isr1; /* ro */ + unsigned char isr2; /* ro */ + unsigned char unused3; + unsigned char ier0; /* rw */ + unsigned char ier1; /* rw */ + unsigned char ier2; /* rw */ + unsigned char unused4; + unsigned char itcr; /* rw */ + unsigned char unused5; + unsigned char ivr; /* rw */ + unsigned char unused6; + unsigned char imvr; /* rw */ + unsigned char unused7[3]; + /* MSCI Channel 0 */ + msci_channel msci[2]; + timer_channel timer[4]; + dmac_channel dmac[4]; + }sca_regs; + +#define SCA_CMD_TXRESET 0x01 +#define SCA_CMD_TXENABLE 0x02 +#define SCA_CMD_TXDISABLE 0x03 +#define SCA_CMD_TXCRCINIT 0x04 +#define SCA_CMD_TXCRCEXCL 0x05 +#define 
SCA_CMS_TXEOM 0x06 +#define SCA_CMD_TXABORT 0x07 +#define SCA_CMD_MPON 0x08 +#define SCA_CMD_TXBCLEAR 0x09 + +#define SCA_CMD_RXRESET 0x11 +#define SCA_CMD_RXENABLE 0x12 +#define SCA_CMD_RXDISABLE 0x13 +#define SCA_CMD_RXCRCINIT 0x14 +#define SCA_CMD_RXMSGREJ 0x15 +#define SCA_CMD_MPSEARCH 0x16 +#define SCA_CMD_RXCRCEXCL 0x17 +#define SCA_CMD_RXCRCCALC 0x18 + +#define SCA_CMD_NOP 0x00 +#define SCA_CMD_RESET 0x21 +#define SCA_CMD_SEARCH 0x31 + +#define SCA_MD0_CRC_1 0x01 +#define SCA_MD0_CRC_CCITT 0x02 +#define SCA_MD0_CRC_ENABLE 0x04 +#define SCA_MD0_AUTO_ENABLE 0x10 +#define SCA_MD0_MODE_ASYNC 0x00 +#define SCA_MD0_MODE_BYTESYNC1 0x20 +#define SCA_MD0_MODE_BISYNC 0x40 +#define SCA_MD0_MODE_BYTESYNC2 0x60 +#define SCA_MD0_MODE_HDLC 0x80 + +#define SCA_MD1_NOADDRCHK 0x00 +#define SCA_MD1_SNGLADDR1 0x40 +#define SCA_MD1_SNGLADDR2 0x80 +#define SCA_MD1_DUALADDR 0xC0 + +#define SCA_MD2_DUPLEX 0x00 +#define SCA_MD2_ECHO 0x01 +#define SCA_MD2_LOOPBACK 0x03 +#define SCA_MD2_ADPLLx8 0x00 +#define SCA_MD2_ADPLLx16 0x08 +#define SCA_MD2_ADPLLx32 0x10 +#define SCA_MD2_NRZ 0x00 +#define SCA_MD2_NRZI 0x20 +#define SCA_MD2_MANCHESTER 0x80 +#define SCA_MD2_FM0 0xC0 +#define SCA_MD2_FM1 0xA0 + +#define SCA_CTL_RTS 0x01 +#define SCA_CTL_IDLPAT 0x10 +#define SCA_CTL_UDRNC 0x20 + +#define SCA_RXS_DIV_MASK 0x0F +#define SCA_RXS_DIV1 0x00 +#define SCA_RXS_DIV2 0x01 +#define SCA_RXS_DIV4 0x02 +#define SCA_RXS_DIV8 0x03 +#define SCA_RXS_DIV16 0x04 +#define SCA_RXS_DIV32 0x05 +#define SCA_RXS_DIV64 0x06 +#define SCA_RXS_DIV128 0x07 +#define SCA_RXS_DIV256 0x08 +#define SCA_RXS_DIV512 0x09 +#define SCA_RXS_CLK_RXC0 0x00 +#define SCA_RXS_CLK_RXC1 0x20 +#define SCA_RXS_CLK_INT 0x40 +#define SCA_RXS_CLK_ADPLL_OUT 0x60 +#define SCA_RXS_CLK_ADPLL_IN 0x70 + +#define SCA_TXS_DIV_MASK 0x0F +#define SCA_TXS_DIV1 0x00 +#define SCA_TXS_DIV2 0x01 +#define SCA_TXS_DIV4 0x02 +#define SCA_TXS_DIV8 0x03 +#define SCA_TXS_DIV16 0x04 +#define SCA_TXS_DIV32 0x05 +#define SCA_TXS_DIV64 0x06 +#define 
SCA_TXS_DIV128 0x07 +#define SCA_TXS_DIV256 0x08 +#define SCA_TXS_DIV512 0x09 +#define SCA_TXS_CLK_TXC 0x00 +#define SCA_TXS_CLK_INT 0x40 +#define SCA_TXS_CLK_RX 0x60 + +#define SCA_ST0_RXRDY 0x01 +#define SCA_ST0_TXRDY 0x02 +#define SCA_ST0_RXINT 0x40 +#define SCA_ST0_TXINT 0x80 + +#define SCA_ST1_IDLST 0x01 +#define SCA_ST1_ABTST 0x02 +#define SCA_ST1_DCDCHG 0x04 +#define SCA_ST1_CTSCHG 0x08 +#define SCA_ST1_FLAG 0x10 +#define SCA_ST1_TXIDL 0x40 +#define SCA_ST1_UDRN 0x80 + +/* ST2 and FST look the same */ +#define SCA_FST_CRCERR 0x04 +#define SCA_FST_OVRN 0x08 +#define SCA_FST_RESFRM 0x10 +#define SCA_FST_ABRT 0x20 +#define SCA_FST_SHRT 0x40 +#define SCA_FST_EOM 0x80 + +#define SCA_ST3_RXENA 0x01 +#define SCA_ST3_TXENA 0x02 +#define SCA_ST3_DCD 0x04 +#define SCA_ST3_CTS 0x08 +#define SCA_ST3_ADPLLSRCH 0x10 +#define SCA_ST3_TXDATA 0x20 + +#define SCA_FIE_EOMFE 0x80 + +#define SCA_IE0_RXRDY 0x01 +#define SCA_IE0_TXRDY 0x02 +#define SCA_IE0_RXINT 0x40 +#define SCA_IE0_TXINT 0x80 + +#define SCA_IE1_IDLDE 0x01 +#define SCA_IE1_ABTDE 0x02 +#define SCA_IE1_DCD 0x04 +#define SCA_IE1_CTS 0x08 +#define SCA_IE1_FLAG 0x10 +#define SCA_IE1_IDL 0x40 +#define SCA_IE1_UDRN 0x80 + +#define SCA_IE2_CRCERR 0x04 +#define SCA_IE2_OVRN 0x08 +#define SCA_IE2_RESFRM 0x10 +#define SCA_IE2_ABRT 0x20 +#define SCA_IE2_SHRT 0x40 +#define SCA_IE2_EOM 0x80 + +/* This is for RRC, TRC0 and TRC1. 
*/ +#define SCA_RCR_MASK 0x1F + +#define SCA_IE1_ + +#define SCA_IV_CHAN0 0x00 +#define SCA_IV_CHAN1 0x20 + +#define SCA_IV_RXRDY 0x04 +#define SCA_IV_TXRDY 0x06 +#define SCA_IV_RXINT 0x08 +#define SCA_IV_TXINT 0x0A + +#define SCA_IV_DMACH0 0x00 +#define SCA_IV_DMACH1 0x08 +#define SCA_IV_DMACH2 0x20 +#define SCA_IV_DMACH3 0x28 + +#define SCA_IV_DMIA 0x14 +#define SCA_IV_DMIB 0x16 + +#define SCA_IV_TIMER0 0x1C +#define SCA_IV_TIMER1 0x1E +#define SCA_IV_TIMER2 0x3C +#define SCA_IV_TIMER3 0x3E + +/* + * DMA registers + */ +#define SCA_DSR_EOT 0x80 +#define SCA_DSR_EOM 0x40 +#define SCA_DSR_BOF 0x20 +#define SCA_DSR_COF 0x10 +#define SCA_DSR_DE 0x02 +#define SCA_DSR_DWE 0x01 + +#define SCA_DMR_TMOD 0x10 +#define SCA_DMR_NF 0x04 +#define SCA_DMR_CNTE 0x02 + +#define SCA_DMER_EN 0x80 + +#define SCA_DCR_ABRT 0x01 +#define SCA_DCR_FCCLR 0x02 /* Clear frame end intr counter */ + +#define SCA_DIR_EOT 0x80 +#define SCA_DIR_EOM 0x40 +#define SCA_DIR_BOF 0x20 +#define SCA_DIR_COF 0x10 + +#define SCA_PCR_BRC 0x10 +#define SCA_PCR_CCC 0x08 +#define SCA_PCR_PR2 0x04 +#define SCA_PCR_PR1 0x02 +#define SCA_PCR_PR0 0x01 + +typedef struct sca_descriptor + { + unsigned short cp; + unsigned short bp; + unsigned char bpb; + unsigned char unused0; + unsigned short len; + unsigned char stat; + unsigned char unused1; + }sca_descriptor; + +#define SCA_DESC_EOT 0x01 +#define SCA_DESC_CRC 0x04 +#define SCA_DESC_OVRN 0x08 +#define SCA_DESC_RESD 0x10 +#define SCA_DESC_ABORT 0x20 +#define SCA_DESC_SHRTFRM 0x40 +#define SCA_DESC_EOM 0x80 +#define SCA_DESC_ERRORS 0x7C + +/* +*************************************************************************** +** END +*************************************************************************** +**/ +#endif /* _HD64570_H_ */ + diff --git a/sys/platform/pc64/isa/ic/i8237.h b/sys/platform/pc64/isa/ic/i8237.h new file mode 100644 index 0000000000..d1ecee6f2e --- /dev/null +++ b/sys/platform/pc64/isa/ic/i8237.h @@ -0,0 +1,13 @@ +/* + * Intel 8237 DMA Controller 
+ * + * $FreeBSD: src/sys/i386/isa/ic/i8237.h,v 1.7 1999/08/28 00:45:14 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/i8237.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +#define DMA37MD_SINGLE 0x40 /* single pass mode */ +#define DMA37MD_CASCADE 0xc0 /* cascade mode */ +#define DMA37MD_AUTO 0x50 /* autoinitialise single pass mode */ +#define DMA37MD_WRITE 0x04 /* read the device, write memory operation */ +#define DMA37MD_READ 0x08 /* write the device, read memory operation */ + diff --git a/sys/platform/pc64/isa/ic/i82586.h b/sys/platform/pc64/isa/ic/i82586.h new file mode 100644 index 0000000000..d7e5c14815 --- /dev/null +++ b/sys/platform/pc64/isa/ic/i82586.h @@ -0,0 +1,333 @@ +/*- + * Copyright (c) 1992, University of Vermont and State Agricultural College. + * Copyright (c) 1992, Garrett A. Wollman. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * Vermont and State Agricultural College and Garrett A. Wollman. + * 4. Neither the name of the University nor the name of the author + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/i82586.h,v 1.9 1999/08/28 00:45:14 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/i82586.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/* + * Intel 82586 Ethernet chip + * Register, bit, and structure definitions. + * + * Written by GAW with reference to the Clarkson Packet Driver code for this + * chip written by Russ Nelson and others. + */ + +struct ie_en_addr { + u_char data[6]; +}; + +/* + * This is the master configuration block. It tells the hardware where all + * the rest of the stuff is. + */ +struct ie_sys_conf_ptr { + u_short mbz; /* must be zero */ + u_char ie_bus_use; /* true if 8-bit only */ + u_char mbz2[5]; /* must be zero */ + caddr_t ie_iscp_ptr; /* 24-bit physaddr of ISCP */ +}; + +/* + * Note that this is wired in hardware; the SCP is always located here, no + * matter what. + */ +#define IE_SCP_ADDR 0xfffff4 + +/* + * The tells the hardware where all the rest of the stuff is, too. + * FIXME: some of these should be re-commented after we figure out their + * REAL function. 
+ */ +struct ie_int_sys_conf_ptr { + u_char ie_busy; /* zeroed after init */ + u_char mbz; + u_short ie_scb_offset; /* 16-bit physaddr of next struct */ + caddr_t ie_base; /* 24-bit physaddr for all 16-bit vars */ +}; + +/* + * This FINALLY tells the hardware what to do and where to put it. + */ +struct ie_sys_ctl_block { + u_short ie_status; /* status word */ + u_short ie_command; /* command word */ + u_short ie_command_list; /* 16-pointer to command block list */ + u_short ie_recv_list; /* 16-pointer to receive frame list */ + u_short ie_err_crc; /* CRC errors */ + u_short ie_err_align; /* Alignment errors */ + u_short ie_err_resource; /* Resource errors */ + u_short ie_err_overrun; /* Overrun errors */ +}; + +/* Command values */ +#define IE_RU_COMMAND 0x0070 /* mask for RU command */ +#define IE_RU_NOP 0 /* for completeness */ +#define IE_RU_START 0x0010 /* start receive unit command */ +#define IE_RU_ENABLE 0x0020 /* enable receiver command */ +#define IE_RU_DISABLE 0x0030 /* disable receiver command */ +#define IE_RU_ABORT 0x0040 /* abort current receive operation */ + +#define IE_CU_COMMAND 0x0700 /* mask for CU command */ +#define IE_CU_NOP 0 /* included for completeness */ +#define IE_CU_START 0x0100 /* do-command command */ +#define IE_CU_RESUME 0x0200 /* resume a suspended cmd list */ +#define IE_CU_STOP 0x0300 /* SUSPEND was already taken */ +#define IE_CU_ABORT 0x0400 /* abort current command */ + +#define IE_ACK_COMMAND 0xf000 /* mask for ACK command */ +#define IE_ACK_CX 0x8000 /* ack IE_ST_DONE */ +#define IE_ACK_FR 0x4000 /* ack IE_ST_RECV */ +#define IE_ACK_CNA 0x2000 /* ack IE_ST_ALLDONE */ +#define IE_ACK_RNR 0x1000 /* ack IE_ST_RNR */ + +#define IE_ACTION_COMMAND(x) (((x) & IE_CU_COMMAND) == IE_CU_START) + /* is this command an action command? 
*/ + +/* Status values */ +#define IE_ST_WHENCE 0xf000 /* mask for cause of interrupt */ +#define IE_ST_DONE 0x8000 /* command with I bit completed */ +#define IE_ST_RECV 0x4000 /* frame received */ +#define IE_ST_ALLDONE 0x2000 /* all commands completed */ +#define IE_ST_RNR 0x1000 /* receive not ready */ + +#define IE_CU_STATUS 0x700 /* mask for command unit status */ +#define IE_CU_ACTIVE 0x200 /* command unit is active */ +#define IE_CU_SUSPEND 0x100 /* command unit is suspended */ + +#define IE_RU_STATUS 0x70 /* mask for receiver unit status */ +#define IE_RU_SUSPEND 0x10 /* receiver is suspended */ +#define IE_RU_NOSPACE 0x20 /* receiver has no resources */ +#define IE_RU_READY 0x40 /* receiver is ready */ + +/* + * This is filled in partially by the chip, partially by us. + */ +struct ie_recv_frame_desc { + u_short ie_fd_status; /* status for this frame */ + u_short ie_fd_last; /* end of frame list flag */ + u_short ie_fd_next; /* 16-pointer to next RFD */ + u_short ie_fd_buf_desc; /* 16-pointer to list of buffer desc's */ + struct ie_en_addr dest; /* destination ether */ + struct ie_en_addr src; /* source ether */ + u_short ie_length; /* 802 length/Ether type */ + u_short mbz; /* must be zero */ +}; + +#define IE_FD_LAST 0x8000 /* last rfd in list */ +#define IE_FD_SUSP 0x4000 /* suspend RU after receipt */ + +#define IE_FD_COMPLETE 0x8000 /* frame is complete */ +#define IE_FD_BUSY 0x4000 /* frame is busy */ +#define IE_FD_OK 0x2000 /* frame received successfully */ +#define IE_FD_RNR 0x0200 /* receiver out of resources here */ + +/* + * linked list of buffers... 
+ */ +struct ie_recv_buf_desc { + u_short ie_rbd_actual; /* status for this buffer */ + u_short ie_rbd_next; /* 16-pointer to next RBD */ + caddr_t ie_rbd_buffer; /* 24-pointer to buffer for this RBD */ + u_short ie_rbd_length; /* length of the buffer */ + u_short mbz; /* must be zero */ +}; + +#define IE_RBD_LAST 0x8000 /* last buffer */ +#define IE_RBD_USED 0x4000 /* this buffer has data */ +/* + * All commands share this in common. + */ +struct ie_cmd_common { + u_short ie_cmd_status; /* status of this command */ + u_short ie_cmd_cmd; /* command word */ + u_short ie_cmd_link; /* link to next command */ +}; + +#define IE_STAT_COMPL 0x8000 /* command is completed */ +#define IE_STAT_BUSY 0x4000 /* command is running now */ +#define IE_STAT_OK 0x2000 /* command completed successfully */ + +#define IE_CMD_NOP 0x0000 /* NOP */ +#define IE_CMD_IASETUP 0x0001 /* initial address setup */ +#define IE_CMD_CONFIG 0x0002 /* configure command */ +#define IE_CMD_MCAST 0x0003 /* multicast setup command */ +#define IE_CMD_XMIT 0x0004 /* transmit command */ +#define IE_CMD_TDR 0x0005 /* time-domain reflectometer command */ +#define IE_CMD_DUMP 0x0006 /* dump command */ +#define IE_CMD_DIAGNOSE 0x0007 /* diagnostics command */ + +#define IE_CMD_LAST 0x8000 /* this is the last command in the list */ +#define IE_CMD_SUSPEND 0x4000 /* suspend CU after this command */ +#define IE_CMD_INTR 0x2000 /* post an interrupt after completion */ + +/* + * This is the command to transmit a frame. 
+ */ +struct ie_xmit_cmd { + struct ie_cmd_common com; /* common part */ +#define ie_xmit_status com.ie_cmd_status + + u_short ie_xmit_desc; /* 16-pointer to buffer descriptor */ + struct ie_en_addr ie_xmit_addr; /* destination address */ + + u_short ie_xmit_length; /* 802.3 length/Ether type field */ +}; + +#define IE_XS_MAXCOLL 0x000f /* number of collisions during transmit */ +#define IE_XS_EXCMAX 0x0020 /* exceeded maximum number of collisions */ +#define IE_XS_SQE 0x0040 /* SQE positive */ +#define IE_XS_DEFERRED 0x0080 /* transmission deferred */ +#define IE_XS_UNDERRUN 0x0100 /* DMA underrun */ +#define IE_XS_LOSTCTS 0x0200 /* Lost CTS */ +#define IE_XS_NOCARRIER 0x0400 /* No Carrier */ +#define IE_XS_LATECOLL 0x0800 /* Late collision */ + +/* + * This is a buffer descriptor for a frame to be transmitted. + */ + +struct ie_xmit_buf { + u_short ie_xmit_flags; /* see below */ + u_short ie_xmit_next; /* 16-pointer to next desc. */ + caddr_t ie_xmit_buf; /* 24-pointer to the actual buffer */ +}; + +#define IE_XMIT_LAST 0x8000 /* this TBD is the last one */ +/* The rest of the `flags' word is actually the length. */ + +/* + * Multicast setup command. + */ + +#define MAXMCAST 50 /* must fit in transmit buffer */ + +struct ie_mcast_cmd { + struct ie_cmd_common com; /* common part */ +#define ie_mcast_status com.ie_cmd_status + + u_short ie_mcast_bytes; /* size (in bytes) of multicast addresses */ + struct ie_en_addr ie_mcast_addrs[MAXMCAST + 1]; /* space for them */ +}; + +/* + * Time Domain Reflectometer command. 
+ */ + +struct ie_tdr_cmd { + struct ie_cmd_common com; /* common part */ +#define ie_tdr_status com.ie_cmd_status + + u_short ie_tdr_time; /* error bits and time */ +}; + +#define IE_TDR_SUCCESS 0x8000 /* TDR succeeded without error */ +#define IE_TDR_XCVR 0x4000 /* detected a transceiver problem */ +#define IE_TDR_OPEN 0x2000 /* detected an open */ +#define IE_TDR_SHORT 0x1000 /* TDR detected a short */ +#define IE_TDR_TIME 0x07ff /* mask for reflection time */ + +/* + * Initial Address Setup command + */ +struct ie_iasetup_cmd { + struct ie_cmd_common com; +#define ie_iasetup_status com.ie_cmd_status + + struct ie_en_addr ie_address; +}; + +/* + * Configuration command + */ +struct ie_config_cmd { + struct ie_cmd_common com; /* common part */ +#define ie_config_status com.ie_cmd_status + + u_char ie_config_count; /* byte count (0x0c) */ + u_char ie_fifo; /* fifo (8) */ + u_char ie_save_bad; /* save bad frames (0x40) */ + u_char ie_addr_len; /* address length (0x2e) (AL-LOC == 1) */ + u_char ie_priority; /* priority and backoff (0x0) */ + u_char ie_ifs; /* inter-frame spacing (0x60) */ + u_char ie_slot_low; /* slot time, LSB (0x0) */ + u_char ie_slot_high; /* slot time, MSN, and retries (0xf2) */ + u_char ie_promisc; /* 1 if promiscuous, else 0 */ + u_char ie_crs_cdt; /* CSMA/CD parameters (0x0) */ + u_char ie_min_len; /* min frame length (0x40) */ + u_char ie_junk; /* stuff for 82596 (0xff) */ +}; + +/* + * Here are a few useful functions. We could have done these as macros, + * but since we have the inline facility, it makes sense to use that + * instead. 
+ */ +static __inline void +ie_setup_config(volatile struct ie_config_cmd *cmd, + int promiscuous, int manchester) { + cmd->ie_config_count = 0x0c; + cmd->ie_fifo = 8; + cmd->ie_save_bad = 0x40; + cmd->ie_addr_len = 0x2e; + cmd->ie_priority = 0; + cmd->ie_ifs = 0x60; + cmd->ie_slot_low = 0; + cmd->ie_slot_high = 0xf2; + cmd->ie_promisc = !!promiscuous | manchester << 2; + cmd->ie_crs_cdt = 0; + cmd->ie_min_len = 64; + cmd->ie_junk = 0xff; +} + +static __inline void * +Align(void *ptr) { + uintptr_t l = (uintptr_t)ptr; + l = (l + 3) & ~3L; + return (void *)l; +} + +static __inline volatile void * +Alignvol(volatile void *ptr) { + uintptr_t l = (uintptr_t)ptr; + l = (l + 3) & ~3L; + return (volatile void *)l; +} + +static __inline void +ie_ack(volatile struct ie_sys_ctl_block *scb, + u_int mask, int unit, + void (*ca)(int)) { + scb->ie_command = scb->ie_status & mask; + (*ca)(unit); +} diff --git a/sys/platform/pc64/isa/ic/lemac.h b/sys/platform/pc64/isa/ic/lemac.h new file mode 100644 index 0000000000..b3d1fa5683 --- /dev/null +++ b/sys/platform/pc64/isa/ic/lemac.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 1994 Matt Thomas (thomas@lkg.dec.com) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software withough specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/lemac.h,v 1.5 1999/08/28 00:45:14 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/lemac.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ +#ifndef _LEMAC_H_ +#define _LEMAC_H_ + +/* + * This is list of registers used on a DEC EtherWORKS III card. + * Each board occupies a 32 byte register space. This can be + * in either EISA or ISA space. Currently we only support ISA + * space. + */ + +#define LEMAC_REG_CS 0x00 /* Control and Status */ +#define LEMAC_REG_CTL 0x01 /* Control */ +#define LEMAC_REG_IC 0x02 /* Interrupt Control */ +#define LEMAC_REG_TS 0x03 /* Transmit Status */ +#define LEMAC_REG_RSVD1 0x04 /* Reserved (not used) */ +#define LEMAC_REG_RSVD2 0x05 /* Reserved (not used) */ +#define LEMAC_REG_FMQ 0x06 /* Free Memory Queue */ +#define LEMAC_REG_FMC 0x07 /* Free Memory Queue Count */ +#define LEMAC_REG_RQ 0x08 /* Receive Queue */ +#define LEMAC_REG_RQC 0x09 /* Receive Queue Count */ +#define LEMAC_REG_TQ 0x0A /* Transmit Queue */ +#define LEMAC_REG_TQC 0x0B /* Transmit Queue Count */ +#define LEMAC_REG_TDQ 0x0C /* Transmit Done Queue */ +#define LEMAC_REG_TDC 0x0D /* Transmit Done Queue Count */ +#define LEMAC_REG_PI1 0x0E /* Page Index #1 */ +#define LEMAC_REG_PI2 0x0F /* Page Index #2 */ +#define LEMAC_REG_DAT 0x10 /* Data */ +#define LEMAC_REG_IOP 0x11 /* I/O Page */ +#define LEMAC_REG_IOB 0x12 /* I/O Base */ +#define LEMAC_REG_MPN 0x13 /* Memory Page */ +#define LEMAC_REG_MBR 0x14 /* Memory Base */ 
+#define LEMAC_REG_APD 0x15 /* Address PROM */ +#define LEMAC_REG_EE1 0x16 /* EEPROM Data #1 */ +#define LEMAC_REG_EE2 0x17 /* EEPROM Data #2 */ +#define LEMAC_REG_PA0 0x18 /* Physical Address (Byte 0) */ +#define LEMAC_REG_PA1 0x19 /* Physical Address (Byte 1) */ +#define LEMAC_REG_PA2 0x1A /* Physical Address (Byte 2) */ +#define LEMAC_REG_PA3 0x1B /* Physical Address (Byte 3) */ +#define LEMAC_REG_PA4 0x1C /* Physical Address (Byte 4) */ +#define LEMAC_REG_PA5 0x1D /* Physical Address (Byte 5) */ +#define LEMAC_REG_CNF 0x1E /* Configuration Management */ +#define LEMAC_IOSPACE 0x20 /* LEMAC uses 32 bytes of IOSPACE */ + + +#define LEMAC_REG_EID0 0x80 /* EISA Identification 0 */ +#define LEMAC_REG_EID1 0x81 /* EISA Identification 1 */ +#define LEMAC_REG_EID2 0x82 /* EISA Identification 2 */ +#define LEMAC_REG_EID3 0x83 /* EISA Identification 3 */ +#define LEMAC_REG_EIC 0x84 /* EISA Control */ + +/* Control Page (Page 0) Definitions */ + +#define LEMAC_MCTBL_BITS 9 +#define LEMAC_MCTBL_OFF 512 +#define LEMAC_MCTBL_SIZE (1 << (LEMAC_MCTBL_BITS - 3)) +#define LEMAC_CRC32_POLY 0xEDB88320UL /* CRC-32 Poly -- Little Endian) */ + +/* EEPROM Definitions */ + +#define LEMAC_EEP_CKSUM 0 /* The valid checksum is 0 */ +#define LEMAC_EEP_SIZE 32 /* EEPROM is 32 bytes */ +#define LEMAC_EEP_DELAY 2000 /* 2ms = 2000us */ +#define LEMAC_EEP_PRDNM 8 /* Product Name Offset */ +#define LEMAC_EEP_PRDNMSZ 8 /* Product Name Size */ +#define LEMAC_EEP_SWFLAGS 16 /* Software Options Offset */ +#define LEMAC_EEP_SETUP 23 /* Setup Options Offset */ + +#define LEMAC_EEP_SW_SQE 0x10 /* Enable TX_SQE on Transmits */ +#define LEMAC_EEP_SW_LAB 0x08 /* Enable TX_LAB on Transmits */ +#define LEMAC_EEP_ST_DRAM 0x02 /* Enable extra DRAM */ + +#define LEMAC_ADP_ROMSZ 32 /* Size of Address PROM */ + +/* Receive Status Definitions */ + +#define LEMAC_RX_PLL 0x01 /* Phase Lock Lost */ +#define LEMAC_RX_CRC 0x02 /* CRC Error */ +#define LEMAC_RX_DBE 0x04 /* Dribble Bit Error */ +#define LEMAC_RX_MCM 
0x08 /* Multicast Match */ +#define LEMAC_RX_IAM 0x10 /* Individual Address Match */ +#define LEMAC_RX_OK 0x80 /* No Errors */ + +/* Transmit Status Definitions (not valid if TXD == 0) */ + +#define LEMAC_TS_RTRYMSK 0x0F /* Retries of last TX PDU */ +#define LEMAC_TS_ECL 0x10 /* Excessive collision of ... */ +#define LEMAC_TS_LCL 0x20 /* Late collision of ... */ +#define LEMAC_TS_ID 0x40 /* Initially Deferred ... */ + +/* Transmit Control Definitions */ + +#define LEMAC_TX_ISA 0x01 /* Insert Source Address (no) */ +#define LEMAC_TX_IFC 0x02 /* Insert Frame Check (yes) */ +#define LEMAC_TX_PAD 0x04 /* Zero PAD to minimum length (yes) */ +#define LEMAC_TX_LAB 0x08 /* Less Aggressive Backoff (no) */ +#define LEMAC_TX_QMD 0x10 /* Q-Mode (yes) */ +#define LEMAC_TX_STP 0x20 /* Stop on Error (yes) */ +#define LEMAC_TX_SQE 0x40 /* SQE Enable (yes) */ + +#define LEMAC_TX_FLAGS (LEMAC_TX_IFC|LEMAC_TX_PAD|LEMAC_TX_QMD|\ + LEMAC_TX_STP|LEMAC_TX_SQE) +#define LEMAC_TX_HDRSZ 4 /* Size of TX header */ + +/* Transmit Done Queue Status Definitions */ + +#define LEMAC_TDQ_COL 0x03 /* Collision Mask */ +#define LEMAC_TDQ_NOCOL 0x00 /* No Collisions */ +#define LEMAC_TDQ_ONECOL 0x01 /* One Collision */ +#define LEMAC_TDQ_MULCOL 0x02 /* Multiple Collisions */ +#define LEMAC_TDQ_EXCCOL 0x03 /* Excessive Collisions */ +#define LEMAC_TDQ_ID 0x04 /* Initially Deferred */ +#define LEMAC_TDQ_LCL 0x08 /* Late Collision (will TX_STP) */ + +/* Control / Status Definitions */ + +#define LEMAC_CS_RXD 0x01 /* Receiver Disabled */ +#define LEMAC_CS_TXD 0x02 /* Transmitter Disabled */ +#define LEMAC_CS_RNE 0x04 /* Receive Queue Not Empty */ +#define LEMAC_CS_TNE 0x08 /* Transmit Done Queue Not Empty */ +#define LEMAC_CS_MBZ4 0x10 /* MBZ */ +#define LEMAC_CS_MCE 0x20 /* Multicast Enable */ +#define LEMAC_CS_PME 0x40 /* Promiscuous Mode Enable */ +#define LEMAC_CS_RA 0x80 /* Runt Accept */ + +/* Control Definitions */ + +#define LEMAC_CTL_LED 0x02 /* LED state (inverted) */ + +/* Interrupt Control 
Definitions */ + +#define LEMAC_IC_RXD 0x01 /* Enable RXD Interrupt */ +#define LEMAC_IC_TXD 0x02 /* Enable TXD Interrupt */ +#define LEMAC_IC_RNE 0x04 /* Enable RNE Interrupt */ +#define LEMAC_IC_TNE 0x08 /* Enable TNE Interrupt */ +#define LEMAC_IC_ALL 0x0F /* Enable RXD,TXD,RNE,TNE */ +#define LEMAC_IC_IRQMSK 0x60 /* Interrupt Select */ +#define LEMAC_IC_IRQ5 0x00 /* Select IRQ 5 */ +#define LEMAC_IC_IRQ10 0x20 /* Select IRQ 10 */ +#define LEMAC_IC_IRQ11 0x40 /* Select IRQ 11 */ +#define LEMAC_IC_IRQ15 0x60 /* Select IRQ 15 */ +#define LEMAC_IC_IE 0x80 /* Interrupt Enable */ + +/* I/O Page Definitions */ + +#define LEMAC_IOP_EEINIT 0xC0 /* Perform a board init/reset */ +#define LEMAC_IOP_EEREAD 0xE0 /* Start a read from EEPROM */ + +/* Configuration / Management Definitions */ + +#define LEMAC_CNF_DRAM 0x02 /* Extra on-board DRAM is available */ + +#endif /* _LEMAC_H_ */ diff --git a/sys/platform/pc64/isa/ic/mb86960.h b/sys/platform/pc64/isa/ic/mb86960.h new file mode 100644 index 0000000000..6f7cb0ce84 --- /dev/null +++ b/sys/platform/pc64/isa/ic/mb86960.h @@ -0,0 +1,341 @@ +/* + * All Rights Reserved, Copyright (C) Fujitsu Limited 1995 + * + * This software may be used, modified, copied, distributed, and sold, in + * both source and binary form provided that the above copyright, these + * terms and the following disclaimer are retained. The name of the author + * and/or the contributor may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND THE CONTRIBUTOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR THE CONTRIBUTOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/mb86960.h,v 1.2.8.1 2000/08/03 01:01:25 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/mb86960.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/* + * Registers of Fujitsu MB86960A/MB86965A series Ethernet controllers. + * Written and contributed by M.S. + */ + +/* + * Notes on register naming: + * + * Fujitsu documents for MB86960A/MB86965A use no memorable names + * for their registers. They defined only three names for 32 + * registers and appended numbers to distinguish registers of + * same name. Surprisingly, the numbers represent I/O address + * offsets of the registers from the base addresses, and their + * names correspond to the "bank" the registers are allocated. + * All this means that, for example, to say "read DLCR8" has no more + * than to say "read a register at offset 8 on bank DLCR." + * + * The following definitions may look silly, but that's what Fujitsu + * did, and it is necessary to know these names to read Fujitsu + * documents.. + */ + +/* Data Link Control Registers, on invariant port addresses. */ +#define FE_DLCR0 0 +#define FE_DLCR1 1 +#define FE_DLCR2 2 +#define FE_DLCR3 3 +#define FE_DLCR4 4 +#define FE_DLCR5 5 +#define FE_DLCR6 6 +#define FE_DLCR7 7 + +/* More DLCRs, on register bank #0. 
*/ +#define FE_DLCR8 8 +#define FE_DLCR9 9 +#define FE_DLCR10 10 +#define FE_DLCR11 11 +#define FE_DLCR12 12 +#define FE_DLCR13 13 +#define FE_DLCR14 14 +#define FE_DLCR15 15 + +/* Multicast Address Registers. On register bank #1. */ +#define FE_MAR8 8 +#define FE_MAR9 9 +#define FE_MAR10 10 +#define FE_MAR11 11 +#define FE_MAR12 12 +#define FE_MAR13 13 +#define FE_MAR14 14 +#define FE_MAR15 15 + +/* Buffer Memory Port Registers. On register bank #2. */ +#define FE_BMPR8 8 +#define FE_BMPR9 9 +#define FE_BMPR10 10 +#define FE_BMPR11 11 +#define FE_BMPR12 12 +#define FE_BMPR13 13 +#define FE_BMPR14 14 +#define FE_BMPR15 15 + +/* More BMPRs, only on 86965, accessible only when JLI mode. */ +#define FE_BMPR16 16 +#define FE_BMPR17 17 +#define FE_BMPR18 18 +#define FE_BMPR19 19 + +/* + * Definitions of registers. + * I don't have Fujitsu documents of MB86960A/MB86965A, so I don't + * know the official names for each flag and field. The following + * names are assigned by me (the author of this file,) since I cannot + * memorize hexadecimal constants for all of these functions. + * Comments? + * + * I've got documents from Fujitsu web site, recently. However, it's + * too late. Names for some fields (bits) are kept different from + * those used in the Fujitsu documents... + */ + +/* DLCR0 -- transmitter status */ +#define FE_D0_BUSERR 0x01 /* Bus write error? 
*/ +#define FE_D0_COLL16 0x02 /* Collision limit (16) encountered */ +#define FE_D0_COLLID 0x04 /* Collision on last transmission */ +#define FE_D0_JABBER 0x08 /* Jabber */ +#define FE_D0_CRLOST 0x10 /* Carrier lost on last transmission */ +#define FE_D0_PKTRCD 0x20 /* Last packet looped back correctly */ +#define FE_D0_NETBSY 0x40 /* Network Busy (Carrier Detected) */ +#define FE_D0_TXDONE 0x80 /* Transmission complete */ + +/* DLCR1 -- receiver status */ +#define FE_D1_OVRFLO 0x01 /* Receiver buffer overflow */ +#define FE_D1_CRCERR 0x02 /* CRC error on last packet */ +#define FE_D1_ALGERR 0x04 /* Alignment error on last packet */ +#define FE_D1_SRTPKT 0x08 /* Short (RUNT) packet is received */ +#define FE_D1_RMTRST 0x10 /* Remote reset packet (type = 0x0900) */ +#define FE_D1_DMAEOP 0x20 /* Host asserted End of DMA OPeration */ +#define FE_D1_BUSERR 0x40 /* Bus read error */ +#define FE_D1_PKTRDY 0x80 /* Packet(s) ready on receive buffer */ + +/* DLCR2 -- transmitter interrupt control; same layout as DLCR0 */ +#define FE_D2_BUSERR FE_D0_BUSERR +#define FE_D2_COLL16 FE_D0_COLL16 +#define FE_D2_COLLID FE_D0_COLLID +#define FE_D2_JABBER FE_D0_JABBER +#define FE_D2_TXDONE FE_D0_TXDONE + +#define FE_D2_RESERVED 0x70 + +/* DLCR3 -- receiver interrupt control; same layout as DLCR1 */ +#define FE_D3_OVRFLO FE_D1_OVRFLO +#define FE_D3_CRCERR FE_D1_CRCERR +#define FE_D3_ALGERR FE_D1_ALGERR +#define FE_D3_SRTPKT FE_D1_SRTPKT +#define FE_D3_RMTRST FE_D1_RMTRST +#define FE_D3_DMAEOP FE_D1_DMAEOP +#define FE_D3_BUSERR FE_D1_BUSERR +#define FE_D3_PKTRDY FE_D1_PKTRDY + +/* DLCR4 -- transmitter operation mode */ +#define FE_D4_DSC 0x01 /* Disable carrier sense on trans. 
*/ +#define FE_D4_LBC 0x02 /* Loop back test control */ +#define FE_D4_CNTRL 0x04 /* - tied to CNTRL pin of the chip */ +#define FE_D4_TEST1 0x08 /* Test output #1 */ +#define FE_D4_COL 0xF0 /* Collision counter */ + +#define FE_D4_LBC_ENABLE 0x00 /* Perform loop back test */ +#define FE_D4_LBC_DISABLE 0x02 /* Normal operation */ + +#define FE_D4_COL_SHIFT 4 + +/* DLCR5 -- receiver operation mode */ +#define FE_D5_AFM0 0x01 /* Receive packets for other stations */ +#define FE_D5_AFM1 0x02 /* Receive packets for this station */ +#define FE_D5_RMTRST 0x04 /* Enable remote reset operation */ +#define FE_D5_SRTPKT 0x08 /* Accept short (RUNT) packets */ +#define FE_D5_SRTADR 0x10 /* Short (16 bits?) MAC address */ +#define FE_D5_BADPKT 0x20 /* Accept packets with error */ +#define FE_D5_BUFEMP 0x40 /* Receive buffer is empty */ +#define FE_D5_TEST2 0x80 /* Test output #2 */ + +/* DLCR6 -- hardware configuration #0 */ +#define FE_D6_BUFSIZ 0x03 /* Size of NIC buffer SRAM */ +#define FE_D6_TXBSIZ 0x0C /* Size (and config) of trans. buffer */ +#define FE_D6_BBW 0x10 /* Buffer SRAM bus width */ +#define FE_D6_SBW 0x20 /* System bus width */ +#define FE_D6_SRAM 0x40 /* Buffer SRAM access time */ +#define FE_D6_DLC 0x80 /* Disable DLC (receiver/transmitter) */ + +#define FE_D6_BUFSIZ_8KB 0x00 /* The board has 8KB SRAM */ +#define FE_D6_BUFSIZ_16KB 0x01 /* The board has 16KB SRAM */ +#define FE_D6_BUFSIZ_32KB 0x02 /* The board has 32KB SRAM */ +#define FE_D6_BUFSIZ_64KB 0x03 /* The board has 64KB SRAM */ + +#define FE_D6_TXBSIZ_1x2KB 0x00 /* Single 2KB buffer for trans. 
*/ +#define FE_D6_TXBSIZ_2x2KB 0x04 /* Double 2KB buffers */ +#define FE_D6_TXBSIZ_2x4KB 0x08 /* Double 4KB buffers */ +#define FE_D6_TXBSIZ_2x8KB 0x0C /* Double 8KB buffers */ + +#define FE_D6_BBW_WORD 0x00 /* SRAM has 16 bit data line */ +#define FE_D6_BBW_BYTE 0x10 /* SRAM has 8 bit data line */ + +#define FE_D6_SBW_WORD 0x00 /* Access with 16 bit (AT) bus */ +#define FE_D6_SBW_BYTE 0x20 /* Access with 8 bit (XT) bus */ + +#define FE_D6_SRAM_150ns 0x00 /* The board has slow SRAM */ +#define FE_D6_SRAM_100ns 0x40 /* The board has fast SRAM */ + +#define FE_D6_DLC_ENABLE 0x00 /* Normal operation */ +#define FE_D6_DLC_DISABLE 0x80 /* Stop sending/receiving */ + +/* DLCR7 -- hardware configuration #1 */ +#define FE_D7_BYTSWP 0x01 /* Host byte order control */ +#define FE_D7_EOPPOL 0x02 /* Polarity of DMA EOP signal */ +#define FE_D7_RBS 0x0C /* Register bank select */ +#define FE_D7_RDYPNS 0x10 /* Senses RDYPNSEL input signal */ +#define FE_D7_POWER 0x20 /* Stand-by (power down) mode control */ +#define FE_D7_IDENT 0xC0 /* Chip identification */ + +#define FE_D7_BYTSWP_LH 0x00 /* DEC/Intel byte order */ +#define FE_D7_BYTSWP_HL 0x01 /* IBM/Motorola byte order */ + +#define FE_D7_RBS_DLCR 0x00 /* Select DLCR8-15 */ +#define FE_D7_RBS_MAR 0x04 /* Select MAR8-15 */ +#define FE_D7_RBS_BMPR 0x08 /* Select BMPR8-15 */ + +#define FE_D7_POWER_DOWN 0x00 /* Power down (stand-by) mode */ +#define FE_D7_POWER_UP 0x20 /* Normal operation */ + +#define FE_D7_IDENT_TDK 0x00 /* TDK chips? */ +#define FE_D7_IDENT_NICE 0x80 /* Fujitsu NICE (86960) */ +#define FE_D7_IDENT_EC 0xC0 /* Fujitsu EtherCoupler (86965) */ + +/* DLCR8 thru DLCR13 are for Ethernet station address. */ + +/* DLCR14 and DLCR15 are for TDR. (TDR is used for cable diagnostic.) */ + +/* MAR8 thru MAR15 are for Multicast address filter. */ + +/* BMPR8 and BMPR9 are for packet data. 
*/ + +/* BMPR10 -- transmitter start trigger */ +#define FE_B10_START 0x80 /* Start transmitter */ +#define FE_B10_COUNT 0x7F /* Packet count */ + +/* BMPR11 -- 16 collisions control */ +#define FE_B11_CTRL 0x01 /* Skip or resend errored packets */ +#define FE_B11_MODE1 0x02 /* Restart transmitter after COLL16 */ +#define FE_B11_MODE2 0x04 /* Automatic restart enable */ + +#define FE_B11_CTRL_RESEND 0x00 /* Re-send the collided packet */ +#define FE_B11_CTRL_SKIP 0x01 /* Skip the collided packet */ + +/* BMPR12 -- DMA enable */ +#define FE_B12_TXDMA 0x01 /* Enable transmitter DMA */ +#define FE_B12_RXDMA 0x02 /* Enable receiver DMA */ + +/* BMPR13 -- DMA control */ +#define FE_B13_BSTCTL 0x03 /* DMA burst mode control */ +#define FE_B13_TPTYPE 0x04 /* Twisted pair cable impedance */ +#define FE_B13_PORT 0x18 /* Port (TP/AUI) selection */ +#define FE_B13_LNKTST 0x20 /* Link test enable */ +#define FE_B13_SQTHLD 0x40 /* Lower squelch threshold */ +#define FE_B13_IOUNLK 0x80 /* Change I/O base address, on JLI mode */ + +#define FE_B13_BSTCTL_1 0x00 +#define FE_B13_BSTCTL_4 0x01 +#define FE_B13_BSTCTL_8 0x02 +#define FE_B13_BSTCLT_12 0x03 + +#define FE_B13_TPTYPE_UTP 0x00 /* Unshielded (standard) cable */ +#define FE_B13_TPTYPE_STP 0x04 /* Shielded (IBM) cable */ + +#define FE_B13_PORT_AUTO 0x00 /* Auto detected */ +#define FE_B13_PORT_TP 0x08 /* Force TP */ +#define FE_B13_PORT_AUI 0x18 /* Force AUI */ + +/* BMPR14 -- More receiver control and more transmission interrupts */ +#define FE_B14_FILTER 0x01 /* Filter out self-originated packets */ +#define FE_B14_SQE 0x02 /* SQE interrupt enable */ +#define FE_B14_SKIP 0x04 /* Skip a received packet */ +#define FE_B14_RJAB 0x20 /* RJAB interrupt enable */ +#define FE_B14_LLD 0x40 /* Local-link-down interrupt enable */ +#define FE_B14_RLD 0x80 /* Remote-link-down interrupt enable */ + +/* BMPR15 -- More transmitter status; basically same layout as BMPR14 */ +#define FE_B15_SQE FE_B14_SQE +#define FE_B15_RCVPOL 0x08 /* 
Reversed receive line polarity */ +#define FE_B15_RMTPRT 0x10 /* ??? */ +#define FE_B15_RAJB FE_B14_RJAB +#define FE_B15_LLD FE_B14_LLD +#define FE_B15_RLD FE_B14_RLD + +/* BMPR16 -- EEPROM control */ +#define FE_B16_DOUT 0x04 /* EEPROM Data in (CPU to EEPROM) */ +#define FE_B16_SELECT 0x20 /* EEPROM chip select */ +#define FE_B16_CLOCK 0x40 /* EEPROM shift clock */ +#define FE_B16_DIN 0x80 /* EEPROM data out (EEPROM to CPU) */ + +/* BMPR17 -- EEPROM data */ +#define FE_B17_DATA 0x80 /* EEPROM data bit */ + +/* BMPR18 -- cycle I/O address setting in JLI mode */ + +/* BMPR19 -- ISA interface configuration in JLI mode */ +#define FE_B19_IRQ 0xC0 +#define FE_B19_IRQ_SHIFT 6 + +#define FE_B19_ROM 0x38 +#define FE_B19_ROM_SHIFT 3 + +#define FE_B19_ADDR 0x07 +#define FE_B19_ADDR_SHIFT 0 + +/* + * An extra I/O port address to reset 86965. This location is called + * "ID ROM area" by Fujitsu document. + */ + +/* + * Flags in Receive Packet Header... Basically same layout as DLCR1. + */ +#define FE_RPH_OVRFLO FE_D1_OVRFLO +#define FE_RPH_CRCERR FE_D1_CRCERR +#define FE_RPH_ALGERR FE_D1_ALGERR +#define FE_RPH_SRTPKT FE_D1_SRTPKT +#define FE_RPH_RMTRST FE_D1_RMTRST +#define FE_RPH_GOOD 0x20 /* Good packet follows */ + +/* + * EEPROM specification (of JLI mode). + */ + +/* Number of bytes in an EEPROM accessible through 86965. */ +#define FE_EEPROM_SIZE 32 + +/* Offset for JLI config; automatically copied into BMPR19 at startup. */ +#define FE_EEPROM_CONF 0 + +/* + * Some 8696x specific constants. + */ + +/* Length (in bytes) of a Multicast Address Filter. */ +#define FE_FILTER_LEN 8 + +/* How many packets we can put in the transmission buffer on NIC memory. */ +#define FE_QUEUEING_MAX 127 + +/* Length (in bytes) of a "packet length" word in transmission buffer. */ +#define FE_DATA_LEN_LEN 2 + +/* Special Multicast Address Filter value. 
*/ +#define FE_FILTER_NOTHING { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 } +#define FE_FILTER_ALL { 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF } diff --git a/sys/platform/pc64/isa/ic/sc26198.h b/sys/platform/pc64/isa/ic/sc26198.h new file mode 100644 index 0000000000..fe236c7e18 --- /dev/null +++ b/sys/platform/pc64/isa/ic/sc26198.h @@ -0,0 +1,547 @@ +/*****************************************************************************/ + +/* + * sc26198.h -- SC26198 UART hardware info. + * + * Copyright (c) 1995-1998 Greg Ungerer (gerg@stallion.oz.au). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Greg Ungerer. + * 4. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/sc26198.h,v 1.1.2.1 2001/08/30 12:29:55 murray Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/sc26198.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/*****************************************************************************/ +#ifndef _SC26198_H +#define _SC26198_H +/*****************************************************************************/ + +/* + * Define the number of async ports per sc26198 uart device. + */ +#define SC26198_PORTS 8 + +/* + * Baud rate timing clocks. All derived from a master 14.7456 MHz clock. + */ +#define SC26198_MASTERCLOCK 14745600L +#define SC26198_DCLK (SC26198_MASTERCLOCK) +#define SC26198_CCLK (SC26198_MASTERCLOCK / 2) +#define SC26198_BCLK (SC26198_MASTERCLOCK / 4) + +/* + * Define internal FIFO sizes for the 26198 ports. + */ +#define SC26198_TXFIFOSIZE 16 +#define SC26198_RXFIFOSIZE 16 + +/*****************************************************************************/ + +/* + * Global register definitions. These registers are global to each 26198 + * device, not specific ports on it. 
+ */ +#define TSTR 0x0d +#define GCCR 0x0f +#define ICR 0x1b +#define WDTRCR 0x1d +#define IVR 0x1f +#define BRGTRUA 0x84 +#define GPOSR 0x87 +#define GPOC 0x8b +#define UCIR 0x8c +#define CIR 0x8c +#define BRGTRUB 0x8d +#define GRXFIFO 0x8e +#define GTXFIFO 0x8e +#define GCCR2 0x8f +#define BRGTRLA 0x94 +#define GPOR 0x97 +#define GPOD 0x9b +#define BRGTCR 0x9c +#define GICR 0x9c +#define BRGTRLB 0x9d +#define GIBCR 0x9d +#define GITR 0x9f + +/* + * Per port channel registers. These are the register offsets within + * the port address space, so need to have the port address (0 to 7) + * inserted in bit positions 4:6. + */ +#define MR0 0x00 +#define MR1 0x01 +#define IOPCR 0x02 +#define BCRBRK 0x03 +#define BCRCOS 0x04 +#define BCRX 0x06 +#define BCRA 0x07 +#define XONCR 0x08 +#define XOFFCR 0x09 +#define ARCR 0x0a +#define RXCSR 0x0c +#define TXCSR 0x0e +#define MR2 0x80 +#define SR 0x81 +#define SCCR 0x81 +#define ISR 0x82 +#define IMR 0x82 +#define TXFIFO 0x83 +#define RXFIFO 0x83 +#define IPR 0x84 +#define IOPIOR 0x85 +#define XISR 0x86 + +/* + * For any given port calculate the address to use to access a specified + * register. This is only used for unusual access... + */ +#define SC26198_PORTREG(port,reg) ((((port) & 0x07) << 4) | (reg)) + +/*****************************************************************************/ + +/* + * Global configuration control register bit definitions. + */ +#define GCCR_NOACK 0x00 +#define GCCR_IVRACK 0x02 +#define GCCR_IVRCHANACK 0x04 +#define GCCR_IVRTYPCHANACK 0x06 +#define GCCR_ASYNCCYCLE 0x00 +#define GCCR_SYNCCYCLE 0x40 + +/*****************************************************************************/ + +/* + * Mode register 0 bit definitions. 
+ */ +#define MR0_ADDRNONE 0x00 +#define MR0_AUTOWAKE 0x01 +#define MR0_AUTODOZE 0x02 +#define MR0_AUTOWAKEDOZE 0x03 +#define MR0_SWFNONE 0x00 +#define MR0_SWFTX 0x04 +#define MR0_SWFRX 0x08 +#define MR0_SWFRXTX 0x0c +#define MR0_TXMASK 0x30 +#define MR0_TXEMPTY 0x00 +#define MR0_TXHIGH 0x10 +#define MR0_TXHALF 0x20 +#define MR0_TXRDY 0x00 +#define MR0_ADDRNT 0x00 +#define MR0_ADDRT 0x40 +#define MR0_SWFNT 0x00 +#define MR0_SWFT 0x80 + +/* + * Mode register 1 bit definitions. + */ +#define MR1_CS5 0x00 +#define MR1_CS6 0x01 +#define MR1_CS7 0x02 +#define MR1_CS8 0x03 +#define MR1_PAREVEN 0x00 +#define MR1_PARODD 0x04 +#define MR1_PARENB 0x00 +#define MR1_PARFORCE 0x08 +#define MR1_PARNONE 0x10 +#define MR1_PARSPECIAL 0x18 +#define MR1_ERRCHAR 0x00 +#define MR1_ERRBLOCK 0x20 +#define MR1_ISRUNMASKED 0x00 +#define MR1_ISRMASKED 0x40 +#define MR1_AUTORTS 0x80 + +/* + * Mode register 2 bit definitions. + */ +#define MR2_STOP1 0x00 +#define MR2_STOP15 0x01 +#define MR2_STOP2 0x02 +#define MR2_STOP916 0x03 +#define MR2_RXFIFORDY 0x00 +#define MR2_RXFIFOHALF 0x04 +#define MR2_RXFIFOHIGH 0x08 +#define MR2_RXFIFOFULL 0x0c +#define MR2_AUTOCTS 0x10 +#define MR2_TXRTS 0x20 +#define MR2_MODENORM 0x00 +#define MR2_MODEAUTOECHO 0x40 +#define MR2_MODELOOP 0x80 +#define MR2_MODEREMECHO 0xc0 + +/*****************************************************************************/ + +/* + * Baud Rate Generator (BRG) selector values. 
+ */ +#define BRG_50 0x00 +#define BRG_75 0x01 +#define BRG_150 0x02 +#define BRG_200 0x03 +#define BRG_300 0x04 +#define BRG_450 0x05 +#define BRG_600 0x06 +#define BRG_900 0x07 +#define BRG_1200 0x08 +#define BRG_1800 0x09 +#define BRG_2400 0x0a +#define BRG_3600 0x0b +#define BRG_4800 0x0c +#define BRG_7200 0x0d +#define BRG_9600 0x0e +#define BRG_14400 0x0f +#define BRG_19200 0x10 +#define BRG_28200 0x11 +#define BRG_38400 0x12 +#define BRG_57600 0x13 +#define BRG_115200 0x14 +#define BRG_230400 0x15 +#define BRG_GIN0 0x16 +#define BRG_GIN1 0x17 +#define BRG_CT0 0x18 +#define BRG_CT1 0x19 +#define BRG_RX2TX316 0x1b +#define BRG_RX2TX31 0x1c + +/*****************************************************************************/ + +/* + * Command register command definitions. + */ +#define CR_NULL 0x04 +#define CR_ADDRNORMAL 0x0c +#define CR_RXRESET 0x14 +#define CR_TXRESET 0x1c +#define CR_CLEARRXERR 0x24 +#define CR_BREAKRESET 0x2c +#define CR_TXSTARTBREAK 0x34 +#define CR_TXSTOPBREAK 0x3c +#define CR_RTSON 0x44 +#define CR_RTSOFF 0x4c +#define CR_ADDRINIT 0x5c +#define CR_RXERRBLOCK 0x6c +#define CR_TXSENDXON 0x84 +#define CR_TXSENDXOFF 0x8c +#define CR_GANGXONSET 0x94 +#define CR_GANGXOFFSET 0x9c +#define CR_GANGXONINIT 0xa4 +#define CR_GANGXOFFINIT 0xac +#define CR_HOSTXON 0xb4 +#define CR_HOSTXOFF 0xbc +#define CR_CANCELXOFF 0xc4 +#define CR_ADDRRESET 0xdc +#define CR_RESETALLPORTS 0xf4 +#define CR_RESETALL 0xfc + +#define CR_RXENABLE 0x01 +#define CR_TXENABLE 0x02 + +/*****************************************************************************/ + +/* + * Channel status register. 
+ */ +#define SR_RXRDY 0x01 +#define SR_RXFULL 0x02 +#define SR_TXRDY 0x04 +#define SR_TXEMPTY 0x08 +#define SR_RXOVERRUN 0x10 +#define SR_RXPARITY 0x20 +#define SR_RXFRAMING 0x40 +#define SR_RXBREAK 0x80 + +#define SR_RXERRS (SR_RXPARITY | SR_RXFRAMING | SR_RXOVERRUN) + +/*****************************************************************************/ + +/* + * Interrupt status register and interrupt mask register bit definitions. + */ +#define IR_TXRDY 0x01 +#define IR_RXRDY 0x02 +#define IR_RXBREAK 0x04 +#define IR_XONXOFF 0x10 +#define IR_ADDRRECOG 0x20 +#define IR_RXWATCHDOG 0x40 +#define IR_IOPORT 0x80 + +/*****************************************************************************/ + +/* + * Interrupt vector register field definitions. + */ +#define IVR_CHANMASK 0x07 +#define IVR_TYPEMASK 0x18 +#define IVR_CONSTMASK 0xc0 + +#define IVR_RXDATA 0x10 +#define IVR_RXBADDATA 0x18 +#define IVR_TXDATA 0x08 +#define IVR_OTHER 0x00 + +/*****************************************************************************/ + +/* + * BRG timer control register bit definitions. + */ +#define BRGCTCR_DISABCLK0 0x00 +#define BRGCTCR_ENABCLK0 0x08 +#define BRGCTCR_DISABCLK1 0x00 +#define BRGCTCR_ENABCLK1 0x80 + +#define BRGCTCR_0SCLK16 0x00 +#define BRGCTCR_0SCLK32 0x01 +#define BRGCTCR_0SCLK64 0x02 +#define BRGCTCR_0SCLK128 0x03 +#define BRGCTCR_0X1 0x04 +#define BRGCTCR_0X12 0x05 +#define BRGCTCR_0IO1A 0x06 +#define BRGCTCR_0GIN0 0x07 + +#define BRGCTCR_1SCLK16 0x00 +#define BRGCTCR_1SCLK32 0x10 +#define BRGCTCR_1SCLK64 0x20 +#define BRGCTCR_1SCLK128 0x30 +#define BRGCTCR_1X1 0x40 +#define BRGCTCR_1X12 0x50 +#define BRGCTCR_1IO1B 0x60 +#define BRGCTCR_1GIN1 0x70 + +/*****************************************************************************/ + +/* + * Watch dog timer enable register. + */ +#define WDTRCR_ENABALL 0xff + +/*****************************************************************************/ + +/* + * XON/XOFF interrupt status register. 
+ */ +#define XISR_TXCHARMASK 0x03 +#define XISR_TXCHARNORMAL 0x00 +#define XISR_TXWAIT 0x01 +#define XISR_TXXOFFPEND 0x02 +#define XISR_TXXONPEND 0x03 + +#define XISR_TXFLOWMASK 0x0c +#define XISR_TXNORMAL 0x00 +#define XISR_TXSTOPPEND 0x04 +#define XISR_TXSTARTED 0x08 +#define XISR_TXSTOPPED 0x0c + +#define XISR_RXFLOWMASK 0x30 +#define XISR_RXFLOWNONE 0x00 +#define XISR_RXXONSENT 0x10 +#define XISR_RXXOFFSENT 0x20 + +#define XISR_RXXONGOT 0x40 +#define XISR_RXXOFFGOT 0x80 + +/*****************************************************************************/ + +/* + * Current interrupt register. + */ +#define CIR_TYPEMASK 0xc0 +#define CIR_TYPEOTHER 0x00 +#define CIR_TYPETX 0x40 +#define CIR_TYPERXGOOD 0x80 +#define CIR_TYPERXBAD 0xc0 + +#define CIR_RXDATA 0x80 +#define CIR_RXBADDATA 0x40 +#define CIR_TXDATA 0x40 + +#define CIR_CHANMASK 0x07 +#define CIR_CNTMASK 0x38 + +#define CIR_SUBTYPEMASK 0x38 +#define CIR_SUBNONE 0x00 +#define CIR_SUBCOS 0x08 +#define CIR_SUBADDR 0x10 +#define CIR_SUBXONXOFF 0x18 +#define CIR_SUBBREAK 0x28 + +/*****************************************************************************/ + +/* + * Global interrupting channel register. + */ +#define GICR_CHANMASK 0x07 + +/*****************************************************************************/ + +/* + * Global interrupting byte count register. + */ +#define GICR_COUNTMASK 0x0f + +/*****************************************************************************/ + +/* + * Global interrupting type register. + */ +#define GITR_RXMASK 0xc0 +#define GITR_RXNONE 0x00 +#define GITR_RXBADDATA 0x80 +#define GITR_RXGOODDATA 0xc0 +#define GITR_TXDATA 0x20 + +#define GITR_SUBTYPEMASK 0x07 +#define GITR_SUBNONE 0x00 +#define GITR_SUBCOS 0x01 +#define GITR_SUBADDR 0x02 +#define GITR_SUBXONXOFF 0x03 +#define GITR_SUBBREAK 0x05 + +/*****************************************************************************/ + +/* + * Input port change register. 
+ */ +#define IPR_CTS 0x01 +#define IPR_DTR 0x02 +#define IPR_RTS 0x04 +#define IPR_DCD 0x08 +#define IPR_CTSCHANGE 0x10 +#define IPR_DTRCHANGE 0x20 +#define IPR_RTSCHANGE 0x40 +#define IPR_DCDCHANGE 0x80 + +#define IPR_CHANGEMASK 0xf0 + +/*****************************************************************************/ + +/* + * IO port interrupt and output register. + */ +#define IOPR_CTS 0x01 +#define IOPR_DTR 0x02 +#define IOPR_RTS 0x04 +#define IOPR_DCD 0x08 +#define IOPR_CTSCOS 0x10 +#define IOPR_DTRCOS 0x20 +#define IOPR_RTSCOS 0x40 +#define IOPR_DCDCOS 0x80 + +/*****************************************************************************/ + +/* + * IO port configuration register. + */ +#define IOPCR_SETCTS 0x00 +#define IOPCR_SETDTR 0x04 +#define IOPCR_SETRTS 0x10 +#define IOPCR_SETDCD 0x00 + +#define IOPCR_SETSIGS (IOPCR_SETRTS | IOPCR_SETRTS | IOPCR_SETDTR | IOPCR_SETDCD) + +/*****************************************************************************/ + +/* + * General purpose output select register. + */ +#define GPORS_TXC1XA 0x08 +#define GPORS_TXC16XA 0x09 +#define GPORS_RXC16XA 0x0a +#define GPORS_TXC16XB 0x0b +#define GPORS_GPOR3 0x0c +#define GPORS_GPOR2 0x0d +#define GPORS_GPOR1 0x0e +#define GPORS_GPOR0 0x0f + +/*****************************************************************************/ + +/* + * General purpose output register. + */ +#define GPOR_0 0x01 +#define GPOR_1 0x02 +#define GPOR_2 0x04 +#define GPOR_3 0x08 + +/*****************************************************************************/ + +/* + * General purpose output clock register. 
+ */ +#define GPORC_0NONE 0x00 +#define GPORC_0GIN0 0x01 +#define GPORC_0GIN1 0x02 +#define GPORC_0IO3A 0x02 + +#define GPORC_1NONE 0x00 +#define GPORC_1GIN0 0x04 +#define GPORC_1GIN1 0x08 +#define GPORC_1IO3C 0x0c + +#define GPORC_2NONE 0x00 +#define GPORC_2GIN0 0x10 +#define GPORC_2GIN1 0x20 +#define GPORC_2IO3E 0x20 + +#define GPORC_3NONE 0x00 +#define GPORC_3GIN0 0x40 +#define GPORC_3GIN1 0x80 +#define GPORC_3IO3G 0xc0 + +/*****************************************************************************/ + +/* + * General purpose output data register. + */ +#define GPOD_0MASK 0x03 +#define GPOD_0SET1 0x00 +#define GPOD_0SET0 0x01 +#define GPOD_0SETR0 0x02 +#define GPOD_0SETIO3B 0x03 + +#define GPOD_1MASK 0x0c +#define GPOD_1SET1 0x00 +#define GPOD_1SET0 0x04 +#define GPOD_1SETR0 0x08 +#define GPOD_1SETIO3D 0x0c + +#define GPOD_2MASK 0x30 +#define GPOD_2SET1 0x00 +#define GPOD_2SET0 0x10 +#define GPOD_2SETR0 0x20 +#define GPOD_2SETIO3F 0x30 + +#define GPOD_3MASK 0xc0 +#define GPOD_3SET1 0x00 +#define GPOD_3SET0 0x40 +#define GPOD_3SETR0 0x80 +#define GPOD_3SETIO3H 0xc0 + +/*****************************************************************************/ +#endif diff --git a/sys/platform/pc64/isa/ic/scd1400.h b/sys/platform/pc64/isa/ic/scd1400.h new file mode 100644 index 0000000000..3517fbf201 --- /dev/null +++ b/sys/platform/pc64/isa/ic/scd1400.h @@ -0,0 +1,313 @@ +/*****************************************************************************/ + +/* + * cd1400.h -- cd1400 UART hardware info. + * + * Copyright (c) 1995 Greg Ungerer (gerg@stallion.oz.au). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Greg Ungerer. + * 4. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ic/scd1400.h,v 1.5 1999/08/28 00:45:15 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/ic/scd1400.h,v 1.1 2008/08/29 17:07:21 dillon Exp $ + */ + +/*****************************************************************************/ +#ifndef _CD1400_H +#define _CD1400_H +/*****************************************************************************/ + +/* + * Define the number of async ports per cd1400 uart chip. + */ +#define CD1400_PORTS 4 + +/* + * Define the cd1400 uarts internal FIFO sizes. 
+ */ +#define CD1400_TXFIFOSIZE 12 +#define CD1400_RXFIFOSIZE 12 + +/* + * Local RX FIFO thresh hold level. Also define the RTS thresh hold + * based on the RX thresh hold. + */ +#define FIFO_RXTHRESHOLD 6 +#define FIFO_RTSTHRESHOLD 7 + +/*****************************************************************************/ + +/* + * Define the cd1400 register addresses. These are all the valid + * registers with the cd1400. Some are global, some virtual, some + * per port. + */ +#define GFRCR 0x40 +#define CAR 0x68 +#define GCR 0x4b +#define SVRR 0x67 +#define RICR 0x44 +#define TICR 0x45 +#define MICR 0x46 +#define RIR 0x6b +#define TIR 0x6a +#define MIR 0x69 +#define PPR 0x7e + +#define RIVR 0x43 +#define TIVR 0x42 +#define MIVR 0x41 +#define TDR 0x63 +#define RDSR 0x62 +#define MISR 0x4c +#define EOSRR 0x60 + +#define LIVR 0x18 +#define CCR 0x05 +#define SRER 0x06 +#define COR1 0x08 +#define COR2 0x09 +#define COR3 0x0a +#define COR4 0x1e +#define COR5 0x1f +#define CCSR 0x0b +#define RDCR 0x0e +#define SCHR1 0x1a +#define SCHR2 0x1b +#define SCHR3 0x1c +#define SCHR4 0x1d +#define SCRL 0x22 +#define SCRH 0x23 +#define LNC 0x24 +#define MCOR1 0x15 +#define MCOR2 0x16 +#define RTPR 0x21 +#define MSVR1 0x6c +#define MSVR2 0x6d +#define PSVR 0x6f +#define RBPR 0x78 +#define RCOR 0x7c +#define TBPR 0x72 +#define TCOR 0x76 + +/*****************************************************************************/ + +/* + * Define the set of baud rate clock divisors. + */ +#define CD1400_CLK0 8 +#define CD1400_CLK1 32 +#define CD1400_CLK2 128 +#define CD1400_CLK3 512 +#define CD1400_CLK4 2048 + +#define CD1400_NUMCLKS 5 + +/*****************************************************************************/ + +/* + * Define the clock pre-scalar value to be a 5 ms clock. This should be + * OK for now. It would probably be better to make it 10 ms, but we + * can't fit that divisor into 8 bits! 
+ */ +#define PPR_SCALAR 244 + +/*****************************************************************************/ + +/* + * Define values used to set character size options. + */ +#define COR1_CHL5 0x00 +#define COR1_CHL6 0x01 +#define COR1_CHL7 0x02 +#define COR1_CHL8 0x03 + +/* + * Define values used to set the number of stop bits. + */ +#define COR1_STOP1 0x00 +#define COR1_STOP15 0x04 +#define COR1_STOP2 0x08 + +/* + * Define values used to set the parity scheme in use. + */ +#define COR1_PARNONE 0x00 +#define COR1_PARFORCE 0x20 +#define COR1_PARENB 0x40 +#define COR1_PARIGNORE 0x10 + +#define COR1_PARODD 0x80 +#define COR1_PAREVEN 0x00 + +#define COR2_IXM 0x80 +#define COR2_TXIBE 0x40 +#define COR2_ETC 0x20 +#define COR2_LLM 0x10 +#define COR2_RLM 0x08 +#define COR2_RTSAO 0x04 +#define COR2_CTSAE 0x02 + +#define COR3_SCDRNG 0x80 +#define COR3_SCD34 0x40 +#define COR3_FCT 0x20 +#define COR3_SCD12 0x10 + +/* + * Define the bit values of COR4. + */ +#define COR4_BRKINT 0x08 +#define COR4_IGNBRK 0x18 + +/* + * Define the bit values of COR5. + */ +#define COR5_ISTRIP 0x80 + +/*****************************************************************************/ + +/* + * Define the modem control register values. + * Note that the actual hardware is a little different to the conventional + * pin names on the cd1400. + */ +#define MSVR1_DTR 0x01 +#define MSVR1_DSR 0x10 +#define MSVR1_RI 0x20 +#define MSVR1_CTS 0x40 +#define MSVR1_DCD 0x80 + +#define MSVR2_RTS 0x02 +#define MSVR2_DSR 0x10 +#define MSVR2_RI 0x20 +#define MSVR2_CTS 0x40 +#define MSVR2_DCD 0x80 + +#define MCOR1_DCD 0x80 +#define MCOR1_CTS 0x40 +#define MCOR1_RI 0x20 +#define MCOR1_DSR 0x10 + +#define MCOR2_DCD 0x80 +#define MCOR2_CTS 0x40 +#define MCOR2_RI 0x20 +#define MCOR2_DSR 0x10 + +/*****************************************************************************/ + +/* + * Define the bits used with the service (interrupt) enable register. 
+ */ +#define SRER_NNDT 0x01 +#define SRER_TXEMPTY 0x02 +#define SRER_TXDATA 0x04 +#define SRER_RXDATA 0x10 +#define SRER_MODEM 0x80 + +/*****************************************************************************/ + +/* + * Define operational commands for the command register. + */ +#define CCR_RESET 0x80 +#define CCR_CORCHANGE 0x4e +#define CCR_SENDCH 0x20 +#define CCR_CHANCTRL 0x10 + +#define CCR_TXENABLE (CCR_CHANCTRL | 0x08) +#define CCR_TXDISABLE (CCR_CHANCTRL | 0x04) +#define CCR_RXENABLE (CCR_CHANCTRL | 0x02) +#define CCR_RXDISABLE (CCR_CHANCTRL | 0x01) + +#define CCR_SENDSCHR1 (CCR_SENDCH | 0x01) +#define CCR_SENDSCHR2 (CCR_SENDCH | 0x02) +#define CCR_SENDSCHR3 (CCR_SENDCH | 0x03) +#define CCR_SENDSCHR4 (CCR_SENDCH | 0x04) + +#define CCR_RESETCHAN (CCR_RESET | 0x00) +#define CCR_RESETFULL (CCR_RESET | 0x01) +#define CCR_TXFLUSHFIFO (CCR_RESET | 0x02) + +#define CCR_MAXWAIT 10000 + +/*****************************************************************************/ + +/* + * Define the valid acknowledgement types (for hw ack cycle). + */ +#define ACK_TYPMASK 0x07 +#define ACK_TYPTX 0x02 +#define ACK_TYPMDM 0x01 +#define ACK_TYPRXGOOD 0x03 +#define ACK_TYPRXBAD 0x07 + +#define SVRR_RX 0x01 +#define SVRR_TX 0x02 +#define SVRR_MDM 0x04 + +#define ST_OVERRUN 0x01 +#define ST_FRAMING 0x02 +#define ST_PARITY 0x04 +#define ST_BREAK 0x08 +#define ST_SCHAR1 0x10 +#define ST_SCHAR2 0x20 +#define ST_SCHAR3 0x30 +#define ST_SCHAR4 0x40 +#define ST_RANGE 0x70 +#define ST_SCHARMASK 0x70 +#define ST_TIMEOUT 0x80 + +#define MISR_DCD 0x80 +#define MISR_CTS 0x40 +#define MISR_RI 0x20 +#define MISR_DSR 0x10 + +/*****************************************************************************/ + +/* + * Defines for the CCSR status register. 
+ */ +#define CCSR_RXENABLED 0x80 +#define CCSR_RXFLOWON 0x40 +#define CCSR_RXFLOWOFF 0x20 +#define CCSR_TXENABLED 0x08 +#define CCSR_TXFLOWON 0x04 +#define CCSR_TXFLOWOFF 0x02 + +/*****************************************************************************/ + +/* + * Define the embedded commands. + */ +#define ETC_CMD 0x00 +#define ETC_STARTBREAK 0x81 +#define ETC_DELAY 0x82 +#define ETC_STOPBREAK 0x83 + +/*****************************************************************************/ +#endif diff --git a/sys/platform/pc64/isa/intr_machdep.c b/sys/platform/pc64/isa/intr_machdep.c new file mode 100644 index 0000000000..9993231095 --- /dev/null +++ b/sys/platform/pc64/isa/intr_machdep.c @@ -0,0 +1,273 @@ +/*- + * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 + * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ + * $DragonFly: src/sys/platform/pc64/isa/intr_machdep.c,v 1.1 2008/08/29 17:07:19 dillon Exp $ + */ +/* + * This file contains an aggregated module marked: + * Copyright (c) 1997, Stefan Esser + * All rights reserved. + * See the notice for details. + */ + +#include "use_isa.h" +//#include "opt_auto_eoi.h" + +#include +#ifndef SMP +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if NISA > 0 +#include +#endif +#include +#include +#include +#include + +/* XXX should be in suitable include files */ +#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ + +#ifdef APIC_IO +/* + * This is to accommodate "mixed-mode" programming for + * motherboards that don't connect the 8254 to the IO APIC. 
+ */ +#define AUTO_EOI_1 1 +#endif + +static void init_i8259(void); + +#define NMI_PARITY (1 << 7) +#define NMI_IOCHAN (1 << 6) +#define ENMI_WATCHDOG (1 << 7) +#define ENMI_BUSTIMER (1 << 6) +#define ENMI_IOSTATUS (1 << 5) + +/* + * Handle a NMI, possibly a machine check. + * return true to panic system, false to ignore. + */ +int +isa_nmi(int cd) +{ + int retval = 0; + int isa_port = inb(0x61); + int eisa_port = inb(0x461); + + log(LOG_CRIT, "NMI ISA %x, EISA %x\n", isa_port, eisa_port); + + if (isa_port & NMI_PARITY) { + log(LOG_CRIT, "RAM parity error, likely hardware failure."); + retval = 1; + } + + if (isa_port & NMI_IOCHAN) { + log(LOG_CRIT, "I/O channel check, likely hardware failure."); + retval = 1; + } + + /* + * On a real EISA machine, this will never happen. However it can + * happen on ISA machines which implement XT style floating point + * error handling (very rare). Save them from a meaningless panic. + */ + if (eisa_port == 0xff) + return(retval); + + if (eisa_port & ENMI_WATCHDOG) { + log(LOG_CRIT, "EISA watchdog timer expired, likely hardware failure."); + retval = 1; + } + + if (eisa_port & ENMI_BUSTIMER) { + log(LOG_CRIT, "EISA bus timeout, likely hardware failure."); + retval = 1; + } + + if (eisa_port & ENMI_IOSTATUS) { + log(LOG_CRIT, "EISA I/O port status error."); + retval = 1; + } + return(retval); +} + +/* + * ICU reinitialize when ICU configuration has lost. 
+ */ +void +icu_reinit(void) +{ + int i; + + init_i8259(); + for (i = 0; i < MAX_HARDINTS; ++i) { + if (count_registered_ints(i)) + machintr_intren(i); + } +} + +/* + * Fill in default interrupt table (in case of spurious interrupt + * during configuration of kernel, setup interrupt control unit + */ +void +isa_defaultirq(void) +{ + int i; + + /* icu vectors */ + for (i = 0; i < MAX_HARDINTS; i++) + machintr_vector_setdefault(i); + init_i8259(); +} + +static void +init_i8259(void) +{ + + /* initialize 8259's */ + outb(IO_ICU1, 0x11); /* reset; program device, four bytes */ + outb(IO_ICU1+ICU_IMR_OFFSET, IDT_OFFSET); /* starting at this vector index */ + outb(IO_ICU1+ICU_IMR_OFFSET, 1 << ICU_IRQ_SLAVE); /* slave on line 2 */ +#ifdef AUTO_EOI_1 + outb(IO_ICU1+ICU_IMR_OFFSET, 2 | 1); /* auto EOI, 8086 mode */ +#else + outb(IO_ICU1+ICU_IMR_OFFSET, 1); /* 8086 mode */ +#endif + outb(IO_ICU1+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ + outb(IO_ICU1, 0x0a); /* default to IRR on read */ + outb(IO_ICU1, 0xc0 | (3 - 1)); /* pri order 3-7, 0-2 (com2 first) */ + outb(IO_ICU2, 0x11); /* reset; program device, four bytes */ + outb(IO_ICU2+ICU_IMR_OFFSET, IDT_OFFSET+8); /* staring at this vector index */ + outb(IO_ICU2+ICU_IMR_OFFSET, ICU_IRQ_SLAVE); +#ifdef AUTO_EOI_2 + outb(IO_ICU2+ICU_IMR_OFFSET, 2 | 1); /* auto EOI, 8086 mode */ +#else + outb(IO_ICU2+ICU_IMR_OFFSET,1); /* 8086 mode */ +#endif + outb(IO_ICU2+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ + outb(IO_ICU2, 0x0a); /* default to IRR on read */ +} + +#if NISA > 0 +/* + * Return a bitmap of the current interrupt requests. This is 8259-specific + * and is only suitable for use at probe time. + */ +intrmask_t +isa_irq_pending(void) +{ + u_char irr1; + u_char irr2; + + irr1 = inb(IO_ICU1); + irr2 = inb(IO_ICU2); + return ((irr2 << 8) | irr1); +} + +#endif + +/* The following notice applies beyond this point in the file */ + +/* + * Copyright (c) 1997, Stefan Esser + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/intr_machdep.c,v 1.29.2.5 2001/10/14 06:54:27 luigi Exp $ + * + */ + +#ifdef SMP +/* + * forward_fast_remote() + * + * This function is called from the receiving end of an IPIQ when a + * remote cpu wishes to forward a fast interrupt to us. All we have to + * do is set the interrupt pending and let the IPI's doreti deal with it. 
+ */ +void +forward_fastint_remote(void *arg) +{ + int irq = (int)arg; + struct mdglobaldata *gd = mdcpu; + + atomic_set_int_nonlocked(&gd->gd_fpending, 1 << irq); + atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND); +} + +#endif diff --git a/sys/platform/pc64/isa/intr_machdep.h b/sys/platform/pc64/isa/intr_machdep.h new file mode 100644 index 0000000000..23da2b188d --- /dev/null +++ b/sys/platform/pc64/isa/intr_machdep.h @@ -0,0 +1,166 @@ +/*- + * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/intr_machdep.h,v 1.19.2.2 2001/10/14 20:05:50 luigi Exp $ + * $DragonFly: src/sys/platform/pc64/isa/intr_machdep.h,v 1.1 2008/08/29 17:07:19 dillon Exp $ + */ + +#ifndef _ARCH_ISA_INTR_MACHDEP_H_ +#define _ARCH_ISA_INTR_MACHDEP_H_ + +#ifndef LOCORE +#ifndef _SYS_INTERRUPT_H_ +#include +#endif +#ifndef _SYS_SERIALIZE_H_ +#include +#endif +#endif + +/* + * Low level interrupt code. + */ + +#ifdef _KERNEL + +#define IDT_OFFSET 32 + +#if defined(SMP) +/* + * XXX FIXME: rethink location for all IPI vectors. + */ + +/* + APIC TPR priority vector levels: + + 0xff (255) +-------------+ + | | 15 (IPIs: Xspuriousint) + 0xf0 (240) +-------------+ + | | 14 + 0xe0 (224) +-------------+ + | | 13 + 0xd0 (208) +-------------+ + | | 12 + 0xc0 (192) +-------------+ + | | 11 + 0xb0 (176) +-------------+ + | | 10 (IPIs: Xcpustop) + 0xa0 (160) +-------------+ + | | 9 (IPIs: Xinvltlb) + 0x90 (144) +-------------+ + | | 8 (linux/BSD syscall, IGNORE FAST HW INTS) + 0x80 (128) +-------------+ + | | 7 (FAST_INTR 16-23) + 0x70 (112) +-------------+ + | | 6 (FAST_INTR 0-15) + 0x60 (96) +-------------+ + | | 5 (IGNORE HW INTS) + 0x50 (80) +-------------+ + | | 4 (2nd IO APIC) + 0x40 (64) +------+------+ + | | | 3 (upper APIC hardware INTs: PCI) + 0x30 (48) +------+------+ + | | 2 (start of hardware INTs: ISA) + 0x20 (32) +-------------+ + | | 1 (exceptions, traps, etc.) + 0x10 (16) +-------------+ + | | 0 (exceptions, traps, etc.) 
+ 0x00 (0) +-------------+ + */ + +/* blocking values for local APIC Task Priority Register */ +#define TPR_BLOCK_HWI 0x4f /* hardware INTs */ +#define TPR_IGNORE_HWI 0x5f /* ignore INTs */ +#define TPR_BLOCK_FHWI 0x7f /* hardware FAST INTs */ +#define TPR_IGNORE_FHWI 0x8f /* ignore FAST INTs */ +#define TPR_IPI_ONLY 0x8f /* ignore FAST INTs */ +#define TPR_BLOCK_XINVLTLB 0x9f /* */ +#define TPR_BLOCK_XCPUSTOP 0xaf /* */ +#define TPR_BLOCK_ALL 0xff /* all INTs */ + + +/* TLB shootdowns */ +#define XINVLTLB_OFFSET (IDT_OFFSET + 112) + +/* unused/open (was inter-cpu clock handling) */ +#define XUNUSED113_OFFSET (IDT_OFFSET + 113) + +/* inter-CPU rendezvous */ +#define XUNUSED114_OFFSET (IDT_OFFSET + 114) + +/* IPIQ rendezvous */ +#define XIPIQ_OFFSET (IDT_OFFSET + 115) + +/* IPI to signal CPUs to stop and wait for another CPU to restart them */ +#define XCPUSTOP_OFFSET (IDT_OFFSET + 128) + +/* + * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: + */ +#define XSPURIOUSINT_OFFSET (IDT_OFFSET + 223) + +#endif /* SMP */ + +#ifndef LOCORE + +/* + * Type of the first (asm) part of an interrupt handler. 
+ */ +#ifndef JG_defined_inthand_t +#define JG_defined_inthand_t +typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +typedef void unpendhand_t(void); +#endif + +#define IDTVEC(name) __CONCAT(X,name) + +#if defined(SMP) +inthand_t + Xinvltlb, /* TLB shootdowns */ + Xcpuast, /* Additional software trap on other cpu */ + Xforward_irq, /* Forward irq to cpu holding ISR lock */ + Xcpustop, /* CPU stops & waits for another CPU to restart it */ + Xspuriousint, /* handle APIC "spurious INTs" */ + Xipiq; /* handle lwkt_send_ipiq() requests */ +#endif /* SMP */ + +void call_fast_unpend(int irq); +void isa_defaultirq (void); +int isa_nmi (int cd); +void icu_reinit (void); + +#endif /* LOCORE */ + +#endif /* _KERNEL */ + +#endif /* !_ARCH_ISA_INTR_MACHDEP_H_ */ diff --git a/sys/platform/pc64/isa/lptreg.h b/sys/platform/pc64/isa/lptreg.h new file mode 100644 index 0000000000..9cf22c3560 --- /dev/null +++ b/sys/platform/pc64/isa/lptreg.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. 
+ * + * form: @(#)lptreg.h 1.1 (Berkeley) 12/19/90 + * $FreeBSD: src/sys/i386/isa/lptreg.h,v 1.7 1999/08/28 00:44:57 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/lptreg.h,v 1.1 2008/08/29 17:07:19 dillon Exp $ + */ + +/* + * AT Parallel Port (for lineprinter) + * Interface port and bit definitions + * Written by William Jolitz 12/18/90 + * Copyright (C) William Jolitz 1990 + */ + +#define lpt_data 0 /* Data to/from printer (R/W) */ + +#define lpt_status 1 /* Status of printer (R) */ +#define LPS_NERR 0x08 /* printer no error */ +#define LPS_SEL 0x10 /* printer selected */ +#define LPS_OUT 0x20 /* printer out of paper */ +#define LPS_NACK 0x40 /* printer no ack of data */ +#define LPS_NBSY 0x80 /* printer no ack of data */ + +#define lpt_control 2 /* Control printer (R/W) */ +#define LPC_STB 0x01 /* strobe data to printer */ +#define LPC_AUTOL 0x02 /* automatic linefeed */ +#define LPC_NINIT 0x04 /* initialize printer */ +#define LPC_SEL 0x08 /* printer selected */ +#define LPC_ENA 0x10 /* enable IRQ */ diff --git a/sys/platform/pc64/amd64/npx.c b/sys/platform/pc64/isa/npx.c similarity index 54% copy from sys/platform/pc64/amd64/npx.c copy to sys/platform/pc64/isa/npx.c index 9baf8b4bb6..82162bc270 100644 --- a/sys/platform/pc64/amd64/npx.c +++ b/sys/platform/pc64/isa/npx.c @@ -1,45 +1,45 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. +/*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. * All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 + * + * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/npx.c,v 1.3 2007/12/12 23:49:22 dillon Exp $ + * $DragonFly: src/sys/platform/pc64/isa/npx.c,v 1.1 2008/08/29 17:07:19 dillon Exp $ */ +#include "opt_cpu.h" #include "opt_debug_npx.h" +#include "opt_math_emulate.h" #include #include @@ -61,6 +61,7 @@ #endif #include #include +#include #include #include #include @@ -71,6 +72,24 @@ #include #include +#ifndef SMP +#include +#include +#include +#endif + +/* + * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. + */ + +/* Configuration flags. 
*/ +#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) +#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) +#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) +#define NPX_PREFER_EMULATOR (1 << 3) + +#ifdef __GNUC__ + #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) #define fnclex() __asm("fnclex") #define fninit() __asm("fninit") @@ -78,11 +97,35 @@ #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) +#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) #ifndef CPU_DISABLE_SSE #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #endif +#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ + : : "n" (CR0_TS) : "ax") +#define stop_emulating() __asm("clts") + +#else /* not __GNUC__ */ + +void fldcw (caddr_t addr); +void fnclex (void); +void fninit (void); +void fnop (void); +void fnsave (caddr_t addr); +void fnstcw (caddr_t addr); +void fnstsw (caddr_t addr); +void fp_divide_by_0 (void); +void frstor (caddr_t addr); +#ifndef CPU_DISABLE_SSE +void fxsave (caddr_t addr); +void fxrstor (caddr_t addr); +#endif +void start_emulating (void); +void stop_emulating (void); + +#endif /* __GNUC__ */ #ifndef CPU_DISABLE_SSE #define GET_FPU_EXSW_PTR(td) \ @@ -99,26 +142,372 @@ typedef u_char bool_t; static void fpu_clean_state(void); #endif -u_int cpu_fxsr = 0; static int npx_attach (device_t dev); + void npx_intr (void *); +static int npx_probe (device_t dev); +static int npx_probe1 (device_t dev); static void fpusave (union savefpu *); static void fpurstor (union savefpu *); +int hw_float; /* XXX currently just alias for npx_exists */ + +SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, + CTLFLAG_RD, &hw_float, 0, + "Floatingpoint instructions executed in hardware"); 
#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(CPU_DISABLE_SSE) int mmxopt = 1; SYSCTL_INT(_kern, OID_AUTO, mmxopt, CTLFLAG_RD, &mmxopt, 0, "MMX/XMM optimized bcopy/copyin/copyout support"); #endif +#ifndef SMP +static u_int npx0_imask; +static struct gate_descriptor npx_idt_probeintr; +static int npx_intrno; +static volatile u_int npx_intrs_while_probing; +static volatile u_int npx_traps_while_probing; +#endif + +static bool_t npx_ex16; +static bool_t npx_exists; +static bool_t npx_irq13; +static int npx_irq; /* irq number */ + +#ifndef SMP +/* + * Special interrupt handlers. Someday intr0-intr15 will be used to count + * interrupts. We'll still need a special exception 16 handler. The busy + * latch stuff in probeintr() can be moved to npxprobe(). + */ +inthand_t probeintr; +__asm(" \n\ + .text \n\ + .p2align 2,0x90 \n\ + .type " __XSTRING(CNAME(probeintr)) ",@function \n\ +" __XSTRING(CNAME(probeintr)) ": \n\ + ss \n\ + incl " __XSTRING(CNAME(npx_intrs_while_probing)) " \n\ + pushl %eax \n\ + movb $0x20,%al # EOI (asm in strings loses cpp features) \n\ + outb %al,$0xa0 # IO_ICU2 \n\ + outb %al,$0x20 # IO_ICU1 \n\ + movb $0,%al \n\ + outb %al,$0xf0 # clear BUSY# latch \n\ + popl %eax \n\ + iret \n\ +"); + +inthand_t probetrap; +__asm(" \n\ + .text \n\ + .p2align 2,0x90 \n\ + .type " __XSTRING(CNAME(probetrap)) ",@function \n\ +" __XSTRING(CNAME(probetrap)) ": \n\ + ss \n\ + incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ + fnclex \n\ + iret \n\ +"); +#endif /* SMP */ + +static struct krate badfprate = { 1 }; + +/* + * Probe routine. Initialize cr0 to give correct behaviour for [f]wait + * whether the device exists or not (XXX should be elsewhere). Set flags + * to tell npxattach() what to do. Modify device struct if npx doesn't + * need to use interrupts. Return 1 if device exists. 
+ */ +static int +npx_probe(device_t dev) +{ +#ifdef SMP + + if (resource_int_value("npx", 0, "irq", &npx_irq) != 0) + npx_irq = 13; + return npx_probe1(dev); + +#else /* SMP */ + + int result; + u_long save_eflags; + u_char save_icu1_mask; + u_char save_icu2_mask; + struct gate_descriptor save_idt_npxintr; + struct gate_descriptor save_idt_npxtrap; + /* + * This routine is now just a wrapper for npxprobe1(), to install + * special npx interrupt and trap handlers, to enable npx interrupts + * and to disable other interrupts. Someday isa_configure() will + * install suitable handlers and run with interrupts enabled so we + * won't need to do so much here. + */ + if (resource_int_value("npx", 0, "irq", &npx_irq) != 0) + npx_irq = 13; + npx_intrno = IDT_OFFSET + npx_irq; + save_eflags = read_eflags(); + cpu_disable_intr(); + save_icu1_mask = inb(IO_ICU1 + 1); + save_icu2_mask = inb(IO_ICU2 + 1); + save_idt_npxintr = idt[npx_intrno]; + save_idt_npxtrap = idt[16]; + outb(IO_ICU1 + 1, ~(1 << ICU_IRQ_SLAVE)); + outb(IO_ICU2 + 1, ~(1 << (npx_irq - 8))); + setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + npx_idt_probeintr = idt[npx_intrno]; + cpu_enable_intr(); + result = npx_probe1(dev); + cpu_disable_intr(); + outb(IO_ICU1 + 1, save_icu1_mask); + outb(IO_ICU2 + 1, save_icu2_mask); + idt[npx_intrno] = save_idt_npxintr; + idt[16] = save_idt_npxtrap; + write_eflags(save_eflags); + return (result); + +#endif /* SMP */ +} + +static int +npx_probe1(device_t dev) +{ +#ifndef SMP + u_short control; + u_short status; +#endif + + /* + * Partially reset the coprocessor, if any. Some BIOS's don't reset + * it after a warm boot. + */ + outb(0xf1, 0); /* full reset on some systems, NOP on others */ + outb(0xf0, 0); /* clear BUSY# latch */ + /* + * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT + * instructions. 
We must set the CR0_MP bit and use the CR0_TS + * bit to control the trap, because setting the CR0_EM bit does + * not cause WAIT instructions to trap. It's important to trap + * WAIT instructions - otherwise the "wait" variants of no-wait + * control instructions would degenerate to the "no-wait" variants + * after FP context switches but work correctly otherwise. It's + * particularly important to trap WAITs when there is no NPX - + * otherwise the "wait" variants would always degenerate. + * + * Try setting CR0_NE to get correct error reporting on 486DX's. + * Setting it should fail or do nothing on lesser processors. + */ + load_cr0(rcr0() | CR0_MP | CR0_NE); + /* + * But don't trap while we're probing. + */ + stop_emulating(); + /* + * Finish resetting the coprocessor, if any. If there is an error + * pending, then we may get a bogus IRQ13, but probeintr() will handle + * it OK. Bogus halts have never been observed, but we enabled + * IRQ13 and cleared the BUSY# latch early to handle them anyway. + */ + fninit(); + +#ifdef SMP + /* + * Exception 16 MUST work for SMP. + */ + npx_irq13 = 0; + npx_ex16 = hw_float = npx_exists = 1; + device_set_desc(dev, "math processor"); + return (0); + +#else /* !SMP */ + device_set_desc(dev, "math processor"); + + /* + * Don't use fwait here because it might hang. + * Don't use fnop here because it usually hangs if there is no FPU. + */ + DELAY(1000); /* wait for any IRQ13 */ +#ifdef DIAGNOSTIC + if (npx_intrs_while_probing != 0) + kprintf("fninit caused %u bogus npx interrupt(s)\n", + npx_intrs_while_probing); + if (npx_traps_while_probing != 0) + kprintf("fninit caused %u bogus npx trap(s)\n", + npx_traps_while_probing); +#endif + /* + * Check for a status of mostly zero. + */ + status = 0x5a5a; + fnstsw(&status); + if ((status & 0xb8ff) == 0) { + /* + * Good, now check for a proper control word. 
+ */ + control = 0x5a5a; + fnstcw(&control); + if ((control & 0x1f3f) == 0x033f) { + hw_float = npx_exists = 1; + /* + * We have an npx, now divide by 0 to see if exception + * 16 works. + */ + control &= ~(1 << 2); /* enable divide by 0 trap */ + fldcw(&control); + npx_traps_while_probing = npx_intrs_while_probing = 0; + fp_divide_by_0(); + if (npx_traps_while_probing != 0) { + /* + * Good, exception 16 works. + */ + npx_ex16 = 1; + return (0); + } + if (npx_intrs_while_probing != 0) { + int rid; + struct resource *r; + void *intr; + /* + * Bad, we are stuck with IRQ13. + */ + npx_irq13 = 1; + /* + * npxattach would be too late to set npx0_imask + */ + npx0_imask |= (1 << npx_irq); + + /* + * We allocate these resources permanently, + * so there is no need to keep track of them. + */ + rid = 0; + r = bus_alloc_resource(dev, SYS_RES_IOPORT, + &rid, IO_NPX, IO_NPX, + IO_NPXSIZE, RF_ACTIVE); + if (r == 0) + panic("npx: can't get ports"); + rid = 0; + r = bus_alloc_resource(dev, SYS_RES_IRQ, + &rid, npx_irq, npx_irq, + 1, RF_ACTIVE); + if (r == 0) + panic("npx: can't get IRQ"); + BUS_SETUP_INTR(device_get_parent(dev), + dev, r, 0, + npx_intr, 0, &intr, NULL); + if (intr == 0) + panic("npx: can't create intr"); + + return (0); + } + /* + * Worse, even IRQ13 is broken. Use emulator. + */ + } + } + /* + * Probe failed, but we want to get to npxattach to initialize the + * emulator and say that it has been installed. XXX handle devices + * that aren't really devices better. 
+ */ + return (0); +#endif /* SMP */ +} + /* * Attach routine - announce which it is, and wire into system */ int npx_attach(device_t dev) { + int flags; + + if (resource_int_value("npx", 0, "flags", &flags) != 0) + flags = 0; + + if (flags) + device_printf(dev, "flags 0x%x ", flags); + if (npx_irq13) { + device_printf(dev, "using IRQ 13 interface\n"); + } else { +#if defined(MATH_EMULATE) + if (npx_ex16) { + if (!(flags & NPX_PREFER_EMULATOR)) + device_printf(dev, "INT 16 interface\n"); + else { + device_printf(dev, "FPU exists, but flags request " + "emulator\n"); + hw_float = npx_exists = 0; + } + } else if (npx_exists) { + device_printf(dev, "error reporting broken; using 387 emulator\n"); + hw_float = npx_exists = 0; + } else + device_printf(dev, "387 emulator\n"); +#else + if (npx_ex16) { + device_printf(dev, "INT 16 interface\n"); + if (flags & NPX_PREFER_EMULATOR) { + device_printf(dev, "emulator requested, but none compiled " + "into kernel, using FPU\n"); + } + } else + device_printf(dev, "no 387 emulator in kernel and no FPU!\n"); +#endif + } npxinit(__INITIAL_NPXCW__); - return (0); + +#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(CPU_DISABLE_SSE) + /* + * The asm_mmx_*() routines actually use XMM as well, so only + * enable them if we have SSE2 and are using FXSR (fxsave/fxrstore). 
+ */ + TUNABLE_INT_FETCH("kern.mmxopt", &mmxopt); + if ((cpu_feature & CPUID_MMX) && (cpu_feature & CPUID_SSE) && + (cpu_feature & CPUID_SSE2) && + npx_ex16 && npx_exists && mmxopt && cpu_fxsr + ) { + if ((flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY) == 0) { + bcopy_vector = (void **)asm_xmm_bcopy; + ovbcopy_vector = (void **)asm_xmm_bcopy; + memcpy_vector = (void **)asm_xmm_memcpy; + kprintf("Using XMM optimized bcopy/copyin/copyout\n"); + } + if ((flags & NPX_DISABLE_I586_OPTIMIZED_BZERO) == 0) { + /* XXX */ + } + } else if ((cpu_feature & CPUID_MMX) && (cpu_feature & CPUID_SSE) && + npx_ex16 && npx_exists && mmxopt && cpu_fxsr + ) { + if ((flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY) == 0) { + bcopy_vector = (void **)asm_mmx_bcopy; + ovbcopy_vector = (void **)asm_mmx_bcopy; + memcpy_vector = (void **)asm_mmx_memcpy; + kprintf("Using MMX optimized bcopy/copyin/copyout\n"); + } + if ((flags & NPX_DISABLE_I586_OPTIMIZED_BZERO) == 0) { + /* XXX */ + } + } +#endif +#if 0 + if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && + timezero("i586_bzero()", i586_bzero) < + timezero("bzero()", bzero) * 4 / 5) { + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { + bcopy_vector = i586_bcopy; + ovbcopy_vector = i586_bcopy; + } + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) + bzero_vector = i586_bzero; + if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { + copyin_vector = i586_copyin; + copyout_vector = i586_copyout; + } + } +#endif + return (0); /* XXX unused */ } /* @@ -129,6 +518,8 @@ npxinit(u_short control) { static union savefpu dummy __aligned(16); + if (!npx_exists) + return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. 
npxsave() initializes @@ -136,11 +527,11 @@ npxinit(u_short control) */ npxsave(&dummy); crit_enter(); - /*stop_emulating();*/ + stop_emulating(); fldcw(&control); fpusave(curthread->td_savefpu); mdcpu->gd_npxthread = NULL; - /*start_emulating();*/ + start_emulating(); crit_exit(); } @@ -152,6 +543,23 @@ npxexit(void) { if (curthread == mdcpu->gd_npxthread) npxsave(curthread->td_savefpu); +#ifdef NPX_DEBUG + if (npx_exists) { + u_int masked_exceptions; + + masked_exceptions = + curthread->td_savefpu->sv_87.sv_env.en_cw + & curthread->td_savefpu->sv_87.sv_env.en_sw & 0x7f; + /* + * Log exceptions that would have trapped with the old + * control word (overflow, divide by 0, and invalid operand). + */ + if (masked_exceptions & 0x0d) + log(LOG_ERR, + "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", + curproc->p_pid, curproc->p_comm, masked_exceptions); + } +#endif } /* @@ -325,8 +733,6 @@ static char fpetable[128] = { FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; -#if 0 - /* * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. * @@ -366,26 +772,22 @@ npx_intr(void *dummy) * before we entered our critical section. If that occured, the * TS bit will be set and npxthread will be NULL. 
*/ - panic("npx_intr: not coded"); - /* XXX FP STATE FLAG MUST BE PART OF CONTEXT SUPPLIED BY REAL KERNEL */ -#if 0 - if (rcr0() & CR0_TS) { + if (npx_exists && (rcr0() & CR0_TS)) { KASSERT(mdcpu->gd_npxthread == NULL, ("gd_npxthread was %p with TS set!", mdcpu->gd_npxthread)); npxdna(); crit_exit(); return; } -#endif - if (mdcpu->gd_npxthread == NULL) { + if (mdcpu->gd_npxthread == NULL || !npx_exists) { get_mplock(); - kprintf("npxintr: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, curthread); + kprintf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", + mdcpu->gd_npxthread, curthread, npx_exists); panic("npxintr from nowhere"); } if (mdcpu->gd_npxthread != curthread) { get_mplock(); - kprintf("npxintr: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, curthread); + kprintf("npxintr: npxthread = %p, curthread = %p, npx_exists = %d\n", + mdcpu->gd_npxthread, curthread, npx_exists); panic("npxintr from non-current process"); } @@ -401,7 +803,7 @@ npx_intr(void *dummy) * Pass exception to process. */ frame = (struct intrframe *)&dummy; /* XXX */ - if ((ISPL(frame->if_cs) == SEL_UPL) /*||(frame->if_eflags&PSL_VM)*/) { + if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { /* * Interrupt is essentially a trap, so we can afford to call * the SIGFPE handler (if any) as soon as the interrupt @@ -442,8 +844,6 @@ npx_intr(void *dummy) crit_exit(); } -#endif - /* * Implement the device not available (DNA) exception. gd_npxthread had * better be NULL. Restore the current thread's FP state and set gd_npxthread @@ -453,13 +853,17 @@ npx_intr(void *dummy) * section to stabilize the FP state. 
*/ int -npxdna(struct trapframe *frame) +npxdna(void) { + thread_t td = curthread; u_long *exstat; + int didinit = 0; + if (!npx_exists) + return (0); if (mdcpu->gd_npxthread != NULL) { kprintf("npxdna: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, curthread); + mdcpu->gd_npxthread, td); panic("npxdna"); } @@ -468,9 +872,10 @@ npxdna(struct trapframe *frame) * used the FP unit. This also occurs when a thread pushes a * signal handler and uses FP in the handler. */ - if ((curthread->td_flags & TDF_USINGFP) == 0) { - curthread->td_flags |= TDF_USINGFP; + if ((td->td_flags & (TDF_USINGFP | TDF_KERNELFP)) == 0) { + td->td_flags |= TDF_USINGFP; npxinit(__INITIAL_NPXCW__); + didinit = 1; } /* @@ -481,12 +886,12 @@ npxdna(struct trapframe *frame) * fpstate. */ crit_enter(); - /*stop_emulating();*/ + stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. */ - mdcpu->gd_npxthread = curthread; - exstat = GET_FPU_EXSW_PTR(curthread); + mdcpu->gd_npxthread = td; + exstat = GET_FPU_EXSW_PTR(td); *exstat = 0; /* * The following frstor may cause an IRQ13 when the state being @@ -500,7 +905,18 @@ npxdna(struct trapframe *frame) * fnsave are broken, so our treatment breaks fnclex if it is the * first FPU instruction after a context switch. 
*/ - fpurstor(curthread->td_savefpu); + if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) +#ifndef CPU_DISABLE_SSE + && cpu_fxsr +#endif + ) { + krateprintf(&badfprate, + "FXRSTR: illegal FP MXCSR %08x didinit = %d\n", + td->td_savefpu->sv_xmm.sv_env.en_mxcsr, didinit); + td->td_savefpu->sv_xmm.sv_env.en_mxcsr &= 0xFFBF; + lwpsignal(curproc, curthread->td_lwp, SIGFPE); + } + fpurstor(td->td_savefpu); crit_exit(); return (1); @@ -529,21 +945,61 @@ npxdna(struct trapframe *frame) void npxsave(union savefpu *addr) { +#if defined(SMP) || !defined(CPU_DISABLE_SSE) + crit_enter(); - /*stop_emulating();*/ + stop_emulating(); fpusave(addr); mdcpu->gd_npxthread = NULL; fninit(); - /*start_emulating();*/ + start_emulating(); crit_exit(); + +#else /* !SMP and CPU_DISABLE_SSE */ + + u_char icu1_mask; + u_char icu2_mask; + u_char old_icu1_mask; + u_char old_icu2_mask; + struct gate_descriptor save_idt_npxintr; + u_long save_eflags; + + save_eflags = read_eflags(); + cpu_disable_intr(); + old_icu1_mask = inb(IO_ICU1 + 1); + old_icu2_mask = inb(IO_ICU2 + 1); + save_idt_npxintr = idt[npx_intrno]; + outb(IO_ICU1 + 1, old_icu1_mask & ~((1 << ICU_IRQ_SLAVE) | npx0_imask)); + outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); + idt[npx_intrno] = npx_idt_probeintr; + cpu_enable_intr(); + stop_emulating(); + fnsave(addr); + fnop(); + cpu_disable_intr(); + mdcpu->gd_npxthread = NULL; + start_emulating(); + icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ + icu2_mask = inb(IO_ICU2 + 1); + outb(IO_ICU1 + 1, + (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); + outb(IO_ICU2 + 1, + (icu2_mask & ~(npx0_imask >> 8)) + | (old_icu2_mask & (npx0_imask >> 8))); + idt[npx_intrno] = save_idt_npxintr; + write_eflags(save_eflags); /* back to usual state */ + +#endif /* SMP */ } static void fpusave(union savefpu *addr) { +#ifndef CPU_DISABLE_SSE if (cpu_fxsr) fxsave(addr); else +#endif fnsave(addr); } @@ -558,6 +1014,8 @@ npxpush(mcontext_t *mctx) { thread_t td = curthread; 
+ KKASSERT((td->td_flags & TDF_KERNELFP) == 0); + if (td->td_flags & TDF_USINGFP) { if (mdcpu->gd_npxthread == td) { /* @@ -572,8 +1030,14 @@ npxpush(mcontext_t *mctx) } bcopy(td->td_savefpu, mctx->mc_fpregs, sizeof(mctx->mc_fpregs)); td->td_flags &= ~TDF_USINGFP; + mctx->mc_fpformat = +#ifndef CPU_DISABLE_SSE + (cpu_fxsr) ? _MC_FPFMT_XMM : +#endif + _MC_FPFMT_387; } else { mctx->mc_ownedfp = _MC_FPOWNED_NONE; + mctx->mc_fpformat = _MC_FPFMT_NODEV; } } @@ -585,6 +1049,8 @@ npxpop(mcontext_t *mctx) { thread_t td = curthread; + KKASSERT((td->td_flags & TDF_KERNELFP) == 0); + switch(mctx->mc_ownedfp) { case _MC_FPOWNED_NONE: /* @@ -610,16 +1076,31 @@ npxpop(mcontext_t *mctx) * XXX: This is bit inefficient, if the code being returned * to is actively using the FP this results in multiple * kernel faults. + * + * WARNING: The saved state was exposed to userland and may + * have to be sanitized to avoid a GP fault in the kernel. */ if (td == mdcpu->gd_npxthread) npxsave(td->td_savefpu); bcopy(mctx->mc_fpregs, td->td_savefpu, sizeof(*td->td_savefpu)); + if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) +#ifndef CPU_DISABLE_SSE + && cpu_fxsr +#endif + ) { + krateprintf(&badfprate, + "pid %d (%s) signal return from user: " + "illegal FP MXCSR %08x\n", + td->td_proc->p_pid, + td->td_proc->p_comm, + td->td_savefpu->sv_xmm.sv_env.en_mxcsr); + td->td_savefpu->sv_xmm.sv_env.en_mxcsr &= 0xFFBF; + } td->td_flags |= TDF_USINGFP; break; } } - #ifndef CPU_DISABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore @@ -668,3 +1149,34 @@ fpurstor(union savefpu *addr) #endif } +/* + * Because npx is a static device that always exists under nexus, + * and is not scanned by the nexus device, we need an identify + * function to install the device. 
+ */ +static device_method_t npx_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, bus_generic_identify), + DEVMETHOD(device_probe, npx_probe), + DEVMETHOD(device_attach, npx_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + { 0, 0 } +}; + +static driver_t npx_driver = { + "npx", + npx_methods, + 1, /* no softc */ +}; + +static devclass_t npx_devclass; + +/* + * We prefer to attach to the root nexus so that the usual case (exception 16) + * doesn't describe the processor as being `on isa'. + */ +DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); diff --git a/sys/platform/pc64/isa/pmtimer.c b/sys/platform/pc64/isa/pmtimer.c new file mode 100644 index 0000000000..f023078d4d --- /dev/null +++ b/sys/platform/pc64/isa/pmtimer.c @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 2000 Mitsuru IWASAKI + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/pc64/isa/pmtimer.c,v 1.1 2008/08/29 17:07:20 dillon Exp $ + */ + +#include + +/* + * Timer device driver for power management events. + * The code for suspend/resume is derived from APM device driver. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +static devclass_t pmtimer_devclass; + +/* reject any PnP devices for now */ +static struct isa_pnp_id pmtimer_ids[] = { + {0} +}; + +static int +pmtimer_probe(device_t dev) +{ + + if (ISA_PNP_PROBE(device_get_parent(dev), dev, pmtimer_ids) == ENXIO) { + return (ENXIO); + } + + /* only one instance always */ + return (device_get_unit(dev)); +} + +static struct timeval suspend_time; +static struct timeval diff_time; + +static int +pmtimer_suspend(device_t dev) +{ + crit_enter(); + microtime(&diff_time); + inittodr(0); + microtime(&suspend_time); + timevalsub(&diff_time, &suspend_time); + crit_exit(); + return (0); +} + +static int +pmtimer_resume(device_t dev) +{ + u_int second, minute, hour; + struct timeval resume_time; + + /* modified for adjkerntz */ + crit_enter(); + timer_restore(); /* restore the all timers */ + inittodr(0); /* adjust time to RTC */ + microtime(&resume_time); + + crit_exit(); + second = resume_time.tv_sec - suspend_time.tv_sec; + hour = second / 3600; + second %= 3600; + minute = second / 60; + second %= 60; + log(LOG_NOTICE, "wakeup from sleeping state (slept %02d:%02d:%02d)\n", + hour, minute, 
second); + return (0); +} + +/* + * Because pmtimer is a static device that always exists under any attached + * isa device, and not scanned by the isa device, we need an identify + * function to install the device. + */ +static device_method_t pmtimer_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, bus_generic_identify), + DEVMETHOD(device_probe, pmtimer_probe), + DEVMETHOD(device_attach, bus_generic_attach), + DEVMETHOD(device_suspend, pmtimer_suspend), + DEVMETHOD(device_resume, pmtimer_resume), + { 0, 0 } +}; + +static driver_t pmtimer_driver = { + "pmtimer", + pmtimer_methods, + 1, /* no softc */ +}; + +DRIVER_MODULE(pmtimer, isa, pmtimer_driver, pmtimer_devclass, 0, 0); diff --git a/sys/platform/pc64/isa/prof_machdep.c b/sys/platform/pc64/isa/prof_machdep.c new file mode 100644 index 0000000000..148cdb99dc --- /dev/null +++ b/sys/platform/pc64/isa/prof_machdep.c @@ -0,0 +1,362 @@ +/*- + * Copyright (c) 1996 Bruce D. Evans. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/prof_machdep.c,v 1.14.2.1 2000/08/03 00:09:30 ps Exp $ + * $DragonFly: src/sys/platform/pc64/isa/prof_machdep.c,v 1.1 2008/08/29 17:07:20 dillon Exp $ + */ + +#ifdef GUPROF +#include "opt_i586_guprof.h" +#include "opt_perfmon.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#undef MCOUNT +#endif + +#include + +#include +#include + +#ifdef GUPROF +#define CPUTIME_CLOCK_UNINITIALIZED 0 +#define CPUTIME_CLOCK_I8254 1 +#define CPUTIME_CLOCK_TSC 2 +#define CPUTIME_CLOCK_I586_PMC 3 +#define CPUTIME_CLOCK_I8254_SHIFT 7 + +int cputime_bias = 1; /* initialize for locality of reference */ + +static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; +#ifdef I586_PMC_GUPROF +static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; +static int cputime_clock_pmc_init; +static struct gmonparam saved_gmp; +#endif +#endif /* GUPROF */ + +#ifdef __GNUC__ +__asm(" \n\ +GM_STATE = 0 \n\ +GMON_PROF_OFF = 3 \n\ + \n\ + .text \n\ + .p2align 4,0x90 \n\ + .globl __mcount \n\ + .type __mcount,@function \n\ +__mcount: \n\ + # \n\ + # Check that we are profiling. Do it early for speed. \n\ + # \n\ + cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ + je .mcount_exit \n\ + # \n\ + # __mcount is the same as [.]mcount except the caller \n\ + # hasn't changed the stack except to call here, so the \n\ + # caller's raddr is above our raddr. 
\n\ + # \n\ + pushl %eax \n\ + pushl %ecx \n\ + pushl %edx \n\ + movl 12+4(%esp),%edx \n\ + jmp .got_frompc \n\ + \n\ + .p2align 4,0x90 \n\ + .globl " __XSTRING(HIDENAME(mcount)) " \n\ +" __XSTRING(HIDENAME(mcount)) ": \n\ + cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ + je .mcount_exit \n\ + # \n\ + # The caller's stack frame has already been built, so \n\ + # %ebp is the caller's frame pointer. The caller's \n\ + # raddr is in the caller's frame following the caller's \n\ + # caller's frame pointer. \n\ + # \n\ + pushl %eax \n\ + pushl %ecx \n\ + pushl %edx \n\ + movl 4(%ebp),%edx \n\ +.got_frompc: \n\ + # \n\ + # Our raddr is the caller's pc. \n\ + # \n\ + movl (%esp),%eax \n\ + \n\ + pushfl \n\ + pushl %eax \n\ + pushl %edx \n\ + cli \n\ + call " __XSTRING(CNAME(mcount)) " \n\ + addl $8,%esp \n\ + popfl \n\ + popl %edx \n\ + popl %ecx \n\ + popl %eax \n\ +.mcount_exit: \n\ + ret \n\ +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +#ifdef GUPROF +/* + * [.]mexitcount saves the return register(s), loads selfpc and calls + * mexitcount(selfpc) to do the work. Someday it should be in a machine + * dependent file together with cputime(), __mcount and [.]mcount. cputime() + * can't just be put in machdep.c because it has to be compiled without -pg. + */ +#ifdef __GNUC__ +__asm(" \n\ + .text \n\ +# \n\ +# Dummy label to be seen when gprof -u hides [.]mexitcount. 
\n\ +# \n\ + .p2align 4,0x90 \n\ + .globl __mexitcount \n\ + .type __mexitcount,@function \n\ +__mexitcount: \n\ + nop \n\ + \n\ +GMON_PROF_HIRES = 4 \n\ + \n\ + .p2align 4,0x90 \n\ + .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ +" __XSTRING(HIDENAME(mexitcount)) ": \n\ + cmpl $GMON_PROF_HIRES," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ + jne .mexitcount_exit \n\ + pushl %edx \n\ + pushl %ecx \n\ + pushl %eax \n\ + movl 12(%esp),%eax \n\ + pushfl \n\ + pushl %eax \n\ + cli \n\ + call " __XSTRING(CNAME(mexitcount)) " \n\ + addl $4,%esp \n\ + popfl \n\ + popl %eax \n\ + popl %ecx \n\ + popl %edx \n\ +.mexitcount_exit: \n\ + ret \n\ +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +/* + * Return the time elapsed since the last call. The units are machine- + * dependent. + */ +int +cputime(void) +{ + u_int count; + int delta; +#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ + defined(PERFMON) && defined(I586_PMC_GUPROF) + u_quad_t event_count; +#endif + u_char high, low; + static u_int prev_count; + +#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) + if (cputime_clock == CPUTIME_CLOCK_TSC) { + count = (u_int)rdtsc(); + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + /* + * XXX permon_read() should be inlined so that the + * perfmon module doesn't need to be compiled with + * profiling disabled and so that it is fast. + */ + perfmon_read(0, &event_count); + + count = (u_int)event_count; + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#endif /* PERFMON && I586_PMC_GUPROF */ +#endif /* (I586_CPU || I686_CPU) && !SMP */ + + /* + * Read the current value of the 8254 timer counter 0. 
+ */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); + low = inb(TIMER_CNTR0); + high = inb(TIMER_CNTR0); + count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; + + /* + * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. + * While profiling is enabled, this routine is called at least twice + * per timer reset (for mcounting and mexitcounting hardclock()), + * so at most one reset has occurred since the last call, and one + * has occurred iff the current count is larger than the previous + * count. This allows counter underflow to be detected faster + * than in microtime(). + */ + delta = prev_count - count; + prev_count = count; + if ((int) delta <= 0) + return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); + return (delta); +} + +static int +sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) +{ + int clock; + int error; +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + int event; + struct pmc pmc; +#endif + + clock = cputime_clock; +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (clock == CPUTIME_CLOCK_I586_PMC) { + pmc.pmc_val = cputime_clock_pmc_conf; + clock += pmc.pmc_event; + } +#endif + error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); + if (error == 0 && req->newptr != NULL) { +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (clock >= CPUTIME_CLOCK_I586_PMC) { + event = clock - CPUTIME_CLOCK_I586_PMC; + if (event >= 256) + return (EINVAL); + pmc.pmc_num = 0; + pmc.pmc_event = event; + pmc.pmc_unit = 0; + pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; + pmc.pmc_mask = 0; + cputime_clock_pmc_conf = pmc.pmc_val; + cputime_clock = CPUTIME_CLOCK_I586_PMC; + } else +#endif + { + if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) + return (EINVAL); + cputime_clock = clock; + } + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, + 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); + +/* + * The start and stop routines need not be here since we turn off 
profiling + * before calling them. They are here for convenience. + */ + +void +startguprof(struct gmonparam *gp) +{ + if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { + cputime_clock = CPUTIME_CLOCK_I8254; +#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) + if (tsc_frequency != 0) + cputime_clock = CPUTIME_CLOCK_TSC; +#endif + } + gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; +#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) + if (cputime_clock == CPUTIME_CLOCK_TSC) + gp->profrate = (u_int)tsc_frequency; /* XXX */ +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + if (perfmon_avail() && + perfmon_setup(0, cputime_clock_pmc_conf) == 0) { + if (perfmon_start(0) != 0) + perfmon_fini(0); + else { + /* XXX 1 event == 1 us. */ + gp->profrate = 1000000; + + saved_gmp = *gp; + + /* Zap overheads. They are invalid. */ + gp->cputime_overhead = 0; + gp->mcount_overhead = 0; + gp->mcount_post_overhead = 0; + gp->mcount_pre_overhead = 0; + gp->mexitcount_overhead = 0; + gp->mexitcount_post_overhead = 0; + gp->mexitcount_pre_overhead = 0; + + cputime_clock_pmc_init = TRUE; + } + } + } +#endif /* PERFMON && I586_PMC_GUPROF */ +#endif /* (I586_CPU || I686_CPU) && !SMP */ + cputime_bias = 0; + cputime(); +} + +void +stopguprof(struct gmonparam *gp) +{ +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (cputime_clock_pmc_init) { + *gp = saved_gmp; + perfmon_fini(0); + cputime_clock_pmc_init = FALSE; + } +#endif +} + +#else /* !GUPROF */ +#ifdef __GNUC__ +__asm(" \n\ + .text \n\ + .p2align 4,0x90 \n\ + .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ +" __XSTRING(HIDENAME(mexitcount)) ": \n\ + ret \n\ +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ +#endif /* GUPROF */ diff --git a/sys/platform/pc64/isa/timerreg.h b/sys/platform/pc64/isa/timerreg.h new file mode 100644 index 0000000000..124e890b0c --- /dev/null +++ b/sys/platform/pc64/isa/timerreg.h @@ -0,0 +1,145 @@ +/*- + * 
Copyright (c) 1993 The Regents of the University of California. + * Copyright (c) 2008 The DragonFly Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: Header: timerreg.h,v 1.2 93/02/28 15:08:58 mccanne Exp + * $FreeBSD: src/sys/i386/isa/timerreg.h,v 1.6 1999/08/28 00:45:04 peter Exp $ + * $DragonFly: src/sys/platform/pc64/isa/timerreg.h,v 1.1 2008/08/29 17:07:20 dillon Exp $ + */ + +/* + * + * Register definitions for the Intel 8253 Programmable Interval Timer. + * + * This chip has three independent 16-bit down counters that can be + * read on the fly. There are three mode registers and three countdown + * registers. The countdown registers are addressed directly, via the + * first three I/O ports. The three mode registers are accessed via + * the fourth I/O port, with two bits in the mode byte indicating the + * register. (Why are hardware interfaces always so braindead?). + * + * To write a value into the countdown register, the mode register + * is first programmed with a command indicating which byte of + * the two byte register is to be modified. The three possibilities + * are load msb (TMR_MR_MSB), load lsb (TMR_MR_LSB), or load lsb then + * msb (TMR_MR_BOTH). + * + * To read the current value ("on the fly") from the countdown register, + * you write a "latch" command into the mode register, then read the stable + * value from the corresponding I/O port. For example, you write + * TMR_MR_LATCH into the corresponding mode register. Presumably, + * after doing this, a write operation to the I/O port would result + * in undefined behavior (but hopefully not fry the chip). + * Reading in this manner has no side effects. + * + * [IBM-PC] + * The outputs of the three timers are connected as follows: + * + * timer 0 -> irq 0 + * timer 1 -> dma chan 0 (for dram refresh) + * timer 2 -> speaker (via keyboard controller) + * + * Timer 0 is used to call hardclock. + * Timer 2 is used to generate console beeps.
+ * + * [PC-9801] + * The outputs of the three timers are connected as follows: + * + * timer 0 -> irq 0 + * timer 1 -> speaker (via keyboard controller) + * timer 2 -> RS232C + * + * Timer 0 is used to call hardclock. + * Timer 1 is used to generate console beeps. + * + * TIMER_INTTC: Interrupt on Terminal Count. OUT initially low, + * goes high on terminal count and remains + * high until a new count or a mode 0 control + * word is written. + * + * TIMER_ONESHOT: Hardware Retriggerable One Shot. Out initially high, + * out goes low following the trigger and remains low + * until terminal count, then goes high and remains + * high until the next trigger. + * + * TIMER_RATEGEN: Rate Generator. OUT is initially high. When the + * count has decremented to 1 OUT goes low for one CLK + * pulse, then goes high again. Counter reloads and + * the sequence is repeated. + * + * TIMER_SQWAVE: Square Wave Generator. OUT is initially high. When + * half the count is expired, OUT goes low. Counter + * reloads, OUT goes high, and the sequence repeats. + * + * TIMER_SWSTROBE: S/W Triggered Strobe. OUT initially high. On + * terminal count OUT goes low for one CLK pulse + * and then goes high again. Counting stops. + * The counting sequence is 'triggered' by writing + * the initial count. Writing a control word and + * initial count resets and reloads the counter. + * + * TIMER_HWSTROBE: H/W Triggered Strobe. OUT initially high. A rising + * edge on GATE loads the counter and counting begins. + * On terminal count OUT goes low for one CLK and then + * high again. + * + * NOTE: the largest possible initial count is 0x0000. This is equivalent + * to 2^16 binary and 10^4 BCD counts. The counter does not stop when it + * reaches zero. In Modes INTTC, ONESHOT, SWSTROBE, and HWSTROBE the + * counter wraps around to the highest count (0xFFFF or 9999bcd) and + * continues counting.
In MODES RATEGEN and SQWAVE (which are periodic) + * the counter reloads itself with the initial count and continues counting + * from there. + */ + +/* + * Macros for specifying values to be written into a mode register. + */ +#define TIMER_CNTR0 (IO_TIMER1 + 0) /* timer 0 counter port */ +#define TIMER_CNTR1 (IO_TIMER1 + 1) /* timer 1 counter port */ +#define TIMER_CNTR2 (IO_TIMER1 + 2) /* timer 2 counter port */ +#define TIMER_MODE (IO_TIMER1 + 3) /* timer mode port */ +#define TIMER_SEL0 0x00 /* select counter 0 */ +#define TIMER_SEL1 0x40 /* select counter 1 */ +#define TIMER_SEL2 0x80 /* select counter 2 */ +#define TIMER_INTTC 0x00 /* mode 0, intr on terminal cnt */ +#define TIMER_ONESHOT 0x02 /* mode 1, one shot */ +#define TIMER_RATEGEN 0x04 /* mode 2, rate generator */ +#define TIMER_SQWAVE 0x06 /* mode 3, square wave */ +#define TIMER_SWSTROBE 0x08 /* mode 4, s/w triggered strobe */ +#define TIMER_HWSTROBE 0x0a /* mode 5, h/w triggered strobe */ +#define TIMER_LATCH 0x00 /* latch counter for reading */ +#define TIMER_LSB 0x10 /* r/w counter LSB */ +#define TIMER_MSB 0x20 /* r/w counter MSB */ +#define TIMER_16BIT 0x30 /* r/w counter 16 bits, LSB first */ +#define TIMER_BCD 0x01 /* count in BCD */ + diff --git a/sys/sys/tls.h b/sys/sys/tls.h index 690f962aba..d7e0fb0c13 100644 --- a/sys/sys/tls.h +++ b/sys/sys/tls.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The DragonFly Project. All rights reserved. + * Copyright (c) 2005,2008 The DragonFly Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,7 +28,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/sys/tls.h,v 1.9 2008/08/25 23:34:35 dillon Exp $ + * $DragonFly: src/sys/sys/tls.h,v 1.10 2008/08/29 17:07:04 dillon Exp $ */ #ifndef _SYS_TLS_H_ @@ -38,7 +38,7 @@ struct tls_info { void *base; - long size; + size_t size; }; int set_tls_area(int which, struct tls_info *info, size_t infosize); -- 2.41.0