From 46d4e165ab29c2d6bed416c38b6a5277d3bf0769 Mon Sep 17 00:00:00 2001 From: Jordan Gordeev Date: Wed, 1 Jul 2009 22:32:11 +0300 Subject: [PATCH] amd64: Big batch of SMP changes. --- sys/cpu/amd64/include/atomic.h | 459 ++-- sys/cpu/amd64/include/cpu.h | 73 +- sys/cpu/amd64/include/cpufunc.h | 4 + sys/kern/lwkt_ipiq.c | 24 + sys/platform/pc64/amd64/exception.S | 2 +- sys/platform/pc64/amd64/genassym.c | 15 + sys/platform/pc64/amd64/identcpu.c | 2 +- sys/platform/pc64/amd64/ipl.s | 27 +- sys/platform/pc64/amd64/machdep.c | 3 +- sys/platform/pc64/amd64/mp.c | 457 ---- sys/platform/pc64/amd64/mp_machdep.c | 2651 ++++++++++++++++++++++ sys/platform/pc64/amd64/mpboot.S | 236 ++ sys/platform/pc64/amd64/mplock.s | 52 +- sys/platform/pc64/amd64/support.s | 11 +- sys/platform/pc64/amd64/trap.c | 25 +- sys/platform/pc64/apic/apic_abi.c | 25 +- sys/platform/pc64/apic/apic_ipl.s | 65 +- sys/platform/pc64/apic/apic_vector.s | 282 ++- sys/platform/pc64/apic/apicreg.h | 1 + sys/platform/pc64/apic/mpapic.c | 433 +++- sys/platform/pc64/conf/files | 9 +- sys/platform/pc64/conf/options | 3 + sys/platform/pc64/include/clock.h | 30 +- sys/platform/pc64/include/globaldata.h | 7 +- sys/platform/pc64/include/intr_machdep.h | 82 - sys/platform/pc64/include/lock.h | 68 +- sys/platform/pc64/include/smp.h | 158 +- sys/platform/pc64/isa/intr_machdep.h | 4 + sys/platform/pc64/isa/npx.c | 4 +- 29 files changed, 3945 insertions(+), 1267 deletions(-) delete mode 100644 sys/platform/pc64/amd64/mp.c create mode 100644 sys/platform/pc64/amd64/mp_machdep.c create mode 100644 sys/platform/pc64/amd64/mpboot.S delete mode 100644 sys/platform/pc64/include/intr_machdep.h diff --git a/sys/cpu/amd64/include/atomic.h b/sys/cpu/amd64/include/atomic.h index 0317245bac..99a0cbf4f5 100644 --- a/sys/cpu/amd64/include/atomic.h +++ b/sys/cpu/amd64/include/atomic.h @@ -1,6 +1,5 @@ /*- - * Copyright (c) 1998 Doug Rabson. - * Copyright (c) 2008 The DragonFly Project. + * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,8 +23,8 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/amd64/include/atomic.h,v 1.32 2003/11/21 03:02:00 peter Exp $ - * $DragonFly: src/sys/cpu/amd64/include/atomic.h,v 1.3 2008/08/29 17:07:06 dillon Exp $ + * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $ + * $DragonFly: src/sys/cpu/i386/include/atomic.h,v 1.25 2008/06/26 23:06:50 dillon Exp $ */ #ifndef _CPU_ATOMIC_H_ #define _CPU_ATOMIC_H_ @@ -35,95 +34,167 @@ #endif /* - * Various simple operations on memory, each of which is atomic in the - * presence of interrupts and multiple processors. + * Various simple arithmetic on memory which is atomic in the presence + * of interrupts and multiple processors. 
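+ * Each operation expands to a single read-modify-write instruction,
+ * prefixed with "lock" on SMP builds; the semantics are listed below: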
* - * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) - * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) - * atomic_add_char(P, V) (*(u_char *)(P) += (V)) - * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) + * atomic_set_char(P, V) (*(u_char*)(P) |= (V)) + * atomic_clear_char(P, V) (*(u_char*)(P) &= ~(V)) + * atomic_add_char(P, V) (*(u_char*)(P) += (V)) + * atomic_subtract_char(P, V) (*(u_char*)(P) -= (V)) * - * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) - * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) - * atomic_add_short(P, V) (*(u_short *)(P) += (V)) - * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) + * atomic_set_short(P, V) (*(u_short*)(P) |= (V)) + * atomic_clear_short(P, V) (*(u_short*)(P) &= ~(V)) + * atomic_add_short(P, V) (*(u_short*)(P) += (V)) + * atomic_subtract_short(P, V) (*(u_short*)(P) -= (V)) * - * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) - * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) - * atomic_add_int(P, V) (*(u_int *)(P) += (V)) - * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) - * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) + * atomic_set_int(P, V) (*(u_int*)(P) |= (V)) + * atomic_clear_int(P, V) (*(u_int*)(P) &= ~(V)) + * atomic_add_int(P, V) (*(u_int*)(P) += (V)) + * atomic_subtract_int(P, V) (*(u_int*)(P) -= (V)) * - * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) - * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) - * atomic_add_long(P, V) (*(u_long *)(P) += (V)) - * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) - * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) + * atomic_set_long(P, V) (*(u_long*)(P) |= (V)) + * atomic_clear_long(P, V) (*(u_long*)(P) &= ~(V)) + * atomic_add_long(P, V) (*(u_long*)(P) += (V)) + * atomic_subtract_long(P, V) (*(u_long*)(P) -= (V)) + * atomic_readandclear_long(P) (return (*(u_long*)(P)); *(u_long*)(P) = 0;) */ /* * The above functions are expanded inline in the statically-linked * kernel. Lock prefixes are generated if an SMP kernel is being - * built. + * built, or if user code is using these functions. * * Kernel modules call real functions which are built into the kernel. * This allows kernel modules to be portable between UP and SMP systems. */ #if defined(KLD_MODULE) -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ -void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ -void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v); - -int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src); -int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src); -u_int atomic_fetchadd_int(volatile u_int *p, u_int v); -u_long atomic_fetchadd_long(volatile u_long *p, u_long v); - -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ -void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - +#define ATOMIC_ASM(NAME, TYPE, OP, V) \ + extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ + extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v); #else /* !KLD_MODULE */ - -#ifdef __GNUC__ - -/* - * For userland, always use lock prefixes so that the binaries will run - * on both SMP and !SMP systems. - */ #if defined(SMP) || !defined(_KERNEL) -#define MPLOCKED "lock ; " +#define MPLOCKED "lock ; " #else -#define MPLOCKED +#define MPLOCKED #endif /* * The assembly is volatilized to demark potential before-and-after side - * effects if an interrupt or SMP collision were to occur. 
+ * effects if an interrupt or SMP collision were to occur. The primary + * atomic instructions are MP safe, the nonlocked instructions are + * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an + * atomic instruction). + * + * +m - memory is read and written (=m - memory is only written) + * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg) + * (Note: byte instructions only work on %ax,%bx,%cx, or %dx). iq + * is good enough for our needs so don't get fancy. */ -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ + +/* egcs 1.1.2+ version */ +#define ATOMIC_ASM(NAME, TYPE, OP, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ - : CONS (V)); \ + : "iq" (V)); \ } \ -static __inline void \ +static __inline void \ atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile(OP \ - : "+m" (*p) \ - : CONS (V)); \ +{ \ + __asm __volatile(OP \ + : "+m" (*p) \ + : "iq" (V)); \ +} + +#endif /* KLD_MODULE */ + +/* egcs 1.1.2+ version */ +ATOMIC_ASM(set, char, "orb %b1,%0", v) +ATOMIC_ASM(clear, char, "andb %b1,%0", ~v) +ATOMIC_ASM(add, char, "addb %b1,%0", v) +ATOMIC_ASM(subtract, char, "subb %b1,%0", v) + +ATOMIC_ASM(set, short, "orw %w1,%0", v) +ATOMIC_ASM(clear, short, "andw %w1,%0", ~v) +ATOMIC_ASM(add, short, "addw %w1,%0", v) +ATOMIC_ASM(subtract, short, "subw %w1,%0", v) + +ATOMIC_ASM(set, int, "orl %1,%0", v) +ATOMIC_ASM(clear, int, "andl %1,%0", ~v) +ATOMIC_ASM(add, int, "addl %1,%0", v) +ATOMIC_ASM(subtract, int, "subl %1,%0", v) + +ATOMIC_ASM(set, long, "orq %1,%0", v) +ATOMIC_ASM(clear, long, "andq %1,%0", ~v) +ATOMIC_ASM(add, long, "addq %1,%0", v) +ATOMIC_ASM(subtract, long, "subq %1,%0", v) + +#if defined(KLD_MODULE) +u_long atomic_readandclear_long(volatile u_long *addr); +#else /* !KLD_MODULE */ +static __inline u_long +atomic_readandclear_long(volatile u_long *addr) +{ + u_long res; + + res = 0; + __asm __volatile( + " xchgq %1,%0 ; " + "# atomic_readandclear_long" + : "+r" (res), /* 0 */ + "=m" (*addr) /* 1 */ + : "m" (*addr)); + + return (res); +} +#endif /* KLD_MODULE */ + +/* + * atomic_poll_acquire_int(P) Returns non-zero on success, 0 if the lock + * has already been acquired. + * atomic_poll_release_int(P) + * + * These support the NDIS driver and are also used for IPIQ interlocks + * between cpus. Both the acquisition and release must be + * cache-synchronizing instructions. 
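+ *
+ * A minimal usage sketch (the interlock variable is hypothetical, not
+ * part of this file):
+ *
+ *	static volatile u_int my_interlock;
+ *
+ *	if (atomic_poll_acquire_int(&my_interlock)) {
+ *		...bit 0 of the interlock is now owned by us...
+ *		atomic_poll_release_int(&my_interlock);
+ *	}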
+ */ + +#if defined(KLD_MODULE) + +extern int atomic_swap_int(volatile int *addr, int value); +extern int atomic_poll_acquire_int(volatile u_int *p); +extern void atomic_poll_release_int(volatile u_int *p); + +#else + +static __inline int +atomic_swap_int(volatile int *addr, int value) +{ + __asm __volatile("xchgl %0, %1" : + "=r" (value), "=m" (*addr) : "0" (value) : "memory"); + return (value); } -#else /* !__GNUC__ */ +static __inline +int +atomic_poll_acquire_int(volatile u_int *p) +{ + u_int data; -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ -extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ -extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v); + __asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax" : "+m" (*p), "=a" (data)); + return(data); +} -#endif /* __GNUC__ */ +static __inline +void +atomic_poll_release_int(volatile u_int *p) +{ + __asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p)); +} + +#endif /* * These functions operate on a 32 bit interrupt interlock which is defined @@ -177,34 +248,29 @@ extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v) * the field is ignored. */ -#ifndef __ATOMIC_INTR_T -#define __ATOMIC_INTR_T -typedef volatile int atomic_intr_t; -#endif - #if defined(KLD_MODULE) -void atomic_intr_init(atomic_intr_t *p); -int atomic_intr_handler_disable(atomic_intr_t *p); -void atomic_intr_handler_enable(atomic_intr_t *p); -int atomic_intr_handler_is_enabled(atomic_intr_t *p); -int atomic_intr_cond_test(atomic_intr_t *p); -int atomic_intr_cond_try(atomic_intr_t *p); -void atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg); -void atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg); +void atomic_intr_init(__atomic_intr_t *p); +int atomic_intr_handler_disable(__atomic_intr_t *p); +void atomic_intr_handler_enable(__atomic_intr_t *p); +int atomic_intr_handler_is_enabled(__atomic_intr_t *p); +int atomic_intr_cond_test(__atomic_intr_t *p); +int atomic_intr_cond_try(__atomic_intr_t *p); +void atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg); +void atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg); -#else /* !KLD_MODULE */ +#else static __inline void -atomic_intr_init(atomic_intr_t *p) +atomic_intr_init(__atomic_intr_t *p) { *p = 0; } static __inline int -atomic_intr_handler_disable(atomic_intr_t *p) +atomic_intr_handler_disable(__atomic_intr_t *p) { int data; @@ -216,14 +282,14 @@ atomic_intr_handler_disable(atomic_intr_t *p) static __inline void -atomic_intr_handler_enable(atomic_intr_t *p) +atomic_intr_handler_enable(__atomic_intr_t *p) { __asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p)); } static __inline int -atomic_intr_handler_is_enabled(atomic_intr_t *p) +atomic_intr_handler_is_enabled(__atomic_intr_t *p) { int data; @@ -234,7 +300,7 @@ atomic_intr_handler_is_enabled(atomic_intr_t *p) static __inline void -atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg) +atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg) { __asm __volatile(MPLOCKED "incl %0; " \ "1: ;" \ @@ -248,50 +314,13 @@ atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg) /* YYY the function call may clobber even more registers? */ } -/* - * Atomically add the value of v to the integer pointed to by p and return - * the previous value of *p. 
- */ -static __inline u_int -atomic_fetchadd_int(volatile u_int *p, u_int v) -{ - - __asm __volatile( - " " MPLOCKED " " - " xaddl %0, %1 ; " - "# atomic_fetchadd_int" - : "+r" (v), /* 0 (result) */ - "=m" (*p) /* 1 */ - : "m" (*p)); /* 2 */ - - return (v); -} - -/* - * Atomically add the value of v to the long integer pointed to by p and return - * the previous value of *p. - */ -static __inline u_long -atomic_fetchadd_long(volatile u_long *p, u_long v) -{ - - __asm __volatile( - " " MPLOCKED " " - " xaddq %0, %1 ; " - "# atomic_fetchadd_long" - : "+r" (v), /* 0 (result) */ - "=m" (*p) /* 1 */ - : "m" (*p)); /* 2 */ - - return (v); -} /* * Attempt to enter the interrupt condition variable. Returns zero on * success, 1 on failure. */ static __inline int -atomic_intr_cond_try(atomic_intr_t *p) +atomic_intr_cond_try(__atomic_intr_t *p) { int ret; @@ -306,7 +335,7 @@ atomic_intr_cond_try(atomic_intr_t *p) #ifdef __clang__ : : "ax", "cx", "dx"); #else - : : "cx", "dx"); + : : "cx", "dx"); #endif return (ret); } @@ -314,14 +343,14 @@ atomic_intr_cond_try(atomic_intr_t *p) static __inline int -atomic_intr_cond_test(atomic_intr_t *p) +atomic_intr_cond_test(__atomic_intr_t *p) { return((int)(*p & 0x80000000)); } static __inline void -atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg) +atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg) { __asm __volatile(MPLOCKED "decl %0; " \ MPLOCKED "btrl $31,%0; " \ @@ -330,134 +359,65 @@ atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg) "1: ;" \ : "+m" (*p) \ : "r"(func), "m"(arg) \ - : "ax", "cx", "dx", "di"); /* XXX clobbers more regs */ + : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11"); + /* YYY the function call may clobber even more registers? 
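+	 * The list above is exactly the set of integer registers the
+	 * amd64 SysV ABI makes caller-saved (%rax, %rcx, %rdx, %rsi,
+	 * %rdi, %r8-%r11); callee-saved registers survive the call,
+	 * assuming the called function stays out of FP/SSE state, which
+	 * kernel code normally does.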
*/ } #endif /* - * Atomic compare and set, used by the mutex functions + * Atomic compare and set * - * if (*dst == exp) *dst = src (all 32 bit words) + * if (*_dst == _old) *_dst = _new (all 32 bit words) * * Returns 0 on failure, non-zero on success */ +#if defined(KLD_MODULE) + +extern int atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new); +extern int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src); +extern u_int atomic_fetchadd_int(volatile u_int *p, u_int v); -#if defined(__GNUC__) +#else static __inline int -atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) +atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new) { - int res = exp; - - __asm __volatile ( - MPLOCKED - " cmpxchgl %1,%2 ; " - " setz %%al ; " - " movzbl %%al,%0 ; " - "1: " - "# atomic_cmpset_int" - : "+a" (res) /* 0 (result) */ - : "r" (src), /* 1 */ - "m" (*(dst)) /* 2 */ - : "memory"); - - return (res); + int res = _old; + + __asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \ + "setz %%al; " \ + "movzbl %%al,%0; " \ + : "+a" (res), "=m" (*_dst) \ + : "r" (_new), "m" (*_dst) \ + : "memory"); + return res; } static __inline int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src) { - long res = exp; - - __asm __volatile ( - MPLOCKED - " cmpxchgq %1,%2 ; " - " setz %%al ; " - " movzbq %%al,%0 ; " - "1: " - "# atomic_cmpset_long" - : "+a" (res) /* 0 (result) */ - : "r" (src), /* 1 */ - "m" (*(dst)) /* 2 */ - : "memory"); - - return (res); + return (atomic_cmpset_int((volatile u_int *)dst, (u_int)exp, + (u_int)src)); } -#endif /* defined(__GNUC__) */ - -#if defined(__GNUC__) - -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -static __inline u_##TYPE \ -atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ -{ \ - u_##TYPE res; \ - \ - __asm __volatile(MPLOCKED LOP \ - : "=a" (res), /* 0 (result) */\ - "+m" (*p) /* 1 */ \ - : : "memory"); \ - \ - return (res); \ -} \ - \ -/* \ - * The XCHG instruction asserts LOCK automagically. 
\ - */ \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile(SOP \ - : "+m" (*p), /* 0 */ \ - "+r" (v) /* 1 */ \ - : : "memory"); \ -} \ -struct __hack - -#else /* !defined(__GNUC__) */ - -extern int atomic_cmpset_int(volatile u_int *, u_int, u_int); -extern int atomic_cmpset_long(volatile u_long *, u_long, u_long); - -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -extern u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ -extern void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - -#endif /* defined(__GNUC__) */ - -#endif /* !KLD_MODULE */ - -ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); -ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); -ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); -ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); - -ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); -ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); -ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); -ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); -ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); -ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); -ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); -ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); - -ATOMIC_ASM(set, long, "orq %1,%0", "ir", v); -ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); -ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); -ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); - -ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0"); -ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0"); -ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0"); -ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); - -#define atomic_cmpset_32 atomic_cmpset_int +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. + */ +static __inline u_int +atomic_fetchadd_int(volatile u_int *p, u_int v) +{ + __asm __volatile(MPLOCKED "xaddl %0,%1; " \ + : "+r" (v), "=m" (*p) \ + : "m" (*p) \ + : "memory"); + return (v); +} -#undef ATOMIC_ASM -#undef ATOMIC_STORE_LOAD +#endif /* KLD_MODULE */ +/* Acquire and release variants are identical to the normal ones. */ #define atomic_set_acq_char atomic_set_char #define atomic_set_rel_char atomic_set_char #define atomic_clear_acq_char atomic_clear_char @@ -495,7 +455,10 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_add_rel_long atomic_add_long #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long +#define atomic_cmpset_acq_long atomic_cmpset_long +#define atomic_cmpset_rel_long atomic_cmpset_long +/* Operations on 8-bit bytes. 
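+ *
+ * XXX atomic_cmpset_long() above compares and swaps only the low 32
+ * bits of *dst, while u_long is 64 bits wide on amd64; a full-width
+ * variant would need cmpxchgq. A sketch, not part of this patch:
+ *
+ *	static __inline int
+ *	atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src)
+ *	{
+ *		u_char ret;
+ *
+ *		__asm __volatile(MPLOCKED "cmpxchgq %3,%1; sete %0"
+ *				 : "=q" (ret), "+m" (*dst), "+a" (exp)
+ *				 : "r" (src)
+ *				 : "memory", "cc");
+ *		return ((int)ret);
+ *	}
+ *
+ * The 8-, 16-, 32- and 64-bit names below simply alias the char,
+ * short, int and long operations defined earlier.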
*/ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char @@ -568,48 +531,4 @@ ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long #define atomic_readandclear_ptr atomic_readandclear_long -#if defined(__GNUC__) - -#if defined(KLD_MODULE) -extern u_int atomic_readandclear_int(volatile u_int *addr); -extern u_long atomic_readandclear_long(volatile u_long *addr); -#else /* !KLD_MODULE */ -static __inline u_int -atomic_readandclear_int(volatile u_int *addr) -{ - u_int result; - - __asm __volatile ( - " xorl %0,%0 ; " - " xchgl %1,%0 ; " - "# atomic_readandclear_int" - : "=&r" (result) /* 0 (result) */ - : "m" (*addr)); /* 1 (addr) */ - - return (result); -} - -static __inline u_long -atomic_readandclear_long(volatile u_long *addr) -{ - u_long result; - - __asm __volatile ( - " xorq %0,%0 ; " - " xchgq %1,%0 ; " - "# atomic_readandclear_int" - : "=&r" (result) /* 0 (result) */ - : "m" (*addr)); /* 1 (addr) */ - - return (result); -} -#endif /* KLD_MODULE */ - -#else /* !defined(__GNUC__) */ - -extern u_long atomic_readandclear_long(volatile u_long *); -extern u_int atomic_readandclear_int(volatile u_int *); - -#endif /* defined(__GNUC__) */ - #endif /* ! _CPU_ATOMIC_H_ */ diff --git a/sys/cpu/amd64/include/cpu.h b/sys/cpu/amd64/include/cpu.h index c4af70b6c9..ad0e7546ac 100644 --- a/sys/cpu/amd64/include/cpu.h +++ b/sys/cpu/amd64/include/cpu.h @@ -35,14 +35,36 @@ * * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $ - * $DragonFly: src/sys/cpu/amd64/include/cpu.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $ + * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.25 2007/03/01 01:46:52 corecode Exp $ */ #ifndef _CPU_CPU_H_ #define _CPU_CPU_H_ +/* + * Definitions unique to amd64 cpu support. + */ +#ifndef _CPU_PSL_H_ +#include +#endif +#ifndef _CPU_FRAME_H_ +#include +#endif +#ifndef _CPU_SEGMENTS_H_ +#include +#endif + +/* + * definitions of cpu-dependent requirements + * referenced in generic code + */ + +#define cpu_exec(p) /* nothing */ +#define cpu_swapin(p) /* nothing */ +#define cpu_setstack(lp, ap) ((lp)->lwp_md.md_regs[SP] = (ap)) + #define CLKF_INTR(framep) (mycpu->gd_intr_nesting_level > 1 || (curthread->td_flags & TDF_INTTHREAD)) -#define CLKF_PC(framep) ((framep)->if_rip) +#define CLKF_PC(framep) ((framep)->if_rip) /* * Preempt the current process if in interrupt from user mode, @@ -52,24 +74,28 @@ * atomic instruction because an interrupt on the local cpu can modify * the gd_reqflags field. 
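 * The _nonlocked forms used here compile to a single or/and
 * instruction, which is atomic with respect to local interrupts
 * without paying for a lock prefix.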
*/ -#define need_lwkt_resched() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) -#define need_user_resched() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) -#define need_proftick() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) -#define signotify() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) -#define sigupcall() \ - atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL) -#define clear_lwkt_resched() \ - atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) -#define clear_user_resched() \ - atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) -#define user_resched_wanted() \ - (mycpu->gd_reqflags & RQF_AST_USER_RESCHED) -#define lwkt_resched_wanted() \ - (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED) +#define need_lwkt_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define need_user_resched() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) +#define need_proftick() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC) +#define need_ipiq() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_IPIQ) +#define signotify() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL) +#define sigupcall() \ + atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL) +#define clear_user_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED) +#define clear_lwkt_resched() \ + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED) +#define user_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_USER_RESCHED) +#define lwkt_resched_wanted() \ + (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED) +#define any_resched_wanted() \ + (mycpu->gd_reqflags & (RQF_AST_LWKT_RESCHED|RQF_AST_USER_RESCHED)) /* * CTL_MACHDEP definitions. @@ -91,9 +117,16 @@ } #ifdef _KERNEL + +struct lwp; + +extern char btext[]; +extern char etext[]; + void fork_trampoline (void); void generic_lwp_return (struct lwp *, struct trapframe *); void fork_return (struct lwp *, struct trapframe *); + #endif #endif /* !_CPU_CPU_H_ */ diff --git a/sys/cpu/amd64/include/cpufunc.h b/sys/cpu/amd64/include/cpufunc.h index a25e87a791..7818eda785 100644 --- a/sys/cpu/amd64/include/cpufunc.h +++ b/sys/cpu/amd64/include/cpufunc.h @@ -64,6 +64,10 @@ __BEGIN_DECLS #ifdef __GNUC__ +#ifdef SMP +#include /* XXX */ +#endif + static __inline void breakpoint(void) { diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c index 1149b0d8e6..18e617200d 100644 --- a/sys/kern/lwkt_ipiq.c +++ b/sys/kern/lwkt_ipiq.c @@ -172,7 +172,11 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2) * enabled while we liveloop to avoid deadlocking the APIC. */ if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) { +#if defined(__i386__) unsigned int eflags = read_eflags(); +#elif defined(__amd64__) + unsigned long rflags = read_rflags(); +#endif if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) { logipiq(cpu_send, func, arg1, arg2, gd, target); @@ -184,7 +188,11 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2) KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1); lwkt_process_ipiq(); } +#if defined(__i386__) write_eflags(eflags); +#elif defined(__amd64__) + write_rflags(rflags); +#endif } /* @@ -255,7 +263,11 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func, * enabled while we liveloop to avoid deadlocking the APIC. 
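 * The saved interrupt-flag image is per-architecture: eflags via
 * read_eflags()/write_eflags() on i386, rflags via
 * read_rflags()/write_rflags() on amd64, hence the #if blocks below.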
*/ if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) { +#if defined(__i386__) unsigned int eflags = read_eflags(); +#elif defined(__amd64__) + unsigned long rflags = read_rflags(); +#endif if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) { logipiq(cpu_send, func, arg1, arg2, gd, target); @@ -267,7 +279,11 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func, KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1); lwkt_process_ipiq(); } +#if defined(__i386__) write_eflags(eflags); +#elif defined(__amd64__) + write_rflags(rflags); +#endif } /* @@ -394,7 +410,11 @@ lwkt_wait_ipiq(globaldata_t target, int seq) if (target != mycpu) { ip = &mycpu->gd_ipiq[target->gd_cpuid]; if ((int)(ip->ip_xindex - seq) < 0) { +#if defined(__i386__) unsigned int eflags = read_eflags(); +#elif defined(__amd64__) + unsigned long rflags = read_rflags(); +#endif cpu_enable_intr(); while ((int)(ip->ip_xindex - seq) < 0) { crit_enter(); @@ -411,7 +431,11 @@ lwkt_wait_ipiq(globaldata_t target, int seq) */ cpu_lfence(); } +#if defined(__i386__) write_eflags(eflags); +#elif defined(__amd64__) + write_rflags(rflags); +#endif } } } diff --git a/sys/platform/pc64/amd64/exception.S b/sys/platform/pc64/amd64/exception.S index 0dea2babc6..a925abea4f 100644 --- a/sys/platform/pc64/amd64/exception.S +++ b/sys/platform/pc64/amd64/exception.S @@ -432,7 +432,7 @@ ENTRY(fork_trampoline) cmpl $0,TD_MPCOUNT(%rax) je 1f movq $pmsg4, %rdi - movl TD_MPCOUNT(%rax), %rsi + movl TD_MPCOUNT(%rax), %esi movq %rbx, %rdx xorl %eax, %eax call panic diff --git a/sys/platform/pc64/amd64/genassym.c b/sys/platform/pc64/amd64/genassym.c index 5059f1fae9..b36116dd0a 100644 --- a/sys/platform/pc64/amd64/genassym.c +++ b/sys/platform/pc64/amd64/genassym.c @@ -66,17 +66,21 @@ #include #include +#include #include #include #include #include #include +#include ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(LWP_VMSPACE, offsetof(struct lwp, lwp_vmspace)); +ASSYM(V_IPI, offsetof(struct vmmeter, v_ipi)); +ASSYM(V_TIMER, offsetof(struct vmmeter, v_timer)); ASSYM(UPAGES, UPAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); @@ -132,6 +136,8 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); +ASSYM(PCB_SIZE, sizeof(struct pcb)); + ASSYM(TF_R15, offsetof(struct trapframe, tf_r15)); ASSYM(TF_R14, offsetof(struct trapframe, tf_r14)); ASSYM(TF_R13, offsetof(struct trapframe, tf_r13)); @@ -210,8 +216,11 @@ ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL); +ASSYM(RQF_TIMER, RQF_TIMER); ASSYM(RQF_AST_MASK, RQF_AST_MASK); +ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); + ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); @@ -226,3 +235,9 @@ ASSYM(MACHINTR_INTREN, offsetof(struct machintr_abi, intren)); ASSYM(TDPRI_CRIT, TDPRI_CRIT); ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT); + +#ifdef SMP +ASSYM(AIMI_APIC_ADDRESS, offsetof(struct apic_intmapinfo, apic_address)); +ASSYM(AIMI_REDIRINDEX, offsetof(struct apic_intmapinfo, redirindex)); +ASSYM(AIMI_SIZE, sizeof(struct apic_intmapinfo)); +#endif diff --git a/sys/platform/pc64/amd64/identcpu.c b/sys/platform/pc64/amd64/identcpu.c index b5678dc189..7c5ce7e122 100644 
--- a/sys/platform/pc64/amd64/identcpu.c +++ b/sys/platform/pc64/amd64/identcpu.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sys/platform/pc64/amd64/ipl.s b/sys/platform/pc64/amd64/ipl.s index d63fcaa588..9fc2efb8e6 100644 --- a/sys/platform/pc64/amd64/ipl.s +++ b/sys/platform/pc64/amd64/ipl.s @@ -153,6 +153,8 @@ doreti_next: #ifdef SMP testl $RQF_IPIQ,PCPU(reqflags) jnz doreti_ipiq + testl $RQF_TIMER,PCPU(reqflags) + jnz doreti_timer #endif testl PCPU(fpending),%ecx /* check for an unmasked fast int */ jnz doreti_fast @@ -339,10 +341,22 @@ doreti_ipiq: movl %eax,%esi /* save cpl (can't use stack) */ incl PCPU(intr_nesting_level) andl $~RQF_IPIQ,PCPU(reqflags) - subl $16,%rsp /* add dummy vec and ppl */ + subq $16,%rsp /* add dummy vec and ppl */ movq %rsp,%rdi /* pass frame by ref (C arg) */ call lwkt_process_ipiq_frame - addl $16,%rsp + addq $16,%rsp + decl PCPU(intr_nesting_level) + movl %esi,%eax /* restore cpl for loop */ + jmp doreti_next + +doreti_timer: + movl %eax,%esi /* save cpl (can't use stack) */ + incl PCPU(intr_nesting_level) + andl $~RQF_TIMER,PCPU(reqflags) + subq $16,%rsp /* add dummy vec and ppl */ + movq %rsp,%rdi /* pass frame by ref (C arg) */ + call lapic_timer_process_frame + addq $16,%rsp decl PCPU(intr_nesting_level) movl %esi,%eax /* restore cpl for loop */ jmp doreti_next @@ -373,6 +387,8 @@ splz_next: #ifdef SMP testl $RQF_IPIQ,PCPU(reqflags) jnz splz_ipiq + testl $RQF_TIMER,PCPU(reqflags) + jnz splz_timer #endif testl PCPU(fpending),%ecx /* check for an unmasked fast int */ jnz splz_fast @@ -487,6 +503,13 @@ splz_ipiq: call lwkt_process_ipiq popq %rax jmp splz_next + +splz_timer: + andl $~RQF_TIMER,PCPU(reqflags) + pushq %rax + call lapic_timer_process + popq %rax + jmp splz_next #endif /* diff --git a/sys/platform/pc64/amd64/machdep.c b/sys/platform/pc64/amd64/machdep.c index e54154087d..3bbeb46dde 100644 --- a/sys/platform/pc64/amd64/machdep.c +++ b/sys/platform/pc64/amd64/machdep.c @@ -99,7 +99,6 @@ #if JG #include #endif -#include /* for inthand_t */ #include #include #include @@ -112,7 +111,7 @@ #include #ifdef OLD_BUS_ARCH -#include +#include #endif #include #include diff --git a/sys/platform/pc64/amd64/mp.c b/sys/platform/pc64/amd64/mp.c deleted file mode 100644 index 0ae3b46e48..0000000000 --- a/sys/platform/pc64/amd64/mp.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Copyright (c) 2007 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/pc64/amd64/mp.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $ - */ - - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -extern pt_entry_t *KPTphys; - -volatile u_int stopped_cpus; -cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ -static int boot_address; -static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ -int mp_naps; /* # of Applications processors */ -static int mp_finish; - -/* function prototypes XXX these should go elsewhere */ -void bootstrap_idle(void); -void single_cpu_ipi(int, int, int); -void selected_cpu_ipi(u_int, int, int); -#if 0 -void ipi_handler(int); -#endif - -pt_entry_t *SMPpt; - -/* AP uses this during bootstrap. Do not staticize. */ -char *bootSTK; -static int bootAP; - - -/* XXX these need to go into the appropriate header file */ -static int start_all_aps(u_int); -void init_secondary(void); -void *start_ap(void *); - -/* - * Get SMP fully working before we start initializing devices. - */ -static -void -ap_finish(void) -{ - int i; - cpumask_t ncpus_mask = 0; - - for (i = 1; i <= ncpus; i++) - ncpus_mask |= (1 << i); - - mp_finish = 1; - if (bootverbose) - kprintf("Finish MP startup\n"); - - /* build our map of 'other' CPUs */ - mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); - - /* - * Let the other cpu's finish initializing and build their map - * of 'other' CPUs. 
- */ - rel_mplock(); - while (smp_active_mask != smp_startup_mask) { - DELAY(100000); - cpu_lfence(); - } - - while (try_mplock() == 0) - DELAY(100000); - if (bootverbose) - kprintf("Active CPU Mask: %08x\n", smp_active_mask); -} - -SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL) - - -void * -start_ap(void *arg __unused) -{ - init_secondary(); - setrealcpu(); - bootstrap_idle(); - - return(NULL); /* NOTREACHED */ -} - -/* storage for AP thread IDs */ -pthread_t ap_tids[MAXCPU]; - -void -mp_start(void) -{ - int shift; - - ncpus = optcpus; - - mp_naps = ncpus - 1; - - /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ - for (shift = 0; (1 << shift) <= ncpus; ++shift) - ; - --shift; - ncpus2_shift = shift; - ncpus2 = 1 << shift; - ncpus2_mask = ncpus2 - 1; - - /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ - if ((1 << shift) < ncpus) - ++shift; - ncpus_fit = 1 << shift; - ncpus_fit_mask = ncpus_fit - 1; - - /* - * cpu0 initialization - */ - mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, - sizeof(lwkt_ipiq) * ncpus); - bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); - - /* - * cpu 1-(n-1) - */ - start_all_aps(boot_address); - -} - -void -mp_announce(void) -{ - int x; - - kprintf("DragonFly/MP: Multiprocessor\n"); - kprintf(" cpu0 (BSP)\n"); - - for (x = 1; x <= mp_naps; ++x) - kprintf(" cpu%d (AP)\n", x); -} - -void -forward_fastint_remote(void *arg) -{ - panic("XXX forward_fastint_remote()"); -} - -void -cpu_send_ipiq(int dcpu) -{ - if ((1 << dcpu) & smp_active_mask) - if (pthread_kill(ap_tids[dcpu], SIGUSR1) != 0) - panic("pthread_kill failed in cpu_send_ipiq"); -#if 0 - panic("XXX cpu_send_ipiq()"); -#endif -} - -void -smp_invltlb(void) -{ -#ifdef SMP -#endif -} - -void -single_cpu_ipi(int cpu, int vector, int delivery_mode) -{ - kprintf("XXX single_cpu_ipi\n"); -} - -void -selected_cpu_ipi(u_int target, int vector, int delivery_mode) -{ - crit_enter(); - while (target) { - int n = bsfl(target); - target &= ~(1 << n); - single_cpu_ipi(n, vector, delivery_mode); - } - crit_exit(); -} - -int -stop_cpus(u_int map) -{ - map &= smp_active_mask; - - crit_enter(); - while (map) { - int n = bsfl(map); - map &= ~(1 << n); - if (pthread_kill(ap_tids[n], SIGSTOP) != 0) - panic("stop_cpus: pthread_kill failed"); - } - crit_exit(); -#if 0 - panic("XXX stop_cpus()"); -#endif - - return(1); -} - -int -restart_cpus(u_int map) -{ - map &= smp_active_mask; - - crit_enter(); - while (map) { - int n = bsfl(map); - map &= ~(1 << n); - if (pthread_kill(ap_tids[n], SIGCONT) != 0) - panic("restart_cpus: pthread_kill failed"); - } - crit_exit(); -#if 0 - panic("XXX restart_cpus()"); -#endif - - return(1); -} - -void -ap_init(void) -{ - /* - * Adjust smp_startup_mask to signal the BSP that we have started - * up successfully. Note that we do not yet hold the BGL. The BSP - * is waiting for our signal. - * - * We can't set our bit in smp_active_mask yet because we are holding - * interrupts physically disabled and remote cpus could deadlock - * trying to send us an IPI. - */ - smp_startup_mask |= 1 << mycpu->gd_cpuid; - cpu_mfence(); - - /* - * Interlock for finalization. Wait until mp_finish is non-zero, - * then get the MP lock. - * - * Note: We are in a critical section. - * - * Note: We have to synchronize td_mpcount to our desired MP state - * before calling cpu_try_mplock(). - * - * Note: we are the idle thread, we can only spin. 
- * - * Note: The load fence is memory volatile and prevents the compiler - * from improperly caching mp_finish, and the cpu from improperly - * caching it. - */ - - while (mp_finish == 0) { - cpu_lfence(); - DELAY(500000); - } - ++curthread->td_mpcount; - while (cpu_try_mplock() == 0) - DELAY(100000); - - /* BSP may have changed PTD while we're waiting for the lock */ - cpu_invltlb(); - - /* Build our map of 'other' CPUs. */ - mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); - - kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid); - - - /* Set memory range attributes for this CPU to match the BSP */ - mem_range_AP_init(); - /* - * Once we go active we must process any IPIQ messages that may - * have been queued, because no actual IPI will occur until we - * set our bit in the smp_active_mask. If we don't the IPI - * message interlock could be left set which would also prevent - * further IPIs. - * - * The idle loop doesn't expect the BGL to be held and while - * lwkt_switch() normally cleans things up this is a special case - * because we returning almost directly into the idle loop. - * - * The idle thread is never placed on the runq, make sure - * nothing we've done put it there. - */ - KKASSERT(curthread->td_mpcount == 1); - smp_active_mask |= 1 << mycpu->gd_cpuid; - - mdcpu->gd_fpending = 0; - mdcpu->gd_ipending = 0; - initclocks_pcpu(); /* clock interrupts (via IPIs) */ - lwkt_process_ipiq(); - - /* - * Releasing the mp lock lets the BSP finish up the SMP init - */ - rel_mplock(); - KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); -} - -void -init_secondary(void) -{ - int myid = bootAP; - struct mdglobaldata *md; - struct privatespace *ps; - - ps = &CPU_prvspace[myid]; - - KKASSERT(ps->mdglobaldata.mi.gd_prvspace == ps); - - /* - * Setup the %gs for cpu #n. The mycpu macro works after this - * point. - */ - tls_set_fs(&CPU_prvspace[myid], sizeof(struct privatespace)); - - md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ - - md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ - md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; - - /* - * Set to a known state: - * Set by mpboot.s: CR0_PG, CR0_PE - * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM - */ -} - -static int -start_all_aps(u_int boot_addr) -{ - int x, i; - struct mdglobaldata *gd; - struct privatespace *ps; - vm_page_t m; - vm_offset_t va; -#if 0 - struct lwp_params params; -#endif - - /* - * needed for ipis to initial thread - * FIXME: rename ap_tids? - */ - ap_tids[0] = pthread_self(); - - for (x = 1; x <= mp_naps; x++) - { - /* Allocate space for the CPU's private space. 
*/ - va = (vm_offset_t)&CPU_prvspace[x]; - for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) { - va =(vm_offset_t)&CPU_prvspace[x].mdglobaldata + i; - m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM); - pmap_kenter_quick(va, m->phys_addr); - } - - for (i = 0; i < sizeof(CPU_prvspace[x].idlestack); i += PAGE_SIZE) { - va =(vm_offset_t)&CPU_prvspace[x].idlestack + i; - m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM); - pmap_kenter_quick(va, m->phys_addr); - } - - gd = &CPU_prvspace[x].mdglobaldata; /* official location */ - bzero(gd, sizeof(*gd)); - gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; - - /* prime data page for it to use */ - mi_gdinit(&gd->mi, x); - cpu_gdinit(gd, x); - -#if 0 - gd->gd_CMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE1); - gd->gd_CMAP2 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE2); - gd->gd_CMAP3 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE3); - gd->gd_PMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].PPAGE1); - gd->gd_CADDR1 = ps->CPAGE1; - gd->gd_CADDR2 = ps->CPAGE2; - gd->gd_CADDR3 = ps->CPAGE3; - gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1; -#endif - - gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); - bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); - - /* - * Setup the AP boot stack - */ - bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; - bootAP = x; - - /* - * Setup the AP's lwp, this is the 'cpu' - */ - pthread_create(&ap_tids[x], NULL, start_ap, NULL); - - while((smp_startup_mask & (1 << x)) == 0) { - cpu_lfence(); /* XXX spin until the AP has started */ - DELAY(1000); - } - } - - return(ncpus - 1); -} diff --git a/sys/platform/pc64/amd64/mp_machdep.c b/sys/platform/pc64/amd64/mp_machdep.c new file mode 100644 index 0000000000..011b538104 --- /dev/null +++ b/sys/platform/pc64/amd64/mp_machdep.c @@ -0,0 +1,2651 @@ +/* + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $ + */ + +#include "opt_cpu.h" + +#include +#include +#include +#include +#include +#include +#include /* cngetc() */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef GPROF +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* setidt() */ +#include /* IPIs */ +#include /* IPIs */ + +#define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */ + +#define WARMBOOT_TARGET 0 +#define WARMBOOT_OFF (KERNBASE + 0x0467) +#define WARMBOOT_SEG (KERNBASE + 0x0469) + +#define BIOS_BASE (0xf0000) +#define BIOS_SIZE (0x10000) +#define BIOS_COUNT (BIOS_SIZE/4) + +#define CMOS_REG (0x70) +#define CMOS_DATA (0x71) +#define BIOS_RESET (0x0f) +#define BIOS_WARM (0x0a) + +#define PROCENTRY_FLAG_EN 0x01 +#define PROCENTRY_FLAG_BP 0x02 +#define IOAPICENTRY_FLAG_EN 0x01 + + +/* MP Floating Pointer Structure */ +typedef struct MPFPS { + char signature[4]; + u_int32_t pap; + u_char length; + u_char spec_rev; + u_char checksum; + u_char mpfb1; + u_char mpfb2; + u_char mpfb3; + u_char mpfb4; + u_char mpfb5; +} *mpfps_t; + +/* MP Configuration Table Header */ +typedef struct MPCTH { + char signature[4]; + u_short base_table_length; + u_char spec_rev; + u_char checksum; + u_char oem_id[8]; + u_char product_id[12]; + void *oem_table_pointer; + u_short oem_table_size; + u_short entry_count; + void *apic_address; + u_short extended_table_length; + u_char extended_table_checksum; + u_char reserved; +} *mpcth_t; + + +typedef struct PROCENTRY { + u_char type; + u_char apic_id; + u_char apic_version; + u_char cpu_flags; + u_long cpu_signature; + u_long feature_flags; + u_long reserved1; + u_long reserved2; +} *proc_entry_ptr; + +typedef struct BUSENTRY { + u_char type; + u_char bus_id; + char bus_type[6]; +} *bus_entry_ptr; + +typedef struct IOAPICENTRY { + u_char type; + u_char apic_id; + u_char apic_version; + u_char apic_flags; + void *apic_address; +} *io_apic_entry_ptr; + +typedef struct INTENTRY { + u_char type; + u_char int_type; + u_short int_flags; + u_char src_bus_id; + u_char src_bus_irq; + u_char dst_apic_id; + u_char dst_apic_int; +} *int_entry_ptr; + +/* descriptions of MP basetable entries */ +typedef struct BASETABLE_ENTRY { + u_char type; + u_char length; + char name[16]; +} basetable_entry; + +/* + * this code MUST be enabled here and in mpboot.s. + * it follows the very early stages of AP boot by placing values in CMOS ram. + * it NORMALLY will never be needed and thus the primitive method for enabling. + * + */ +#if defined(CHECK_POINTS) +#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) +#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) + +#define CHECK_INIT(D); \ + CHECK_WRITE(0x34, (D)); \ + CHECK_WRITE(0x35, (D)); \ + CHECK_WRITE(0x36, (D)); \ + CHECK_WRITE(0x37, (D)); \ + CHECK_WRITE(0x38, (D)); \ + CHECK_WRITE(0x39, (D)); + +#define CHECK_PRINT(S); \ + kprintf("%s: %d, %d, %d, %d, %d, %d\n", \ + (S), \ + CHECK_READ(0x34), \ + CHECK_READ(0x35), \ + CHECK_READ(0x36), \ + CHECK_READ(0x37), \ + CHECK_READ(0x38), \ + CHECK_READ(0x39)); + +#else /* CHECK_POINTS */ + +#define CHECK_INIT(D) +#define CHECK_PRINT(S) + +#endif /* CHECK_POINTS */ + +/* + * Values to send to the POST hardware. 
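+ * Each major step of the SMP bringup passes one of these to POSTCODE()
+ * (recorded in current_postcode below), so a hang during AP startup can
+ * be localized to the step that never completed.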
+ */ +#define MP_BOOTADDRESS_POST 0x10 +#define MP_PROBE_POST 0x11 +#define MPTABLE_PASS1_POST 0x12 + +#define MP_START_POST 0x13 +#define MP_ENABLE_POST 0x14 +#define MPTABLE_PASS2_POST 0x15 + +#define START_ALL_APS_POST 0x16 +#define INSTALL_AP_TRAMP_POST 0x17 +#define START_AP_POST 0x18 + +#define MP_ANNOUNCE_POST 0x19 + +static int need_hyperthreading_fixup; +static u_int logical_cpus; +u_int logical_cpus_mask; + +/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ +int current_postcode; + +/** XXX FIXME: what system files declare these??? */ +extern struct region_descriptor r_gdt, r_idt; + +int bsp_apic_ready = 0; /* flags useability of BSP apic */ +int mp_naps; /* # of Applications processors */ +int mp_nbusses; /* # of busses */ +#ifdef APIC_IO +int mp_napics; /* # of IO APICs */ +#endif +int boot_cpu_id; /* designated BSP */ +vm_offset_t cpu_apic_address; +#ifdef APIC_IO +vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */ +u_int32_t *io_apic_versions; +#endif +extern int nkpt; + +u_int32_t cpu_apic_versions[MAXCPU]; +int64_t tsc0_offset; +extern int64_t tsc_offsets[]; + +#ifdef APIC_IO +struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; +#endif + +/* + * APIC ID logical/physical mapping structures. + * We oversize these to simplify boot-time config. + */ +int cpu_num_to_apic_id[NAPICID]; +#ifdef APIC_IO +int io_num_to_apic_id[NAPICID]; +#endif +int apic_id_to_logical[NAPICID]; + +/* AP uses this during bootstrap. Do not staticize. */ +char *bootSTK; +static int bootAP; + +/* Hotwire a 0->4MB V==P mapping */ +extern pt_entry_t *KPTphys; + +/* + * SMP page table page. Setup by locore to point to a page table + * page from which we allocate per-cpu privatespace areas io_apics, + * and so forth. + */ + +#define IO_MAPPING_START_INDEX \ + (SMP_MAXCPU * sizeof(struct privatespace) / PAGE_SIZE) + +extern pt_entry_t *SMPpt; +static int SMPpt_alloc_index = IO_MAPPING_START_INDEX; + +struct pcb stoppcbs[MAXCPU]; + +extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); + +extern void initializecpu(void); + +/* + * Local data and functions. + */ + +static int mp_capable; +static u_int boot_address; +static u_int base_memory; +static int mp_finish; + +static mpfps_t mpfps; +static int search_for_sig(u_int32_t target, int count); +static void mp_enable(u_int boot_addr); + +static void mptable_hyperthread_fixup(u_int id_mask); +static void mptable_pass1(void); +static int mptable_pass2(void); +static void default_mp_table(int type); +static void fix_mp_table(void); +#ifdef APIC_IO +static void setup_apic_irq_mapping(void); +static int apic_int_is_bus_type(int intr, int bus_type); +#endif +static int start_all_aps(u_int boot_addr); +static void install_ap_tramp(u_int boot_addr); +static int start_ap(struct mdglobaldata *gd, u_int boot_addr); + +static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ +cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ +SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); +static u_int bootMP_size; + +/* + * Calculate usable address in base memory for AP trampoline code. 
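+ * The trampoline is placed at the top of base memory, rounded down to
+ * a 4K boundary, with three pages below it reserved for the AP's
+ * bootstrap page tables (one per paging level). E.g. with 639KB of
+ * base memory, and a trampoline that fits in the remaining 0xc00
+ * bytes: boot_address = 0x9f000 and mptramp_pagetables = 0x9c000.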
+ */ +u_int +mp_bootaddress(u_int basemem) +{ + POSTCODE(MP_BOOTADDRESS_POST); + + bootMP_size = mptramp_end - mptramp_start; + base_memory = basemem; + + boot_address = base_memory & ~0xfff; /* round down to 4k boundary */ + if ((base_memory - boot_address) < bootMP_size) + boot_address -= 4096; /* not enough, lower by 4k */ + /* 3 levels of page table pages */ + mptramp_pagetables = boot_address - (PAGE_SIZE * 3); + + return mptramp_pagetables; +} + + +/* + * Look for an Intel MP spec table (ie, SMP capable hardware). + */ +int +mp_probe(void) +{ + int x; + u_long segment; + u_int32_t target; + + /* + * Make sure our SMPpt[] page table is big enough to hold all the + * mappings we need. + */ + KKASSERT(IO_MAPPING_START_INDEX < NPTEPG - 2); + + POSTCODE(MP_PROBE_POST); + + /* see if EBDA exists */ + if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { + /* search first 1K of EBDA */ + target = (u_int32_t) (segment << 4); + if ((x = search_for_sig(target, 1024 / 4)) >= 0) + goto found; + } else { + /* last 1K of base memory, effective 'top of base' passed in */ + target = (u_int32_t) (base_memory - 0x400); + if ((x = search_for_sig(target, 1024 / 4)) >= 0) + goto found; + } + + /* search the BIOS */ + target = (u_int32_t) BIOS_BASE; + if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) + goto found; + + /* nothing found */ + mpfps = (mpfps_t)0; + mp_capable = 0; + return 0; + +found: + /* + * Calculate needed resources. We can safely map physical + * memory into SMPpt after mptable_pass1() completes. + */ + mpfps = (mpfps_t)x; + mptable_pass1(); + + /* flag fact that we are running multiple processors */ + mp_capable = 1; + return 1; +} + + +/* + * Startup the SMP processors. + */ +void +mp_start(void) +{ + POSTCODE(MP_START_POST); + + /* look for MP capable motherboard */ + if (mp_capable) + mp_enable(boot_address); + else + panic("MP hardware not found!"); +} + + +/* + * Print various information about the SMP system hardware and setup. + */ +void +mp_announce(void) +{ + int x; + + POSTCODE(MP_ANNOUNCE_POST); + + kprintf("DragonFly/MP: Multiprocessor motherboard\n"); + kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0)); + kprintf(", version: 0x%08x", cpu_apic_versions[0]); + kprintf(", at 0x%08x\n", cpu_apic_address); + for (x = 1; x <= mp_naps; ++x) { + kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x)); + kprintf(", version: 0x%08x", cpu_apic_versions[x]); + kprintf(", at 0x%08x\n", cpu_apic_address); + } + +#if defined(APIC_IO) + for (x = 0; x < mp_napics; ++x) { + kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x)); + kprintf(", version: 0x%08x", io_apic_versions[x]); + kprintf(", at 0x%08x\n", io_apic_address[x]); + } +#else + kprintf(" Warning: APIC I/O disabled\n"); +#endif /* APIC_IO */ +} + +/* + * AP cpu's call this to sync up protected mode. + * + * WARNING! We must ensure that the cpu is sufficiently initialized to + * be able to use to the FP for our optimized bzero/bcopy code before + * we enter more mainstream C code. + * + * WARNING! %fs is not set up on entry. This routine sets up %fs. 
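+ *
+ * (On amd64 the per-cpu segment is actually %gs: the code below loads
+ * it via wrmsr(MSR_GSBASE, ...) and reaches mdcpu through %gs:0. The
+ * %fs wording appears to be a leftover from the i386 version.)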
+ */ +void +init_secondary(void) +{ + int gsel_tss; + int x, myid = bootAP; + u_int64_t msr, cr0; + struct mdglobaldata *md; + struct privatespace *ps; + + ps = &CPU_prvspace[myid]; + + gdt_segs[GPROC0_SEL].ssd_base = + (long) &ps->mdglobaldata.gd_common_tss; + ps->mdglobaldata.mi.gd_prvspace = ps; + + /* We fill the 32-bit segment descriptors */ + for (x = 0; x < NGDT; x++) { + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x]); + } + /* And now a 64-bit one */ + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[myid * NGDT + GPROC0_SEL]); + + r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; + r_gdt.rd_base = (long) &gdt[myid * NGDT]; + lgdt(&r_gdt); /* does magic intra-segment return */ + + lidt(&r_idt); + +#if 0 + lldt(_default_ldt); + mdcpu->gd_currentldt = _default_ldt; +#endif + + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + gdt[myid * NGDT + GPROC0_SEL].sd_type = SDT_SYSTSS; + + md = mdcpu; /* loaded through %gs:0 (mdglobaldata.mi.gd_prvspace)*/ + + md->gd_common_tss.tss_rsp0 = 0; /* not used until after switch */ +#if 0 /* JG XXX */ + md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; +#endif + md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL]; + md->gd_common_tssd = *md->gd_tss_gdt; +#if 0 /* JG XXX */ + md->gd_common_tss.tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; +#endif + ltr(gsel_tss); + + wrmsr(MSR_FSBASE, 0); /* User value */ + wrmsr(MSR_GSBASE, (u_int64_t)md); + wrmsr(MSR_KGSBASE, 0); /* XXX User value while we're in the kernel */ + + /* + * Set to a known state: + * Set by mpboot.s: CR0_PG, CR0_PE + * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM + */ + cr0 = rcr0(); + cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); + load_cr0(cr0); + + /* Set up the fast syscall stuff */ + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + + pmap_set_opt(); /* PSE/4MB pages, etc */ +#if JGXXX + /* Initialize the PAT MSR. 
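+	 * (Left disabled pending further amd64 pmap work, like the
+	 * other JG-marked blocks in this function.)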
*/ + pmap_init_pat(); +#endif + + /* set up CPU registers and state */ + cpu_setregs(); + + /* set up SSE/NX registers */ + initializecpu(); + + /* set up FPU state on the AP */ + npxinit(__INITIAL_NPXCW__); +} + +/******************************************************************* + * local functions and data + */ + +/* + * start the SMP system + */ +static void +mp_enable(u_int boot_addr) +{ + int x; +#if defined(APIC_IO) + int apic; + u_int ux; +#endif /* APIC_IO */ + + POSTCODE(MP_ENABLE_POST); + +#if 0 /* JGXXX */ + /* turn on 4MB of V == P addressing so we can get to MP table */ + *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME); + cpu_invltlb(); + + /* examine the MP table for needed info, uses physical addresses */ + x = mptable_pass2(); + + *(int *)PTD = 0; + cpu_invltlb(); +#endif /* 0 JGXXX */ + + /* can't process default configs till the CPU APIC is pmapped */ + if (x) + default_mp_table(x); + + /* post scan cleanup */ + fix_mp_table(); + +#if defined(APIC_IO) + + setup_apic_irq_mapping(); + + /* fill the LOGICAL io_apic_versions table */ + for (apic = 0; apic < mp_napics; ++apic) { + ux = io_apic_read(apic, IOAPIC_VER); + io_apic_versions[apic] = ux; + io_apic_set_id(apic, IO_TO_ID(apic)); + } + + /* program each IO APIC in the system */ + for (apic = 0; apic < mp_napics; ++apic) + if (io_apic_setup(apic) < 0) + panic("IO APIC setup failure"); + +#endif /* APIC_IO */ + + /* + * These are required for SMP operation + */ + + /* install a 'Spurious INTerrupt' vector */ + setidt(XSPURIOUSINT_OFFSET, Xspuriousint, + SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for TLB invalidation */ + setidt(XINVLTLB_OFFSET, Xinvltlb, + SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for IPIQ messaging */ + setidt(XIPIQ_OFFSET, Xipiq, + SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install a timer vector */ + setidt(XTIMER_OFFSET, Xtimer, + SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for CPU stop/restart */ + setidt(XCPUSTOP_OFFSET, Xcpustop, + SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* start each Application Processor */ + start_all_aps(boot_addr); +} + + +/* + * look for the MP spec signature + */ + +/* string defined by the Intel MP Spec as identifying the MP table */ +#define MP_SIG 0x5f504d5f /* _MP_ */ +#define NEXT(X) ((X) += 4) +static int +search_for_sig(u_int32_t target, int count) +{ + int x; + u_int32_t *addr = (u_int32_t *) (KERNBASE + target); + + for (x = 0; x < count; NEXT(x)) + if (addr[x] == MP_SIG) + /* make array index a byte index */ + return (target + (x * sizeof(u_int32_t))); + + return -1; +} + + +static basetable_entry basetable_entry_types[] = +{ + {0, 20, "Processor"}, + {1, 8, "Bus"}, + {2, 8, "I/O APIC"}, + {3, 8, "I/O INT"}, + {4, 8, "Local INT"} +}; + +typedef struct BUSDATA { + u_char bus_id; + enum busTypes bus_type; +} bus_datum; + +typedef struct INTDATA { + u_char int_type; + u_short int_flags; + u_char src_bus_id; + u_char src_bus_irq; + u_char dst_apic_id; + u_char dst_apic_int; + u_char int_vector; +} io_int, local_int; + +typedef struct BUSTYPENAME { + u_char type; + char name[7]; +} bus_type_name; + +static bus_type_name bus_type_table[] = +{ + {CBUS, "CBUS"}, + {CBUSII, "CBUSII"}, + {EISA, "EISA"}, + {MCA, "MCA"}, + {UNKNOWN_BUSTYPE, "---"}, + {ISA, "ISA"}, + {MCA, "MCA"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + 
{PCI, "PCI"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {UNKNOWN_BUSTYPE, "---"}, + {XPRESS, "XPRESS"}, + {UNKNOWN_BUSTYPE, "---"} +}; +/* from MP spec v1.4, table 5-1 */ +static int default_data[7][5] = +{ +/* nbus, id0, type0, id1, type1 */ + {1, 0, ISA, 255, 255}, + {1, 0, EISA, 255, 255}, + {1, 0, EISA, 255, 255}, + {1, 0, MCA, 255, 255}, + {2, 0, ISA, 1, PCI}, + {2, 0, EISA, 1, PCI}, + {2, 0, MCA, 1, PCI} +}; + + +/* the bus data */ +static bus_datum *bus_data; + +#ifdef APIC_IO +/* the IO INT data, one entry per possible APIC INTerrupt */ +static io_int *io_apic_ints; +static int nintrs; +#endif + +static int processor_entry (proc_entry_ptr entry, int cpu); +static int bus_entry (bus_entry_ptr entry, int bus); +#ifdef APIC_IO +static int io_apic_entry (io_apic_entry_ptr entry, int apic); +static int int_entry (int_entry_ptr entry, int intr); +#endif +static int lookup_bus_type (char *name); + + +/* + * 1st pass on motherboard's Intel MP specification table. + * + * initializes: + * ncpus = 1 + * + * determines: + * cpu_apic_address (common to all CPUs) + * io_apic_address[N] + * mp_naps + * mp_nbusses + * mp_napics + * nintrs + */ +static void +mptable_pass1(void) +{ +#ifdef APIC_IO + int x; +#endif + mpcth_t cth; + int totalSize; + void* position; + int count; + int type; + u_int id_mask; + + POSTCODE(MPTABLE_PASS1_POST); + +#ifdef APIC_IO + /* clear various tables */ + for (x = 0; x < NAPICID; ++x) { + io_apic_address[x] = ~0; /* IO APIC address table */ + } +#endif + + /* init everything to empty */ + mp_naps = 0; + mp_nbusses = 0; +#ifdef APIC_IO + mp_napics = 0; + nintrs = 0; +#endif + id_mask = 0; + + /* check for use of 'default' configuration */ + if (mpfps->mpfb1 != 0) { + /* use default addresses */ + cpu_apic_address = DEFAULT_APIC_BASE; +#ifdef APIC_IO + io_apic_address[0] = DEFAULT_IO_APIC_BASE; +#endif + + /* fill in with defaults */ + mp_naps = 2; /* includes BSP */ + mp_nbusses = default_data[mpfps->mpfb1 - 1][0]; +#if defined(APIC_IO) + mp_napics = 1; + nintrs = 16; +#endif /* APIC_IO */ + } + else { + if ((cth = mpfps->pap) == 0) + panic("MP Configuration Table Header MISSING!"); + + cpu_apic_address = (vm_offset_t) cth->apic_address; + + /* walk the table, recording info of interest */ + totalSize = cth->base_table_length - sizeof(struct MPCTH); + position = (u_char *) cth + sizeof(struct MPCTH); + count = cth->entry_count; + + while (count--) { + switch (type = *(u_char *) position) { + case 0: /* processor_entry */ + if (((proc_entry_ptr)position)->cpu_flags + & PROCENTRY_FLAG_EN) { + ++mp_naps; + id_mask |= 1 << + ((proc_entry_ptr)position)->apic_id; + } + break; + case 1: /* bus_entry */ + ++mp_nbusses; + break; + case 2: /* io_apic_entry */ +#ifdef APIC_IO + if (((io_apic_entry_ptr)position)->apic_flags + & IOAPICENTRY_FLAG_EN) + io_apic_address[mp_napics++] = + (vm_offset_t)((io_apic_entry_ptr) + position)->apic_address; +#endif + break; + case 3: /* int_entry */ +#ifdef APIC_IO + ++nintrs; +#endif + break; + case 4: /* int_entry */ + break; + default: + panic("mpfps Base Table HOSED!"); + /* NOTREACHED */ + } + + totalSize -= basetable_entry_types[type].length; + position = (uint8_t *)position + + basetable_entry_types[type].length; + } + } + + /* qualify the numbers */ + if (mp_naps > MAXCPU) { + kprintf("Warning: only using %d of %d available CPUs!\n", + MAXCPU, mp_naps); + mp_naps = MAXCPU; + } + + /* See if we need to fixup HT logical CPUs. */ + mptable_hyperthread_fixup(id_mask); + + /* + * Count the BSP. 
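+	 * There is always exactly one BSP, so the count starts at 1.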
+ * This is also used as a counter while starting the APs. + */ + ncpus = 1; + + --mp_naps; /* subtract the BSP */ +} + + +/* + * 2nd pass on motherboard's Intel MP specification table. + * + * sets: + * boot_cpu_id + * ID_TO_IO(N), phy APIC ID to log CPU/IO table + * CPU_TO_ID(N), logical CPU to APIC ID table + * IO_TO_ID(N), logical IO to APIC ID table + * bus_data[N] + * io_apic_ints[N] + */ +static int +mptable_pass2(void) +{ + struct PROCENTRY proc; + int x; + mpcth_t cth; + int totalSize; + void* position; + int count; + int type; + int apic, bus, cpu, intr; + int i; + + POSTCODE(MPTABLE_PASS2_POST); + + /* Initialize fake proc entry for use with HT fixup. */ + bzero(&proc, sizeof(proc)); + proc.type = 0; + proc.cpu_flags = PROCENTRY_FLAG_EN; + +#ifdef APIC_IO + MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics, + M_DEVBUF, M_WAITOK); + MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics, + M_DEVBUF, M_WAITOK | M_ZERO); + MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + FIXUP_EXTRA_APIC_INTS), + M_DEVBUF, M_WAITOK); +#endif + MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses, + M_DEVBUF, M_WAITOK); + +#ifdef APIC_IO + for (i = 0; i < mp_napics; i++) { + ioapic[i] = permanent_io_mapping(io_apic_address[i]); + } +#endif + + /* clear various tables */ + for (x = 0; x < NAPICID; ++x) { + CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */ +#ifdef APIC_IO + ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */ + IO_TO_ID(x) = -1; /* logical IO to APIC ID table */ +#endif + } + + /* clear bus data table */ + for (x = 0; x < mp_nbusses; ++x) + bus_data[x].bus_id = 0xff; + +#ifdef APIC_IO + /* clear IO APIC INT table */ + for (x = 0; x < (nintrs + 1); ++x) { + io_apic_ints[x].int_type = 0xff; + io_apic_ints[x].int_vector = 0xff; + } +#endif + + /* setup the cpu/apic mapping arrays */ + boot_cpu_id = -1; + + /* record whether PIC or virtual-wire mode */ + machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT, mpfps->mpfb2 & 0x80); + + /* check for use of 'default' configuration */ + if (mpfps->mpfb1 != 0) + return mpfps->mpfb1; /* return default configuration type */ + + if ((cth = mpfps->pap) == 0) + panic("MP Configuration Table Header MISSING!"); + + /* walk the table, recording info of interest */ + totalSize = cth->base_table_length - sizeof(struct MPCTH); + position = (u_char *) cth + sizeof(struct MPCTH); + count = cth->entry_count; + apic = bus = intr = 0; + cpu = 1; /* pre-count the BSP */ + + while (count--) { + switch (type = *(u_char *) position) { + case 0: + if (processor_entry(position, cpu)) + ++cpu; + + if (need_hyperthreading_fixup) { + /* + * Create fake mptable processor entries + * and feed them to processor_entry() to + * enumerate the logical CPUs. 
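+				 * Logical siblings are assumed to occupy
+				 * consecutive APIC IDs, which is why simply
+				 * incrementing proc.apic_id below works.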
+				 */
+				proc.apic_id = ((proc_entry_ptr)position)->apic_id;
+				for (i = 1; i < logical_cpus; i++) {
+					proc.apic_id++;
+					processor_entry(&proc, cpu);
+					logical_cpus_mask |= (1 << cpu);
+					cpu++;
+				}
+			}
+			break;
+		case 1:
+			if (bus_entry(position, bus))
+				++bus;
+			break;
+		case 2:
+#ifdef APIC_IO
+			if (io_apic_entry(position, apic))
+				++apic;
+#endif
+			break;
+		case 3:
+#ifdef APIC_IO
+			if (int_entry(position, intr))
+				++intr;
+#endif
+			break;
+		case 4:
+			/* int_entry(position); */
+			break;
+		default:
+			panic("mpfps Base Table HOSED!");
+			/* NOTREACHED */
+		}
+
+		totalSize -= basetable_entry_types[type].length;
+		position = (uint8_t *)position + basetable_entry_types[type].length;
+	}
+
+	if (boot_cpu_id == -1)
+		panic("NO BSP found!");
+
+	/* report the fact that it's NOT a default configuration */
+	return 0;
+}
+
+/*
+ * Check if we should perform a hyperthreading "fix-up" to
+ * enumerate any logical CPUs that aren't already listed
+ * in the table.
+ *
+ * XXX: We assume that all of the physical CPUs in the
+ * system have the same number of logical CPUs.
+ *
+ * XXX: We assume that APIC IDs are allocated such that
+ * the APIC IDs for a physical processor are aligned
+ * with the number of logical CPUs in the processor.
+ */
+static void
+mptable_hyperthread_fixup(u_int id_mask)
+{
+	u_int i, id;
+
+	/* Nothing to do if there is no HTT support. */
+	if ((cpu_feature & CPUID_HTT) == 0)
+		return;
+	logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+	if (logical_cpus <= 1)
+		return;
+
+	/*
+	 * For each APIC ID of a CPU that is set in the mask,
+	 * scan the other candidate APIC IDs for this
+	 * physical processor.  If any of those IDs are
+	 * already in the table, then kill the fixup.
+	 */
+	for (id = 0; id <= MAXCPU; id++) {
+		if ((id_mask & 1 << id) == 0)
+			continue;
+		/* First, make sure we are on a logical_cpus boundary. */
+		if (id % logical_cpus != 0)
+			return;
+		for (i = id + 1; i < id + logical_cpus; i++)
+			if ((id_mask & 1 << i) != 0)
+				return;
+	}
+
+	/*
+	 * Ok, the IDs checked out, so enable the fixup.  We have to fix up
+	 * mp_naps right now.
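+	 * The AP startup loop iterates from 1 to mp_naps, so the logical
+	 * CPUs must be folded into the count before the APs are started.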
+ */ + need_hyperthreading_fixup = 1; + mp_naps *= logical_cpus; +} + +#ifdef APIC_IO + +void +assign_apic_irq(int apic, int intpin, int irq) +{ + int x; + + if (int_to_apicintpin[irq].ioapic != -1) + panic("assign_apic_irq: inconsistent table"); + + int_to_apicintpin[irq].ioapic = apic; + int_to_apicintpin[irq].int_pin = intpin; + int_to_apicintpin[irq].apic_address = ioapic[apic]; + int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin; + + for (x = 0; x < nintrs; x++) { + if ((io_apic_ints[x].int_type == 0 || + io_apic_ints[x].int_type == 3) && + io_apic_ints[x].int_vector == 0xff && + io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) && + io_apic_ints[x].dst_apic_int == intpin) + io_apic_ints[x].int_vector = irq; + } +} + +void +revoke_apic_irq(int irq) +{ + int x; + int oldapic; + int oldintpin; + + if (int_to_apicintpin[irq].ioapic == -1) + panic("revoke_apic_irq: inconsistent table"); + + oldapic = int_to_apicintpin[irq].ioapic; + oldintpin = int_to_apicintpin[irq].int_pin; + + int_to_apicintpin[irq].ioapic = -1; + int_to_apicintpin[irq].int_pin = 0; + int_to_apicintpin[irq].apic_address = NULL; + int_to_apicintpin[irq].redirindex = 0; + + for (x = 0; x < nintrs; x++) { + if ((io_apic_ints[x].int_type == 0 || + io_apic_ints[x].int_type == 3) && + io_apic_ints[x].int_vector != 0xff && + io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) && + io_apic_ints[x].dst_apic_int == oldintpin) + io_apic_ints[x].int_vector = 0xff; + } +} + +/* + * Allocate an IRQ + */ +static void +allocate_apic_irq(int intr) +{ + int apic; + int intpin; + int irq; + + if (io_apic_ints[intr].int_vector != 0xff) + return; /* Interrupt handler already assigned */ + + if (io_apic_ints[intr].int_type != 0 && + (io_apic_ints[intr].int_type != 3 || + (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) && + io_apic_ints[intr].dst_apic_int == 0))) + return; /* Not INT or ExtInt on != (0, 0) */ + + irq = 0; + while (irq < APIC_INTMAPSIZE && + int_to_apicintpin[irq].ioapic != -1) + irq++; + + if (irq >= APIC_INTMAPSIZE) + return; /* No free interrupt handlers */ + + apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id); + intpin = io_apic_ints[intr].dst_apic_int; + + assign_apic_irq(apic, intpin, irq); + io_apic_setup_intpin(apic, intpin); +} + + +static void +swap_apic_id(int apic, int oldid, int newid) +{ + int x; + int oapic; + + + if (oldid == newid) + return; /* Nothing to do */ + + kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n", + apic, oldid, newid); + + /* Swap physical APIC IDs in interrupt entries */ + for (x = 0; x < nintrs; x++) { + if (io_apic_ints[x].dst_apic_id == oldid) + io_apic_ints[x].dst_apic_id = newid; + else if (io_apic_ints[x].dst_apic_id == newid) + io_apic_ints[x].dst_apic_id = oldid; + } + + /* Swap physical APIC IDs in IO_TO_ID mappings */ + for (oapic = 0; oapic < mp_napics; oapic++) + if (IO_TO_ID(oapic) == newid) + break; + + if (oapic < mp_napics) { + kprintf("Changing APIC ID for IO APIC #%d from " + "%d to %d in MP table\n", + oapic, newid, oldid); + IO_TO_ID(oapic) = oldid; + } + IO_TO_ID(apic) = newid; +} + + +static void +fix_id_to_io_mapping(void) +{ + int x; + + for (x = 0; x < NAPICID; x++) + ID_TO_IO(x) = -1; + + for (x = 0; x <= mp_naps; x++) + if (CPU_TO_ID(x) < NAPICID) + ID_TO_IO(CPU_TO_ID(x)) = x; + + for (x = 0; x < mp_napics; x++) + if (IO_TO_ID(x) < NAPICID) + ID_TO_IO(IO_TO_ID(x)) = x; +} + + +static int +first_free_apic_id(void) +{ + int freeid, x; + + for (freeid = 0; freeid < NAPICID; freeid++) { + for (x = 0; x <= mp_naps; x++) + if (CPU_TO_ID(x) == 
freeid)
+				break;
+		if (x <= mp_naps)
+			continue;
+		for (x = 0; x < mp_napics; x++)
+			if (IO_TO_ID(x) == freeid)
+				break;
+		if (x < mp_napics)
+			continue;
+		return freeid;
+	}
+	return freeid;
+}
+
+
+static int
+io_apic_id_acceptable(int apic, int id)
+{
+	int cpu;		/* Logical CPU number */
+	int oapic;		/* Logical IO APIC number for other IO APIC */
+
+	if (id >= NAPICID)
+		return 0;	/* Out of range */
+
+	for (cpu = 0; cpu <= mp_naps; cpu++)
+		if (CPU_TO_ID(cpu) == id)
+			return 0;	/* Conflict with CPU */
+
+	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
+		if (IO_TO_ID(oapic) == id)
+			return 0;	/* Conflict with other APIC */
+
+	return 1;		/* ID is acceptable for IO APIC */
+}
+
+static
+io_int *
+io_apic_find_int_entry(int apic, int pin)
+{
+	int x;
+
+	/* search each of the possible INTerrupt sources */
+	for (x = 0; x < nintrs; ++x) {
+		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+		    (pin == io_apic_ints[x].dst_apic_int))
+			return (&io_apic_ints[x]);
+	}
+	return NULL;
+}
+
+#endif
+
+/*
+ * fix up problems found in the Intel MP specification table
+ */
+static void
+fix_mp_table(void)
+{
+	int x;
+#ifdef APIC_IO
+	int id;
+	int apic;		/* IO APIC unit number */
+	int freeid;		/* Free physical APIC ID */
+	int physid;		/* Current physical IO APIC ID */
+	io_int *io14;
+#endif
+	int bus_0 = 0;		/* Stop GCC warning */
+	int bus_pci = 0;	/* Stop GCC warning */
+	int num_pci_bus;
+
+	/*
+	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
+	 * did it wrong.  The MP spec says that when more than 1 PCI bus
+	 * exists the BIOS must begin with bus entries for the PCI bus and use
+	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
+	 * exists the BIOS can choose to ignore this ordering, and indeed many
+	 * MP motherboards do ignore it.  This causes a problem when the PCI
+	 * sub-system makes requests of the MP sub-system based on PCI bus
+	 * numbers.	So here we look for the situation and renumber the
+	 * busses and associated INTs in an effort to "make it right".
+	 */
+
+	/* find bus 0, PCI bus, count the number of PCI busses */
+	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
+		if (bus_data[x].bus_id == 0) {
+			bus_0 = x;
+		}
+		if (bus_data[x].bus_type == PCI) {
+			++num_pci_bus;
+			bus_pci = x;
+		}
+	}
+	/*
+	 * bus_0 == slot of bus with ID of 0
+	 * bus_pci == slot of last PCI bus encountered
+	 */
+
+	/* check the 1 PCI bus case for sanity */
+	/* if it is number 0 all is well */
+	if (num_pci_bus == 1 &&
+	    bus_data[bus_pci].bus_id != 0) {
+
+		/* mis-numbered, swap with whichever bus uses slot 0 */
+
+		/* swap the bus entry types */
+		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
+		bus_data[bus_0].bus_type = PCI;
+
+#ifdef APIC_IO
+		/* swap each relevant INTerrupt entry */
+		id = bus_data[bus_pci].bus_id;
+		for (x = 0; x < nintrs; ++x) {
+			if (io_apic_ints[x].src_bus_id == id) {
+				io_apic_ints[x].src_bus_id = 0;
+			}
+			else if (io_apic_ints[x].src_bus_id == 0) {
+				io_apic_ints[x].src_bus_id = id;
+			}
+		}
+#endif
+	}
+
+#ifdef APIC_IO
+	/* Assign IO APIC IDs.
+	 *
+	 * First try the existing ID. If a conflict is detected, try
+	 * the ID in the MP table.  If a conflict is still detected, find
+	 * a free id.
+	 *
+	 * We cannot use the ID_TO_IO table before all conflicts have been
+	 * resolved and the table has been corrected.
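+	 * Until then we must work from io_apic_get_id() and IO_TO_ID()
+	 * directly.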
+ */ + for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */ + + /* First try to use the value set by the BIOS */ + physid = io_apic_get_id(apic); + if (io_apic_id_acceptable(apic, physid)) { + if (IO_TO_ID(apic) != physid) + swap_apic_id(apic, IO_TO_ID(apic), physid); + continue; + } + + /* Then check if the value in the MP table is acceptable */ + if (io_apic_id_acceptable(apic, IO_TO_ID(apic))) + continue; + + /* Last resort, find a free APIC ID and use it */ + freeid = first_free_apic_id(); + if (freeid >= NAPICID) + panic("No free physical APIC IDs found"); + + if (io_apic_id_acceptable(apic, freeid)) { + swap_apic_id(apic, IO_TO_ID(apic), freeid); + continue; + } + panic("Free physical APIC ID not usable"); + } + fix_id_to_io_mapping(); +#endif + +#ifdef APIC_IO + /* detect and fix broken Compaq MP table */ + if (apic_int_type(0, 0) == -1) { + kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n"); + io_apic_ints[nintrs].int_type = 3; /* ExtInt */ + io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */ + /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */ + io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0); + io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */ + nintrs++; + } else if (apic_int_type(0, 0) == 0) { + kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n"); + for (x = 0; x < nintrs; ++x) + if ((0 == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (0 == io_apic_ints[x].dst_apic_int)) { + io_apic_ints[x].int_type = 3; + io_apic_ints[x].int_vector = 0xff; + break; + } + } + + /* + * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt. IDE + * controllers universally come in pairs. If IRQ 14 is specified + * as an ISA interrupt, then IRQ 15 had better be too. + * + * [ Shuttle XPC / AMD Athlon X2 ] + * The MPTable is missing an entry for IRQ 15. Note that the + * ACPI table has an entry for both 14 and 15. 
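+	 * We clone the IRQ 14 entry for IRQ 15 below rather than guess at
+	 * the routing.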
+ */ + if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) { + kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n"); + io14 = io_apic_find_int_entry(0, 14); + io_apic_ints[nintrs] = *io14; + io_apic_ints[nintrs].src_bus_irq = 15; + io_apic_ints[nintrs].dst_apic_int = 15; + nintrs++; + } +#endif +} + +#ifdef APIC_IO + +/* Assign low level interrupt handlers */ +static void +setup_apic_irq_mapping(void) +{ + int x; + int int_vector; + + /* Clear array */ + for (x = 0; x < APIC_INTMAPSIZE; x++) { + int_to_apicintpin[x].ioapic = -1; + int_to_apicintpin[x].int_pin = 0; + int_to_apicintpin[x].apic_address = NULL; + int_to_apicintpin[x].redirindex = 0; + } + + /* First assign ISA/EISA interrupts */ + for (x = 0; x < nintrs; x++) { + int_vector = io_apic_ints[x].src_bus_irq; + if (int_vector < APIC_INTMAPSIZE && + io_apic_ints[x].int_vector == 0xff && + int_to_apicintpin[int_vector].ioapic == -1 && + (apic_int_is_bus_type(x, ISA) || + apic_int_is_bus_type(x, EISA)) && + io_apic_ints[x].int_type == 0) { + assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), + io_apic_ints[x].dst_apic_int, + int_vector); + } + } + + /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */ + for (x = 0; x < nintrs; x++) { + if (io_apic_ints[x].dst_apic_int == 0 && + io_apic_ints[x].dst_apic_id == IO_TO_ID(0) && + io_apic_ints[x].int_vector == 0xff && + int_to_apicintpin[0].ioapic == -1 && + io_apic_ints[x].int_type == 3) { + assign_apic_irq(0, 0, 0); + break; + } + } + /* PCI interrupt assignment is deferred */ +} + +#endif + +static int +processor_entry(proc_entry_ptr entry, int cpu) +{ + /* check for usability */ + if (!(entry->cpu_flags & PROCENTRY_FLAG_EN)) + return 0; + + if(entry->apic_id >= NAPICID) + panic("CPU APIC ID out of range (0..%d)", NAPICID - 1); + /* check for BSP flag */ + if (entry->cpu_flags & PROCENTRY_FLAG_BP) { + boot_cpu_id = entry->apic_id; + CPU_TO_ID(0) = entry->apic_id; + ID_TO_CPU(entry->apic_id) = 0; + return 0; /* its already been counted */ + } + + /* add another AP to list, if less than max number of CPUs */ + else if (cpu < MAXCPU) { + CPU_TO_ID(cpu) = entry->apic_id; + ID_TO_CPU(entry->apic_id) = cpu; + return 1; + } + + return 0; +} + + +static int +bus_entry(bus_entry_ptr entry, int bus) +{ + int x; + char c, name[8]; + + /* encode the name into an index */ + for (x = 0; x < 6; ++x) { + if ((c = entry->bus_type[x]) == ' ') + break; + name[x] = c; + } + name[x] = '\0'; + + if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE) + panic("unknown bus type: '%s'", name); + + bus_data[bus].bus_id = entry->bus_id; + bus_data[bus].bus_type = x; + + return 1; +} + +#ifdef APIC_IO + +static int +io_apic_entry(io_apic_entry_ptr entry, int apic) +{ + if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) + return 0; + + IO_TO_ID(apic) = entry->apic_id; + if (entry->apic_id < NAPICID) + ID_TO_IO(entry->apic_id) = apic; + + return 1; +} + +#endif + +static int +lookup_bus_type(char *name) +{ + int x; + + for (x = 0; x < MAX_BUSTYPE; ++x) + if (strcmp(bus_type_table[x].name, name) == 0) + return bus_type_table[x].type; + + return UNKNOWN_BUSTYPE; +} + +#ifdef APIC_IO + +static int +int_entry(int_entry_ptr entry, int intr) +{ + int apic; + + io_apic_ints[intr].int_type = entry->int_type; + io_apic_ints[intr].int_flags = entry->int_flags; + io_apic_ints[intr].src_bus_id = entry->src_bus_id; + io_apic_ints[intr].src_bus_irq = entry->src_bus_irq; + if (entry->dst_apic_id == 255) { + /* This signal goes to all IO APICS. 
Select an IO APIC + with sufficient number of interrupt pins */ + for (apic = 0; apic < mp_napics; apic++) + if (((io_apic_read(apic, IOAPIC_VER) & + IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= + entry->dst_apic_int) + break; + if (apic < mp_napics) + io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic); + else + io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; + } else + io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; + io_apic_ints[intr].dst_apic_int = entry->dst_apic_int; + + return 1; +} + +static int +apic_int_is_bus_type(int intr, int bus_type) +{ + int bus; + + for (bus = 0; bus < mp_nbusses; ++bus) + if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id) + && ((int) bus_data[bus].bus_type == bus_type)) + return 1; + + return 0; +} + +/* + * Given a traditional ISA INT mask, return an APIC mask. + */ +u_int +isa_apic_mask(u_int isa_mask) +{ + int isa_irq; + int apic_pin; + +#if defined(SKIP_IRQ15_REDIRECT) + if (isa_mask == (1 << 15)) { + kprintf("skipping ISA IRQ15 redirect\n"); + return isa_mask; + } +#endif /* SKIP_IRQ15_REDIRECT */ + + isa_irq = ffs(isa_mask); /* find its bit position */ + if (isa_irq == 0) /* doesn't exist */ + return 0; + --isa_irq; /* make it zero based */ + + apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */ + if (apic_pin == -1) + return 0; + + return (1 << apic_pin); /* convert pin# to a mask */ +} + +/* + * Determine which APIC pin an ISA/EISA INT is attached to. + */ +#define INTTYPE(I) (io_apic_ints[(I)].int_type) +#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int) +#define INTIRQ(I) (io_apic_ints[(I)].int_vector) +#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id)) + +#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq) +int +isa_apic_irq(int isa_irq) +{ + int intr; + + for (intr = 0; intr < nintrs; ++intr) { /* check each record */ + if (INTTYPE(intr) == 0) { /* standard INT */ + if (SRCBUSIRQ(intr) == isa_irq) { + if (apic_int_is_bus_type(intr, ISA) || + apic_int_is_bus_type(intr, EISA)) { + if (INTIRQ(intr) == 0xff) + return -1; /* unassigned */ + return INTIRQ(intr); /* found */ + } + } + } + } + return -1; /* NOT found */ +} + + +/* + * Determine which APIC pin a PCI INT is attached to. 
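+ * The MP table packs the PCI source into src_bus_irq: bits 6:2 hold
+ * the device number and bits 1:0 the INT line (INTA#..INTD#), which is
+ * what the SRCBUSDEVICE/SRCBUSLINE macros below decode.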
+ */ +#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id) +#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f) +#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03) +int +pci_apic_irq(int pciBus, int pciDevice, int pciInt) +{ + int intr; + + --pciInt; /* zero based */ + + for (intr = 0; intr < nintrs; ++intr) { /* check each record */ + if ((INTTYPE(intr) == 0) /* standard INT */ + && (SRCBUSID(intr) == pciBus) + && (SRCBUSDEVICE(intr) == pciDevice) + && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */ + if (apic_int_is_bus_type(intr, PCI)) { + if (INTIRQ(intr) == 0xff) + allocate_apic_irq(intr); + if (INTIRQ(intr) == 0xff) + return -1; /* unassigned */ + return INTIRQ(intr); /* exact match */ + } + } + } + + return -1; /* NOT found */ +} + +int +next_apic_irq(int irq) +{ + int intr, ointr; + int bus, bustype; + + bus = 0; + bustype = 0; + for (intr = 0; intr < nintrs; intr++) { + if (INTIRQ(intr) != irq || INTTYPE(intr) != 0) + continue; + bus = SRCBUSID(intr); + bustype = apic_bus_type(bus); + if (bustype != ISA && + bustype != EISA && + bustype != PCI) + continue; + break; + } + if (intr >= nintrs) { + return -1; + } + for (ointr = intr + 1; ointr < nintrs; ointr++) { + if (INTTYPE(ointr) != 0) + continue; + if (bus != SRCBUSID(ointr)) + continue; + if (bustype == PCI) { + if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr)) + continue; + if (SRCBUSLINE(intr) != SRCBUSLINE(ointr)) + continue; + } + if (bustype == ISA || bustype == EISA) { + if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr)) + continue; + } + if (INTPIN(intr) == INTPIN(ointr)) + continue; + break; + } + if (ointr >= nintrs) { + return -1; + } + return INTIRQ(ointr); +} +#undef SRCBUSLINE +#undef SRCBUSDEVICE +#undef SRCBUSID +#undef SRCBUSIRQ + +#undef INTPIN +#undef INTIRQ +#undef INTAPIC +#undef INTTYPE + +#endif + +/* + * Reprogram the MB chipset to NOT redirect an ISA INTerrupt. + * + * XXX FIXME: + * Exactly what this means is unclear at this point. It is a solution + * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard + * could route any of the ISA INTs to upper (>15) IRQ values. But most would + * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an + * option. 
+ */ +int +undirect_isa_irq(int rirq) +{ +#if defined(READY) + if (bootverbose) + kprintf("Freeing redirected ISA irq %d.\n", rirq); + /** FIXME: tickle the MB redirector chip */ + return /* XXX */; +#else + if (bootverbose) + kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq); + return 0; +#endif /* READY */ +} + + +/* + * Reprogram the MB chipset to NOT redirect a PCI INTerrupt + */ +int +undirect_pci_irq(int rirq) +{ +#if defined(READY) + if (bootverbose) + kprintf("Freeing redirected PCI irq %d.\n", rirq); + + /** FIXME: tickle the MB redirector chip */ + return /* XXX */; +#else + if (bootverbose) + kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n", + rirq); + return 0; +#endif /* READY */ +} + + +/* + * given a bus ID, return: + * the bus type if found + * -1 if NOT found + */ +int +apic_bus_type(int id) +{ + int x; + + for (x = 0; x < mp_nbusses; ++x) + if (bus_data[x].bus_id == id) + return bus_data[x].bus_type; + + return -1; +} + +#ifdef APIC_IO + +/* + * given a LOGICAL APIC# and pin#, return: + * the associated src bus ID if found + * -1 if NOT found + */ +int +apic_src_bus_id(int apic, int pin) +{ + int x; + + /* search each of the possible INTerrupt sources */ + for (x = 0; x < nintrs; ++x) + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (pin == io_apic_ints[x].dst_apic_int)) + return (io_apic_ints[x].src_bus_id); + + return -1; /* NOT found */ +} + +/* + * given a LOGICAL APIC# and pin#, return: + * the associated src bus IRQ if found + * -1 if NOT found + */ +int +apic_src_bus_irq(int apic, int pin) +{ + int x; + + for (x = 0; x < nintrs; x++) + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (pin == io_apic_ints[x].dst_apic_int)) + return (io_apic_ints[x].src_bus_irq); + + return -1; /* NOT found */ +} + + +/* + * given a LOGICAL APIC# and pin#, return: + * the associated INTerrupt type if found + * -1 if NOT found + */ +int +apic_int_type(int apic, int pin) +{ + int x; + + /* search each of the possible INTerrupt sources */ + for (x = 0; x < nintrs; ++x) { + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (pin == io_apic_ints[x].dst_apic_int)) + return (io_apic_ints[x].int_type); + } + return -1; /* NOT found */ +} + +/* + * Return the IRQ associated with an APIC pin + */ +int +apic_irq(int apic, int pin) +{ + int x; + int res; + + for (x = 0; x < nintrs; ++x) { + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (pin == io_apic_ints[x].dst_apic_int)) { + res = io_apic_ints[x].int_vector; + if (res == 0xff) + return -1; + if (apic != int_to_apicintpin[res].ioapic) + panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic); + if (pin != int_to_apicintpin[res].int_pin) + panic("apic_irq inconsistent table (2)"); + return res; + } + } + return -1; +} + + +/* + * given a LOGICAL APIC# and pin#, return: + * the associated trigger mode if found + * -1 if NOT found + */ +int +apic_trigger(int apic, int pin) +{ + int x; + + /* search each of the possible INTerrupt sources */ + for (x = 0; x < nintrs; ++x) + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && + (pin == io_apic_ints[x].dst_apic_int)) + return ((io_apic_ints[x].int_flags >> 2) & 0x03); + + return -1; /* NOT found */ +} + + +/* + * given a LOGICAL APIC# and pin#, return: + * the associated 'active' level if found + * -1 if NOT found + */ +int +apic_polarity(int apic, int pin) +{ + int x; + + /* search each of the possible INTerrupt sources */ + for (x = 0; x < nintrs; ++x) + if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) 
&& + (pin == io_apic_ints[x].dst_apic_int)) + return (io_apic_ints[x].int_flags & 0x03); + + return -1; /* NOT found */ +} + +#endif + +/* + * set data according to MP defaults + * FIXME: probably not complete yet... + */ +static void +default_mp_table(int type) +{ + int ap_cpu_id; +#if defined(APIC_IO) + int io_apic_id; + int pin; +#endif /* APIC_IO */ + +#if 0 + kprintf(" MP default config type: %d\n", type); + switch (type) { + case 1: + kprintf(" bus: ISA, APIC: 82489DX\n"); + break; + case 2: + kprintf(" bus: EISA, APIC: 82489DX\n"); + break; + case 3: + kprintf(" bus: EISA, APIC: 82489DX\n"); + break; + case 4: + kprintf(" bus: MCA, APIC: 82489DX\n"); + break; + case 5: + kprintf(" bus: ISA+PCI, APIC: Integrated\n"); + break; + case 6: + kprintf(" bus: EISA+PCI, APIC: Integrated\n"); + break; + case 7: + kprintf(" bus: MCA+PCI, APIC: Integrated\n"); + break; + default: + kprintf(" future type\n"); + break; + /* NOTREACHED */ + } +#endif /* 0 */ + + boot_cpu_id = (lapic->id & APIC_ID_MASK) >> 24; + ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0; + + /* BSP */ + CPU_TO_ID(0) = boot_cpu_id; + ID_TO_CPU(boot_cpu_id) = 0; + + /* one and only AP */ + CPU_TO_ID(1) = ap_cpu_id; + ID_TO_CPU(ap_cpu_id) = 1; + +#if defined(APIC_IO) + /* one and only IO APIC */ + io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24; + + /* + * sanity check, refer to MP spec section 3.6.6, last paragraph + * necessary as some hardware isn't properly setting up the IO APIC + */ +#if defined(REALLY_ANAL_IOAPICID_VALUE) + if (io_apic_id != 2) { +#else + if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) { +#endif /* REALLY_ANAL_IOAPICID_VALUE */ + io_apic_set_id(0, 2); + io_apic_id = 2; + } + IO_TO_ID(0) = io_apic_id; + ID_TO_IO(io_apic_id) = 0; +#endif /* APIC_IO */ + + /* fill out bus entries */ + switch (type) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + bus_data[0].bus_id = default_data[type - 1][1]; + bus_data[0].bus_type = default_data[type - 1][2]; + bus_data[1].bus_id = default_data[type - 1][3]; + bus_data[1].bus_type = default_data[type - 1][4]; + break; + + /* case 4: case 7: MCA NOT supported */ + default: /* illegal/reserved */ + panic("BAD default MP config: %d", type); + /* NOTREACHED */ + } + +#if defined(APIC_IO) + /* general cases from MP v1.4, table 5-2 */ + for (pin = 0; pin < 16; ++pin) { + io_apic_ints[pin].int_type = 0; + io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */ + io_apic_ints[pin].src_bus_id = 0; + io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */ + io_apic_ints[pin].dst_apic_id = io_apic_id; + io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */ + } + + /* special cases from MP v1.4, table 5-2 */ + if (type == 2) { + io_apic_ints[2].int_type = 0xff; /* N/C */ + io_apic_ints[13].int_type = 0xff; /* N/C */ +#if !defined(APIC_MIXED_MODE) + /** FIXME: ??? */ + panic("sorry, can't support type 2 default yet"); +#endif /* APIC_MIXED_MODE */ + } + else + io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */ + + if (type == 7) + io_apic_ints[0].int_type = 0xff; /* N/C */ + else + io_apic_ints[0].int_type = 3; /* vectored 8259 */ +#endif /* APIC_IO */ +} + +/* + * Map a physical memory address representing I/O into KVA. The I/O + * block is assumed not to cross a page boundary. 
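+ * Mappings are carved out of SMPpt[] and reused when the same physical
+ * page is requested again; e.g. pass 2 maps each IO APIC with
+ * permanent_io_mapping(io_apic_address[i]).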
+ */
+void *
+permanent_io_mapping(vm_paddr_t pa)
+{
+	vm_offset_t vaddr;
+	int pgeflag;
+	int i;
+
+	KKASSERT(pa < 0x100000000LL);
+
+	pgeflag = 0;	/* not used for SMP yet */
+
+	/*
+	 * If the requested physical address has already been incidentally
+	 * mapped, just use the existing mapping.  Otherwise create a new
+	 * mapping.
+	 */
+	for (i = IO_MAPPING_START_INDEX; i < SMPpt_alloc_index; ++i) {
+		if (((vm_offset_t)SMPpt[i] & PG_FRAME) ==
+		    ((vm_offset_t)pa & PG_FRAME)) {
+			break;
+		}
+	}
+	if (i == SMPpt_alloc_index) {
+		if (i == NPTEPG - 2) {
+			panic("permanent_io_mapping: We ran out of space"
+			      " in SMPpt[]!");
+		}
+		SMPpt[i] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
+			   ((vm_offset_t)pa & PG_FRAME));
+		++SMPpt_alloc_index;
+	}
+	vaddr = (vm_offset_t)CPU_prvspace + (i * PAGE_SIZE) +
+		((vm_offset_t)pa & PAGE_MASK);
+	return ((void *)vaddr);
+}
+
+/*
+ * start each AP in our list
+ */
+static int
+start_all_aps(u_int boot_addr)
+{
+	vm_offset_t va = boot_address + KERNBASE;
+	u_int64_t *pt4, *pt3, *pt2;
+	int x, i, pg;
+	int shift;
+	u_char mpbiosreason;
+	u_long mpbioswarmvec;
+	struct mdglobaldata *gd;
+	struct privatespace *ps;
+	char *stack;
+	uintptr_t kptbase;
+
+	POSTCODE(START_ALL_APS_POST);
+
+	/* Initialize BSP's local APIC */
+	apic_initialize(TRUE);
+	bsp_apic_ready = 1;
+
+	/* install the AP 1st level boot code */
+	pmap_kenter(va, boot_address);
+	cpu_invlpg(va);		/* JG XXX */
+	bcopy(mptramp_start, (void *)va, bootMP_size);
+
+	/* Locate the page tables, they'll be below the trampoline */
+	pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
+	pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
+	pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
+
+	/* Create the initial 1GB replicated page tables */
+	for (i = 0; i < 512; i++) {
+		/* Each slot of the level 4 pages points to the same level 3 page */
+		pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
+		pt4[i] |= PG_V | PG_RW | PG_U;
+
+		/* Each slot of the level 3 pages points to the same level 2 page */
+		pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
+		pt3[i] |= PG_V | PG_RW | PG_U;
+
+		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
+		pt2[i] = i * (2 * 1024 * 1024);
+		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+	}
+
+	/* save the current value of the warm-start vector */
+	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
+	outb(CMOS_REG, BIOS_RESET);
+	mpbiosreason = inb(CMOS_DATA);
+
+	/* setup a vector to our boot code */
+	*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+	*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+	outb(CMOS_REG, BIOS_RESET);
+	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
+
+	/* start each AP */
+	for (x = 1; x <= mp_naps; ++x) {
+
+		/* This is a bit verbose, it will go away soon. 
*/ + + /* first page of AP's private space */ + pg = x * amd64_btop(sizeof(struct privatespace)); + + /* allocate new private data page(s) */ + gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, + MDGLOBALDATA_BASEALLOC_SIZE); +#if JGXXX + /* wire it into the private page table page */ + for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) { + SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t) + (PG_V | PG_RW | vtophys_pte((char *)gd + i)); + } + pg += MDGLOBALDATA_BASEALLOC_PAGES; + + SMPpt[pg + 0] = 0; /* *gd_CMAP1 */ + SMPpt[pg + 1] = 0; /* *gd_CMAP2 */ + SMPpt[pg + 2] = 0; /* *gd_CMAP3 */ + SMPpt[pg + 3] = 0; /* *gd_PMAP1 */ + + /* allocate and set up an idle stack data page */ + stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE); + for (i = 0; i < UPAGES; i++) { + SMPpt[pg + 4 + i] = (pt_entry_t) + (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack)); + } +#endif + + gd = &CPU_prvspace[x].mdglobaldata; /* official location */ + bzero(gd, sizeof(*gd)); + gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; + + /* prime data page for it to use */ + mi_gdinit(&gd->mi, x); + cpu_gdinit(gd, x); + gd->gd_CMAP1 = &SMPpt[pg + 0]; + gd->gd_CMAP2 = &SMPpt[pg + 1]; + gd->gd_CMAP3 = &SMPpt[pg + 2]; + gd->gd_PMAP1 = &SMPpt[pg + 3]; + gd->gd_CADDR1 = ps->CPAGE1; + gd->gd_CADDR2 = ps->CPAGE2; + gd->gd_CADDR3 = ps->CPAGE3; + gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; + gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); + bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); + + /* setup a vector to our boot code */ + *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ + + /* + * Setup the AP boot stack + */ + bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; + bootAP = x; + + /* attempt to start the Application Processor */ + CHECK_INIT(99); /* setup checkpoints */ + if (!start_ap(gd, boot_addr)) { + kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); + CHECK_PRINT("trace"); /* show checkpoints */ + /* better panic as the AP may be running loose */ + kprintf("panic y/n? [y] "); + if (cngetc() != 'n') + panic("bye-bye"); + } + CHECK_PRINT("trace"); /* show checkpoints */ + + /* record its version info */ + cpu_apic_versions[x] = cpu_apic_versions[0]; + } + + /* set ncpus to 1 + highest logical cpu. Not all may have come up */ + ncpus = x; + + /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ + for (shift = 0; (1 << shift) <= ncpus; ++shift) + ; + --shift; + ncpus2_shift = shift; + ncpus2 = 1 << shift; + ncpus2_mask = ncpus2 - 1; + + /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ + if ((1 << shift) < ncpus) + ++shift; + ncpus_fit = 1 << shift; + ncpus_fit_mask = ncpus_fit - 1; + + /* build our map of 'other' CPUs */ + mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid); + mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus); + bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); + + /* fill in our (BSP) APIC version */ + cpu_apic_versions[0] = lapic->version; + + /* restore the warmstart vector */ + *(u_long *) WARMBOOT_OFF = mpbioswarmvec; + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, mpbiosreason); + + /* + * NOTE! The idlestack for the BSP was setup by locore. Finish + * up, clean out the P==V mapping we did earlier. 
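+	 * (The cleanup below is still stubbed out behind JGXXX on pc64.)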
+	 */
+#if JGXXX
+	for (x = 0; x < NKPT; x++)
+		PTD[x] = 0;
+#endif
+	pmap_set_opt();
+
+	/* number of APs actually started */
+	return ncpus - 1;
+}
+
+
+/*
+ * load the 1st level AP boot code into base memory.
+ */
+
+/* targets for relocation */
+extern void bigJump(void);
+extern void bootCodeSeg(void);
+extern void bootDataSeg(void);
+extern void MPentry(void);
+extern u_int MP_GDT;
+extern u_int mp_gdtbase;
+
+static void
+install_ap_tramp(u_int boot_addr)
+{
+	int	x;
+	int	size = *(int *) ((u_long) & bootMP_size);
+	u_char *src = (u_char *) ((u_long) bootMP);
+	u_char *dst = (u_char *) boot_addr + KERNBASE;
+	u_int	boot_base = (u_int) bootMP;
+	u_int8_t *dst8;
+	u_int16_t *dst16;
+	u_int32_t *dst32;
+
+	POSTCODE(INSTALL_AP_TRAMP_POST);
+
+	for (x = 0; x < size; ++x)
+		*dst++ = *src++;
+
+	/*
+	 * Modify addresses in code we just moved to basemem.  Unfortunately
+	 * we need fairly detailed info about mpboot.S for this to work.
+	 * Changes to mpboot.S might require changes here.
+	 */
+
+	/* boot code is located in KERNEL space */
+	dst = (u_char *) boot_addr + KERNBASE;
+
+	/* modify the lgdt arg */
+	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
+	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
+
+	/* modify the ljmp target for MPentry() */
+	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
+	*dst32 = ((u_int) MPentry - KERNBASE);
+
+	/* modify the target for boot code segment */
+	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
+	dst8 = (u_int8_t *) (dst16 + 1);
+	*dst16 = (u_int) boot_addr & 0xffff;
+	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
+
+	/* modify the target for boot data segment */
+	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
+	dst8 = (u_int8_t *) (dst16 + 1);
+	*dst16 = (u_int) boot_addr & 0xffff;
+	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
+}
+
+
+/*
+ * this function starts the AP (application processor) identified
+ * by the APIC ID 'physical_cpu'.  It does quite a "song and dance"
+ * to accomplish this.  This is necessary because of the nuances
+ * of the different hardware we might encounter.  It ain't pretty,
+ * but it seems to work.
+ *
+ * NOTE: eventually an AP gets to ap_init(), which is called just
+ * before the AP goes into the LWKT scheduler's idle loop.
+ */
+static int
+start_ap(struct mdglobaldata *gd, u_int boot_addr)
+{
+	int	physical_cpu;
+	int	vector;
+	u_long	icr_lo, icr_hi;
+
+	POSTCODE(START_AP_POST);
+
+	/* get the PHYSICAL APIC ID# */
+	physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
+
+	/* calculate the vector */
+	vector = (boot_addr >> 12) & 0xff;
+
+	/* Make sure the target cpu sees everything */
+	wbinvd();
+
+	/*
+	 * First we do an INIT/RESET IPI.  This INIT IPI might be run,
+	 * resetting and running the target CPU.  OR this INIT IPI might be
+	 * latched (P5 bug), with the CPU waiting for the STARTUP IPI.  OR
+	 * this INIT IPI might be ignored.
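+	 * (This INIT, STARTUP, STARTUP sequence is the "universal start-up
+	 * algorithm" recommended by the Intel MP specification.)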
+	 */
+
+	/* setup the address for the target AP */
+	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
+	icr_hi |= (physical_cpu << 24);
+	lapic->icr_hi = icr_hi;
+
+	/* do an INIT IPI: assert RESET */
+	icr_lo = lapic->icr_lo & 0xfff00000;
+	lapic->icr_lo = icr_lo | 0x0000c500;
+
+	/* wait for pending status end */
+	while (lapic->icr_lo & APIC_DELSTAT_MASK)
+		 /* spin */ ;
+
+	/* do an INIT IPI: deassert RESET */
+	lapic->icr_lo = icr_lo | 0x00008500;
+
+	/* wait for pending status end */
+	u_sleep(10000);		/* wait ~10mS */
+	while (lapic->icr_lo & APIC_DELSTAT_MASK)
+		 /* spin */ ;
+
+	/*
+	 * next we do a STARTUP IPI: the previous INIT IPI might still be
+	 * latched (P5 bug); this 1st STARTUP would then terminate
+	 * immediately, and the previously started INIT IPI would continue.
+	 * OR the previous INIT IPI has already run, and this STARTUP IPI
+	 * will run.  OR the previous INIT IPI was ignored, and this
+	 * STARTUP IPI will run.
+	 */
+
+	/* do a STARTUP IPI */
+	lapic->icr_lo = icr_lo | 0x00000600 | vector;
+	while (lapic->icr_lo & APIC_DELSTAT_MASK)
+		 /* spin */ ;
+	u_sleep(200);		/* wait ~200uS */
+
+	/*
+	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+	 * the previous STARTUP IPI was cancelled by a latched INIT IPI.  OR
+	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+	 * recognized after hardware RESET or INIT IPI.
+	 */
+
+	lapic->icr_lo = icr_lo | 0x00000600 | vector;
+	while (lapic->icr_lo & APIC_DELSTAT_MASK)
+		 /* spin */ ;
+	u_sleep(200);		/* wait ~200uS */
+
+	/* wait for it to start, see ap_init() */
+	set_apic_timer(5000000);/* == 5 seconds */
+	while (read_apic_timer()) {
+		if (smp_startup_mask & (1 << gd->mi.gd_cpuid))
+			return 1;	/* return SUCCESS */
+	}
+	return 0;		/* return FAILURE */
+}
+
+
+/*
+ * Lazy flush the TLB on all other CPUs.  DEPRECATED.
+ *
+ * If for some reason we were unable to start all cpus we cannot safely
+ * use broadcast IPIs.
+ */
+void
+smp_invltlb(void)
+{
+#ifdef SMP
+	if (smp_startup_mask == smp_active_mask) {
+		all_but_self_ipi(XINVLTLB_OFFSET);
+	} else {
+		selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET,
+				  APIC_DELMODE_FIXED);
+	}
+#endif
+}
+
+/*
+ * When called the executing CPU will send an IPI to all other CPUs
+ * requesting that they halt execution.
+ *
+ * Usually (but not necessarily) called with 'other_cpus' as its arg.
+ *
+ *  - Signals all CPUs in map to stop.
+ *  - Waits for each to stop.
+ *
+ * Returns:
+ *  -1: error
+ *   0: NA
+ *   1: ok
+ *
+ * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
+ *            from executing at the same time.
+ */
+int
+stop_cpus(u_int map)
+{
+	map &= smp_active_mask;
+
+	/* send the Xcpustop IPI to all CPUs in map */
+	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
+
+	while ((stopped_cpus & map) != map)
+		/* spin */ ;
+
+	return 1;
+}
+
+
+/*
+ * Called by a CPU to restart stopped CPUs.
+ *
+ * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
+ *
+ *  - Signals all CPUs in map to restart.
+ *  - Waits for each to restart.
+ *
+ * Returns:
+ *  -1: error
+ *   0: NA
+ *   1: ok
+ */
+int
+restart_cpus(u_int map)
+{
+	/* signal other cpus to restart */
+	started_cpus = map & smp_active_mask;
+
+	while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
+		/* spin */ ;
+
+	return 1;
+}
+
+/*
+ * This is called once the mpboot code has gotten us properly relocated
+ * and the MMU turned on, etc.
ap_init() is actually the idle thread,
+ * and when it returns the scheduler will call the real cpu_idle() main
+ * loop for the idlethread.  Interrupts are disabled on entry and should
+ * remain disabled at return.
+ */
+void
+ap_init(void)
+{
+	u_int	apic_id;
+
+	/*
+	 * Adjust smp_startup_mask to signal the BSP that we have started
+	 * up successfully.  Note that we do not yet hold the BGL.  The BSP
+	 * is waiting for our signal.
+	 *
+	 * We can't set our bit in smp_active_mask yet because we are holding
+	 * interrupts physically disabled and remote cpus could deadlock
+	 * trying to send us an IPI.
+	 */
+	smp_startup_mask |= 1 << mycpu->gd_cpuid;
+	cpu_mfence();
+
+	/*
+	 * Interlock for finalization.  Wait until mp_finish is non-zero,
+	 * then get the MP lock.
+	 *
+	 * Note: We are in a critical section.
+	 *
+	 * Note: We have to synchronize td_mpcount to our desired MP state
+	 * before calling cpu_try_mplock().
+	 *
+	 * Note: we are the idle thread, we can only spin.
+	 *
+	 * Note: The load fence is memory volatile and prevents the compiler
+	 * from improperly caching mp_finish, and the cpu from improperly
+	 * caching it.
+	 */
+	while (mp_finish == 0)
+	    cpu_lfence();
+	++curthread->td_mpcount;
+	while (cpu_try_mplock() == 0)
+	    ;
+
+	if (cpu_feature & CPUID_TSC) {
+	    /*
+	     * The BSP is constantly updating tsc0_offset, figure out the
+	     * relative difference to synchronize ktrdump.
+	     */
+	    tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
+	}
+
+	/* BSP may have changed PTD while we're waiting for the lock */
+	cpu_invltlb();
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+	lidt(&r_idt);
+#endif
+
+	/* Build our map of 'other' CPUs. */
+	mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
+
+	kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
+
+	/* A quick check from sanity claus */
+	apic_id = (apic_id_to_logical[(lapic->id & 0x0f000000) >> 24]);
+	if (mycpu->gd_cpuid != apic_id) {
+		kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
+		kprintf("SMP: apic_id = %d\n", apic_id);
+#if JGXXX
+		kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
+#endif
+		panic("cpuid mismatch! boom!!");
+	}
+
+	/* Initialize AP's local APIC for irq's */
+	apic_initialize(FALSE);
+
+	/* Set memory range attributes for this CPU to match the BSP */
+	mem_range_AP_init();
+
+	/*
+	 * Once we go active we must process any IPIQ messages that may
+	 * have been queued, because no actual IPI will occur until we
+	 * set our bit in the smp_active_mask.  If we don't, the IPI
+	 * message interlock could be left set which would also prevent
+	 * further IPIs.
+	 *
+	 * The idle loop doesn't expect the BGL to be held and while
+	 * lwkt_switch() normally cleans things up, this is a special case
+	 * because we are returning almost directly into the idle loop.
+	 *
+	 * The idle thread is never placed on the runq, make sure
+	 * nothing we've done put it there.
+	 */
+	KKASSERT(curthread->td_mpcount == 1);
+	smp_active_mask |= 1 << mycpu->gd_cpuid;
+
+	/*
+	 * Enable interrupts here.  idle_restore will also do it, but
+	 * doing it here lets us clean up any strays that got posted to
+	 * the CPU during the AP boot while we are still in a critical
+	 * section.
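+	 * (The sti; pause; pause below gives any latched interrupt a
+	 * window to be delivered before the pending masks are cleared.)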
+ */ + __asm __volatile("sti; pause; pause"::); + mdcpu->gd_fpending = 0; + mdcpu->gd_ipending = 0; + + initclocks_pcpu(); /* clock interrupts (via IPIs) */ + lwkt_process_ipiq(); + + /* + * Releasing the mp lock lets the BSP finish up the SMP init + */ + rel_mplock(); + KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); +} + +/* + * Get SMP fully working before we start initializing devices. + */ +static +void +ap_finish(void) +{ + mp_finish = 1; + if (bootverbose) + kprintf("Finish MP startup\n"); + if (cpu_feature & CPUID_TSC) + tsc0_offset = rdtsc(); + tsc_offsets[0] = 0; + rel_mplock(); + while (smp_active_mask != smp_startup_mask) { + cpu_lfence(); + if (cpu_feature & CPUID_TSC) + tsc0_offset = rdtsc(); + } + while (try_mplock() == 0) + ; + if (bootverbose) + kprintf("Active CPU Mask: %08x\n", smp_active_mask); +} + +SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL) + +void +cpu_send_ipiq(int dcpu) +{ + if ((1 << dcpu) & smp_active_mask) + single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); +} + +#if 0 /* single_apic_ipi_passive() not working yet */ +/* + * Returns 0 on failure, 1 on success + */ +int +cpu_send_ipiq_passive(int dcpu) +{ + int r = 0; + if ((1 << dcpu) & smp_active_mask) { + r = single_apic_ipi_passive(dcpu, XIPIQ_OFFSET, + APIC_DELMODE_FIXED); + } + return(r); +} +#endif + diff --git a/sys/platform/pc64/amd64/mpboot.S b/sys/platform/pc64/amd64/mpboot.S new file mode 100644 index 0000000000..baea011dff --- /dev/null +++ b/sys/platform/pc64/amd64/mpboot.S @@ -0,0 +1,236 @@ +/*- + * Copyright (c) 2003 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/sys/amd64/amd64/mpboot.S 130224 2004-06-08 01:02:52Z peter $ + */ + +#include /* miscellaneous asm macros */ +#include + +#include "assym.s" + + .data /* So we can modify it */ + + .p2align 4,0 + .globl mptramp_start +mptramp_start: + .code16 + /* + * The AP enters here in response to the startup IPI. + * We are in real mode. %cs is the only segment register set. + */ + cli /* make sure no interrupts */ + mov %cs, %ax /* copy %cs to %ds. 
Remember these */
+	mov	%ax, %ds	/* are offsets rather than selectors */
+	mov	%ax, %ss
+
+	/*
+	 * Find relocation base and patch the gdt descriptor and ljmp targets
+	 */
+	xorl	%ebx,%ebx
+	mov	%cs, %bx
+	sall	$4, %ebx		/* %ebx is now our relocation base */
+	orl	%ebx, lgdt_desc-mptramp_start+2
+	orl	%ebx, jmp_32-mptramp_start+2
+	orl	%ebx, jmp_64-mptramp_start+1
+
+	/*
+	 * Load the descriptor table pointer.  We'll need it when running
+	 * in 16 bit protected mode.
+	 */
+	lgdt	lgdt_desc-mptramp_start
+
+	/* Enable protected mode */
+	movl	$CR0_PE, %eax
+	mov	%eax, %cr0
+
+	/*
+	 * Now execute a far jump to turn on protected mode.  This
+	 * causes the segment registers to turn into selectors and causes
+	 * %cs to be loaded from the gdt.
+	 *
+	 * The following instruction is:
+	 * ljmpl $bootcode-gdt, $protmode-mptramp_start
+	 * but gas cannot assemble that.  And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+	 */
+jmp_32:
+	.byte	0x66			/* size override to 32 bits */
+	.byte	0xea			/* opcode for far jump */
+	.long	protmode-mptramp_start	/* offset in segment */
+	.word	bootcode-gdt		/* index in gdt for 32 bit code */
+
+	/*
+	 * At this point, we are running in 32 bit legacy protected mode.
+	 */
+	.code32
+protmode:
+	mov	$bootdata-gdt, %eax
+	mov	%ax, %ds
+
+	/* Turn on the PAE and PSE bits for when paging is enabled */
+	mov	%cr4, %eax
+	orl	$(CR4_PAE | CR4_PSE), %eax
+	mov	%eax, %cr4
+
+	/*
+	 * Enable EFER.LME so that we get long mode when all the prereqs are
+	 * in place.  In this case, it turns on when CR0_PG is finally enabled.
+	 * Pick up a few other EFER bits that we'll need while we're here.
+	 */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	orl	$EFER_LME | EFER_SCE, %eax
+	wrmsr
+
+	/*
+	 * Point to the embedded page tables for startup.  Note that this
+	 * only gets accessed after we're actually in 64 bit mode, however
+	 * we can only set the bottom 32 bits of %cr3 in this state.  This
+	 * means we are required to use a temporary page table that is below
+	 * the 4GB limit.  %ebx is still our relocation base.  We could just
+	 * subtract 3 * PAGE_SIZE, but that would be too easy.
+	 */
+	leal	mptramp_pagetables-mptramp_start(%ebx),%eax
+	movl	(%eax), %eax
+	mov	%eax, %cr3
+
+	/*
+	 * Finally, switch to long mode by enabling paging.  We have
+	 * to be very careful here because all the segmentation disappears
+	 * out from underneath us.  The spec says we can depend on the
+	 * subsequent pipelined branch to execute, but *only if* everything
+	 * is still identity mapped.  If any mappings change, the pipeline
+	 * will flush.
+	 */
+	mov	%cr0, %eax
+	orl	$CR0_PG, %eax
+	mov	%eax, %cr0
+
+	/*
+	 * At this point paging is enabled, and we are in "compatibility" mode.
+	 * We do another far jump to reload %cs with the 64 bit selector.
+	 * %cr3 points to a 4-level page table page.
+	 * We cannot yet jump all the way to the kernel because we can only
+	 * specify a 32 bit linear address.  So, yet another trampoline.
+	 *
+	 * The following instruction is:
+	 * ljmp $kernelcode-gdt, $tramp_64-mptramp_start
+	 * but gas cannot assemble that.  And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+	 */
+jmp_64:
+	.byte	0xea			/* opcode for far jump */
+	.long	tramp_64-mptramp_start	/* offset in segment */
+	.word	kernelcode-gdt		/* index in gdt for 64 bit code */
+
+	/*
+	 * Yeehar!  We're running in 64 bit mode!  We can mostly ignore our
+	 * segment registers, and get on with it.
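+	 * (In long mode the %ds/%es/%ss bases are treated as zero; only
+	 * %fs and %gs retain usable bases.)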
+ * Note that we are running at the correct virtual address, but with + * a 1:1 1GB mirrored mapping over entire address space. We had better + * switch to a real %cr3 promptly so that we can get to the direct map + * space. Remember that jmp is relative and that we've been relocated, + * so use an indirect jump. + */ + .code64 +tramp_64: + movabsq $entry_64,%rax /* 64 bit immediate load */ + jmp *%rax + + .p2align 4,0 +gdt: + /* + * All segment descriptor tables start with a null descriptor + */ + .long 0x00000000 + .long 0x00000000 + + /* + * This is the 64 bit long mode code descriptor. There is no + * 64 bit data descriptor. + */ +kernelcode: + .long 0x00000000 + .long 0x00209800 + + /* + * This is the descriptor for the 32 bit boot code. + * %cs: +A, +R, -C, DPL=0, +P, +D, +G + * Accessed, Readable, Present, 32 bit, 4G granularity + */ +bootcode: + .long 0x0000ffff + .long 0x00cf9b00 + + /* + * This is the descriptor for the 32 bit boot data. + * We load it into %ds and %ss. The bits for each selector + * are interpreted slightly differently. + * %ds: +A, +W, -E, DPL=0, +P, +D, +G + * %ss: +A, +W, -E, DPL=0, +P, +B, +G + * Accessed, Writeable, Expand up, Present, 32 bit, 4GB + * For %ds, +D means 'default operand size is 32 bit'. + * For %ss, +B means the stack register is %esp rather than %sp. + */ +bootdata: + .long 0x0000ffff + .long 0x00cf9300 + +gdtend: + + /* + * The address of our page table pages that the boot code + * uses to trampoline up to kernel address space. + */ + .globl mptramp_pagetables +mptramp_pagetables: + .long 0 + + /* + * The pseudo descriptor for lgdt to use. + */ +lgdt_desc: + .word gdtend-gdt /* Length */ + .long gdt-mptramp_start /* Offset plus %ds << 4 */ + + .globl mptramp_end +mptramp_end: + + /* + * From here on down is executed in the kernel .text section. + * + * Load a real %cr3 that has all the direct map stuff and switches + * off the 1GB replicated mirror. Load a stack pointer and jump + * into AP startup code in C. + */ + .text + .code64 + .p2align 4,0 +entry_64: + movq KPML4phys, %rax + movq %rax, %cr3 + movq bootSTK, %rsp + jmp init_secondary diff --git a/sys/platform/pc64/amd64/mplock.s b/sys/platform/pc64/amd64/mplock.s index 5453bf5951..b0fc0a80a7 100644 --- a/sys/platform/pc64/amd64/mplock.s +++ b/sys/platform/pc64/amd64/mplock.s @@ -1,6 +1,6 @@ /* * $FreeBSD: src/sys/i386/i386/mplock.s,v 1.29.2.2 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/pc64/amd64/mplock.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $ + * $DragonFly: src/sys/platform/pc32/i386/mplock.s,v 1.21 2006/11/07 06:43:24 dillon Exp $ * * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. * @@ -63,9 +63,7 @@ */ #include -#if 0 #include -#endif #include "assym.s" @@ -91,8 +89,8 @@ mp_lock: * Z=1 (jz) on success. A lock prefix is required for MP. 
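 * mp_lock holds the owning cpu id, or -1 when the lock is free, which
 * is why the cmpxchgl sequences below compare against $-1.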
*/ NON_GPROF_ENTRY(cpu_get_initial_mplock) - movl PCPU(curthread),%ecx - movl $1,TD_MPCOUNT(%ecx) /* curthread has mpcount of 1 */ + movq PCPU(curthread),%rcx + movl $1,TD_MPCOUNT(%rcx) /* curthread has mpcount of 1 */ movl $0,mp_lock /* owned by cpu 0 */ NON_GPROF_RET @@ -114,7 +112,7 @@ NON_GPROF_ENTRY(cpu_try_mplock) lock cmpxchgl %ecx,mp_lock /* ecx<->mem if eax matches */ jnz 1f #ifdef PARANOID_INVLTLB - movl %cr3,%eax; movl %eax,%cr3 /* YYY check and remove */ + movq %cr3,%rax; movq %rax,%cr3 /* YYY check and remove */ #endif movl $1,%eax NON_GPROF_RET @@ -137,8 +135,8 @@ NON_GPROF_ENTRY(cpu_try_mplock) */ NON_GPROF_ENTRY(get_mplock) movl PCPU(cpuid),%ecx - movl PCPU(curthread),%edx - incl TD_MPCOUNT(%edx) /* predispose */ + movq PCPU(curthread),%rdx + incl TD_MPCOUNT(%rdx) /* predispose */ cmpl %ecx,mp_lock jne 1f NON_GPROF_RET /* success! */ @@ -151,6 +149,9 @@ NON_GPROF_ENTRY(get_mplock) movl $-1,%eax lock cmpxchgl %ecx,mp_lock jnz 2f +#ifdef PARANOID_INVLTLB + movq %cr3,%rax; movq %rax,%cr3 /* YYY check and remove */ +#endif NON_GPROF_RET /* success */ /* @@ -162,10 +163,10 @@ NON_GPROF_ENTRY(get_mplock) * backtrace properly. */ 2: - pushl %ebp - movl %esp,%ebp + pushq %rbp + movq %rsp,%rbp call lwkt_mp_lock_contested - popl %ebp + popq %rbp #ifdef INVARIANTS movl PCPU(cpuid),%eax /* failure */ cmpl %eax,mp_lock @@ -190,8 +191,8 @@ NON_GPROF_ENTRY(get_mplock) */ NON_GPROF_ENTRY(try_mplock) movl PCPU(cpuid),%ecx - movl PCPU(curthread),%edx - incl TD_MPCOUNT(%edx) /* pre-dispose for race */ + movq PCPU(curthread),%rdx + incl TD_MPCOUNT(%rdx) /* pre-dispose for race */ cmpl %ecx,mp_lock je 1f /* trivial success */ movl $-1,%eax @@ -201,7 +202,7 @@ NON_GPROF_ENTRY(try_mplock) * Success */ #ifdef PARANOID_INVLTLB - movl %cr3,%eax; movl %eax,%cr3 /* YYY check and remove */ + movq %cr3,%rax; movq %rax,%cr3 /* YYY check and remove */ #endif 1: movl $1,%eax /* success (cmpxchgl good!) */ @@ -216,8 +217,8 @@ NON_GPROF_ENTRY(try_mplock) * make sure we don't own the lock in case we did win it in a race. */ 2: - decl TD_MPCOUNT(%edx) - cmpl $0,TD_MPCOUNT(%edx) + decl TD_MPCOUNT(%rdx) + cmpl $0,TD_MPCOUNT(%rdx) jne 3f movl PCPU(cpuid),%eax movl $-1,%ecx @@ -234,32 +235,39 @@ NON_GPROF_ENTRY(try_mplock) * above. 
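 *
 * For reference, the predispose-and-back-out sequence try_mplock
 * implements above is easier to follow in C (a rough sketch with a
 * hypothetical helper name; td_mpcount must be bumped *before* the
 * cmpxchg so an incoming IPI never observes an inconsistent count):
 *
 *	static int
 *	try_mplock_c(thread_t td, u_int cpuid)
 *	{
 *		++td->td_mpcount;		// predispose for the race
 *		if (mp_lock == cpuid ||
 *		    atomic_cmpset_int((volatile u_int *)&mp_lock,
 *			MP_FREE_LOCK, cpuid))
 *			return (1);
 *		if (--td->td_mpcount == 0)	// back out; release in case
 *			atomic_cmpset_int(	// we won a race after all
 *			    (volatile u_int *)&mp_lock, cpuid, MP_FREE_LOCK);
 *		return (0);
 *	}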
*/ NON_GPROF_ENTRY(rel_mplock) - movl PCPU(curthread),%edx - movl TD_MPCOUNT(%edx),%eax + movq PCPU(curthread),%rdx + movl TD_MPCOUNT(%rdx),%eax #ifdef INVARIANTS cmpl $0,%eax je badmp_rel #endif subl $1,%eax - movl %eax,TD_MPCOUNT(%edx) + movl %eax,TD_MPCOUNT(%rdx) cmpl $0,%eax jne 3f movl PCPU(cpuid),%eax movl $-1,%ecx lock cmpxchgl %ecx,mp_lock + movl mp_lock_contention_mask,%eax + cmpl $0,%eax + je 3f + call lwkt_mp_lock_uncontested 3: NON_GPROF_RET #ifdef INVARIANTS badmp_get: - pushl $bmpsw1 + movq $bmpsw1,%rdi + movl $0,%eax call panic badmp_get2: - pushl $bmpsw1a + movq $bmpsw1a,%rdi + movl $0,%eax call panic badmp_rel: - pushl $bmpsw2 + movq $bmpsw2,%rdi + movl $0,%eax call panic .data diff --git a/sys/platform/pc64/amd64/support.s b/sys/platform/pc64/amd64/support.s index 8c61fb0463..847e49ba40 100644 --- a/sys/platform/pc64/amd64/support.s +++ b/sys/platform/pc64/amd64/support.s @@ -35,21 +35,12 @@ #include "opt_ddb.h" #include -#include +#include #include #include "assym.s" ALIGN_DATA - .globl intrcnt, eintrcnt -intrcnt: - .space INTRCNT_COUNT * 8 -eintrcnt: - - .globl intrnames, eintrnames -intrnames: - .space INTRCNT_COUNT * (MAXCOMLEN + 1) -eintrnames: .text diff --git a/sys/platform/pc64/amd64/trap.c b/sys/platform/pc64/amd64/trap.c index d7ed5b06e0..e8ab454d5c 100644 --- a/sys/platform/pc64/amd64/trap.c +++ b/sys/platform/pc64/amd64/trap.c @@ -75,11 +75,13 @@ #include #include #include +#include #include #include #include #include +#include #ifdef SMP @@ -102,9 +104,6 @@ static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(struct trapframe *frame); -#define PCPU_GET(member) ((mycpu)->gd_##member) -#define PCPU_INC(member) ((mycpu)->gd_##member)++ - #define MAX_TRAP_MSG 30 static char *trap_msg[] = { "", /* 0 unused */ @@ -925,9 +924,10 @@ trap_fatal(struct trapframe *frame, vm_offset_t eva) kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, ISPL(frame->tf_cs) == SEL_UPL ? 
"user" : "kernel"); #ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - kprintf("cpuid = %d; ", PCPU_GET(cpuid)); - kprintf("apic id = %02x\n", PCPU_GET(apic_id)); + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic->id = %08x\n", lapic->id); #endif if (type == T_PAGEFLT) { kprintf("fault virtual address = 0x%lx\n", eva); @@ -1001,9 +1001,10 @@ dblfault_handler(struct trapframe *frame) kprintf("rsp = 0x%lx\n", frame->tf_rsp); kprintf("rbp = 0x%lx\n", frame->tf_rbp); #ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - kprintf("cpuid = %d; ", PCPU_GET(cpuid)); - kprintf("apic id = %02x\n", PCPU_GET(apic_id)); + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic->id = %08x\n", lapic->id); #endif panic("double fault"); } @@ -1046,7 +1047,7 @@ syscall2(struct trapframe *frame) union sysunion args; register_t *argsdst; - PCPU_INC(cnt.v_syscall); + mycpu->gd_cnt.v_syscall++; #ifdef DIAGNOSTIC if (ISPL(frame->tf_cs) != SEL_UPL) { @@ -1060,7 +1061,7 @@ syscall2(struct trapframe *frame) frame->tf_eax); #ifdef SMP - KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_eip)); + KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_rip)); if (syscall_mpsafe == 0) MAKEMPSAFE(have_mplock); #endif @@ -1265,7 +1266,7 @@ bad: * Release the MP lock if we had to get it */ KASSERT(td->td_mpcount == have_mplock, - ("badmpcount syscall2/end from %p", (void *)frame->tf_eip)); + ("badmpcount syscall2/end from %p", (void *)frame->tf_rip)); if (have_mplock) rel_mplock(); #endif diff --git a/sys/platform/pc64/apic/apic_abi.c b/sys/platform/pc64/apic/apic_abi.c index 5477663771..692d025f2a 100644 --- a/sys/platform/pc64/apic/apic_abi.c +++ b/sys/platform/pc64/apic/apic_abi.c @@ -230,16 +230,16 @@ apic_finalize(void) * mask the interrupt, completing the disconnection of the * 8259. 
*/ - temp = lapic.lvt_lint0; + temp = lapic->lvt_lint0; temp |= APIC_LVT_MASKED; - lapic.lvt_lint0 = temp; + lapic->lvt_lint0 = temp; /* * setup lint1 to handle an NMI */ - temp = lapic.lvt_lint1; + temp = lapic->lvt_lint1; temp &= ~APIC_LVT_MASKED; - lapic.lvt_lint1 = temp; + lapic->lvt_lint1 = temp; if (bootverbose) apic_dump("bsp_apic_configure()"); @@ -270,7 +270,7 @@ apic_vectorctl(int op, int intr, int flags) if (intr < 0 || intr >= APIC_HWI_VECTORS) return (EINVAL); - ef = read_eflags(); + ef = read_rflags(); cpu_disable_intr(); error = 0; @@ -283,10 +283,10 @@ apic_vectorctl(int op, int intr, int flags) if (flags & INTR_FAST) { vector = TPR_SLOW_INTS + intr; setidt(vector, apic_wrongintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + SDT_SYSIGT, SEL_KPL, 0); vector = TPR_FAST_INTS + intr; setidt(vector, apic_fastintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + SDT_SYSIGT, SEL_KPL, 0); } else { vector = TPR_SLOW_INTS + intr; @@ -297,7 +297,7 @@ apic_vectorctl(int op, int intr, int flags) vector = TPR_FAST_INTS + intr; } setidt(vector, apic_slowintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + SDT_SYSIGT, SEL_KPL, 0); } /* @@ -326,8 +326,8 @@ apic_vectorctl(int op, int intr, int flags) */ machintr_intrdis(intr); vector = TPR_SLOW_INTS + intr; - setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(vector, apic_slowintr[intr], SDT_SYSIGT, SEL_KPL, + 0); /* * And then reprogram the IO APIC to point to the SLOW vector (it may @@ -359,15 +359,14 @@ apic_vectorctl(int op, int intr, int flags) * to IDT_OFFSET + intr. */ vector = IDT_OFFSET + intr; - setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(vector, apic_slowintr[intr], SDT_SYSIGT, SEL_KPL, 0); break; default: error = EOPNOTSUPP; break; } - write_eflags(ef); + write_rflags(ef); return (error); } diff --git a/sys/platform/pc64/apic/apic_ipl.s b/sys/platform/pc64/apic/apic_ipl.s index ce155366af..52afe63f70 100644 --- a/sys/platform/pc64/apic/apic_ipl.s +++ b/sys/platform/pc64/apic/apic_ipl.s @@ -57,7 +57,9 @@ * $DragonFly: src/sys/platform/pc64/apic/apic_ipl.s,v 1.1 2008/08/29 17:07:12 dillon Exp $ */ +#if 0 #include "use_npx.h" +#endif #include #include @@ -94,31 +96,33 @@ apic_imen: */ ENTRY(APIC_INTRDIS) APIC_IMASK_LOCK /* enter critical reg */ - movl 4(%esp),%eax + movl %edi, %eax 1: btsl %eax, apic_imen - shll $4, %eax - movl CNAME(int_to_apicintpin) + 8(%eax), %edx - movl CNAME(int_to_apicintpin) + 12(%eax), %ecx - testl %edx, %edx + imull $AIMI_SIZE, %eax + addq $CNAME(int_to_apicintpin), %rax + movq AIMI_APIC_ADDRESS(%rax), %rdx + movl AIMI_REDIRINDEX(%rax), %ecx + testq %rdx, %rdx jz 2f - movl %ecx, (%edx) /* target register index */ - orl $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */ + movl %ecx, (%rdx) /* target register index */ + orl $IOART_INTMASK,16(%rdx) /* set intmask in target apic reg */ 2: APIC_IMASK_UNLOCK /* exit critical reg */ ret ENTRY(APIC_INTREN) APIC_IMASK_LOCK /* enter critical reg */ - movl 4(%esp), %eax /* mask into %eax */ + movl %edi, %eax 1: btrl %eax, apic_imen /* update apic_imen */ - shll $4, %eax - movl CNAME(int_to_apicintpin) + 8(%eax), %edx - movl CNAME(int_to_apicintpin) + 12(%eax), %ecx - testl %edx, %edx + imull $AIMI_SIZE, %eax + addq $CNAME(int_to_apicintpin), %rax + movq AIMI_APIC_ADDRESS(%rax), %rdx + movl AIMI_REDIRINDEX(%rax), %ecx + testq %rdx, %rdx jz 2f - movl %ecx, (%edx) /* write the target register index */ + movl %ecx, 
(%rdx)		/* write the target register index */
-	andl	$~IOART_INTMASK, 16(%edx)	/* clear mask bit */
+	andl	$~IOART_INTMASK, 16(%rdx)	/* clear mask bit */
 2:
 	APIC_IMASK_UNLOCK			/* exit critical reg */
 	ret
 
@@ -129,35 +133,24 @@ ENTRY(APIC_INTREN)
  */
 
 /*
- * u_int io_apic_write(int apic, int select);
+ * u_int io_apic_read(int apic, int select);
  */
 ENTRY(io_apic_read)
-	movl	4(%esp), %ecx		/* APIC # */
-	movl	ioapic, %eax
-	movl	(%eax,%ecx,4), %edx	/* APIC base register address */
-	movl	8(%esp), %eax		/* target register index */
-	movl	%eax, (%edx)		/* write the target register index */
-	movl	16(%edx), %eax		/* read the APIC register data */
+	movl	%edi, %ecx		/* APIC # */
+	movq	ioapic, %rax
+	movq	(%rax,%rcx,8), %rdx	/* APIC base register address */
+	movl	%esi, (%rdx)		/* write the target register index */
+	movl	16(%rdx), %eax		/* read the APIC register data */
 	ret				/* %eax = register value */
 
 /*
- * void io_apic_write(int apic, int select, int value);
+ * void io_apic_write(int apic, int select, u_int value);
  */
 ENTRY(io_apic_write)
-	movl	4(%esp), %ecx		/* APIC # */
-	movl	ioapic, %eax
-	movl	(%eax,%ecx,4), %edx	/* APIC base register address */
-	movl	8(%esp), %eax		/* target register index */
-	movl	%eax, (%edx)		/* write the target register index */
-	movl	12(%esp), %eax		/* target register value */
-	movl	%eax, 16(%edx)		/* write the APIC register data */
+	movl	%edi, %ecx		/* APIC # */
+	movq	ioapic, %rax
+	movq	(%rax,%rcx,8), %r8	/* APIC base register address */
+	movl	%esi, (%r8)		/* write the target register index */
+	movl	%edx, 16(%r8)		/* write the APIC register data */
 	ret				/* %eax = void */
-
-/*
- * Send an EOI to the local APIC.
- */
-ENTRY(apic_eoi)
-	movl	$0, lapic+0xb0
-	ret
-
 #endif
diff --git a/sys/platform/pc64/apic/apic_vector.s b/sys/platform/pc64/apic/apic_vector.s
index 226d024009..1435a66a40 100644
--- a/sys/platform/pc64/apic/apic_vector.s
+++ b/sys/platform/pc64/apic/apic_vector.s
@@ -1,40 +1,13 @@
 /*
- * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * - * from: vector.s, 386BSD 0.1 unknown origin + * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc64/apic/apic_vector.s,v 1.1 2008/08/29 17:07:12 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.39 2008/08/02 01:14:43 dillon Exp $ */ +#if 0 #include "use_npx.h" #include "opt_auto_eoi.h" +#endif #include #include @@ -63,66 +36,37 @@ #define MPLOCKED #endif -/* - * Push an interrupt frame in a format acceptable to doreti, reload - * the segment registers for the kernel. - */ -#define PUSH_FRAME \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushl $0 ; /* dummy xflags type */ \ - pushal ; \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - pushl %gs ; \ +#define APIC_PUSH_FRAME \ + PUSH_FRAME ; /* 15 regs + space for 5 extras */ \ + movq $0,TF_XFLAGS(%rsp) ; \ + movq $0,TF_TRAPNO(%rsp) ; \ + movq $0,TF_ADDR(%rsp) ; \ + movq $0,TF_FLAGS(%rsp) ; \ + movq $0,TF_ERR(%rsp) ; \ cld ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov %ax,%gs ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - -#define PUSH_DUMMY \ - pushfl ; /* phys int frame / flags */ \ - pushl %cs ; /* phys int frame / cs */ \ - pushl 12(%esp) ; /* original caller eip */ \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushl $0 ; /* dummy xflags type */ \ - subl $13*4,%esp ; /* pushal + 4 seg regs (dummy) + CPL */ \ /* - * Warning: POP_FRAME can only be used if there is no chance of a + * JG stale? Warning: POP_FRAME can only be used if there is no chance of a * segment register being changed (e.g. by procfs), which is why syscalls * have to use doreti. 
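 *
 * The 24/8/16 constants in the IOAPICADDR()/REDIRIDX() macros below
 * fall out of the LP64 layout of struct apic_intmapinfo (declared in
 * <machine/smp.h> later in this patch); assuming natural alignment:
 *
 *	struct apic_intmapinfo {
 *		int            ioapic;		// offset  0
 *		int            int_pin;		// offset  4
 *		volatile void *apic_address;	// offset  8
 *		int            redirindex;	// offset 16
 *	};			// 4 bytes tail padding, sizeof == 24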
*/ -#define POP_FRAME \ - popl %gs ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $3*4,%esp ; /* dummy xflags, trap & error codes */ \ +#define APIC_POP_FRAME POP_FRAME -#define POP_DUMMY \ - addl $19*4,%esp ; \ - -#define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 -#define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 +/* sizeof(struct apic_intmapinfo) == 24 */ +#define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 24 * (irq_num) + 8 +#define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 24 * (irq_num) + 16 #define MASK_IRQ(irq_num) \ APIC_IMASK_LOCK ; /* into critical reg */ \ testl $IRQ_LBIT(irq_num), apic_imen ; \ jne 7f ; /* masked, don't mask */ \ orl $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */ \ - movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ + movq IOAPICADDR(irq_num), %rcx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ - movl %eax, (%ecx) ; /* write the index */ \ - movl IOAPIC_WINDOW(%ecx), %eax ; /* current value */ \ + movl %eax, (%rcx) ; /* write the index */ \ + movl IOAPIC_WINDOW(%rcx), %eax ; /* current value */ \ orl $IOART_INTMASK, %eax ; /* set the mask */ \ - movl %eax, IOAPIC_WINDOW(%ecx) ; /* new value */ \ + movl %eax, IOAPIC_WINDOW(%rcx) ; /* new value */ \ 7: ; /* already masked */ \ APIC_IMASK_UNLOCK ; \ @@ -147,12 +91,12 @@ testl $IRQ_LBIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ andl $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */ \ - movl IOAPICADDR(irq_num),%ecx ; /* ioapic addr */ \ + movq IOAPICADDR(irq_num),%rcx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ - movl %eax,(%ecx) ; /* write the index */ \ - movl IOAPIC_WINDOW(%ecx),%eax ; /* current value */ \ + movl %eax,(%rcx) ; /* write the index */ \ + movl IOAPIC_WINDOW(%rcx),%eax ; /* current value */ \ andl $~IOART_INTMASK,%eax ; /* clear the mask */ \ - movl %eax,IOAPIC_WINDOW(%ecx) ; /* new value */ \ + movl %eax,IOAPIC_WINDOW(%rcx) ; /* new value */ \ 7: ; \ APIC_IMASK_UNLOCK ; \ 8: ; \ @@ -177,16 +121,15 @@ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ + APIC_PUSH_FRAME ; \ FAKE_MCOUNT(15*4(%esp)) ; \ MASK_LEVEL_IRQ(irq_num) ; \ - movl $0, lapic_eoi ; \ - movl PCPU(curthread),%ebx ; \ - movl $0,%eax ; /* CURRENT CPL IN FRAME (REMOVED) */ \ - pushl %eax ; \ - testl $-1,TD_NEST_COUNT(%ebx) ; \ + movq lapic, %rax ; \ + movl $0, LA_EOI(%rax) ; \ + movq PCPU(curthread),%rbx ; \ + testl $-1,TD_NEST_COUNT(%rbx) ; \ jne 1f ; \ - cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \ jl 2f ; \ 1: ; \ /* in critical section, make interrupt pending */ \ @@ -197,10 +140,10 @@ IDTVEC(vec_name) ; \ 2: ; \ /* clear pending bit, run handler */ \ andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \ - pushl $irq_num ; \ - pushl %esp ; /* pass frame by reference */ \ - call ithread_fast_handler ; /* returns 0 to unmask */ \ - addl $8, %esp ; \ + pushq $irq_num ; /* trapframe -> intrframe */ \ + movq %rsp, %rdi ; /* pass frame by reference */ \ + call ithread_fast_handler ; /* returns 0 to unmask */ \ + addq $8, %rsp ; /* intrframe -> trapframe */ \ UNMASK_IRQ(irq_num) ; \ 5: ; \ MEXITCOUNT ; \ @@ -231,18 +174,17 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ + APIC_PUSH_FRAME ; \ maybe_extra_ipending ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ incl PCPU(cnt) + V_INTR ; \ - movl $0, lapic_eoi ; \ - movl PCPU(curthread),%ebx ; \ - movl $0,%eax ; /* CURRENT CPL IN FRAME (REMOVED) */ \ - pushl %eax 
; /* cpl do restore */ \ - testl $-1,TD_NEST_COUNT(%ebx) ; \ + movq lapic, %rax ; \ + movl $0, LA_EOI(%rax) ; \ + movq PCPU(curthread),%rbx ; \ + testl $-1,TD_NEST_COUNT(%rbx) ; \ jne 1f ; \ - cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \ + cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \ jl 2f ; \ 1: ; \ /* set the pending bit and return, leave the interrupt masked */ \ @@ -252,13 +194,12 @@ IDTVEC(vec_name) ; \ 2: ; \ /* set running bit, clear pending bit, run handler */ \ andl $~IRQ_LBIT(irq_num), PCPU(ipending) ; \ - incl TD_NEST_COUNT(%ebx) ; \ + incl TD_NEST_COUNT(%rbx) ; \ sti ; \ - pushl $irq_num ; \ + movq $irq_num,%rdi ; \ call sched_ithd ; \ - addl $4,%esp ; \ cli ; \ - decl TD_NEST_COUNT(%ebx) ; \ + decl TD_NEST_COUNT(%rbx) ; \ 5: ; \ MEXITCOUNT ; \ jmp doreti ; \ @@ -276,12 +217,13 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $0, lapic_eoi ; /* End Of Interrupt to APIC */ \ + APIC_PUSH_FRAME ; \ + movq lapic,%rax ; \ + movl $0,LA_EOI(%rax) ; /* End Of Interrupt to APIC */ \ /*pushl $irq_num ;*/ \ /*call do_wrongintr ;*/ \ /*addl $4,%esp ;*/ \ - POP_FRAME ; \ + APIC_POP_FRAME ; \ iret ; \ #endif @@ -310,15 +252,15 @@ Xspuriousint: SUPERALIGN_TEXT .globl Xinvltlb Xinvltlb: - pushl %eax + pushq %rax - movl %cr3, %eax /* invalidate the TLB */ - movl %eax, %cr3 + movq %cr3, %rax /* invalidate the TLB */ + movq %rax, %cr3 - ss /* stack segment, avoid %ds load */ - movl $0, lapic_eoi /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ - popl %eax + popq %rax iret @@ -335,11 +277,22 @@ Xinvltlb: SUPERALIGN_TEXT .globl Xcpustop Xcpustop: - pushl %ebp - movl %esp, %ebp - pushl %eax - pushl %ecx - pushl %edx + pushq %rbp + movq %rsp, %rbp + /* We save registers that are not preserved across function calls. */ + /* JG can be re-written with mov's */ + pushq %rax + pushq %rcx + pushq %rdx + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + +#if JG + /* JGXXX switch to kernel %gs? 
*/ pushl %ds /* save current data segment */ pushl %fs @@ -347,15 +300,17 @@ Xcpustop: mov %ax, %ds /* use KERNEL data segment */ movl $KPSEL, %eax mov %ax, %fs +#endif - movl $0, lapic_eoi /* End Of Interrupt to APIC */ + movq lapic, %rax + movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + /* JG */ movl PCPU(cpuid), %eax imull $PCB_SIZE, %eax - leal CNAME(stoppcbs)(%eax), %eax - pushl %eax + leaq CNAME(stoppcbs), %rdi + addq %rax, %rdi call CNAME(savectx) /* Save process context */ - addl $4, %esp movl PCPU(cpuid), %eax @@ -369,9 +324,9 @@ Xcpustop: btsl %eax, stopped_cpus /* stopped_cpus |= (1< #include +#include #include #include +#include #include #include #include @@ -41,20 +42,68 @@ #define ELCR0 0x4d0 /* eisa irq 0-7 */ #define ELCR1 0x4d1 /* eisa irq 8-15 */ +volatile lapic_t *lapic; + +static void lapic_timer_calibrate(void); +static void lapic_timer_set_divisor(int); +static void lapic_timer_fixup_handler(void *); +static void lapic_timer_restart_handler(void *); + +void lapic_timer_process(void); +void lapic_timer_process_frame(struct intrframe *); + +static int lapic_timer_enable = 1; +TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable); + +static void lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t); +static void lapic_timer_intr_enable(struct cputimer_intr *); +static void lapic_timer_intr_restart(struct cputimer_intr *); +static void lapic_timer_intr_pmfixup(struct cputimer_intr *); + +static struct cputimer_intr lapic_cputimer_intr = { + .freq = 0, + .reload = lapic_timer_intr_reload, + .enable = lapic_timer_intr_enable, + .config = cputimer_intr_default_config, + .restart = lapic_timer_intr_restart, + .pmfixup = lapic_timer_intr_pmfixup, + .initclock = cputimer_intr_default_initclock, + .next = SLIST_ENTRY_INITIALIZER, + .name = "lapic", + .type = CPUTIMER_INTR_LAPIC, + .prio = CPUTIMER_INTR_PRIO_LAPIC, + .caps = CPUTIMER_INTR_CAP_NONE +}; + /* * pointers to pmapped apic hardware. */ volatile ioapic_t **ioapic; -void lapic_timer_fixup(void); +static int lapic_timer_divisor_idx = -1; +static const uint32_t lapic_timer_divisors[] = { + APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, + APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128, APIC_TDCR_1 +}; +#define APIC_TIMER_NDIVISORS \ + (int)(sizeof(lapic_timer_divisors) / sizeof(lapic_timer_divisors[0])) + + +void +lapic_eoi(void) +{ + + lapic->eoi = 0; +} /* * Enable APIC, configure interrupts. */ void -apic_initialize(void) +apic_initialize(boolean_t bsp) { + uint32_t timer; u_int temp; /* @@ -68,31 +117,37 @@ apic_initialize(void) * Disable LVT1 on the APs. It doesn't matter what delivery * mode we use because we leave it masked. */ - temp = lapic.lvt_lint0; + temp = lapic->lvt_lint0; temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); if (mycpu->gd_cpuid == 0) temp |= APIC_LVT_DM_EXTINT; else temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED; - lapic.lvt_lint0 = temp; + lapic->lvt_lint0 = temp; /* * setup LVT2 as NMI, masked till later. Edge trigger, active high. */ - temp = lapic.lvt_lint1; + temp = lapic->lvt_lint1; temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI; - lapic.lvt_lint1 = temp; + lapic->lvt_lint1 = temp; /* * Mask the apic error interrupt, apic performance counter - * interrupt, and the apic timer interrupt. + * interrupt. 
*/ - lapic.lvt_error = lapic.lvt_error | APIC_LVT_MASKED; - lapic.lvt_pcint = lapic.lvt_pcint | APIC_LVT_MASKED; - lapic.lvt_timer = lapic.lvt_timer | APIC_LVT_MASKED; + lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED; + lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED; + + /* Set apic timer vector and mask the apic timer interrupt. */ + timer = lapic->lvt_timer; + timer &= ~APIC_LVTT_VECTOR; + timer |= XTIMER_OFFSET; + timer |= APIC_LVTT_MASKED; + lapic->lvt_timer = timer; /* * Set the Task Priority Register as needed. At the moment allow @@ -100,7 +155,7 @@ apic_initialize(void) * ready to deal). We could disable all but IPIs by setting * temp |= TPR_IPI_ONLY for cpu != 0. */ - temp = lapic.tpr; + temp = lapic->tpr; temp &= ~APIC_TPR_PRIO; /* clear priority field */ #ifndef APIC_IO /* @@ -110,12 +165,12 @@ apic_initialize(void) temp |= TPR_IPI_ONLY; #endif - lapic.tpr = temp; + lapic->tpr = temp; /* * enable the local APIC */ - temp = lapic.svr; + temp = lapic->svr; temp |= APIC_SVR_ENABLE; /* enable the APIC */ temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */ @@ -128,26 +183,214 @@ apic_initialize(void) temp &= ~APIC_SVR_VECTOR; temp |= XSPURIOUSINT_OFFSET; - lapic.svr = temp; + lapic->svr = temp; /* * Pump out a few EOIs to clean out interrupts that got through * before we were able to set the TPR. */ - lapic.eoi = 0; - lapic.eoi = 0; - lapic.eoi = 0; + lapic_eoi(); + lapic_eoi(); + lapic_eoi(); + + if (bsp) { + lapic_timer_calibrate(); + if (lapic_timer_enable) { + cputimer_intr_register(&lapic_cputimer_intr); + cputimer_intr_select(&lapic_cputimer_intr, 0); + } + } else { + lapic_timer_set_divisor(lapic_timer_divisor_idx); + } if (bootverbose) apic_dump("apic_initialize()"); } + +static void +lapic_timer_set_divisor(int divisor_idx) +{ + KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS); + lapic->dcr_timer = lapic_timer_divisors[divisor_idx]; +} + +static void +lapic_timer_oneshot(u_int count) +{ + uint32_t value; + + value = lapic->lvt_timer; + value &= ~APIC_LVTT_PERIODIC; + lapic->lvt_timer = value; + lapic->icr_timer = count; +} + +static void +lapic_timer_oneshot_quick(u_int count) +{ + lapic->icr_timer = count; +} + +static void +lapic_timer_calibrate(void) +{ + sysclock_t value; + + /* Try to calibrate the local APIC timer. 
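+	 *
+	 * The loop starts at the smallest divide ratio (divide-by-2;
+	 * divide-by-1 sits last in the table) and accepts the first
+	 * setting whose 32 bit one-shot counter does not reach zero
+	 * inside the two second DELAY().  A worked example, assuming a
+	 * 100 MHz timer input:
+	 *
+	 *	divide-by-2  -> 50000000 decrements/sec
+	 *	after 2 sec  -> value = 100000000 ticks
+	 *	             != APIC_TIMER_MAX_COUNT, so this divisor is
+	 *	                kept and freq = value / 2 = 50000000 Hz
+	 *
+	 * Had the counter underflowed instead, ccr_timer would read 0,
+	 * value would equal APIC_TIMER_MAX_COUNT, and the next larger
+	 * divisor would be tried.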
*/ + for (lapic_timer_divisor_idx = 0; + lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS; + lapic_timer_divisor_idx++) { + lapic_timer_set_divisor(lapic_timer_divisor_idx); + lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); + DELAY(2000000); + value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; + if (value != APIC_TIMER_MAX_COUNT) + break; + } + if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS) + panic("lapic: no proper timer divisor?!\n"); + lapic_cputimer_intr.freq = value / 2; + + kprintf("lapic: divisor index %d, frequency %u Hz\n", + lapic_timer_divisor_idx, lapic_cputimer_intr.freq); +} + +static void +lapic_timer_process_oncpu(struct globaldata *gd, struct intrframe *frame) +{ + sysclock_t count; + + gd->gd_timer_running = 0; + + count = sys_cputimer->count(); + if (TAILQ_FIRST(&gd->gd_systimerq) != NULL) + systimer_intr(&count, 0, frame); +} + void -lapic_timer_fixup(void) +lapic_timer_process(void) +{ + lapic_timer_process_oncpu(mycpu, NULL); +} + +void +lapic_timer_process_frame(struct intrframe *frame) +{ + lapic_timer_process_oncpu(mycpu, frame); +} + +static void +lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload) +{ + struct globaldata *gd = mycpu; + + reload = (int64_t)reload * cti->freq / sys_cputimer->freq; + if (reload < 2) + reload = 2; + + if (gd->gd_timer_running) { + if (reload < lapic->ccr_timer) + lapic_timer_oneshot_quick(reload); + } else { + gd->gd_timer_running = 1; + lapic_timer_oneshot_quick(reload); + } +} + +static void +lapic_timer_intr_enable(struct cputimer_intr *cti __unused) +{ + uint32_t timer; + + timer = lapic->lvt_timer; + timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC); + lapic->lvt_timer = timer; + + lapic_timer_fixup_handler(NULL); +} + +static void +lapic_timer_fixup_handler(void *arg) { - /* TODO */ + int *started = arg; + + if (started != NULL) + *started = 0; + + if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { + /* + * Detect the presence of C1E capability mostly on latest + * dual-cores (or future) k8 family. This feature renders + * the local APIC timer dead, so we disable it by reading + * the Interrupt Pending Message register and clearing both + * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). + * + * Reference: + * "BIOS and Kernel Developer's Guide for AMD NPT + * Family 0Fh Processors" + * #32559 revision 3.00 + */ + if ((cpu_id & 0x00000f00) == 0x00000f00 && + (cpu_id & 0x0fff0000) >= 0x00040000) { + uint64_t msr; + + msr = rdmsr(0xc0010055); + if (msr & 0x18000000) { + struct globaldata *gd = mycpu; + + kprintf("cpu%d: AMD C1E detected\n", + gd->gd_cpuid); + wrmsr(0xc0010055, msr & ~0x18000000ULL); + + /* + * We are kinda stalled; + * kick start again. + */ + gd->gd_timer_running = 1; + lapic_timer_oneshot_quick(2); + + if (started != NULL) + *started = 1; + } + } + } } +static void +lapic_timer_restart_handler(void *dummy __unused) +{ + int started; + + lapic_timer_fixup_handler(&started); + if (!started) { + struct globaldata *gd = mycpu; + + gd->gd_timer_running = 1; + lapic_timer_oneshot_quick(2); + } +} + +/* + * This function is called only by ACPI-CA code currently: + * - AMD C1E fixup. AMD C1E only seems to happen after ACPI + * module controls PM. So once ACPI-CA is attached, we try + * to apply the fixup to prevent LAPIC timer from hanging. 
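+ *
+ *   To spell out the magic numbers used in lapic_timer_fixup_handler()
+ *   above: MSR 0xc0010055 is AMD's Interrupt Pending Message register,
+ *   bit 27 is SmiOnCmpHalt and bit 28 is C1eOnCmpHalt, so
+ *
+ *	(1UL << 27) | (1UL << 28) == 0x18000000
+ *
+ *   which is the mask both tested and cleared to keep the cores from
+ *   entering C1E (where the lapic timer stops ticking).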
+ */
+static void
+lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
+{
+	lwkt_send_ipiq_mask(smp_active_mask,
+			    lapic_timer_fixup_handler, NULL);
+}
+
+static void
+lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
+{
+	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
+}
+
+
 /*
  * dump contents of local APIC registers
  */
@@ -156,7 +399,7 @@ apic_dump(char* str)
 {
 	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
 	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
-	    lapic.lvt_lint0, lapic.lvt_lint1, lapic.tpr, lapic.svr);
+	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
 }
 
@@ -231,6 +474,8 @@ io_apic_setup_intpin(int apic, int pin)
 	u_int32_t	target;		/* the window register is 32 bits */
 	u_int32_t	vector;		/* the window register is 32 bits */
 	int		level;
+	int		cpuid;
+	char		envpath[32];
 
 	select = pin * 2 + IOAPIC_REDTBL0;	/* register */
 
@@ -308,10 +553,18 @@ io_apic_setup_intpin(int apic, int pin)
 		apic_pin_trigger |= (1 << irq);
 		polarity(apic, pin, &flags, level);
 	}
-
+	
+	cpuid = 0;
+	ksnprintf(envpath, sizeof(envpath), "hw.irq.%d.dest", irq);
+	kgetenv_int(envpath, &cpuid);
+
+	/* ncpus may not be available yet */
+	if (cpuid > mp_naps)
+		cpuid = 0;
+
 	if (bootverbose) {
-		kprintf("IOAPIC #%d intpin %d -> irq %d\n",
-		       apic, pin, irq);
+		kprintf("IOAPIC #%d intpin %d -> irq %d (CPU%d)\n",
+		       apic, pin, irq, cpuid);
 	}
 
 	/*
@@ -327,7 +580,9 @@ io_apic_setup_intpin(int apic, int pin)
 	vector = IDT_OFFSET + irq;			/* IDT vec */
 	target = io_apic_read(apic, select + 1) & IOART_HI_DEST_RESV;
-	target |= IOART_HI_DEST_BROADCAST;
+	/* Deliver interrupts to the chosen target cpu (cpu0, the BSP, by default) */
+	target |= (CPU_TO_ID(cpuid) << IOART_HI_DEST_SHIFT) &
+		  IOART_HI_DEST_MASK;
 	flags |= io_apic_read(apic, select) & IOART_RESV;
 	io_apic_write(apic, select, flags | vector);
 	io_apic_write(apic, select + 1, target);
@@ -374,6 +629,7 @@ io_apic_setup(int apic)
 	 IOART_DELLOPRI))
 
 /*
+ * XXX this function is only used by 8254 setup
  * Setup the source of External INTerrupts.
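+ *
+ * Like io_apic_setup_intpin() above, this consults the kernel
+ * environment for a destination cpu.  For example, booting with
+ *
+ *	hw.irq.5.dest=1
+ *
+ * set in the loader environment asks for irq 5 to be delivered to
+ * cpu 1, in which case the high word of the redirection entry is
+ * built as
+ *
+ *	target = (CPU_TO_ID(1) << IOART_HI_DEST_SHIFT) &
+ *	    IOART_HI_DEST_MASK;
+ *
+ * with the value clamped back to cpu 0 whenever it exceeds mp_naps.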
*/ int @@ -383,11 +639,23 @@ ext_int_setup(int apic, int intr) u_int32_t flags; /* the window register is 32 bits */ u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ + int cpuid; + char envpath[32]; if (apic_int_type(apic, intr) != 3) return -1; - target = IOART_HI_DEST_BROADCAST; + cpuid = 0; + ksnprintf(envpath, sizeof(envpath), "hw.irq.%d.dest", intr); + kgetenv_int(envpath, &cpuid); + + /* ncpus may not be available yet */ + if (cpuid > mp_naps) + cpuid = 0; + + /* Deliver interrupts to CPU0 (BSP) */ + target = (CPU_TO_ID(cpuid) << IOART_HI_DEST_SHIFT) & + IOART_HI_DEST_MASK; select = IOAPIC_REDTBL0 + (2 * intr); vector = IDT_OFFSET + intr; flags = DEFAULT_EXTINT_FLAGS; @@ -577,18 +845,18 @@ apic_ipi(int dest_type, int vector, int delivery_mode) u_long icr_lo; crit_enter(); - if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { - unsigned int eflags = read_eflags(); + if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) { + unsigned long rflags = read_rflags(); cpu_enable_intr(); - while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) { lwkt_process_ipiq(); } - write_eflags(eflags); + write_rflags(rflags); } - icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK) | dest_type | + icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type | delivery_mode | vector; - lapic.icr_lo = icr_lo; + lapic->icr_lo = icr_lo; crit_exit(); return 0; } @@ -600,24 +868,24 @@ single_apic_ipi(int cpu, int vector, int delivery_mode) u_long icr_hi; crit_enter(); - if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { - unsigned int eflags = read_eflags(); + if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) { + unsigned long rflags = read_rflags(); cpu_enable_intr(); - while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) { lwkt_process_ipiq(); } - write_eflags(eflags); + write_rflags(rflags); } - icr_hi = lapic.icr_hi & ~APIC_ID_MASK; + icr_hi = lapic->icr_hi & ~APIC_ID_MASK; icr_hi |= (CPU_TO_ID(cpu) << 24); - lapic.icr_hi = icr_hi; + lapic->icr_hi = icr_hi; /* build ICR_LOW */ - icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK) + icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | APIC_DEST_DESTFLD | delivery_mode | vector; /* write APIC ICR */ - lapic.icr_lo = icr_lo; + lapic->icr_lo = icr_lo; crit_exit(); } @@ -636,20 +904,20 @@ single_apic_ipi_passive(int cpu, int vector, int delivery_mode) u_long icr_hi; crit_enter(); - if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) { + if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) { crit_exit(); return(0); } - icr_hi = lapic.icr_hi & ~APIC_ID_MASK; + icr_hi = lapic->icr_hi & ~APIC_ID_MASK; icr_hi |= (CPU_TO_ID(cpu) << 24); - lapic.icr_hi = icr_hi; + lapic->icr_hi = icr_hi; /* build IRC_LOW */ - icr_lo = (lapic.icr_lo & APIC_RESV2_MASK) + icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) | APIC_DEST_DESTFLD | delivery_mode | vector; /* write APIC ICR */ - lapic.icr_lo = icr_lo; + lapic->icr_lo = icr_lo; crit_exit(); return(1); } @@ -680,74 +948,25 @@ selected_apic_ipi(u_int target, int vector, int delivery_mode) * - suggested by rgrimes@gndrsh.aac.dev.com */ -/** XXX FIXME: temp hack till we can determin bus clock */ -#ifndef BUS_CLOCK -#define BUS_CLOCK 66000000 -#define bus_clock() 66000000 -#endif - -#if defined(READY) -int acquire_apic_timer (void); -int release_apic_timer (void); - -/* - * Acquire the APIC timer for exclusive use. 
- */ -int -acquire_apic_timer(void) -{ -#if 1 - return 0; -#else - /** XXX FIXME: make this really do something */ - panic("APIC timer in use when attempting to acquire"); -#endif -} - - -/* - * Return the APIC timer. - */ -int -release_apic_timer(void) -{ -#if 1 - return 0; -#else - /** XXX FIXME: make this really do something */ - panic("APIC timer was already released"); -#endif -} -#endif /* READY */ - - /* * Load a 'downcount time' in uSeconds. */ void -set_apic_timer(int value) +set_apic_timer(int us) { - u_long lvtt; - long ticks_per_microsec; + u_int count; /* - * Calculate divisor and count from value: - * - * timeBase == CPU bus clock divisor == [1,2,4,8,16,32,64,128] - * value == time in uS + * When we reach here, lapic timer's frequency + * must have been calculated as well as the + * divisor (lapic->dcr_timer is setup during the + * divisor calculation). */ - lapic.dcr_timer = APIC_TDCR_1; - ticks_per_microsec = bus_clock() / 1000000; - - /* configure timer as one-shot */ - lvtt = lapic.lvt_timer; - lvtt &= ~(APIC_LVTT_VECTOR | APIC_LVTT_DS); - lvtt &= ~(APIC_LVTT_PERIODIC); - lvtt |= APIC_LVTT_MASKED; /* no INT, one-shot */ - lapic.lvt_timer = lvtt; - - /* */ - lapic.icr_timer = value * ticks_per_microsec; + KKASSERT(lapic_cputimer_intr.freq != 0 && + lapic_timer_divisor_idx >= 0); + + count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000; + lapic_timer_oneshot(count); } @@ -762,7 +981,7 @@ read_apic_timer(void) * for now we just return the remaining count. */ #else - return lapic.ccr_timer; + return lapic->ccr_timer; #endif } diff --git a/sys/platform/pc64/conf/files b/sys/platform/pc64/conf/files index dce96ad991..2cd1fcb400 100644 --- a/sys/platform/pc64/conf/files +++ b/sys/platform/pc64/conf/files @@ -79,8 +79,7 @@ vfs/smbfs/smbfs_vnops.c optional smbfs platform/pc64/amd64/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" platform/pc64/amd64/autoconf.c standard -platform/pc64/amd64/mp.c optional smp \ - compile-with "${CC} -c -pthread ${CFLAGS} -I/usr/include ${.IMPSRC}" +platform/pc64/amd64/mpboot.S optional smp platform/pc64/amd64/mplock.s optional smp # DDB XXX @@ -120,6 +119,11 @@ platform/pc64/amd64/procfs_machdep.c standard platform/pc64/amd64/initcpu.c standard platform/pc64/amd64/identcpu.c standard +platform/pc64/apic/apic_abi.c optional smp +platform/pc64/apic/mpapic.c optional smp +platform/pc64/apic/apic_ipl.s optional smp +platform/pc64/apic/apic_vector.s optional smp + bus/isa/amd64/isa.c optional isa bus/isa/amd64/isa_compat.c optional isa compat_oldisa bus/isa/amd64/isa_dma.c optional isa @@ -142,6 +146,7 @@ platform/pc64/amd64/systimer.c standard platform/pc64/amd64/console.c standard platform/pc64/amd64/ipl_funcs.c standard kern/syscalls.c standard +platform/pc64/amd64/mp_machdep.c optional smp dev/misc/atkbd/atkbd_isa.c optional atkbd dev/misc/atkbdc_layer/atkbdc_isa.c optional atkbdc dev/misc/psm/psm.c optional psm diff --git a/sys/platform/pc64/conf/options b/sys/platform/pc64/conf/options index fd2e575880..0ce4868f48 100644 --- a/sys/platform/pc64/conf/options +++ b/sys/platform/pc64/conf/options @@ -2,6 +2,9 @@ # $DragonFly: src/sys/platform/pc64/conf/options,v 1.4 2008/08/29 17:07:15 dillon Exp $ # +# amd64 SMP options +APIC_IO opt_global.h + # The cpu type # HAMMER_CPU opt_cpu.h diff --git a/sys/platform/pc64/include/clock.h b/sys/platform/pc64/include/clock.h index c72ea92ec2..51427177e7 100644 --- a/sys/platform/pc64/include/clock.h +++ b/sys/platform/pc64/include/clock.h 
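A quick sanity check on the set_apic_timer() conversion in mpapic.c
above: the microseconds-to-ticks computation rounds up, so any nonzero
request arms the timer.  With the 50 MHz example frequency used in the
calibration sketch earlier, a 20us request gives

	count = (20 * 50000000 + 999999) / 1000000 = 1000

i.e. exactly 20us worth of lapic timer ticks.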
@@ -1,41 +1,47 @@ -/*- +/* * Kernel interface to machine-dependent clock driver. * Garrett Wollman, September 1994. * This file is in the public domain. * - * $FreeBSD: src/sys/amd64/include/clock.h,v 1.54 2007/01/23 08:01:19 bde Exp $ - * $DragonFly: src/sys/platform/pc64/include/clock.h,v 1.2 2008/05/10 17:24:10 dillon Exp $ + * $FreeBSD: src/sys/i386/include/clock.h,v 1.38.2.1 2002/11/02 04:41:50 iwasaki Exp $ + * $DragonFly: src/sys/platform/pc32/include/clock.h,v 1.9 2008/05/10 17:24:08 dillon Exp $ */ #ifndef _MACHINE_CLOCK_H_ #define _MACHINE_CLOCK_H_ #ifdef _KERNEL + +#ifndef _SYS_TYPES_H_ +#include +#endif + /* * i386 to clock driver interface. * XXX large parts of the driver and its interface are misplaced. */ extern int adjkerntz; -extern int clkintr_pending; -extern int pscnt; -extern int psdiv; +extern int disable_rtc_set; extern int statclock_disable; extern u_int timer_freq; extern int timer0_max_count; extern int tsc_present; +extern int64_t tsc_frequency; extern int tsc_is_broken; extern int wall_cmos_clock; +#ifdef APIC_IO +extern int apic_8254_intr; +#endif /* * Driver to clock driver interface. */ -int acquire_timer2(int mode); -int release_timer2(void); -int rtcin(int val); -int sysbeep(int pitch, int period); -void init_TSC(void); -void init_TSC_tc(void); +int rtcin (int val); +int acquire_timer2 (int mode); +int release_timer2 (void); +int sysbeep (int pitch, int period); +void timer_restore (void); #endif /* _KERNEL */ diff --git a/sys/platform/pc64/include/globaldata.h b/sys/platform/pc64/include/globaldata.h index 517b03e1be..988b07b1f5 100644 --- a/sys/platform/pc64/include/globaldata.h +++ b/sys/platform/pc64/include/globaldata.h @@ -64,6 +64,9 @@ * the service routine will loop. * * The current thread's cpl is stored in the thread structure. + * + * Note: the embedded globaldata and/or the mdglobaldata structure + * may exceed the size of a page. */ struct mdglobaldata { struct globaldata mi; @@ -79,7 +82,7 @@ struct mdglobaldata { int gd_sdelayed; /* delayed software ints */ int gd_currentldt; int gd_private_tss; - u_int gd_unused001; + u_int unused001; u_int gd_other_cpus; u_int gd_ss_eflags; pt_entry_t *gd_CMAP1; @@ -90,6 +93,8 @@ struct mdglobaldata { caddr_t gd_CADDR2; caddr_t gd_CADDR3; pt_entry_t *gd_PADDR1; + u_int gd_acpi_id; + u_int gd_apic_id; register_t gd_scratch_rsp; register_t gd_rsp0; register_t gd_user_fs; /* current user fs in MSR */ diff --git a/sys/platform/pc64/include/intr_machdep.h b/sys/platform/pc64/include/intr_machdep.h deleted file mode 100644 index d826013fb1..0000000000 --- a/sys/platform/pc64/include/intr_machdep.h +++ /dev/null @@ -1,82 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin - * Copyright (c) 2008 The DragonFly Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/amd64/include/intr_machdep.h,v 1.18 2007/05/08 21:29:13 jhb Exp $ - * $DragonFly: src/sys/platform/pc64/include/intr_machdep.h,v 1.2 2008/08/29 17:07:17 dillon Exp $ - */ - -#ifndef __MACHINE_INTR_MACHDEP_H__ -#define __MACHINE_INTR_MACHDEP_H__ - -#ifdef _KERNEL - -/* - * The maximum number of I/O interrupts we allow. This number is rather - * arbitrary as it is just the maximum IRQ resource value. The interrupt - * source for a given IRQ maps that I/O interrupt to device interrupt - * source whether it be a pin on an interrupt controller or an MSI interrupt. - * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device - * interrupts allocate IDT vectors on demand. Currently we have 191 IDT - * vectors available for device interrupts. On many systems with I/O APICs, - * a lot of the IRQs are not used, so this number can be much larger than - * 191 and still be safe since only interrupt sources in actual use will - * allocate IDT vectors. - * - * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. - */ -#define NUM_MSI_INTS 128 -#define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) - -/* - * Default base address for MSI messages on x86 platforms. - */ -#define MSI_INTEL_ADDR_BASE 0xfee00000 - -/* - * - 1 ??? dummy counter. - * - 2 counters for each I/O interrupt. - * - 1 counter for each CPU for lapic timer. - * - 7 counters for each CPU for IPI counters for SMP. - */ -#ifdef SMP -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 7) * MAXCPU) -#else -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) -#endif - -#ifndef LOCORE - -#ifndef JG_defined_inthand_t -#define JG_defined_inthand_t -typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); -#endif - -#endif /* !LOCORE */ -#endif /* _KERNEL */ -#endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/platform/pc64/include/lock.h b/sys/platform/pc64/include/lock.h index 40522086f0..ce80365438 100644 --- a/sys/platform/pc64/include/lock.h +++ b/sys/platform/pc64/include/lock.h @@ -1,30 +1,38 @@ /* - * Copyright (c) 2003,2008 The DragonFly Project. - * Copyright (c) 2003 Matthew Dillon. - * All rights reserved. - * + * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: + * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * + * * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/platform/pc64/include/lock.h,v 1.4 2008/08/29 17:07:17 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/lock.h,v 1.17 2008/06/19 21:32:55 aggelos Exp $ */ #ifndef _MACHINE_LOCK_H_ @@ -65,23 +73,24 @@ pushfq ; \ popq %rcx ; /* flags */ \ cli ; \ - orl $PSL_C,%rcx ; /* make sure non-zero */ \ + orq $PSL_C,%rcx ; /* make sure non-zero */ \ 7: ; \ movq $0,%rax ; /* expected contents of lock */ \ lock cmpxchgq %rcx,mem ; /* Z=1 (jz) on success */ \ + pause ; \ jnz 7b ; \ #define SPIN_LOCK_PUSH_REGS \ - subq $2*8,%rsp ; \ + subq $16,%rsp ; \ movq %rcx,(%rsp) ; \ movq %rax,8(%rsp) ; \ #define SPIN_LOCK_POP_REGS \ movq (%rsp),%rcx ; \ movq 8(%rsp),%rax ; \ - addq $2*8,%rsp ; \ + addq $16,%rsp ; \ -#define SPIN_LOCK_FRAME_SIZE 8 +#define SPIN_LOCK_FRAME_SIZE 16 #define SPIN_LOCK_NOREG(mem) \ SPIN_LOCK_PUSH_REGS ; \ @@ -148,8 +157,8 @@ void clock_unlock(void); extern struct spinlock_deprecated smp_rv_spinlock; -void spin_lock_deprecated(spinlock_t); -void spin_unlock_deprecated(spinlock_t); +void spin_lock_deprecated(spinlock_t lock); +void spin_unlock_deprecated(spinlock_t lock); /* * Inline version of spinlock routines -- overrides assembly. 
Only unlock @@ -188,11 +197,18 @@ cpu_rel_mplock(void) mp_lock = MP_FREE_LOCK; } -#else +static __inline int +owner_mplock(void) +{ + return (mp_lock); +} + +#else /* !SMP */ #define get_mplock() #define try_mplock() 1 #define rel_mplock() +#define owner_mplock() 0 /* always cpu 0 */ #define MP_LOCK_HELD() (!0) #define ASSERT_MP_LOCK_HELD(td) diff --git a/sys/platform/pc64/include/smp.h b/sys/platform/pc64/include/smp.h index a016bff08f..fdfa12f401 100644 --- a/sys/platform/pc64/include/smp.h +++ b/sys/platform/pc64/include/smp.h @@ -1,4 +1,4 @@ -/*- +/* * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you @@ -6,8 +6,8 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $FreeBSD: src/sys/amd64/include/smp.h,v 1.90 2007/05/19 05:01:43 kan Exp $ - * $DragonFly: src/sys/platform/pc64/include/smp.h,v 1.1 2007/09/23 04:42:07 yanyh Exp $ + * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ + * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.20 2006/11/07 06:43:24 dillon Exp $ * */ @@ -16,14 +16,26 @@ #ifdef _KERNEL -#ifdef SMP +#if defined(SMP) #ifndef LOCORE -#include -#include -#include -#include +/* + * For sending values to POST displays. + * XXX FIXME: where does this really belong, isa.h/isa.c perhaps? + */ +extern int current_postcode; /** XXX currently in mp_machdep.c */ +#define POSTCODE(X) current_postcode = (X), \ + outb(0x80, current_postcode) +#define POSTCODE_LO(X) current_postcode &= 0xf0, \ + current_postcode |= ((X) & 0x0f), \ + outb(0x80, current_postcode) +#define POSTCODE_HI(X) current_postcode &= 0x0f, \ + current_postcode |= (((X) << 4) & 0xf0), \ + outb(0x80, current_postcode) + + +#include #include /* global symbols in mpboot.S */ @@ -31,54 +43,108 @@ extern char mptramp_start[]; extern char mptramp_end[]; extern u_int32_t mptramp_pagetables; +/* functions in mpboot.s */ +void bootMP (void); + +/* global data in apic_vector.s */ +extern volatile u_int stopped_cpus; +extern volatile u_int started_cpus; + +extern volatile u_int checkstate_probed_cpus; +extern void (*cpustop_restartfunc) (void); + +/* functions in apic_ipl.s */ +u_int io_apic_read (int, int); +void io_apic_write (int, int, u_int); + /* global data in mp_machdep.c */ +extern int bsp_apic_ready; extern int mp_naps; +extern int mp_nbusses; +extern int mp_napics; extern int boot_cpu_id; +extern vm_offset_t cpu_apic_address; +extern vm_offset_t io_apic_address[]; +extern u_int32_t cpu_apic_versions[]; +extern u_int32_t *io_apic_versions; +extern int cpu_num_to_apic_id[]; +extern int io_num_to_apic_id[]; +extern int apic_id_to_logical[]; +#define APIC_INTMAPSIZE 24 +struct apic_intmapinfo { + int ioapic; + int int_pin; + volatile void *apic_address; + int redirindex; +}; +extern struct apic_intmapinfo int_to_apicintpin[]; extern struct pcb stoppcbs[]; -extern struct mtx smp_tlb_mtx; -extern int cpu_apic_ids[]; - -/* IPI handlers */ -inthand_t - IDTVEC(invltlb), /* TLB shootdowns - global */ - IDTVEC(invlpg), /* TLB shootdowns - 1 page */ - IDTVEC(invlrng), /* TLB shootdowns - page range */ - IDTVEC(invlcache), /* Write back and invalidate cache */ - IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ - IDTVEC(cpustop), /* CPU stops & waits to be restarted */ - IDTVEC(rendezvous); /* handle CPU rendezvous */ /* functions in mp_machdep.c */ 
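+/*
+ * A note on the POSTCODE() macros above: each latches its value in
+ * current_postcode and writes it to I/O port 0x80, where a POST
+ * diagnostic card (or an emulator's port 0x80 trace) can display it.
+ * Typical use while bringing up an AP (the values here are arbitrary,
+ * chosen for illustration only):
+ *
+ *	POSTCODE(0x15);		// whole byte
+ *	POSTCODE_HI(0x1);	// replace just the high nibble
+ */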
-void cpu_add(u_int apic_id, char boot_cpu); -void cpustop_handler(void); -void init_secondary(void); -void ipi_selected(u_int cpus, u_int ipi); -void ipi_all(u_int ipi); -void ipi_all_but_self(u_int ipi); -void ipi_self(u_int ipi); -void ipi_bitmap_handler(struct trapframe frame); -u_int mp_bootaddress(u_int); -int mp_grab_cpu_hlt(void); -void mp_topology(void); -void smp_cache_flush(void); -void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(u_int mask, vm_offset_t addr); -void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(u_int mask, vm_offset_t startva, - vm_offset_t endva); -void smp_invltlb(void); -void smp_masked_invltlb(u_int mask); - -#ifdef STOP_NMI -int ipi_nmi_handler(void); -#endif +void *permanent_io_mapping(vm_paddr_t); +u_int mp_bootaddress (u_int); +int mp_probe (void); +void mp_start (void); +void mp_announce (void); +u_int isa_apic_mask (u_int); +int isa_apic_irq (int); +int pci_apic_irq (int, int, int); +int apic_irq (int, int); +int next_apic_irq (int); +int undirect_isa_irq (int); +int undirect_pci_irq (int); +int apic_bus_type (int); +int apic_src_bus_id (int, int); +int apic_src_bus_irq (int, int); +int apic_int_type (int, int); +int apic_trigger (int, int); +int apic_polarity (int, int); +void assign_apic_irq (int apic, int intpin, int irq); +void revoke_apic_irq (int irq); +void init_secondary (void); +int stop_cpus (u_int); +void ap_init (void); +int restart_cpus (u_int); +void forward_signal (struct proc *); + +/* global data in mpapic.c */ +extern volatile lapic_t *lapic; +extern volatile ioapic_t **ioapic; + +/* functions in mpapic.c */ +void apic_dump (char*); +void apic_initialize (boolean_t); +void imen_dump (void); +int apic_ipi (int, int, int); +void selected_apic_ipi (u_int, int, int); +void single_apic_ipi(int cpu, int vector, int delivery_mode); +int single_apic_ipi_passive(int cpu, int vector, int delivery_mode); +int io_apic_setup (int); +void io_apic_setup_intpin (int, int); +void io_apic_set_id (int, int); +int io_apic_get_id (int); +int ext_int_setup (int, int); + +#if defined(READY) +void clr_io_apic_mask24 (int, u_int32_t); +void set_io_apic_mask24 (int, u_int32_t); +#endif /* READY */ + +void set_apic_timer (int); +int read_apic_timer (void); +void u_sleep (int); +void cpu_send_ipiq (int); +int cpu_send_ipiq_passive (int); + +/* global data in init_smp.c */ +extern cpumask_t smp_active_mask; #endif /* !LOCORE */ -#else /* !SMP */ +#else /* !SMP */ -#define smp_active_mask 1 /* smp_active_mask always 1 on UP machines */ +#define smp_active_mask 1 /* smp_active_mask always 1 on UP machines */ -#endif /* !SMP */ +#endif #endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/platform/pc64/isa/intr_machdep.h b/sys/platform/pc64/isa/intr_machdep.h index 23da2b188d..4495939da8 100644 --- a/sys/platform/pc64/isa/intr_machdep.h +++ b/sys/platform/pc64/isa/intr_machdep.h @@ -121,6 +121,9 @@ /* IPIQ rendezvous */ #define XIPIQ_OFFSET (IDT_OFFSET + 115) +/* TIMER rendezvous */ +#define XTIMER_OFFSET (IDT_OFFSET + 116) + /* IPI to signal CPUs to stop and wait for another CPU to restart them */ #define XCPUSTOP_OFFSET (IDT_OFFSET + 128) @@ -151,6 +154,7 @@ inthand_t Xforward_irq, /* Forward irq to cpu holding ISR lock */ Xcpustop, /* CPU stops & waits for another CPU to restart it */ Xspuriousint, /* handle APIC "spurious INTs" */ + Xtimer, /* handle LAPIC timer INT */ Xipiq; /* handle lwkt_send_ipiq() requests */ #endif /* SMP */ diff --git a/sys/platform/pc64/isa/npx.c 
b/sys/platform/pc64/isa/npx.c index 10ac6e1125..df35c5c60c 100644 --- a/sys/platform/pc64/isa/npx.c +++ b/sys/platform/pc64/isa/npx.c @@ -254,8 +254,8 @@ npx_probe(device_t dev) save_idt_npxtrap = idt[16]; outb(IO_ICU1 + 1, ~(1 << ICU_IRQ_SLAVE)); outb(IO_ICU2 + 1, ~(1 << (npx_irq - 8))); - setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, 0); + setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, 0); npx_idt_probeintr = idt[npx_intrno]; cpu_enable_intr(); result = npx_probe1(dev); -- 2.41.0