amd64: Big batch of SMP changes.
author Jordan Gordeev <jgordeev@dir.bg>
Wed, 1 Jul 2009 19:32:11 +0000 (22:32 +0300)
committer Jordan Gordeev <jgordeev@dir.bg>
Wed, 1 Jul 2009 19:32:11 +0000 (22:32 +0300)
29 files changed:
sys/cpu/amd64/include/atomic.h
sys/cpu/amd64/include/cpu.h
sys/cpu/amd64/include/cpufunc.h
sys/kern/lwkt_ipiq.c
sys/platform/pc64/amd64/exception.S
sys/platform/pc64/amd64/genassym.c
sys/platform/pc64/amd64/identcpu.c
sys/platform/pc64/amd64/ipl.s
sys/platform/pc64/amd64/machdep.c
sys/platform/pc64/amd64/mp.c [deleted file]
sys/platform/pc64/amd64/mp_machdep.c [new file with mode: 0644]
sys/platform/pc64/amd64/mpboot.S [new file with mode: 0644]
sys/platform/pc64/amd64/mplock.s
sys/platform/pc64/amd64/support.s
sys/platform/pc64/amd64/trap.c
sys/platform/pc64/apic/apic_abi.c
sys/platform/pc64/apic/apic_ipl.s
sys/platform/pc64/apic/apic_vector.s
sys/platform/pc64/apic/apicreg.h
sys/platform/pc64/apic/mpapic.c
sys/platform/pc64/conf/files
sys/platform/pc64/conf/options
sys/platform/pc64/include/clock.h
sys/platform/pc64/include/globaldata.h
sys/platform/pc64/include/intr_machdep.h [deleted file]
sys/platform/pc64/include/lock.h
sys/platform/pc64/include/smp.h
sys/platform/pc64/isa/intr_machdep.h
sys/platform/pc64/isa/npx.c

index 0317245..99a0cbf 100644
@@ -1,6 +1,5 @@
 /*-
- * Copyright (c) 1998 Doug Rabson.
- * Copyright (c) 2008 The DragonFly Project.
+ * Copyright (c) 1998 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,8 +23,8 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/amd64/include/atomic.h,v 1.32 2003/11/21 03:02:00 peter Exp $
- * $DragonFly: src/sys/cpu/amd64/include/atomic.h,v 1.3 2008/08/29 17:07:06 dillon Exp $
+ * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
+ * $DragonFly: src/sys/cpu/i386/include/atomic.h,v 1.25 2008/06/26 23:06:50 dillon Exp $
  */
 #ifndef _CPU_ATOMIC_H_
 #define _CPU_ATOMIC_H_
 #endif
 
 /*
- * Various simple operations on memory, each of which is atomic in the
- * presence of interrupts and multiple processors.
+ * Various simple arithmetic on memory which is atomic in the presence
+ * of interrupts and multiple processors.
  *
- * atomic_set_char(P, V)       (*(u_char *)(P) |= (V))
- * atomic_clear_char(P, V)     (*(u_char *)(P) &= ~(V))
- * atomic_add_char(P, V)       (*(u_char *)(P) += (V))
- * atomic_subtract_char(P, V)  (*(u_char *)(P) -= (V))
+ * atomic_set_char(P, V)       (*(u_char*)(P) |= (V))
+ * atomic_clear_char(P, V)     (*(u_char*)(P) &= ~(V))
+ * atomic_add_char(P, V)       (*(u_char*)(P) += (V))
+ * atomic_subtract_char(P, V)  (*(u_char*)(P) -= (V))
  *
- * atomic_set_short(P, V)      (*(u_short *)(P) |= (V))
- * atomic_clear_short(P, V)    (*(u_short *)(P) &= ~(V))
- * atomic_add_short(P, V)      (*(u_short *)(P) += (V))
- * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V))
+ * atomic_set_short(P, V)      (*(u_short*)(P) |= (V))
+ * atomic_clear_short(P, V)    (*(u_short*)(P) &= ~(V))
+ * atomic_add_short(P, V)      (*(u_short*)(P) += (V))
+ * atomic_subtract_short(P, V) (*(u_short*)(P) -= (V))
  *
- * atomic_set_int(P, V)                (*(u_int *)(P) |= (V))
- * atomic_clear_int(P, V)      (*(u_int *)(P) &= ~(V))
- * atomic_add_int(P, V)                (*(u_int *)(P) += (V))
- * atomic_subtract_int(P, V)   (*(u_int *)(P) -= (V))
- * atomic_readandclear_int(P)  (return (*(u_int *)(P)); *(u_int *)(P) = 0;)
+ * atomic_set_int(P, V)                (*(u_int*)(P) |= (V))
+ * atomic_clear_int(P, V)      (*(u_int*)(P) &= ~(V))
+ * atomic_add_int(P, V)                (*(u_int*)(P) += (V))
+ * atomic_subtract_int(P, V)   (*(u_int*)(P) -= (V))
  *
- * atomic_set_long(P, V)       (*(u_long *)(P) |= (V))
- * atomic_clear_long(P, V)     (*(u_long *)(P) &= ~(V))
- * atomic_add_long(P, V)       (*(u_long *)(P) += (V))
- * atomic_subtract_long(P, V)  (*(u_long *)(P) -= (V))
- * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;)
+ * atomic_set_long(P, V)       (*(u_long*)(P) |= (V))
+ * atomic_clear_long(P, V)     (*(u_long*)(P) &= ~(V))
+ * atomic_add_long(P, V)       (*(u_long*)(P) += (V))
+ * atomic_subtract_long(P, V)  (*(u_long*)(P) -= (V))
+ * atomic_readandclear_long(P) (return (*(u_long*)(P)); *(u_long*)(P) = 0;)
  */
 
 /*
  * The above functions are expanded inline in the statically-linked
  * kernel.  Lock prefixes are generated if an SMP kernel is being
- * built.
+ * built, or if user code is using these functions.
  *
  * Kernel modules call real functions which are built into the kernel.
  * This allows kernel modules to be portable between UP and SMP systems.
  */
 #if defined(KLD_MODULE)
-#define        ATOMIC_ASM(NAME, TYPE, OP, CONS, V)                     \
-void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
-void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v);
-
-int    atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
-int    atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src);
-u_int  atomic_fetchadd_int(volatile u_int *p, u_int v);
-u_long atomic_fetchadd_long(volatile u_long *p, u_long v);
-
-#define        ATOMIC_STORE_LOAD(TYPE, LOP, SOP)                       \
-u_##TYPE       atomic_load_acq_##TYPE(volatile u_##TYPE *p);   \
-void           atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
-
+#define ATOMIC_ASM(NAME, TYPE, OP, V)                  \
+       extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
+       extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v);
 #else /* !KLD_MODULE */
-
-#ifdef __GNUC__
-
-/*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
 #if defined(SMP) || !defined(_KERNEL)
-#define        MPLOCKED        "lock ; "
+#define MPLOCKED       "lock ; "
 #else
-#define        MPLOCKED
+#define MPLOCKED
 #endif
 
 /*
  * The assembly is volatilized to demark potential before-and-after side
- * effects if an interrupt or SMP collision were to occur.
+ * effects if an interrupt or SMP collision were to occur.  The primary
+ * atomic instructions are MP safe, the nonlocked instructions are 
+ * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
+ * atomic instruction).
+ *
+ * +m - memory is read and written (=m - memory is only written)
+ * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
+ *     (Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
+ *     is good enough for our needs so don't get fancy.
  */
-#define        ATOMIC_ASM(NAME, TYPE, OP, CONS, V)             \
+
+/* egcs 1.1.2+ version */
+#define ATOMIC_ASM(NAME, TYPE, OP, V)                  \
 static __inline void                                   \
 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {                                                      \
        __asm __volatile(MPLOCKED OP                    \
                         : "+m" (*p)                    \
-                        : CONS (V));                   \
+                        : "iq" (V));                   \
 }                                                      \
-static __inline void                                    \
+static __inline void                                   \
 atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\
-{                                                       \
-        __asm __volatile(OP                             \
-                         : "+m" (*p)                    \
-                         : CONS (V));                   \
+{                                                      \
+       __asm __volatile(OP                             \
+                        : "+m" (*p)                    \
+                        : "iq" (V));                   \
+}
+
+#endif /* KLD_MODULE */
+
+/* egcs 1.1.2+ version */
+ATOMIC_ASM(set,      char,  "orb %b1,%0",   v)
+ATOMIC_ASM(clear,    char,  "andb %b1,%0", ~v)
+ATOMIC_ASM(add,      char,  "addb %b1,%0",  v)
+ATOMIC_ASM(subtract, char,  "subb %b1,%0",  v)
+
+ATOMIC_ASM(set,      short, "orw %w1,%0",   v)
+ATOMIC_ASM(clear,    short, "andw %w1,%0", ~v)
+ATOMIC_ASM(add,      short, "addw %w1,%0",  v)
+ATOMIC_ASM(subtract, short, "subw %w1,%0",  v)
+
+ATOMIC_ASM(set,      int,   "orl %1,%0",   v)
+ATOMIC_ASM(clear,    int,   "andl %1,%0", ~v)
+ATOMIC_ASM(add,      int,   "addl %1,%0",  v)
+ATOMIC_ASM(subtract, int,   "subl %1,%0",  v)
+
+ATOMIC_ASM(set,      long,  "orq %1,%0",   v)
+ATOMIC_ASM(clear,    long,  "andq %1,%0", ~v)
+ATOMIC_ASM(add,      long,  "addq %1,%0",  v)
+ATOMIC_ASM(subtract, long,  "subq %1,%0",  v)
+
+#if defined(KLD_MODULE)
+u_long atomic_readandclear_long(volatile u_long *addr);
+#else /* !KLD_MODULE */
+static __inline u_long
+atomic_readandclear_long(volatile u_long *addr)
+{
+       u_long res;
+
+       res = 0;
+       __asm __volatile(
+       "       xchgq   %1,%0 ;         "
+       "# atomic_readandclear_long"
+       : "+r" (res),                   /* 0 */
+         "=m" (*addr)                  /* 1 */
+       : "m" (*addr));
+
+       return (res);
+}
+#endif /* KLD_MODULE */
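
As a quick usage sketch of the primitives generated above (the flag bit and work counter below are illustrative, not part of this header):

    #include <machine/atomic.h>

    #define MYF_DIRTY   0x0001                  /* hypothetical flag bit */

    static volatile u_int  myflags;
    static volatile u_long mywork;

    static void
    atomic_usage_sketch(void)
    {
            u_long batch;

            atomic_set_int(&myflags, MYF_DIRTY);    /* locked orl, MP safe */
            atomic_clear_int(&myflags, MYF_DIRTY);  /* locked andl */

            /*
             * The _nonlocked forms omit the lock prefix: still a single
             * instruction, so safe against local interrupts but not
             * against other cpus.
             */
            atomic_add_long_nonlocked(&mywork, 1);

            /* xchgq: atomically fetch the accumulated count and zero it */
            batch = atomic_readandclear_long(&mywork);
            (void)batch;
    }
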
+
+/*
+ * atomic_poll_acquire_int(P)  Returns non-zero on success, 0 if the lock
+ *                             has already been acquired.
+ * atomic_poll_release_int(P)
+ *
+ * These support the NDIS driver and are also used for IPIQ interlocks
+ * between cpus.  Both the acquisition and release must be 
+ * cache-synchronizing instructions.
+ */
+
+#if defined(KLD_MODULE)
+
+extern int atomic_swap_int(volatile int *addr, int value);
+extern int atomic_poll_acquire_int(volatile u_int *p);
+extern void atomic_poll_release_int(volatile u_int *p);
+
+#else
+
+static __inline int
+atomic_swap_int(volatile int *addr, int value)
+{
+       __asm __volatile("xchgl %0, %1" :
+           "=r" (value), "=m" (*addr) : "0" (value) : "memory");
+       return (value);
 }
 
-#else /* !__GNUC__ */
+static __inline
+int
+atomic_poll_acquire_int(volatile u_int *p)
+{
+       u_int data;
 
-#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)                            \
-extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);  \
-extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v);
+       __asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax" : "+m" (*p), "=a" (data));
+       return(data);
+}
 
-#endif /* __GNUC__ */
+static __inline
+void
+atomic_poll_release_int(volatile u_int *p)
+{
+       __asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
+}
+
+#endif
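
The interlock pattern these are built for looks roughly like the IPIQ usage in lwkt_ipiq.c (ip_npoll is the real field; the surrounding logic is condensed and illustrative here):

    static void
    ipiq_notify(lwkt_ipiq_t ip, globaldata_t target)
    {
            /*
             * Only the cpu that flips bit 0 from 0 to 1 gets a non-zero
             * return and actually raises the IPI; later senders just
             * queue their messages.
             */
            if (atomic_poll_acquire_int(&ip->ip_npoll))
                    cpu_send_ipiq(target->gd_cpuid);
    }

    static void
    ipiq_drained(lwkt_ipiq_t ip)
    {
            /* release the interlock so the next burst raises a fresh IPI */
            atomic_poll_release_int(&ip->ip_npoll);
    }
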
 
 /*
  * These functions operate on a 32 bit interrupt interlock which is defined
@@ -177,34 +248,29 @@ extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)
  *                             the field is ignored.
  */
 
-#ifndef __ATOMIC_INTR_T
-#define __ATOMIC_INTR_T
-typedef volatile int atomic_intr_t;
-#endif
-
 #if defined(KLD_MODULE)
 
-void atomic_intr_init(atomic_intr_t *p);
-int atomic_intr_handler_disable(atomic_intr_t *p);
-void atomic_intr_handler_enable(atomic_intr_t *p);
-int atomic_intr_handler_is_enabled(atomic_intr_t *p);
-int atomic_intr_cond_test(atomic_intr_t *p);
-int atomic_intr_cond_try(atomic_intr_t *p);
-void atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg);
-void atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg);
+void atomic_intr_init(__atomic_intr_t *p);
+int atomic_intr_handler_disable(__atomic_intr_t *p);
+void atomic_intr_handler_enable(__atomic_intr_t *p);
+int atomic_intr_handler_is_enabled(__atomic_intr_t *p);
+int atomic_intr_cond_test(__atomic_intr_t *p);
+int atomic_intr_cond_try(__atomic_intr_t *p);
+void atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg);
+void atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg);
 
-#else /* !KLD_MODULE */
+#else
 
 static __inline
 void
-atomic_intr_init(atomic_intr_t *p)
+atomic_intr_init(__atomic_intr_t *p)
 {
        *p = 0;
 }
 
 static __inline
 int
-atomic_intr_handler_disable(atomic_intr_t *p)
+atomic_intr_handler_disable(__atomic_intr_t *p)
 {
        int data;
 
@@ -216,14 +282,14 @@ atomic_intr_handler_disable(atomic_intr_t *p)
 
 static __inline
 void
-atomic_intr_handler_enable(atomic_intr_t *p)
+atomic_intr_handler_enable(__atomic_intr_t *p)
 {
        __asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
 }
 
 static __inline
 int
-atomic_intr_handler_is_enabled(atomic_intr_t *p)
+atomic_intr_handler_is_enabled(__atomic_intr_t *p)
 {
        int data;
 
@@ -234,7 +300,7 @@ atomic_intr_handler_is_enabled(atomic_intr_t *p)
 
 static __inline
 void
-atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg)
+atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
 {
        __asm __volatile(MPLOCKED "incl %0; " \
                         "1: ;" \
@@ -249,49 +315,12 @@ atomic_intr_cond_enter(atomic_intr_t *p, void (*func)(void *), void *arg)
 }
 
 /*
- * Atomically add the value of v to the integer pointed to by p and return
- * the previous value of *p.
- */
-static __inline u_int
-atomic_fetchadd_int(volatile u_int *p, u_int v)
-{
-
-       __asm __volatile(
-       "       " MPLOCKED "            "
-       "       xaddl   %0, %1 ;        "
-       "# atomic_fetchadd_int"
-       : "+r" (v),                     /* 0 (result) */
-         "=m" (*p)                     /* 1 */
-       : "m" (*p));                    /* 2 */
-
-       return (v);
-}
-
-/*
- * Atomically add the value of v to the long integer pointed to by p and return
- * the previous value of *p.
- */
-static __inline u_long
-atomic_fetchadd_long(volatile u_long *p, u_long v)
-{
-
-       __asm __volatile(
-       "       " MPLOCKED "            "
-       "       xaddq   %0, %1 ;        "
-       "# atomic_fetchadd_long"
-       : "+r" (v),                     /* 0 (result) */
-         "=m" (*p)                     /* 1 */
-       : "m" (*p));                    /* 2 */
-
-       return (v);
-}
-/*
  * Attempt to enter the interrupt condition variable.  Returns zero on
  * success, 1 on failure.
  */
 static __inline
 int
-atomic_intr_cond_try(atomic_intr_t *p)
+atomic_intr_cond_try(__atomic_intr_t *p)
 {
        int ret;
 
@@ -306,7 +335,7 @@ atomic_intr_cond_try(atomic_intr_t *p)
 #ifdef __clang__
                          : : "ax", "cx", "dx");
 #else
-                        : : "cx", "dx");
+                         : : "cx", "dx");
 #endif
        return (ret);
 }
@@ -314,14 +343,14 @@ atomic_intr_cond_try(atomic_intr_t *p)
 
 static __inline
 int
-atomic_intr_cond_test(atomic_intr_t *p)
+atomic_intr_cond_test(__atomic_intr_t *p)
 {
        return((int)(*p & 0x80000000));
 }
 
 static __inline
 void
-atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg)
+atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
 {
        __asm __volatile(MPLOCKED "decl %0; " \
                        MPLOCKED "btrl $31,%0; " \
@@ -330,134 +359,65 @@ atomic_intr_cond_exit(atomic_intr_t *p, void (*func)(void *), void *arg)
                         "1: ;" \
                         : "+m" (*p) \
                         : "r"(func), "m"(arg) \
-                        : "ax", "cx", "dx", "di");     /* XXX clobbers more regs */
+                        : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
+               /* YYY the function call may clobber even more registers? */
 }
 
 #endif
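
Taken together, the intended usage is a counted condition interlock, roughly as follows (the interlock variable and contention callback are illustrative):

    static __atomic_intr_t my_interlock;  /* low bits: count, bit 31: owned */
    /* atomic_intr_init(&my_interlock) is assumed to have run at setup */

    static void
    my_contention(void *arg)
    {
            /* runs while we spin on a busy interlock, e.g. to kick an IPI */
    }

    static void
    my_exclusive_op(void)
    {
            if (atomic_intr_cond_try(&my_interlock) != 0) {
                    /* busy (try returns 0 on success): block-enter instead */
                    atomic_intr_cond_enter(&my_interlock, my_contention, NULL);
            }
            /* ... exclusive section, bit 31 is now ours ... */
            atomic_intr_cond_exit(&my_interlock, my_contention, NULL);
    }
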
 
 /*
- * Atomic compare and set, used by the mutex functions
+ * Atomic compare and set
  *
- * if (*dst == exp) *dst = src (all 32 bit words)
+ * if (*_dst == _old) *_dst = _new (all 32 bit words)
  *
  * Returns 0 on failure, non-zero on success
  */
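
The canonical retry loop built on this primitive (the flag word below is illustrative):

    static volatile u_int softflags;

    static void
    set_bits_cas(u_int bits)
    {
            u_int old;

            do {
                    old = softflags;
            } while (atomic_cmpset_int(&softflags, old, old | bits) == 0);
    }
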
+#if defined(KLD_MODULE)
+
+extern int atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new);
+extern int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src);
+extern u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
 
-#if defined(__GNUC__)
+#else
 
 static __inline int
-atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src)
+atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
 {
-       int res = exp;
-
-       __asm __volatile (
-               MPLOCKED
-       "       cmpxchgl %1,%2 ;        "
-       "       setz    %%al ;          "
-       "       movzbl  %%al,%0 ;       "
-       "1:                             "
-       "# atomic_cmpset_int"
-       : "+a" (res)                    /* 0 (result) */
-       : "r" (src),                    /* 1 */
-         "m" (*(dst))                  /* 2 */
-       : "memory");                             
-
-       return (res);
+       int res = _old;
+
+       __asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
+                        "setz %%al; " \
+                        "movzbl %%al,%0; " \
+                        : "+a" (res), "=m" (*_dst) \
+                        : "r" (_new), "m" (*_dst) \
+                        : "memory");
+       return res;
 }
 
 static __inline int
 atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src)
 {
-       long res = exp;
-
-       __asm __volatile (
-               MPLOCKED
-       "       cmpxchgq %1,%2 ;        "
-       "       setz    %%al ;          "
-       "       movzbq  %%al,%0 ;       "
-       "1:                             "
-       "# atomic_cmpset_long"
-       : "+a" (res)                    /* 0 (result) */
-       : "r" (src),                    /* 1 */
-         "m" (*(dst))                  /* 2 */
-       : "memory");                             
-
-       return (res);
+       u_long res = exp;
+
+       __asm __volatile(MPLOCKED "cmpxchgq %2,%1; " \
+                        "setz %%al; " \
+                        "movzbq %%al,%0; " \
+                        : "+a" (res), "=m" (*dst) \
+                        : "r" (src), "m" (*dst) \
+                        : "memory");
+       return res;
 }
-#endif /* defined(__GNUC__) */
-
-#if defined(__GNUC__)
-
-#define        ATOMIC_STORE_LOAD(TYPE, LOP, SOP)               \
-static __inline u_##TYPE                               \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p)           \
-{                                                      \
-       u_##TYPE res;                                   \
-                                                       \
-       __asm __volatile(MPLOCKED LOP                   \
-       : "=a" (res),                   /* 0 (result) */\
-         "+m" (*p)                     /* 1 */         \
-       : : "memory");                                  \
-                                                       \
-       return (res);                                   \
-}                                                      \
-                                                       \
-/*                                                     \
- * The XCHG instruction asserts LOCK automagically.    \
- */                                                    \
-static __inline void                                   \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{                                                      \
-       __asm __volatile(SOP                            \
-       : "+m" (*p),                    /* 0 */         \
-         "+r" (v)                      /* 1 */         \
-       : : "memory");                                  \
-}                                                      \
-struct __hack
-
-#else /* !defined(__GNUC__) */
-
-extern int atomic_cmpset_int(volatile u_int *, u_int, u_int);
-extern int atomic_cmpset_long(volatile u_long *, u_long, u_long);
-
-#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)                              \
-extern u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p);          \
-extern void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
-
-#endif /* defined(__GNUC__) */
-
-#endif /* !KLD_MODULE */
-
-ATOMIC_ASM(set,      char,  "orb %b1,%0",  "iq",  v);
-ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
-ATOMIC_ASM(add,      char,  "addb %b1,%0", "iq",  v);
-ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
-
-ATOMIC_ASM(set,      short, "orw %w1,%0",  "ir",  v);
-ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
-ATOMIC_ASM(add,      short, "addw %w1,%0", "ir",  v);
-ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
 
-ATOMIC_ASM(set,      int,   "orl %1,%0",   "ir",  v);
-ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
-ATOMIC_ASM(add,      int,   "addl %1,%0",  "ir",  v);
-ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
-
-ATOMIC_ASM(set,      long,  "orq %1,%0",   "ir",  v);
-ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
-ATOMIC_ASM(add,      long,  "addq %1,%0",  "ir",  v);
-ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
-
-ATOMIC_STORE_LOAD(char,  "cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short, "cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,   "cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,  "cmpxchgq %0,%1",  "xchgq %1,%0");
-
-#define        atomic_cmpset_32        atomic_cmpset_int
+/*
+ * Atomically add the value of v to the integer pointed to by p and return
+ * the previous value of *p.
+ */
+static __inline u_int
+atomic_fetchadd_int(volatile u_int *p, u_int v)
+{
+       __asm __volatile(MPLOCKED "xaddl %0,%1; " \
+                        : "+r" (v), "=m" (*p)  \
+                        : "m" (*p)             \
+                        : "memory");
+       return (v);
+}
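
Because xaddl hands back the pre-add value, this doubles as a ticket dispenser; a sketch:

    static volatile u_int next_ticket;

    static u_int
    take_ticket(void)
    {
            /* every caller gets a unique, monotonically increasing value */
            return (atomic_fetchadd_int(&next_ticket, 1));
    }
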
 
-#undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#endif /* KLD_MODULE */
 
+/* Acquire and release variants are identical to the normal ones. */
 #define        atomic_set_acq_char             atomic_set_char
 #define        atomic_set_rel_char             atomic_set_char
 #define        atomic_clear_acq_char           atomic_clear_char
@@ -495,7 +455,10 @@ ATOMIC_STORE_LOAD(long,    "cmpxchgq %0,%1",  "xchgq %1,%0");
 #define        atomic_add_rel_long             atomic_add_long
 #define        atomic_subtract_acq_long        atomic_subtract_long
 #define        atomic_subtract_rel_long        atomic_subtract_long
+#define        atomic_cmpset_acq_long          atomic_cmpset_long
+#define        atomic_cmpset_rel_long          atomic_cmpset_long
 
+/* Operations on 8-bit bytes. */
 #define        atomic_set_8            atomic_set_char
 #define        atomic_set_acq_8        atomic_set_acq_char
 #define        atomic_set_rel_8        atomic_set_rel_char
@@ -568,48 +531,4 @@ ATOMIC_STORE_LOAD(long,    "cmpxchgq %0,%1",  "xchgq %1,%0");
 #define        atomic_cmpset_rel_ptr   atomic_cmpset_rel_long
 #define        atomic_readandclear_ptr atomic_readandclear_long
 
-#if defined(__GNUC__)
-
-#if defined(KLD_MODULE)
-extern u_int atomic_readandclear_int(volatile u_int *addr);
-extern u_long atomic_readandclear_long(volatile u_long *addr);
-#else /* !KLD_MODULE */
-static __inline u_int
-atomic_readandclear_int(volatile u_int *addr)
-{
-       u_int result;
-
-       __asm __volatile (
-       "       xorl    %0,%0 ;         "
-       "       xchgl   %1,%0 ;         "
-       "# atomic_readandclear_int"
-       : "=&r" (result)                /* 0 (result) */
-       : "m" (*addr));                 /* 1 (addr) */
-
-       return (result);
-}
-
-static __inline u_long
-atomic_readandclear_long(volatile u_long *addr)
-{
-       u_long result;
-
-       __asm __volatile (
-       "       xorq    %0,%0 ;         "
-       "       xchgq   %1,%0 ;         "
-       "# atomic_readandclear_int"
-       : "=&r" (result)                /* 0 (result) */
-       : "m" (*addr));                 /* 1 (addr) */
-
-       return (result);
-}
-#endif /* KLD_MODULE */
-
-#else /* !defined(__GNUC__) */
-
-extern u_long  atomic_readandclear_long(volatile u_long *);
-extern u_int   atomic_readandclear_int(volatile u_int *);
-
-#endif /* defined(__GNUC__) */
-
 #endif /* ! _CPU_ATOMIC_H_ */
index c4af70b..ad0e754 100644
  *
  *     from: @(#)cpu.h 5.4 (Berkeley) 5/9/91
  * $FreeBSD: src/sys/i386/include/cpu.h,v 1.43.2.2 2001/06/15 09:37:57 scottl Exp $
- * $DragonFly: src/sys/cpu/amd64/include/cpu.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $
+ * $DragonFly: src/sys/cpu/i386/include/cpu.h,v 1.25 2007/03/01 01:46:52 corecode Exp $
  */
 
 #ifndef _CPU_CPU_H_
 #define        _CPU_CPU_H_
 
+/*
+ * Definitions unique to amd64 cpu support.
+ */
+#ifndef _CPU_PSL_H_
+#include <machine/psl.h>
+#endif
+#ifndef _CPU_FRAME_H_
+#include <machine/frame.h>
+#endif
+#ifndef _CPU_SEGMENTS_H_
+#include <machine/segments.h>
+#endif
+
+/*
+ * definitions of cpu-dependent requirements
+ * referenced in generic code
+ */
+
+#define        cpu_exec(p)     /* nothing */
+#define cpu_swapin(p)  /* nothing */
+#define cpu_setstack(lp, ap)           ((lp)->lwp_md.md_regs[SP] = (ap))
+
 #define CLKF_INTR(framep)      (mycpu->gd_intr_nesting_level > 1 || (curthread->td_flags & TDF_INTTHREAD))
-#define CLKF_PC(framep)                ((framep)->if_rip)
+#define        CLKF_PC(framep)         ((framep)->if_rip)
 
 /*
  * Preempt the current process if in interrupt from user mode,
  * atomic instruction because an interrupt on the local cpu can modify
  * the gd_reqflags field.
  */
-#define need_lwkt_resched()    \
-       atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED)
-#define need_user_resched()    \
-       atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED)
-#define need_proftick()         \
-       atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC)
-#define signotify()    \
-       atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL)
-#define sigupcall()             \
-       atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL)
-#define clear_lwkt_resched()    \
-       atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED)
-#define clear_user_resched()   \
-       atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED)
-#define user_resched_wanted()  \
-       (mycpu->gd_reqflags & RQF_AST_USER_RESCHED)
-#define lwkt_resched_wanted()   \
-       (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED)
+#define        need_lwkt_resched()     \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED)
+#define        need_user_resched()     \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED)
+#define        need_proftick()         \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_OWEUPC)
+#define        need_ipiq()             \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_IPIQ)
+#define        signotify()             \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_SIGNAL)
+#define        sigupcall()             \
+    atomic_set_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_UPCALL)
+#define        clear_user_resched()    \
+    atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_USER_RESCHED)
+#define        clear_lwkt_resched()    \
+    atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_AST_LWKT_RESCHED)
+#define        user_resched_wanted()   \
+    (mycpu->gd_reqflags & RQF_AST_USER_RESCHED)
+#define        lwkt_resched_wanted()   \
+    (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED)
+#define        any_resched_wanted()    \
+    (mycpu->gd_reqflags & (RQF_AST_LWKT_RESCHED|RQF_AST_USER_RESCHED))
 
 /*
  * CTL_MACHDEP definitions.
 }
 
 #ifdef _KERNEL
+
+struct lwp;
+
+extern char    btext[];
+extern char    etext[];
+
 void   fork_trampoline (void);
 void   generic_lwp_return (struct lwp *, struct trapframe *);
 void   fork_return (struct lwp *, struct trapframe *);
+
 #endif
 
 #endif /* !_CPU_CPU_H_ */
index a25e87a..7818eda 100644
@@ -64,6 +64,10 @@ __BEGIN_DECLS
 
 #ifdef __GNUC__
 
+#ifdef SMP
+#include <machine/lock.h>              /* XXX */
+#endif
+
 static __inline void
 breakpoint(void)
 {
index 1149b0d..18e6172 100644
@@ -172,7 +172,11 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
      * enabled while we liveloop to avoid deadlocking the APIC.
      */
     if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
+#if defined(__i386__)
        unsigned int eflags = read_eflags();
+#elif defined(__amd64__)
+       unsigned long rflags = read_rflags();
+#endif
 
        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
@@ -184,7 +188,11 @@ lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
+#if defined(__i386__)
        write_eflags(eflags);
+#elif defined(__amd64__)
+       write_rflags(rflags);
+#endif
     }
 
     /*
@@ -255,7 +263,11 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
      * enabled while we liveloop to avoid deadlocking the APIC.
      */
     if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
+#if defined(__i386__)
        unsigned int eflags = read_eflags();
+#elif defined(__amd64__)
+       unsigned long rflags = read_rflags();
+#endif
 
        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
            logipiq(cpu_send, func, arg1, arg2, gd, target);
@@ -267,7 +279,11 @@ lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
+#if defined(__i386__)
        write_eflags(eflags);
+#elif defined(__amd64__)
+       write_rflags(rflags);
+#endif
     }
 
     /*
@@ -394,7 +410,11 @@ lwkt_wait_ipiq(globaldata_t target, int seq)
     if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
+#if defined(__i386__)
            unsigned int eflags = read_eflags();
+#elif defined(__amd64__)
+           unsigned long rflags = read_rflags();
+#endif
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
@@ -411,7 +431,11 @@ lwkt_wait_ipiq(globaldata_t target, int seq)
                 */
                cpu_lfence();
            }
+#if defined(__i386__)
            write_eflags(eflags);
+#elif defined(__amd64__)
+           write_rflags(rflags);
+#endif
        }
     }
 }
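
All four hunks apply the same idiom: capture the flags register (which carries the interrupt-enable bit), spin with interrupts explicitly enabled, then restore the caller's state. On the amd64 side that reduces to the following sketch (read_rflags()/write_rflags() are the cpufunc.h inlines used above; the loop predicate is illustrative):

    unsigned long rflags = read_rflags();  /* save IF with the other flags */
    cpu_enable_intr();                     /* sti: accept incoming IPIs */
    while (ipiq_still_too_full(ip))        /* illustrative predicate */
            lwkt_process_ipiq();
    write_rflags(rflags);                  /* restore the caller's IF state */
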
index 0dea2ba..a925abe 100644
@@ -432,7 +432,7 @@ ENTRY(fork_trampoline)
        cmpl    $0,TD_MPCOUNT(%rax)
        je      1f
        movq    $pmsg4, %rdi
-       movl    TD_MPCOUNT(%rax), %rsi
+       movl    TD_MPCOUNT(%rax), %esi
        movq    %rbx, %rdx
        xorl    %eax, %eax
        call    panic
index 5059f1f..b36116d 100644
 #include <vfs/nfs/nfs.h>
 #include <vfs/nfs/nfsdiskless.h>
 
+#include <machine_base/apic/apicreg.h>
 #include <machine/segments.h>
 #include <machine/sigframe.h>
 #include <machine/globaldata.h>
 #include <machine/specialreg.h>
 #include <machine/pcb.h>
+#include <machine/smp.h>
 
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
 
 ASSYM(LWP_VMSPACE, offsetof(struct lwp, lwp_vmspace));
+ASSYM(V_IPI, offsetof(struct vmmeter, v_ipi));
+ASSYM(V_TIMER, offsetof(struct vmmeter, v_timer));
 ASSYM(UPAGES, UPAGES);
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(NPTEPG, NPTEPG);
@@ -132,6 +136,8 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
 ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
 
+ASSYM(PCB_SIZE, sizeof(struct pcb));
+
 ASSYM(TF_R15, offsetof(struct trapframe, tf_r15));
 ASSYM(TF_R14, offsetof(struct trapframe, tf_r14));
 ASSYM(TF_R13, offsetof(struct trapframe, tf_r13));
@@ -210,8 +216,11 @@ ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL);
 ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED);
 ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED);
 ASSYM(RQF_AST_UPCALL, RQF_AST_UPCALL);
+ASSYM(RQF_TIMER, RQF_TIMER);
 ASSYM(RQF_AST_MASK, RQF_AST_MASK);
 
+ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
+
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
 ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
 ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
@@ -226,3 +235,9 @@ ASSYM(MACHINTR_INTREN, offsetof(struct machintr_abi, intren));
 
 ASSYM(TDPRI_CRIT, TDPRI_CRIT);
 ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT);
+
+#ifdef SMP
+ASSYM(AIMI_APIC_ADDRESS, offsetof(struct apic_intmapinfo, apic_address));
+ASSYM(AIMI_REDIRINDEX, offsetof(struct apic_intmapinfo, redirindex));
+ASSYM(AIMI_SIZE, sizeof(struct apic_intmapinfo));
+#endif
index b5678dc..7c5ce7e 100644
@@ -57,7 +57,7 @@
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
-#include <machine/intr_machdep.h>
+#include <machine_base/isa/intr_machdep.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
 #include <machine/md_var.h>
index d63fcaa..9fc2efb 100644
@@ -153,6 +153,8 @@ doreti_next:
 #ifdef SMP
        testl   $RQF_IPIQ,PCPU(reqflags)
        jnz     doreti_ipiq
+       testl   $RQF_TIMER,PCPU(reqflags)
+       jnz     doreti_timer
 #endif
        testl   PCPU(fpending),%ecx     /* check for an unmasked fast int */
        jnz     doreti_fast
@@ -339,10 +341,22 @@ doreti_ipiq:
        movl    %eax,%esi               /* save cpl (can't use stack) */
        incl    PCPU(intr_nesting_level)
        andl    $~RQF_IPIQ,PCPU(reqflags)
-       subl    $16,%rsp                /* add dummy vec and ppl */
+       subq    $16,%rsp                /* add dummy vec and ppl */
        movq    %rsp,%rdi               /* pass frame by ref (C arg) */
        call    lwkt_process_ipiq_frame
-       addl    $16,%rsp
+       addq    $16,%rsp
+       decl    PCPU(intr_nesting_level)
+       movl    %esi,%eax               /* restore cpl for loop */
+       jmp     doreti_next
+
+doreti_timer:
+       movl    %eax,%esi               /* save cpl (can't use stack) */
+       incl    PCPU(intr_nesting_level)
+       andl    $~RQF_TIMER,PCPU(reqflags)
+       subq    $16,%rsp                        /* add dummy vec and ppl */
+       movq    %rsp,%rdi                       /* pass frame by ref (C arg) */
+       call    lapic_timer_process_frame
+       addq    $16,%rsp
        decl    PCPU(intr_nesting_level)
        movl    %esi,%eax               /* restore cpl for loop */
        jmp     doreti_next
@@ -373,6 +387,8 @@ splz_next:
 #ifdef SMP
        testl   $RQF_IPIQ,PCPU(reqflags)
        jnz     splz_ipiq
+       testl   $RQF_TIMER,PCPU(reqflags)
+       jnz     splz_timer
 #endif
        testl   PCPU(fpending),%ecx     /* check for an unmasked fast int */
        jnz     splz_fast
@@ -487,6 +503,13 @@ splz_ipiq:
        call    lwkt_process_ipiq
        popq    %rax
        jmp     splz_next
+
+splz_timer:
+       andl    $~RQF_TIMER,PCPU(reqflags)
+       pushq   %rax
+       call    lapic_timer_process
+       popq    %rax
+       jmp     splz_next
 #endif
 
        /*
index e541540..3bbeb46 100644
@@ -99,7 +99,6 @@
 #if JG
 #include <machine/bootinfo.h>
 #endif
-#include <machine/intr_machdep.h>      /* for inthand_t */
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/pc/bios.h>
 #include <machine/cputypes.h>
 
 #ifdef OLD_BUS_ARCH
-#include <bus/isa/i386/isa_device.h>
+#include <bus/isa/isa_device.h>
 #endif
 #include <machine_base/isa/intr_machdep.h>
 #include <bus/isa/rtc.h>
diff --git a/sys/platform/pc64/amd64/mp.c b/sys/platform/pc64/amd64/mp.c
deleted file mode 100644
index 0ae3b46..0000000
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- *
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $DragonFly: src/sys/platform/pc64/amd64/mp.c,v 1.2 2007/09/24 03:24:45 yanyh Exp $
- */
-
-
-#include <sys/interrupt.h>
-#include <sys/kernel.h>
-#include <sys/memrange.h>
-#include <sys/tls.h>
-#include <sys/types.h>
-
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-
-#include <machine/cpu.h>
-#include <machine/cpufunc.h>
-#include <machine/globaldata.h>
-#include <machine/md_var.h>
-#include <machine/pmap.h>
-#include <machine/smp.h>
-#include <machine/tls.h>
-
-#include <unistd.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdio.h>
-
-extern pt_entry_t *KPTphys;
-
-volatile u_int stopped_cpus;
-cpumask_t      smp_active_mask = 1;  /* which cpus are ready for IPIs etc? */
-static int     boot_address;
-static cpumask_t smp_startup_mask = 1;  /* which cpus have been started */
-int            mp_naps;                /* # of Applications processors */
-static int  mp_finish;
-
-/* function prototypes XXX these should go elsewhere */
-void bootstrap_idle(void);
-void single_cpu_ipi(int, int, int);
-void selected_cpu_ipi(u_int, int, int);
-#if 0
-void ipi_handler(int);
-#endif
-
-pt_entry_t *SMPpt;
-
-/* AP uses this during bootstrap.  Do not staticize.  */
-char *bootSTK;
-static int bootAP;
-
-
-/* XXX these need to go into the appropriate header file */
-static int start_all_aps(u_int);
-void init_secondary(void);
-void *start_ap(void *);
-
-/*
- * Get SMP fully working before we start initializing devices.
- */
-static
-void
-ap_finish(void)
-{
-       int i;
-       cpumask_t ncpus_mask = 0;
-
-       for (i = 1; i <= ncpus; i++)
-               ncpus_mask |= (1 << i);
-
-        mp_finish = 1;
-        if (bootverbose)
-                kprintf("Finish MP startup\n");
-
-       /* build our map of 'other' CPUs */
-       mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
-
-       /*
-        * Let the other cpu's finish initializing and build their map
-        * of 'other' CPUs.
-        */
-        rel_mplock();
-        while (smp_active_mask != smp_startup_mask) {
-               DELAY(100000);
-                cpu_lfence();
-       }
-
-        while (try_mplock() == 0)
-               DELAY(100000);
-        if (bootverbose)
-                kprintf("Active CPU Mask: %08x\n", smp_active_mask);
-}
-
-SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
-
-
-void *
-start_ap(void *arg __unused)
-{
-       init_secondary();
-       setrealcpu();
-       bootstrap_idle();
-
-       return(NULL); /* NOTREACHED */
-}
-
-/* storage for AP thread IDs */
-pthread_t ap_tids[MAXCPU];
-
-void
-mp_start(void)
-{
-       int shift;
-
-       ncpus = optcpus;
-
-       mp_naps = ncpus - 1;
-
-       /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
-       for (shift = 0; (1 << shift) <= ncpus; ++shift)
-               ;
-       --shift;
-       ncpus2_shift = shift;
-       ncpus2 = 1 << shift;
-       ncpus2_mask = ncpus2 - 1;
-
-        /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
-        if ((1 << shift) < ncpus)
-                ++shift;
-        ncpus_fit = 1 << shift;
-        ncpus_fit_mask = ncpus_fit - 1;
-
-       /*
-        * cpu0 initialization
-        */
-       mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-                                           sizeof(lwkt_ipiq) * ncpus);
-       bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
-
-       /*
-        * cpu 1-(n-1)
-        */
-       start_all_aps(boot_address);
-
-}
-
-void
-mp_announce(void)
-{
-       int x;
-
-       kprintf("DragonFly/MP: Multiprocessor\n");
-       kprintf(" cpu0 (BSP)\n");
-
-       for (x = 1; x <= mp_naps; ++x)
-               kprintf(" cpu%d (AP)\n", x);
-}
-
-void
-forward_fastint_remote(void *arg)
-{
-       panic("XXX forward_fastint_remote()");
-}
-
-void
-cpu_send_ipiq(int dcpu)
-{
-       if ((1 << dcpu) & smp_active_mask)
-               if (pthread_kill(ap_tids[dcpu], SIGUSR1) != 0)
-                       panic("pthread_kill failed in cpu_send_ipiq");
-#if 0
-       panic("XXX cpu_send_ipiq()");
-#endif
-}
-
-void
-smp_invltlb(void)
-{
-#ifdef SMP
-#endif
-}
-
-void
-single_cpu_ipi(int cpu, int vector, int delivery_mode)
-{
-       kprintf("XXX single_cpu_ipi\n");
-}
-
-void
-selected_cpu_ipi(u_int target, int vector, int delivery_mode)
-{
-       crit_enter();
-       while (target) {
-               int n = bsfl(target);
-               target &= ~(1 << n);
-               single_cpu_ipi(n, vector, delivery_mode);
-       }
-       crit_exit();
-}
-
-int
-stop_cpus(u_int map)
-{
-       map &= smp_active_mask;
-
-       crit_enter();
-       while (map) {
-               int n = bsfl(map);
-               map &= ~(1 << n);
-               if (pthread_kill(ap_tids[n], SIGSTOP) != 0)
-                       panic("stop_cpus: pthread_kill failed");
-       }
-       crit_exit();
-#if 0
-       panic("XXX stop_cpus()");
-#endif
-
-       return(1);
-}
-
-int
-restart_cpus(u_int map)
-{
-       map &= smp_active_mask;
-
-       crit_enter();
-       while (map) {
-               int n = bsfl(map);
-               map &= ~(1 << n);
-               if (pthread_kill(ap_tids[n], SIGCONT) != 0)
-                       panic("restart_cpus: pthread_kill failed");
-       }
-       crit_exit();
-#if 0
-       panic("XXX restart_cpus()");
-#endif
-
-       return(1);
-}
-
-void
-ap_init(void)
-{
-        /*
-         * Adjust smp_startup_mask to signal the BSP that we have started
-         * up successfully.  Note that we do not yet hold the BGL.  The BSP
-         * is waiting for our signal.
-         *
-         * We can't set our bit in smp_active_mask yet because we are holding
-         * interrupts physically disabled and remote cpus could deadlock
-         * trying to send us an IPI.
-         */
-       smp_startup_mask |= 1 << mycpu->gd_cpuid;
-       cpu_mfence();
-
-        /*
-         * Interlock for finalization.  Wait until mp_finish is non-zero,
-         * then get the MP lock.
-         *
-         * Note: We are in a critical section.
-         *
-         * Note: We have to synchronize td_mpcount to our desired MP state
-         * before calling cpu_try_mplock().
-         *
-         * Note: we are the idle thread, we can only spin.
-         *
-         * Note: The load fence is memory volatile and prevents the compiler
-         * from improperly caching mp_finish, and the cpu from improperly
-         * caching it.
-         */
-
-       while (mp_finish == 0) {
-               cpu_lfence();
-               DELAY(500000);
-       }
-        ++curthread->td_mpcount;
-        while (cpu_try_mplock() == 0)
-               DELAY(100000);
-
-        /* BSP may have changed PTD while we're waiting for the lock */
-        cpu_invltlb();
-
-        /* Build our map of 'other' CPUs. */
-        mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
-
-        kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
-
-
-        /* Set memory range attributes for this CPU to match the BSP */
-        mem_range_AP_init();
-        /*
-         * Once we go active we must process any IPIQ messages that may
-         * have been queued, because no actual IPI will occur until we
-         * set our bit in the smp_active_mask.  If we don't the IPI
-         * message interlock could be left set which would also prevent
-         * further IPIs.
-         *
-         * The idle loop doesn't expect the BGL to be held and while
-         * lwkt_switch() normally cleans things up this is a special case
-         * because we returning almost directly into the idle loop.
-         *
-         * The idle thread is never placed on the runq, make sure
-         * nothing we've done put it there.
-         */
-        KKASSERT(curthread->td_mpcount == 1);
-        smp_active_mask |= 1 << mycpu->gd_cpuid;
-
-       mdcpu->gd_fpending = 0;
-       mdcpu->gd_ipending = 0;
-       initclocks_pcpu();      /* clock interrupts (via IPIs) */
-       lwkt_process_ipiq();
-
-        /*
-         * Releasing the mp lock lets the BSP finish up the SMP init
-         */
-        rel_mplock();
-        KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
-}
-
-void
-init_secondary(void)
-{
-        int     myid = bootAP;
-        struct mdglobaldata *md;
-        struct privatespace *ps;
-
-        ps = &CPU_prvspace[myid];
-
-       KKASSERT(ps->mdglobaldata.mi.gd_prvspace == ps);
-
-       /*
-        * Setup the %gs for cpu #n.  The mycpu macro works after this
-        * point.
-        */
-       tls_set_fs(&CPU_prvspace[myid], sizeof(struct privatespace));
-
-        md = mdcpu;     /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/
-
-        md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */
-        md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
-        md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
-
-        /*
-         * Set to a known state:
-         * Set by mpboot.s: CR0_PG, CR0_PE
-         * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
-         */
-}
-
-static int
-start_all_aps(u_int boot_addr)
-{
-       int x, i;
-       struct mdglobaldata *gd;
-       struct privatespace *ps;
-       vm_page_t m;
-       vm_offset_t va;
-#if 0
-       struct lwp_params params;
-#endif
-
-       /*
-        * needed for ipis to initial thread
-        * FIXME: rename ap_tids?
-        */
-       ap_tids[0] = pthread_self();
-
-       for (x = 1; x <= mp_naps; x++)
-       {
-               /* Allocate space for the CPU's private space. */
-               va = (vm_offset_t)&CPU_prvspace[x];
-               for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) {
-                       va =(vm_offset_t)&CPU_prvspace[x].mdglobaldata + i;
-                       m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM);
-                       pmap_kenter_quick(va, m->phys_addr);
-               }
-
-               for (i = 0; i < sizeof(CPU_prvspace[x].idlestack); i += PAGE_SIZE) {
-                       va =(vm_offset_t)&CPU_prvspace[x].idlestack + i;
-                       m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM);
-                       pmap_kenter_quick(va, m->phys_addr);
-               }
-
-                gd = &CPU_prvspace[x].mdglobaldata;     /* official location */
-                bzero(gd, sizeof(*gd));
-                gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
-
-                /* prime data page for it to use */
-                mi_gdinit(&gd->mi, x);
-                cpu_gdinit(gd, x);
-
-#if 0
-                gd->gd_CMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE1);
-                gd->gd_CMAP2 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE2);
-                gd->gd_CMAP3 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE3);
-                gd->gd_PMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].PPAGE1);
-                gd->gd_CADDR1 = ps->CPAGE1;
-                gd->gd_CADDR2 = ps->CPAGE2;
-                gd->gd_CADDR3 = ps->CPAGE3;
-                gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1;
-#endif
-
-                gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
-                bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
-
-                /*
-                 * Setup the AP boot stack
-                 */
-                bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
-                bootAP = x;
-
-               /*
-                * Setup the AP's lwp, this is the 'cpu'
-                */
-               pthread_create(&ap_tids[x], NULL, start_ap, NULL);
-
-               while((smp_startup_mask & (1 << x)) == 0) {
-                       cpu_lfence(); /* XXX spin until the AP has started */
-                       DELAY(1000);
-               }
-       }
-
-       return(ncpus - 1);
-}
diff --git a/sys/platform/pc64/amd64/mp_machdep.c b/sys/platform/pc64/amd64/mp_machdep.c
new file mode 100644
index 0000000..011b538
--- /dev/null
@@ -0,0 +1,2651 @@
+/*
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $
+ */
+
+#include "opt_cpu.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/cons.h>  /* cngetc() */
+#include <sys/machintr.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <sys/lock.h>
+#include <vm/vm_map.h>
+#include <sys/user.h>
+#ifdef GPROF 
+#include <sys/gmon.h>
+#endif
+
+#include <machine/smp.h>
+#include <machine_base/apic/apicreg.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine_base/apic/mpapic.h>
+#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+#include <machine/specialreg.h>
+#include <machine/globaldata.h>
+
+#include <machine/md_var.h>            /* setidt() */
+#include <machine_base/icu/icu.h>              /* IPIs */
+#include <machine_base/isa/intr_machdep.h>     /* IPIs */
+
+#define FIXUP_EXTRA_APIC_INTS  8       /* additional entries we may create */
+
+#define WARMBOOT_TARGET                0
+#define WARMBOOT_OFF           (KERNBASE + 0x0467)
+#define WARMBOOT_SEG           (KERNBASE + 0x0469)
+
+#define BIOS_BASE              (0xf0000)
+#define BIOS_SIZE              (0x10000)
+#define BIOS_COUNT             (BIOS_SIZE/4)
+
+#define CMOS_REG               (0x70)
+#define CMOS_DATA              (0x71)
+#define BIOS_RESET             (0x0f)
+#define BIOS_WARM              (0x0a)
+
+#define PROCENTRY_FLAG_EN      0x01
+#define PROCENTRY_FLAG_BP      0x02
+#define IOAPICENTRY_FLAG_EN    0x01
+
+
+/* MP Floating Pointer Structure */
+typedef struct MPFPS {
+       char    signature[4];
+       u_int32_t pap;
+       u_char  length;
+       u_char  spec_rev;
+       u_char  checksum;
+       u_char  mpfb1;
+       u_char  mpfb2;
+       u_char  mpfb3;
+       u_char  mpfb4;
+       u_char  mpfb5;
+}      *mpfps_t;
+
+/* MP Configuration Table Header */
+typedef struct MPCTH {
+       char    signature[4];
+       u_short base_table_length;
+       u_char  spec_rev;
+       u_char  checksum;
+       u_char  oem_id[8];
+       u_char  product_id[12];
+       void   *oem_table_pointer;
+       u_short oem_table_size;
+       u_short entry_count;
+       void   *apic_address;
+       u_short extended_table_length;
+       u_char  extended_table_checksum;
+       u_char  reserved;
+}      *mpcth_t;
+
+
+typedef struct PROCENTRY {
+       u_char  type;
+       u_char  apic_id;
+       u_char  apic_version;
+       u_char  cpu_flags;
+       u_long  cpu_signature;
+       u_long  feature_flags;
+       u_long  reserved1;
+       u_long  reserved2;
+}      *proc_entry_ptr;
+
+typedef struct BUSENTRY {
+       u_char  type;
+       u_char  bus_id;
+       char    bus_type[6];
+}      *bus_entry_ptr;
+
+typedef struct IOAPICENTRY {
+       u_char  type;
+       u_char  apic_id;
+       u_char  apic_version;
+       u_char  apic_flags;
+       void   *apic_address;
+}      *io_apic_entry_ptr;
+
+typedef struct INTENTRY {
+       u_char  type;
+       u_char  int_type;
+       u_short int_flags;
+       u_char  src_bus_id;
+       u_char  src_bus_irq;
+       u_char  dst_apic_id;
+       u_char  dst_apic_int;
+}      *int_entry_ptr;
+
+/* descriptions of MP basetable entries */
+typedef struct BASETABLE_ENTRY {
+       u_char  type;
+       u_char  length;
+       char    name[16];
+}       basetable_entry;
+
+/*
+ * this code MUST be enabled here and in mpboot.s.
+ * it follows the very early stages of AP boot by placing values in CMOS ram.
+ * it NORMALLY will never be needed and thus the primitive method for enabling.
+ *
+ */
+#if defined(CHECK_POINTS)
+#define CHECK_READ(A)   (outb(CMOS_REG, (A)), inb(CMOS_DATA))
+#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
+
+#define CHECK_INIT(D);                         \
+       CHECK_WRITE(0x34, (D));                 \
+       CHECK_WRITE(0x35, (D));                 \
+       CHECK_WRITE(0x36, (D));                 \
+       CHECK_WRITE(0x37, (D));                 \
+       CHECK_WRITE(0x38, (D));                 \
+       CHECK_WRITE(0x39, (D));
+
+#define CHECK_PRINT(S);                                \
+       kprintf("%s: %d, %d, %d, %d, %d, %d\n", \
+          (S),                                 \
+          CHECK_READ(0x34),                    \
+          CHECK_READ(0x35),                    \
+          CHECK_READ(0x36),                    \
+          CHECK_READ(0x37),                    \
+          CHECK_READ(0x38),                    \
+          CHECK_READ(0x39));
+
+#else                          /* CHECK_POINTS */
+
+#define CHECK_INIT(D)
+#define CHECK_PRINT(S)
+
+#endif                         /* CHECK_POINTS */
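
When CHECK_POINTS is enabled, the intended use is to bracket an AP launch, e.g. (the seed value and label are illustrative):

    CHECK_INIT(99);                     /* seed CMOS bytes 0x34-0x39 */
    /* ... start the AP; mpboot code writes its own progress values ... */
    CHECK_PRINT("after start_ap");      /* dump the six checkpoint bytes */
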
+
+/*
+ * Values to send to the POST hardware.
+ */
+#define MP_BOOTADDRESS_POST    0x10
+#define MP_PROBE_POST          0x11
+#define MPTABLE_PASS1_POST     0x12
+
+#define MP_START_POST          0x13
+#define MP_ENABLE_POST         0x14
+#define MPTABLE_PASS2_POST     0x15
+
+#define START_ALL_APS_POST     0x16
+#define INSTALL_AP_TRAMP_POST  0x17
+#define START_AP_POST          0x18
+
+#define MP_ANNOUNCE_POST       0x19
+
+static int need_hyperthreading_fixup;
+static u_int logical_cpus;
+u_int  logical_cpus_mask;
+
+/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
+int    current_postcode;
+
+/** XXX FIXME: what system files declare these??? */
+extern struct region_descriptor r_gdt, r_idt;
+
+int    bsp_apic_ready = 0;     /* flags usability of BSP apic */
+int    mp_naps;                /* # of Application processors */
+int    mp_nbusses;             /* # of busses */
+#ifdef APIC_IO
+int    mp_napics;              /* # of IO APICs */
+#endif
+int    boot_cpu_id;            /* designated BSP */
+vm_offset_t cpu_apic_address;
+#ifdef APIC_IO
+vm_offset_t io_apic_address[NAPICID];  /* NAPICID is more than enough */
+u_int32_t *io_apic_versions;
+#endif
+extern int nkpt;
+
+u_int32_t cpu_apic_versions[MAXCPU];
+int64_t tsc0_offset;
+extern int64_t tsc_offsets[];
+
+#ifdef APIC_IO
+struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
+#endif
+
+/*
+ * APIC ID logical/physical mapping structures.
+ * We oversize these to simplify boot-time config.
+ */
+int     cpu_num_to_apic_id[NAPICID];
+#ifdef APIC_IO
+int     io_num_to_apic_id[NAPICID];
+#endif
+int     apic_id_to_logical[NAPICID];
+
+/* AP uses this during bootstrap.  Do not staticize.  */
+char *bootSTK;
+static int bootAP;
+
+/* Hotwire a 0->4MB V==P mapping */
+extern pt_entry_t *KPTphys;
+
+/*
+ * SMP page table page.  Set up by locore to point to a page table
+ * page from which we allocate per-cpu privatespace areas, io_apic
+ * mappings, and so forth.
+ */
+
+#define IO_MAPPING_START_INDEX \
+               (SMP_MAXCPU * sizeof(struct privatespace) / PAGE_SIZE)
+
+extern pt_entry_t *SMPpt;
+static int SMPpt_alloc_index = IO_MAPPING_START_INDEX;
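+
+/*
+ * I/O mappings (IO APIC registers and the like) are carved out of
+ * SMPpt[] starting just above the per-cpu privatespace PTEs; see
+ * permanent_io_mapping() below for the allocator.
+ */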
+
+struct pcb stoppcbs[MAXCPU];
+
+extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
+
+extern void initializecpu(void);
+
+/*
+ * Local data and functions.
+ */
+
+static int     mp_capable;
+static u_int   boot_address;
+static u_int   base_memory;
+static int     mp_finish;
+
+static mpfps_t mpfps;
+static int     search_for_sig(u_int32_t target, int count);
+static void    mp_enable(u_int boot_addr);
+
+static void    mptable_hyperthread_fixup(u_int id_mask);
+static void    mptable_pass1(void);
+static int     mptable_pass2(void);
+static void    default_mp_table(int type);
+static void    fix_mp_table(void);
+#ifdef APIC_IO
+static void    setup_apic_irq_mapping(void);
+static int     apic_int_is_bus_type(int intr, int bus_type);
+#endif
+static int     start_all_aps(u_int boot_addr);
+static void    install_ap_tramp(u_int boot_addr);
+static int     start_ap(struct mdglobaldata *gd, u_int boot_addr);
+
+static cpumask_t smp_startup_mask = 1; /* which cpus have been started */
+cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */
+SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, "");
+static u_int   bootMP_size;
+
+/*
+ * Calculate usable address in base memory for AP trampoline code.
+ */
+u_int
+mp_bootaddress(u_int basemem)
+{
+       POSTCODE(MP_BOOTADDRESS_POST);
+
+       bootMP_size = mptramp_end - mptramp_start;
+       base_memory = basemem;
+
+       boot_address = base_memory & ~0xfff;    /* round down to 4k boundary */
+       if ((base_memory - boot_address) < bootMP_size)
+               boot_address -= 4096;   /* not enough, lower by 4k */
+       /* 3 levels of page table pages */
+       mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
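+
+       /*
+        * Illustrative layout, assuming 639KB of base memory and a
+        * trampoline under 3KB:
+        *   boot_address       = 0x9f000   trampoline code (mpboot.S)
+        *   mptramp_pagetables = 0x9c000   PML4, PDP, PD pages
+        */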
+
+       return mptramp_pagetables;
+}
+
+
+/*
+ * Look for an Intel MP spec table (i.e., SMP-capable hardware).
+ */
+int
+mp_probe(void)
+{
+       int     x;
+       u_long  segment;
+       u_int32_t target;
+       /*
+        * Make sure our SMPpt[] page table is big enough to hold all the
+        * mappings we need.
+        */
+       KKASSERT(IO_MAPPING_START_INDEX < NPTEPG - 2);
+
+       POSTCODE(MP_PROBE_POST);
+
+       /* see if EBDA exists */
+       if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
+               /* search first 1K of EBDA */
+               target = (u_int32_t) (segment << 4);
+               if ((x = search_for_sig(target, 1024 / 4)) >= 0)
+                       goto found;
+       } else {
+               /* last 1K of base memory, effective 'top of base' passed in */
+               target = (u_int32_t) (base_memory - 0x400);
+               if ((x = search_for_sig(target, 1024 / 4)) >= 0)
+                       goto found;
+       }
+
+       /* search the BIOS */
+       target = (u_int32_t) BIOS_BASE;
+       if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
+               goto found;
+
+       /* nothing found */
+       mpfps = (mpfps_t)0;
+       mp_capable = 0;
+       return 0;
+
+found:
+       /*
+        * Calculate needed resources.  We can safely map physical
+        * memory into SMPpt after mptable_pass1() completes.
+        */
+       mpfps = (mpfps_t)x;
+       mptable_pass1();
+
+       /* flag fact that we are running multiple processors */
+       mp_capable = 1;
+       return 1;
+}
+
+
+/*
+ * Startup the SMP processors.
+ */
+void
+mp_start(void)
+{
+       POSTCODE(MP_START_POST);
+
+       /* look for MP capable motherboard */
+       if (mp_capable)
+               mp_enable(boot_address);
+       else
+               panic("MP hardware not found!");
+}
+
+
+/*
+ * Print various information about the SMP system hardware and setup.
+ */
+void
+mp_announce(void)
+{
+       int     x;
+
+       POSTCODE(MP_ANNOUNCE_POST);
+
+       kprintf("DragonFly/MP: Multiprocessor motherboard\n");
+       kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
+       kprintf(", version: 0x%08x", cpu_apic_versions[0]);
+       kprintf(", at 0x%08x\n", cpu_apic_address);
+       for (x = 1; x <= mp_naps; ++x) {
+               kprintf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
+               kprintf(", version: 0x%08x", cpu_apic_versions[x]);
+               kprintf(", at 0x%08x\n", cpu_apic_address);
+       }
+
+#if defined(APIC_IO)
+       for (x = 0; x < mp_napics; ++x) {
+               kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
+               kprintf(", version: 0x%08x", io_apic_versions[x]);
+               kprintf(", at 0x%08x\n", io_apic_address[x]);
+       }
+#else
+       kprintf(" Warning: APIC I/O disabled\n");
+#endif /* APIC_IO */
+}
+
+/*
+ * AP cpus call this to sync up protected mode.
+ *
+ * WARNING!  We must ensure that the cpu is sufficiently initialized to
+ * be able to use the FP for our optimized bzero/bcopy code before
+ * we enter more mainstream C code.
+ *
+ * WARNING! %fs is not set up on entry.  This routine sets up %fs.
+ */
+void
+init_secondary(void)
+{
+       int     gsel_tss;
+       int     x, myid = bootAP;
+       u_int64_t msr, cr0;
+       struct mdglobaldata *md;
+       struct privatespace *ps;
+
+       ps = &CPU_prvspace[myid];
+
+       gdt_segs[GPROC0_SEL].ssd_base =
+               (long) &ps->mdglobaldata.gd_common_tss;
+       ps->mdglobaldata.mi.gd_prvspace = ps;
+
+       /* We fill the 32-bit segment descriptors */
+       for (x = 0; x < NGDT; x++) {
+               if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
+                       ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x]);
+       }
+       /* And now a 64-bit one */
+       ssdtosyssd(&gdt_segs[GPROC0_SEL],
+           (struct system_segment_descriptor *)&gdt[myid * NGDT + GPROC0_SEL]);
+
+       r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
+       r_gdt.rd_base = (long) &gdt[myid * NGDT];
+       lgdt(&r_gdt);                   /* does magic intra-segment return */
+
+       lidt(&r_idt);
+
+#if 0
+       lldt(_default_ldt);
+       mdcpu->gd_currentldt = _default_ldt;
+#endif
+
+       gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+       gdt[myid * NGDT + GPROC0_SEL].sd_type = SDT_SYSTSS;
+
+       md = mdcpu;     /* loaded through %gs:0 (mdglobaldata.mi.gd_prvspace)*/
+
+       md->gd_common_tss.tss_rsp0 = 0; /* not used until after switch */
+#if 0 /* JG XXX */
+       md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
+#endif
+       md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL];
+       md->gd_common_tssd = *md->gd_tss_gdt;
+#if 0 /* JG XXX */
+       md->gd_common_tss.tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
+#endif
+       ltr(gsel_tss);
+
+       wrmsr(MSR_FSBASE, 0);           /* User value */
+       wrmsr(MSR_GSBASE, (u_int64_t)md);
+       wrmsr(MSR_KGSBASE, 0);          /* XXX User value while we're in the kernel */
+
+       /*
+        * Set to a known state:
+        * Set by mpboot.s: CR0_PG, CR0_PE
+        * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
+        */
+       cr0 = rcr0();
+       cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
+       load_cr0(cr0);
+
+       /* Set up the fast syscall stuff */
+       msr = rdmsr(MSR_EFER) | EFER_SCE;
+       wrmsr(MSR_EFER, msr);
+       wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
+       wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
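+       /*
+        * MSR_STAR: bits 47:32 select the kernel CS/SS loaded by SYSCALL,
+        * bits 63:48 the user selector base loaded by SYSRET.
+        */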
+       msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
+             ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
+       wrmsr(MSR_STAR, msr);
+       wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
+
+       pmap_set_opt();         /* PSE/4MB pages, etc */
+#if JGXXX
+       /* Initialize the PAT MSR. */
+       pmap_init_pat();
+#endif
+
+       /* set up CPU registers and state */
+       cpu_setregs();
+
+       /* set up SSE/NX registers */
+       initializecpu();
+
+       /* set up FPU state on the AP */
+       npxinit(__INITIAL_NPXCW__);
+}
+
+/*******************************************************************
+ * local functions and data
+ */
+
+/*
+ * start the SMP system
+ */
+static void
+mp_enable(u_int boot_addr)
+{
+       int     x = 0;          /* JGXXX: mptable_pass2() below is disabled */
+#if defined(APIC_IO)
+       int     apic;
+       u_int   ux;
+#endif /* APIC_IO */
+
+       POSTCODE(MP_ENABLE_POST);
+
+#if 0 /* JGXXX */
+       /* turn on 4MB of V == P addressing so we can get to MP table */
+       *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
+       cpu_invltlb();
+
+       /* examine the MP table for needed info, uses physical addresses */
+       x = mptable_pass2();
+
+       *(int *)PTD = 0;
+       cpu_invltlb();
+#endif /* 0 JGXXX */
+
+       /* can't process default configs till the CPU APIC is pmapped */
+       if (x)
+               default_mp_table(x);
+
+       /* post scan cleanup */
+       fix_mp_table();
+
+#if defined(APIC_IO)
+
+       setup_apic_irq_mapping();
+
+       /* fill the LOGICAL io_apic_versions table */
+       for (apic = 0; apic < mp_napics; ++apic) {
+               ux = io_apic_read(apic, IOAPIC_VER);
+               io_apic_versions[apic] = ux;
+               io_apic_set_id(apic, IO_TO_ID(apic));
+       }
+
+       /* program each IO APIC in the system */
+       for (apic = 0; apic < mp_napics; ++apic)
+               if (io_apic_setup(apic) < 0)
+                       panic("IO APIC setup failure");
+
+#endif /* APIC_IO */
+
+       /*
+        * These are required for SMP operation
+        */
+
+       /* install a 'Spurious INTerrupt' vector */
+       setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
+              SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+       /* install an inter-CPU IPI for TLB invalidation */
+       setidt(XINVLTLB_OFFSET, Xinvltlb,
+              SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+       /* install an inter-CPU IPI for IPIQ messaging */
+       setidt(XIPIQ_OFFSET, Xipiq,
+              SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+       /* install a timer vector */
+       setidt(XTIMER_OFFSET, Xtimer,
+              SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+       
+       /* install an inter-CPU IPI for CPU stop/restart */
+       setidt(XCPUSTOP_OFFSET, Xcpustop,
+              SDT_SYSIGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+       /* start each Application Processor */
+       start_all_aps(boot_addr);
+}
+
+
+/*
+ * look for the MP spec signature
+ */
+
+/* string defined by the Intel MP Spec as identifying the MP table */
+#define MP_SIG         0x5f504d5f      /* _MP_ */
+#define NEXT(X)                ((X) += 4)
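+/*
+ * The MP floating pointer structure is defined to begin on a 16-byte
+ * boundary, so NEXT() advances the u_int32_t index by 4 (16 bytes).
+ */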
+static int
+search_for_sig(u_int32_t target, int count)
+{
+       int     x;
+       u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
+
+       for (x = 0; x < count; NEXT(x))
+               if (addr[x] == MP_SIG)
+                       /* make array index a byte index */
+                       return (target + (x * sizeof(u_int32_t)));
+
+       return -1;
+}
+
+
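+/*
+ * Entry lengths are fixed by the MP spec: processor entries are 20
+ * bytes, all other base table entries 8 bytes.  The pass1/pass2 loops
+ * use these lengths to walk the base table.
+ */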
+static basetable_entry basetable_entry_types[] =
+{
+       {0, 20, "Processor"},
+       {1, 8, "Bus"},
+       {2, 8, "I/O APIC"},
+       {3, 8, "I/O INT"},
+       {4, 8, "Local INT"}
+};
+
+typedef struct BUSDATA {
+       u_char  bus_id;
+       enum busTypes bus_type;
+}       bus_datum;
+
+typedef struct INTDATA {
+       u_char  int_type;
+       u_short int_flags;
+       u_char  src_bus_id;
+       u_char  src_bus_irq;
+       u_char  dst_apic_id;
+       u_char  dst_apic_int;
+       u_char  int_vector;
+}       io_int, local_int;
+
+typedef struct BUSTYPENAME {
+       u_char  type;
+       char    name[7];
+}       bus_type_name;
+
+static bus_type_name bus_type_table[] =
+{
+       {CBUS, "CBUS"},
+       {CBUSII, "CBUSII"},
+       {EISA, "EISA"},
+       {MCA, "MCA"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {ISA, "ISA"},
+       {MCA, "MCA"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {PCI, "PCI"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {UNKNOWN_BUSTYPE, "---"},
+       {XPRESS, "XPRESS"},
+       {UNKNOWN_BUSTYPE, "---"}
+};
+/* from MP spec v1.4, table 5-1 */
+static int default_data[7][5] =
+{
+/*   nbus, id0, type0, id1, type1 */
+       {1, 0, ISA, 255, 255},
+       {1, 0, EISA, 255, 255},
+       {1, 0, EISA, 255, 255},
+       {1, 0, MCA, 255, 255},
+       {2, 0, ISA, 1, PCI},
+       {2, 0, EISA, 1, PCI},
+       {2, 0, MCA, 1, PCI}
+};
+
+
+/* the bus data */
+static bus_datum *bus_data;
+
+#ifdef APIC_IO
+/* the IO INT data, one entry per possible APIC INTerrupt */
+static io_int  *io_apic_ints;
+static int nintrs;
+#endif
+
+static int processor_entry     (proc_entry_ptr entry, int cpu);
+static int bus_entry           (bus_entry_ptr entry, int bus);
+#ifdef APIC_IO
+static int io_apic_entry       (io_apic_entry_ptr entry, int apic);
+static int int_entry           (int_entry_ptr entry, int intr);
+#endif
+static int lookup_bus_type     (char *name);
+
+
+/*
+ * 1st pass on motherboard's Intel MP specification table.
+ *
+ * initializes:
+ *     ncpus = 1
+ *
+ * determines:
+ *     cpu_apic_address (common to all CPUs)
+ *     io_apic_address[N]
+ *     mp_naps
+ *     mp_nbusses
+ *     mp_napics
+ *     nintrs
+ */
+static void
+mptable_pass1(void)
+{
+#ifdef APIC_IO
+       int     x;
+#endif
+       mpcth_t cth;
+       int     totalSize;
+       void*   position;
+       int     count;
+       int     type;
+       u_int   id_mask;
+
+       POSTCODE(MPTABLE_PASS1_POST);
+
+#ifdef APIC_IO
+       /* clear various tables */
+       for (x = 0; x < NAPICID; ++x) {
+               io_apic_address[x] = ~0;        /* IO APIC address table */
+       }
+#endif
+
+       /* init everything to empty */
+       mp_naps = 0;
+       mp_nbusses = 0;
+#ifdef APIC_IO
+       mp_napics = 0;
+       nintrs = 0;
+#endif
+       id_mask = 0;
+
+       /* check for use of 'default' configuration */
+       if (mpfps->mpfb1 != 0) {
+               /* use default addresses */
+               cpu_apic_address = DEFAULT_APIC_BASE;
+#ifdef APIC_IO
+               io_apic_address[0] = DEFAULT_IO_APIC_BASE;
+#endif
+
+               /* fill in with defaults */
+               mp_naps = 2;            /* includes BSP */
+               mp_nbusses = default_data[mpfps->mpfb1 - 1][0];
+#if defined(APIC_IO)
+               mp_napics = 1;
+               nintrs = 16;
+#endif /* APIC_IO */
+       }
+       else {
+               if ((cth = mpfps->pap) == 0)
+                       panic("MP Configuration Table Header MISSING!");
+
+               cpu_apic_address = (vm_offset_t) cth->apic_address;
+
+               /* walk the table, recording info of interest */
+               totalSize = cth->base_table_length - sizeof(struct MPCTH);
+               position = (u_char *) cth + sizeof(struct MPCTH);
+               count = cth->entry_count;
+
+               while (count--) {
+                       switch (type = *(u_char *) position) {
+                       case 0: /* processor_entry */
+                               if (((proc_entry_ptr)position)->cpu_flags
+                                   & PROCENTRY_FLAG_EN) {
+                                       ++mp_naps;
+                                       id_mask |= 1 <<
+                                           ((proc_entry_ptr)position)->apic_id;
+                               }
+                               break;
+                       case 1: /* bus_entry */
+                               ++mp_nbusses;
+                               break;
+                       case 2: /* io_apic_entry */
+#ifdef APIC_IO
+                               if (((io_apic_entry_ptr)position)->apic_flags
+                                       & IOAPICENTRY_FLAG_EN)
+                                       io_apic_address[mp_napics++] =
+                                           (vm_offset_t)((io_apic_entry_ptr)
+                                               position)->apic_address;
+#endif
+                               break;
+                       case 3: /* int_entry */
+#ifdef APIC_IO
+                               ++nintrs;
+#endif
+                               break;
+                       case 4: /* local_int_entry */
+                               break;
+                       default:
+                               panic("mpfps Base Table HOSED!");
+                               /* NOTREACHED */
+                       }
+
+                       totalSize -= basetable_entry_types[type].length;
+                       position = (uint8_t *)position +
+                           basetable_entry_types[type].length;
+               }
+       }
+
+       /* qualify the numbers */
+       if (mp_naps > MAXCPU) {
+               kprintf("Warning: only using %d of %d available CPUs!\n",
+                       MAXCPU, mp_naps);
+               mp_naps = MAXCPU;
+       }
+
+       /* See if we need to fixup HT logical CPUs. */
+       mptable_hyperthread_fixup(id_mask);
+       
+       /*
+        * Count the BSP.
+        * This is also used as a counter while starting the APs.
+        */
+       ncpus = 1;
+
+       --mp_naps;      /* subtract the BSP */
+}
+
+
+/*
+ * 2nd pass on motherboard's Intel MP specification table.
+ *
+ * sets:
+ *     boot_cpu_id
+ *     ID_TO_IO(N), phy APIC ID to log CPU/IO table
+ *     CPU_TO_ID(N), logical CPU to APIC ID table
+ *     IO_TO_ID(N), logical IO to APIC ID table
+ *     bus_data[N]
+ *     io_apic_ints[N]
+ */
+static int
+mptable_pass2(void)
+{
+       struct PROCENTRY proc;
+       int     x;
+       mpcth_t cth;
+       int     totalSize;
+       void*   position;
+       int     count;
+       int     type;
+       int     apic, bus, cpu, intr;
+       int     i;
+
+       POSTCODE(MPTABLE_PASS2_POST);
+
+       /* Initialize fake proc entry for use with HT fixup. */
+       bzero(&proc, sizeof(proc));
+       proc.type = 0;
+       proc.cpu_flags = PROCENTRY_FLAG_EN;
+
+#ifdef APIC_IO
+       MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
+           M_DEVBUF, M_WAITOK);
+       MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
+           M_DEVBUF, M_WAITOK | M_ZERO);
+       MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + FIXUP_EXTRA_APIC_INTS),
+           M_DEVBUF, M_WAITOK);
+#endif
+       MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
+           M_DEVBUF, M_WAITOK);
+
+#ifdef APIC_IO
+       for (i = 0; i < mp_napics; i++) {
+               ioapic[i] = permanent_io_mapping(io_apic_address[i]);
+       }
+#endif
+
+       /* clear various tables */
+       for (x = 0; x < NAPICID; ++x) {
+               CPU_TO_ID(x) = -1;      /* logical CPU to APIC ID table */
+#ifdef APIC_IO
+               ID_TO_IO(x) = -1;       /* phy APIC ID to log CPU/IO table */
+               IO_TO_ID(x) = -1;       /* logical IO to APIC ID table */
+#endif
+       }
+
+       /* clear bus data table */
+       for (x = 0; x < mp_nbusses; ++x)
+               bus_data[x].bus_id = 0xff;
+
+#ifdef APIC_IO
+       /* clear IO APIC INT table */
+       for (x = 0; x < (nintrs + 1); ++x) {
+               io_apic_ints[x].int_type = 0xff;
+               io_apic_ints[x].int_vector = 0xff;
+       }
+#endif
+
+       /* setup the cpu/apic mapping arrays */
+       boot_cpu_id = -1;
+
+       /* record whether PIC or virtual-wire mode */
+       machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT, mpfps->mpfb2 & 0x80);
+
+       /* check for use of 'default' configuration */
+       if (mpfps->mpfb1 != 0)
+               return mpfps->mpfb1;    /* return default configuration type */
+
+       if ((cth = mpfps->pap) == 0)
+               panic("MP Configuration Table Header MISSING!");
+
+       /* walk the table, recording info of interest */
+       totalSize = cth->base_table_length - sizeof(struct MPCTH);
+       position = (u_char *) cth + sizeof(struct MPCTH);
+       count = cth->entry_count;
+       apic = bus = intr = 0;
+       cpu = 1;                                /* pre-count the BSP */
+
+       while (count--) {
+               switch (type = *(u_char *) position) {
+               case 0:
+                       if (processor_entry(position, cpu))
+                               ++cpu;
+
+                       if (need_hyperthreading_fixup) {
+                               /*
+                                * Create fake mptable processor entries
+                                * and feed them to processor_entry() to
+                                * enumerate the logical CPUs.
+                                */
+                               proc.apic_id = ((proc_entry_ptr)position)->apic_id;
+                               for (i = 1; i < logical_cpus; i++) {
+                                       proc.apic_id++;
+                                       processor_entry(&proc, cpu);
+                                       logical_cpus_mask |= (1 << cpu);
+                                       cpu++;
+                               }
+                       }
+                       break;
+               case 1:
+                       if (bus_entry(position, bus))
+                               ++bus;
+                       break;
+               case 2:
+#ifdef APIC_IO
+                       if (io_apic_entry(position, apic))
+                               ++apic;
+#endif
+                       break;
+               case 3:
+#ifdef APIC_IO
+                       if (int_entry(position, intr))
+                               ++intr;
+#endif
+                       break;
+               case 4:         /* local interrupt entry */
+                       /* int_entry(position); */
+                       break;
+               default:
+                       panic("mpfps Base Table HOSED!");
+                       /* NOTREACHED */
+               }
+
+               totalSize -= basetable_entry_types[type].length;
+               position = (uint8_t *)position + basetable_entry_types[type].length;
+       }
+
+       if (boot_cpu_id == -1)
+               panic("NO BSP found!");
+
+       /* report the fact that it's NOT a default configuration */
+       return 0;
+}
+
+/*
+ * Check if we should perform a hyperthreading "fix-up" to
+ * enumerate any logical CPUs that aren't already listed
+ * in the table.
+ *
+ * XXX: We assume that all of the physical CPUs in the
+ * system have the same number of logical CPUs.
+ *
+ * XXX: We assume that APIC IDs are allocated such that
+ * the APIC IDs for a physical processor are aligned
+ * with the number of logical CPUs in the processor.
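+ *
+ * Example: a 2-thread/package system whose table lists only physical
+ * APIC IDs 0 and 2 passes both checks; IDs 1 and 3 are then
+ * synthesized via fake processor entries in mptable_pass2().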
+ */
+static void
+mptable_hyperthread_fixup(u_int id_mask)
+{
+       u_int i, id;
+
+       /* Nothing to do if there is no HTT support. */
+       if ((cpu_feature & CPUID_HTT) == 0)
+               return;
+       logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+       if (logical_cpus <= 1)
+               return;
+
+       /*
+        * For each APIC ID of a CPU that is set in the mask,
+        * scan the other candidate APIC ID's for this
+        * physical processor.  If any of those ID's are
+        * already in the table, then kill the fixup.
+        */
+       for (id = 0; id <= MAXCPU; id++) {
+               if ((id_mask & 1 << id) == 0)
+                       continue;
+               /* First, make sure we are on a logical_cpus boundary. */
+               if (id % logical_cpus != 0)
+                       return;
+               for (i = id + 1; i < id + logical_cpus; i++)
+                       if ((id_mask & 1 << i) != 0)
+                               return;
+       }
+
+       /*
+        * OK, the IDs checked out, so enable the fixup.  We have to fix up
+        * mp_naps right now.
+        */
+       need_hyperthreading_fixup = 1;
+       mp_naps *= logical_cpus;
+}
+
+#ifdef APIC_IO
+
+void
+assign_apic_irq(int apic, int intpin, int irq)
+{
+       int x;
+       
+       if (int_to_apicintpin[irq].ioapic != -1)
+               panic("assign_apic_irq: inconsistent table");
+       
+       int_to_apicintpin[irq].ioapic = apic;
+       int_to_apicintpin[irq].int_pin = intpin;
+       int_to_apicintpin[irq].apic_address = ioapic[apic];
+       int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
+       
+       for (x = 0; x < nintrs; x++) {
+               if ((io_apic_ints[x].int_type == 0 || 
+                    io_apic_ints[x].int_type == 3) &&
+                   io_apic_ints[x].int_vector == 0xff &&
+                   io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
+                   io_apic_ints[x].dst_apic_int == intpin)
+                       io_apic_ints[x].int_vector = irq;
+       }
+}
+
+void
+revoke_apic_irq(int irq)
+{
+       int x;
+       int oldapic;
+       int oldintpin;
+       
+       if (int_to_apicintpin[irq].ioapic == -1)
+               panic("revoke_apic_irq: inconsistent table");
+       
+       oldapic = int_to_apicintpin[irq].ioapic;
+       oldintpin = int_to_apicintpin[irq].int_pin;
+
+       int_to_apicintpin[irq].ioapic = -1;
+       int_to_apicintpin[irq].int_pin = 0;
+       int_to_apicintpin[irq].apic_address = NULL;
+       int_to_apicintpin[irq].redirindex = 0;
+       
+       for (x = 0; x < nintrs; x++) {
+               if ((io_apic_ints[x].int_type == 0 || 
+                    io_apic_ints[x].int_type == 3) &&
+                   io_apic_ints[x].int_vector != 0xff &&
+                   io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
+                   io_apic_ints[x].dst_apic_int == oldintpin)
+                       io_apic_ints[x].int_vector = 0xff;
+       }
+}
+
+/*
+ * Allocate an IRQ 
+ */
+static void
+allocate_apic_irq(int intr)
+{
+       int apic;
+       int intpin;
+       int irq;
+       
+       if (io_apic_ints[intr].int_vector != 0xff)
+               return;         /* Interrupt handler already assigned */
+       
+       if (io_apic_ints[intr].int_type != 0 &&
+           (io_apic_ints[intr].int_type != 3 ||
+            (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
+             io_apic_ints[intr].dst_apic_int == 0)))
+               return;         /* Not INT or ExtInt on != (0, 0) */
+       
+       irq = 0;
+       while (irq < APIC_INTMAPSIZE &&
+              int_to_apicintpin[irq].ioapic != -1)
+               irq++;
+       
+       if (irq >= APIC_INTMAPSIZE)
+               return;         /* No free interrupt handlers */
+       
+       apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
+       intpin = io_apic_ints[intr].dst_apic_int;
+       
+       assign_apic_irq(apic, intpin, irq);
+       io_apic_setup_intpin(apic, intpin);
+}
+
+
+static void
+swap_apic_id(int apic, int oldid, int newid)
+{
+       int x;
+       int oapic;
+       
+
+       if (oldid == newid)
+               return;                 /* Nothing to do */
+       
+       kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
+              apic, oldid, newid);
+       
+       /* Swap physical APIC IDs in interrupt entries */
+       for (x = 0; x < nintrs; x++) {
+               if (io_apic_ints[x].dst_apic_id == oldid)
+                       io_apic_ints[x].dst_apic_id = newid;
+               else if (io_apic_ints[x].dst_apic_id == newid)
+                       io_apic_ints[x].dst_apic_id = oldid;
+       }
+       
+       /* Swap physical APIC IDs in IO_TO_ID mappings */
+       for (oapic = 0; oapic < mp_napics; oapic++)
+               if (IO_TO_ID(oapic) == newid)
+                       break;
+       
+       if (oapic < mp_napics) {
+               kprintf("Changing APIC ID for IO APIC #%d from "
+                      "%d to %d in MP table\n",
+                      oapic, newid, oldid);
+               IO_TO_ID(oapic) = oldid;
+       }
+       IO_TO_ID(apic) = newid;
+}
+
+
+static void
+fix_id_to_io_mapping(void)
+{
+       int x;
+
+       for (x = 0; x < NAPICID; x++)
+               ID_TO_IO(x) = -1;
+       
+       for (x = 0; x <= mp_naps; x++)
+               if (CPU_TO_ID(x) < NAPICID)
+                       ID_TO_IO(CPU_TO_ID(x)) = x;
+       
+       for (x = 0; x < mp_napics; x++)
+               if (IO_TO_ID(x) < NAPICID)
+                       ID_TO_IO(IO_TO_ID(x)) = x;
+}
+
+
+static int
+first_free_apic_id(void)
+{
+       int freeid, x;
+       
+       for (freeid = 0; freeid < NAPICID; freeid++) {
+               for (x = 0; x <= mp_naps; x++)
+                       if (CPU_TO_ID(x) == freeid)
+                               break;
+               if (x <= mp_naps)
+                       continue;
+               for (x = 0; x < mp_napics; x++)
+                       if (IO_TO_ID(x) == freeid)
+                               break;
+               if (x < mp_napics)
+                       continue;
+               return freeid;
+       }
+       return freeid;
+}
+
+
+static int
+io_apic_id_acceptable(int apic, int id)
+{
+       int cpu;                /* Logical CPU number */
+       int oapic;              /* Logical IO APIC number for other IO APIC */
+
+       if (id >= NAPICID)
+               return 0;       /* Out of range */
+       
+       for (cpu = 0; cpu <= mp_naps; cpu++)
+               if (CPU_TO_ID(cpu) == id)
+                       return 0;       /* Conflict with CPU */
+       
+       for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
+               if (IO_TO_ID(oapic) == id)
+                       return 0;       /* Conflict with other APIC */
+       
+       return 1;               /* ID is acceptable for IO APIC */
+}
+
+static
+io_int *
+io_apic_find_int_entry(int apic, int pin)
+{
+       int     x;
+
+       /* search each of the possible INTerrupt sources */
+       for (x = 0; x < nintrs; ++x) {
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return (&io_apic_ints[x]);
+       }
+       return NULL;
+}
+
+#endif
+
+/*
+ * fix inconsistencies and BIOS bugs in a parsed Intel MP specification table
+ */
+static void
+fix_mp_table(void)
+{
+       int     x;
+#ifdef APIC_IO
+       int     id;
+       int     apic;           /* IO APIC unit number */
+       int     freeid;         /* Free physical APIC ID */
+       int     physid;         /* Current physical IO APIC ID */
+       io_int *io14;
+#endif
+       int     bus_0 = 0;      /* Stop GCC warning */
+       int     bus_pci = 0;    /* Stop GCC warning */
+       int     num_pci_bus;
+
+       /*
+        * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
+        * did it wrong.  The MP spec says that when more than 1 PCI bus
+        * exists the BIOS must begin with bus entries for the PCI bus and use
+        * actual PCI bus numbering.  This implies that when only 1 PCI bus
+        * exists the BIOS can choose to ignore this ordering, and indeed many
+        * MP motherboards do ignore it.  This causes a problem when the PCI
+        * sub-system makes requests of the MP sub-system based on PCI bus
+        * numbers.  So here we look for the situation and renumber the
+        * busses and associated INTs in an effort to "make it right".
+        */
+
+       /* find bus 0, PCI bus, count the number of PCI busses */
+       for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
+               if (bus_data[x].bus_id == 0) {
+                       bus_0 = x;
+               }
+               if (bus_data[x].bus_type == PCI) {
+                       ++num_pci_bus;
+                       bus_pci = x;
+               }
+       }
+       /*
+        * bus_0 == slot of bus with ID of 0
+        * bus_pci == slot of last PCI bus encountered
+        */
+
+       /* check the 1 PCI bus case for sanity */
+       /* if it is number 0 all is well */
+       if (num_pci_bus == 1 &&
+           bus_data[bus_pci].bus_id != 0) {
+               
+               /* mis-numbered, swap with whichever bus uses slot 0 */
+
+               /* swap the bus entry types */
+               bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
+               bus_data[bus_0].bus_type = PCI;
+
+#ifdef APIC_IO
+               /* swap each relevant INTerrupt entry */
+               id = bus_data[bus_pci].bus_id;
+               for (x = 0; x < nintrs; ++x) {
+                       if (io_apic_ints[x].src_bus_id == id) {
+                               io_apic_ints[x].src_bus_id = 0;
+                       }
+                       else if (io_apic_ints[x].src_bus_id == 0) {
+                               io_apic_ints[x].src_bus_id = id;
+                       }
+               }
+#endif
+       }
+
+#ifdef APIC_IO
+       /* Assign IO APIC IDs.
+        * 
+        * First try the existing ID. If a conflict is detected, try
+        * the ID in the MP table.  If a conflict is still detected, find
+        * a free id.
+        *
+        * We cannot use the ID_TO_IO table before all conflicts have been
+        * resolved and the table has been corrected.
+        */
+       for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
+               
+               /* First try to use the value set by the BIOS */
+               physid = io_apic_get_id(apic);
+               if (io_apic_id_acceptable(apic, physid)) {
+                       if (IO_TO_ID(apic) != physid)
+                               swap_apic_id(apic, IO_TO_ID(apic), physid);
+                       continue;
+               }
+
+               /* Then check if the value in the MP table is acceptable */
+               if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
+                       continue;
+
+               /* Last resort, find a free APIC ID and use it */
+               freeid = first_free_apic_id();
+               if (freeid >= NAPICID)
+                       panic("No free physical APIC IDs found");
+               
+               if (io_apic_id_acceptable(apic, freeid)) {
+                       swap_apic_id(apic, IO_TO_ID(apic), freeid);
+                       continue;
+               }
+               panic("Free physical APIC ID not usable");
+       }
+       fix_id_to_io_mapping();
+#endif
+
+#ifdef APIC_IO
+       /* detect and fix broken Compaq MP table */
+       if (apic_int_type(0, 0) == -1) {
+               kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
+               io_apic_ints[nintrs].int_type = 3;      /* ExtInt */
+               io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
+               /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
+               io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
+               io_apic_ints[nintrs].dst_apic_int = 0;  /* Pin 0 */
+               nintrs++;
+       } else if (apic_int_type(0, 0) == 0) {
+               kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
+               for (x = 0; x < nintrs; ++x)
+                       if ((0 == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                           (0 == io_apic_ints[x].dst_apic_int)) {
+                               io_apic_ints[x].int_type = 3;
+                               io_apic_ints[x].int_vector = 0xff;
+                               break;
+                       }
+       }
+
+       /*
+        * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt.  IDE
+        * controllers universally come in pairs.  If IRQ 14 is specified
+        * as an ISA interrupt, then IRQ 15 had better be too.
+        *
+        * [ Shuttle XPC / AMD Athlon X2 ]
+        *      The MPTable is missing an entry for IRQ 15.  Note that the
+        *      ACPI table has an entry for both 14 and 15.
+        */
+       if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) {
+               kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n");
+               io14 = io_apic_find_int_entry(0, 14);
+               io_apic_ints[nintrs] = *io14;
+               io_apic_ints[nintrs].src_bus_irq = 15;
+               io_apic_ints[nintrs].dst_apic_int = 15;
+               nintrs++;
+       }
+#endif
+}
+
+#ifdef APIC_IO
+
+/* Assign low level interrupt handlers */
+static void
+setup_apic_irq_mapping(void)
+{
+       int     x;
+       int     int_vector;
+
+       /* Clear array */
+       for (x = 0; x < APIC_INTMAPSIZE; x++) {
+               int_to_apicintpin[x].ioapic = -1;
+               int_to_apicintpin[x].int_pin = 0;
+               int_to_apicintpin[x].apic_address = NULL;
+               int_to_apicintpin[x].redirindex = 0;
+       }
+
+       /* First assign ISA/EISA interrupts */
+       for (x = 0; x < nintrs; x++) {
+               int_vector = io_apic_ints[x].src_bus_irq;
+               if (int_vector < APIC_INTMAPSIZE &&
+                   io_apic_ints[x].int_vector == 0xff && 
+                   int_to_apicintpin[int_vector].ioapic == -1 &&
+                   (apic_int_is_bus_type(x, ISA) ||
+                    apic_int_is_bus_type(x, EISA)) &&
+                   io_apic_ints[x].int_type == 0) {
+                       assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 
+                                       io_apic_ints[x].dst_apic_int,
+                                       int_vector);
+               }
+       }
+
+       /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
+       for (x = 0; x < nintrs; x++) {
+               if (io_apic_ints[x].dst_apic_int == 0 &&
+                   io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
+                   io_apic_ints[x].int_vector == 0xff && 
+                   int_to_apicintpin[0].ioapic == -1 &&
+                   io_apic_ints[x].int_type == 3) {
+                       assign_apic_irq(0, 0, 0);
+                       break;
+               }
+       }
+       /* PCI interrupt assignment is deferred */
+}
+
+#endif
+
+static int
+processor_entry(proc_entry_ptr entry, int cpu)
+{
+       /* check for usability */
+       if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
+               return 0;
+
+       if(entry->apic_id >= NAPICID)
+               panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
+       /* check for BSP flag */
+       if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
+               boot_cpu_id = entry->apic_id;
+               CPU_TO_ID(0) = entry->apic_id;
+               ID_TO_CPU(entry->apic_id) = 0;
+               return 0;       /* it's already been counted */
+       }
+
+       /* add another AP to list, if less than max number of CPUs */
+       else if (cpu < MAXCPU) {
+               CPU_TO_ID(cpu) = entry->apic_id;
+               ID_TO_CPU(entry->apic_id) = cpu;
+               return 1;
+       }
+
+       return 0;
+}
+
+
+static int
+bus_entry(bus_entry_ptr entry, int bus)
+{
+       int     x;
+       char    c, name[8];
+
+       /* extract the space-padded bus type name */
+       for (x = 0; x < 6; ++x) {
+               if ((c = entry->bus_type[x]) == ' ')
+                       break;
+               name[x] = c;
+       }
+       name[x] = '\0';
+
+       if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
+               panic("unknown bus type: '%s'", name);
+
+       bus_data[bus].bus_id = entry->bus_id;
+       bus_data[bus].bus_type = x;
+
+       return 1;
+}
+
+#ifdef APIC_IO
+
+static int
+io_apic_entry(io_apic_entry_ptr entry, int apic)
+{
+       if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
+               return 0;
+
+       IO_TO_ID(apic) = entry->apic_id;
+       if (entry->apic_id < NAPICID)
+               ID_TO_IO(entry->apic_id) = apic;
+
+       return 1;
+}
+
+#endif
+
+static int
+lookup_bus_type(char *name)
+{
+       int     x;
+
+       for (x = 0; x < MAX_BUSTYPE; ++x)
+               if (strcmp(bus_type_table[x].name, name) == 0)
+                       return bus_type_table[x].type;
+
+       return UNKNOWN_BUSTYPE;
+}
+
+#ifdef APIC_IO
+
+static int
+int_entry(int_entry_ptr entry, int intr)
+{
+       int apic;
+
+       io_apic_ints[intr].int_type = entry->int_type;
+       io_apic_ints[intr].int_flags = entry->int_flags;
+       io_apic_ints[intr].src_bus_id = entry->src_bus_id;
+       io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
+       if (entry->dst_apic_id == 255) {
+               /* This signal goes to all IO APICs.  Select an IO APIC
+                  with a sufficient number of interrupt pins. */
+               for (apic = 0; apic < mp_napics; apic++)
+                       if (((io_apic_read(apic, IOAPIC_VER) & 
+                             IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 
+                           entry->dst_apic_int)
+                               break;
+               if (apic < mp_napics)
+                       io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
+               else
+                       io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
+       } else
+               io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
+       io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
+
+       return 1;
+}
+
+static int
+apic_int_is_bus_type(int intr, int bus_type)
+{
+       int     bus;
+
+       for (bus = 0; bus < mp_nbusses; ++bus)
+               if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
+                   && ((int) bus_data[bus].bus_type == bus_type))
+                       return 1;
+
+       return 0;
+}
+
+/*
+ * Given a traditional ISA INT mask, return an APIC mask.
+ */
+u_int
+isa_apic_mask(u_int isa_mask)
+{
+       int isa_irq;
+       int apic_pin;
+
+#if defined(SKIP_IRQ15_REDIRECT)
+       if (isa_mask == (1 << 15)) {
+               kprintf("skipping ISA IRQ15 redirect\n");
+               return isa_mask;
+       }
+#endif  /* SKIP_IRQ15_REDIRECT */
+
+       isa_irq = ffs(isa_mask);                /* find its bit position */
+       if (isa_irq == 0)                       /* doesn't exist */
+               return 0;
+       --isa_irq;                              /* make it zero based */
+
+       apic_pin = isa_apic_irq(isa_irq);       /* look for APIC connection */
+       if (apic_pin == -1)
+               return 0;
+
+       return (1 << apic_pin);                 /* convert pin# to a mask */
+}
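+
+/*
+ * Example: isa_apic_mask(1 << 3) looks up ISA IRQ3; if that IRQ is
+ * wired to APIC pin N the result is (1 << N), or 0 when the IRQ has
+ * no APIC connection.
+ */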
+
+/*
+ * Determine which APIC pin an ISA/EISA INT is attached to.
+ */
+#define INTTYPE(I)     (io_apic_ints[(I)].int_type)
+#define INTPIN(I)      (io_apic_ints[(I)].dst_apic_int)
+#define INTIRQ(I)      (io_apic_ints[(I)].int_vector)
+#define INTAPIC(I)     (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
+
+#define SRCBUSIRQ(I)   (io_apic_ints[(I)].src_bus_irq)
+int
+isa_apic_irq(int isa_irq)
+{
+       int     intr;
+
+       for (intr = 0; intr < nintrs; ++intr) {         /* check each record */
+               if (INTTYPE(intr) == 0) {               /* standard INT */
+                       if (SRCBUSIRQ(intr) == isa_irq) {
+                               if (apic_int_is_bus_type(intr, ISA) ||
+                                   apic_int_is_bus_type(intr, EISA)) {
+                                       if (INTIRQ(intr) == 0xff)
+                                               return -1; /* unassigned */
+                                       return INTIRQ(intr);    /* found */
+                               }
+                       }
+               }
+       }
+       return -1;                                      /* NOT found */
+}
+
+
+/*
+ * Determine which APIC pin a PCI INT is attached to.
+ */
+#define SRCBUSID(I)    (io_apic_ints[(I)].src_bus_id)
+#define SRCBUSDEVICE(I)        ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
+#define SRCBUSLINE(I)  (io_apic_ints[(I)].src_bus_irq & 0x03)
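+/*
+ * For PCI source buses the MP spec packs the source into src_bus_irq:
+ * bits 6:2 hold the PCI device number and bits 1:0 the INT line
+ * (0 = INTA# ... 3 = INTD#), which the macros above decode.
+ */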
+int
+pci_apic_irq(int pciBus, int pciDevice, int pciInt)
+{
+       int     intr;
+
+       --pciInt;                                       /* zero based */
+
+       for (intr = 0; intr < nintrs; ++intr) {         /* check each record */
+               if ((INTTYPE(intr) == 0)                /* standard INT */
+                   && (SRCBUSID(intr) == pciBus)
+                   && (SRCBUSDEVICE(intr) == pciDevice)
+                   && (SRCBUSLINE(intr) == pciInt)) {  /* a candidate IRQ */
+                       if (apic_int_is_bus_type(intr, PCI)) {
+                               if (INTIRQ(intr) == 0xff)
+                                       allocate_apic_irq(intr);
+                               if (INTIRQ(intr) == 0xff)
+                                       return -1;      /* unassigned */
+                               return INTIRQ(intr);    /* exact match */
+                       }
+               }
+       }
+
+       return -1;                                      /* NOT found */
+}
+
+int
+next_apic_irq(int irq) 
+{
+       int intr, ointr;
+       int bus, bustype;
+
+       bus = 0;
+       bustype = 0;
+       for (intr = 0; intr < nintrs; intr++) {
+               if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
+                       continue;
+               bus = SRCBUSID(intr);
+               bustype = apic_bus_type(bus);
+               if (bustype != ISA &&
+                   bustype != EISA &&
+                   bustype != PCI)
+                       continue;
+               break;
+       }
+       if (intr >= nintrs) {
+               return -1;
+       }
+       for (ointr = intr + 1; ointr < nintrs; ointr++) {
+               if (INTTYPE(ointr) != 0)
+                       continue;
+               if (bus != SRCBUSID(ointr))
+                       continue;
+               if (bustype == PCI) {
+                       if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
+                               continue;
+                       if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
+                               continue;
+               }
+               if (bustype == ISA || bustype == EISA) {
+                       if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
+                               continue;
+               }
+               if (INTPIN(intr) == INTPIN(ointr))
+                       continue;
+               break;
+       }
+       if (ointr >= nintrs) {
+               return -1;
+       }
+       return INTIRQ(ointr);
+}
+#undef SRCBUSLINE
+#undef SRCBUSDEVICE
+#undef SRCBUSID
+#undef SRCBUSIRQ
+
+#undef INTPIN
+#undef INTIRQ
+#undef INTAPIC
+#undef INTTYPE
+
+#endif
+
+/*
+ * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
+ *
+ * XXX FIXME:
+ *  Exactly what this means is unclear at this point.  It is a solution
+ *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
+ *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
+ *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
+ *  option.
+ */
+int
+undirect_isa_irq(int rirq)
+{
+#if defined(READY)
+       if (bootverbose)
+           kprintf("Freeing redirected ISA irq %d.\n", rirq);
+       /** FIXME: tickle the MB redirector chip */
+       return /* XXX */;
+#else
+       if (bootverbose)
+           kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
+       return 0;
+#endif  /* READY */
+}
+
+
+/*
+ * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
+ */
+int
+undirect_pci_irq(int rirq)
+{
+#if defined(READY)
+       if (bootverbose)
+               kprintf("Freeing redirected PCI irq %d.\n", rirq);
+
+       /** FIXME: tickle the MB redirector chip */
+       return /* XXX */;
+#else
+       if (bootverbose)
+               kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n",
+                      rirq);
+       return 0;
+#endif  /* READY */
+}
+
+
+/*
+ * given a bus ID, return:
+ *  the bus type if found
+ *  -1 if NOT found
+ */
+int
+apic_bus_type(int id)
+{
+       int     x;
+
+       for (x = 0; x < mp_nbusses; ++x)
+               if (bus_data[x].bus_id == id)
+                       return bus_data[x].bus_type;
+
+       return -1;
+}
+
+#ifdef APIC_IO
+
+/*
+ * given a LOGICAL APIC# and pin#, return:
+ *  the associated src bus ID if found
+ *  -1 if NOT found
+ */
+int
+apic_src_bus_id(int apic, int pin)
+{
+       int     x;
+
+       /* search each of the possible INTerrupt sources */
+       for (x = 0; x < nintrs; ++x)
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return (io_apic_ints[x].src_bus_id);
+
+       return -1;              /* NOT found */
+}
+
+/*
+ * given a LOGICAL APIC# and pin#, return:
+ *  the associated src bus IRQ if found
+ *  -1 if NOT found
+ */
+int
+apic_src_bus_irq(int apic, int pin)
+{
+       int     x;
+
+       for (x = 0; x < nintrs; x++)
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return (io_apic_ints[x].src_bus_irq);
+
+       return -1;              /* NOT found */
+}
+
+
+/*
+ * given a LOGICAL APIC# and pin#, return:
+ *  the associated INTerrupt type if found
+ *  -1 if NOT found
+ */
+int
+apic_int_type(int apic, int pin)
+{
+       int     x;
+
+       /* search each of the possible INTerrupt sources */
+       for (x = 0; x < nintrs; ++x) {
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return (io_apic_ints[x].int_type);
+       }
+       return -1;              /* NOT found */
+}
+
+/*
+ * Return the IRQ associated with an APIC pin
+ */
+int 
+apic_irq(int apic, int pin)
+{
+       int x;
+       int res;
+
+       for (x = 0; x < nintrs; ++x) {
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int)) {
+                       res = io_apic_ints[x].int_vector;
+                       if (res == 0xff)
+                               return -1;
+                       if (apic != int_to_apicintpin[res].ioapic)
+                               panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic);
+                       if (pin != int_to_apicintpin[res].int_pin)
+                               panic("apic_irq inconsistent table (2)");
+                       return res;
+               }
+       }
+       return -1;
+}
+
+
+/*
+ * given a LOGICAL APIC# and pin#, return:
+ *  the associated trigger mode if found
+ *  -1 if NOT found
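+ *  (int_flags bits 3:2: 00 = conforms to bus, 01 = edge, 11 = level)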
+ */
+int
+apic_trigger(int apic, int pin)
+{
+       int     x;
+
+       /* search each of the possible INTerrupt sources */
+       for (x = 0; x < nintrs; ++x)
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return ((io_apic_ints[x].int_flags >> 2) & 0x03);
+
+       return -1;              /* NOT found */
+}
+
+
+/*
+ * given a LOGICAL APIC# and pin#, return:
+ *  the associated 'active' level if found
+ *  -1 if NOT found
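+ *  (int_flags bits 1:0: 00 = conforms to bus, 01 = active high,
+ *   11 = active low)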
+ */
+int
+apic_polarity(int apic, int pin)
+{
+       int     x;
+
+       /* search each of the possible INTerrupt sources */
+       for (x = 0; x < nintrs; ++x)
+               if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
+                   (pin == io_apic_ints[x].dst_apic_int))
+                       return (io_apic_ints[x].int_flags & 0x03);
+
+       return -1;              /* NOT found */
+}
+
+#endif
+
+/*
+ * set data according to MP defaults
+ * FIXME: probably not complete yet...
+ */
+static void
+default_mp_table(int type)
+{
+       int     ap_cpu_id;
+#if defined(APIC_IO)
+       int     io_apic_id;
+       int     pin;
+#endif /* APIC_IO */
+
+#if 0
+       kprintf("  MP default config type: %d\n", type);
+       switch (type) {
+       case 1:
+               kprintf("   bus: ISA, APIC: 82489DX\n");
+               break;
+       case 2:
+               kprintf("   bus: EISA, APIC: 82489DX\n");
+               break;
+       case 3:
+               kprintf("   bus: EISA, APIC: 82489DX\n");
+               break;
+       case 4:
+               kprintf("   bus: MCA, APIC: 82489DX\n");
+               break;
+       case 5:
+               kprintf("   bus: ISA+PCI, APIC: Integrated\n");
+               break;
+       case 6:
+               kprintf("   bus: EISA+PCI, APIC: Integrated\n");
+               break;
+       case 7:
+               kprintf("   bus: MCA+PCI, APIC: Integrated\n");
+               break;
+       default:
+               kprintf("   future type\n");
+               break;
+               /* NOTREACHED */
+       }
+#endif /* 0 */
+
+       boot_cpu_id = (lapic->id & APIC_ID_MASK) >> 24;
+       ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
+
+       /* BSP */
+       CPU_TO_ID(0) = boot_cpu_id;
+       ID_TO_CPU(boot_cpu_id) = 0;
+
+       /* one and only AP */
+       CPU_TO_ID(1) = ap_cpu_id;
+       ID_TO_CPU(ap_cpu_id) = 1;
+
+#if defined(APIC_IO)
+       /* one and only IO APIC */
+       io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
+
+       /*
+        * Sanity check; refer to MP spec section 3.6.6, last paragraph.
+        * Necessary because some hardware doesn't properly set up the
+        * IO APIC.
+        */
+#if defined(REALLY_ANAL_IOAPICID_VALUE)
+       if (io_apic_id != 2) {
+#else
+       if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
+#endif /* REALLY_ANAL_IOAPICID_VALUE */
+               io_apic_set_id(0, 2);
+               io_apic_id = 2;
+       }
+       IO_TO_ID(0) = io_apic_id;
+       ID_TO_IO(io_apic_id) = 0;
+#endif /* APIC_IO */
+
+       /* fill out bus entries */
+       switch (type) {
+       case 1:
+       case 2:
+       case 3:
+       case 4:
+       case 5:
+       case 6:
+       case 7:
+               bus_data[0].bus_id = default_data[type - 1][1];
+               bus_data[0].bus_type = default_data[type - 1][2];
+               bus_data[1].bus_id = default_data[type - 1][3];
+               bus_data[1].bus_type = default_data[type - 1][4];
+               break;
+
+       default:                /* illegal/reserved */
+               panic("BAD default MP config: %d", type);
+               /* NOTREACHED */
+       }
+
+#if defined(APIC_IO)
+       /* general cases from MP v1.4, table 5-2 */
+       for (pin = 0; pin < 16; ++pin) {
+               io_apic_ints[pin].int_type = 0;
+               io_apic_ints[pin].int_flags = 0x05;     /* edge/active-hi */
+               io_apic_ints[pin].src_bus_id = 0;
+               io_apic_ints[pin].src_bus_irq = pin;    /* IRQ2 caught below */
+               io_apic_ints[pin].dst_apic_id = io_apic_id;
+               io_apic_ints[pin].dst_apic_int = pin;   /* 1-to-1 */
+       }
+
+       /* special cases from MP v1.4, table 5-2 */
+       if (type == 2) {
+               io_apic_ints[2].int_type = 0xff;        /* N/C */
+               io_apic_ints[13].int_type = 0xff;       /* N/C */
+#if !defined(APIC_MIXED_MODE)
+               /** FIXME: ??? */
+               panic("sorry, can't support type 2 default yet");
+#endif /* APIC_MIXED_MODE */
+       }
+       else
+               io_apic_ints[2].src_bus_irq = 0;        /* ISA IRQ0 is on APIC INT 2 */
+
+       if (type == 7)
+               io_apic_ints[0].int_type = 0xff;        /* N/C */
+       else
+               io_apic_ints[0].int_type = 3;   /* vectored 8259 */
+#endif /* APIC_IO */
+}
+
+/*
+ * Map a physical memory address representing I/O into KVA.  The I/O
+ * block is assumed not to cross a page boundary.
+ */
+void *
+permanent_io_mapping(vm_paddr_t pa)
+{
+       vm_offset_t vaddr;
+       int pgeflag;
+       int i;
+
+       KKASSERT(pa < 0x100000000LL);
+
+       pgeflag = 0;    /* not used for SMP yet */
+
+       /*
+        * If the requested physical address has already been incidentally
+        * mapped, just use the existing mapping.  Otherwise create a new
+        * mapping.
+        */
+       for (i = IO_MAPPING_START_INDEX; i < SMPpt_alloc_index; ++i) {
+               if (((vm_offset_t)SMPpt[i] & PG_FRAME) ==
+                   ((vm_offset_t)pa & PG_FRAME)) {
+                       break;
+               }
+       }
+       if (i == SMPpt_alloc_index) {
+               if (i == NPTEPG - 2) {
+                       panic("permanent_io_mapping: We ran out of space"
+                             " in SMPpt[]!");
+               }
+               SMPpt[i] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
+                          ((vm_offset_t)pa & PG_FRAME));
+               ++SMPpt_alloc_index;
+       }
+       vaddr = (vm_offset_t)CPU_prvspace + (i * PAGE_SIZE) +
+               ((vm_offset_t)pa & PAGE_MASK);
+       return ((void *)vaddr);
+}
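
The loop above is a linear search over the PTEs already installed in SMPpt[],
falling back to the next free slot.  A userland model of the same
search-or-allocate pattern (slot count, flag handling, and names are
illustrative, not the kernel's):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define NSLOTS    16
    #define PAGE_SIZE 4096
    #define PAGE_MASK (PAGE_SIZE - 1)
    #define PG_FRAME  (~(uint64_t)PAGE_MASK)

    static uint64_t slots[NSLOTS];
    static int nalloc;

    static size_t map_io_page(uint64_t pa)
    {
        int i;

        for (i = 0; i < nalloc; ++i)
            if ((slots[i] & PG_FRAME) == (pa & PG_FRAME))
                break;                  /* already mapped, reuse the slot */
        if (i == nalloc) {
            assert(nalloc < NSLOTS);    /* the kernel panics here instead */
            slots[nalloc++] = pa & PG_FRAME;  /* kernel also ORs PG_V|PG_RW */
        }
        /* vaddr = window base + slot offset + offset within the page */
        return (size_t)i * PAGE_SIZE + (pa & PAGE_MASK);
    }

    int main(void)
    {
        /* two registers in the same page frame share one slot */
        return map_io_page(0xfee00020ULL) / PAGE_SIZE ==
               map_io_page(0xfee00300ULL) / PAGE_SIZE ? 0 : 1;
    }
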
+
+/*
+ * start each AP in our list
+ */
+static int
+start_all_aps(u_int boot_addr)
+{
+       vm_offset_t va = boot_address + KERNBASE;
+       u_int64_t *pt4, *pt3, *pt2;
+       int     x, i, pg;
+       int     shift;
+       u_char  mpbiosreason;
+       u_long  mpbioswarmvec;
+       struct mdglobaldata *gd;
+       struct privatespace *ps;
+       char *stack;
+       uintptr_t kptbase;
+
+       POSTCODE(START_ALL_APS_POST);
+
+       /* Initialize BSP's local APIC */
+       apic_initialize(TRUE);
+       bsp_apic_ready = 1;
+
+       /* install the AP 1st level boot code */
+       pmap_kenter(va, boot_address);
+       cpu_invlpg(va); /* JG XXX */
+       bcopy(mptramp_start, (void *)va, bootMP_size);
+
+       /* Locate the page tables, they'll be below the trampoline */
+       pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
+       pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
+       pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
+
+       /* Create the initial 1GB replicated page tables */
+       for (i = 0; i < 512; i++) {
+               /* Each slot of the level 4 pages points to the same level 3 page */
+               pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
+               pt4[i] |= PG_V | PG_RW | PG_U;
+
+               /* Each slot of the level 3 pages points to the same level 2 page */
+               pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
+               pt3[i] |= PG_V | PG_RW | PG_U;
+
+               /* The level 2 page slots are mapped with 2MB pages for 1GB. */
+               pt2[i] = i * (2 * 1024 * 1024);
+               pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+       }
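
Since every level 4 and level 3 slot points at the same next-level page, this
single 1GB identity window repeats across the entire virtual address space,
which is what lets the trampoline keep running across the paging switch.  A
quick check of the 2MB PDE arithmetic, assuming the usual amd64 flag values:

    #include <stdint.h>
    #include <stdio.h>

    /* assumption: the standard amd64 PTE flag values */
    #define PG_V  0x001ULL
    #define PG_RW 0x002ULL
    #define PG_PS 0x080ULL  /* 2MB page at the PD level */

    int main(void)
    {
        /* the i-th PD entry identity-maps [i*2MB, (i+1)*2MB) */
        for (unsigned i = 0; i < 4; ++i) {
            uint64_t pde = (uint64_t)i * (2 * 1024 * 1024) | PG_V | PG_RW | PG_PS;
            printf("pt2[%u] = 0x%08llx -> maps %uMB..%uMB\n",
                   i, (unsigned long long)pde, i * 2, (i + 1) * 2);
        }
        return 0;
    }
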
+
+       /* save the current value of the warm-start vector */
+       mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
+       outb(CMOS_REG, BIOS_RESET);
+       mpbiosreason = inb(CMOS_DATA);
+
+       /* setup a vector to our boot code */
+       *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+       *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+       outb(CMOS_REG, BIOS_RESET);
+       outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
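
The warm-start vector is a real-mode segment:offset pair, which is why the
segment written is the physical boot address shifted right by 4.  A one-line
check of the arithmetic with a hypothetical boot page:

    #include <stdio.h>

    int main(void)
    {
        unsigned boot_address = 0x7000;     /* hypothetical AP boot page */
        unsigned seg = boot_address >> 4;   /* what WARMBOOT_SEG receives */

        /* real mode: physical address = segment * 16 + offset */
        printf("%04x:0000 -> phys 0x%x\n", seg, seg * 16);
        return 0;
    }
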
+
+       /* start each AP */
+       for (x = 1; x <= mp_naps; ++x) {
+
+               /* This is a bit verbose; it will go away soon. */
+
+               /* first page of AP's private space */
+               pg = x * amd64_btop(sizeof(struct privatespace));
+
+               /* allocate new private data page(s) */
+               gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, 
+                               MDGLOBALDATA_BASEALLOC_SIZE);
+#if JGXXX
+               /* wire it into the private page table page */
+               for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) {
+                       SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t)
+                           (PG_V | PG_RW | vtophys_pte((char *)gd + i));
+               }
+               pg += MDGLOBALDATA_BASEALLOC_PAGES;
+
+               SMPpt[pg + 0] = 0;              /* *gd_CMAP1 */
+               SMPpt[pg + 1] = 0;              /* *gd_CMAP2 */
+               SMPpt[pg + 2] = 0;              /* *gd_CMAP3 */
+               SMPpt[pg + 3] = 0;              /* *gd_PMAP1 */
+
+               /* allocate and set up an idle stack data page */
+               stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE);
+               for (i = 0; i < UPAGES; i++) {
+                       SMPpt[pg + 4 + i] = (pt_entry_t)
+                           (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack));
+               }
+#endif
+
+               gd = &CPU_prvspace[x].mdglobaldata;     /* official location */
+               bzero(gd, sizeof(*gd));
+               gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
+
+               /* prime data page for it to use */
+               mi_gdinit(&gd->mi, x);
+               cpu_gdinit(gd, x);
+               gd->gd_CMAP1 = &SMPpt[pg + 0];
+               gd->gd_CMAP2 = &SMPpt[pg + 1];
+               gd->gd_CMAP3 = &SMPpt[pg + 2];
+               gd->gd_PMAP1 = &SMPpt[pg + 3];
+               gd->gd_CADDR1 = ps->CPAGE1;
+               gd->gd_CADDR2 = ps->CPAGE2;
+               gd->gd_CADDR3 = ps->CPAGE3;
+               gd->gd_PADDR1 = (unsigned *)ps->PPAGE1;
+               gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
+               bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
+
+               /* setup a vector to our boot code */
+               *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+               *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
+               outb(CMOS_REG, BIOS_RESET);
+               outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
+
+               /*
+                * Set up the AP boot stack
+                */
+               bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
+               bootAP = x;
+
+               /* attempt to start the Application Processor */
+               CHECK_INIT(99); /* setup checkpoints */
+               if (!start_ap(gd, boot_addr)) {
+                       kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
+                       CHECK_PRINT("trace");   /* show checkpoints */
+                       /* better panic as the AP may be running loose */
+                       kprintf("panic y/n? [y] ");
+                       if (cngetc() != 'n')
+                               panic("bye-bye");
+               }
+               CHECK_PRINT("trace");           /* show checkpoints */
+
+               /* record its version info */
+               cpu_apic_versions[x] = cpu_apic_versions[0];
+       }
+
+       /* set ncpus to 1 + highest logical cpu.  Not all may have come up */
+       ncpus = x;
+
+       /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
+       for (shift = 0; (1 << shift) <= ncpus; ++shift)
+               ;
+       --shift;
+       ncpus2_shift = shift;
+       ncpus2 = 1 << shift;
+       ncpus2_mask = ncpus2 - 1;
+
+       /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
+       if ((1 << shift) < ncpus)
+               ++shift;
+       ncpus_fit = 1 << shift;
+       ncpus_fit_mask = ncpus_fit - 1;
+
+       /* build our map of 'other' CPUs */
+       mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
+       mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus);
+       bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+
+       /* fill in our (BSP) APIC version */
+       cpu_apic_versions[0] = lapic->version;
+
+       /* restore the warmstart vector */
+       *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
+       outb(CMOS_REG, BIOS_RESET);
+       outb(CMOS_DATA, mpbiosreason);
+
+       /*
+        * NOTE!  The idlestack for the BSP was set up by locore.  Finish
+        * up, clean out the P==V mapping we did earlier.
+        */
+#if JGXXX
+       for (x = 0; x < NKPT; x++)
+               PTD[x] = 0;
+#endif
+       pmap_set_opt();
+
+       /* number of APs actually started */
+       return ncpus - 1;
+}
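
The power-of-2 rounding at the end is easy to verify in isolation; with
ncpus = 6, for instance, it yields ncpus2 = 4 (shift 2) and ncpus_fit = 8:

    #include <stdio.h>

    int main(void)
    {
        int ncpus = 6;      /* example: 6 cpus came up */
        int shift;

        /* round down to a power of 2, as in start_all_aps() */
        for (shift = 0; (1 << shift) <= ncpus; ++shift)
            ;
        --shift;
        printf("ncpus2 = %d (shift %d)\n", 1 << shift, shift);  /* 4, 2 */

        /* round up to a power of 2 */
        if ((1 << shift) < ncpus)
            ++shift;
        printf("ncpus_fit = %d\n", 1 << shift);                 /* 8 */
        return 0;
    }
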
+
+
+/*
+ * load the 1st level AP boot code into base memory.
+ */
+
+/* targets for relocation */
+extern void bigJump(void);
+extern void bootCodeSeg(void);
+extern void bootDataSeg(void);
+extern void MPentry(void);
+extern u_int MP_GDT;
+extern u_int mp_gdtbase;
+
+static void
+install_ap_tramp(u_int boot_addr)
+{
+       int     x;
+       int     size = *(int *) ((u_long) & bootMP_size);
+       u_char *src = (u_char *) ((u_long) bootMP);
+       u_char *dst = (u_char *) boot_addr + KERNBASE;
+       u_int   boot_base = (u_int) bootMP;
+       u_int8_t *dst8;
+       u_int16_t *dst16;
+       u_int32_t *dst32;
+
+       POSTCODE(INSTALL_AP_TRAMP_POST);
+
+       for (x = 0; x < size; ++x)
+               *dst++ = *src++;
+
+       /*
+        * Modify addresses in the code we just moved to basemem.
+        * Unfortunately we need fairly detailed info about mpboot.s for
+        * this to work; changes to mpboot.s might require changes here.
+        */
+
+       /* boot code is located in KERNEL space */
+       dst = (u_char *) boot_addr + KERNBASE;
+
+       /* modify the lgdt arg */
+       dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
+       *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
+
+       /* modify the ljmp target for MPentry() */
+       dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
+       *dst32 = ((u_int) MPentry - KERNBASE);
+
+       /* modify the target for boot code segment */
+       dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
+       dst8 = (u_int8_t *) (dst16 + 1);
+       *dst16 = (u_int) boot_addr & 0xffff;
+       *dst8 = ((u_int) boot_addr >> 16) & 0xff;
+
+       /* modify the target for boot data segment */
+       dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
+       dst8 = (u_int8_t *) (dst16 + 1);
+       *dst16 = (u_int) boot_addr & 0xffff;
+       *dst8 = ((u_int) boot_addr >> 16) & 0xff;
+}
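
Each segment patch writes a 16-bit low word followed by an 8-bit high byte,
splitting a 24-bit physical address across the two fields.  A toy model of
that split-field patch (the offset, buffer, and little-endian memcpy are
assumptions for illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void patch_seg(uint8_t *blob, size_t off, uint32_t boot_addr)
    {
        uint16_t lo = boot_addr & 0xffff;
        uint8_t  hi = (boot_addr >> 16) & 0xff;

        memcpy(blob + off, &lo, sizeof(lo));   /* low 16 bits of the address */
        blob[off + 2] = hi;                    /* bits 16-23 */
    }

    int main(void)
    {
        uint8_t blob[8] = { 0 };

        patch_seg(blob, 2, 0x0007c000);
        printf("%02x %02x %02x\n", blob[2], blob[3], blob[4]);  /* 00 c0 07 */
        return 0;
    }
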
+
+
+/*
+ * This function starts the AP (application processor) identified by
+ * its physical APIC ID.  It does quite a "song and dance"
+ * to accomplish this.  This is necessary because of the nuances
+ * of the different hardware we might encounter.  It ain't pretty,
+ * but it seems to work.
+ *
+ * NOTE: eventually an AP gets to ap_init(), which is called just 
+ * before the AP goes into the LWKT scheduler's idle loop.
+ */
+static int
+start_ap(struct mdglobaldata *gd, u_int boot_addr)
+{
+       int     physical_cpu;
+       int     vector;
+       u_long  icr_lo, icr_hi;
+
+       POSTCODE(START_AP_POST);
+
+       /* get the PHYSICAL APIC ID# */
+       physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
+
+       /* calculate the vector */
+       vector = (boot_addr >> 12) & 0xff;
+
+       /* Make sure the target cpu sees everything */
+       wbinvd();
+
+       /*
+        * First we do an INIT/RESET IPI.  This INIT IPI might be run,
+        * resetting and restarting the target CPU.  Or this INIT IPI might
+        * be latched (P5 bug), with the CPU waiting for a STARTUP IPI.  Or
+        * this INIT IPI might be ignored.
+        */
+
+       /* setup the address for the target AP */
+       icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
+       icr_hi |= (physical_cpu << 24);
+       lapic->icr_hi = icr_hi;
+
+       /* do an INIT IPI: assert RESET */
+       icr_lo = lapic->icr_lo & 0xfff00000;
+       lapic->icr_lo = icr_lo | 0x0000c500;
+
+       /* wait for pending status end */
+       while (lapic->icr_lo & APIC_DELSTAT_MASK)
+                /* spin */ ;
+
+       /* do an INIT IPI: deassert RESET */
+       lapic->icr_lo = icr_lo | 0x00008500;
+
+       /* wait for pending status end */
+       u_sleep(10000);         /* wait ~10ms */
+       while (lapic->icr_lo & APIC_DELSTAT_MASK)
+                /* spin */ ;
+
+       /*
+        * Next we do a STARTUP IPI.  If the previous INIT IPI is still
+        * latched (P5 bug), this first STARTUP IPI terminates immediately
+        * and the previously started INIT IPI continues.  Or the previous
+        * INIT IPI has already run, and this STARTUP IPI will run.  Or the
+        * previous INIT IPI was ignored, and this STARTUP IPI will run.
+        */
+
+       /* do a STARTUP IPI */
+       lapic->icr_lo = icr_lo | 0x00000600 | vector;
+       while (lapic->icr_lo & APIC_DELSTAT_MASK)
+                /* spin */ ;
+       u_sleep(200);           /* wait ~200us */
+
+       /*
+        * Finally we do a second STARTUP IPI.  It should run if the
+        * previous STARTUP IPI was cancelled by a latched INIT IPI, and
+        * otherwise be ignored, as only one STARTUP IPI is recognized
+        * after a hardware RESET or INIT IPI.
+        */
+
+       lapic->icr_lo = icr_lo | 0x00000600 | vector;
+       while (lapic->icr_lo & APIC_DELSTAT_MASK)
+                /* spin */ ;
+       u_sleep(200);           /* wait ~200us */
+
+       /* wait for it to start, see ap_init() */
+       set_apic_timer(5000000);        /* == 5 seconds */
+       while (read_apic_timer()) {
+               if (smp_startup_mask & (1 << gd->mi.gd_cpuid))
+                       return 1;       /* return SUCCESS */
+       }
+       return 0;               /* return FAILURE */
+}
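
The magic ICR constants implement Intel's "universal startup algorithm":
bits 8-10 select the delivery mode (INIT = 101b, STARTUP = 110b), bit 14
asserts level, bit 15 selects level-triggered delivery, and the STARTUP
vector is the boot page number, so the AP begins executing at vector << 12.
A decode with descriptive constant names (not the apicreg.h ones):

    #include <stdint.h>
    #include <stdio.h>

    #define ICR_DELMODE_INIT     0x00000500u  /* bits 8-10 = 101b */
    #define ICR_DELMODE_STARTUP  0x00000600u  /* bits 8-10 = 110b */
    #define ICR_LEVEL_ASSERT     0x00004000u  /* bit 14 */
    #define ICR_TRIGGER_LEVEL    0x00008000u  /* bit 15 */

    int main(void)
    {
        uint32_t vector = (0x70000 >> 12) & 0xff;  /* boot page 0x70000 */

        printf("INIT assert   = 0x%08x\n",
               ICR_TRIGGER_LEVEL | ICR_LEVEL_ASSERT | ICR_DELMODE_INIT); /* c500 */
        printf("INIT deassert = 0x%08x\n",
               ICR_TRIGGER_LEVEL | ICR_DELMODE_INIT);                    /* 8500 */
        printf("STARTUP       = 0x%08x (AP starts at 0x%x)\n",
               ICR_DELMODE_STARTUP | vector, vector << 12);
        return 0;
    }
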
+
+
+/*
+ * Lazy flush the TLB on all other CPUs.  DEPRECATED.
+ *
+ * If for some reason we were unable to start all cpus we cannot safely
+ * use broadcast IPIs.
+ */
+void
+smp_invltlb(void)
+{
+#ifdef SMP
+       if (smp_startup_mask == smp_active_mask) {
+               all_but_self_ipi(XINVLTLB_OFFSET);
+       } else {
+               selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET,
+                       APIC_DELMODE_FIXED);
+       }
+#endif
+}
+
+/*
+ * When called, the executing CPU will send an IPI to all other CPUs,
+ * requesting that they halt execution.
+ *
+ * Usually (but not necessarily) called with 'other_cpus' as its arg.
+ *
+ *  - Signals all CPUs in map to stop.
+ *  - Waits for each to stop.
+ *
+ * Returns:
+ *  -1: error
+ *   0: NA
+ *   1: ok
+ *
+ * XXX FIXME: this is not MP-safe; it needs a lock to prevent multiple
+ *            CPUs from executing it at the same time.
+ */
+int
+stop_cpus(u_int map)
+{
+       map &= smp_active_mask;
+
+       /* send the Xcpustop IPI to all CPUs in map */
+       selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
+       
+       while ((stopped_cpus & map) != map)
+               /* spin */ ;
+
+       return 1;
+}
+
+
+/*
+ * Called by a CPU to restart stopped CPUs. 
+ *
+ * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
+ *
+ *  - Signals all CPUs in map to restart.
+ *  - Waits for each to restart.
+ *
+ * Returns:
+ *  -1: error
+ *   0: NA
+ *   1: ok
+ */
+int
+restart_cpus(u_int map)
+{
+       /* signal other cpus to restart */
+       started_cpus = map & smp_active_mask;
+
+       while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
+               /* spin */ ;
+
+       return 1;
+}
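
stop_cpus()/restart_cpus() form a bitmask handshake with the Xcpustop handler:
each stopped cpu sets its bit in stopped_cpus, parks until its bit appears in
started_cpus, then clears its stopped bit.  A standalone model of the same
protocol with C11 threads and atomics standing in for IPIs:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <threads.h>

    static atomic_uint stop_req, stopped, started;

    /* Model "cpu 1": spin until asked to stop, ack by setting our bit,
     * park until restarted, then ack the restart. */
    static int ap(void *arg)
    {
        unsigned bit = 1u << (uintptr_t)arg;

        while (!(atomic_load(&stop_req) & bit))
            ;                               /* normal work */
        atomic_fetch_or(&stopped, bit);     /* Xcpustop: ack the stop */
        while (!(atomic_load(&started) & bit))
            ;                               /* parked */
        atomic_fetch_and(&stopped, ~bit);   /* ack the restart */
        return 0;
    }

    int main(void)
    {
        unsigned map = 1u << 1;
        thrd_t t;

        thrd_create(&t, ap, (void *)(uintptr_t)1);
        atomic_fetch_or(&stop_req, map);    /* stand-in for the stop IPI */
        while ((atomic_load(&stopped) & map) != map)
            ;                               /* stop_cpus() spin */
        atomic_fetch_or(&started, map);     /* restart_cpus() */
        while (atomic_load(&stopped) & map)
            ;                               /* wait for each to clear its bit */
        thrd_join(t, NULL);
        puts("stop/restart handshake complete");
        return 0;
    }
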
+
+/*
+ * This is called once the mpboot code has gotten us properly relocated
+ * and the MMU turned on, etc.   ap_init() is actually the idle thread,
+ * and when it returns the scheduler will call the real cpu_idle() main
+ * loop for the idlethread.  Interrupts are disabled on entry and should
+ * remain disabled at return.
+ */
+void
+ap_init(void)
+{
+       u_int   apic_id;
+
+       /*
+        * Adjust smp_startup_mask to signal the BSP that we have started
+        * up successfully.  Note that we do not yet hold the BGL.  The BSP
+        * is waiting for our signal.
+        *
+        * We can't set our bit in smp_active_mask yet because we are holding
+        * interrupts physically disabled and remote cpus could deadlock
+        * trying to send us an IPI.
+        */
+       smp_startup_mask |= 1 << mycpu->gd_cpuid;
+       cpu_mfence();
+
+       /*
+        * Interlock for finalization.  Wait until mp_finish is non-zero,
+        * then get the MP lock.
+        *
+        * Note: We are in a critical section.
+        *
+        * Note: We have to synchronize td_mpcount to our desired MP state
+        * before calling cpu_try_mplock().
+        *
+        * Note: we are the idle thread, we can only spin.
+        *
+        * Note: The load fence is memory-volatile and prevents both the
+        * compiler and the cpu from improperly caching mp_finish.
+        */
+       while (mp_finish == 0)
+           cpu_lfence();
+       ++curthread->td_mpcount;
+       while (cpu_try_mplock() == 0)
+           ;
+
+       if (cpu_feature & CPUID_TSC) {
+           /*
+            * The BSP is constantly updating tsc0_offset, figure out the
+            * relative difference to synchronize ktrdump.
+            */
+           tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
+       }
+
+       /* BSP may have changed PTD while we're waiting for the lock */
+       cpu_invltlb();
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+       lidt(&r_idt);
+#endif
+
+       /* Build our map of 'other' CPUs. */
+       mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
+
+       kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
+
+       /* A quick check from sanity claus */
+       apic_id = (apic_id_to_logical[(lapic->id & 0x0f000000) >> 24]);
+       if (mycpu->gd_cpuid != apic_id) {
+               kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
+               kprintf("SMP: apic_id = %d\n", apic_id);
+#if JGXXX
+               kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
+#endif
+               panic("cpuid mismatch! boom!!");
+       }
+
+       /* Initialize AP's local APIC for irq's */
+       apic_initialize(FALSE);
+
+       /* Set memory range attributes for this CPU to match the BSP */
+       mem_range_AP_init();
+
+       /*
+        * Once we go active we must process any IPIQ messages that may
+        * have been queued, because no actual IPI will occur until we
+        * set our bit in the smp_active_mask.  If we don't the IPI
+        * message interlock could be left set which would also prevent
+        * further IPIs.
+        *
+        * The idle loop doesn't expect the BGL to be held, and while
+        * lwkt_switch() normally cleans things up, this is a special case
+        * because we are returning almost directly into the idle loop.
+        *
+        * The idle thread is never placed on the runq; make sure
+        * nothing we've done has put it there.
+        */
+       KKASSERT(curthread->td_mpcount == 1);
+       smp_active_mask |= 1 << mycpu->gd_cpuid;
+
+       /*
+        * Enable interrupts here.  idle_restore will also do it, but
+        * doing it here lets us clean up any strays that got posted to
+        * the CPU during the AP boot while we are still in a critical
+        * section.
+        */
+       __asm __volatile("sti; pause; pause"::);
+       mdcpu->gd_fpending = 0;
+       mdcpu->gd_ipending = 0;
+
+       initclocks_pcpu();      /* clock interrupts (via IPIs) */
+       lwkt_process_ipiq();
+
+       /*
+        * Releasing the mp lock lets the BSP finish up the SMP init.
+        */
+       rel_mplock();
+       KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
+}
+
+/*
+ * Get SMP fully working before we start initializing devices.
+ */
+static
+void
+ap_finish(void)
+{
+       mp_finish = 1;
+       if (bootverbose)
+               kprintf("Finish MP startup\n");
+       if (cpu_feature & CPUID_TSC)
+               tsc0_offset = rdtsc();
+       tsc_offsets[0] = 0;
+       rel_mplock();
+       while (smp_active_mask != smp_startup_mask) {
+               cpu_lfence();
+               if (cpu_feature & CPUID_TSC)
+                       tsc0_offset = rdtsc();
+       }
+       while (try_mplock() == 0)
+               ;
+       if (bootverbose)
+               kprintf("Active CPU Mask: %08x\n", smp_active_mask);
+}
+
+SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
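
The interlock between ap_init() and ap_finish() is a publish/subscribe flag:
the BSP raises mp_finish once its own setup is complete, and each AP spins
until it observes the flag before contending for the MP lock.  The same flag
pattern sketched with C11 atomics, where the kernel relies on cpu_lfence()
and a volatile load:

    #include <stdatomic.h>

    static atomic_int mp_finish_model;

    /* AP side (ap_init): spin until the BSP raises the flag. */
    static void ap_wait_model(void)
    {
        while (atomic_load_explicit(&mp_finish_model,
                                    memory_order_acquire) == 0)
            ;   /* spin; the kernel loops on cpu_lfence() here */
    }

    /* BSP side (ap_finish): publish prior initialization, raise the flag. */
    static void bsp_finish_model(void)
    {
        atomic_store_explicit(&mp_finish_model, 1, memory_order_release);
    }

    int main(void)
    {
        bsp_finish_model();
        ap_wait_model();    /* returns immediately once the flag is up */
        return 0;
    }
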
+
+void
+cpu_send_ipiq(int dcpu)
+{
+        if ((1 << dcpu) & smp_active_mask)
+                single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
+}
+
+#if 0  /* single_apic_ipi_passive() not working yet */
+/*
+ * Returns 0 on failure, 1 on success
+ */
+int
+cpu_send_ipiq_passive(int dcpu)
+{
+        int r = 0;
+        if ((1 << dcpu) & smp_active_mask) {
+                r = single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
+                                        APIC_DELMODE_FIXED);
+        }
+       return(r);
+}
+#endif
+
diff --git a/sys/platform/pc64/amd64/mpboot.S b/sys/platform/pc64/amd64/mpboot.S
new file mode 100644 (file)
index 0000000..baea011
--- /dev/null
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/amd64/mpboot.S 130224 2004-06-08 01:02:52Z peter $
+ */
+
+#include <machine/asmacros.h>          /* miscellaneous asm macros */
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+       .data                           /* So we can modify it */
+
+       .p2align 4,0
+       .globl  mptramp_start
+mptramp_start:
+       .code16
+       /*
+        * The AP enters here in response to the startup IPI.
+        * We are in real mode. %cs is the only segment register set.
+        */
+       cli                             /* make sure no interrupts */
+       mov     %cs, %ax                /* copy %cs to %ds.  Remember these */
+       mov     %ax, %ds                /* are offsets rather than selectors */
+       mov     %ax, %ss
+
+       /*
+        * Find the relocation base and patch the gdt descriptor and ljmp targets.
+        */
+       xorl    %ebx,%ebx
+       mov     %cs, %bx
+       sall    $4, %ebx                /* %ebx is now our relocation base */
+       orl     %ebx, lgdt_desc-mptramp_start+2
+       orl     %ebx, jmp_32-mptramp_start+2
+       orl     %ebx, jmp_64-mptramp_start+1
+
+       /*
+        * Load the descriptor table pointer.  We'll need it when running
+        * in 16 bit protected mode.
+        */
+       lgdt    lgdt_desc-mptramp_start
+
+       /* Enable protected mode */
+       movl    $CR0_PE, %eax
+       mov     %eax, %cr0 
+
+       /*
+        * Now execute a far jump to turn on protected mode.  This
+        * causes the segment registers to turn into selectors and causes
+        * %cs to be loaded from the gdt.
+        *
+        * The following instruction is:
+        * ljmpl $bootcode-gdt, $protmode-mptramp_start
+        * but gas cannot assemble that.  And besides, we patch the targets
+        * in early startup and it's a little clearer what we are patching.
+        */
+jmp_32:
+       .byte   0x66                    /* size override to 32 bits */
+       .byte   0xea                    /* opcode for far jump */
+       .long   protmode-mptramp_start  /* offset in segment */
+       .word   bootcode-gdt            /* index in gdt for 32 bit code */
+
+       /*
+        * At this point, we are running in 32 bit legacy protected mode.
+        */
+       .code32
+protmode:
+       mov     $bootdata-gdt, %eax
+       mov     %ax, %ds
+
+       /* Turn on the PAE and PSE bits for when paging is enabled */
+       mov     %cr4, %eax
+       orl     $(CR4_PAE | CR4_PSE), %eax
+       mov     %eax, %cr4
+
+       /*
+        * Enable EFER.LME so that we get long mode when all the prereqs are
+        * in place.  In this case, it turns on when CR0_PG is finally enabled.
+        * Pick up a few other EFER bits that we'll need while we're here.
+        */
+       movl    $MSR_EFER, %ecx
+       rdmsr
+       orl     $EFER_LME | EFER_SCE, %eax
+       wrmsr
+
+       /*
+        * Point to the embedded page tables for startup.  Note that this
+        * only gets accessed after we're actually in 64 bit mode, however
+        * we can only set the bottom 32 bits of %cr3 in this state.  This
+        * means we are required to use a temporary page table that is below
+        * the 4GB limit.  %ebx is still our relocation base.  We could just
+        * subtract 3 * PAGE_SIZE, but that would be too easy.
+        */
+       leal    mptramp_pagetables-mptramp_start(%ebx),%eax
+       movl    (%eax), %eax
+       mov     %eax, %cr3
+
+       /*
+        * Finally, switch to long mode by enabling paging.  We have
+        * to be very careful here because all the segmentation disappears
+        * out from underneath us.  The spec says we can depend on the
+        * subsequent pipelined branch to execute, but *only if* everything
+        * is still identity mapped.  If any mappings change, the pipeline
+        * will flush.
+        */
+       mov     %cr0, %eax
+       orl     $CR0_PG, %eax
+       mov     %eax, %cr0
+
+       /*
+        * At this point paging is enabled, and we are in "compatibility" mode.
+        * We do another far jump to reload %cs with the 64 bit selector.
+        * %cr3 points to a 4-level page table page.
+        * We cannot yet jump all the way to the kernel because we can only
+        * specify a 32 bit linear address.  So, yet another trampoline.
+        *
+        * The following instruction is:
+        * ljmp $kernelcode-gdt, $tramp_64-mptramp_start
+        * but gas cannot assemble that.  And besides, we patch the targets
+        * in early startup and it's a little clearer what we are patching.
+        */
+jmp_64:
+       .byte   0xea                    /* opcode for far jump */
+       .long   tramp_64-mptramp_start  /* offset in segment */
+       .word   kernelcode-gdt          /* index in gdt for 64 bit code */
+
+       /*
+        * Yeehar!  We're running in 64 bit mode!  We can mostly ignore our
+        * segment registers, and get on with it.
+        * Note that we are running at the correct virtual address, but with
+        * a 1:1 1GB mirrored mapping over the entire address space.  We had better
+        * switch to a real %cr3 promptly so that we can get to the direct map
+        * space. Remember that jmp is relative and that we've been relocated,
+        * so use an indirect jump.
+        */
+       .code64
+tramp_64:
+       movabsq $entry_64,%rax          /* 64 bit immediate load */
+       jmp     *%rax
+
+       .p2align 4,0
+gdt:
+       /*
+        * All segment descriptor tables start with a null descriptor
+        */
+       .long   0x00000000
+       .long   0x00000000
+
+       /*
+        * This is the 64 bit long mode code descriptor.  There is no
+        * 64 bit data descriptor.
+        */
+kernelcode:
+       .long   0x00000000
+       .long   0x00209800
+
+       /*
+        * This is the descriptor for the 32 bit boot code.
+        * %cs:  +A, +R, -C, DPL=0, +P, +D, +G
+        * Accessed, Readable, Present, 32 bit, 4G granularity
+        */
+bootcode:
+       .long   0x0000ffff
+       .long   0x00cf9b00
+
+       /*
+        * This is the descriptor for the 32 bit boot data.
+        * We load it into %ds and %ss.  The bits for each selector
+        * are interpreted slightly differently.
+        * %ds:  +A, +W, -E, DPL=0, +P, +D, +G
+        * %ss:  +A, +W, -E, DPL=0, +P, +B, +G
+        * Accessed, Writeable, Expand up, Present, 32 bit, 4GB 
+        * For %ds, +D means 'default operand size is 32 bit'.
+        * For %ss, +B means the stack register is %esp rather than %sp.
+        */
+bootdata:
+       .long   0x0000ffff
+       .long   0x00cf9300
+
+gdtend:
+
+       /*
+        * The address of our page table pages that the boot code
+        * uses to trampoline up to kernel address space.
+        */
+       .globl  mptramp_pagetables
+mptramp_pagetables:
+       .long   0
+
+       /*
+        * The pseudo descriptor for lgdt to use.
+        */
+lgdt_desc:     
+       .word   gdtend-gdt              /* Length */
+       .long   gdt-mptramp_start       /* Offset plus %ds << 4 */
+
+       .globl  mptramp_end
+mptramp_end:
+
+       /*
+        * From here on down is executed in the kernel .text section.
+        *
+        * Load a real %cr3 that has all the direct map stuff and switches
+        * off the 1GB replicated mirror.  Load a stack pointer and jump
+        * into AP startup code in C.
+        */
+       .text
+       .code64
+       .p2align 4,0
+entry_64:
+       movq    KPML4phys, %rax
+       movq    %rax, %cr3
+       movq    bootSTK, %rsp
+       jmp     init_secondary
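
Each GDT entry above packs base, limit, and flags into two 32-bit words.  A
quick decode of the 32-bit boot code descriptor (0x0000ffff, 0x00cf9b00),
confirming it describes a flat ring-0 4GB code segment:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* bootcode descriptor words from the trampoline GDT */
        uint64_t d = ((uint64_t)0x00cf9b00 << 32) | 0x0000ffff;

        unsigned long limit = (d & 0xffff) | ((d >> 32) & 0xf0000);
        unsigned long base  = ((d >> 16) & 0xffffff) |
                              (((d >> 56) & 0xff) << 24);
        unsigned type = (d >> 40) & 0xf;  /* 0xb: code, readable, accessed */
        unsigned g    = (d >> 55) & 1;    /* granularity = 4K pages */

        /* base 0, limit 0xfffff in 4K units: a flat 4GB code segment */
        printf("base=0x%lx limit=0x%lx type=0x%x G=%u\n", base, limit, type, g);
        return 0;
    }
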
index 5453bf5..b0fc0a8 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * $FreeBSD: src/sys/i386/i386/mplock.s,v 1.29.2.2 2000/05/16 06:58:06 dillon Exp $
- * $DragonFly: src/sys/platform/pc64/amd64/mplock.s,v 1.2 2007/09/24 03:24:45 yanyh Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/mplock.s,v 1.21 2006/11/07 06:43:24 dillon Exp $
  *
  * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
  * 
@@ -63,9 +63,7 @@
  */
 
 #include <machine/asmacros.h>
-#if 0
 #include <machine_base/apic/apicreg.h>
-#endif
 
 #include "assym.s"
 
@@ -91,8 +89,8 @@ mp_lock:
         * Z=1 (jz) on success.   A lock prefix is required for MP.
         */
 NON_GPROF_ENTRY(cpu_get_initial_mplock)
-       movl    PCPU(curthread),%ecx
-       movl    $1,TD_MPCOUNT(%ecx)     /* curthread has mpcount of 1 */
+       movq    PCPU(curthread),%rcx
+       movl    $1,TD_MPCOUNT(%rcx)     /* curthread has mpcount of 1 */
        movl    $0,mp_lock              /* owned by cpu 0 */
        NON_GPROF_RET
 
@@ -114,7 +112,7 @@ NON_GPROF_ENTRY(cpu_try_mplock)
        lock cmpxchgl %ecx,mp_lock      /* ecx<->mem if eax matches */
        jnz     1f
 #ifdef PARANOID_INVLTLB
-       movl    %cr3,%eax; movl %eax,%cr3       /* YYY check and remove */
+       movq    %cr3,%rax; movq %rax,%cr3       /* YYY check and remove */
 #endif
        movl    $1,%eax
        NON_GPROF_RET
@@ -137,8 +135,8 @@ NON_GPROF_ENTRY(cpu_try_mplock)
         */
 NON_GPROF_ENTRY(get_mplock)
        movl    PCPU(cpuid),%ecx
-       movl    PCPU(curthread),%edx
-       incl    TD_MPCOUNT(%edx)        /* predispose */
+       movq    PCPU(curthread),%rdx
+       incl    TD_MPCOUNT(%rdx)        /* predispose */
        cmpl    %ecx,mp_lock
        jne     1f
        NON_GPROF_RET                   /* success! */
@@ -151,6 +149,9 @@ NON_GPROF_ENTRY(get_mplock)
        movl    $-1,%eax
        lock cmpxchgl %ecx,mp_lock
        jnz     2f
+#ifdef PARANOID_INVLTLB
+       movq    %cr3,%rax; movq %rax,%cr3 /* YYY check and remove */
+#endif
        NON_GPROF_RET                   /* success */
 
        /*
@@ -162,10 +163,10 @@ NON_GPROF_ENTRY(get_mplock)
         * backtrace properly.
         */
 2:
-       pushl   %ebp
-       movl    %esp,%ebp
+       pushq   %rbp
+       movq    %rsp,%rbp
        call    lwkt_mp_lock_contested
-       popl    %ebp
+       popq    %rbp
 #ifdef INVARIANTS
        movl    PCPU(cpuid),%eax        /* failure */
        cmpl    %eax,mp_lock
@@ -190,8 +191,8 @@ NON_GPROF_ENTRY(get_mplock)
         */
 NON_GPROF_ENTRY(try_mplock)
        movl    PCPU(cpuid),%ecx
-       movl    PCPU(curthread),%edx
-       incl    TD_MPCOUNT(%edx)                /* pre-dispose for race */
+       movq    PCPU(curthread),%rdx
+       incl    TD_MPCOUNT(%rdx)                /* pre-dispose for race */
        cmpl    %ecx,mp_lock
        je      1f                              /* trivial success */
        movl    $-1,%eax
@@ -201,7 +202,7 @@ NON_GPROF_ENTRY(try_mplock)
         * Success
         */
 #ifdef PARANOID_INVLTLB
-       movl    %cr3,%eax; movl %eax,%cr3       /* YYY check and remove */
+       movq    %cr3,%rax; movq %rax,%cr3       /* YYY check and remove */
 #endif
 1:
        movl    $1,%eax                         /* success (cmpxchgl good!) */
@@ -216,8 +217,8 @@ NON_GPROF_ENTRY(try_mplock)
         * make sure we don't own the lock in case we did win it in a race.
         */
 2:
-       decl    TD_MPCOUNT(%edx)
-       cmpl    $0,TD_MPCOUNT(%edx)
+       decl    TD_MPCOUNT(%rdx)
+       cmpl    $0,TD_MPCOUNT(%rdx)
        jne     3f
        movl    PCPU(cpuid),%eax
        movl    $-1,%ecx
@@ -234,32 +235,39 @@ NON_GPROF_ENTRY(try_mplock)
         * above.
         */
 NON_GPROF_ENTRY(rel_mplock)
-       movl    PCPU(curthread),%edx
-       movl    TD_MPCOUNT(%edx),%eax
+       movq    PCPU(curthread),%rdx
+       movl    TD_MPCOUNT(%rdx),%eax
 #ifdef INVARIANTS
        cmpl    $0,%eax
        je      badmp_rel
 #endif
        subl    $1,%eax
-       movl    %eax,TD_MPCOUNT(%edx)
+       movl    %eax,TD_MPCOUNT(%rdx)
        cmpl    $0,%eax
        jne     3f
        movl    PCPU(cpuid),%eax
        movl    $-1,%ecx
        lock cmpxchgl %ecx,mp_lock
+       movl    mp_lock_contention_mask,%eax
+       cmpl    $0,%eax
+       je      3f
+       call    lwkt_mp_lock_uncontested
 3:
        NON_GPROF_RET
 
 #ifdef INVARIANTS
 
 badmp_get:
-       pushl   $bmpsw1
+       movq    $bmpsw1,%rdi
+       movl    $0,%eax
        call    panic
 badmp_get2:
-       pushl   $bmpsw1a
+       movq    $bmpsw1a,%rdi
+       movl    $0,%eax
        call    panic
 badmp_rel:
-       pushl   $bmpsw2
+       movq    $bmpsw2,%rdi
+       movl    $0,%eax
        call    panic
 
        .data
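
The try-acquire path in the assembly above is a compare-and-exchange against
the "unowned" value -1, with a fast path when this cpu already owns the lock.
The same shape in C11 atomics, as a model of the protocol rather than the
kernel's implementation:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* -1 means unowned; otherwise the owner's cpuid, as with mp_lock. */
    static atomic_int mp_lock_model = -1;

    static bool try_mplock_model(int cpuid)
    {
        int expected = -1;

        /* the lock cmpxchgl: install our cpuid only if the lock is free */
        if (atomic_compare_exchange_strong(&mp_lock_model, &expected, cpuid))
            return true;
        return expected == cpuid;   /* trivial success: we already own it */
    }

    static void rel_mplock_model(int cpuid)
    {
        int expected = cpuid;

        /* release only if we own it, mirroring rel_mplock's cmpxchg */
        atomic_compare_exchange_strong(&mp_lock_model, &expected, -1);
    }

    int main(void)
    {
        printf("cpu0 try: %d\n", try_mplock_model(0));  /* 1 */
        printf("cpu1 try: %d\n", try_mplock_model(1));  /* 0: contended */
        rel_mplock_model(0);
        printf("cpu1 try: %d\n", try_mplock_model(1));  /* 1 */
        return 0;
    }
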
index 8c61fb0..847e49b 100644 (file)
 #include "opt_ddb.h"
 
 #include <machine/asmacros.h>
-#include <machine/intr_machdep.h>
+#include <machine_base/isa/intr_machdep.h>
 #include <machine/pmap.h>
 
 #include "assym.s"
 
        ALIGN_DATA
-       .globl  intrcnt, eintrcnt
-intrcnt:
-       .space  INTRCNT_COUNT * 8
-eintrcnt:
-
-       .globl  intrnames, eintrnames
-intrnames:
-       .space  INTRCNT_COUNT * (MAXCOMLEN + 1)
-eintrnames:
 
        .text
 
index d7ed5b0..e8ab454 100644 (file)
 #include <vm/vm_param.h>
 #include <machine/cpu.h>
 #include <machine/pcb.h>
+#include <machine/smp.h>
 #include <machine/thread.h>
 #include <machine/vmparam.h>
 #include <machine/md_var.h>
 
 #include <ddb/ddb.h>
+#include <sys/thread2.h>
 
 #ifdef SMP
 
@@ -102,9 +104,6 @@ static int trap_pfault(struct trapframe *, int);
 static void trap_fatal(struct trapframe *, vm_offset_t);
 void dblfault_handler(struct trapframe *frame);
 
-#define PCPU_GET(member) ((mycpu)->gd_##member)
-#define PCPU_INC(member) ((mycpu)->gd_##member)++
-
 #define MAX_TRAP_MSG           30
 static char *trap_msg[] = {
        "",                                     /*  0 unused */
@@ -925,9 +924,10 @@ trap_fatal(struct trapframe *frame, vm_offset_t eva)
        kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
            ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
 #ifdef SMP
-       /* two separate prints in case of a trap on an unmapped page */
-       kprintf("cpuid = %d; ", PCPU_GET(cpuid));
-       kprintf("apic id = %02x\n", PCPU_GET(apic_id));
+       /* three separate prints in case of a trap on an unmapped page */
+       kprintf("mp_lock = %08x; ", mp_lock);
+       kprintf("cpuid = %d; ", mycpu->gd_cpuid);
+       kprintf("lapic->id = %08x\n", lapic->id);
 #endif
        if (type == T_PAGEFLT) {
                kprintf("fault virtual address  = 0x%lx\n", eva);
@@ -1001,9 +1001,10 @@ dblfault_handler(struct trapframe *frame)
        kprintf("rsp = 0x%lx\n", frame->tf_rsp);
        kprintf("rbp = 0x%lx\n", frame->tf_rbp);
 #ifdef SMP
-       /* two separate prints in case of a trap on an unmapped page */
-       kprintf("cpuid = %d; ", PCPU_GET(cpuid));
-       kprintf("apic id = %02x\n", PCPU_GET(apic_id));
+       /* three separate prints in case of a trap on an unmapped page */
+       kprintf("mp_lock = %08x; ", mp_lock);
+       kprintf("cpuid = %d; ", mycpu->gd_cpuid);
+       kprintf("lapic->id = %08x\n", lapic->id);
 #endif
        panic("double fault");
 }
@@ -1046,7 +1047,7 @@ syscall2(struct trapframe *frame)
        union sysunion args;
        register_t *argsdst;
 
-       PCPU_INC(cnt.v_syscall);
+       mycpu->gd_cnt.v_syscall++;
 
 #ifdef DIAGNOSTIC
        if (ISPL(frame->tf_cs) != SEL_UPL) {
@@ -1060,7 +1061,7 @@ syscall2(struct trapframe *frame)
                frame->tf_eax);
 
 #ifdef SMP
-       KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_eip));
+       KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_rip));
        if (syscall_mpsafe == 0)
                MAKEMPSAFE(have_mplock);
 #endif
@@ -1265,7 +1266,7 @@ bad:
         * Release the MP lock if we had to get it
         */
        KASSERT(td->td_mpcount == have_mplock, 
-               ("badmpcount syscall2/end from %p", (void *)frame->tf_eip));
+               ("badmpcount syscall2/end from %p", (void *)frame->tf_rip));
        if (have_mplock)
                rel_mplock();
 #endif
index 5477663..692d025 100644 (file)
@@ -230,16 +230,16 @@ apic_finalize(void)
      * mask the interrupt, completing the disconnection of the
      * 8259.
      */
-    temp = lapic.lvt_lint0;
+    temp = lapic->lvt_lint0;
     temp |= APIC_LVT_MASKED;
-    lapic.lvt_lint0 = temp;
+    lapic->lvt_lint0 = temp;
 
     /*
      * setup lint1 to handle an NMI 
      */
-    temp = lapic.lvt_lint1;
+    temp = lapic->lvt_lint1;
     temp &= ~APIC_LVT_MASKED;
-    lapic.lvt_lint1 = temp;
+    lapic->lvt_lint1 = temp;
 
     if (bootverbose)
        apic_dump("bsp_apic_configure()");
@@ -270,7 +270,7 @@ apic_vectorctl(int op, int intr, int flags)
     if (intr < 0 || intr >= APIC_HWI_VECTORS)
        return (EINVAL);
 
-    ef = read_eflags();
+    ef = read_rflags();
     cpu_disable_intr();
     error = 0;
 
@@ -283,10 +283,10 @@ apic_vectorctl(int op, int intr, int flags)
        if (flags & INTR_FAST) {
            vector = TPR_SLOW_INTS + intr;
            setidt(vector, apic_wrongintr[intr],
-                   SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+                   SDT_SYSIGT, SEL_KPL, 0);
            vector = TPR_FAST_INTS + intr;
            setidt(vector, apic_fastintr[intr],
-                   SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+                   SDT_SYSIGT, SEL_KPL, 0);
        } else {
            vector = TPR_SLOW_INTS + intr;
 
@@ -297,7 +297,7 @@ apic_vectorctl(int op, int intr, int flags)
                vector = TPR_FAST_INTS + intr;
            }
            setidt(vector, apic_slowintr[intr],
-                   SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+                   SDT_SYSIGT, SEL_KPL, 0);
        }
 
        /*
@@ -326,8 +326,8 @@ apic_vectorctl(int op, int intr, int flags)
         */
        machintr_intrdis(intr);
        vector = TPR_SLOW_INTS + intr;
-       setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL,
-               GSEL(GCODE_SEL, SEL_KPL));
+       setidt(vector, apic_slowintr[intr], SDT_SYSIGT, SEL_KPL,
+               0);
 
        /*
         * And then reprogram the IO APIC to point to the SLOW vector (it may
@@ -359,15 +359,14 @@ apic_vectorctl(int op, int intr, int flags)
         * to IDT_OFFSET + intr.
         */
        vector = IDT_OFFSET + intr;
-       setidt(vector, apic_slowintr[intr], SDT_SYS386IGT, SEL_KPL,
-               GSEL(GCODE_SEL, SEL_KPL));
+       setidt(vector, apic_slowintr[intr], SDT_SYSIGT, SEL_KPL, 0);
        break;
     default:
        error = EOPNOTSUPP;
        break;
     }
 
-    write_eflags(ef);
+    write_rflags(ef);
     return (error);
 }
 
index ce15536..52afe63 100644 (file)
@@ -57,7 +57,9 @@
  * $DragonFly: src/sys/platform/pc64/apic/apic_ipl.s,v 1.1 2008/08/29 17:07:12 dillon Exp $
  */
 
+#if 0
 #include "use_npx.h"
+#endif
 
 #include <machine/asmacros.h>
 #include <machine/segments.h>
@@ -94,31 +96,33 @@ apic_imen:
         */
 ENTRY(APIC_INTRDIS)
        APIC_IMASK_LOCK                 /* enter critical reg */
-       movl    4(%esp),%eax
+       movl    %edi, %eax
 1:
        btsl    %eax, apic_imen
-       shll    $4, %eax
-       movl    CNAME(int_to_apicintpin) + 8(%eax), %edx
-       movl    CNAME(int_to_apicintpin) + 12(%eax), %ecx
-       testl   %edx, %edx
+       imull   $AIMI_SIZE, %eax
+       addq    $CNAME(int_to_apicintpin), %rax
+       movq    AIMI_APIC_ADDRESS(%rax), %rdx
+       movl    AIMI_REDIRINDEX(%rax), %ecx
+       testq   %rdx, %rdx
        jz      2f
-       movl    %ecx, (%edx)            /* target register index */
-       orl     $IOART_INTMASK,16(%edx) /* set intmask in target apic reg */
+       movl    %ecx, (%rdx)            /* target register index */
+       orl     $IOART_INTMASK,16(%rdx) /* set intmask in target apic reg */
 2:
        APIC_IMASK_UNLOCK               /* exit critical reg */
        ret
 
 ENTRY(APIC_INTREN)
        APIC_IMASK_LOCK                 /* enter critical reg */
-       movl    4(%esp), %eax           /* mask into %eax */
+       movl    %edi, %eax
 1:
        btrl    %eax, apic_imen         /* update apic_imen */
-       shll    $4, %eax
-       movl    CNAME(int_to_apicintpin) + 8(%eax), %edx
-       movl    CNAME(int_to_apicintpin) + 12(%eax), %ecx
-       testl   %edx, %edx
+       imull   $AIMI_SIZE, %eax
+       addq    $CNAME(int_to_apicintpin), %rax
+       movq    AIMI_APIC_ADDRESS(%rax), %rdx
+       movl    AIMI_REDIRINDEX(%rax), %ecx
+       testq   %rdx, %rdx
        jz      2f
-       movl    %ecx, (%edx)            /* write the target register index */
+       movl    %ecx, (%rdx)            /* write the target register index */
-       andl    $~IOART_INTMASK, 16(%edx) /* clear mask bit */
+       andl    $~IOART_INTMASK, 16(%rdx) /* clear mask bit */
 2:     
        APIC_IMASK_UNLOCK               /* exit critical reg */
@@ -129,35 +133,24 @@ ENTRY(APIC_INTREN)
  */
 
 /*
- * u_int io_apic_write(int apic, int select);
+ * u_int io_apic_read(int apic, int select);
  */
 ENTRY(io_apic_read)
-       movl    4(%esp), %ecx           /* APIC # */
-       movl    ioapic, %eax
-       movl    (%eax,%ecx,4), %edx     /* APIC base register address */
-       movl    8(%esp), %eax           /* target register index */
-       movl    %eax, (%edx)            /* write the target register index */
-       movl    16(%edx), %eax          /* read the APIC register data */
+       movl    %edi, %ecx              /* APIC # */
+       movq    ioapic, %rax
+       movq    (%rax,%rcx,8), %rdx     /* APIC base register address */
+       movl    %esi, (%rdx)            /* write the target register index */
+       movl    16(%rdx), %eax          /* read the APIC register data */
        ret                             /* %eax = register value */
 
 /*
- * void io_apic_write(int apic, int select, int value);
+ * void io_apic_write(int apic, int select, u_int value);
  */
 ENTRY(io_apic_write)
-       movl    4(%esp), %ecx           /* APIC # */
-       movl    ioapic, %eax
-       movl    (%eax,%ecx,4), %edx     /* APIC base register address */
-       movl    8(%esp), %eax           /* target register index */
-       movl    %eax, (%edx)            /* write the target register index */
-       movl    12(%esp), %eax          /* target register value */
-       movl    %eax, 16(%edx)          /* write the APIC register data */
+       movl    %edi, %ecx              /* APIC # */
+       movq    ioapic, %rax
+       movq    (%rax,%rcx,8), %r8      /* APIC base register address */
+       movl    %esi, (%r8)             /* write the target register index */
+       movl    %edx, 16(%r8)           /* write the APIC register data */
        ret                             /* %eax = void */
-
-/*
- * Send an EOI to the local APIC.
- */
-ENTRY(apic_eoi)
-       movl    $0, lapic+0xb0
-       ret
-
 #endif
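
Both routines follow the IOAPIC's indirect-register protocol: store the
register index at the base address (IOREGSEL), then move data through the
32-bit window 16 bytes above it (IOWIN).  The same access pattern in C,
assuming an uncached mapping of the IOAPIC base:

    #include <stdint.h>

    static inline uint32_t
    ioapic_read_model(volatile uint32_t *base, int reg)
    {
        base[0] = reg;      /* IOREGSEL: select the register */
        return base[4];     /* IOWIN: data window at base + 16 */
    }

    static inline void
    ioapic_write_model(volatile uint32_t *base, int reg, uint32_t val)
    {
        base[0] = reg;      /* select the register */
        base[4] = val;      /* write through the data window */
    }
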
index 226d024..1435a66 100644 (file)
@@ -1,40 +1,13 @@
 /*
- * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * from: vector.s, 386BSD 0.1 unknown origin
+ *     from: vector.s, 386BSD 0.1 unknown origin
  * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $
- * $DragonFly: src/sys/platform/pc64/apic/apic_vector.s,v 1.1 2008/08/29 17:07:12 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.39 2008/08/02 01:14:43 dillon Exp $
  */
 
+#if 0
 #include "use_npx.h"
 #include "opt_auto_eoi.h"
+#endif
 
 #include <machine/asmacros.h>
 #include <machine/lock.h>
 #define MPLOCKED
 #endif
 
-/*
- * Push an interrupt frame in a format acceptable to doreti, reload
- * the segment registers for the kernel.
- */
-#define PUSH_FRAME                                                     \
-       pushl   $0 ;            /* dummy error code */                  \
-       pushl   $0 ;            /* dummy trap type */                   \
-       pushl   $0 ;            /* dummy xflags type */                 \
-       pushal ;                                                        \
-       pushl   %ds ;           /* save data and extra segments ... */  \
-       pushl   %es ;                                                   \
-       pushl   %fs ;                                                   \
-       pushl   %gs ;                                                   \
+#define APIC_PUSH_FRAME                                                        \
+       PUSH_FRAME ;            /* 15 regs + space for 5 extras */      \
+       movq $0,TF_XFLAGS(%rsp) ;                                       \
+       movq $0,TF_TRAPNO(%rsp) ;                                       \
+       movq $0,TF_ADDR(%rsp) ;                                         \
+       movq $0,TF_FLAGS(%rsp) ;                                        \
+       movq $0,TF_ERR(%rsp) ;                                          \
        cld ;                                                           \
-       mov     $KDSEL,%ax ;                                            \
-       mov     %ax,%ds ;                                               \
-       mov     %ax,%es ;                                               \
-       mov     %ax,%gs ;                                               \
-       mov     $KPSEL,%ax ;                                            \
-       mov     %ax,%fs ;                                               \
-
-#define PUSH_DUMMY                                                     \
-       pushfl ;                /* phys int frame / flags */            \
-       pushl %cs ;             /* phys int frame / cs */               \
-       pushl   12(%esp) ;      /* original caller eip */               \
-       pushl   $0 ;            /* dummy error code */                  \
-       pushl   $0 ;            /* dummy trap type */                   \
-       pushl   $0 ;            /* dummy xflags type */                 \
-       subl    $13*4,%esp ;    /* pushal + 4 seg regs (dummy) + CPL */ \
 
 /*
- * Warning: POP_FRAME can only be used if there is no chance of a
+ * JG stale? Warning: POP_FRAME can only be used if there is no chance of a
  * segment register being changed (e.g. by procfs), which is why syscalls
  * have to use doreti.
  */
-#define POP_FRAME                                                      \
-       popl    %gs ;                                                   \
-       popl    %fs ;                                                   \
-       popl    %es ;                                                   \
-       popl    %ds ;                                                   \
-       popal ;                                                         \
-       addl    $3*4,%esp ;     /* dummy xflags, trap & error codes */  \
+#define APIC_POP_FRAME POP_FRAME
 
-#define POP_DUMMY                                                      \
-       addl    $19*4,%esp ;                                            \
-
-#define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8
-#define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12
+/* sizeof(struct apic_intmapinfo) == 24 */
+#define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 24 * (irq_num) + 8
+#define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 24 * (irq_num) + 16
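
The stride of 24 and the +8/+16 offsets hard-code the amd64 layout of
struct apic_intmapinfo.  A layout consistent with those numbers on an LP64
ABI (field names are assumptions), checked with static asserts:

    #include <stddef.h>
    #include <stdint.h>

    struct apic_intmapinfo_model {
        int32_t        ioapic;          /* +0 */
        int32_t        int_pin;         /* +4 */
        volatile void *apic_address;    /* +8  -> IOAPICADDR() */
        uint32_t       redirindex;      /* +16 -> REDIRIDX() */
        uint32_t       pad;             /* +20, keeps sizeof at 24 */
    };

    _Static_assert(sizeof(struct apic_intmapinfo_model) == 24,
                   "stride hard-coded in the assembly above");
    _Static_assert(offsetof(struct apic_intmapinfo_model, apic_address) == 8,
                   "IOAPICADDR offset");
    _Static_assert(offsetof(struct apic_intmapinfo_model, redirindex) == 16,
                   "REDIRIDX offset");
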
 
 #define MASK_IRQ(irq_num)                                              \
        APIC_IMASK_LOCK ;                       /* into critical reg */ \
        testl   $IRQ_LBIT(irq_num), apic_imen ;                         \
        jne     7f ;                    /* masked, don't mask */        \
        orl     $IRQ_LBIT(irq_num), apic_imen ; /* set the mask bit */  \
-       movl    IOAPICADDR(irq_num), %ecx ;     /* ioapic addr */       \
+       movq    IOAPICADDR(irq_num), %rcx ;     /* ioapic addr */       \
        movl    REDIRIDX(irq_num), %eax ;       /* get the index */     \
-       movl    %eax, (%ecx) ;                  /* write the index */   \
-       movl    IOAPIC_WINDOW(%ecx), %eax ;     /* current value */     \
+       movl    %eax, (%rcx) ;                  /* write the index */   \
+       movl    IOAPIC_WINDOW(%rcx), %eax ;     /* current value */     \
        orl     $IOART_INTMASK, %eax ;          /* set the mask */      \
-       movl    %eax, IOAPIC_WINDOW(%ecx) ;     /* new value */         \
+       movl    %eax, IOAPIC_WINDOW(%rcx) ;     /* new value */         \
 7: ;                                           /* already masked */    \
        APIC_IMASK_UNLOCK ;                                             \
 
        testl   $IRQ_LBIT(irq_num), apic_imen ;                         \
        je      7f ;                    /* bit clear, not masked */     \
        andl    $~IRQ_LBIT(irq_num), apic_imen ;/* clear mask bit */    \
-       movl    IOAPICADDR(irq_num),%ecx ;      /* ioapic addr */       \
+       movq    IOAPICADDR(irq_num),%rcx ;      /* ioapic addr */       \
        movl    REDIRIDX(irq_num), %eax ;       /* get the index */     \
-       movl    %eax,(%ecx) ;                   /* write the index */   \
-       movl    IOAPIC_WINDOW(%ecx),%eax ;      /* current value */     \
+       movl    %eax,(%rcx) ;                   /* write the index */   \
+       movl    IOAPIC_WINDOW(%rcx),%eax ;      /* current value */     \
        andl    $~IOART_INTMASK,%eax ;          /* clear the mask */    \
-       movl    %eax,IOAPIC_WINDOW(%ecx) ;      /* new value */         \
+       movl    %eax,IOAPIC_WINDOW(%rcx) ;      /* new value */         \
 7: ;                                                                   \
        APIC_IMASK_UNLOCK ;                                             \
 8: ;                                                                   \
        .text ;                                                         \
        SUPERALIGN_TEXT ;                                               \
 IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
+       APIC_PUSH_FRAME ;                                               \
        FAKE_MCOUNT(15*4(%esp)) ;                                       \
        MASK_LEVEL_IRQ(irq_num) ;                                       \
-       movl    $0, lapic_eoi ;                                         \
-       movl    PCPU(curthread),%ebx ;                                  \
-       movl    $0,%eax ;       /* CURRENT CPL IN FRAME (REMOVED) */    \
-       pushl   %eax ;                                                  \
-       testl   $-1,TD_NEST_COUNT(%ebx) ;                               \
+       movq    lapic, %rax ;                                           \
+       movl    $0, LA_EOI(%rax) ;                                      \
+       movq    PCPU(curthread),%rbx ;                                  \
+       testl   $-1,TD_NEST_COUNT(%rbx) ;                               \
        jne     1f ;                                                    \
-       cmpl    $TDPRI_CRIT,TD_PRI(%ebx) ;                              \
+       cmpl    $TDPRI_CRIT,TD_PRI(%rbx) ;                              \
        jl      2f ;                                                    \
 1: ;                                                                   \
        /* in critical section, make interrupt pending */               \
@@ -197,10 +140,10 @@ IDTVEC(vec_name) ;                                                        \
 2: ;                                                                   \
        /* clear pending bit, run handler */                            \
        andl    $~IRQ_LBIT(irq_num),PCPU(fpending) ;                    \
-       pushl   $irq_num ;                                              \
-       pushl   %esp ;                   /* pass frame by reference */  \
-       call    ithread_fast_handler ;   /* returns 0 to unmask */      \
-       addl    $8, %esp ;                                              \
+       pushq   $irq_num ;              /* trapframe -> intrframe */    \
+       movq    %rsp, %rdi ;            /* pass frame by reference */   \
+       call    ithread_fast_handler ;  /* returns 0 to unmask */       \
+       addq    $8, %rsp ;              /* intrframe -> trapframe */    \
        UNMASK_IRQ(irq_num) ;                                           \
 5: ;                                                                   \
        MEXITCOUNT ;                                                    \
@@ -231,18 +174,17 @@ IDTVEC(vec_name) ;                                                        \
        .text ;                                                         \
        SUPERALIGN_TEXT ;                                               \
 IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
+       APIC_PUSH_FRAME ;                                               \
        maybe_extra_ipending ;                                          \
 ;                                                                      \
        MASK_LEVEL_IRQ(irq_num) ;                                       \
        incl    PCPU(cnt) + V_INTR ;                                    \
-       movl    $0, lapic_eoi ;                                         \
-       movl    PCPU(curthread),%ebx ;                                  \
-       movl    $0,%eax ;       /* CURRENT CPL IN FRAME (REMOVED) */    \
-       pushl   %eax ;          /* cpl do restore */                    \
-       testl   $-1,TD_NEST_COUNT(%ebx) ;                               \
+       movq    lapic, %rax ;                                           \
+       movl    $0, LA_EOI(%rax) ;                                      \
+       movq    PCPU(curthread),%rbx ;                                  \
+       testl   $-1,TD_NEST_COUNT(%rbx) ;                               \
        jne     1f ;                                                    \
-       cmpl    $TDPRI_CRIT,TD_PRI(%ebx) ;                              \
+       cmpl    $TDPRI_CRIT,TD_PRI(%rbx) ;                              \
        jl      2f ;                                                    \
 1: ;                                                                   \
        /* set the pending bit and return, leave the interrupt masked */ \
@@ -252,13 +194,12 @@ IDTVEC(vec_name) ;                                                        \
 2: ;                                                                   \
        /* set running bit, clear pending bit, run handler */           \
        andl    $~IRQ_LBIT(irq_num), PCPU(ipending) ;                   \
-       incl    TD_NEST_COUNT(%ebx) ;                                   \
+       incl    TD_NEST_COUNT(%rbx) ;                                   \
        sti ;                                                           \
-       pushl   $irq_num ;                                              \
+       movq    $irq_num,%rdi ;         /* pass irq as the argument */  \
        call    sched_ithd ;                                            \
-       addl    $4,%esp ;                                               \
        cli ;                                                           \
-       decl    TD_NEST_COUNT(%ebx) ;                                   \
+       decl    TD_NEST_COUNT(%rbx) ;                                   \
 5: ;                                                                   \
        MEXITCOUNT ;                                                    \
        jmp     doreti ;                                                \
@@ -276,12 +217,13 @@ IDTVEC(vec_name) ;                                                        \
        .text ;                                                         \
        SUPERALIGN_TEXT  ;                                              \
 IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       movl    $0, lapic_eoi ; /* End Of Interrupt to APIC */          \
+       APIC_PUSH_FRAME ;                                               \
+       movq    lapic,%rax ;                                            \
+       movl    $0,LA_EOI(%rax) ;       /* End Of Interrupt to APIC */  \
        /*pushl $irq_num ;*/                                            \
        /*call  do_wrongintr ;*/                                        \
        /*addl  $4,%esp ;*/                                             \
-       POP_FRAME ;                                                     \
+       APIC_POP_FRAME ;                                                \
        iret  ;                                                         \
 
 #endif
@@ -310,15 +252,15 @@ Xspuriousint:
        SUPERALIGN_TEXT
        .globl  Xinvltlb
 Xinvltlb:
-       pushl   %eax
+       pushq   %rax
 
-       movl    %cr3, %eax              /* invalidate the TLB */
-       movl    %eax, %cr3
+       movq    %cr3, %rax              /* invalidate the TLB */
+       movq    %rax, %cr3
 
-       ss                              /* stack segment, avoid %ds load */
-       movl    $0, lapic_eoi           /* End Of Interrupt to APIC */
+       movq    lapic, %rax
+       movl    $0, LA_EOI(%rax)        /* End Of Interrupt to APIC */
 
-       popl    %eax
+       popq    %rax
        iret
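
Reloading %cr3 with its own value is the standard x86 idiom for flushing
all non-global TLB entries, which is exactly what an invltlb IPI has to
do.  A C rendering of the handler body, as a sketch using GCC inline
assembly (the kernel's own helper lives in cpufunc.h):

	/*
	 * Sketch: flush non-global TLB entries by rewriting %cr3.
	 * The "memory" clobber keeps the compiler from reordering
	 * memory accesses across the flush.
	 */
	static __inline void
	tlb_flush_sketch(void)
	{
		unsigned long cr3;

		__asm __volatile("movq %%cr3,%0" : "=r" (cr3));
		__asm __volatile("movq %0,%%cr3" : : "r" (cr3) : "memory");
	}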
 
 
@@ -335,11 +277,22 @@ Xinvltlb:
        SUPERALIGN_TEXT
        .globl Xcpustop
 Xcpustop:
-       pushl   %ebp
-       movl    %esp, %ebp
-       pushl   %eax
-       pushl   %ecx
-       pushl   %edx
+       pushq   %rbp
+       movq    %rsp, %rbp
+       /* We save registers that are not preserved across function calls. */
+       /* JG: could be rewritten with movs */
+       pushq   %rax
+       pushq   %rcx
+       pushq   %rdx
+       pushq   %rsi
+       pushq   %rdi
+       pushq   %r8
+       pushq   %r9
+       pushq   %r10
+       pushq   %r11
+
+#if JG
+       /* JGXXX switch to kernel %gs? */
        pushl   %ds                     /* save current data segment */
        pushl   %fs
 
@@ -347,15 +300,17 @@ Xcpustop:
        mov     %ax, %ds                /* use KERNEL data segment */
        movl    $KPSEL, %eax
        mov     %ax, %fs
+#endif
 
-       movl    $0, lapic_eoi           /* End Of Interrupt to APIC */
+       movq    lapic, %rax
+       movl    $0, LA_EOI(%rax)        /* End Of Interrupt to APIC */
 
+       /* JG */
        movl    PCPU(cpuid), %eax
        imull   $PCB_SIZE, %eax
-       leal    CNAME(stoppcbs)(%eax), %eax
-       pushl   %eax
+       leaq    CNAME(stoppcbs), %rdi
+       addq    %rax, %rdi
        call    CNAME(savectx)          /* Save process context */
-       addl    $4, %esp
        
                
        movl    PCPU(cpuid), %eax
@@ -369,9 +324,9 @@ Xcpustop:
        btsl    %eax, stopped_cpus      /* stopped_cpus |= (1<<id) */
 1:
        andl    $~RQF_IPIQ,PCPU(reqflags)
-       pushl   %eax
+       pushq   %rax
        call    lwkt_smp_stopped
-       popl    %eax
+       popq    %rax
        btl     %eax, started_cpus      /* while (!(started_cpus & (1<<id))) */
        jnc     1b
 
@@ -383,20 +338,29 @@ Xcpustop:
        test    %eax, %eax
        jnz     2f
 
-       movl    CNAME(cpustop_restartfunc), %eax
-       test    %eax, %eax
+       movq    CNAME(cpustop_restartfunc), %rax
+       test    %rax, %rax
        jz      2f
-       movl    $0, CNAME(cpustop_restartfunc)  /* One-shot */
+       movq    $0, CNAME(cpustop_restartfunc)  /* One-shot */
 
-       call    *%eax
+       call    *%rax
 2:
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rdi
+       popq    %rsi
+       popq    %rdx
+       popq    %rcx
+       popq    %rax
+
+#if JG
        popl    %fs
        popl    %ds                     /* restore previous data segment */
-       popl    %edx
-       popl    %ecx
-       popl    %eax
-       movl    %ebp, %esp
-       popl    %ebp
+#endif
+       movq    %rbp, %rsp
+       popq    %rbp
        iret
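
Stripped of the register juggling, Xcpustop is a bitmask rendezvous with
the cpu that sent the stop IPI.  A rough C sketch of the protocol (the
real handler also saves the PCB into stoppcbs[] via savectx() and, on
cpu 0, runs the one-shot cpustop_restartfunc):

	/* Sketch of the stop/restart rendezvous, assuming atomic_*_int(). */
	void
	cpustop_rendezvous_sketch(int cpuid)
	{
		atomic_set_int(&stopped_cpus, 1 << cpuid); /* I am stopped */
		while ((started_cpus & (1 << cpuid)) == 0)
			lwkt_smp_stopped();	   /* poll for a restart */
		atomic_clear_int(&started_cpus, 1 << cpuid);
		atomic_clear_int(&stopped_cpus, 1 << cpuid);
	}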
 
        /*
@@ -408,28 +372,60 @@ Xcpustop:
        SUPERALIGN_TEXT
        .globl Xipiq
 Xipiq:
-       PUSH_FRAME
-       movl    $0, lapic_eoi           /* End Of Interrupt to APIC */
+       APIC_PUSH_FRAME
+       movq    lapic, %rax
+       movl    $0, LA_EOI(%rax)        /* End Of Interrupt to APIC */
        FAKE_MCOUNT(15*4(%esp))
 
-       movl    PCPU(curthread),%ebx
-       cmpl    $TDPRI_CRIT,TD_PRI(%ebx)
+       incl    PCPU(cnt) + V_IPI
+       movq    PCPU(curthread),%rbx
+       cmpl    $TDPRI_CRIT,TD_PRI(%rbx)
        jge     1f
-       subl    $8,%esp                 /* make same as interrupt frame */
-       pushl   %esp                    /* pass frame by reference */
+       subq    $8,%rsp                 /* make same as interrupt frame */
+       movq    %rsp,%rdi               /* pass frame by reference */
        incl    PCPU(intr_nesting_level)
-       addl    $TDPRI_CRIT,TD_PRI(%ebx)
+       addl    $TDPRI_CRIT,TD_PRI(%rbx)
        call    lwkt_process_ipiq_frame
-       subl    $TDPRI_CRIT,TD_PRI(%ebx)
+       subl    $TDPRI_CRIT,TD_PRI(%rbx)
        decl    PCPU(intr_nesting_level)
-       addl    $12,%esp
-       pushl   $0                      /* CPL for frame (REMOVED) */
+       addq    $8,%rsp                 /* turn into trapframe */
        MEXITCOUNT
        jmp     doreti
 1:
        orl     $RQF_IPIQ,PCPU(reqflags)
        MEXITCOUNT
-       POP_FRAME
+       APIC_POP_FRAME
+       iret
+
+       .text
+       SUPERALIGN_TEXT
+       .globl Xtimer
+Xtimer:
+       APIC_PUSH_FRAME
+       movq    lapic, %rax
+       movl    $0, LA_EOI(%rax)        /* End Of Interrupt to APIC */
+       FAKE_MCOUNT(15*4(%esp))
+
+       incl    PCPU(cnt) + V_TIMER
+       movq    PCPU(curthread),%rbx
+       cmpl    $TDPRI_CRIT,TD_PRI(%rbx)
+       jge     1f
+       testl   $-1,TD_NEST_COUNT(%rbx)
+       jne     1f
+       subq    $8,%rsp                 /* make same as interrupt frame */
+       movq    %rsp,%rdi               /* pass frame by reference */
+       incl    PCPU(intr_nesting_level)
+       addl    $TDPRI_CRIT,TD_PRI(%rbx)
+       call    lapic_timer_process_frame
+       subl    $TDPRI_CRIT,TD_PRI(%rbx)
+       decl    PCPU(intr_nesting_level)
+       addq    $8,%rsp                 /* turn into trapframe */
+       MEXITCOUNT
+       jmp     doreti
+1:
+       orl     $RQF_TIMER,PCPU(reqflags)
+       MEXITCOUNT
+       APIC_POP_FRAME
        iret
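
Xipiq and Xtimer share the same dispatch-or-defer discipline: EOI
immediately, and if the interrupted thread holds a critical section (or,
for the timer, is nested in an interrupt thread), just latch
RQF_IPIQ/RQF_TIMER in the per-cpu reqflags and iret; doreti replays the
work once the critical section drains.  The timer path in C, as a
sketch (field names mirror the asm offsets used above):

	/* Sketch of Xtimer's dispatch-or-defer decision. */
	void
	xtimer_sketch(struct intrframe *frame)
	{
		thread_t td = curthread;

		lapic_eoi();
		if (td->td_pri >= TDPRI_CRIT || td->td_nest_count != 0) {
			mycpu->gd_reqflags |= RQF_TIMER; /* doreti replays */
			return;
		}
		td->td_pri += TDPRI_CRIT;	/* enter critical section */
		++mycpu->gd_intr_nesting_level;
		lapic_timer_process_frame(frame);
		--mycpu->gd_intr_nesting_level;
		td->td_pri -= TDPRI_CRIT;
		/* the real handler exits through doreti instead */
	}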
 
 #ifdef APIC_IO
@@ -526,7 +522,7 @@ started_cpus:
 
        .globl CNAME(cpustop_restartfunc)
 CNAME(cpustop_restartfunc):
-       .long 0
+       .quad 0
                
        .globl  apic_pin_trigger
 apic_pin_trigger:
index 25bc57e..29c027f 100644 (file)
@@ -808,6 +808,7 @@ typedef struct IOAPIC ioapic_t;
 #define IOART_HI_DEST_MASK     APIC_ID_MASK
 #define IOART_HI_DEST_RESV     ~APIC_ID_MASK
 #define IOART_HI_DEST_BROADCAST        IOART_HI_DEST_MASK      
+#define IOART_HI_DEST_SHIFT    24
 
 /*
  * Low 32 bit word
index 3c68f28..9fc3dee 100644 (file)
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 1996, by Steve Passe
- * Copyright (c) 2008 The DragonFly Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
- * $DragonFly: src/sys/platform/pc64/apic/mpapic.c,v 1.1 2008/08/29 17:07:12 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/apic/mpapic.c,v 1.22 2008/04/20 13:44:26 swildner Exp $
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <machine/globaldata.h>
 #include <machine/smp.h>
+#include <machine/md_var.h>
 #include <machine_base/apic/mpapic.h>
 #include <machine/segments.h>
 #include <sys/thread2.h>
 #define ELCR0  0x4d0                   /* eisa irq 0-7 */
 #define ELCR1  0x4d1                   /* eisa irq 8-15 */
 
+volatile lapic_t *lapic;
+
+static void    lapic_timer_calibrate(void);
+static void    lapic_timer_set_divisor(int);
+static void    lapic_timer_fixup_handler(void *);
+static void    lapic_timer_restart_handler(void *);
+
+void           lapic_timer_process(void);
+void           lapic_timer_process_frame(struct intrframe *);
+
+static int     lapic_timer_enable = 1;
+TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);
+
+static void    lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
+static void    lapic_timer_intr_enable(struct cputimer_intr *);
+static void    lapic_timer_intr_restart(struct cputimer_intr *);
+static void    lapic_timer_intr_pmfixup(struct cputimer_intr *);
+
+static struct cputimer_intr lapic_cputimer_intr = {
+       .freq = 0,
+       .reload = lapic_timer_intr_reload,
+       .enable = lapic_timer_intr_enable,
+       .config = cputimer_intr_default_config,
+       .restart = lapic_timer_intr_restart,
+       .pmfixup = lapic_timer_intr_pmfixup,
+       .initclock = cputimer_intr_default_initclock,
+       .next = SLIST_ENTRY_INITIALIZER,
+       .name = "lapic",
+       .type = CPUTIMER_INTR_LAPIC,
+       .prio = CPUTIMER_INTR_PRIO_LAPIC,
+       .caps = CPUTIMER_INTR_CAP_NONE
+};
+
 /*
  * pointers to pmapped apic hardware.
  */
 
 volatile ioapic_t      **ioapic;
 
-void   lapic_timer_fixup(void);
+static int             lapic_timer_divisor_idx = -1;
+static const uint32_t  lapic_timer_divisors[] = {
+       APIC_TDCR_2,    APIC_TDCR_4,    APIC_TDCR_8,    APIC_TDCR_16,
+       APIC_TDCR_32,   APIC_TDCR_64,   APIC_TDCR_128,  APIC_TDCR_1
+};
+#define APIC_TIMER_NDIVISORS \
+       (int)(sizeof(lapic_timer_divisors) / sizeof(lapic_timer_divisors[0]))
+
+
+void
+lapic_eoi(void)
+{
+       lapic->eoi = 0;
+}
 
 /*
  * Enable APIC, configure interrupts.
  */
 void
-apic_initialize(void)
+apic_initialize(boolean_t bsp)
 {
+       uint32_t timer;
        u_int   temp;
 
        /*
@@ -68,31 +117,37 @@ apic_initialize(void)
         * Disable LVT1 on the APs.  It doesn't matter what delivery
         * mode we use because we leave it masked.
         */
-       temp = lapic.lvt_lint0;
+       temp = lapic->lvt_lint0;
        temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 
                  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
        if (mycpu->gd_cpuid == 0)
                temp |= APIC_LVT_DM_EXTINT;
        else
                temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
-       lapic.lvt_lint0 = temp;
+       lapic->lvt_lint0 = temp;
 
        /*
         * setup LVT2 as NMI, masked till later.  Edge trigger, active high.
         */
-       temp = lapic.lvt_lint1;
+       temp = lapic->lvt_lint1;
        temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 
                  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
        temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
-       lapic.lvt_lint1 = temp;
+       lapic->lvt_lint1 = temp;
 
        /*
         * Mask the apic error interrupt, apic performance counter
-        * interrupt, and the apic timer interrupt.
+        * interrupt.
         */
-       lapic.lvt_error = lapic.lvt_error | APIC_LVT_MASKED;
-       lapic.lvt_pcint = lapic.lvt_pcint | APIC_LVT_MASKED;
-       lapic.lvt_timer = lapic.lvt_timer | APIC_LVT_MASKED;
+       lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
+       lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;
+
+       /* Set apic timer vector and mask the apic timer interrupt. */
+       timer = lapic->lvt_timer;
+       timer &= ~APIC_LVTT_VECTOR;
+       timer |= XTIMER_OFFSET;
+       timer |= APIC_LVTT_MASKED;
+       lapic->lvt_timer = timer;
 
        /*
         * Set the Task Priority Register as needed.   At the moment allow
@@ -100,7 +155,7 @@ apic_initialize(void)
         * ready to deal).  We could disable all but IPIs by setting
         * temp |= TPR_IPI_ONLY for cpu != 0.
         */
-       temp = lapic.tpr;
+       temp = lapic->tpr;
        temp &= ~APIC_TPR_PRIO;         /* clear priority field */
 #ifndef APIC_IO
        /*
@@ -110,12 +165,12 @@ apic_initialize(void)
        temp |= TPR_IPI_ONLY;
 #endif
 
-       lapic.tpr = temp;
+       lapic->tpr = temp;
 
        /* 
         * enable the local APIC 
         */
-       temp = lapic.svr;
+       temp = lapic->svr;
        temp |= APIC_SVR_ENABLE;        /* enable the APIC */
        temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */
 
@@ -128,26 +183,214 @@ apic_initialize(void)
        temp &= ~APIC_SVR_VECTOR;
        temp |= XSPURIOUSINT_OFFSET;
 
-       lapic.svr = temp;
+       lapic->svr = temp;
 
        /*
         * Pump out a few EOIs to clean out interrupts that got through
         * before we were able to set the TPR.
         */
-       lapic.eoi = 0;
-       lapic.eoi = 0;
-       lapic.eoi = 0;
+       lapic_eoi();
+       lapic_eoi();
+       lapic_eoi();
+
+       if (bsp) {
+               lapic_timer_calibrate();
+               if (lapic_timer_enable) {
+                       cputimer_intr_register(&lapic_cputimer_intr);
+                       cputimer_intr_select(&lapic_cputimer_intr, 0);
+               }
+       } else {
+               lapic_timer_set_divisor(lapic_timer_divisor_idx);
+       }
 
        if (bootverbose)
                apic_dump("apic_initialize()");
 }
 
+
+static void
+lapic_timer_set_divisor(int divisor_idx)
+{
+       KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
+       lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
+}
+
+static void
+lapic_timer_oneshot(u_int count)
+{
+       uint32_t value;
+
+       value = lapic->lvt_timer;
+       value &= ~APIC_LVTT_PERIODIC;
+       lapic->lvt_timer = value;
+       lapic->icr_timer = count;
+}
+
+static void
+lapic_timer_oneshot_quick(u_int count)
+{
+       lapic->icr_timer = count;
+}
+
+static void
+lapic_timer_calibrate(void)
+{
+       sysclock_t value;
+
+       /* Try to calibrate the local APIC timer. */
+       for (lapic_timer_divisor_idx = 0;
+            lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
+            lapic_timer_divisor_idx++) {
+               lapic_timer_set_divisor(lapic_timer_divisor_idx);
+               lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
+               DELAY(2000000);
+               value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
+               if (value != APIC_TIMER_MAX_COUNT)
+                       break;
+       }
+       if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
+               panic("lapic: no proper timer divisor?!");
+       lapic_cputimer_intr.freq = value / 2;
+
+       kprintf("lapic: divisor index %d, frequency %u Hz\n",
+               lapic_timer_divisor_idx, lapic_cputimer_intr.freq);
+}
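
The calibration loop arms a one-shot with the maximum count, burns two
seconds in DELAY(), and reads back how far the current-count register
fell.  If the counter already hit zero, value equals
APIC_TIMER_MAX_COUNT and the next divisor in the table is tried.
Worked numbers for a hypothetical 100 MHz bus clock:

	/*
	 * divide-by-2     -> timer decrements at 50,000,000 Hz
	 * DELAY(2000000)  -> wait 2 seconds
	 * value = APIC_TIMER_MAX_COUNT - ccr_timer ~= 100,000,000
	 * freq  = value / 2                         = 50,000,000 Hz
	 */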
+
+static void
+lapic_timer_process_oncpu(struct globaldata *gd, struct intrframe *frame)
+{
+       sysclock_t count;
+
+       gd->gd_timer_running = 0;
+
+       count = sys_cputimer->count();
+       if (TAILQ_FIRST(&gd->gd_systimerq) != NULL)
+               systimer_intr(&count, 0, frame);
+}
+
 void
-lapic_timer_fixup(void)
+lapic_timer_process(void)
+{
+       lapic_timer_process_oncpu(mycpu, NULL);
+}
+
+void
+lapic_timer_process_frame(struct intrframe *frame)
+{
+       lapic_timer_process_oncpu(mycpu, frame);
+}
+
+static void
+lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
+{
+       struct globaldata *gd = mycpu;
+
+       reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
+       if (reload < 2)
+               reload = 2;
+
+       if (gd->gd_timer_running) {
+               if (reload < lapic->ccr_timer)
+                       lapic_timer_oneshot_quick(reload);
+       } else {
+               gd->gd_timer_running = 1;
+               lapic_timer_oneshot_quick(reload);
+       }
+}
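
reload arrives in sys_cputimer ticks and is rescaled to lapic timer
ticks; the floor of 2 avoids arming a count so small the interrupt
could be missed, and a pending one-shot is only ever shortened, so an
earlier deadline can never be lost behind a later one.  Worked numbers,
assuming an i8254 sys_cputimer at 1,193,182 Hz and a lapic timer
calibrated to 50,000,000 Hz:

	/*
	 * reload = 1193 sys ticks (~1 ms)
	 * reload = 1193 * 50000000 / 1193182 ~= 49992 lapic ticks
	 */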
+
+static void
+lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
+{
+       uint32_t timer;
+
+       timer = lapic->lvt_timer;
+       timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC);
+       lapic->lvt_timer = timer;
+
+       lapic_timer_fixup_handler(NULL);
+}
+
+static void
+lapic_timer_fixup_handler(void *arg)
 {
-       /* TODO */
+       int *started = arg;
+
+       if (started != NULL)
+               *started = 0;
+
+       if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+               /*
+                * Detect the presence of C1E capability mostly on latest
+                * dual-cores (or future) k8 family.  This feature renders
+                * the local APIC timer dead, so we disable it by reading
+                * the Interrupt Pending Message register and clearing both
+                * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
+                * 
+                * Reference:
+                *   "BIOS and Kernel Developer's Guide for AMD NPT
+                *    Family 0Fh Processors"
+                *   #32559 revision 3.00
+                */
+               if ((cpu_id & 0x00000f00) == 0x00000f00 &&
+                   (cpu_id & 0x0fff0000) >= 0x00040000) {
+                       uint64_t msr;
+
+                       msr = rdmsr(0xc0010055);
+                       if (msr & 0x18000000) {
+                               struct globaldata *gd = mycpu;
+
+                               kprintf("cpu%d: AMD C1E detected\n",
+                                       gd->gd_cpuid);
+                               wrmsr(0xc0010055, msr & ~0x18000000ULL);
+
+                               /*
+                                * The lapic timer may be stalled;
+                                * kick-start it again.
+                                */
+                               gd->gd_timer_running = 1;
+                               lapic_timer_oneshot_quick(2);
+
+                               if (started != NULL)
+                                       *started = 1;
+                       }
+               }
+       }
 }
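
Decoding the magic numbers: MSR 0xc0010055 is AMD's Interrupt Pending
Message register, and 0x18000000 is the OR of the two halt-state bits
named in the comment above:

	/*
	 * SmiOnCmpHalt = bit 27 = 0x08000000
	 * C1eOnCmpHalt = bit 28 = 0x10000000
	 * both         =          0x18000000
	 *
	 * The cpu_id test selects family 0xF parts
	 * ((cpu_id & 0x00000f00) == 0x00000f00) at revisions
	 * (cpu_id & 0x0fff0000) >= 0x00040000, i.e. the later
	 * dual-core K8 steppings.
	 */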
 
+static void
+lapic_timer_restart_handler(void *dummy __unused)
+{
+       int started;
+
+       lapic_timer_fixup_handler(&started);
+       if (!started) {
+               struct globaldata *gd = mycpu;
+
+               gd->gd_timer_running = 1;
+               lapic_timer_oneshot_quick(2);
+       }
+}
+
+/*
+ * This function is called only by ACPI-CA code currently:
+ * - AMD C1E fixup.  The C1E problem only seems to appear once the
+ *   ACPI module has taken over power management, so as soon as
+ *   ACPI-CA is attached we apply the fixup to keep the LAPIC timer
+ *   from hanging.
+ */
+static void
+lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
+{
+       lwkt_send_ipiq_mask(smp_active_mask,
+                           lapic_timer_fixup_handler, NULL);
+}
+
+static void
+lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
+{
+       lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
+}
+
+
 /*
  * dump contents of local APIC registers
  */
@@ -156,7 +399,7 @@ apic_dump(char* str)
 {
        kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
        kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
-               lapic.lvt_lint0, lapic.lvt_lint1, lapic.tpr, lapic.svr);
+               lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
 }
 
 
@@ -231,6 +474,8 @@ io_apic_setup_intpin(int apic, int pin)
        u_int32_t       target;         /* the window register is 32 bits */
        u_int32_t       vector;         /* the window register is 32 bits */
        int             level;
+       int             cpuid;
+       char            envpath[32];
 
        select = pin * 2 + IOAPIC_REDTBL0;      /* register */
 
@@ -308,10 +553,18 @@ io_apic_setup_intpin(int apic, int pin)
                        apic_pin_trigger |= (1 << irq);
                polarity(apic, pin, &flags, level);
        }
-       
+
+       cpuid = 0;
+       ksnprintf(envpath, sizeof(envpath), "hw.irq.%d.dest", irq);
+       kgetenv_int(envpath, &cpuid);
+
+       /* ncpus may not be available yet */
+       if (cpuid > mp_naps)
+               cpuid = 0;
+
        if (bootverbose) {
-               kprintf("IOAPIC #%d intpin %d -> irq %d\n",
-                      apic, pin, irq);
+               kprintf("IOAPIC #%d intpin %d -> irq %d (CPU%d)\n",
+                      apic, pin, irq, cpuid);
        }
 
        /*
@@ -327,7 +580,9 @@ io_apic_setup_intpin(int apic, int pin)
 
        vector = IDT_OFFSET + irq;                      /* IDT vec */
        target = io_apic_read(apic, select + 1) & IOART_HI_DEST_RESV;
-       target |= IOART_HI_DEST_BROADCAST;
+       /* Deliver interrupts to the selected cpu (CPU0/BSP by default) */
+       target |= (CPU_TO_ID(cpuid) << IOART_HI_DEST_SHIFT) &
+                 IOART_HI_DEST_MASK;
        flags |= io_apic_read(apic, select) & IOART_RESV;
        io_apic_write(apic, select, flags | vector);
        io_apic_write(apic, select + 1, target);
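
Bits 24-31 of the redirection entry's high word carry the destination
APIC ID, so the hw.irq.N.dest tunable introduced here can steer an
interrupt to any started cpu; the value is mapped through CPU_TO_ID()
and falls back to cpu 0 when out of range.  A hypothetical loader.conf
entry:

	# /boot/loader.conf: route irq 14 to cpu 1 instead of the BSP
	hw.irq.14.dest=1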
@@ -374,6 +629,7 @@ io_apic_setup(int apic)
          IOART_DELLOPRI))
 
 /*
+ * XXX this function is only used by the 8254 setup.
  * Setup the source of External INTerrupts.
  */
 int
@@ -383,11 +639,23 @@ ext_int_setup(int apic, int intr)
        u_int32_t flags;        /* the window register is 32 bits */
        u_int32_t target;       /* the window register is 32 bits */
        u_int32_t vector;       /* the window register is 32 bits */
+       int cpuid;
+       char envpath[32];
 
        if (apic_int_type(apic, intr) != 3)
                return -1;
 
-       target = IOART_HI_DEST_BROADCAST;
+       cpuid = 0;
+       ksnprintf(envpath, sizeof(envpath), "hw.irq.%d.dest", intr);
+       kgetenv_int(envpath, &cpuid);
+
+       /* ncpus may not be available yet */
+       if (cpuid > mp_naps)
+               cpuid = 0;
+
+       /* Deliver interrupts to the selected cpu (CPU0/BSP by default) */
+       target = (CPU_TO_ID(cpuid) << IOART_HI_DEST_SHIFT) &
+                IOART_HI_DEST_MASK;
        select = IOAPIC_REDTBL0 + (2 * intr);
        vector = IDT_OFFSET + intr;
        flags = DEFAULT_EXTINT_FLAGS;
@@ -577,18 +845,18 @@ apic_ipi(int dest_type, int vector, int delivery_mode)
        u_long  icr_lo;
 
        crit_enter();
-       if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) {
-           unsigned int eflags = read_eflags();
+       if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
+           unsigned long rflags = read_rflags();
            cpu_enable_intr();
-           while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) {
+           while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
                lwkt_process_ipiq();
            }
-           write_eflags(eflags);
+           write_rflags(rflags);
        }
 
-       icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK) | dest_type | 
+       icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type | 
                delivery_mode | vector;
-       lapic.icr_lo = icr_lo;
+       lapic->icr_lo = icr_lo;
        crit_exit();
        return 0;
 }
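
The ICR send sequence is ordering-sensitive: wait for the
delivery-status bit to drain (processing our own IPI queue meanwhile,
so two cpus spinning on each other's ICR cannot deadlock), program the
destination in icr_hi, then write icr_lo, which is what actually fires
the IPI.  A condensed sketch of the directed variant that follows:

	/* Sketch of the single_apic_ipi() send sequence. */
	static void
	send_ipi_sketch(int cpu, int vector, int delivery_mode)
	{
		while (lapic->icr_lo & APIC_DELSTAT_MASK)
			lwkt_process_ipiq();	/* drain while waiting */
		lapic->icr_hi = (lapic->icr_hi & ~APIC_ID_MASK) |
				(CPU_TO_ID(cpu) << 24);
		lapic->icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
				APIC_DEST_DESTFLD | delivery_mode | vector;
	}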
@@ -600,24 +868,24 @@ single_apic_ipi(int cpu, int vector, int delivery_mode)
        u_long  icr_hi;
 
        crit_enter();
-       if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) {
-           unsigned int eflags = read_eflags();
+       if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
+           unsigned long rflags = read_rflags();
            cpu_enable_intr();
-           while ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) {
+           while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
                lwkt_process_ipiq();
            }
-           write_eflags(eflags);
+           write_rflags(rflags);
        }
-       icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
+       icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
        icr_hi |= (CPU_TO_ID(cpu) << 24);
-       lapic.icr_hi = icr_hi;
+       lapic->icr_hi = icr_hi;
 
        /* build ICR_LOW */
-       icr_lo = (lapic.icr_lo & APIC_ICRLO_RESV_MASK)
+       icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK)
            | APIC_DEST_DESTFLD | delivery_mode | vector;
 
        /* write APIC ICR */
-       lapic.icr_lo = icr_lo;
+       lapic->icr_lo = icr_lo;
        crit_exit();
 }
 
@@ -636,20 +904,20 @@ single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
        u_long  icr_hi;
 
        crit_enter();
-       if ((lapic.icr_lo & APIC_DELSTAT_MASK) != 0) {
+       if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
            crit_exit();
            return(0);
        }
-       icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
+       icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
        icr_hi |= (CPU_TO_ID(cpu) << 24);
-       lapic.icr_hi = icr_hi;
+       lapic->icr_hi = icr_hi;
 
        /* build ICR_LOW */
-       icr_lo = (lapic.icr_lo & APIC_RESV2_MASK)
+       icr_lo = (lapic->icr_lo & APIC_RESV2_MASK)
            | APIC_DEST_DESTFLD | delivery_mode | vector;
 
        /* write APIC ICR */
-       lapic.icr_lo = icr_lo;
+       lapic->icr_lo = icr_lo;
        crit_exit();
        return(1);
 }
@@ -680,74 +948,25 @@ selected_apic_ipi(u_int target, int vector, int delivery_mode)
  *  - suggested by rgrimes@gndrsh.aac.dev.com
  */
 
-/** XXX FIXME: temp hack till we can determin bus clock */
-#ifndef BUS_CLOCK
-#define BUS_CLOCK      66000000
-#define bus_clock()    66000000
-#endif
-
-#if defined(READY)
-int acquire_apic_timer (void);
-int release_apic_timer (void);
-
-/*
- * Acquire the APIC timer for exclusive use.
- */
-int
-acquire_apic_timer(void)
-{
-#if 1
-       return 0;
-#else
-       /** XXX FIXME: make this really do something */
-       panic("APIC timer in use when attempting to acquire");
-#endif
-}
-
-
-/*
- * Return the APIC timer.
- */
-int
-release_apic_timer(void)
-{
-#if 1
-       return 0;
-#else
-       /** XXX FIXME: make this really do something */
-       panic("APIC timer was already released");
-#endif
-}
-#endif /* READY */
-
-
 /*
  * Load a 'downcount time' in uSeconds.
  */
 void
-set_apic_timer(int value)
+set_apic_timer(int us)
 {
-       u_long  lvtt;
-       long    ticks_per_microsec;
+       u_int count;
 
        /*
-        * Calculate divisor and count from value:
-        * 
-        *  timeBase == CPU bus clock divisor == [1,2,4,8,16,32,64,128]
-        *  value == time in uS
+        * By the time we get here the lapic timer's frequency must
+        * have been calibrated and the divisor selected
+        * (lapic->dcr_timer is set up during the divisor
+        * calibration).
         */
-       lapic.dcr_timer = APIC_TDCR_1;
-       ticks_per_microsec = bus_clock() / 1000000;
-
-       /* configure timer as one-shot */
-       lvtt = lapic.lvt_timer;
-       lvtt &= ~(APIC_LVTT_VECTOR | APIC_LVTT_DS);
-       lvtt &= ~(APIC_LVTT_PERIODIC);
-       lvtt |= APIC_LVTT_MASKED;               /* no INT, one-shot */
-       lapic.lvt_timer = lvtt;
-
-       /* */
-       lapic.icr_timer = value * ticks_per_microsec;
+       KKASSERT(lapic_cputimer_intr.freq != 0 &&
+                lapic_timer_divisor_idx >= 0);
+
+       count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
+       lapic_timer_oneshot(count);
 }
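
The count computation rounds up to whole lapic ticks so the one-shot
can never fire early.  Worked numbers for a timer calibrated to
33,333,333 Hz:

	/*
	 * us = 10:
	 *   10 * 33333333                  = 333,333,330
	 *   (333333330 + 999999) / 1000000 = 334 ticks
	 * (333.3 ticks rounded up, versus 333 with plain truncation)
	 */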
 
 
@@ -762,7 +981,7 @@ read_apic_timer(void)
          *         for now we just return the remaining count.
          */
 #else
-       return lapic.ccr_timer;
+       return lapic->ccr_timer;
 #endif
 }
 
index dce96ad..2cd1fcb 100644 (file)
@@ -79,8 +79,7 @@ vfs/smbfs/smbfs_vnops.c               optional        smbfs
 platform/pc64/amd64/atomic.c                   standard                        \
         compile-with    "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
 platform/pc64/amd64/autoconf.c standard
-platform/pc64/amd64/mp.c               optional        smp             \
-        compile-with    "${CC} -c -pthread ${CFLAGS} -I/usr/include ${.IMPSRC}"
+platform/pc64/amd64/mpboot.S           optional        smp
 platform/pc64/amd64/mplock.s           optional        smp
 
 # DDB XXX
@@ -120,6 +119,11 @@ platform/pc64/amd64/procfs_machdep.c       standard
 platform/pc64/amd64/initcpu.c          standard
 platform/pc64/amd64/identcpu.c         standard
 
+platform/pc64/apic/apic_abi.c          optional        smp
+platform/pc64/apic/mpapic.c            optional        smp
+platform/pc64/apic/apic_ipl.s          optional        smp
+platform/pc64/apic/apic_vector.s       optional        smp
+
 bus/isa/amd64/isa.c                    optional        isa
 bus/isa/amd64/isa_compat.c             optional        isa compat_oldisa
 bus/isa/amd64/isa_dma.c                        optional        isa
@@ -142,6 +146,7 @@ platform/pc64/amd64/systimer.c      standard
 platform/pc64/amd64/console.c  standard
 platform/pc64/amd64/ipl_funcs.c        standard
 kern/syscalls.c                        standard
+platform/pc64/amd64/mp_machdep.c               optional        smp
 dev/misc/atkbd/atkbd_isa.c             optional        atkbd
 dev/misc/atkbdc_layer/atkbdc_isa.c     optional        atkbdc
 dev/misc/psm/psm.c                     optional        psm
index fd2e575..0ce4868 100644 (file)
@@ -2,6 +2,9 @@
 # $DragonFly: src/sys/platform/pc64/conf/options,v 1.4 2008/08/29 17:07:15 dillon Exp $
 #
 
+# amd64 SMP options
+APIC_IO                        opt_global.h
+
 # The cpu type
 #
 HAMMER_CPU                     opt_cpu.h
index c72ea92..5142717 100644 (file)
@@ -1,41 +1,47 @@
-/*-
+/*
  * Kernel interface to machine-dependent clock driver.
  * Garrett Wollman, September 1994.
  * This file is in the public domain.
  *
- * $FreeBSD: src/sys/amd64/include/clock.h,v 1.54 2007/01/23 08:01:19 bde Exp $
- * $DragonFly: src/sys/platform/pc64/include/clock.h,v 1.2 2008/05/10 17:24:10 dillon Exp $ 
+ * $FreeBSD: src/sys/i386/include/clock.h,v 1.38.2.1 2002/11/02 04:41:50 iwasaki Exp $
+ * $DragonFly: src/sys/platform/pc32/include/clock.h,v 1.9 2008/05/10 17:24:08 dillon Exp $
  */
 
 #ifndef _MACHINE_CLOCK_H_
 #define        _MACHINE_CLOCK_H_
 
 #ifdef _KERNEL
+
+#ifndef _SYS_TYPES_H_
+#include <sys/types.h>
+#endif
+
 /*
  * i386 to clock driver interface.
  * XXX large parts of the driver and its interface are misplaced.
  */
 extern int     adjkerntz;
-extern int     clkintr_pending;
-extern int     pscnt;
-extern int     psdiv;
+extern int     disable_rtc_set;
 extern int     statclock_disable;
 extern u_int   timer_freq;
 extern int     timer0_max_count;
 extern int     tsc_present;
+extern int64_t tsc_frequency;
 extern int     tsc_is_broken;
 extern int     wall_cmos_clock;
+#ifdef APIC_IO
+extern int     apic_8254_intr;
+#endif
 
 /*
  * Driver to clock driver interface.
  */
 
-int    acquire_timer2(int mode);
-int    release_timer2(void);
-int    rtcin(int val);
-int    sysbeep(int pitch, int period);
-void   init_TSC(void);
-void   init_TSC_tc(void);
+int    rtcin (int val);
+int    acquire_timer2 (int mode);
+int    release_timer2 (void);
+int    sysbeep (int pitch, int period);
+void   timer_restore (void);
 
 #endif /* _KERNEL */
 
index 517b03e..988b07b 100644 (file)
@@ -64,6 +64,9 @@
  * the service routine will loop.
  *
  * The current thread's cpl is stored in the thread structure.
+ *
+ * Note: the embedded globaldata and/or the mdglobaldata structure
+ * may exceed the size of a page.
  */
 struct mdglobaldata {
        struct globaldata mi;
@@ -79,7 +82,7 @@ struct mdglobaldata {
        int             gd_sdelayed;    /* delayed software ints */
        int             gd_currentldt;
        int             gd_private_tss;
-       u_int           gd_unused001;
+       u_int           unused001;
        u_int           gd_other_cpus;
        u_int           gd_ss_eflags;
        pt_entry_t      *gd_CMAP1;
@@ -90,6 +93,8 @@ struct mdglobaldata {
        caddr_t         gd_CADDR2;
        caddr_t         gd_CADDR3;
        pt_entry_t      *gd_PADDR1;
+       u_int           gd_acpi_id;
+       u_int           gd_apic_id;
        register_t      gd_scratch_rsp;
        register_t      gd_rsp0;
        register_t      gd_user_fs;     /* current user fs in MSR */
diff --git a/sys/platform/pc64/include/intr_machdep.h b/sys/platform/pc64/include/intr_machdep.h
deleted file mode 100644 (file)
index d826013..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * Copyright (c) 2008 The DragonFly Project.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/amd64/include/intr_machdep.h,v 1.18 2007/05/08 21:29:13 jhb Exp $
- * $DragonFly: src/sys/platform/pc64/include/intr_machdep.h,v 1.2 2008/08/29 17:07:17 dillon Exp $
- */
-
-#ifndef __MACHINE_INTR_MACHDEP_H__
-#define        __MACHINE_INTR_MACHDEP_H__
-
-#ifdef _KERNEL
-
-/*
- * The maximum number of I/O interrupts we allow.  This number is rather
- * arbitrary as it is just the maximum IRQ resource value.  The interrupt
- * source for a given IRQ maps that I/O interrupt to device interrupt
- * source whether it be a pin on an interrupt controller or an MSI interrupt.
- * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device
- * interrupts allocate IDT vectors on demand.  Currently we have 191 IDT
- * vectors available for device interrupts.  On many systems with I/O APICs,
- * a lot of the IRQs are not used, so this number can be much larger than
- * 191 and still be safe since only interrupt sources in actual use will
- * allocate IDT vectors.
- *
- * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
- * IRQ values beyond 256 are used by MSI.  We leave 255 unused to avoid
- * confusion since 255 is used in PCI to indicate an invalid IRQ.
- */
-#define        NUM_MSI_INTS    128
-#define        FIRST_MSI_INT   256
-#define        NUM_IO_INTS     (FIRST_MSI_INT + NUM_MSI_INTS)
-
-/*
- * Default base address for MSI messages on x86 platforms.
- */
-#define        MSI_INTEL_ADDR_BASE             0xfee00000
-
-/*
- * - 1 ??? dummy counter.
- * - 2 counters for each I/O interrupt.
- * - 1 counter for each CPU for lapic timer.
- * - 7 counters for each CPU for IPI counters for SMP.
- */
-#ifdef SMP
-#define        INTRCNT_COUNT   (1 + NUM_IO_INTS * 2 + (1 + 7) * MAXCPU)
-#else
-#define        INTRCNT_COUNT   (1 + NUM_IO_INTS * 2 + 1)
-#endif
-
-#ifndef LOCORE
-
-#ifndef JG_defined_inthand_t
-#define JG_defined_inthand_t
-typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
-#endif
-
-#endif /* !LOCORE */
-#endif /* _KERNEL */
-#endif /* !__MACHINE_INTR_MACHDEP_H__ */
index 4052208..ce80365 100644 (file)
@@ -1,30 +1,38 @@
 /*
- * Copyright (c) 2003,2008 The DragonFly Project.
- * Copyright (c) 2003 Matthew Dillon.
- * All rights reserved.
- *
+ * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
+ * 
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
+ * 
  * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $
- * $DragonFly: src/sys/platform/pc64/include/lock.h,v 1.4 2008/08/29 17:07:17 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/include/lock.h,v 1.17 2008/06/19 21:32:55 aggelos Exp $
  */
 
 #ifndef _MACHINE_LOCK_H_
        pushfq ;                                                \
        popq    %rcx ;          /* flags */                     \
        cli ;                                                   \
-       orl     $PSL_C,%rcx ;   /* make sure non-zero */        \
+       orq     $PSL_C,%rcx ;   /* make sure non-zero */        \
 7: ;                                                           \
        movq    $0,%rax ;       /* expected contents of lock */ \
        lock cmpxchgq %rcx,mem ; /* Z=1 (jz) on success */      \
+       pause ;                                                 \
        jnz     7b ;                                            \
 
 #define SPIN_LOCK_PUSH_REGS                                    \
-       subq    $2*8,%rsp ;                                     \
+       subq    $16,%rsp ;                                      \
        movq    %rcx,(%rsp) ;                                   \
        movq    %rax,8(%rsp) ;                                  \
 
 #define SPIN_LOCK_POP_REGS                                     \
        movq    (%rsp),%rcx ;                                   \
        movq    8(%rsp),%rax ;                                  \
-       addq    $2*8,%rsp ;                                     \
+       addq    $16,%rsp ;                                      \
 
-#define SPIN_LOCK_FRAME_SIZE   8
+#define SPIN_LOCK_FRAME_SIZE   16
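
The SPIN_LOCK macro above parks the caller's saved rflags in the lock
word, with PSL_C forced on so the stored value can never be zero (zero
means free), and pauses between cmpxchg attempts to be easy on
hyperthreaded siblings.  The same logic in C, as a sketch built on a
GCC __sync builtin (the unlock half is not shown in this hunk and is
inferred):

	/* Sketch of the SPIN_LOCK/SPIN_UNLOCK logic. */
	static __inline void
	spin_lock_sketch(volatile unsigned long *mem)
	{
		unsigned long flags = read_rflags() | PSL_C; /* non-zero */

		cpu_disable_intr();
		while (!__sync_bool_compare_and_swap(mem, 0UL, flags))
			cpu_pause();
	}

	static __inline void
	spin_unlock_sketch(volatile unsigned long *mem)
	{
		unsigned long flags = *mem;

		*mem = 0;		/* free the lock word */
		write_rflags(flags);	/* also restores the interrupt state */
	}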
 
 #define SPIN_LOCK_NOREG(mem)                                   \
        SPIN_LOCK_PUSH_REGS ;                                   \
@@ -148,8 +157,8 @@ void        clock_unlock(void);
 
 extern struct spinlock_deprecated smp_rv_spinlock;
 
-void   spin_lock_deprecated(spinlock_t);
-void   spin_unlock_deprecated(spinlock_t);
+void   spin_lock_deprecated(spinlock_t lock);
+void   spin_unlock_deprecated(spinlock_t lock);
 
 /*
  * Inline version of spinlock routines -- overrides assembly.  Only unlock
@@ -188,11 +197,18 @@ cpu_rel_mplock(void)
        mp_lock = MP_FREE_LOCK;
 }
 
-#else
+static __inline int
+owner_mplock(void)
+{
+       return (mp_lock);
+}
+
+#else /* !SMP */
 
 #define get_mplock()
 #define try_mplock()   1
 #define rel_mplock()
+#define owner_mplock() 0       /* always cpu 0 */
 #define MP_LOCK_HELD() (!0)
 #define ASSERT_MP_LOCK_HELD(td)
 
index a016bff..fdfa12f 100644 (file)
@@ -1,4 +1,4 @@
-/*-
+/*
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
@@ -6,8 +6,8 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  *
- * $FreeBSD: src/sys/amd64/include/smp.h,v 1.90 2007/05/19 05:01:43 kan Exp $
- * $DragonFly: src/sys/platform/pc64/include/smp.h,v 1.1 2007/09/23 04:42:07 yanyh Exp $
+ * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $
+ * $DragonFly: src/sys/platform/pc32/include/smp.h,v 1.20 2006/11/07 06:43:24 dillon Exp $
  *
  */
 
 
 #ifdef _KERNEL
 
-#ifdef SMP
+#if defined(SMP)
 
 #ifndef LOCORE
 
-#include <sys/bus.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/apicvar.h>
+/*
+ * For sending values to POST displays.
+ * XXX FIXME: where does this really belong, isa.h/isa.c perhaps?
+ */
+extern int current_postcode;  /** XXX currently in mp_machdep.c */
+#define POSTCODE(X)    current_postcode = (X), \
+                       outb(0x80, current_postcode)
+#define POSTCODE_LO(X) current_postcode &= 0xf0, \
+                       current_postcode |= ((X) & 0x0f), \
+                       outb(0x80, current_postcode)
+#define POSTCODE_HI(X) current_postcode &= 0x0f, \
+                       current_postcode |= (((X) << 4) & 0xf0), \
+                       outb(0x80, current_postcode)
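
POSTCODE latches a progress byte to I/O port 0x80, the traditional POST
diagnostic port, so SMP bring-up can be traced on a POST card before
any console exists; the _LO/_HI variants update one nibble and preserve
the other.  Hypothetical breadcrumbs:

	POSTCODE(0x10);    /* phase 0x1, step 0x0: entering mp_start() */
	POSTCODE_LO(0x3);  /* advance to step 0x3 within the same phase */
	POSTCODE_HI(0x2);  /* advance to phase 0x2, keep the step nibble */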
+
+
+#include <machine_base/apic/apicreg.h>
 #include <machine/pcb.h>
 
 /* global symbols in mpboot.S */
@@ -31,54 +43,108 @@ extern char                        mptramp_start[];
 extern char                    mptramp_end[];
 extern u_int32_t               mptramp_pagetables;
 
+/* functions in mpboot.s */
+void   bootMP                  (void);
+
+/* global data in apic_vector.s */
+extern volatile u_int          stopped_cpus;
+extern volatile u_int          started_cpus;
+
+extern volatile u_int          checkstate_probed_cpus;
+extern void (*cpustop_restartfunc) (void);
+
+/* functions in apic_ipl.s */
+u_int  io_apic_read            (int, int);
+void   io_apic_write           (int, int, u_int);
+
 /* global data in mp_machdep.c */
+extern int                     bsp_apic_ready;
 extern int                     mp_naps;
+extern int                     mp_nbusses;
+extern int                     mp_napics;
 extern int                     boot_cpu_id;
+extern vm_offset_t             cpu_apic_address;
+extern vm_offset_t             io_apic_address[];
+extern u_int32_t               cpu_apic_versions[];
+extern u_int32_t               *io_apic_versions;
+extern int                     cpu_num_to_apic_id[];
+extern int                     io_num_to_apic_id[];
+extern int                     apic_id_to_logical[];
+#define APIC_INTMAPSIZE 24
+struct apic_intmapinfo {
+       int ioapic;
+       int int_pin;
+       volatile void *apic_address;
+       int redirindex;
+};
+extern struct apic_intmapinfo  int_to_apicintpin[];
 extern struct pcb              stoppcbs[];
-extern struct mtx              smp_tlb_mtx;
-extern int                     cpu_apic_ids[];
-
-/* IPI handlers */
-inthand_t
-       IDTVEC(invltlb),        /* TLB shootdowns - global */
-       IDTVEC(invlpg),         /* TLB shootdowns - 1 page */
-       IDTVEC(invlrng),        /* TLB shootdowns - page range */
-       IDTVEC(invlcache),      /* Write back and invalidate cache */
-       IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ 
-       IDTVEC(cpustop),        /* CPU stops & waits to be restarted */
-       IDTVEC(rendezvous);     /* handle CPU rendezvous */
 
 /* functions in mp_machdep.c */
-void   cpu_add(u_int apic_id, char boot_cpu);
-void   cpustop_handler(void);
-void   init_secondary(void);
-void   ipi_selected(u_int cpus, u_int ipi);
-void   ipi_all(u_int ipi);
-void   ipi_all_but_self(u_int ipi);
-void   ipi_self(u_int ipi);
-void   ipi_bitmap_handler(struct trapframe frame);
-u_int  mp_bootaddress(u_int);
-int    mp_grab_cpu_hlt(void);
-void   mp_topology(void);
-void   smp_cache_flush(void);
-void   smp_invlpg(vm_offset_t addr);
-void   smp_masked_invlpg(u_int mask, vm_offset_t addr);
-void   smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void   smp_masked_invlpg_range(u_int mask, vm_offset_t startva,
-           vm_offset_t endva);
-void   smp_invltlb(void);
-void   smp_masked_invltlb(u_int mask);
-
-#ifdef STOP_NMI
-int    ipi_nmi_handler(void);
-#endif
+void   *permanent_io_mapping(vm_paddr_t);
+u_int  mp_bootaddress          (u_int);
+int    mp_probe                (void);
+void   mp_start                (void);
+void   mp_announce             (void);
+u_int  isa_apic_mask           (u_int);
+int    isa_apic_irq            (int);
+int    pci_apic_irq            (int, int, int);
+int    apic_irq                (int, int);
+int    next_apic_irq           (int);
+int    undirect_isa_irq        (int);
+int    undirect_pci_irq        (int);
+int    apic_bus_type           (int);
+int    apic_src_bus_id         (int, int);
+int    apic_src_bus_irq        (int, int);
+int    apic_int_type           (int, int);
+int    apic_trigger            (int, int);
+int    apic_polarity           (int, int);
+void   assign_apic_irq         (int apic, int intpin, int irq);
+void   revoke_apic_irq         (int irq);
+void   init_secondary          (void);
+int    stop_cpus               (u_int);
+void   ap_init                 (void);
+int    restart_cpus            (u_int);
+void   forward_signal          (struct proc *);
+
+/* global data in mpapic.c */
+extern volatile lapic_t                *lapic;
+extern volatile ioapic_t       **ioapic;
+
+/* functions in mpapic.c */
+void   apic_dump               (char*);
+void   apic_initialize         (boolean_t);
+void   imen_dump               (void);
+int    apic_ipi                (int, int, int);
+void   selected_apic_ipi       (u_int, int, int);
+void   single_apic_ipi(int cpu, int vector, int delivery_mode);
+int    single_apic_ipi_passive(int cpu, int vector, int delivery_mode);
+int    io_apic_setup           (int);
+void   io_apic_setup_intpin    (int, int);
+void   io_apic_set_id          (int, int);
+int    io_apic_get_id          (int);
+int    ext_int_setup           (int, int);
+
+#if defined(READY)
+void   clr_io_apic_mask24      (int, u_int32_t);
+void   set_io_apic_mask24      (int, u_int32_t);
+#endif /* READY */
+
+void   set_apic_timer          (int);
+int    read_apic_timer         (void);
+void   u_sleep                 (int);
+void   cpu_send_ipiq           (int);
+int    cpu_send_ipiq_passive   (int);
+
+/* global data in init_smp.c */
+extern cpumask_t               smp_active_mask;
 
 #endif /* !LOCORE */
-#else /* !SMP */
+#else  /* !SMP */
 
-#define smp_active_mask 1      /* smp_active_mask always 1 on UP machines */
+#define        smp_active_mask 1       /* smp_active_mask always 1 on UP machines */
 
-#endif /* !SMP */
+#endif
 
 #endif /* _KERNEL */
 #endif /* _MACHINE_SMP_H_ */
index 23da2b1..4495939 100644 (file)
 /* IPIQ rendezvous */
 #define XIPIQ_OFFSET           (IDT_OFFSET + 115)
 
+/* TIMER rendezvous */
+#define XTIMER_OFFSET          (IDT_OFFSET + 116)
+
 /* IPI to signal CPUs to stop and wait for another CPU to restart them */
 #define XCPUSTOP_OFFSET                (IDT_OFFSET + 128)
 
@@ -151,6 +154,7 @@ inthand_t
        Xforward_irq,   /* Forward irq to cpu holding ISR lock */
        Xcpustop,       /* CPU stops & waits for another CPU to restart it */
        Xspuriousint,   /* handle APIC "spurious INTs" */
+       Xtimer,         /* handle LAPIC timer INT */
        Xipiq;          /* handle lwkt_send_ipiq() requests */
 #endif /* SMP */
 
index 10ac6e1..df35c5c 100644 (file)
@@ -254,8 +254,8 @@ npx_probe(device_t dev)
        save_idt_npxtrap = idt[16];
        outb(IO_ICU1 + 1, ~(1 << ICU_IRQ_SLAVE));
        outb(IO_ICU2 + 1, ~(1 << (npx_irq - 8)));
-       setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-       setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+       setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, 0);
+       setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, 0);
        npx_idt_probeintr = idt[npx_intrno];
        cpu_enable_intr();
        result = npx_probe1(dev);