kernel - Refactor cpumask_t to extend cpus past 64, part 2/2
author     Matthew Dillon <dillon@apollo.backplane.com>
           Fri, 4 Jul 2014 19:59:15 +0000 (12:59 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Fri, 4 Jul 2014 19:59:15 +0000 (12:59 -0700)
* Expand SMP_MAXCPU from 64 to 256 (64-bit only)

* Expand cpumask_t from 64 to 256 bits (usage sketch below)

* Refactor the C macros and the assembly code.

* Add misc cpu_pause()s and do a bit of work on the boot sequencing.

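A rough sketch of how the widened mask is meant to be consumed, using only the
macros introduced in the types.h hunk below (the wrapper function is a
hypothetical illustration, not part of this commit):

    static void
    report_active_cpus_example(void)        /* hypothetical illustration */
    {
            cpumask_t mask;
            int cpu;

            mask = smp_active_mask;          /* copy; the macros modify in place */
            while (CPUMASK_TESTNZERO(mask)) {
                    cpu = BSFCPUMASK(mask);  /* lowest set cpu, now 0..255 */
                    kprintf("cpu %d active\n", cpu);
                    CPUMASK_NANDBIT(mask, cpu);
            }
    }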
16 files changed:
sys/cpu/x86_64/include/param.h
sys/cpu/x86_64/include/types.h
sys/kern/init_main.c
sys/kern/lwkt_ipiq.c
sys/platform/pc64/apic/apic_vector.s
sys/platform/pc64/x86_64/genassym.c
sys/platform/pc64/x86_64/machdep.c
sys/platform/pc64/x86_64/mp_machdep.c
sys/platform/pc64/x86_64/mptable.c
sys/platform/pc64/x86_64/swtch.s
sys/platform/vkernel64/x86_64/swtch.s
sys/vm/vm_extern.h
sys/vm/vm_fault.c
sys/vm/vm_kern.h
sys/vm/vm_map.c
sys/vm/vm_pager.c

diff --git a/sys/cpu/x86_64/include/param.h b/sys/cpu/x86_64/include/param.h
index ac134fd..c39e45a 100644
 /*
  * Use SMP_MAXCPU instead of MAXCPU for structures that are intended to
  * remain compatible between UP and SMP builds.
+ *
+ * WARNING!  CPUMASK macros in include/types.h must also be adjusted,
+ *          as well as any assembly.  Be sure that CPUMASK_ELEMENTS
+ *          is always correct so incompatible assembly #error's out
+ *          during the kernel compile.
  */
-#define SMP_MAXCPU     64
+#define SMP_MAXCPU     256
 #define MAXCPU         SMP_MAXCPU
 
 #define ALIGNBYTES     _ALIGNBYTES
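The guard the warning above refers to: genassym.c (below) exports
CPUMASK_ELEMENTS to the assembler, and each hand-unrolled .s sequence checks it
so a future resize fails the build instead of silently corrupting masks:

    #if CPUMASK_ELEMENTS != 4
    #error "assembly incompatible with cpumask_t"
    #endif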
diff --git a/sys/cpu/x86_64/include/types.h b/sys/cpu/x86_64/include/types.h
index 15cfb86..3590274 100644
@@ -81,53 +81,185 @@ typedef __uint32_t      cpulock_t;      /* count and exclusive lock */
  *         whole mask.
  */
 
-#define CPUMASK_ELEMENTS       1       /* tested by assembly for #error */
+#define CPUMASK_ELEMENTS       4       /* tested by assembly for #error */
 
 typedef struct {
-       __uint64_t      m0;
+       __uint64_t      ary[4];
 } cpumask_t;
 
 #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
 
-#define CPUMASK_INITIALIZER_ALLONES    { .m0 = (__uint64_t)-1 }
-#define CPUMASK_INITIALIZER_ONLYONE    { .m0 = 1 }
+#define CPUMASK_INITIALIZER_ALLONES    { .ary = { (__uint64_t)-1, \
+                                         (__uint64_t)-1, \
+                                         (__uint64_t)-1, \
+                                         (__uint64_t)-1 } }
+#define CPUMASK_INITIALIZER_ONLYONE    { .ary = { 1, 0, 0, 0 } }
 
 #define CPUMASK_SIMPLE(cpu)    ((__uint64_t)1 << (cpu))
-#define BSRCPUMASK(val)                bsrq((val).m0)
-#define BSFCPUMASK(val)                bsfq((val).m0)
-
-#define CPUMASK_CMPMASKEQ(val1, val2)  ((val1).m0 == (val2).m0)
-#define CPUMASK_CMPMASKNEQ(val1, val2) ((val1).m0 != (val2).m0)
-#define CPUMASK_ISUP(val)              ((val).m0 == 1)
-
-#define CPUMASK_TESTZERO(val)          ((val).m0 == 0)
-#define CPUMASK_TESTNZERO(val)         ((val).m0 != 0)
-#define CPUMASK_TESTBIT(val, i)                ((val).m0 & CPUMASK_SIMPLE(i))
-#define CPUMASK_TESTMASK(val1, val2)   ((val1).m0 & (val2.m0))
-#define CPUMASK_LOWMASK(val)           (val).m0
-
-#define CPUMASK_ORBIT(mask, i)         (mask).m0 |= CPUMASK_SIMPLE(i)
-#define CPUMASK_ANDBIT(mask, i)                (mask).m0 &= CPUMASK_SIMPLE(i)
-#define CPUMASK_NANDBIT(mask, i)       (mask).m0 &= ~CPUMASK_SIMPLE(i)
-
-#define CPUMASK_ASSZERO(mask)          (mask).m0 = 0
-#define CPUMASK_ASSALLONES(mask)       (mask).m0 = (__uint64_t)-1
-#define CPUMASK_ASSBIT(mask, i)                (mask).m0 = CPUMASK_SIMPLE(i)
-#define CPUMASK_ASSBMASK(mask, i)      (mask).m0 = (CPUMASK_SIMPLE(i) - 1)
-#define CPUMASK_ASSNBMASK(mask, i)     (mask).m0 = ~(CPUMASK_SIMPLE(i) - 1)
-
-#define CPUMASK_ANDMASK(mask, val)     (mask).m0 &= (val).m0
-#define CPUMASK_NANDMASK(mask, val)    (mask).m0 &= ~(val).m0
-#define CPUMASK_ORMASK(mask, val)      (mask).m0 |= (val).m0
-
-#define ATOMIC_CPUMASK_ORBIT(mask, i)          \
-                       atomic_set_cpumask(&(mask).m0, CPUMASK_SIMPLE(i))
-#define ATOMIC_CPUMASK_NANDBIT(mask, i)                \
-                        atomic_clear_cpumask(&(mask).m0, CPUMASK_SIMPLE(i))
-#define ATOMIC_CPUMASK_ORMASK(mask, val)               \
-                       atomic_set_cpumask(&(mask).m0, val.m0)
-#define ATOMIC_CPUMASK_NANDMASK(mask, val)     \
-                       atomic_clear_cpumask(&(mask).m0, val.m0)
+
+#define BSRCPUMASK(val)                ((val).ary[3] ? 192 + bsrq((val).ary[3]) : \
+                               ((val).ary[2] ? 128 + bsrq((val).ary[2]) : \
+                               ((val).ary[1] ? 64 + bsrq((val).ary[1]) : \
+                                               bsrq((val).ary[0]))))
+
+#define BSFCPUMASK(val)                ((val).ary[0] ? bsfq((val).ary[0]) : \
+                               ((val).ary[1] ? 64 + bsfq((val).ary[1]) : \
+                               ((val).ary[2] ? 128 + bsfq((val).ary[2]) : \
+                                               192 + bsfq((val).ary[3]))))
+
+#define CPUMASK_CMPMASKEQ(val1, val2)  ((val1).ary[0] == (val2).ary[0] && \
+                                        (val1).ary[1] == (val2).ary[1] && \
+                                        (val1).ary[2] == (val2).ary[2] && \
+                                        (val1).ary[3] == (val2).ary[3])
+
+#define CPUMASK_CMPMASKNEQ(val1, val2) ((val1).ary[0] != (val2).ary[0] || \
+                                        (val1).ary[1] != (val2).ary[1] || \
+                                        (val1).ary[2] != (val2).ary[2] || \
+                                        (val1).ary[3] != (val2).ary[3])
+
+#define CPUMASK_ISUP(val)              ((val).ary[0] == 1 && \
+                                        (val).ary[1] == 0 && \
+                                        (val).ary[2] == 0 && \
+                                        (val).ary[3] == 0)
+
+#define CPUMASK_TESTZERO(val)          ((val).ary[0] == 0 && \
+                                        (val).ary[1] == 0 && \
+                                        (val).ary[2] == 0 && \
+                                        (val).ary[3] == 0)
+
+#define CPUMASK_TESTNZERO(val)         ((val).ary[0] != 0 || \
+                                        (val).ary[1] != 0 || \
+                                        (val).ary[2] != 0 || \
+                                        (val).ary[3] != 0)
+
+#define CPUMASK_TESTBIT(val, i)                ((val).ary[((i) >> 6) & 3] & \
+                                        CPUMASK_SIMPLE((i) & 63))
+
+#define CPUMASK_TESTMASK(val1, val2)   (((val1).ary[0] & (val2.ary[0])) || \
+                                        ((val1).ary[1] & (val2.ary[1])) || \
+                                        ((val1).ary[2] & (val2.ary[2])) || \
+                                        ((val1).ary[3] & (val2.ary[3])))
+
+#define CPUMASK_LOWMASK(val)           ((val).ary[0])
+
+#define CPUMASK_ORBIT(mask, i)         ((mask).ary[((i) >> 6) & 3] |= \
+                                        CPUMASK_SIMPLE((i) & 63))
+
+#define CPUMASK_ANDBIT(mask, i)                ((mask).ary[((i) >> 6) & 3] &= \
+                                        CPUMASK_SIMPLE((i) & 63))
+
+#define CPUMASK_NANDBIT(mask, i)       ((mask).ary[((i) >> 6) & 3] &= \
+                                        ~CPUMASK_SIMPLE((i) & 63))
+
+#define CPUMASK_ASSZERO(mask)          do {                            \
+                                       (mask).ary[0] = 0;              \
+                                       (mask).ary[1] = 0;              \
+                                       (mask).ary[2] = 0;              \
+                                       (mask).ary[3] = 0;              \
+                                       } while(0)
+
+#define CPUMASK_ASSALLONES(mask)       do {                            \
+                                       (mask).ary[0] = (__uint64_t)-1; \
+                                       (mask).ary[1] = (__uint64_t)-1; \
+                                       (mask).ary[2] = (__uint64_t)-1; \
+                                       (mask).ary[3] = (__uint64_t)-1; \
+                                       } while(0)
+
+#define CPUMASK_ASSBIT(mask, i)                do {                            \
+                                               CPUMASK_ASSZERO(mask);  \
+                                               CPUMASK_ORBIT(mask, i); \
+                                       } while(0)
+
+#define CPUMASK_ASSBMASK(mask, i)      do {                            \
+               if (i < 64) {                                           \
+                       (mask).ary[0] = CPUMASK_SIMPLE(i) - 1;          \
+                       (mask).ary[1] = 0;                              \
+                       (mask).ary[2] = 0;                              \
+                       (mask).ary[3] = 0;                              \
+               } else if (i < 128) {                                   \
+                       (mask).ary[0] = (__uint64_t)-1;                 \
+                       (mask).ary[1] = CPUMASK_SIMPLE((i) - 64) - 1;   \
+                       (mask).ary[2] = 0;                              \
+                       (mask).ary[3] = 0;                              \
+               } else if (i < 192) {                                   \
+                       (mask).ary[0] = (__uint64_t)-1;                 \
+                       (mask).ary[1] = (__uint64_t)-1;                 \
+                       (mask).ary[2] = CPUMASK_SIMPLE((i) - 128) - 1;  \
+                       (mask).ary[3] = 0;                              \
+               } else {                                                \
+                       (mask).ary[0] = (__uint64_t)-1;                 \
+                       (mask).ary[1] = (__uint64_t)-1;                 \
+                       (mask).ary[2] = (__uint64_t)-1;                 \
+                       (mask).ary[3] = CPUMASK_SIMPLE((i) - 192) - 1;  \
+               }                                                       \
+                                       } while(0)
+
+#define CPUMASK_ASSNBMASK(mask, i)     do {                            \
+               if (i < 64) {                                           \
+                       (mask).ary[0] = ~(CPUMASK_SIMPLE(i) - 1);       \
+                       (mask).ary[1] = (__uint64_t)-1;                 \
+                       (mask).ary[2] = (__uint64_t)-1;                 \
+                       (mask).ary[3] = (__uint64_t)-1;                 \
+               } else if (i < 128) {                                   \
+                       (mask).ary[0] = 0;                              \
+                       (mask).ary[1] = ~(CPUMASK_SIMPLE((i) - 64) - 1);\
+                       (mask).ary[2] = (__uint64_t)-1;                 \
+                       (mask).ary[3] = (__uint64_t)-1;                 \
+               } else if (i < 192) {                                   \
+                       (mask).ary[0] = 0;                              \
+                       (mask).ary[1] = 0;                              \
+                       (mask).ary[2] = ~(CPUMASK_SIMPLE((i) - 128) - 1);\
+                       (mask).ary[3] = (__uint64_t)-1;                 \
+               } else {                                                \
+                       (mask).ary[0] = 0;                              \
+                       (mask).ary[1] = 0;                              \
+                       (mask).ary[2] = 0;                              \
+                       (mask).ary[3] = ~(CPUMASK_SIMPLE((i) - 192) - 1);\
+               }                                                       \
+                                       } while(0)
+
+#define CPUMASK_ANDMASK(mask, val)     do {                            \
+                                       (mask).ary[0] &= (val).ary[0];  \
+                                       (mask).ary[1] &= (val).ary[1];  \
+                                       (mask).ary[2] &= (val).ary[2];  \
+                                       (mask).ary[3] &= (val).ary[3];  \
+                                       } while(0)
+
+#define CPUMASK_NANDMASK(mask, val)    do {                            \
+                                       (mask).ary[0] &= ~(val).ary[0]; \
+                                       (mask).ary[1] &= ~(val).ary[1]; \
+                                       (mask).ary[2] &= ~(val).ary[2]; \
+                                       (mask).ary[3] &= ~(val).ary[3]; \
+                                       } while(0)
+
+#define CPUMASK_ORMASK(mask, val)      do {                            \
+                                       (mask).ary[0] |= (val).ary[0];  \
+                                       (mask).ary[1] |= (val).ary[1];  \
+                                       (mask).ary[2] |= (val).ary[2];  \
+                                       (mask).ary[3] |= (val).ary[3];  \
+                                       } while(0)
+
+#define ATOMIC_CPUMASK_ORBIT(mask, i)                                    \
+                       atomic_set_cpumask(&(mask).ary[((i) >> 6) & 3],   \
+                                          CPUMASK_SIMPLE((i) & 63))
+
+#define ATOMIC_CPUMASK_NANDBIT(mask, i)                                          \
+                       atomic_clear_cpumask(&(mask).ary[((i) >> 6) & 3], \
+                                          CPUMASK_SIMPLE((i) & 63))
+
+#define ATOMIC_CPUMASK_ORMASK(mask, val) do {                            \
+                       atomic_set_cpumask(&(mask).ary[0], (val).ary[0]); \
+                       atomic_set_cpumask(&(mask).ary[1], (val).ary[1]); \
+                       atomic_set_cpumask(&(mask).ary[2], (val).ary[2]); \
+                       atomic_set_cpumask(&(mask).ary[3], (val).ary[3]); \
+                                        } while(0)
+
+#define ATOMIC_CPUMASK_NANDMASK(mask, val) do {                                    \
+                       atomic_clear_cpumask(&(mask).ary[0], (val).ary[0]); \
+                       atomic_clear_cpumask(&(mask).ary[1], (val).ary[1]); \
+                       atomic_clear_cpumask(&(mask).ary[2], (val).ary[2]); \
+                       atomic_clear_cpumask(&(mask).ary[3], (val).ary[3]); \
+                                        } while(0)
 
 #endif
 
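Note that the ATOMIC_CPUMASK_*MASK forms above issue one locked operation per
64-bit element, so they are atomic per word but not across the whole 256-bit
mask; the single-bit forms remain fully atomic because a bit always lives in
exactly one word. A minimal usage sketch, assuming the usual mycpu/gd_cpuid
globaldata accessors (some_global_mask is a placeholder):

    /* fully atomic: the bit is confined to one 64-bit element */
    ATOMIC_CPUMASK_ORBIT(smp_active_mask, mycpu->gd_cpuid);

    /*
     * per-element atomic only: other cpus may observe a partially
     * updated mask between the four locked operations
     */
    ATOMIC_CPUMASK_ORMASK(some_global_mask, mycpu->gd_cpumask);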
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 4ee1176..da57822 100644
@@ -252,6 +252,9 @@ restart:
                if (sip->subsystem == SI_SPECIAL_DONE)
                        continue;
 
+               if (bootverbose)
+                       kprintf("(%08x-%p)\n", sip->subsystem, sip->func);
+
                /* Call function */
                (*(sip->func))(sip->udata);
 
diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c
index f41ef01..466ccb7 100644
@@ -511,6 +511,7 @@ lwkt_wait_ipiq(globaldata_t target, int seq)
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
+               cpu_pause();
                cpu_lfence();
            }
            DEBUG_POP_INFO();
@@ -958,10 +959,29 @@ lwkt_cpusync_remote2(lwkt_cpusync_t cs)
        lwkt_ipiq_t ip;
        int wi;
 
+       cpu_pause();
 #ifdef _KERNEL_VIRTUAL
        pthread_yield();
 #endif
+       cpu_lfence();
+
+       /*
+        * Requeue our IPI to avoid a deep stack recursion.  If no other
+        * IPIs are pending we can just loop up, which should help VMs
+        * better-detect spin loops.
+        */
        ip = &gd->gd_cpusyncq;
+#if 0
+       if (ip->ip_rindex == ip->ip_windex) {
+               __asm __volatile("cli");
+               if (ip->ip_rindex == ip->ip_windex) {
+                       __asm __volatile("sti; hlt");
+               } else {
+                       __asm __volatile("sti");
+               }
+       }
+#endif
+
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_info[wi].func = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
        ip->ip_info[wi].arg1 = cs;
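The cpu_pause() calls added here and in mp_machdep.c below all follow one
busy-wait idiom; a minimal sketch of it (the helper name and flag are
hypothetical):

    static __inline void
    spin_until_set(volatile int *flag)       /* hypothetical helper */
    {
            while (*flag == 0) {
                    cpu_pause();    /* spin hint for the pipeline and for VMs */
                    cpu_lfence();   /* force *flag to be re-read, not speculated */
            }
    }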
diff --git a/sys/platform/pc64/apic/apic_vector.s b/sys/platform/pc64/apic/apic_vector.s
index 1dc531a..ba29086 100644
@@ -223,8 +223,6 @@ Xcpustop:
        addq    %rax, %rdi
        call    CNAME(savectx)          /* Save process context */
 
-       movslq  PCPU(cpuid), %rax
-
        /*
         * Indicate that we have stopped and loop waiting for permission
         * to start again.  We must still process IPI events while in a
@@ -233,24 +231,58 @@ Xcpustop:
         * Interrupts must remain enabled for non-IPI'd per-cpu interrupts
         * (e.g. Xtimer, Xinvltlb).
         */
-       MPLOCKED
-       btsq    %rax, stopped_cpus      /* stopped_cpus |= (1<<id) */
+#if CPUMASK_ELEMENTS != 4
+#error "assembly incompatible with cpumask_t"
+#endif
+       movq    PCPU(cpumask)+0,%rax    /* stopped_cpus |= 1 << cpuid */
+       MPLOCKED orq %rax, stopped_cpus+0
+       movq    PCPU(cpumask)+8,%rax
+       MPLOCKED orq %rax, stopped_cpus+8
+       movq    PCPU(cpumask)+16,%rax
+       MPLOCKED orq %rax, stopped_cpus+16
+       movq    PCPU(cpumask)+24,%rax
+       MPLOCKED orq %rax, stopped_cpus+24
        sti
 1:
        andl    $~RQF_IPIQ,PCPU(reqflags)
-       pushq   %rax
        call    lwkt_smp_stopped
-       popq    %rax
        pause
-       btq     %rax, started_cpus      /* while (!(started_cpus & (1<<id))) */
-       jnc     1b
-
-       MPLOCKED
-       btrq    %rax, started_cpus      /* started_cpus &= ~(1<<id) */
-       MPLOCKED
-       btrq    %rax, stopped_cpus      /* stopped_cpus &= ~(1<<id) */
 
-       testq   %rax, %rax
+       subq    %rdi,%rdi
+       movq    started_cpus+0,%rax     /* while (!(started_cpus & (1<<id))) */
+       andq    PCPU(cpumask)+0,%rax
+       orq     %rax,%rdi
+       movq    started_cpus+8,%rax
+       andq    PCPU(cpumask)+8,%rax
+       orq     %rax,%rdi
+       movq    started_cpus+16,%rax
+       andq    PCPU(cpumask)+16,%rax
+       orq     %rax,%rdi
+       movq    started_cpus+24,%rax
+       andq    PCPU(cpumask)+24,%rax
+       orq     %rax,%rdi
+       testq   %rdi,%rdi
+       jz      1b
+
+       movq    PCPU(other_cpus)+0,%rax /* started_cpus &= ~(1 << cpuid) */
+       MPLOCKED andq %rax, started_cpus+0
+       movq    PCPU(other_cpus)+8,%rax
+       MPLOCKED andq %rax, started_cpus+8
+       movq    PCPU(other_cpus)+16,%rax
+       MPLOCKED andq %rax, started_cpus+16
+       movq    PCPU(other_cpus)+24,%rax
+       MPLOCKED andq %rax, started_cpus+24
+
+       movq    PCPU(other_cpus)+0,%rax /* stopped_cpus &= ~(1 << cpuid) */
+       MPLOCKED andq %rax, stopped_cpus+0
+       movq    PCPU(other_cpus)+8,%rax
+       MPLOCKED andq %rax, stopped_cpus+8
+       movq    PCPU(other_cpus)+16,%rax
+       MPLOCKED andq %rax, stopped_cpus+16
+       movq    PCPU(other_cpus)+24,%rax
+       MPLOCKED andq %rax, stopped_cpus+24
+
+       cmpl    $0,PCPU(cpuid)
        jnz     2f
 
        movq    CNAME(cpustop_restartfunc), %rax
@@ -535,12 +567,21 @@ MCOUNT_LABEL(eintr)
 
        .data
 
+#if CPUMASK_ELEMENTS != 4
+#error "assembly incompatible with cpumask_t"
+#endif
 /* variables used by stop_cpus()/restart_cpus()/Xcpustop */
        .globl stopped_cpus, started_cpus
 stopped_cpus:
        .quad   0
+       .quad   0
+       .quad   0
+       .quad   0
 started_cpus:
        .quad   0
+       .quad   0
+       .quad   0
+       .quad   0
 
        .globl CNAME(cpustop_restartfunc)
 CNAME(cpustop_restartfunc):
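For reference, a rough C rendering of what the rewritten Xcpustop now does with
the four-element masks (illustration only; the real code stays in assembly and
also clears RQF_IPIQ around the lwkt_smp_stopped() call, and uses gd_other_cpus
to clear its own bit):

    globaldata_t gd = mycpu;

    ATOMIC_CPUMASK_ORMASK(stopped_cpus, gd->gd_cpumask);     /* stopped_cpus |= self */
    while (CPUMASK_TESTMASK(started_cpus, gd->gd_cpumask) == 0) {
            lwkt_smp_stopped();                              /* keep servicing IPIs */
            cpu_pause();
    }
    ATOMIC_CPUMASK_NANDMASK(started_cpus, gd->gd_cpumask);   /* ack the restart */
    ATOMIC_CPUMASK_NANDMASK(stopped_cpus, gd->gd_cpumask);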
diff --git a/sys/platform/pc64/x86_64/genassym.c b/sys/platform/pc64/x86_64/genassym.c
index cc951c0..1222a0e 100644
@@ -244,6 +244,8 @@ ASSYM(CPULOCK_EXCL, CPULOCK_EXCL);
 ASSYM(CPULOCK_INCR, CPULOCK_INCR);
 ASSYM(CPULOCK_CNTMASK, CPULOCK_CNTMASK);
 
+ASSYM(CPUMASK_ELEMENTS, CPUMASK_ELEMENTS);
+
 ASSYM(IOAPIC_IRQI_ADDR, offsetof(struct ioapic_irqinfo, io_addr));
 ASSYM(IOAPIC_IRQI_IDX, offsetof(struct ioapic_irqinfo, io_idx));
 ASSYM(IOAPIC_IRQI_FLAGS, offsetof(struct ioapic_irqinfo, io_flags));
diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c
index c0f7696..daca69b 100644
@@ -1116,6 +1116,7 @@ cpu_idle(void)
 
        crit_exit();
        KKASSERT(td->td_critcount == 0);
+
        for (;;) {
                /*
                 * See if there are any LWKTs ready to go.
diff --git a/sys/platform/pc64/x86_64/mp_machdep.c b/sys/platform/pc64/x86_64/mp_machdep.c
index 564ccf9..22fa888 100644
@@ -518,12 +518,15 @@ start_all_aps(u_int boot_addr)
        tsc_offsets[0] = 0;
        rel_mplock();
        while (CPUMASK_CMPMASKNEQ(smp_lapic_mask, smp_startup_mask)) {
+               cpu_pause();
                cpu_lfence();
                if (cpu_feature & CPUID_TSC)
                        tsc0_offset = rdtsc();
        }
-       while (try_mplock() == 0)
-               ;
+       while (try_mplock() == 0) {
+               cpu_pause();
+               cpu_lfence();
+       }
 
        /* number of APs actually started */
        return ncpus - 1;
@@ -1009,10 +1012,14 @@ ap_init(void)
         * from improperly caching mp_finish_lapic, and the cpu from improperly
         * caching it.
         */
-       while (mp_finish_lapic == 0)
+       while (mp_finish_lapic == 0) {
+               cpu_pause();
                cpu_lfence();
-       while (try_mplock() == 0)
-               ;
+       }
+       while (try_mplock() == 0) {
+               cpu_pause();
+               cpu_lfence();
+       }
 
        if (cpu_feature & CPUID_TSC) {
                /*
@@ -1063,10 +1070,14 @@ ap_init(void)
         * from improperly caching mp_finish, and the cpu from improperly
         * caching it.
         */
-       while (mp_finish == 0)
+       while (mp_finish == 0) {
+               cpu_pause();
                cpu_lfence();
-       while (try_mplock() == 0)
-               ;
+       }
+       while (try_mplock() == 0) {
+               cpu_pause();
+               cpu_lfence();
+       }
 
        /* BSP may have changed PTD while we're waiting for the lock */
        cpu_invltlb();
@@ -1108,6 +1119,16 @@ ap_init(void)
         */
        rel_mplock();
        KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
+
+#if 0
+       /*
+        * This is a qemu aid.  If we go into the normal idle loop qemu
+        */
+       while (mp_finish != 2) {
+               ;
+               /*__asm__ __volatile("hlt");*/
+       }
+#endif
 }
 
 /*
@@ -1121,12 +1142,17 @@ ap_finish(void)
        if (bootverbose)
                kprintf("Finish MP startup\n");
        rel_mplock();
+
        while (CPUMASK_CMPMASKNEQ(smp_active_mask, smp_startup_mask)) {
+               cpu_pause();
                cpu_lfence();
+       }
+       while (try_mplock() == 0) {
                cpu_pause();
+               cpu_lfence();
        }
-       while (try_mplock() == 0)
-               ;
+       mp_finish = 2;
+
        if (bootverbose) {
                kprintf("Active CPU Mask: %016jx\n",
                        (uintmax_t)CPUMASK_LOWMASK(smp_active_mask));
diff --git a/sys/platform/pc64/x86_64/mptable.c b/sys/platform/pc64/x86_64/mptable.c
index 108c745..fa872d0 100644
@@ -787,7 +787,7 @@ mptable_lapic_enumerate(struct lapic_enumerator *e)
                cpumask_t mask;
 
                CPUMASK_ASSZERO(mask);
-               mask.m0 = arg1.ht_apicid_mask;
+               mask.ary[0] = arg1.ht_apicid_mask;
                logical_cpus = mptable_hyperthread_fixup(mask, arg1.cpu_count);
                if (logical_cpus != 0)
                        arg1.cpu_count *= logical_cpus;
diff --git a/sys/platform/pc64/x86_64/swtch.s b/sys/platform/pc64/x86_64/swtch.s
index 5af61f7..36b8db9 100644
@@ -146,9 +146,18 @@ ENTRY(cpu_heavy_switch)
        jz      1f
        cmpq    LWP_VMSPACE(%r13),%rcx          /* same vmspace? */
        je      2f
+#if CPUMASK_ELEMENTS != 4
+#error "assembly incompatible with cpumask_t"
+#endif
 1:
-       movslq  PCPU(cpuid), %rax
-       MPLOCKED btrq   %rax, VM_PMAP+PM_ACTIVE(%rcx)
+       movq    PCPU(other_cpus)+0,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(other_cpus)+8,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(other_cpus)+16,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(other_cpus)+24,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+24(%rcx)
 2:
 
        /*
@@ -252,9 +261,15 @@ ENTRY(cpu_exit_switch)
        movq    TD_LWP(%rbx),%rcx
        testq   %rcx,%rcx
        jz      2f
-       movslq  PCPU(cpuid), %rax
        movq    LWP_VMSPACE(%rcx), %rcx         /* RCX = vmspace */
-       MPLOCKED btrq   %rax, VM_PMAP+PM_ACTIVE(%rcx)
+       movq    PCPU(other_cpus)+0,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(other_cpus)+8,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(other_cpus)+16,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(other_cpus)+24,%rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+24(%rcx)
 2:
        /*
         * Switch to the next thread.  RET into the restore function, which
@@ -320,8 +335,19 @@ ENTRY(cpu_heavy_restore)
         */
        movq    TD_LWP(%rax),%rcx
        movq    LWP_VMSPACE(%rcx),%rcx          /* RCX = vmspace */
-       movq    PCPU(cpumask),%rsi              /* new contents */
-       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE(%rcx)
+
+#if CPUMASK_ELEMENTS != 4
+#error "assembly incompatible with cpumask_t"
+#endif
+       movq    PCPU(cpumask)+0,%rsi            /* new contents */
+       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(cpumask)+8,%rsi
+       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(cpumask)+16,%rsi
+       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(cpumask)+24,%rsi
+       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE+24(%rcx)
+
        movl    VM_PMAP+PM_ACTIVE_LOCK(%rcx),%esi
        testl   $CPULOCK_EXCL,%esi
        jz      1f
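Both swtch.s files (pc64 here, vkernel64 below) now maintain pm_active one
element at a time instead of with a single btrq/btsq. In C macro terms the
intent is roughly the following (vm stands for the outgoing or incoming lwp's
vmspace; the assembly clears the bit by ANDing each element with gd_other_cpus
rather than calling a macro):

    struct pmap *pmap = &vm->vm_pmap;       /* vm: old or new lwp's vmspace */
    globaldata_t gd = mycpu;

    /* cpu_heavy_switch / cpu_exit_switch: this cpu leaves the old pmap */
    ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, gd->gd_cpuid);

    /* cpu_heavy_restore: this cpu joins the new pmap */
    ATOMIC_CPUMASK_ORBIT(pmap->pm_active, gd->gd_cpuid);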
diff --git a/sys/platform/vkernel64/x86_64/swtch.s b/sys/platform/vkernel64/x86_64/swtch.s
index d29f82d..46c971a 100644
@@ -147,8 +147,14 @@ ENTRY(cpu_heavy_switch)
        cmpq    LWP_VMSPACE(%r13),%rcx          /* same vmspace? */
        je      2f
 1:
-       movslq  PCPU(cpuid), %rax
-       MPLOCKED btrq   %rax, VM_PMAP+PM_ACTIVE(%rcx)
+       movq    PCPU(other_cpus)+0, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(other_cpus)+8, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(other_cpus)+16, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(other_cpus)+24, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+24(%rcx)
 2:
 
        /*
@@ -252,9 +258,15 @@ ENTRY(cpu_exit_switch)
        movq    TD_LWP(%rbx),%rcx
        testq   %rcx,%rcx
        jz      2f
-       movslq  PCPU(cpuid), %rax
        movq    LWP_VMSPACE(%rcx), %rcx         /* RCX = vmspace */
-       MPLOCKED btrq   %rax, VM_PMAP+PM_ACTIVE(%rcx)
+       movq    PCPU(other_cpus)+0, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(other_cpus)+8, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(other_cpus)+16, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(other_cpus)+24, %rax
+       MPLOCKED andq   %rax, VM_PMAP+PM_ACTIVE+24(%rcx)
 2:
        /*
         * Switch to the next thread.  RET into the restore function, which
@@ -309,8 +321,16 @@ ENTRY(cpu_heavy_restore)
         */
        movq    TD_LWP(%rax),%rcx
        movq    LWP_VMSPACE(%rcx), %rcx         /* RCX = vmspace */
-       movq    PCPU(cpumask),%rsi              /* new contents */
-       MPLOCKED orq %rsi, VM_PMAP+PM_ACTIVE(%rcx)
+
+       movq    PCPU(other_cpus)+0, %rsi
+       MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+0(%rcx)
+       movq    PCPU(other_cpus)+8, %rsi
+       MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+8(%rcx)
+       movq    PCPU(other_cpus)+16, %rsi
+       MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+16(%rcx)
+       movq    PCPU(other_cpus)+24, %rsi
+       MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+24(%rcx)
+
        movl    VM_PMAP+PM_ACTIVE_LOCK(%rcx),%esi
        testl   $CPULOCK_EXCL,%esi
        jz      1f
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index e657fee..507a5c1 100644
@@ -99,7 +99,7 @@ void vm_fault_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
 void vm_fault_unwire (vm_map_t, vm_map_entry_t);
-int vm_fault_wire (vm_map_t, vm_map_entry_t, boolean_t);
+int vm_fault_wire (vm_map_t, vm_map_entry_t, boolean_t, int);
 void vm_fork (struct proc *, struct proc *, int);
 int vm_test_nominal (void);
 void vm_wait_nominal (void);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 0ed1c67..064afcd 100644
@@ -278,7 +278,9 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
        struct lwp *lp;
        int growstack;
        int retry = 0;
+       int inherit_prot;
 
+       inherit_prot = fault_type & VM_PROT_NOSYNC;
        vm_page_pcpu_cache();
        fs.hardfault = 0;
        fs.fault_flags = fault_flags;
@@ -524,7 +526,8 @@ RetryFault:
         */
        KKASSERT(fs.lookup_still_valid == TRUE);
        vm_page_flag_set(fs.m, PG_REFERENCED);
-       pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot, fs.wired, fs.entry);
+       pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot | inherit_prot,
+                  fs.wired, fs.entry);
        mycpu->gd_cnt.v_vm_faults++;
        if (curthread->td_lwp)
                ++curthread->td_lwp->lwp_ru.ru_minflt;
@@ -1952,7 +1955,8 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
  * No requirements.
  */
 int
-vm_fault_wire(vm_map_t map, vm_map_entry_t entry, boolean_t user_wire)
+vm_fault_wire(vm_map_t map, vm_map_entry_t entry,
+             boolean_t user_wire, int kmflags)
 {
        boolean_t fictitious;
        vm_offset_t start;
@@ -1962,9 +1966,21 @@ vm_fault_wire(vm_map_t map, vm_map_entry_t entry, boolean_t user_wire)
        vm_page_t m;
        pmap_t pmap;
        int rv;
+       int wire_prot;
+       int fault_flags;
 
        lwkt_gettoken(&map->token);
 
+       if (user_wire) {
+               wire_prot = VM_PROT_READ;
+               fault_flags = VM_FAULT_USER_WIRE;
+       } else {
+               wire_prot = VM_PROT_READ | VM_PROT_WRITE;
+               fault_flags = VM_FAULT_CHANGE_WIRING;
+       }
+       if (kmflags & KM_NOTLBSYNC)
+               wire_prot |= VM_PROT_NOSYNC;
+
        pmap = vm_map_pmap(map);
        start = entry->start;
        end = entry->end;
@@ -1981,13 +1997,7 @@ vm_fault_wire(vm_map_t map, vm_map_entry_t entry, boolean_t user_wire)
         * map.
         */
        for (va = start; va < end; va += PAGE_SIZE) {
-               if (user_wire) {
-                       rv = vm_fault(map, va, VM_PROT_READ, 
-                                       VM_FAULT_USER_WIRE);
-               } else {
-                       rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE,
-                                       VM_FAULT_CHANGE_WIRING);
-               }
+               rv = vm_fault(map, va, wire_prot, fault_flags);
                if (rv) {
                        while (va > start) {
                                va -= PAGE_SIZE;
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index 2bd0160..a8f09aa 100644
@@ -81,6 +81,7 @@
 #define KM_PAGEABLE    0x0001
 #define KM_KRESERVE    0x0002
 #define KM_STACK       0x0004
+#define KM_NOTLBSYNC   0x0008
 
 /* Kernel memory management definitions. */
 extern struct vm_map buffer_map;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 58b0c15..ce1cecd 100644
@@ -2180,7 +2180,7 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
                         */
                        save_start = entry->start;
                        save_end = entry->end;
-                       rv = vm_fault_wire(map, entry, TRUE);
+                       rv = vm_fault_wire(map, entry, TRUE, 0);
                        if (rv) {
                                CLIP_CHECK_BACK(entry, save_start);
                                for (;;) {
@@ -2405,7 +2405,7 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
                        vm_offset_t save_end = entry->end;
 
                        if (entry->wired_count == 1)
-                               rv = vm_fault_wire(map, entry, FALSE);
+                               rv = vm_fault_wire(map, entry, FALSE, kmflags);
                        if (rv) {
                                CLIP_CHECK_BACK(entry, save_start);
                                for (;;) {
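The new KM_NOTLBSYNC flag is assumed to be handed from kmem_alloc3() down
through vm_map_wire() and vm_fault_wire() (changed above), where it becomes
VM_PROT_NOSYNC on the wiring faults; the caller then owns the TLB shootdown.
A sketch of the caller-side pattern the vm_pager.c hunk below uses (buf and
bytes are placeholders):

    /* wire the allocation without per-page TLB-sync IPIs ... */
    buf = (void *)kmem_alloc3(&kernel_map, round_page(bytes), KM_NOTLBSYNC);
    /* ... then issue one global invalidation afterward */
    smp_invltlb();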
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 193813f..003165d 100644
@@ -240,10 +240,16 @@ vm_pager_bufferinit(void *dummy __unused)
         * Initial pbuf setup.  These pbufs do not have KVA reservations,
         * so we can have a lot more of them.  These are typically used
         * to massage low level buf/bio requests.
+        *
+        * NOTE: We use KM_NOTLBSYNC here to reduce unnecessary IPIs
+        *       during startup, which can really slow down emulated
+        *       systems.
         */
        nswbuf_raw = nbuf * 2;
-       swbuf_raw = (void *)kmem_alloc(&kernel_map,
-                               round_page(nswbuf_raw * sizeof(struct buf)));
+       swbuf_raw = (void *)kmem_alloc3(&kernel_map,
+                               round_page(nswbuf_raw * sizeof(struct buf)),
+                               KM_NOTLBSYNC);
+       smp_invltlb();
        bp = swbuf_raw;
        for (i = 0; i < nswbuf_raw; ++i, ++bp) {
                BUF_LOCKINIT(bp);