From 1997b4c272782aad27f0d0387f9189ed1e1befbd Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sun, 6 Jul 2014 12:27:04 -0700
Subject: [PATCH] kernel - extend cpus past 64 - fixes and adjustments

* Reorder the SMP cpu boot code to remove a great deal of lock contention.
  The APs must still loop waiting for the BSP to adjust the stage, but they
  no longer need to hold a token or spinlock, so startup under emulation is
  considerably faster.

* Do not initialize our systimer periodics on each target cpu from the idle
  thread bootstrap.  Previously, with the MP lock held, the locks acquired
  during this initialization were serialized and could not block.  Now that
  cpu startup runs mostly concurrently, that is no longer the case.

  Instead, systimer periodics are handled by process 0 as a post-smp-startup
  call.

* statclock() now uses sys_cputimer->count() directly to calculate the
  delta time.

* The TSC is now registered as the sys_cputimer before any systimer
  periodics (particularly statclock()) are set up, allowing the system to
  take control away from the i8254 earlier.

* Clean up struct lwkt_ipiq.  Remove the 'lwkt_ipiq' typedef.  Calculate
  allocation sizes separately.

* Add a new loader.conf tunable, hw.tsc_cputimer_force.  If set to 1 and a
  TSC is present, the system will force invariant and mpsync operation and
  always use the TSC as the cputimer (primarily useful for qemu).

* Remove the unnecessary kmem_alloc() of the globaldata structure.  We have
  been using a static array for a long time, so this allocation was simply
  wasting memory.

* Make the boot stack bigger for the APs.
---
 sys/kern/kern_clock.c                 |  90 +++++++++++++++-----
 sys/platform/pc32/i386/mp_machdep.c   |  24 ++++--
 sys/platform/pc64/isa/clock.c         |  20 ++++-
 sys/platform/pc64/x86_64/machdep.c    |   2 +
 sys/platform/pc64/x86_64/mp_machdep.c | 116 ++++++++++++++++++--------
 sys/platform/vkernel/i386/mp.c        |  13 +--
 sys/platform/vkernel64/x86_64/mp.c    |  13 +--
 sys/sys/globaldata.h                  |   5 +-
 sys/sys/kernel.h                      |   1 +
 sys/sys/thread.h                      |   4 +-
 10 files changed, 209 insertions(+), 79 deletions(-)

diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 202f69ca95..e7e971a9a5 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -92,6 +92,7 @@
 #include
 #include
+#include
 #include
 #include
 
@@ -255,7 +256,12 @@ initclocks(void *dummy)
 }
 
 /*
- * Called on a per-cpu basis
+ * Called on a per-cpu basis from the idle thread bootstrap on each cpu
+ * during SMP initialization.
+ *
+ * This routine is called concurrently during low-level SMP initialization
+ * and may not block in any way.  Meaning, among other things, we can't
+ * acquire any tokens.
  */
 void
 initclocks_pcpu(void)
@@ -274,23 +280,49 @@ initclocks_pcpu(void)
 
 	systimer_intr_enable();
 
+	crit_exit();
+}
+
+/*
+ * This routine is called on just the BSP, just after SMP initialization
+ * completes to finish initializing any clocks that might contend/block
+ * (e.g. like on a token).  We can't do this in initclocks_pcpu() because
+ * that function is called from the idle thread bootstrap for each cpu and
+ * is not allowed to block at all.
+ */
+static
+void
+initclocks_other(void *dummy)
+{
+	struct globaldata *ogd = mycpu;
+	struct globaldata *gd;
+	int n;
+
+	for (n = 0; n < ncpus; ++n) {
+		lwkt_setcpu_self(globaldata_find(n));
+		gd = mycpu;
+
+		/*
+		 * Use a non-queued periodic systimer to prevent multiple
+		 * ticks from building up if the sysclock jumps forward
+		 * (8254 gets reset).  The sysclock will never jump backwards.
+		 * Our time sync is based on the actual sysclock, not the
+		 * ticks count.
+		 */
+		systimer_init_periodic_nq(&gd->gd_hardclock, hardclock,
+					  NULL, hz);
+		systimer_init_periodic_nq(&gd->gd_statclock, statclock,
+					  NULL, stathz);
+		/* XXX correct the frequency for scheduler / estcpu tests */
+		systimer_init_periodic_nq(&gd->gd_schedclock, schedclock,
+					  NULL, ESTCPUFREQ);
 #ifdef IFPOLL_ENABLE
-	ifpoll_init_pcpu(gd->gd_cpuid);
+		ifpoll_init_pcpu(gd->gd_cpuid);
 #endif
-
-	/*
-	 * Use a non-queued periodic systimer to prevent multiple ticks from
-	 * building up if the sysclock jumps forward (8254 gets reset).  The
-	 * sysclock will never jump backwards.  Our time sync is based on
-	 * the actual sysclock, not the ticks count.
-	 */
-	systimer_init_periodic_nq(&gd->gd_hardclock, hardclock, NULL, hz);
-	systimer_init_periodic_nq(&gd->gd_statclock, statclock, NULL, stathz);
-	/* XXX correct the frequency for scheduler / estcpu tests */
-	systimer_init_periodic_nq(&gd->gd_schedclock, schedclock,
-	    NULL, ESTCPUFREQ);
-	crit_exit();
+	}
+	lwkt_setcpu_self(ogd);
 }
+SYSINIT(clocks2, SI_BOOT2_POST_SMP, SI_ORDER_ANY, initclocks_other, NULL)
 
 /*
  * This sets the current real time of day.  Timespecs are in seconds and
@@ -587,13 +619,32 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
 	thread_t td;
 	struct proc *p;
 	int bump;
-	struct timeval tv;
-	struct timeval *stv;
+	sysclock_t cv;
+	sysclock_t scv;
 
 	/*
-	 * How big was our timeslice relative to the last time?
+	 * How big was our timeslice relative to the last time?  Calculate
+	 * in microseconds.
+	 *
+	 * NOTE: Use of microuptime() is typically MPSAFE, but usually not
+	 *	 during early boot.  Just use the systimer count to be nice
+	 *	 to e.g. qemu.  The systimer has a better chance of being
+	 *	 MPSAFE at early boot.
	 */
-	microuptime(&tv);	/* mpsafe */
+	cv = sys_cputimer->count();
+	scv = mycpu->statint.gd_statcv;
+	if (scv == 0) {
+		bump = 1;
+	} else {
+		bump = (sys_cputimer->freq64_usec * (cv - scv)) >> 32;
+		if (bump < 0)
+			bump = 0;
+		if (bump > 1000000)
+			bump = 1000000;
+	}
+	mycpu->statint.gd_statcv = cv;
+
+#if 0
 	stv = &mycpu->gd_stattv;
 	if (stv->tv_sec == 0) {
 		bump = 1;
@@ -606,6 +657,7 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
 		bump = 1000000;
 	}
 	*stv = tv;
+#endif
 
 	td = curthread;
 	p = td->td_proc;
diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c
index 6454027f7a..dbce97dbbc 100644
--- a/sys/platform/pc32/i386/mp_machdep.c
+++ b/sys/platform/pc32/i386/mp_machdep.c
@@ -329,6 +329,7 @@ start_all_aps(u_int boot_addr)
 	struct privatespace *ps;
 	char *stack;
 	uintptr_t kptbase;
+	size_t ipiq_size;
 
 	POSTCODE(START_ALL_APS_POST);
 
@@ -385,7 +386,6 @@ start_all_aps(u_int boot_addr)
 
 	/* start each AP */
 	for (x = 1; x <= naps; ++x) {
-		/* This is a bit verbose, it will go away soon. */
 
 		/* first page of AP's private space */
 
@@ -436,8 +436,10 @@ start_all_aps(u_int boot_addr)
 			kmem_alloc_nofault(&kernel_map, SEG_SIZE, SEG_SIZE);
 		gd->gd_GDMAP1 = &PTD[(vm_offset_t)gd->gd_GDADDR1 >> PDRSHIFT];
 
-		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (naps + 1));
-		bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (naps + 1));
+		ipiq_size = sizeof(struct lwkt_ipiq) * (naps + 1);
+
+		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+		bzero(gd->mi.gd_ipiq, ipiq_size);
 
 		/*
 		 * Setup the AP boot stack
@@ -479,9 +481,10 @@ start_all_aps(u_int boot_addr)
 	/* build our map of 'other' CPUs */
 	mycpu->gd_other_cpus = smp_startup_mask;
 	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);
-	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-	    sizeof(lwkt_ipiq) * ncpus);
-	bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+
+	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
+	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+	bzero(mycpu->gd_ipiq, ipiq_size);
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1123,12 +1126,15 @@ cpu_send_ipiq_passive(int dcpu)
 static void
 mp_bsp_simple_setup(void)
 {
+	size_t ipiq_size;
+
 	/* build our map of 'other' CPUs */
 	mycpu->gd_other_cpus = smp_startup_mask;
 	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);
-	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-	    sizeof(lwkt_ipiq) * ncpus);
-	bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+
+	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
+	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+	bzero(mycpu->gd_ipiq, ipiq_size);
 
 	pmap_set_opt();
 
diff --git a/sys/platform/pc64/isa/clock.c b/sys/platform/pc64/isa/clock.c
index 5b97da2138..7b07069802 100644
--- a/sys/platform/pc64/isa/clock.c
+++ b/sys/platform/pc64/isa/clock.c
@@ -756,9 +756,16 @@ startrtclock(void)
 
 	/*
 	 * Can we use the TSC?
+	 *
+	 * NOTE: If running under qemu, probably a good idea to force the
+	 *	 TSC because we are not likely to detect it as being
+	 *	 invariant or mpsyncd if you don't.  This will greatly
+	 *	 reduce SMP contention.
 	 */
 	if (cpu_feature & CPUID_TSC) {
 		tsc_present = 1;
+		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
+
 		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
 		     cpu_vendor_id == CPU_VENDOR_AMD) &&
 		    cpu_exthigh >= 0x80000007) {
@@ -1220,6 +1227,15 @@ tsc_mpsync_test(void)
 		return;
 	}
 
+	/*
+	 * Forcing can be used w/qemu to reduce contention
+	 */
+	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
+	if (tsc_mpsync) {
+		kprintf("TSC as cputimer forced\n");
+		return;
+	}
+
 	if (cpu_vendor_id != CPU_VENDOR_INTEL) {
 		/* XXX only Intel works */
 		return;
 	}
 
@@ -1321,8 +1337,8 @@ tsc_cputimer_register(void)
 	cputimer_register(&tsc_cputimer);
 	cputimer_select(&tsc_cputimer, 0);
 }
-SYSINIT(tsc_cputimer_reg, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
-	tsc_cputimer_register, NULL);
+SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
+	tsc_cputimer_register, NULL);
 
 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c
index daca69b11c..cf6ffe3557 100644
--- a/sys/platform/pc64/x86_64/machdep.c
+++ b/sys/platform/pc64/x86_64/machdep.c
@@ -1144,6 +1144,8 @@ cpu_idle(void)
  *	will be slow waking up.  Slows down e.g.
  *	compiles and other pipe/event oriented stuff.
  *
+ * 4	Always use HLT.
+ *
  * NOTE: Interrupts are enabled and we are not in a critical
  *	 section.
* diff --git a/sys/platform/pc64/x86_64/mp_machdep.c b/sys/platform/pc64/x86_64/mp_machdep.c index 22fa888d4c..ffd399ebc4 100644 --- a/sys/platform/pc64/x86_64/mp_machdep.c +++ b/sys/platform/pc64/x86_64/mp_machdep.c @@ -174,6 +174,7 @@ static cpumask_t smp_startup_mask = CPUMASK_INITIALIZER_ONLYONE; static cpumask_t smp_lapic_mask = CPUMASK_INITIALIZER_ONLYONE; /* which cpus are ready for IPIs etc? */ cpumask_t smp_active_mask = CPUMASK_INITIALIZER_ONLYONE; +cpumask_t smp_finalize_mask = CPUMASK_INITIALIZER_ONLYONE; SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); static u_int bootMP_size; @@ -355,6 +356,7 @@ start_all_aps(u_int boot_addr) u_long mpbioswarmvec; struct mdglobaldata *gd; struct privatespace *ps; + size_t ipiq_size; POSTCODE(START_ALL_APS_POST); @@ -429,12 +431,13 @@ start_all_aps(u_int boot_addr) /* start each AP */ for (x = 1; x <= naps; ++x) { - /* This is a bit verbose, it will go away soon. */ +#if 0 /* allocate new private data page(s) */ gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, MDGLOBALDATA_BASEALLOC_SIZE); +#endif gd = &CPU_prvspace[x].mdglobaldata; /* official location */ bzero(gd, sizeof(*gd)); @@ -443,8 +446,9 @@ start_all_aps(u_int boot_addr) /* prime data page for it to use */ mi_gdinit(&gd->mi, x); cpu_gdinit(gd, x); - gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (naps + 1)); - bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (naps + 1)); + ipiq_size = sizeof(struct lwkt_ipiq) * (naps + 1); + gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size); + bzero(gd->mi.gd_ipiq, ipiq_size); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; @@ -455,7 +459,7 @@ start_all_aps(u_int boot_addr) /* * Setup the AP boot stack */ - bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; + bootSTK = &ps->idlestack[UPAGES * PAGE_SIZE - PAGE_SIZE]; bootAP = x; /* attempt to start the Application Processor */ @@ -492,9 +496,10 @@ start_all_aps(u_int boot_addr) /* build our map of 'other' CPUs */ mycpu->gd_other_cpus = smp_startup_mask; CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid); - mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, - sizeof(lwkt_ipiq) * ncpus); - bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); + + ipiq_size = sizeof(struct lwkt_ipiq) * ncpus; + mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size); + bzero(mycpu->gd_ipiq, ipiq_size); /* restore the warmstart vector */ *(u_long *) WARMBOOT_OFF = mpbioswarmvec; @@ -510,13 +515,14 @@ start_all_aps(u_int boot_addr) /* * Wait all APs to finish initializing LAPIC */ - mp_finish_lapic = 1; if (bootverbose) kprintf("SMP: Waiting APs LAPIC initialization\n"); if (cpu_feature & CPUID_TSC) tsc0_offset = rdtsc(); tsc_offsets[0] = 0; + mp_finish_lapic = 1; rel_mplock(); + while (CPUMASK_CMPMASKNEQ(smp_lapic_mask, smp_startup_mask)) { cpu_pause(); cpu_lfence(); @@ -1016,10 +1022,12 @@ ap_init(void) cpu_pause(); cpu_lfence(); } +#if 0 while (try_mplock() == 0) { cpu_pause(); cpu_lfence(); } +#endif if (cpu_feature & CPUID_TSC) { /* @@ -1034,7 +1042,7 @@ ap_init(void) /* Build our map of 'other' CPUs. 
*/ mycpu->gd_other_cpus = smp_startup_mask; - CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid); + ATOMIC_CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid); /* A quick check from sanity claus */ cpu_id = APICID_TO_CPUID((lapic->id & 0xff000000) >> 24); @@ -1052,11 +1060,13 @@ ap_init(void) lapic_init(FALSE); /* LAPIC initialization is done */ - CPUMASK_ORBIT(smp_lapic_mask, mycpu->gd_cpuid); + ATOMIC_CPUMASK_ORBIT(smp_lapic_mask, mycpu->gd_cpuid); cpu_mfence(); +#if 0 /* Let BSP move onto the next initialization stage */ rel_mplock(); +#endif /* * Interlock for finalization. Wait until mp_finish is non-zero, @@ -1074,10 +1084,6 @@ ap_init(void) cpu_pause(); cpu_lfence(); } - while (try_mplock() == 0) { - cpu_pause(); - cpu_lfence(); - } /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -1099,36 +1105,52 @@ ap_init(void) * The idle thread is never placed on the runq, make sure * nothing we've done put it there. */ - KKASSERT(get_mplock_count(curthread) == 1); - CPUMASK_ORBIT(smp_active_mask, mycpu->gd_cpuid); /* - * Enable interrupts here. idle_restore will also do it, but - * doing it here lets us clean up any strays that got posted to - * the CPU during the AP boot while we are still in a critical - * section. + * Hold a critical section and allow real interrupts to occur. Zero + * any spurious interrupts which have accumulated, then set our + * smp_active_mask indicating that we are fully operational. */ + crit_enter(); __asm __volatile("sti; pause; pause"::); bzero(mdcpu->gd_ipending, sizeof(mdcpu->gd_ipending)); + ATOMIC_CPUMASK_ORBIT(smp_active_mask, mycpu->gd_cpuid); + /* + * Wait until all cpus have set their smp_active_mask and have fully + * operational interrupts before proceeding. + * + * We need a final cpu_invltlb() because we would not have received + * any until we set our bit in smp_active_mask. + */ + while (mp_finish == 1) { + cpu_pause(); + cpu_lfence(); + } + cpu_invltlb(); + + /* + * Initialize per-cpu clocks and do other per-cpu initialization. + * At this point code is expected to be able to use the full kernel + * API. + */ initclocks_pcpu(); /* clock interrupts (via IPIs) */ - lwkt_process_ipiq(); /* - * Releasing the mp lock lets the BSP finish up the SMP init + * Since we may have cleaned up the interrupt triggers, manually + * process any pending IPIs before exiting our critical section. + * Once the critical section has exited, normal interrupt processing + * may occur. */ - rel_mplock(); - KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); + lwkt_process_ipiq(); + crit_exit_noyield(mycpu->gd_curthread); -#if 0 /* - * This is a qemu aid. If we go into the normal idle loop qemu + * Final final, allow the waiting BSP to resume the boot process, + * return 'into' the idle thread bootstrap. */ - while (mp_finish != 2) { - ; - /*__asm__ __volatile("hlt");*/ - } -#endif + ATOMIC_CPUMASK_ORBIT(smp_finalize_mask, mycpu->gd_cpuid); + KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); } /* @@ -1138,20 +1160,39 @@ static void ap_finish(void) { - mp_finish = 1; if (bootverbose) kprintf("Finish MP startup\n"); rel_mplock(); + /* + * Wait for the active mask to complete, after which all cpus will + * be accepting interrupts. + */ + mp_finish = 1; while (CPUMASK_CMPMASKNEQ(smp_active_mask, smp_startup_mask)) { cpu_pause(); cpu_lfence(); } + + /* + * Wait for the finalization mask to complete, after which all cpus + * have completely finished initializing and are entering or are in + * their idle thread. 
+	 *
+	 * BSP should have received all required invltlbs but do another
+	 * one just in case.
+	 */
+	cpu_invltlb();
+	mp_finish = 2;
+	while (CPUMASK_CMPMASKNEQ(smp_finalize_mask, smp_startup_mask)) {
+		cpu_pause();
+		cpu_lfence();
+	}
+
 	while (try_mplock() == 0) {
 		cpu_pause();
 		cpu_lfence();
 	}
-	mp_finish = 2;
 
 	if (bootverbose) {
 		kprintf("Active CPU Mask: %016jx\n",
@@ -1187,12 +1228,15 @@ cpu_send_ipiq_passive(int dcpu)
 static void
 mp_bsp_simple_setup(void)
 {
+	size_t ipiq_size;
+
 	/* build our map of 'other' CPUs */
 	mycpu->gd_other_cpus = smp_startup_mask;
 	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);
-	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-	    sizeof(lwkt_ipiq) * ncpus);
-	bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+
+	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
+	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+	bzero(mycpu->gd_ipiq, ipiq_size);
 
 	pmap_set_opt();
 
diff --git a/sys/platform/vkernel/i386/mp.c b/sys/platform/vkernel/i386/mp.c
index fba99c77d5..e680de537f 100644
--- a/sys/platform/vkernel/i386/mp.c
+++ b/sys/platform/vkernel/i386/mp.c
@@ -143,6 +143,7 @@ void
 mp_start(void)
 {
 	int shift;
+	size_t ipiq_size;
 
 	ncpus = optcpus;
 
@@ -165,9 +166,9 @@ mp_start(void)
 	/*
 	 * cpu0 initialization
 	 */
-	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-	    sizeof(lwkt_ipiq) * ncpus);
-	bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
+	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+	bzero(mycpu->gd_ipiq, ipiq_size);
 
 	/*
 	 * cpu 1-(n-1)
@@ -379,6 +380,7 @@ start_all_aps(u_int boot_addr)
 	struct privatespace *ps;
 	vm_page_t m;
 	vm_offset_t va;
+	size_t ipiq_size;
 #if 0
 	struct lwp_params params;
 #endif
@@ -424,8 +426,9 @@ start_all_aps(u_int boot_addr)
 		gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1;
 #endif
-		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
-		bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
+		ipiq_size = sizeof(struct lwkt_ipiq) * (mp_naps + 1);
+		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+		bzero(gd->mi.gd_ipiq, ipiq_size);
 
 		/*
 		 * Setup the AP boot stack
 
diff --git a/sys/platform/vkernel64/x86_64/mp.c b/sys/platform/vkernel64/x86_64/mp.c
index c42f6e976b..e5f51bc22f 100644
--- a/sys/platform/vkernel64/x86_64/mp.c
+++ b/sys/platform/vkernel64/x86_64/mp.c
@@ -147,7 +149,9 @@ pthread_t ap_tids[MAXCPU];
 void
 mp_start(void)
 {
+	size_t ipiq_size;
 	int shift;
+
 	ncpus = optcpus;
 	mp_naps = ncpus - 1;
 
@@ -169,9 +171,9 @@ mp_start(void)
 	/*
 	 * cpu0 initialization
 	 */
-	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map,
-	    sizeof(lwkt_ipiq) * ncpus);
-	bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
+	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
+	mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+	bzero(mycpu->gd_ipiq, ipiq_size);
 
 	/*
 	 * cpu 1-(n-1)
@@ -426,8 +428,9 @@ start_all_aps(u_int boot_addr)
 		gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1;
 #endif
-		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
-		bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
+		ipiq_size = sizeof(struct lwkt_ipiq) * (mp_naps + 1);
+		gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size);
+		bzero(gd->mi.gd_ipiq, ipiq_size);
 
 		/*
 		 * Setup the AP boot stack
 
diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h
index 8c3d8b0916..0042d38cd7 100644
--- a/sys/sys/globaldata.h
+++ b/sys/sys/globaldata.h
@@ -130,7 +130,10 @@ struct globaldata {
 	__uint32_t	gd_cpuid;
 	cpumask_t	gd_cpumask;	/* CPUMASK_ASSBIT(cpuid) */
 	cpumask_t	gd_other_cpus;	/* mask of 'other' cpus */
-	struct timeval	gd_stattv;
+	union {
+		struct timeval	gd_stattv;
+		sysclock_t	gd_statcv;
+	} statint;
 	int		gd_intr_nesting_level;	/* hard code, intrs, ipis */
 	struct vmmeter	gd_cnt;
 	struct vmtotal	gd_vmtotal;
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 60f8e24925..1ada08020b 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -157,6 +157,7 @@ enum sysinit_sub_id {
 	SI_BOOT2_SOFTCLOCK	= 0x1b80000,
 	SI_BOOT2_CLOCKS		= 0x1c00000,	/* select & start clocks */
 	SI_BOOT2_FINISH_SMP	= 0x1c80000,	/* SMP go (& synch clocks) */
+	SI_BOOT2_POST_SMP	= 0x1cc0000,	/* post-SMP low level */
 
 	/*
 	 * Finish up core kernel initialization and set up the process
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index b6aac848f2..0ff2e93838 100644
--- a/sys/sys/thread.h
+++ b/sys/sys/thread.h
@@ -194,7 +194,7 @@ typedef void (*ipifunc1_t)(void *arg);
 typedef void (*ipifunc2_t)(void *arg, int arg2);
 typedef void (*ipifunc3_t)(void *arg, int arg2, struct intrframe *frame);
 
-typedef struct lwkt_ipiq {
+struct lwkt_ipiq {
     int ip_rindex;	/* only written by target cpu */
     int ip_xindex;	/* written by target, indicates completion */
    int ip_windex;	/* only written by source cpu */
@@ -204,7 +204,7 @@ typedef struct lwkt_ipiq {
	int arg2;
	char filler[32 - sizeof(int) - sizeof(void *) * 2];
    } ip_info[MAXCPUFIFO];
-} lwkt_ipiq;
+};
 
 /*
  * CPU Synchronization structure.  See lwkt_cpusync_start() and
-- 
2.41.0
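
A note on the fixed-point time math used by the patched statclock() above:
the expression bump = (sys_cputimer->freq64_usec * (cv - scv)) >> 32 turns a
raw cputimer tick delta into microseconds using 32.32 fixed point.  The
stand-alone C sketch below illustrates only that conversion; the names
fake_cputimer and ticks_to_usec(), and the assumption that freq64_usec holds
(1000000 << 32) / freq, are illustrative stand-ins and are not taken
verbatim from this patch or from the kernel headers.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-in for the kernel's cputimer frequency fields:
 * freq is timer ticks per second, freq64_usec is microseconds-per-tick
 * in 32.32 fixed point, assumed here to be (1000000 << 32) / freq.
 */
struct fake_cputimer {
	uint64_t freq;		/* timer ticks per second */
	uint64_t freq64_usec;	/* usec per tick, 32.32 fixed point */
};

/* Convert a tick delta to whole microseconds, as the patched statclock() does. */
static uint64_t
ticks_to_usec(const struct fake_cputimer *timer, uint64_t delta_ticks)
{
	return (timer->freq64_usec * delta_ticks) >> 32;
}

int
main(void)
{
	struct fake_cputimer tsc;

	/* Example: a TSC-like timer running at 2.4 GHz. */
	tsc.freq = 2400000000ULL;
	tsc.freq64_usec = (1000000ULL << 32) / tsc.freq;

	/* 4,800,000 ticks at 2.4 GHz is ~2000 usec (truncated slightly low). */
	printf("%llu usec\n",
	    (unsigned long long)ticks_to_usec(&tsc, 4800000ULL));
	return 0;
}

Keeping the delta in the cputimer's own tick domain is what lets the new
statclock() avoid microuptime() during early boot, when the time-of-day code
is less likely to be MPSAFE than the raw systimer count.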