| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1996, by Steve Passe | |
| 3 | * All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions | |
| 7 | * are met: | |
| 8 | * 1. Redistributions of source code must retain the above copyright | |
| 9 | * notice, this list of conditions and the following disclaimer. | |
| 10 | * 2. The name of the developer may NOT be used to endorse or promote products | |
| 11 | * derived from this software without specific prior written permission. | |
| 12 | * | |
| 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
| 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
| 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 23 | * SUCH DAMAGE. | |
| 24 | * | |
| 25 | * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ | |
| c0c5de70 | 26 | * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $ |
| 984263bc MD |
27 | */ |
| 28 | ||
| 29 | #include "opt_cpu.h" | |
| 984263bc | 30 | |
| 984263bc MD |
31 | #include <sys/param.h> |
| 32 | #include <sys/systm.h> | |
| 33 | #include <sys/kernel.h> | |
| 984263bc MD |
34 | #include <sys/sysctl.h> |
| 35 | #include <sys/malloc.h> | |
| 36 | #include <sys/memrange.h> | |
| 984263bc | 37 | #include <sys/cons.h> /* cngetc() */ |
| 37e7efec | 38 | #include <sys/machintr.h> |
| 984263bc MD |
39 | |
| 40 | #include <vm/vm.h> | |
| 41 | #include <vm/vm_param.h> | |
| 42 | #include <vm/pmap.h> | |
| 43 | #include <vm/vm_kern.h> | |
| 44 | #include <vm/vm_extern.h> | |
| 984263bc MD |
45 | #include <sys/lock.h> |
| 46 | #include <vm/vm_map.h> | |
| 47 | #include <sys/user.h> | |
| 48 | #ifdef GPROF | |
| 49 | #include <sys/gmon.h> | |
| 50 | #endif | |
| 984263bc | 51 | |
| 684a93c4 MD |
52 | #include <sys/mplock2.h> |
| 53 | ||
| 984263bc | 54 | #include <machine/smp.h> |
| a9295349 | 55 | #include <machine_base/apic/apicreg.h> |
| 984263bc MD |
56 | #include <machine/atomic.h> |
| 57 | #include <machine/cpufunc.h> | |
| 90e8a35b | 58 | #include <machine/cputypes.h> |
| ac032dad | 59 | #include <machine_base/icu/icu_var.h> |
| e0918665 | 60 | #include <machine_base/apic/ioapic_abi.h> |
| 3340ac41 | 61 | #include <machine_base/apic/lapic.h> |
| 4298586a | 62 | #include <machine_base/apic/ioapic.h> |
| 984263bc MD |
63 | #include <machine/psl.h> |
| 64 | #include <machine/segments.h> | |
| 984263bc MD |
65 | #include <machine/tss.h> |
| 66 | #include <machine/specialreg.h> | |
| 67 | #include <machine/globaldata.h> | |
| 4117f2fd | 68 | #include <machine/pmap_inval.h> |
| 984263bc | 69 | |
| 984263bc | 70 | #include <machine/md_var.h> /* setidt() */ |
| 87cf6827 SZ |
71 | #include <machine_base/icu/icu.h> /* IPIs */ |
| 72 | #include <machine/intr_machdep.h> /* IPIs */ | |
| 984263bc | 73 | |
| 984263bc MD |
74 | #define WARMBOOT_TARGET 0 |
| 75 | #define WARMBOOT_OFF (KERNBASE + 0x0467) | |
| 76 | #define WARMBOOT_SEG (KERNBASE + 0x0469) | |
| 77 | ||
| 984263bc MD |
78 | #define CMOS_REG (0x70) |
| 79 | #define CMOS_DATA (0x71) | |
| 80 | #define BIOS_RESET (0x0f) | |
| 81 | #define BIOS_WARM (0x0a) | |
| 82 | ||
/*
 * AP boot checkpoint tracing.
 *
 * This code MUST be enabled both here and in mpboot.s.  It follows the
 * very early stages of AP boot by placing values in CMOS ram.  It will
 * NORMALLY never be needed, hence the primitive method for enabling it
 * (CHECK_POINTS has to be defined by hand).
 */
#if defined(CHECK_POINTS)

/* Select CMOS register A, then read or write its data byte. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * CHECK_INIT(D) stores D into CMOS registers 0x34-0x39 and
 * CHECK_PRINT(S) prints those six registers prefixed by the string S.
 *
 * Both expand to exactly one statement (do/while(0)) so that they behave
 * correctly as the body of an unbraced if/else; the caller supplies the
 * terminating semicolon.
 */
#define CHECK_INIT(D)					\
	do {						\
		CHECK_WRITE(0x34, (D));			\
		CHECK_WRITE(0x35, (D));			\
		CHECK_WRITE(0x36, (D));			\
		CHECK_WRITE(0x37, (D));			\
		CHECK_WRITE(0x38, (D));			\
		CHECK_WRITE(0x39, (D));			\
	} while (0)

#define CHECK_PRINT(S)					\
	do {						\
		kprintf("%s: %d, %d, %d, %d, %d, %d\n",	\
		    (S),				\
		    CHECK_READ(0x34),			\
		    CHECK_READ(0x35),			\
		    CHECK_READ(0x36),			\
		    CHECK_READ(0x37),			\
		    CHECK_READ(0x38),			\
		    CHECK_READ(0x39));			\
	} while (0)

#else				/* CHECK_POINTS */

/* Checkpoints disabled: both macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
| 117 | ||
| 118 | /* | |
| 119 | * Values to send to the POST hardware. | |
| 120 | */ | |
| 121 | #define MP_BOOTADDRESS_POST 0x10 | |
| 122 | #define MP_PROBE_POST 0x11 | |
| 123 | #define MPTABLE_PASS1_POST 0x12 | |
| 124 | ||
| 125 | #define MP_START_POST 0x13 | |
| 126 | #define MP_ENABLE_POST 0x14 | |
| 127 | #define MPTABLE_PASS2_POST 0x15 | |
| 128 | ||
| 129 | #define START_ALL_APS_POST 0x16 | |
| 130 | #define INSTALL_AP_TRAMP_POST 0x17 | |
| 131 | #define START_AP_POST 0x18 | |
| 132 | ||
| 133 | #define MP_ANNOUNCE_POST 0x19 | |
| 134 | ||
| 984263bc MD |
135 | /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ |
| 136 | int current_postcode; | |
| 137 | ||
| 138 | /** XXX FIXME: what system files declare these??? */ | |
| 139 | extern struct region_descriptor r_gdt, r_idt; | |
| 140 | ||
| 2abaa030 SZ |
141 | extern int nkpt; |
| 142 | extern int naps; | |
| 984263bc | 143 | |
| 374133e3 | 144 | int64_t tsc0_offset; |
| 0b698dca | 145 | extern int64_t tsc_offsets[]; |
| 984263bc | 146 | |
| 984263bc MD |
147 | /* AP uses this during bootstrap. Do not staticize. */ |
| 148 | char *bootSTK; | |
| 149 | static int bootAP; | |
| 150 | ||
| 151 | /* Hotwire a 0->4MB V==P mapping */ | |
| 152 | extern pt_entry_t *KPTphys; | |
| 153 | ||
| f13b5eec MD |
154 | /* |
| 155 | * SMP page table page. Setup by locore to point to a page table | |
| 156 | * page from which we allocate per-cpu privatespace areas io_apics, | |
| 157 | * and so forth. | |
| 158 | */ | |
| 984263bc MD |
159 | extern pt_entry_t *SMPpt; |
| 160 | ||
| 161 | struct pcb stoppcbs[MAXCPU]; | |
| 162 | ||
| 984263bc MD |
163 | /* |
| 164 | * Local data and functions. | |
| 165 | */ | |
| 166 | ||
| 984263bc | 167 | static u_int boot_address; |
| 41a01a4d | 168 | static int mp_finish; |
| 52596b13 | 169 | static int mp_finish_lapic; |
| 984263bc | 170 | |
| 984263bc MD |
171 | static void mp_enable(u_int boot_addr); |
| 172 | ||
| 984263bc MD |
173 | static int start_all_aps(u_int boot_addr); |
| 174 | static void install_ap_tramp(u_int boot_addr); | |
| bb467734 MD |
175 | static int start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest); |
| 176 | static int smitest(void); | |
| ac032dad | 177 | static void cpu_simple_setup(void); |
| 984263bc | 178 | |
| 41a01a4d | 179 | static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ |
| 52596b13 | 180 | static cpumask_t smp_lapic_mask = 1; /* which cpus have lapic been inited */ |
| 0f7a3396 MD |
181 | cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ |
| 182 | SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); | |
| 183 | ||
| 984263bc MD |
184 | /* |
| 185 | * Calculate usable address in base memory for AP trampoline code. | |
| 186 | */ | |
| 187 | u_int | |
| 188 | mp_bootaddress(u_int basemem) | |
| 189 | { | |
| 190 | POSTCODE(MP_BOOTADDRESS_POST); | |
| 191 | ||
| ed721f0f SZ |
192 | boot_address = basemem & ~0xfff; /* round down to 4k boundary */ |
| 193 | if ((basemem - boot_address) < bootMP_size) | |
| 984263bc MD |
194 | boot_address -= 4096; /* not enough, lower by 4k */ |
| 195 | ||
| 196 | return boot_address; | |
| 197 | } | |
| 198 | ||
| 984263bc MD |
199 | /* |
| 200 | * Startup the SMP processors. | |
| 201 | */ | |
| 202 | void | |
| 203 | mp_start(void) | |
| 204 | { | |
| 205 | POSTCODE(MP_START_POST); | |
| 50bc991e | 206 | mp_enable(boot_address); |
| 984263bc MD |
207 | } |
| 208 | ||
| 209 | ||
| 210 | /* | |
| 211 | * Print various information about the SMP system hardware and setup. | |
| 212 | */ | |
| 213 | void | |
| 214 | mp_announce(void) | |
| 215 | { | |
| 216 | int x; | |
| 217 | ||
| 218 | POSTCODE(MP_ANNOUNCE_POST); | |
| 219 | ||
| 26be20a0 | 220 | kprintf("DragonFly/MP: Multiprocessor motherboard\n"); |
| 2d901d56 | 221 | kprintf(" cpu0 (BSP): apic id: %2d\n", CPUID_TO_APICID(0)); |
| 2abaa030 | 222 | for (x = 1; x <= naps; ++x) |
| 2d901d56 | 223 | kprintf(" cpu%d (AP): apic id: %2d\n", x, CPUID_TO_APICID(x)); |
| 984263bc | 224 | |
| f45bfca0 | 225 | if (!ioapic_enable) |
| 6ac31e9d | 226 | kprintf(" Warning: APIC I/O disabled\n"); |
| 984263bc MD |
227 | } |
| 228 | ||
| 229 | /* | |
| 230 | * AP cpu's call this to sync up protected mode. | |
| 7160572f MD |
231 | * |
| 232 | * WARNING! We must ensure that the cpu is sufficiently initialized to | |
| 233 | * be able to use to the FP for our optimized bzero/bcopy code before | |
| 234 | * we enter more mainstream C code. | |
| a44bdeec MD |
235 | * |
| 236 | * WARNING! %fs is not set up on entry. This routine sets up %fs. | |
| 984263bc MD |
237 | */ |
| 238 | void | |
| 239 | init_secondary(void) | |
| 240 | { | |
| 241 | int gsel_tss; | |
| 242 | int x, myid = bootAP; | |
| 243 | u_int cr0; | |
| 8a8d5d85 | 244 | struct mdglobaldata *md; |
| 0f7a3396 | 245 | struct privatespace *ps; |
| 984263bc | 246 | |
| 0f7a3396 MD |
247 | ps = &CPU_prvspace[myid]; |
| 248 | ||
| 249 | gdt_segs[GPRIV_SEL].ssd_base = (int)ps; | |
| 984263bc | 250 | gdt_segs[GPROC0_SEL].ssd_base = |
| 0f7a3396 MD |
251 | (int) &ps->mdglobaldata.gd_common_tss; |
| 252 | ps->mdglobaldata.mi.gd_prvspace = ps; | |
| 984263bc MD |
253 | |
| 254 | for (x = 0; x < NGDT; x++) { | |
| 255 | ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); | |
| 256 | } | |
| 257 | ||
| 258 | r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; | |
| 259 | r_gdt.rd_base = (int) &gdt[myid * NGDT]; | |
| 260 | lgdt(&r_gdt); /* does magic intra-segment return */ | |
| 261 | ||
| 262 | lidt(&r_idt); | |
| 263 | ||
| 264 | lldt(_default_ldt); | |
| 7b95be2a | 265 | mdcpu->gd_currentldt = _default_ldt; |
| 984263bc MD |
266 | |
| 267 | gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); | |
| 268 | gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; | |
| 8a8d5d85 | 269 | |
| 0f7a3396 | 270 | md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ |
| 8a8d5d85 MD |
271 | |
| 272 | md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ | |
| 273 | md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); | |
| 274 | md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; | |
| 275 | md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; | |
| 276 | md->gd_common_tssd = *md->gd_tss_gdt; | |
| 984263bc MD |
277 | ltr(gsel_tss); |
| 278 | ||
| 279 | /* | |
| 280 | * Set to a known state: | |
| 281 | * Set by mpboot.s: CR0_PG, CR0_PE | |
| 282 | * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM | |
| 283 | */ | |
| 284 | cr0 = rcr0(); | |
| 285 | cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); | |
| 286 | load_cr0(cr0); | |
| 7160572f | 287 | pmap_set_opt(); /* PSE/4MB pages, etc */ |
| 984263bc | 288 | |
| 7160572f MD |
289 | /* set up CPU registers and state */ |
| 290 | cpu_setregs(); | |
| 291 | ||
| 292 | /* set up FPU state on the AP */ | |
| 293 | npxinit(__INITIAL_NPXCW__); | |
| 294 | ||
| 295 | /* set up SSE registers */ | |
| 296 | enable_sse(); | |
| 984263bc MD |
297 | } |
| 298 | ||
| 984263bc MD |
299 | /******************************************************************* |
| 300 | * local functions and data | |
| 301 | */ | |
| 302 | ||
| 303 | /* | |
| 304 | * start the SMP system | |
| 305 | */ | |
| 306 | static void | |
| 307 | mp_enable(u_int boot_addr) | |
| 308 | { | |
| ac032dad SZ |
309 | int error; |
| 310 | ||
| 984263bc MD |
311 | POSTCODE(MP_ENABLE_POST); |
| 312 | ||
| 1d6d7089 SZ |
313 | if (lapic_enable) { |
| 314 | error = lapic_config(); | |
| 315 | if (error) | |
| 316 | lapic_enable = 0; | |
| ac032dad | 317 | } |
| 984263bc | 318 | |
| 1d6d7089 SZ |
319 | if (lapic_enable) { |
| 320 | /* Initialize BSP's local APIC */ | |
| 321 | lapic_init(TRUE); | |
| 322 | } else if (ioapic_enable) { | |
| 323 | ioapic_enable = 0; | |
| 324 | icu_reinit_noioapic(); | |
| 325 | } | |
| a40ec003 | 326 | |
| 1d6d7089 SZ |
327 | if (lapic_enable) { |
| 328 | /* start each Application Processor */ | |
| 329 | start_all_aps(boot_addr); | |
| 330 | } else { | |
| 331 | cpu_simple_setup(); | |
| 332 | } | |
| 52596b13 | 333 | |
| f45bfca0 | 334 | if (ioapic_enable) { |
| 1d6d7089 SZ |
335 | KASSERT(lapic_enable, |
| 336 | ("I/O APIC is enabled, but LAPIC is disabled\n")); | |
| 0bccf4f5 SZ |
337 | error = ioapic_config(); |
| 338 | if (error) { | |
| f45bfca0 | 339 | ioapic_enable = 0; |
| 0bccf4f5 SZ |
340 | icu_reinit_noioapic(); |
| 341 | lapic_fixup_noioapic(); | |
| 342 | } | |
| 343 | } | |
| 984263bc MD |
344 | } |
| 345 | ||
| f13b5eec | 346 | /* |
| 984263bc MD |
347 | * start each AP in our list |
| 348 | */ | |
| 349 | static int | |
| 350 | start_all_aps(u_int boot_addr) | |
| 351 | { | |
| b45759e1 MD |
352 | int x, i, pg; |
| 353 | int shift; | |
| bb467734 MD |
354 | int smicount; |
| 355 | int smibest; | |
| 356 | int smilast; | |
| 984263bc MD |
357 | u_char mpbiosreason; |
| 358 | u_long mpbioswarmvec; | |
| 8a8d5d85 | 359 | struct mdglobaldata *gd; |
| 0f7a3396 | 360 | struct privatespace *ps; |
| 984263bc MD |
361 | char *stack; |
| 362 | uintptr_t kptbase; | |
| 363 | ||
| 364 | POSTCODE(START_ALL_APS_POST); | |
| 365 | ||
| 984263bc MD |
366 | /* install the AP 1st level boot code */ |
| 367 | install_ap_tramp(boot_addr); | |
| 368 | ||
| 369 | ||
| 370 | /* save the current value of the warm-start vector */ | |
| 371 | mpbioswarmvec = *((u_long *) WARMBOOT_OFF); | |
| 984263bc MD |
372 | outb(CMOS_REG, BIOS_RESET); |
| 373 | mpbiosreason = inb(CMOS_DATA); | |
| 984263bc | 374 | |
| bb467734 MD |
375 | /* setup a vector to our boot code */ |
| 376 | *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; | |
| 377 | *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); | |
| 378 | outb(CMOS_REG, BIOS_RESET); | |
| 379 | outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ | |
| 380 | ||
| 381 | /* | |
| 382 | * If we have a TSC we can figure out the SMI interrupt rate. | |
| 383 | * The SMI does not necessarily use a constant rate. Spend | |
| 384 | * up to 250ms trying to figure it out. | |
| 385 | */ | |
| 386 | smibest = 0; | |
| 387 | if (cpu_feature & CPUID_TSC) { | |
| 388 | set_apic_timer(275000); | |
| 389 | smilast = read_apic_timer(); | |
| 390 | for (x = 0; x < 20 && read_apic_timer(); ++x) { | |
| 391 | smicount = smitest(); | |
| 392 | if (smibest == 0 || smilast - smicount < smibest) | |
| 393 | smibest = smilast - smicount; | |
| 394 | smilast = smicount; | |
| 395 | } | |
| 396 | if (smibest > 250000) | |
| 397 | smibest = 0; | |
| 398 | if (smibest) { | |
| 399 | smibest = smibest * (int64_t)1000000 / | |
| 400 | get_apic_timer_frequency(); | |
| 401 | } | |
| 402 | } | |
| 403 | if (smibest) | |
| 404 | kprintf("SMI Frequency (worst case): %d Hz (%d us)\n", | |
| 405 | 1000000 / smibest, smibest); | |
| 406 | ||
| 407 | ||
| 984263bc MD |
408 | /* set up temporary P==V mapping for AP boot */ |
| 409 | /* XXX this is a hack, we should boot the AP on its own stack/PTD */ | |
| 410 | kptbase = (uintptr_t)(void *)KPTphys; | |
| a44bdeec | 411 | for (x = 0; x < NKPT; x++) { |
| 984263bc MD |
412 | PTD[x] = (pd_entry_t)(PG_V | PG_RW | |
| 413 | ((kptbase + x * PAGE_SIZE) & PG_FRAME)); | |
| a44bdeec | 414 | } |
| 0f7a3396 | 415 | cpu_invltlb(); |
| 984263bc MD |
416 | |
| 417 | /* start each AP */ | |
| 2abaa030 | 418 | for (x = 1; x <= naps; ++x) { |
| 984263bc MD |
419 | |
| 420 | /* This is a bit verbose, it will go away soon. */ | |
| 421 | ||
| 422 | /* first page of AP's private space */ | |
| 423 | pg = x * i386_btop(sizeof(struct privatespace)); | |
| 424 | ||
| 81c04d07 | 425 | /* allocate new private data page(s) */ |
| e4846942 | 426 | gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, |
| 81c04d07 | 427 | MDGLOBALDATA_BASEALLOC_SIZE); |
| 984263bc | 428 | /* wire it into the private page table page */ |
| 81c04d07 MD |
429 | for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) { |
| 430 | SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t) | |
| 431 | (PG_V | PG_RW | vtophys_pte((char *)gd + i)); | |
| 432 | } | |
| 433 | pg += MDGLOBALDATA_BASEALLOC_PAGES; | |
| 434 | ||
| 435 | SMPpt[pg + 0] = 0; /* *gd_CMAP1 */ | |
| 436 | SMPpt[pg + 1] = 0; /* *gd_CMAP2 */ | |
| 437 | SMPpt[pg + 2] = 0; /* *gd_CMAP3 */ | |
| 438 | SMPpt[pg + 3] = 0; /* *gd_PMAP1 */ | |
| 984263bc MD |
439 | |
| 440 | /* allocate and set up an idle stack data page */ | |
| e4846942 | 441 | stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE); |
| 8a8d5d85 | 442 | for (i = 0; i < UPAGES; i++) { |
| 81c04d07 | 443 | SMPpt[pg + 4 + i] = (pt_entry_t) |
| b5b32410 | 444 | (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack)); |
| 8a8d5d85 | 445 | } |
| 984263bc | 446 | |
| 8a8d5d85 MD |
447 | gd = &CPU_prvspace[x].mdglobaldata; /* official location */ |
| 448 | bzero(gd, sizeof(*gd)); | |
| 0f7a3396 | 449 | gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; |
| 8a8d5d85 | 450 | |
| 984263bc | 451 | /* prime data page for it to use */ |
| 8a8d5d85 | 452 | mi_gdinit(&gd->mi, x); |
| 8ad65e08 | 453 | cpu_gdinit(gd, x); |
| 81c04d07 MD |
454 | gd->gd_CMAP1 = &SMPpt[pg + 0]; |
| 455 | gd->gd_CMAP2 = &SMPpt[pg + 1]; | |
| 456 | gd->gd_CMAP3 = &SMPpt[pg + 2]; | |
| 457 | gd->gd_PMAP1 = &SMPpt[pg + 3]; | |
| 0f7a3396 MD |
458 | gd->gd_CADDR1 = ps->CPAGE1; |
| 459 | gd->gd_CADDR2 = ps->CPAGE2; | |
| 460 | gd->gd_CADDR3 = ps->CPAGE3; | |
| 461 | gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; | |
| 9388fcaa MD |
462 | |
| 463 | /* | |
| 464 | * Per-cpu pmap for get_ptbase(). | |
| 465 | */ | |
| 466 | gd->gd_GDADDR1= (unsigned *) | |
| 467 | kmem_alloc_nofault(&kernel_map, SEG_SIZE, SEG_SIZE); | |
| 468 | gd->gd_GDMAP1 = &PTD[(vm_offset_t)gd->gd_GDADDR1 >> PDRSHIFT]; | |
| 469 | ||
| 2abaa030 SZ |
470 | gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (naps + 1)); |
| 471 | bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (naps + 1)); | |
| 984263bc | 472 | |
| 8a8d5d85 MD |
473 | /* |
| 474 | * Setup the AP boot stack | |
| 475 | */ | |
| 0f7a3396 | 476 | bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; |
| 984263bc MD |
477 | bootAP = x; |
| 478 | ||
| 479 | /* attempt to start the Application Processor */ | |
| 480 | CHECK_INIT(99); /* setup checkpoints */ | |
| bb467734 | 481 | if (!start_ap(gd, boot_addr, smibest)) { |
| 2d901d56 SZ |
482 | kprintf("AP #%d (PHY# %d) failed!\n", x, |
| 483 | CPUID_TO_APICID(x)); | |
| 984263bc MD |
484 | CHECK_PRINT("trace"); /* show checkpoints */ |
| 485 | /* better panic as the AP may be running loose */ | |
| 26be20a0 | 486 | kprintf("panic y/n? [y] "); |
| 984263bc MD |
487 | if (cngetc() != 'n') |
| 488 | panic("bye-bye"); | |
| 489 | } | |
| 490 | CHECK_PRINT("trace"); /* show checkpoints */ | |
| 984263bc MD |
491 | } |
| 492 | ||
| 0f7a3396 MD |
493 | /* set ncpus to 1 + highest logical cpu. Not all may have come up */ |
| 494 | ncpus = x; | |
| 495 | ||
| b45759e1 MD |
496 | /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ |
| 497 | for (shift = 0; (1 << shift) <= ncpus; ++shift) | |
| 498 | ; | |
| 499 | --shift; | |
| 500 | ncpus2_shift = shift; | |
| 501 | ncpus2 = 1 << shift; | |
| 90100055 JH |
502 | ncpus2_mask = ncpus2 - 1; |
| 503 | ||
| b45759e1 MD |
504 | /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ |
| 505 | if ((1 << shift) < ncpus) | |
| 506 | ++shift; | |
| 507 | ncpus_fit = 1 << shift; | |
| 508 | ncpus_fit_mask = ncpus_fit - 1; | |
| 509 | ||
| 984263bc | 510 | /* build our map of 'other' CPUs */ |
| da23a592 | 511 | mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); |
| e4846942 | 512 | mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus); |
| 96728c05 | 513 | bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); |
| 984263bc | 514 | |
| 984263bc MD |
515 | /* restore the warmstart vector */ |
| 516 | *(u_long *) WARMBOOT_OFF = mpbioswarmvec; | |
| 984263bc MD |
517 | outb(CMOS_REG, BIOS_RESET); |
| 518 | outb(CMOS_DATA, mpbiosreason); | |
| 984263bc MD |
519 | |
| 520 | /* | |
| 8a8d5d85 MD |
521 | * NOTE! The idlestack for the BSP was setup by locore. Finish |
| 522 | * up, clean out the P==V mapping we did earlier. | |
| 984263bc | 523 | */ |
| 984263bc MD |
524 | for (x = 0; x < NKPT; x++) |
| 525 | PTD[x] = 0; | |
| 526 | pmap_set_opt(); | |
| 527 | ||
| 52596b13 SZ |
528 | /* |
| 529 | * Wait all APs to finish initializing LAPIC | |
| 530 | */ | |
| 531 | mp_finish_lapic = 1; | |
| 532 | if (bootverbose) | |
| 533 | kprintf("SMP: Waiting APs LAPIC initialization\n"); | |
| 534 | if (cpu_feature & CPUID_TSC) | |
| 535 | tsc0_offset = rdtsc(); | |
| 536 | tsc_offsets[0] = 0; | |
| 537 | rel_mplock(); | |
| 538 | while (smp_lapic_mask != smp_startup_mask) { | |
| 539 | cpu_lfence(); | |
| 540 | if (cpu_feature & CPUID_TSC) | |
| 541 | tsc0_offset = rdtsc(); | |
| 542 | } | |
| 543 | while (try_mplock() == 0) | |
| 544 | ; | |
| 545 | ||
| 984263bc | 546 | /* number of APs actually started */ |
| 8a8d5d85 | 547 | return ncpus - 1; |
| 984263bc MD |
548 | } |
| 549 | ||
| 984263bc MD |
550 | /* |
| 551 | * load the 1st level AP boot code into base memory. | |
| 552 | */ | |
| 553 | ||
| 554 | /* targets for relocation */ | |
| 555 | extern void bigJump(void); | |
| 556 | extern void bootCodeSeg(void); | |
| 557 | extern void bootDataSeg(void); | |
| 558 | extern void MPentry(void); | |
| 559 | extern u_int MP_GDT; | |
| 560 | extern u_int mp_gdtbase; | |
| 561 | ||
| 562 | static void | |
| 563 | install_ap_tramp(u_int boot_addr) | |
| 564 | { | |
| 565 | int x; | |
| 566 | int size = *(int *) ((u_long) & bootMP_size); | |
| 567 | u_char *src = (u_char *) ((u_long) bootMP); | |
| 568 | u_char *dst = (u_char *) boot_addr + KERNBASE; | |
| 569 | u_int boot_base = (u_int) bootMP; | |
| 570 | u_int8_t *dst8; | |
| 571 | u_int16_t *dst16; | |
| 572 | u_int32_t *dst32; | |
| 573 | ||
| 574 | POSTCODE(INSTALL_AP_TRAMP_POST); | |
| 575 | ||
| 576 | for (x = 0; x < size; ++x) | |
| 577 | *dst++ = *src++; | |
| 578 | ||
| 579 | /* | |
| 580 | * modify addresses in code we just moved to basemem. unfortunately we | |
| 581 | * need fairly detailed info about mpboot.s for this to work. changes | |
| 582 | * to mpboot.s might require changes here. | |
| 583 | */ | |
| 584 | ||
| 585 | /* boot code is located in KERNEL space */ | |
| 586 | dst = (u_char *) boot_addr + KERNBASE; | |
| 587 | ||
| 588 | /* modify the lgdt arg */ | |
| 589 | dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); | |
| 590 | *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); | |
| 591 | ||
| 592 | /* modify the ljmp target for MPentry() */ | |
| 593 | dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); | |
| 594 | *dst32 = ((u_int) MPentry - KERNBASE); | |
| 595 | ||
| 596 | /* modify the target for boot code segment */ | |
| 597 | dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); | |
| 598 | dst8 = (u_int8_t *) (dst16 + 1); | |
| 599 | *dst16 = (u_int) boot_addr & 0xffff; | |
| 600 | *dst8 = ((u_int) boot_addr >> 16) & 0xff; | |
| 601 | ||
| 602 | /* modify the target for boot data segment */ | |
| 603 | dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); | |
| 604 | dst8 = (u_int8_t *) (dst16 + 1); | |
| 605 | *dst16 = (u_int) boot_addr & 0xffff; | |
| 606 | *dst8 = ((u_int) boot_addr >> 16) & 0xff; | |
| 607 | } | |
| 608 | ||
| 609 | ||
| 610 | /* | |
| bb467734 | 611 | * This function starts the AP (application processor) identified |
| 984263bc MD |
612 | * by the APIC ID 'physicalCpu'. It does quite a "song and dance" |
| 613 | * to accomplish this. This is necessary because of the nuances | |
| 614 | * of the different hardware we might encounter. It ain't pretty, | |
| 615 | * but it seems to work. | |
| a108bf71 MD |
616 | * |
| 617 | * NOTE: eventually an AP gets to ap_init(), which is called just | |
| 618 | * before the AP goes into the LWKT scheduler's idle loop. | |
| 984263bc MD |
619 | */ |
| 620 | static int | |
| bb467734 | 621 | start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest) |
| 984263bc MD |
622 | { |
| 623 | int physical_cpu; | |
| 624 | int vector; | |
| 984263bc MD |
625 | u_long icr_lo, icr_hi; |
| 626 | ||
| 627 | POSTCODE(START_AP_POST); | |
| 628 | ||
| 629 | /* get the PHYSICAL APIC ID# */ | |
| 2d901d56 | 630 | physical_cpu = CPUID_TO_APICID(gd->mi.gd_cpuid); |
| 984263bc MD |
631 | |
| 632 | /* calculate the vector */ | |
| 633 | vector = (boot_addr >> 12) & 0xff; | |
| 634 | ||
| bb467734 MD |
635 | /* We don't want anything interfering */ |
| 636 | cpu_disable_intr(); | |
| 637 | ||
| 8a8d5d85 MD |
638 | /* Make sure the target cpu sees everything */ |
| 639 | wbinvd(); | |
| 984263bc MD |
640 | |
| 641 | /* | |
| bb467734 MD |
642 | * Try to detect when a SMI has occurred, wait up to 200ms. |
| 643 | * | |
| 644 | * If a SMI occurs during an AP reset but before we issue | |
| 645 | * the STARTUP command, the AP may brick. To work around | |
| 646 | * this problem we hold off doing the AP startup until | |
| 647 | * after we have detected the SMI. Hopefully another SMI | |
| 648 | * will not occur before we finish the AP startup. | |
| 649 | * | |
| 650 | * Retries don't seem to help. SMIs have a window of opportunity | |
| 651 | * and if USB->legacy keyboard emulation is enabled in the BIOS | |
| 652 | * the interrupt rate can be quite high. | |
| 653 | * | |
| 654 | * NOTE: Don't worry about the L1 cache load, it might bloat | |
| 655 | * ldelta a little but ndelta will be so huge when the SMI | |
| 656 | * occurs the detection logic will still work fine. | |
| 657 | */ | |
| 658 | if (smibest) { | |
| 659 | set_apic_timer(200000); | |
| 660 | smitest(); | |
| 661 | } | |
| 662 | ||
| 663 | /* | |
| 984263bc MD |
664 | * first we do an INIT/RESET IPI this INIT IPI might be run, reseting |
| 665 | * and running the target CPU. OR this INIT IPI might be latched (P5 | |
| 666 | * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be | |
| 667 | * ignored. | |
| bb467734 MD |
668 | * |
| 669 | * see apic/apicreg.h for icr bit definitions. | |
| 670 | * | |
| 671 | * TIME CRITICAL CODE, DO NOT DO ANY KPRINTFS IN THE HOT PATH. | |
| 984263bc MD |
672 | */ |
| 673 | ||
| bb467734 MD |
674 | /* |
| 675 | * Setup the address for the target AP. We can setup | |
| 676 | * icr_hi once and then just trigger operations with | |
| 677 | * icr_lo. | |
| 678 | */ | |
| cb7d6921 | 679 | icr_hi = lapic->icr_hi & ~APIC_ID_MASK; |
| 984263bc | 680 | icr_hi |= (physical_cpu << 24); |
| cb7d6921 SZ |
681 | icr_lo = lapic->icr_lo & 0xfff00000; |
| 682 | lapic->icr_hi = icr_hi; | |
| 984263bc | 683 | |
| bb467734 MD |
684 | /* |
| 685 | * Do an INIT IPI: assert RESET | |
| 686 | * | |
| 687 | * Use edge triggered mode to assert INIT | |
| 688 | */ | |
| cb7d6921 SZ |
689 | lapic->icr_lo = icr_lo | 0x0000c500; |
| 690 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc MD |
691 | /* spin */ ; |
| 692 | ||
| bb467734 MD |
693 | /* |
| 694 | * The spec calls for a 10ms delay but we may have to use a | |
| 695 | * MUCH lower delay to avoid bricking an AP due to a fast SMI | |
| 696 | * interrupt. We have other loops here too and dividing by 2 | |
| 697 | * doesn't seem to be enough even after subtracting 350us, | |
| 698 | * so we divide by 4. | |
| 699 | * | |
| 700 | * Our minimum delay is 150uS, maximum is 10ms. If no SMI | |
| 701 | * interrupt was detected we use the full 10ms. | |
| 702 | */ | |
| 703 | if (smibest == 0) | |
| 704 | u_sleep(10000); | |
| 705 | else if (smibest < 150 * 4 + 350) | |
| 706 | u_sleep(150); | |
| 707 | else if ((smibest - 350) / 4 < 10000) | |
| 708 | u_sleep((smibest - 350) / 4); | |
| 709 | else | |
| 710 | u_sleep(10000); | |
| 984263bc | 711 | |
| bb467734 MD |
712 | /* |
| 713 | * Do an INIT IPI: deassert RESET | |
| 714 | * | |
| 715 | * Use level triggered mode to deassert. It is unclear | |
| 716 | * why we need to do this. | |
| 717 | */ | |
| cb7d6921 SZ |
718 | lapic->icr_lo = icr_lo | 0x00008500; |
| 719 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc | 720 | /* spin */ ; |
| bb467734 | 721 | u_sleep(150); /* wait 150us */ |
| 984263bc MD |
722 | |
| 723 | /* | |
| bb467734 | 724 | * Next we do a STARTUP IPI: the previous INIT IPI might still be |
| 984263bc MD |
725 | * latched, (P5 bug) this 1st STARTUP would then terminate |
| 726 | * immediately, and the previously started INIT IPI would continue. OR | |
| 727 | * the previous INIT IPI has already run. and this STARTUP IPI will | |
| 728 | * run. OR the previous INIT IPI was ignored. and this STARTUP IPI | |
| 729 | * will run. | |
| 730 | */ | |
| cb7d6921 SZ |
731 | lapic->icr_lo = icr_lo | 0x00000600 | vector; |
| 732 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc MD |
733 | /* spin */ ; |
| 734 | u_sleep(200); /* wait ~200uS */ | |
| 735 | ||
| 736 | /* | |
| bb467734 | 737 | * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF |
| 984263bc MD |
738 | * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR |
| 739 | * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is | |
| 740 | * recognized after hardware RESET or INIT IPI. | |
| 741 | */ | |
| cb7d6921 SZ |
742 | lapic->icr_lo = icr_lo | 0x00000600 | vector; |
| 743 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc | 744 | /* spin */ ; |
| bb467734 MD |
745 | |
| 746 | /* Resume normal operation */ | |
| 747 | cpu_enable_intr(); | |
| 984263bc | 748 | |
| 8a8d5d85 | 749 | /* wait for it to start, see ap_init() */ |
| 984263bc | 750 | set_apic_timer(5000000);/* == 5 seconds */ |
| 8a8d5d85 | 751 | while (read_apic_timer()) { |
| da23a592 | 752 | if (smp_startup_mask & CPUMASK(gd->mi.gd_cpuid)) |
| 984263bc | 753 | return 1; /* return SUCCESS */ |
| 8a8d5d85 | 754 | } |
| bb467734 | 755 | |
| 984263bc MD |
756 | return 0; /* return FAILURE */ |
| 757 | } | |
| 758 | ||
| bb467734 MD |
759 | static |
| 760 | int | |
| 761 | smitest(void) | |
| 762 | { | |
| 763 | int64_t ltsc; | |
| 764 | int64_t ntsc; | |
| 765 | int64_t ldelta; | |
| 766 | int64_t ndelta; | |
| 767 | int count; | |
| 768 | ||
| 769 | ldelta = 0; | |
| 770 | ndelta = 0; | |
| 771 | while (read_apic_timer()) { | |
| 772 | ltsc = rdtsc(); | |
| 773 | for (count = 0; count < 100; ++count) | |
| 774 | ntsc = rdtsc(); /* force loop to occur */ | |
| 775 | if (ldelta) { | |
| 776 | ndelta = ntsc - ltsc; | |
| 777 | if (ldelta > ndelta) | |
| 778 | ldelta = ndelta; | |
| 779 | if (ndelta > ldelta * 2) | |
| 780 | break; | |
| 781 | } else { | |
| 782 | ldelta = ntsc - ltsc; | |
| 783 | } | |
| 784 | } | |
| 785 | return(read_apic_timer()); | |
| 786 | } | |
| 984263bc MD |
787 | |
| 788 | /* | |
| 0f7a3396 | 789 | * Lazy flush the TLB on all other CPU's. DEPRECATED. |
| 984263bc | 790 | * |
| 0f7a3396 MD |
791 | * If for some reason we were unable to start all cpus we cannot safely |
| 792 | * use broadcast IPIs. | |
| 984263bc | 793 | */ |
| 7d4d6fdb MD |
794 | |
| 795 | static cpumask_t smp_invltlb_req; | |
| b4b1a37a | 796 | #define SMP_INVLTLB_DEBUG |
| 7d4d6fdb | 797 | |
| 984263bc MD |
798 | void |
| 799 | smp_invltlb(void) | |
| 800 | { | |
| 97359a5b | 801 | #ifdef SMP |
| 7d4d6fdb | 802 | struct mdglobaldata *md = mdcpu; |
| 2d910aaf MD |
803 | #ifdef SMP_INVLTLB_DEBUG |
| 804 | long count = 0; | |
| 805 | long xcount = 0; | |
| 806 | #endif | |
| 4117f2fd | 807 | |
| 7d4d6fdb MD |
808 | crit_enter_gd(&md->mi); |
| 809 | md->gd_invltlb_ret = 0; | |
| 810 | ++md->mi.gd_cnt.v_smpinvltlb; | |
| da23a592 | 811 | atomic_set_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); |
| 2d910aaf MD |
812 | #ifdef SMP_INVLTLB_DEBUG |
| 813 | again: | |
| 814 | #endif | |
| 0f7a3396 | 815 | if (smp_startup_mask == smp_active_mask) { |
| 984263bc | 816 | all_but_self_ipi(XINVLTLB_OFFSET); |
| 0f7a3396 | 817 | } else { |
| 7d4d6fdb MD |
818 | selected_apic_ipi(smp_active_mask & ~md->mi.gd_cpumask, |
| 819 | XINVLTLB_OFFSET, APIC_DELMODE_FIXED); | |
| 0f7a3396 | 820 | } |
| 2d910aaf MD |
821 | |
| 822 | #ifdef SMP_INVLTLB_DEBUG | |
| 823 | if (xcount) | |
| 824 | kprintf("smp_invltlb: ipi sent\n"); | |
| 825 | #endif | |
| 7d4d6fdb MD |
826 | while ((md->gd_invltlb_ret & smp_active_mask & ~md->mi.gd_cpumask) != |
| 827 | (smp_active_mask & ~md->mi.gd_cpumask)) { | |
| 828 | cpu_mfence(); | |
| 829 | cpu_pause(); | |
| 2d910aaf MD |
830 | #ifdef SMP_INVLTLB_DEBUG |
| 831 | /* DEBUGGING */ | |
| 832 | if (++count == 400000000) { | |
| 833 | print_backtrace(-1); | |
| 834 | kprintf("smp_invltlb: endless loop %08lx %08lx, " | |
| 835 | "rflags %016lx retry", | |
| 836 | (long)md->gd_invltlb_ret, | |
| 837 | (long)smp_invltlb_req, | |
| 838 | (long)read_eflags()); | |
| 839 | __asm __volatile ("sti"); | |
| 840 | ++xcount; | |
| 841 | if (xcount > 2) | |
| 842 | lwkt_process_ipiq(); | |
| 843 | if (xcount > 3) { | |
| da23a592 MD |
844 | int bcpu = BSFCPUMASK(~md->gd_invltlb_ret & |
| 845 | ~md->mi.gd_cpumask & | |
| 846 | smp_active_mask); | |
| 2d910aaf MD |
847 | globaldata_t xgd; |
| 848 | kprintf("bcpu %d\n", bcpu); | |
| 849 | xgd = globaldata_find(bcpu); | |
| 850 | kprintf("thread %p %s\n", xgd->gd_curthread, xgd->gd_curthread->td_comm); | |
| 851 | } | |
| 852 | if (xcount > 5) | |
| 853 | panic("giving up"); | |
| 854 | count = 0; | |
| 855 | goto again; | |
| 856 | } | |
| 857 | #endif | |
| 7d4d6fdb | 858 | } |
| da23a592 | 859 | atomic_clear_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); |
| 7d4d6fdb | 860 | crit_exit_gd(&md->mi); |
| 4117f2fd | 861 | #endif |
| 984263bc MD |
862 | } |
| 863 | ||
| 7d4d6fdb MD |
864 | #ifdef SMP |
| 865 | ||
| 866 | /* | |
| 867 | * Called from Xinvltlb assembly with interrupts disabled. We didn't | |
| 868 | * bother to bump the critical section count or nested interrupt count | |
| 869 | * so only do very low level operations here. | |
| 870 | */ | |
| 871 | void | |
| 872 | smp_invltlb_intr(void) | |
| 873 | { | |
| 874 | struct mdglobaldata *md = mdcpu; | |
| 875 | struct mdglobaldata *omd; | |
| 876 | cpumask_t mask; | |
| 877 | int cpu; | |
| 878 | ||
| 879 | mask = smp_invltlb_req; | |
| 880 | cpu_mfence(); | |
| 881 | cpu_invltlb(); | |
| 882 | while (mask) { | |
| da23a592 MD |
883 | cpu = BSFCPUMASK(mask); |
| 884 | mask &= ~CPUMASK(cpu); | |
| 7d4d6fdb | 885 | omd = (struct mdglobaldata *)globaldata_find(cpu); |
| da23a592 | 886 | atomic_set_cpumask(&omd->gd_invltlb_ret, md->mi.gd_cpumask); |
| 7d4d6fdb MD |
887 | } |
| 888 | } | |
| 889 | ||
| 890 | #endif | |
| 891 | ||
| 984263bc MD |
892 | /* |
| 893 | * When called the executing CPU will send an IPI to all other CPUs | |
| 894 | * requesting that they halt execution. | |
| 895 | * | |
| 896 | * Usually (but not necessarily) called with 'other_cpus' as its arg. | |
| 897 | * | |
| 898 | * - Signals all CPUs in map to stop. | |
| 899 | * - Waits for each to stop. | |
| 900 | * | |
| 901 | * Returns: | |
| 902 | * -1: error | |
| 903 | * 0: NA | |
| 904 | * 1: ok | |
| 905 | * | |
| 906 | * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs | |
| 907 | * from executing at same time. | |
| 908 | */ | |
| 909 | int | |
| da23a592 | 910 | stop_cpus(cpumask_t map) |
| 984263bc | 911 | { |
| 0f7a3396 | 912 | map &= smp_active_mask; |
| 984263bc MD |
913 | |
| 914 | /* send the Xcpustop IPI to all CPUs in map */ | |
| 915 | selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); | |
| 916 | ||
| 917 | while ((stopped_cpus & map) != map) | |
| 918 | /* spin */ ; | |
| 919 | ||
| 920 | return 1; | |
| 921 | } | |
| 922 | ||
| 923 | ||
| 924 | /* | |
| 925 | * Called by a CPU to restart stopped CPUs. | |
| 926 | * | |
| 927 | * Usually (but not necessarily) called with 'stopped_cpus' as its arg. | |
| 928 | * | |
| 929 | * - Signals all CPUs in map to restart. | |
| 930 | * - Waits for each to restart. | |
| 931 | * | |
| 932 | * Returns: | |
| 933 | * -1: error | |
| 934 | * 0: NA | |
| 935 | * 1: ok | |
| 936 | */ | |
| 937 | int | |
| da23a592 | 938 | restart_cpus(cpumask_t map) |
| 984263bc | 939 | { |
| 0f7a3396 MD |
940 | /* signal other cpus to restart */ |
| 941 | started_cpus = map & smp_active_mask; | |
| 984263bc MD |
942 | |
| 943 | while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */ | |
| 944 | /* spin */ ; | |
| 945 | ||
| 946 | return 1; | |
| 947 | } | |
| 948 | ||
| 984263bc | 949 | /* |
| 8a8d5d85 MD |
950 | * This is called once the mpboot code has gotten us properly relocated |
| 951 | * and the MMU turned on, etc. ap_init() is actually the idle thread, | |
| 952 | * and when it returns the scheduler will call the real cpu_idle() main | |
| 953 | * loop for the idlethread. Interrupts are disabled on entry and should | |
| 954 | * remain disabled at return. | |
| 984263bc | 955 | */ |
| 984263bc | 956 | void |
| 8a8d5d85 | 957 | ap_init(void) |
| 984263bc | 958 | { |
| 2d901d56 | 959 | int cpu_id; |
| 984263bc | 960 | |
| 8a8d5d85 | 961 | /* |
| 0f7a3396 MD |
962 | * Adjust smp_startup_mask to signal the BSP that we have started |
| 963 | * up successfully. Note that we do not yet hold the BGL. The BSP | |
| 964 | * is waiting for our signal. | |
| 965 | * | |
| 966 | * We can't set our bit in smp_active_mask yet because we are holding | |
| 967 | * interrupts physically disabled and remote cpus could deadlock | |
| 968 | * trying to send us an IPI. | |
| 8a8d5d85 | 969 | */ |
| da23a592 | 970 | smp_startup_mask |= CPUMASK(mycpu->gd_cpuid); |
| 35238fa5 | 971 | cpu_mfence(); |
| 8a8d5d85 MD |
972 | |
| 973 | /* | |
| 52596b13 SZ |
974 | * Interlock for LAPIC initialization. Wait until mp_finish_lapic is |
| 975 | * non-zero, then get the MP lock. | |
| 41a01a4d MD |
976 | * |
| 977 | * Note: We are in a critical section. | |
| 978 | * | |
| 41a01a4d MD |
979 | * Note: we are the idle thread, we can only spin. |
| 980 | * | |
| 35238fa5 | 981 | * Note: The load fence is memory volatile and prevents the compiler |
| 52596b13 | 982 | * from improperly caching mp_finish_lapic, and the cpu from improperly |
| 35238fa5 | 983 | * caching it. |
| 8a8d5d85 | 984 | */ |
| 52596b13 | 985 | while (mp_finish_lapic == 0) |
| b5d16701 MD |
986 | cpu_lfence(); |
| 987 | while (try_mplock() == 0) | |
| 988 | ; | |
| 8a8d5d85 | 989 | |
| 374133e3 | 990 | if (cpu_feature & CPUID_TSC) { |
| b5d16701 MD |
991 | /* |
| 992 | * The BSP is constantly updating tsc0_offset, figure out | |
| 993 | * the relative difference to synchronize ktrdump. | |
| 994 | */ | |
| 995 | tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset; | |
| 374133e3 MD |
996 | } |
| 997 | ||
| 984263bc MD |
998 | /* BSP may have changed PTD while we're waiting for the lock */ |
| 999 | cpu_invltlb(); | |
| 1000 | ||
| 984263bc MD |
1001 | #if defined(I586_CPU) && !defined(NO_F00F_HACK) |
| 1002 | lidt(&r_idt); | |
| 1003 | #endif | |
| 1004 | ||
| 1005 | /* Build our map of 'other' CPUs. */ | |
| da23a592 | 1006 | mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); |
| 984263bc | 1007 | |
| 984263bc | 1008 | /* A quick check from sanity claus */ |
| 2d901d56 SZ |
1009 | cpu_id = APICID_TO_CPUID((lapic->id & 0xff000000) >> 24); |
| 1010 | if (mycpu->gd_cpuid != cpu_id) { | |
| 1011 | kprintf("SMP: assigned cpuid = %d\n", mycpu->gd_cpuid); | |
| 1012 | kprintf("SMP: actual cpuid = %d\n", cpu_id); | |
| 26be20a0 | 1013 | kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); |
| 984263bc MD |
1014 | panic("cpuid mismatch! boom!!"); |
| 1015 | } | |
| 1016 | ||
| b52c8db0 | 1017 | /* Initialize AP's local APIC for irq's */ |
| 5ddeabb9 | 1018 | lapic_init(FALSE); |
| 984263bc | 1019 | |
| 52596b13 SZ |
1020 | /* LAPIC initialization is done */ |
| 1021 | smp_lapic_mask |= CPUMASK(mycpu->gd_cpuid); | |
| 1022 | cpu_mfence(); | |
| 1023 | ||
| 1024 | /* Let BSP move onto the next initialization stage */ | |
| 1025 | rel_mplock(); | |
| 1026 | ||
| 1027 | /* | |
| 1028 | * Interlock for finalization. Wait until mp_finish is non-zero, | |
| 1029 | * then get the MP lock. | |
| 1030 | * | |
| 1031 | * Note: We are in a critical section. | |
| 1032 | * | |
| 1033 | * Note: we are the idle thread, we can only spin. | |
| 1034 | * | |
| 1035 | * Note: The load fence is memory volatile and prevents the compiler | |
| 1036 | * from improperly caching mp_finish, and the cpu from improperly | |
| 1037 | * caching it. | |
| 1038 | */ | |
| 1039 | while (mp_finish == 0) | |
| 1040 | cpu_lfence(); | |
| 1041 | while (try_mplock() == 0) | |
| 1042 | ; | |
| 1043 | ||
| 1044 | /* BSP may have changed PTD while we're waiting for the lock */ | |
| 1045 | cpu_invltlb(); | |
| 1046 | ||
| 984263bc MD |
1047 | /* Set memory range attributes for this CPU to match the BSP */ |
| 1048 | mem_range_AP_init(); | |
| 1049 | ||
| a2a5ad0d | 1050 | /* |
| 4c9f5a7f MD |
1051 | * Once we go active we must process any IPIQ messages that may |
| 1052 | * have been queued, because no actual IPI will occur until we | |
| 1053 | * set our bit in the smp_active_mask. If we don't the IPI | |
| 1054 | * message interlock could be left set which would also prevent | |
| 1055 | * further IPIs. | |
| 1056 | * | |
| 8a8d5d85 MD |
1057 | * The idle loop doesn't expect the BGL to be held and while |
| 1058 | * lwkt_switch() normally cleans things up this is a special case | |
| 1059 | * because we returning almost directly into the idle loop. | |
| 41a01a4d MD |
1060 | * |
| 1061 | * The idle thread is never placed on the runq, make sure | |
| 4c9f5a7f | 1062 | * nothing we've done put it there. |
| 8a8d5d85 | 1063 | */ |
| b5d16701 | 1064 | KKASSERT(get_mplock_count(curthread) == 1); |
| da23a592 | 1065 | smp_active_mask |= CPUMASK(mycpu->gd_cpuid); |
| d19f6edf MD |
1066 | |
| 1067 | /* | |
| 1068 | * Enable interrupts here. idle_restore will also do it, but | |
| 1069 | * doing it here lets us clean up any strays that got posted to | |
| 1070 | * the CPU during the AP boot while we are still in a critical | |
| 1071 | * section. | |
| 1072 | */ | |
| 1073 | __asm __volatile("sti; pause; pause"::); | |
| c263294b | 1074 | bzero(mdcpu->gd_ipending, sizeof(mdcpu->gd_ipending)); |
| d19f6edf | 1075 | |
| 4a19580d | 1076 | initclocks_pcpu(); /* clock interrupts (via IPIs) */ |
| 4c9f5a7f | 1077 | lwkt_process_ipiq(); |
| d19f6edf MD |
1078 | |
| 1079 | /* | |
| 1080 | * Releasing the mp lock lets the BSP finish up the SMP init | |
| 1081 | */ | |
| 96728c05 | 1082 | rel_mplock(); |
| 41a01a4d | 1083 | KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); |
| 984263bc MD |
1084 | } |
| 1085 | ||
| 41a01a4d MD |
1086 | /* |
| 1087 | * Get SMP fully working before we start initializing devices. | |
| 1088 | */ | |
| 1089 | static | |
| 1090 | void | |
| 1091 | ap_finish(void) | |
| 1092 | { | |
| 1093 | mp_finish = 1; | |
| 1094 | if (bootverbose) | |
| 26be20a0 | 1095 | kprintf("Finish MP startup\n"); |
| 41a01a4d | 1096 | rel_mplock(); |
| 52596b13 | 1097 | while (smp_active_mask != smp_startup_mask) |
| 35238fa5 | 1098 | cpu_lfence(); |
| 4da43e1f | 1099 | while (try_mplock() == 0) |
| 41a01a4d MD |
1100 | ; |
| 1101 | if (bootverbose) | |
| 26be20a0 | 1102 | kprintf("Active CPU Mask: %08x\n", smp_active_mask); |
| 41a01a4d MD |
1103 | } |
| 1104 | ||
| ba39e2e0 | 1105 | SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL) |
| 41a01a4d | 1106 | |
| 96728c05 MD |
1107 | void |
| 1108 | cpu_send_ipiq(int dcpu) | |
| 1109 | { | |
| da23a592 | 1110 | if (CPUMASK(dcpu) & smp_active_mask) |
| 41a01a4d | 1111 | single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); |
| 96728c05 | 1112 | } |
| 41a01a4d MD |
1113 | |
#if 0	/* single_apic_ipi_passive() not working yet */
/*
 * Passive variant of cpu_send_ipiq().  Compiled out for now.
 *
 * Returns 0 on failure, 1 on success.  A dcpu that is not set in
 * smp_active_mask yields 0 without anything being sent; otherwise the
 * result of single_apic_ipi_passive() is returned.
 */
int
cpu_send_ipiq_passive(int dcpu)
{
	if ((CPUMASK(dcpu) & smp_active_mask) == 0)
		return(0);

	return(single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
				       APIC_DELMODE_FIXED));
}
#endif
| ac032dad SZ |
1129 | |
| 1130 | static void | |
| 1131 | cpu_simple_setup(void) | |
| 1132 | { | |
| 1133 | /* build our map of 'other' CPUs */ | |
| 1134 | mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); | |
| 1135 | mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus); | |
| 1136 | bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); | |
| 1137 | ||
| 1138 | pmap_set_opt(); | |
| 1139 | ||
| 1140 | if (cpu_feature & CPUID_TSC) | |
| 1141 | tsc0_offset = rdtsc(); | |
| 1142 | } |