| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1996, by Steve Passe | |
| 3 | * All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions | |
| 7 | * are met: | |
| 8 | * 1. Redistributions of source code must retain the above copyright | |
| 9 | * notice, this list of conditions and the following disclaimer. | |
| 10 | * 2. The name of the developer may NOT be used to endorse or promote products | |
| 11 | * derived from this software without specific prior written permission. | |
| 12 | * | |
| 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
| 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
| 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 23 | * SUCH DAMAGE. | |
| 24 | * | |
| 25 | * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ | |
| c0c5de70 | 26 | * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $ |
| 984263bc MD |
27 | */ |
| 28 | ||
| 29 | #include "opt_cpu.h" | |
| 984263bc | 30 | |
| 984263bc MD |
31 | #include <sys/param.h> |
| 32 | #include <sys/systm.h> | |
| 33 | #include <sys/kernel.h> | |
| 984263bc MD |
34 | #include <sys/sysctl.h> |
| 35 | #include <sys/malloc.h> | |
| 36 | #include <sys/memrange.h> | |
| 984263bc | 37 | #include <sys/cons.h> /* cngetc() */ |
| 37e7efec | 38 | #include <sys/machintr.h> |
| 984263bc MD |
39 | |
| 40 | #include <vm/vm.h> | |
| 41 | #include <vm/vm_param.h> | |
| 42 | #include <vm/pmap.h> | |
| 43 | #include <vm/vm_kern.h> | |
| 44 | #include <vm/vm_extern.h> | |
| 984263bc MD |
45 | #include <sys/lock.h> |
| 46 | #include <vm/vm_map.h> | |
| 47 | #include <sys/user.h> | |
| 48 | #ifdef GPROF | |
| 49 | #include <sys/gmon.h> | |
| 50 | #endif | |
| 984263bc | 51 | |
| 684a93c4 MD |
52 | #include <sys/mplock2.h> |
| 53 | ||
| 984263bc | 54 | #include <machine/smp.h> |
| a9295349 | 55 | #include <machine_base/apic/apicreg.h> |
| 984263bc MD |
56 | #include <machine/atomic.h> |
| 57 | #include <machine/cpufunc.h> | |
| 90e8a35b | 58 | #include <machine/cputypes.h> |
| e0918665 | 59 | #include <machine_base/apic/ioapic_abi.h> |
| 3340ac41 | 60 | #include <machine_base/apic/lapic.h> |
| 4298586a | 61 | #include <machine_base/apic/ioapic.h> |
| 984263bc MD |
62 | #include <machine/psl.h> |
| 63 | #include <machine/segments.h> | |
| 984263bc MD |
64 | #include <machine/tss.h> |
| 65 | #include <machine/specialreg.h> | |
| 66 | #include <machine/globaldata.h> | |
| 4117f2fd | 67 | #include <machine/pmap_inval.h> |
| 984263bc | 68 | |
| 984263bc | 69 | #include <machine/md_var.h> /* setidt() */ |
| 87cf6827 SZ |
70 | #include <machine_base/icu/icu.h> /* IPIs */ |
| 71 | #include <machine/intr_machdep.h> /* IPIs */ | |
| 984263bc | 72 | |
/*
 * BIOS warm-start vector, as seen through the kernel map.  start_all_aps()
 * stores WARMBOOT_TARGET at WARMBOOT_OFF and (boot_addr >> 4) at
 * WARMBOOT_SEG, both as 16 bit values.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/*
 * CMOS access: an index is written to CMOS_REG and the selected byte is
 * then read or written through CMOS_DATA.  BIOS_RESET is the index used
 * for the reset reason byte and BIOS_WARM is the 'warm-start' value
 * stored there while the APs are being started.
 */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)
| 81 | ||
/*
 * Optional AP boot checkpoint tracing.
 *
 * This code MUST be enabled both here and in mpboot.s.  It follows the
 * very early stages of AP boot by placing values in CMOS ram.  It
 * NORMALLY will never be needed, hence the primitive method for enabling
 * it (CHECK_POINTS has to be defined by hand).
 */
#if defined(CHECK_POINTS)

/* Select CMOS index A, then read (or write D to) the data port. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Preset the six checkpoint bytes (CMOS indexes 0x34-0x39) to D.
 *
 * Wrapped in do/while(0) so that "CHECK_INIT(x);" is exactly one
 * statement and stays correct under a brace-less if/else.
 */
#define CHECK_INIT(D)							\
do {									\
	CHECK_WRITE(0x34, (D));						\
	CHECK_WRITE(0x35, (D));						\
	CHECK_WRITE(0x36, (D));						\
	CHECK_WRITE(0x37, (D));						\
	CHECK_WRITE(0x38, (D));						\
	CHECK_WRITE(0x39, (D));						\
} while (0)

/*
 * Print the six checkpoint bytes, prefixed by the string S.
 *
 * Each byte is fetched into a local before kprintf() is called.  The
 * evaluation order of function arguments is unspecified, so issuing the
 * index-write/data-read pairs directly inside the argument list would
 * allow them to be interleaved.
 */
#define CHECK_PRINT(S)							\
do {									\
	int chk34_ = CHECK_READ(0x34);					\
	int chk35_ = CHECK_READ(0x35);					\
	int chk36_ = CHECK_READ(0x36);					\
	int chk37_ = CHECK_READ(0x37);					\
	int chk38_ = CHECK_READ(0x38);					\
	int chk39_ = CHECK_READ(0x39);					\
	kprintf("%s: %d, %d, %d, %d, %d, %d\n",				\
		(S), chk34_, chk35_, chk36_, chk37_, chk38_, chk39_);	\
} while (0)

#else /* CHECK_POINTS */

/* Checkpoint tracing disabled: both macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif /* CHECK_POINTS */
| 116 | ||
/*
 * Progress codes handed to POSTCODE() by the routines in this file.
 * The values run consecutively from 0x10 to 0x19.
 */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19
| 133 | ||
| 984263bc MD |
134 | /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ |
| 135 | int current_postcode; | |
| 136 | ||
| 137 | /** XXX FIXME: what system files declare these??? */ | |
| 138 | extern struct region_descriptor r_gdt, r_idt; | |
| 139 | ||
| 984263bc | 140 | int mp_naps; /* # of Applications processors */ |
| 984263bc MD |
141 | extern int nkpt; |
| 142 | ||
| 374133e3 | 143 | int64_t tsc0_offset; |
| 0b698dca | 144 | extern int64_t tsc_offsets[]; |
| 984263bc | 145 | |
| 30c5f287 | 146 | #ifdef SMP /* APIC-IO */ |
| 8a8d5d85 | 147 | struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; |
| 97359a5b | 148 | #endif |
| 984263bc | 149 | |
| 984263bc MD |
150 | /* AP uses this during bootstrap. Do not staticize. */ |
| 151 | char *bootSTK; | |
| 152 | static int bootAP; | |
| 153 | ||
| 154 | /* Hotwire a 0->4MB V==P mapping */ | |
| 155 | extern pt_entry_t *KPTphys; | |
| 156 | ||
| f13b5eec MD |
157 | /* |
| 158 | * SMP page table page. Setup by locore to point to a page table | |
| 159 | * page from which we allocate per-cpu privatespace areas io_apics, | |
| 160 | * and so forth. | |
| 161 | */ | |
| 984263bc MD |
162 | extern pt_entry_t *SMPpt; |
| 163 | ||
| 164 | struct pcb stoppcbs[MAXCPU]; | |
| 165 | ||
| 984263bc MD |
166 | /* |
| 167 | * Local data and functions. | |
| 168 | */ | |
| 169 | ||
| 984263bc | 170 | static u_int boot_address; |
| 41a01a4d | 171 | static int mp_finish; |
| 52596b13 | 172 | static int mp_finish_lapic; |
| 984263bc | 173 | |
| 984263bc MD |
174 | static void mp_enable(u_int boot_addr); |
| 175 | ||
| 984263bc MD |
176 | static int start_all_aps(u_int boot_addr); |
| 177 | static void install_ap_tramp(u_int boot_addr); | |
| bb467734 MD |
178 | static int start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest); |
| 179 | static int smitest(void); | |
| 984263bc | 180 | |
| 41a01a4d | 181 | static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ |
| 52596b13 | 182 | static cpumask_t smp_lapic_mask = 1; /* which cpus have lapic been inited */ |
| 0f7a3396 MD |
183 | cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ |
| 184 | SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); | |
| 185 | ||
| 117ef3aa | 186 | u_int base_memory; |
| 9d758cc4 SZ |
187 | int imcr_present; |
| 188 | ||
| 984263bc MD |
189 | /* |
| 190 | * Calculate usable address in base memory for AP trampoline code. | |
| 191 | */ | |
| 192 | u_int | |
| 193 | mp_bootaddress(u_int basemem) | |
| 194 | { | |
| 195 | POSTCODE(MP_BOOTADDRESS_POST); | |
| 196 | ||
| c0c5de70 | 197 | base_memory = basemem; |
| 984263bc MD |
198 | |
| 199 | boot_address = base_memory & ~0xfff; /* round down to 4k boundary */ | |
| 200 | if ((base_memory - boot_address) < bootMP_size) | |
| 201 | boot_address -= 4096; /* not enough, lower by 4k */ | |
| 202 | ||
| 203 | return boot_address; | |
| 204 | } | |
| 205 | ||
| 984263bc MD |
206 | /* |
| 207 | * Startup the SMP processors. | |
| 208 | */ | |
| 209 | void | |
| 210 | mp_start(void) | |
| 211 | { | |
| 212 | POSTCODE(MP_START_POST); | |
| 50bc991e | 213 | mp_enable(boot_address); |
| 984263bc MD |
214 | } |
| 215 | ||
| 216 | ||
| 217 | /* | |
| 218 | * Print various information about the SMP system hardware and setup. | |
| 219 | */ | |
| 220 | void | |
| 221 | mp_announce(void) | |
| 222 | { | |
| 223 | int x; | |
| 224 | ||
| 225 | POSTCODE(MP_ANNOUNCE_POST); | |
| 226 | ||
| 26be20a0 | 227 | kprintf("DragonFly/MP: Multiprocessor motherboard\n"); |
| fcddbf94 SZ |
228 | kprintf(" cpu0 (BSP): apic id: %2d\n", CPU_TO_ID(0)); |
| 229 | for (x = 1; x <= mp_naps; ++x) | |
| 230 | kprintf(" cpu%d (AP): apic id: %2d\n", x, CPU_TO_ID(x)); | |
| 984263bc | 231 | |
| 6ac31e9d SZ |
232 | if (!apic_io_enable) |
| 233 | kprintf(" Warning: APIC I/O disabled\n"); | |
| 984263bc MD |
234 | } |
| 235 | ||
| 236 | /* | |
| 237 | * AP cpu's call this to sync up protected mode. | |
| 7160572f MD |
238 | * |
| 239 | * WARNING! We must ensure that the cpu is sufficiently initialized to | |
| 240 | * be able to use to the FP for our optimized bzero/bcopy code before | |
| 241 | * we enter more mainstream C code. | |
| a44bdeec MD |
242 | * |
| 243 | * WARNING! %fs is not set up on entry. This routine sets up %fs. | |
| 984263bc MD |
244 | */ |
| 245 | void | |
| 246 | init_secondary(void) | |
| 247 | { | |
| 248 | int gsel_tss; | |
| 249 | int x, myid = bootAP; | |
| 250 | u_int cr0; | |
| 8a8d5d85 | 251 | struct mdglobaldata *md; |
| 0f7a3396 | 252 | struct privatespace *ps; |
| 984263bc | 253 | |
| 0f7a3396 MD |
254 | ps = &CPU_prvspace[myid]; |
| 255 | ||
| 256 | gdt_segs[GPRIV_SEL].ssd_base = (int)ps; | |
| 984263bc | 257 | gdt_segs[GPROC0_SEL].ssd_base = |
| 0f7a3396 MD |
258 | (int) &ps->mdglobaldata.gd_common_tss; |
| 259 | ps->mdglobaldata.mi.gd_prvspace = ps; | |
| 984263bc MD |
260 | |
| 261 | for (x = 0; x < NGDT; x++) { | |
| 262 | ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); | |
| 263 | } | |
| 264 | ||
| 265 | r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; | |
| 266 | r_gdt.rd_base = (int) &gdt[myid * NGDT]; | |
| 267 | lgdt(&r_gdt); /* does magic intra-segment return */ | |
| 268 | ||
| 269 | lidt(&r_idt); | |
| 270 | ||
| 271 | lldt(_default_ldt); | |
| 7b95be2a | 272 | mdcpu->gd_currentldt = _default_ldt; |
| 984263bc MD |
273 | |
| 274 | gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); | |
| 275 | gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; | |
| 8a8d5d85 | 276 | |
| 0f7a3396 | 277 | md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ |
| 8a8d5d85 MD |
278 | |
| 279 | md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ | |
| 280 | md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); | |
| 281 | md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; | |
| 282 | md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd; | |
| 283 | md->gd_common_tssd = *md->gd_tss_gdt; | |
| 984263bc MD |
284 | ltr(gsel_tss); |
| 285 | ||
| 286 | /* | |
| 287 | * Set to a known state: | |
| 288 | * Set by mpboot.s: CR0_PG, CR0_PE | |
| 289 | * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM | |
| 290 | */ | |
| 291 | cr0 = rcr0(); | |
| 292 | cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); | |
| 293 | load_cr0(cr0); | |
| 7160572f | 294 | pmap_set_opt(); /* PSE/4MB pages, etc */ |
| 984263bc | 295 | |
| 7160572f MD |
296 | /* set up CPU registers and state */ |
| 297 | cpu_setregs(); | |
| 298 | ||
| 299 | /* set up FPU state on the AP */ | |
| 300 | npxinit(__INITIAL_NPXCW__); | |
| 301 | ||
| 302 | /* set up SSE registers */ | |
| 303 | enable_sse(); | |
| 984263bc MD |
304 | } |
| 305 | ||
| 984263bc MD |
306 | /******************************************************************* |
| 307 | * local functions and data | |
| 308 | */ | |
| 309 | ||
| 310 | /* | |
| 311 | * start the SMP system | |
| 312 | */ | |
| 313 | static void | |
| 314 | mp_enable(u_int boot_addr) | |
| 315 | { | |
| 984263bc MD |
316 | POSTCODE(MP_ENABLE_POST); |
| 317 | ||
| 281d9482 | 318 | lapic_config(); |
| 984263bc | 319 | |
| a40ec003 SZ |
320 | /* Initialize BSP's local APIC */ |
| 321 | lapic_init(TRUE); | |
| 322 | ||
| 52596b13 SZ |
323 | /* start each Application Processor */ |
| 324 | start_all_aps(boot_addr); | |
| 325 | ||
| 65b2387f SZ |
326 | if (apic_io_enable) |
| 327 | ioapic_config(); | |
| 328 | ||
| a40ec003 SZ |
329 | /* Finalize PIC */ |
| 330 | MachIntrABI.finalize(); | |
| 984263bc MD |
331 | } |
| 332 | ||
| f13b5eec | 333 | /* |
| 984263bc MD |
334 | * start each AP in our list |
| 335 | */ | |
| 336 | static int | |
| 337 | start_all_aps(u_int boot_addr) | |
| 338 | { | |
| b45759e1 MD |
339 | int x, i, pg; |
| 340 | int shift; | |
| bb467734 MD |
341 | int smicount; |
| 342 | int smibest; | |
| 343 | int smilast; | |
| 984263bc MD |
344 | u_char mpbiosreason; |
| 345 | u_long mpbioswarmvec; | |
| 8a8d5d85 | 346 | struct mdglobaldata *gd; |
| 0f7a3396 | 347 | struct privatespace *ps; |
| 984263bc MD |
348 | char *stack; |
| 349 | uintptr_t kptbase; | |
| 350 | ||
| 351 | POSTCODE(START_ALL_APS_POST); | |
| 352 | ||
| 984263bc MD |
353 | /* install the AP 1st level boot code */ |
| 354 | install_ap_tramp(boot_addr); | |
| 355 | ||
| 356 | ||
| 357 | /* save the current value of the warm-start vector */ | |
| 358 | mpbioswarmvec = *((u_long *) WARMBOOT_OFF); | |
| 984263bc MD |
359 | outb(CMOS_REG, BIOS_RESET); |
| 360 | mpbiosreason = inb(CMOS_DATA); | |
| 984263bc | 361 | |
| bb467734 MD |
362 | /* setup a vector to our boot code */ |
| 363 | *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; | |
| 364 | *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); | |
| 365 | outb(CMOS_REG, BIOS_RESET); | |
| 366 | outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ | |
| 367 | ||
| 368 | /* | |
| 369 | * If we have a TSC we can figure out the SMI interrupt rate. | |
| 370 | * The SMI does not necessarily use a constant rate. Spend | |
| 371 | * up to 250ms trying to figure it out. | |
| 372 | */ | |
| 373 | smibest = 0; | |
| 374 | if (cpu_feature & CPUID_TSC) { | |
| 375 | set_apic_timer(275000); | |
| 376 | smilast = read_apic_timer(); | |
| 377 | for (x = 0; x < 20 && read_apic_timer(); ++x) { | |
| 378 | smicount = smitest(); | |
| 379 | if (smibest == 0 || smilast - smicount < smibest) | |
| 380 | smibest = smilast - smicount; | |
| 381 | smilast = smicount; | |
| 382 | } | |
| 383 | if (smibest > 250000) | |
| 384 | smibest = 0; | |
| 385 | if (smibest) { | |
| 386 | smibest = smibest * (int64_t)1000000 / | |
| 387 | get_apic_timer_frequency(); | |
| 388 | } | |
| 389 | } | |
| 390 | if (smibest) | |
| 391 | kprintf("SMI Frequency (worst case): %d Hz (%d us)\n", | |
| 392 | 1000000 / smibest, smibest); | |
| 393 | ||
| 394 | ||
| 984263bc MD |
395 | /* set up temporary P==V mapping for AP boot */ |
| 396 | /* XXX this is a hack, we should boot the AP on its own stack/PTD */ | |
| 397 | kptbase = (uintptr_t)(void *)KPTphys; | |
| a44bdeec | 398 | for (x = 0; x < NKPT; x++) { |
| 984263bc MD |
399 | PTD[x] = (pd_entry_t)(PG_V | PG_RW | |
| 400 | ((kptbase + x * PAGE_SIZE) & PG_FRAME)); | |
| a44bdeec | 401 | } |
| 0f7a3396 | 402 | cpu_invltlb(); |
| 984263bc MD |
403 | |
| 404 | /* start each AP */ | |
| 405 | for (x = 1; x <= mp_naps; ++x) { | |
| 406 | ||
| 407 | /* This is a bit verbose, it will go away soon. */ | |
| 408 | ||
| 409 | /* first page of AP's private space */ | |
| 410 | pg = x * i386_btop(sizeof(struct privatespace)); | |
| 411 | ||
| 81c04d07 | 412 | /* allocate new private data page(s) */ |
| e4846942 | 413 | gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, |
| 81c04d07 | 414 | MDGLOBALDATA_BASEALLOC_SIZE); |
| 984263bc | 415 | /* wire it into the private page table page */ |
| 81c04d07 MD |
416 | for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) { |
| 417 | SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t) | |
| 418 | (PG_V | PG_RW | vtophys_pte((char *)gd + i)); | |
| 419 | } | |
| 420 | pg += MDGLOBALDATA_BASEALLOC_PAGES; | |
| 421 | ||
| 422 | SMPpt[pg + 0] = 0; /* *gd_CMAP1 */ | |
| 423 | SMPpt[pg + 1] = 0; /* *gd_CMAP2 */ | |
| 424 | SMPpt[pg + 2] = 0; /* *gd_CMAP3 */ | |
| 425 | SMPpt[pg + 3] = 0; /* *gd_PMAP1 */ | |
| 984263bc MD |
426 | |
| 427 | /* allocate and set up an idle stack data page */ | |
| e4846942 | 428 | stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE); |
| 8a8d5d85 | 429 | for (i = 0; i < UPAGES; i++) { |
| 81c04d07 | 430 | SMPpt[pg + 4 + i] = (pt_entry_t) |
| b5b32410 | 431 | (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack)); |
| 8a8d5d85 | 432 | } |
| 984263bc | 433 | |
| 8a8d5d85 MD |
434 | gd = &CPU_prvspace[x].mdglobaldata; /* official location */ |
| 435 | bzero(gd, sizeof(*gd)); | |
| 0f7a3396 | 436 | gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; |
| 8a8d5d85 | 437 | |
| 984263bc | 438 | /* prime data page for it to use */ |
| 8a8d5d85 | 439 | mi_gdinit(&gd->mi, x); |
| 8ad65e08 | 440 | cpu_gdinit(gd, x); |
| 81c04d07 MD |
441 | gd->gd_CMAP1 = &SMPpt[pg + 0]; |
| 442 | gd->gd_CMAP2 = &SMPpt[pg + 1]; | |
| 443 | gd->gd_CMAP3 = &SMPpt[pg + 2]; | |
| 444 | gd->gd_PMAP1 = &SMPpt[pg + 3]; | |
| 0f7a3396 MD |
445 | gd->gd_CADDR1 = ps->CPAGE1; |
| 446 | gd->gd_CADDR2 = ps->CPAGE2; | |
| 447 | gd->gd_CADDR3 = ps->CPAGE3; | |
| 448 | gd->gd_PADDR1 = (unsigned *)ps->PPAGE1; | |
| 9388fcaa MD |
449 | |
| 450 | /* | |
| 451 | * Per-cpu pmap for get_ptbase(). | |
| 452 | */ | |
| 453 | gd->gd_GDADDR1= (unsigned *) | |
| 454 | kmem_alloc_nofault(&kernel_map, SEG_SIZE, SEG_SIZE); | |
| 455 | gd->gd_GDMAP1 = &PTD[(vm_offset_t)gd->gd_GDADDR1 >> PDRSHIFT]; | |
| 456 | ||
| e4846942 | 457 | gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); |
| 96728c05 | 458 | bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); |
| 984263bc | 459 | |
| 8a8d5d85 MD |
460 | /* |
| 461 | * Setup the AP boot stack | |
| 462 | */ | |
| 0f7a3396 | 463 | bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; |
| 984263bc MD |
464 | bootAP = x; |
| 465 | ||
| 466 | /* attempt to start the Application Processor */ | |
| 467 | CHECK_INIT(99); /* setup checkpoints */ | |
| bb467734 | 468 | if (!start_ap(gd, boot_addr, smibest)) { |
| 26be20a0 | 469 | kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); |
| 984263bc MD |
470 | CHECK_PRINT("trace"); /* show checkpoints */ |
| 471 | /* better panic as the AP may be running loose */ | |
| 26be20a0 | 472 | kprintf("panic y/n? [y] "); |
| 984263bc MD |
473 | if (cngetc() != 'n') |
| 474 | panic("bye-bye"); | |
| 475 | } | |
| 476 | CHECK_PRINT("trace"); /* show checkpoints */ | |
| 984263bc MD |
477 | } |
| 478 | ||
| 0f7a3396 MD |
479 | /* set ncpus to 1 + highest logical cpu. Not all may have come up */ |
| 480 | ncpus = x; | |
| 481 | ||
| b45759e1 MD |
482 | /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ |
| 483 | for (shift = 0; (1 << shift) <= ncpus; ++shift) | |
| 484 | ; | |
| 485 | --shift; | |
| 486 | ncpus2_shift = shift; | |
| 487 | ncpus2 = 1 << shift; | |
| 90100055 JH |
488 | ncpus2_mask = ncpus2 - 1; |
| 489 | ||
| b45759e1 MD |
490 | /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ |
| 491 | if ((1 << shift) < ncpus) | |
| 492 | ++shift; | |
| 493 | ncpus_fit = 1 << shift; | |
| 494 | ncpus_fit_mask = ncpus_fit - 1; | |
| 495 | ||
| 984263bc | 496 | /* build our map of 'other' CPUs */ |
| da23a592 | 497 | mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); |
| e4846942 | 498 | mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus); |
| 96728c05 | 499 | bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); |
| 984263bc | 500 | |
| 984263bc MD |
501 | /* restore the warmstart vector */ |
| 502 | *(u_long *) WARMBOOT_OFF = mpbioswarmvec; | |
| 984263bc MD |
503 | outb(CMOS_REG, BIOS_RESET); |
| 504 | outb(CMOS_DATA, mpbiosreason); | |
| 984263bc MD |
505 | |
| 506 | /* | |
| 8a8d5d85 MD |
507 | * NOTE! The idlestack for the BSP was setup by locore. Finish |
| 508 | * up, clean out the P==V mapping we did earlier. | |
| 984263bc | 509 | */ |
| 984263bc MD |
510 | for (x = 0; x < NKPT; x++) |
| 511 | PTD[x] = 0; | |
| 512 | pmap_set_opt(); | |
| 513 | ||
| 52596b13 SZ |
514 | /* |
| 515 | * Wait all APs to finish initializing LAPIC | |
| 516 | */ | |
| 517 | mp_finish_lapic = 1; | |
| 518 | if (bootverbose) | |
| 519 | kprintf("SMP: Waiting APs LAPIC initialization\n"); | |
| 520 | if (cpu_feature & CPUID_TSC) | |
| 521 | tsc0_offset = rdtsc(); | |
| 522 | tsc_offsets[0] = 0; | |
| 523 | rel_mplock(); | |
| 524 | while (smp_lapic_mask != smp_startup_mask) { | |
| 525 | cpu_lfence(); | |
| 526 | if (cpu_feature & CPUID_TSC) | |
| 527 | tsc0_offset = rdtsc(); | |
| 528 | } | |
| 529 | while (try_mplock() == 0) | |
| 530 | ; | |
| 531 | ||
| 984263bc | 532 | /* number of APs actually started */ |
| 8a8d5d85 | 533 | return ncpus - 1; |
| 984263bc MD |
534 | } |
| 535 | ||
| 984263bc MD |
536 | /* |
| 537 | * load the 1st level AP boot code into base memory. | |
| 538 | */ | |
| 539 | ||
| 540 | /* targets for relocation */ | |
| 541 | extern void bigJump(void); | |
| 542 | extern void bootCodeSeg(void); | |
| 543 | extern void bootDataSeg(void); | |
| 544 | extern void MPentry(void); | |
| 545 | extern u_int MP_GDT; | |
| 546 | extern u_int mp_gdtbase; | |
| 547 | ||
| 548 | static void | |
| 549 | install_ap_tramp(u_int boot_addr) | |
| 550 | { | |
| 551 | int x; | |
| 552 | int size = *(int *) ((u_long) & bootMP_size); | |
| 553 | u_char *src = (u_char *) ((u_long) bootMP); | |
| 554 | u_char *dst = (u_char *) boot_addr + KERNBASE; | |
| 555 | u_int boot_base = (u_int) bootMP; | |
| 556 | u_int8_t *dst8; | |
| 557 | u_int16_t *dst16; | |
| 558 | u_int32_t *dst32; | |
| 559 | ||
| 560 | POSTCODE(INSTALL_AP_TRAMP_POST); | |
| 561 | ||
| 562 | for (x = 0; x < size; ++x) | |
| 563 | *dst++ = *src++; | |
| 564 | ||
| 565 | /* | |
| 566 | * modify addresses in code we just moved to basemem. unfortunately we | |
| 567 | * need fairly detailed info about mpboot.s for this to work. changes | |
| 568 | * to mpboot.s might require changes here. | |
| 569 | */ | |
| 570 | ||
| 571 | /* boot code is located in KERNEL space */ | |
| 572 | dst = (u_char *) boot_addr + KERNBASE; | |
| 573 | ||
| 574 | /* modify the lgdt arg */ | |
| 575 | dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); | |
| 576 | *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); | |
| 577 | ||
| 578 | /* modify the ljmp target for MPentry() */ | |
| 579 | dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); | |
| 580 | *dst32 = ((u_int) MPentry - KERNBASE); | |
| 581 | ||
| 582 | /* modify the target for boot code segment */ | |
| 583 | dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); | |
| 584 | dst8 = (u_int8_t *) (dst16 + 1); | |
| 585 | *dst16 = (u_int) boot_addr & 0xffff; | |
| 586 | *dst8 = ((u_int) boot_addr >> 16) & 0xff; | |
| 587 | ||
| 588 | /* modify the target for boot data segment */ | |
| 589 | dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); | |
| 590 | dst8 = (u_int8_t *) (dst16 + 1); | |
| 591 | *dst16 = (u_int) boot_addr & 0xffff; | |
| 592 | *dst8 = ((u_int) boot_addr >> 16) & 0xff; | |
| 593 | } | |
| 594 | ||
| 595 | ||
| 596 | /* | |
| bb467734 | 597 | * This function starts the AP (application processor) identified |
| 984263bc MD |
598 | * by the APIC ID 'physicalCpu'. It does quite a "song and dance" |
| 599 | * to accomplish this. This is necessary because of the nuances | |
| 600 | * of the different hardware we might encounter. It ain't pretty, | |
| 601 | * but it seems to work. | |
| a108bf71 MD |
602 | * |
| 603 | * NOTE: eventually an AP gets to ap_init(), which is called just | |
| 604 | * before the AP goes into the LWKT scheduler's idle loop. | |
| 984263bc MD |
605 | */ |
| 606 | static int | |
| bb467734 | 607 | start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest) |
| 984263bc MD |
608 | { |
| 609 | int physical_cpu; | |
| 610 | int vector; | |
| 984263bc MD |
611 | u_long icr_lo, icr_hi; |
| 612 | ||
| 613 | POSTCODE(START_AP_POST); | |
| 614 | ||
| 615 | /* get the PHYSICAL APIC ID# */ | |
| 0f7a3396 | 616 | physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid); |
| 984263bc MD |
617 | |
| 618 | /* calculate the vector */ | |
| 619 | vector = (boot_addr >> 12) & 0xff; | |
| 620 | ||
| bb467734 MD |
621 | /* We don't want anything interfering */ |
| 622 | cpu_disable_intr(); | |
| 623 | ||
| 8a8d5d85 MD |
624 | /* Make sure the target cpu sees everything */ |
| 625 | wbinvd(); | |
| 984263bc MD |
626 | |
| 627 | /* | |
| bb467734 MD |
628 | * Try to detect when a SMI has occurred, wait up to 200ms. |
| 629 | * | |
| 630 | * If a SMI occurs during an AP reset but before we issue | |
| 631 | * the STARTUP command, the AP may brick. To work around | |
| 632 | * this problem we hold off doing the AP startup until | |
| 633 | * after we have detected the SMI. Hopefully another SMI | |
| 634 | * will not occur before we finish the AP startup. | |
| 635 | * | |
| 636 | * Retries don't seem to help. SMIs have a window of opportunity | |
| 637 | * and if USB->legacy keyboard emulation is enabled in the BIOS | |
| 638 | * the interrupt rate can be quite high. | |
| 639 | * | |
| 640 | * NOTE: Don't worry about the L1 cache load, it might bloat | |
| 641 | * ldelta a little but ndelta will be so huge when the SMI | |
| 642 | * occurs the detection logic will still work fine. | |
| 643 | */ | |
| 644 | if (smibest) { | |
| 645 | set_apic_timer(200000); | |
| 646 | smitest(); | |
| 647 | } | |
| 648 | ||
| 649 | /* | |
| 984263bc MD |
650 | * first we do an INIT/RESET IPI this INIT IPI might be run, reseting |
| 651 | * and running the target CPU. OR this INIT IPI might be latched (P5 | |
| 652 | * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be | |
| 653 | * ignored. | |
| bb467734 MD |
654 | * |
| 655 | * see apic/apicreg.h for icr bit definitions. | |
| 656 | * | |
| 657 | * TIME CRITICAL CODE, DO NOT DO ANY KPRINTFS IN THE HOT PATH. | |
| 984263bc MD |
658 | */ |
| 659 | ||
| bb467734 MD |
660 | /* |
| 661 | * Setup the address for the target AP. We can setup | |
| 662 | * icr_hi once and then just trigger operations with | |
| 663 | * icr_lo. | |
| 664 | */ | |
| cb7d6921 | 665 | icr_hi = lapic->icr_hi & ~APIC_ID_MASK; |
| 984263bc | 666 | icr_hi |= (physical_cpu << 24); |
| cb7d6921 SZ |
667 | icr_lo = lapic->icr_lo & 0xfff00000; |
| 668 | lapic->icr_hi = icr_hi; | |
| 984263bc | 669 | |
| bb467734 MD |
670 | /* |
| 671 | * Do an INIT IPI: assert RESET | |
| 672 | * | |
| 673 | * Use edge triggered mode to assert INIT | |
| 674 | */ | |
| cb7d6921 SZ |
675 | lapic->icr_lo = icr_lo | 0x0000c500; |
| 676 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc MD |
677 | /* spin */ ; |
| 678 | ||
| bb467734 MD |
679 | /* |
| 680 | * The spec calls for a 10ms delay but we may have to use a | |
| 681 | * MUCH lower delay to avoid bricking an AP due to a fast SMI | |
| 682 | * interrupt. We have other loops here too and dividing by 2 | |
| 683 | * doesn't seem to be enough even after subtracting 350us, | |
| 684 | * so we divide by 4. | |
| 685 | * | |
| 686 | * Our minimum delay is 150uS, maximum is 10ms. If no SMI | |
| 687 | * interrupt was detected we use the full 10ms. | |
| 688 | */ | |
| 689 | if (smibest == 0) | |
| 690 | u_sleep(10000); | |
| 691 | else if (smibest < 150 * 4 + 350) | |
| 692 | u_sleep(150); | |
| 693 | else if ((smibest - 350) / 4 < 10000) | |
| 694 | u_sleep((smibest - 350) / 4); | |
| 695 | else | |
| 696 | u_sleep(10000); | |
| 984263bc | 697 | |
| bb467734 MD |
698 | /* |
| 699 | * Do an INIT IPI: deassert RESET | |
| 700 | * | |
| 701 | * Use level triggered mode to deassert. It is unclear | |
| 702 | * why we need to do this. | |
| 703 | */ | |
| cb7d6921 SZ |
704 | lapic->icr_lo = icr_lo | 0x00008500; |
| 705 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc | 706 | /* spin */ ; |
| bb467734 | 707 | u_sleep(150); /* wait 150us */ |
| 984263bc MD |
708 | |
| 709 | /* | |
| bb467734 | 710 | * Next we do a STARTUP IPI: the previous INIT IPI might still be |
| 984263bc MD |
711 | * latched, (P5 bug) this 1st STARTUP would then terminate |
| 712 | * immediately, and the previously started INIT IPI would continue. OR | |
| 713 | * the previous INIT IPI has already run. and this STARTUP IPI will | |
| 714 | * run. OR the previous INIT IPI was ignored. and this STARTUP IPI | |
| 715 | * will run. | |
| 716 | */ | |
| cb7d6921 SZ |
717 | lapic->icr_lo = icr_lo | 0x00000600 | vector; |
| 718 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc MD |
719 | /* spin */ ; |
| 720 | u_sleep(200); /* wait ~200uS */ | |
| 721 | ||
| 722 | /* | |
| bb467734 | 723 | * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF |
| 984263bc MD |
724 | * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR |
| 725 | * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is | |
| 726 | * recognized after hardware RESET or INIT IPI. | |
| 727 | */ | |
| cb7d6921 SZ |
728 | lapic->icr_lo = icr_lo | 0x00000600 | vector; |
| 729 | while (lapic->icr_lo & APIC_DELSTAT_MASK) | |
| 984263bc | 730 | /* spin */ ; |
| bb467734 MD |
731 | |
| 732 | /* Resume normal operation */ | |
| 733 | cpu_enable_intr(); | |
| 984263bc | 734 | |
| 8a8d5d85 | 735 | /* wait for it to start, see ap_init() */ |
| 984263bc | 736 | set_apic_timer(5000000);/* == 5 seconds */ |
| 8a8d5d85 | 737 | while (read_apic_timer()) { |
| da23a592 | 738 | if (smp_startup_mask & CPUMASK(gd->mi.gd_cpuid)) |
| 984263bc | 739 | return 1; /* return SUCCESS */ |
| 8a8d5d85 | 740 | } |
| bb467734 | 741 | |
| 984263bc MD |
742 | return 0; /* return FAILURE */ |
| 743 | } | |
| 744 | ||
| bb467734 MD |
745 | static |
| 746 | int | |
| 747 | smitest(void) | |
| 748 | { | |
| 749 | int64_t ltsc; | |
| 750 | int64_t ntsc; | |
| 751 | int64_t ldelta; | |
| 752 | int64_t ndelta; | |
| 753 | int count; | |
| 754 | ||
| 755 | ldelta = 0; | |
| 756 | ndelta = 0; | |
| 757 | while (read_apic_timer()) { | |
| 758 | ltsc = rdtsc(); | |
| 759 | for (count = 0; count < 100; ++count) | |
| 760 | ntsc = rdtsc(); /* force loop to occur */ | |
| 761 | if (ldelta) { | |
| 762 | ndelta = ntsc - ltsc; | |
| 763 | if (ldelta > ndelta) | |
| 764 | ldelta = ndelta; | |
| 765 | if (ndelta > ldelta * 2) | |
| 766 | break; | |
| 767 | } else { | |
| 768 | ldelta = ntsc - ltsc; | |
| 769 | } | |
| 770 | } | |
| 771 | return(read_apic_timer()); | |
| 772 | } | |
| 984263bc MD |
773 | |
| 774 | /* | |
| 0f7a3396 | 775 | * Lazy flush the TLB on all other CPU's. DEPRECATED. |
| 984263bc | 776 | * |
| 0f7a3396 MD |
777 | * If for some reason we were unable to start all cpus we cannot safely |
| 778 | * use broadcast IPIs. | |
| 984263bc | 779 | */ |
| 7d4d6fdb MD |
780 | |
| 781 | static cpumask_t smp_invltlb_req; | |
| b4b1a37a | 782 | #define SMP_INVLTLB_DEBUG |
| 7d4d6fdb | 783 | |
| 984263bc MD |
784 | void |
| 785 | smp_invltlb(void) | |
| 786 | { | |
| 97359a5b | 787 | #ifdef SMP |
| 7d4d6fdb | 788 | struct mdglobaldata *md = mdcpu; |
| 2d910aaf MD |
789 | #ifdef SMP_INVLTLB_DEBUG |
| 790 | long count = 0; | |
| 791 | long xcount = 0; | |
| 792 | #endif | |
| 4117f2fd | 793 | |
| 7d4d6fdb MD |
794 | crit_enter_gd(&md->mi); |
| 795 | md->gd_invltlb_ret = 0; | |
| 796 | ++md->mi.gd_cnt.v_smpinvltlb; | |
| da23a592 | 797 | atomic_set_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); |
| 2d910aaf MD |
798 | #ifdef SMP_INVLTLB_DEBUG |
| 799 | again: | |
| 800 | #endif | |
| 0f7a3396 | 801 | if (smp_startup_mask == smp_active_mask) { |
| 984263bc | 802 | all_but_self_ipi(XINVLTLB_OFFSET); |
| 0f7a3396 | 803 | } else { |
| 7d4d6fdb MD |
804 | selected_apic_ipi(smp_active_mask & ~md->mi.gd_cpumask, |
| 805 | XINVLTLB_OFFSET, APIC_DELMODE_FIXED); | |
| 0f7a3396 | 806 | } |
| 2d910aaf MD |
807 | |
| 808 | #ifdef SMP_INVLTLB_DEBUG | |
| 809 | if (xcount) | |
| 810 | kprintf("smp_invltlb: ipi sent\n"); | |
| 811 | #endif | |
| 7d4d6fdb MD |
812 | while ((md->gd_invltlb_ret & smp_active_mask & ~md->mi.gd_cpumask) != |
| 813 | (smp_active_mask & ~md->mi.gd_cpumask)) { | |
| 814 | cpu_mfence(); | |
| 815 | cpu_pause(); | |
| 2d910aaf MD |
816 | #ifdef SMP_INVLTLB_DEBUG |
| 817 | /* DEBUGGING */ | |
| 818 | if (++count == 400000000) { | |
| 819 | print_backtrace(-1); | |
| 820 | kprintf("smp_invltlb: endless loop %08lx %08lx, " | |
| 821 | "rflags %016lx retry", | |
| 822 | (long)md->gd_invltlb_ret, | |
| 823 | (long)smp_invltlb_req, | |
| 824 | (long)read_eflags()); | |
| 825 | __asm __volatile ("sti"); | |
| 826 | ++xcount; | |
| 827 | if (xcount > 2) | |
| 828 | lwkt_process_ipiq(); | |
| 829 | if (xcount > 3) { | |
| da23a592 MD |
830 | int bcpu = BSFCPUMASK(~md->gd_invltlb_ret & |
| 831 | ~md->mi.gd_cpumask & | |
| 832 | smp_active_mask); | |
| 2d910aaf MD |
833 | globaldata_t xgd; |
| 834 | kprintf("bcpu %d\n", bcpu); | |
| 835 | xgd = globaldata_find(bcpu); | |
| 836 | kprintf("thread %p %s\n", xgd->gd_curthread, xgd->gd_curthread->td_comm); | |
| 837 | } | |
| 838 | if (xcount > 5) | |
| 839 | panic("giving up"); | |
| 840 | count = 0; | |
| 841 | goto again; | |
| 842 | } | |
| 843 | #endif | |
| 7d4d6fdb | 844 | } |
| da23a592 | 845 | atomic_clear_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); |
| 7d4d6fdb | 846 | crit_exit_gd(&md->mi); |
| 4117f2fd | 847 | #endif |
| 984263bc MD |
848 | } |
| 849 | ||
| 7d4d6fdb MD |
850 | #ifdef SMP |
| 851 | ||
| 852 | /* | |
| 853 | * Called from Xinvltlb assembly with interrupts disabled. We didn't | |
| 854 | * bother to bump the critical section count or nested interrupt count | |
| 855 | * so only do very low level operations here. | |
| 856 | */ | |
| 857 | void | |
| 858 | smp_invltlb_intr(void) | |
| 859 | { | |
| 860 | struct mdglobaldata *md = mdcpu; | |
| 861 | struct mdglobaldata *omd; | |
| 862 | cpumask_t mask; | |
| 863 | int cpu; | |
| 864 | ||
| 865 | mask = smp_invltlb_req; | |
| 866 | cpu_mfence(); | |
| 867 | cpu_invltlb(); | |
| 868 | while (mask) { | |
| da23a592 MD |
869 | cpu = BSFCPUMASK(mask); |
| 870 | mask &= ~CPUMASK(cpu); | |
| 7d4d6fdb | 871 | omd = (struct mdglobaldata *)globaldata_find(cpu); |
| da23a592 | 872 | atomic_set_cpumask(&omd->gd_invltlb_ret, md->mi.gd_cpumask); |
| 7d4d6fdb MD |
873 | } |
| 874 | } | |
| 875 | ||
| 876 | #endif | |
| 877 | ||
| 984263bc MD |
878 | /* |
| 879 | * When called the executing CPU will send an IPI to all other CPUs | |
| 880 | * requesting that they halt execution. | |
| 881 | * | |
| 882 | * Usually (but not necessarily) called with 'other_cpus' as its arg. | |
| 883 | * | |
| 884 | * - Signals all CPUs in map to stop. | |
| 885 | * - Waits for each to stop. | |
| 886 | * | |
| 887 | * Returns: | |
| 888 | * -1: error | |
| 889 | * 0: NA | |
| 890 | * 1: ok | |
| 891 | * | |
| 892 | * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs | |
| 893 | * from executing at same time. | |
| 894 | */ | |
| 895 | int | |
| da23a592 | 896 | stop_cpus(cpumask_t map) |
| 984263bc | 897 | { |
| 0f7a3396 | 898 | map &= smp_active_mask; |
| 984263bc MD |
899 | |
| 900 | /* send the Xcpustop IPI to all CPUs in map */ | |
| 901 | selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); | |
| 902 | ||
| 903 | while ((stopped_cpus & map) != map) | |
| 904 | /* spin */ ; | |
| 905 | ||
| 906 | return 1; | |
| 907 | } | |
| 908 | ||
| 909 | ||
| 910 | /* | |
| 911 | * Called by a CPU to restart stopped CPUs. | |
| 912 | * | |
| 913 | * Usually (but not necessarily) called with 'stopped_cpus' as its arg. | |
| 914 | * | |
| 915 | * - Signals all CPUs in map to restart. | |
| 916 | * - Waits for each to restart. | |
| 917 | * | |
| 918 | * Returns: | |
| 919 | * -1: error | |
| 920 | * 0: NA | |
| 921 | * 1: ok | |
| 922 | */ | |
| 923 | int | |
| da23a592 | 924 | restart_cpus(cpumask_t map) |
| 984263bc | 925 | { |
| 0f7a3396 MD |
926 | /* signal other cpus to restart */ |
| 927 | started_cpus = map & smp_active_mask; | |
| 984263bc MD |
928 | |
| 929 | while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */ | |
| 930 | /* spin */ ; | |
| 931 | ||
| 932 | return 1; | |
| 933 | } | |
| 934 | ||
| 984263bc | 935 | /* |
| 8a8d5d85 MD |
936 | * This is called once the mpboot code has gotten us properly relocated |
| 937 | * and the MMU turned on, etc. ap_init() is actually the idle thread, | |
| 938 | * and when it returns the scheduler will call the real cpu_idle() main | |
| 939 | * loop for the idlethread. Interrupts are disabled on entry and should | |
| 940 | * remain disabled at return. | |
| 984263bc | 941 | */ |
| 984263bc | 942 | void |
| 8a8d5d85 | 943 | ap_init(void) |
| 984263bc MD |
944 | { |
| 945 | u_int apic_id; | |
| 946 | ||
| 8a8d5d85 | 947 | /* |
| 0f7a3396 MD |
948 | * Adjust smp_startup_mask to signal the BSP that we have started |
| 949 | * up successfully. Note that we do not yet hold the BGL. The BSP | |
| 950 | * is waiting for our signal. | |
| 951 | * | |
| 952 | * We can't set our bit in smp_active_mask yet because we are holding | |
| 953 | * interrupts physically disabled and remote cpus could deadlock | |
| 954 | * trying to send us an IPI. | |
| 8a8d5d85 | 955 | */ |
| da23a592 | 956 | smp_startup_mask |= CPUMASK(mycpu->gd_cpuid); |
| 35238fa5 | 957 | cpu_mfence(); |
| 8a8d5d85 MD |
958 | |
| 959 | /* | |
| 52596b13 SZ |
960 | * Interlock for LAPIC initialization. Wait until mp_finish_lapic is |
| 961 | * non-zero, then get the MP lock. | |
| 41a01a4d MD |
962 | * |
| 963 | * Note: We are in a critical section. | |
| 964 | * | |
| 41a01a4d MD |
965 | * Note: we are the idle thread, we can only spin. |
| 966 | * | |
| 35238fa5 | 967 | * Note: The load fence is memory volatile and prevents the compiler |
| 52596b13 | 968 | * from improperly caching mp_finish_lapic, and the cpu from improperly |
| 35238fa5 | 969 | * caching it. |
| 8a8d5d85 | 970 | */ |
| 52596b13 | 971 | while (mp_finish_lapic == 0) |
| b5d16701 MD |
972 | cpu_lfence(); |
| 973 | while (try_mplock() == 0) | |
| 974 | ; | |
| 8a8d5d85 | 975 | |
| 374133e3 | 976 | if (cpu_feature & CPUID_TSC) { |
| b5d16701 MD |
977 | /* |
| 978 | * The BSP is constantly updating tsc0_offset, figure out | |
| 979 | * the relative difference to synchronize ktrdump. | |
| 980 | */ | |
| 981 | tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset; | |
| 374133e3 MD |
982 | } |
| 983 | ||
| 984263bc MD |
984 | /* BSP may have changed PTD while we're waiting for the lock */ |
| 985 | cpu_invltlb(); | |
| 986 | ||
| 984263bc MD |
987 | #if defined(I586_CPU) && !defined(NO_F00F_HACK) |
| 988 | lidt(&r_idt); | |
| 989 | #endif | |
| 990 | ||
| 991 | /* Build our map of 'other' CPUs. */ | |
| da23a592 | 992 | mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); |
| 984263bc | 993 | |
| 984263bc | 994 | /* A quick check from sanity claus */ |
| cb7d6921 | 995 | apic_id = (apic_id_to_logical[(lapic->id & 0xff000000) >> 24]); |
| 8a8d5d85 | 996 | if (mycpu->gd_cpuid != apic_id) { |
| 26be20a0 SW |
997 | kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid); |
| 998 | kprintf("SMP: apic_id = %d\n", apic_id); | |
| 999 | kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); | |
| 984263bc MD |
1000 | panic("cpuid mismatch! boom!!"); |
| 1001 | } | |
| 1002 | ||
| b52c8db0 | 1003 | /* Initialize AP's local APIC for irq's */ |
| 5ddeabb9 | 1004 | lapic_init(FALSE); |
| 984263bc | 1005 | |
| 52596b13 SZ |
1006 | /* LAPIC initialization is done */ |
| 1007 | smp_lapic_mask |= CPUMASK(mycpu->gd_cpuid); | |
| 1008 | cpu_mfence(); | |
| 1009 | ||
| 1010 | /* Let BSP move onto the next initialization stage */ | |
| 1011 | rel_mplock(); | |
| 1012 | ||
| 1013 | /* | |
| 1014 | * Interlock for finalization. Wait until mp_finish is non-zero, | |
| 1015 | * then get the MP lock. | |
| 1016 | * | |
| 1017 | * Note: We are in a critical section. | |
| 1018 | * | |
| 1019 | * Note: we are the idle thread, we can only spin. | |
| 1020 | * | |
| 1021 | * Note: The load fence is memory volatile and prevents the compiler | |
| 1022 | * from improperly caching mp_finish, and the cpu from improperly | |
| 1023 | * caching it. | |
| 1024 | */ | |
| 1025 | while (mp_finish == 0) | |
| 1026 | cpu_lfence(); | |
| 1027 | while (try_mplock() == 0) | |
| 1028 | ; | |
| 1029 | ||
| 1030 | /* BSP may have changed PTD while we're waiting for the lock */ | |
| 1031 | cpu_invltlb(); | |
| 1032 | ||
| 984263bc MD |
1033 | /* Set memory range attributes for this CPU to match the BSP */ |
| 1034 | mem_range_AP_init(); | |
| 1035 | ||
| a2a5ad0d | 1036 | /* |
| 4c9f5a7f MD |
1037 | * Once we go active we must process any IPIQ messages that may |
| 1038 | * have been queued, because no actual IPI will occur until we | |
| 1039 | * set our bit in the smp_active_mask. If we don't the IPI | |
| 1040 | * message interlock could be left set which would also prevent | |
| 1041 | * further IPIs. | |
| 1042 | * | |
| 8a8d5d85 MD |
1043 | * The idle loop doesn't expect the BGL to be held and while |
| 1044 | * lwkt_switch() normally cleans things up this is a special case | |
| 1045 | * because we returning almost directly into the idle loop. | |
| 41a01a4d MD |
1046 | * |
| 1047 | * The idle thread is never placed on the runq, make sure | |
| 4c9f5a7f | 1048 | * nothing we've done put it there. |
| 8a8d5d85 | 1049 | */ |
| b5d16701 | 1050 | KKASSERT(get_mplock_count(curthread) == 1); |
| da23a592 | 1051 | smp_active_mask |= CPUMASK(mycpu->gd_cpuid); |
| d19f6edf MD |
1052 | |
| 1053 | /* | |
| 1054 | * Enable interrupts here. idle_restore will also do it, but | |
| 1055 | * doing it here lets us clean up any strays that got posted to | |
| 1056 | * the CPU during the AP boot while we are still in a critical | |
| 1057 | * section. | |
| 1058 | */ | |
| 1059 | __asm __volatile("sti; pause; pause"::); | |
| c263294b | 1060 | bzero(mdcpu->gd_ipending, sizeof(mdcpu->gd_ipending)); |
| d19f6edf | 1061 | |
| 4a19580d | 1062 | initclocks_pcpu(); /* clock interrupts (via IPIs) */ |
| 4c9f5a7f | 1063 | lwkt_process_ipiq(); |
| d19f6edf MD |
1064 | |
| 1065 | /* | |
| 1066 | * Releasing the mp lock lets the BSP finish up the SMP init | |
| 1067 | */ | |
| 96728c05 | 1068 | rel_mplock(); |
| 41a01a4d | 1069 | KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); |
| 984263bc MD |
1070 | } |
| 1071 | ||
| 41a01a4d MD |
1072 | /* |
| 1073 | * Get SMP fully working before we start initializing devices. | |
| 1074 | */ | |
| 1075 | static | |
| 1076 | void | |
| 1077 | ap_finish(void) | |
| 1078 | { | |
| 1079 | mp_finish = 1; | |
| 1080 | if (bootverbose) | |
| 26be20a0 | 1081 | kprintf("Finish MP startup\n"); |
| 41a01a4d | 1082 | rel_mplock(); |
| 52596b13 | 1083 | while (smp_active_mask != smp_startup_mask) |
| 35238fa5 | 1084 | cpu_lfence(); |
| 4da43e1f | 1085 | while (try_mplock() == 0) |
| 41a01a4d MD |
1086 | ; |
| 1087 | if (bootverbose) | |
| 26be20a0 | 1088 | kprintf("Active CPU Mask: %08x\n", smp_active_mask); |
| 41a01a4d MD |
1089 | } |
| 1090 | ||
| ba39e2e0 | 1091 | SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL) |
| 41a01a4d | 1092 | |
| 96728c05 MD |
1093 | void |
| 1094 | cpu_send_ipiq(int dcpu) | |
| 1095 | { | |
| da23a592 | 1096 | if (CPUMASK(dcpu) & smp_active_mask) |
| 41a01a4d | 1097 | single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); |
| 96728c05 | 1098 | } |
| 41a01a4d MD |
1099 | |
#if 0	/* single_apic_ipi_passive() not working yet */
/*
 * Passive variant of cpu_send_ipiq(): forwards to
 * single_apic_ipi_passive() when 'dcpu' is in smp_active_mask.
 *
 * Returns 0 on failure (including an inactive target), 1 on success.
 */
int
cpu_send_ipiq_passive(int dcpu)
{
	if ((smp_active_mask & CPUMASK(dcpu)) == 0)
		return(0);

	return(single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
				       APIC_DELMODE_FIXED));
}
#endif