kernel - SMP - "Fix AP #%d (PHY# %d) failed" issues
[dragonfly.git] / sys / platform / pc32 / i386 / mp_machdep.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $
c0c5de70 26 * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $
984263bc
MD
27 */
28
29#include "opt_cpu.h"
984263bc 30
984263bc
MD
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
984263bc
MD
34#include <sys/sysctl.h>
35#include <sys/malloc.h>
36#include <sys/memrange.h>
984263bc 37#include <sys/cons.h> /* cngetc() */
37e7efec 38#include <sys/machintr.h>
984263bc
MD
39
40#include <vm/vm.h>
41#include <vm/vm_param.h>
42#include <vm/pmap.h>
43#include <vm/vm_kern.h>
44#include <vm/vm_extern.h>
984263bc
MD
45#include <sys/lock.h>
46#include <vm/vm_map.h>
47#include <sys/user.h>
48#ifdef GPROF
49#include <sys/gmon.h>
50#endif
984263bc 51
684a93c4
MD
52#include <sys/mplock2.h>
53
984263bc 54#include <machine/smp.h>
a9295349 55#include <machine_base/apic/apicreg.h>
984263bc
MD
56#include <machine/atomic.h>
57#include <machine/cpufunc.h>
90e8a35b 58#include <machine/cputypes.h>
a9295349 59#include <machine_base/apic/mpapic.h>
984263bc
MD
60#include <machine/psl.h>
61#include <machine/segments.h>
984263bc
MD
62#include <machine/tss.h>
63#include <machine/specialreg.h>
64#include <machine/globaldata.h>
65
984263bc 66#include <machine/md_var.h> /* setidt() */
a9295349
MD
67#include <machine_base/icu/icu.h> /* IPIs */
68#include <machine_base/isa/intr_machdep.h> /* IPIs */
984263bc 69
1439c090
MD
70#define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */
71
984263bc
MD
72#define WARMBOOT_TARGET 0
73#define WARMBOOT_OFF (KERNBASE + 0x0467)
74#define WARMBOOT_SEG (KERNBASE + 0x0469)
75
984263bc 76#define BIOS_BASE (0xf0000)
1df86978 77#define BIOS_BASE2 (0xe0000)
984263bc 78#define BIOS_SIZE (0x10000)
984263bc
MD
79#define BIOS_COUNT (BIOS_SIZE/4)
80
81#define CMOS_REG (0x70)
82#define CMOS_DATA (0x71)
83#define BIOS_RESET (0x0f)
84#define BIOS_WARM (0x0a)
85
86#define PROCENTRY_FLAG_EN 0x01
87#define PROCENTRY_FLAG_BP 0x02
88#define IOAPICENTRY_FLAG_EN 0x01
89
90
91/* MP Floating Pointer Structure */
92typedef struct MPFPS {
93 char signature[4];
981bebd1 94 u_int32_t pap;
984263bc
MD
95 u_char length;
96 u_char spec_rev;
97 u_char checksum;
98 u_char mpfb1;
99 u_char mpfb2;
100 u_char mpfb3;
101 u_char mpfb4;
102 u_char mpfb5;
103} *mpfps_t;
104
105/* MP Configuration Table Header */
106typedef struct MPCTH {
107 char signature[4];
108 u_short base_table_length;
109 u_char spec_rev;
110 u_char checksum;
111 u_char oem_id[8];
112 u_char product_id[12];
113 void *oem_table_pointer;
114 u_short oem_table_size;
115 u_short entry_count;
116 void *apic_address;
117 u_short extended_table_length;
118 u_char extended_table_checksum;
119 u_char reserved;
120} *mpcth_t;
121
122
123typedef struct PROCENTRY {
124 u_char type;
125 u_char apic_id;
126 u_char apic_version;
127 u_char cpu_flags;
128 u_long cpu_signature;
129 u_long feature_flags;
130 u_long reserved1;
131 u_long reserved2;
132} *proc_entry_ptr;
133
134typedef struct BUSENTRY {
135 u_char type;
136 u_char bus_id;
137 char bus_type[6];
138} *bus_entry_ptr;
139
140typedef struct IOAPICENTRY {
141 u_char type;
142 u_char apic_id;
143 u_char apic_version;
144 u_char apic_flags;
145 void *apic_address;
146} *io_apic_entry_ptr;
147
148typedef struct INTENTRY {
149 u_char type;
150 u_char int_type;
151 u_short int_flags;
152 u_char src_bus_id;
153 u_char src_bus_irq;
154 u_char dst_apic_id;
155 u_char dst_apic_int;
156} *int_entry_ptr;
157
158/* descriptions of MP basetable entries */
159typedef struct BASETABLE_ENTRY {
160 u_char type;
161 u_char length;
162 char name[16];
163} basetable_entry;
164
981bebd1
SZ
165struct mptable_pos {
166 mpfps_t mp_fps;
167 mpcth_t mp_cth;
168 vm_size_t mp_cth_mapsz;
169};
170
fa058384
SZ
171typedef int (*mptable_iter_func)(void *, const void *, int);
172
984263bc
MD
173/*
174 * this code MUST be enabled here and in mpboot.s.
175 * it follows the very early stages of AP boot by placing values in CMOS ram.
176 * it NORMALLY will never be needed and thus the primitive method for enabling.
177 *
984263bc 178 */
7d34994c 179#if defined(CHECK_POINTS)
984263bc
MD
180#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
181#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
182
183#define CHECK_INIT(D); \
184 CHECK_WRITE(0x34, (D)); \
185 CHECK_WRITE(0x35, (D)); \
186 CHECK_WRITE(0x36, (D)); \
187 CHECK_WRITE(0x37, (D)); \
188 CHECK_WRITE(0x38, (D)); \
189 CHECK_WRITE(0x39, (D));
190
191#define CHECK_PRINT(S); \
26be20a0 192 kprintf("%s: %d, %d, %d, %d, %d, %d\n", \
984263bc
MD
193 (S), \
194 CHECK_READ(0x34), \
195 CHECK_READ(0x35), \
196 CHECK_READ(0x36), \
197 CHECK_READ(0x37), \
198 CHECK_READ(0x38), \
199 CHECK_READ(0x39));
200
201#else /* CHECK_POINTS */
202
203#define CHECK_INIT(D)
204#define CHECK_PRINT(S)
205
206#endif /* CHECK_POINTS */
207
208/*
209 * Values to send to the POST hardware.
210 */
211#define MP_BOOTADDRESS_POST 0x10
212#define MP_PROBE_POST 0x11
213#define MPTABLE_PASS1_POST 0x12
214
215#define MP_START_POST 0x13
216#define MP_ENABLE_POST 0x14
217#define MPTABLE_PASS2_POST 0x15
218
219#define START_ALL_APS_POST 0x16
220#define INSTALL_AP_TRAMP_POST 0x17
221#define START_AP_POST 0x18
222
223#define MP_ANNOUNCE_POST 0x19
224
984263bc
MD
225/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
226int current_postcode;
227
228/** XXX FIXME: what system files declare these??? */
229extern struct region_descriptor r_gdt, r_idt;
230
984263bc 231int mp_naps; /* # of Applications processors */
97359a5b 232#ifdef APIC_IO
4f6a8b30 233static int mp_nbusses; /* # of busses */
984263bc 234int mp_napics; /* # of IO APICs */
97359a5b 235#endif
97359a5b 236#ifdef APIC_IO
984263bc 237vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
97359a5b
MD
238u_int32_t *io_apic_versions;
239#endif
984263bc
MD
240extern int nkpt;
241
242u_int32_t cpu_apic_versions[MAXCPU];
374133e3 243int64_t tsc0_offset;
0b698dca 244extern int64_t tsc_offsets[];
984263bc 245
1876681a
SZ
246extern u_long ebda_addr;
247
97359a5b 248#ifdef APIC_IO
8a8d5d85 249struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
97359a5b 250#endif
984263bc 251
984263bc
MD
252/*
253 * APIC ID logical/physical mapping structures.
254 * We oversize these to simplify boot-time config.
255 */
256int cpu_num_to_apic_id[NAPICID];
97359a5b 257#ifdef APIC_IO
984263bc 258int io_num_to_apic_id[NAPICID];
97359a5b 259#endif
984263bc
MD
260int apic_id_to_logical[NAPICID];
261
984263bc
MD
262/* AP uses this during bootstrap. Do not staticize. */
263char *bootSTK;
264static int bootAP;
265
266/* Hotwire a 0->4MB V==P mapping */
267extern pt_entry_t *KPTphys;
268
f13b5eec
MD
269/*
270 * SMP page table page. Setup by locore to point to a page table
271 * page from which we allocate per-cpu privatespace areas io_apics,
272 * and so forth.
273 */
274
275#define IO_MAPPING_START_INDEX \
276 (SMP_MAXCPU * sizeof(struct privatespace) / PAGE_SIZE)
277
984263bc 278extern pt_entry_t *SMPpt;
f13b5eec 279static int SMPpt_alloc_index = IO_MAPPING_START_INDEX;
984263bc
MD
280
281struct pcb stoppcbs[MAXCPU];
282
fa058384
SZ
283static basetable_entry basetable_entry_types[] =
284{
285 {0, 20, "Processor"},
286 {1, 8, "Bus"},
287 {2, 8, "I/O APIC"},
288 {3, 8, "I/O INT"},
289 {4, 8, "Local INT"}
290};
291
984263bc
MD
292/*
293 * Local data and functions.
294 */
295
984263bc
MD
296static u_int boot_address;
297static u_int base_memory;
41a01a4d 298static int mp_finish;
984263bc 299
984263bc
MD
300static void mp_enable(u_int boot_addr);
301
fa058384
SZ
302static int mptable_iterate_entries(const mpcth_t,
303 mptable_iter_func, void *);
cb00b5c4 304static int mptable_probe(void);
34e6fa63 305static int mptable_search(void);
fa058384 306static int mptable_check(vm_paddr_t);
3aba8f73 307static int mptable_search_sig(u_int32_t target, int count);
44c36320 308static int mptable_hyperthread_fixup(u_int, int);
29bb1d92 309#ifdef APIC_IO
981bebd1 310static void mptable_pass1(struct mptable_pos *);
390b18b0 311static void mptable_pass2(struct mptable_pos *);
3aba8f73
SZ
312static void mptable_default(int type);
313static void mptable_fix(void);
29bb1d92 314#endif
fa058384 315static int mptable_map(struct mptable_pos *, vm_paddr_t);
981bebd1 316static void mptable_unmap(struct mptable_pos *);
a0eaef71 317static void mptable_imcr(struct mptable_pos *);
3aba8f73 318
281d9482
SZ
319static int mptable_lapic_probe(struct lapic_enumerator *);
320static void mptable_lapic_enumerate(struct lapic_enumerator *);
321static void mptable_lapic_default(void);
322
97359a5b 323#ifdef APIC_IO
984263bc 324static void setup_apic_irq_mapping(void);
97359a5b
MD
325static int apic_int_is_bus_type(int intr, int bus_type);
326#endif
984263bc
MD
327static int start_all_aps(u_int boot_addr);
328static void install_ap_tramp(u_int boot_addr);
bb467734
MD
329static int start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest);
330static int smitest(void);
984263bc 331
41a01a4d 332static cpumask_t smp_startup_mask = 1; /* which cpus have been started */
0f7a3396
MD
333cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */
334SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, "");
335
984263bc
MD
336/*
337 * Calculate usable address in base memory for AP trampoline code.
338 */
339u_int
340mp_bootaddress(u_int basemem)
341{
342 POSTCODE(MP_BOOTADDRESS_POST);
343
c0c5de70 344 base_memory = basemem;
984263bc
MD
345
346 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
347 if ((base_memory - boot_address) < bootMP_size)
348 boot_address -= 4096; /* not enough, lower by 4k */
349
350 return boot_address;
351}
352
353
34e6fa63
SZ
/*
 * Locate the MP floating pointer structure and validate the table it
 * describes.
 *
 * Returns the physical address reported by mptable_search(), or 0 when
 * mptable_check() rejects it (which includes the "nothing found" case).
 */
static int
mptable_probe(void)
{
	int paddr;

	paddr = mptable_search();
	return (mptable_check(paddr) != 0) ? 0 : paddr;
}
365
984263bc
MD
366/*
367 * Look for an Intel MP spec table (ie, SMP capable hardware).
368 */
cb00b5c4 369static int
34e6fa63 370mptable_search(void)
984263bc
MD
371{
372 int x;
984263bc 373 u_int32_t target;
f13b5eec
MD
374
375 /*
376 * Make sure our SMPpt[] page table is big enough to hold all the
377 * mappings we need.
378 */
379 KKASSERT(IO_MAPPING_START_INDEX < NPTEPG - 2);
984263bc
MD
380
381 POSTCODE(MP_PROBE_POST);
382
383 /* see if EBDA exists */
1876681a 384 if (ebda_addr != 0) {
984263bc 385 /* search first 1K of EBDA */
1876681a 386 target = (u_int32_t)ebda_addr;
3aba8f73 387 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 388 return x;
984263bc
MD
389 } else {
390 /* last 1K of base memory, effective 'top of base' passed in */
aeb48299 391 target = (u_int32_t)(base_memory - 0x400);
3aba8f73 392 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 393 return x;
984263bc
MD
394 }
395
396 /* search the BIOS */
aeb48299 397 target = (u_int32_t)BIOS_BASE;
3aba8f73 398 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
aeb48299 399 return x;
984263bc 400
1df86978
SZ
401 /* search the extended BIOS */
402 target = (u_int32_t)BIOS_BASE2;
403 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
404 return x;
405
984263bc 406 /* nothing found */
984263bc 407 return 0;
984263bc
MD
408}
409
fa058384
SZ
410struct mptable_check_cbarg {
411 int cpu_count;
412 int found_bsp;
413};
414
415static int
416mptable_check_callback(void *xarg, const void *pos, int type)
417{
418 const struct PROCENTRY *ent;
419 struct mptable_check_cbarg *arg = xarg;
420
421 if (type != 0)
422 return 0;
423 ent = pos;
424
425 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
426 return 0;
427 arg->cpu_count++;
428
429 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
430 if (arg->found_bsp) {
431 kprintf("more than one BSP in base MP table\n");
432 return EINVAL;
433 }
434 arg->found_bsp = 1;
435 }
436 return 0;
437}
438
439static int
440mptable_check(vm_paddr_t mpfps_paddr)
441{
442 struct mptable_pos mpt;
443 struct mptable_check_cbarg arg;
444 mpcth_t cth;
445 int error;
446
447 if (mpfps_paddr == 0)
448 return EOPNOTSUPP;
449
450 error = mptable_map(&mpt, mpfps_paddr);
451 if (error)
452 return error;
453
454 if (mpt.mp_fps->mpfb1 != 0)
455 goto done;
456
457 error = EINVAL;
458
459 cth = mpt.mp_cth;
460 if (cth == NULL)
461 goto done;
462 if (cth->apic_address == 0)
463 goto done;
464
465 bzero(&arg, sizeof(arg));
466 error = mptable_iterate_entries(cth, mptable_check_callback, &arg);
467 if (!error) {
468 if (arg.cpu_count == 0) {
469 kprintf("MP table contains no processor entries\n");
470 error = EINVAL;
471 } else if (!arg.found_bsp) {
472 kprintf("MP table does not contains BSP entry\n");
473 error = EINVAL;
474 }
475 }
476done:
477 mptable_unmap(&mpt);
478 return error;
479}
480
481static int
482mptable_iterate_entries(const mpcth_t cth, mptable_iter_func func, void *arg)
483{
484 int count, total_size;
485 const void *position;
486
487 KKASSERT(cth->base_table_length >= sizeof(struct MPCTH));
488 total_size = cth->base_table_length - sizeof(struct MPCTH);
489 position = (const uint8_t *)cth + sizeof(struct MPCTH);
490 count = cth->entry_count;
491
492 while (count--) {
493 int type, error;
494
495 KKASSERT(total_size >= 0);
496 if (total_size == 0) {
497 kprintf("invalid base MP table, "
498 "entry count and length mismatch\n");
499 return EINVAL;
500 }
501
502 type = *(const uint8_t *)position;
503 switch (type) {
504 case 0: /* processor_entry */
505 case 1: /* bus_entry */
506 case 2: /* io_apic_entry */
507 case 3: /* int_entry */
508 case 4: /* int_entry */
509 break;
510 default:
511 kprintf("unknown base MP table entry type %d\n", type);
512 return EINVAL;
513 }
514
515 if (total_size < basetable_entry_types[type].length) {
516 kprintf("invalid base MP table length, "
517 "does not contain all entries\n");
518 return EINVAL;
519 }
520 total_size -= basetable_entry_types[type].length;
521
522 error = func(arg, position, type);
523 if (error)
524 return error;
525
526 position = (const uint8_t *)position +
527 basetable_entry_types[type].length;
528 }
529 return 0;
530}
531
984263bc
MD
532
533/*
534 * Startup the SMP processors.
535 */
536void
537mp_start(void)
538{
539 POSTCODE(MP_START_POST);
50bc991e 540 mp_enable(boot_address);
984263bc
MD
541}
542
543
544/*
545 * Print various information about the SMP system hardware and setup.
546 */
547void
548mp_announce(void)
549{
550 int x;
551
552 POSTCODE(MP_ANNOUNCE_POST);
553
26be20a0
SW
554 kprintf("DragonFly/MP: Multiprocessor motherboard\n");
555 kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
8629c4ea 556 kprintf(", version: 0x%08x\n", cpu_apic_versions[0]);
984263bc 557 for (x = 1; x <= mp_naps; ++x) {
26be20a0 558 kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
8629c4ea 559 kprintf(", version: 0x%08x\n", cpu_apic_versions[x]);
984263bc
MD
560 }
561
562#if defined(APIC_IO)
563 for (x = 0; x < mp_napics; ++x) {
26be20a0
SW
564 kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
565 kprintf(", version: 0x%08x", io_apic_versions[x]);
d557216f 566 kprintf(", at 0x%08lx\n", io_apic_address[x]);
984263bc
MD
567 }
568#else
26be20a0 569 kprintf(" Warning: APIC I/O disabled\n");
984263bc
MD
570#endif /* APIC_IO */
571}
572
573/*
574 * AP cpu's call this to sync up protected mode.
7160572f
MD
575 *
576 * WARNING! We must ensure that the cpu is sufficiently initialized to
577 * be able to use to the FP for our optimized bzero/bcopy code before
578 * we enter more mainstream C code.
a44bdeec
MD
579 *
580 * WARNING! %fs is not set up on entry. This routine sets up %fs.
984263bc
MD
581 */
582void
583init_secondary(void)
584{
585 int gsel_tss;
586 int x, myid = bootAP;
587 u_int cr0;
8a8d5d85 588 struct mdglobaldata *md;
0f7a3396 589 struct privatespace *ps;
984263bc 590
0f7a3396
MD
591 ps = &CPU_prvspace[myid];
592
593 gdt_segs[GPRIV_SEL].ssd_base = (int)ps;
984263bc 594 gdt_segs[GPROC0_SEL].ssd_base =
0f7a3396
MD
595 (int) &ps->mdglobaldata.gd_common_tss;
596 ps->mdglobaldata.mi.gd_prvspace = ps;
984263bc
MD
597
598 for (x = 0; x < NGDT; x++) {
599 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
600 }
601
602 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
603 r_gdt.rd_base = (int) &gdt[myid * NGDT];
604 lgdt(&r_gdt); /* does magic intra-segment return */
605
606 lidt(&r_idt);
607
608 lldt(_default_ldt);
7b95be2a 609 mdcpu->gd_currentldt = _default_ldt;
984263bc
MD
610
611 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
612 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
8a8d5d85 613
0f7a3396 614 md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/
8a8d5d85
MD
615
616 md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */
617 md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
618 md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
619 md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
620 md->gd_common_tssd = *md->gd_tss_gdt;
984263bc
MD
621 ltr(gsel_tss);
622
623 /*
624 * Set to a known state:
625 * Set by mpboot.s: CR0_PG, CR0_PE
626 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
627 */
628 cr0 = rcr0();
629 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
630 load_cr0(cr0);
7160572f 631 pmap_set_opt(); /* PSE/4MB pages, etc */
984263bc 632
7160572f
MD
633 /* set up CPU registers and state */
634 cpu_setregs();
635
636 /* set up FPU state on the AP */
637 npxinit(__INITIAL_NPXCW__);
638
639 /* set up SSE registers */
640 enable_sse();
984263bc
MD
641}
642
984263bc
MD
643/*******************************************************************
644 * local functions and data
645 */
646
647/*
648 * start the SMP system
649 */
650static void
651mp_enable(u_int boot_addr)
652{
984263bc
MD
653#if defined(APIC_IO)
654 int apic;
655 u_int ux;
656#endif /* APIC_IO */
981bebd1 657 vm_paddr_t mpfps_paddr;
f65c10b6 658 struct mptable_pos mpt;
984263bc
MD
659
660 POSTCODE(MP_ENABLE_POST);
661
281d9482 662 lapic_config();
984263bc 663
f65c10b6
SZ
664 mpfps_paddr = mptable_probe();
665 if (mpfps_paddr) {
666 mptable_map(&mpt, mpfps_paddr);
667 mptable_imcr(&mpt);
668 mptable_unmap(&mpt);
669 }
984263bc
MD
670#if defined(APIC_IO)
671
f65c10b6
SZ
672 if (!mpfps_paddr)
673 panic("no MP table, disable APIC_IO!\n");
674
675 mptable_map(&mpt, mpfps_paddr);
676
677 /*
678 * Examine the MP table for needed info
679 */
680 mptable_pass1(&mpt);
681 mptable_pass2(&mpt);
682
683 mptable_unmap(&mpt);
684
685 /* Post scan cleanup */
686 mptable_fix();
687
97359a5b
MD
688 setup_apic_irq_mapping();
689
984263bc
MD
690 /* fill the LOGICAL io_apic_versions table */
691 for (apic = 0; apic < mp_napics; ++apic) {
692 ux = io_apic_read(apic, IOAPIC_VER);
693 io_apic_versions[apic] = ux;
694 io_apic_set_id(apic, IO_TO_ID(apic));
695 }
696
697 /* program each IO APIC in the system */
698 for (apic = 0; apic < mp_napics; ++apic)
699 if (io_apic_setup(apic) < 0)
700 panic("IO APIC setup failure");
701
97359a5b
MD
702#endif /* APIC_IO */
703
704 /*
705 * These are required for SMP operation
706 */
707
984263bc
MD
708 /* install a 'Spurious INTerrupt' vector */
709 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
710 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
711
712 /* install an inter-CPU IPI for TLB invalidation */
713 setidt(XINVLTLB_OFFSET, Xinvltlb,
714 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
715
96728c05
MD
716 /* install an inter-CPU IPI for IPIQ messaging */
717 setidt(XIPIQ_OFFSET, Xipiq,
718 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
78ea5a2a
SZ
719
720 /* install a timer vector */
721 setidt(XTIMER_OFFSET, Xtimer,
722 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
984263bc 723
984263bc
MD
724 /* install an inter-CPU IPI for CPU stop/restart */
725 setidt(XCPUSTOP_OFFSET, Xcpustop,
726 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
727
984263bc
MD
728 /* start each Application Processor */
729 start_all_aps(boot_addr);
730}
731
732
733/*
734 * look for the MP spec signature
735 */
736
737/* string defined by the Intel MP Spec as identifying the MP table */
738#define MP_SIG 0x5f504d5f /* _MP_ */
739#define NEXT(X) ((X) += 4)
740static int
3aba8f73 741mptable_search_sig(u_int32_t target, int count)
984263bc 742{
0f85efa2
SZ
743 vm_size_t map_size;
744 u_int32_t *addr;
745 int x, ret;
984263bc 746
aeb48299
SZ
747 KKASSERT(target != 0);
748
0f85efa2
SZ
749 map_size = count * sizeof(u_int32_t);
750 addr = pmap_mapdev((vm_paddr_t)target, map_size);
984263bc 751
aeb48299 752 ret = 0;
0f85efa2
SZ
753 for (x = 0; x < count; NEXT(x)) {
754 if (addr[x] == MP_SIG) {
755 /* make array index a byte index */
756 ret = target + (x * sizeof(u_int32_t));
757 break;
758 }
759 }
aeb48299 760
0f85efa2
SZ
761 pmap_unmapdev((vm_offset_t)addr, map_size);
762 return ret;
984263bc
MD
763}
764
765
984263bc
MD
766typedef struct BUSDATA {
767 u_char bus_id;
768 enum busTypes bus_type;
769} bus_datum;
770
771typedef struct INTDATA {
772 u_char int_type;
773 u_short int_flags;
774 u_char src_bus_id;
775 u_char src_bus_irq;
776 u_char dst_apic_id;
777 u_char dst_apic_int;
778 u_char int_vector;
779} io_int, local_int;
780
781typedef struct BUSTYPENAME {
782 u_char type;
783 char name[7];
784} bus_type_name;
785
29bb1d92
SW
786#ifdef APIC_IO
787
984263bc
MD
788static bus_type_name bus_type_table[] =
789{
790 {CBUS, "CBUS"},
791 {CBUSII, "CBUSII"},
792 {EISA, "EISA"},
793 {MCA, "MCA"},
794 {UNKNOWN_BUSTYPE, "---"},
795 {ISA, "ISA"},
796 {MCA, "MCA"},
797 {UNKNOWN_BUSTYPE, "---"},
798 {UNKNOWN_BUSTYPE, "---"},
799 {UNKNOWN_BUSTYPE, "---"},
800 {UNKNOWN_BUSTYPE, "---"},
801 {UNKNOWN_BUSTYPE, "---"},
802 {PCI, "PCI"},
803 {UNKNOWN_BUSTYPE, "---"},
804 {UNKNOWN_BUSTYPE, "---"},
805 {UNKNOWN_BUSTYPE, "---"},
806 {UNKNOWN_BUSTYPE, "---"},
807 {XPRESS, "XPRESS"},
808 {UNKNOWN_BUSTYPE, "---"}
809};
810/* from MP spec v1.4, table 5-1 */
811static int default_data[7][5] =
812{
813/* nbus, id0, type0, id1, type1 */
814 {1, 0, ISA, 255, 255},
815 {1, 0, EISA, 255, 255},
816 {1, 0, EISA, 255, 255},
817 {1, 0, MCA, 255, 255},
818 {2, 0, ISA, 1, PCI},
819 {2, 0, EISA, 1, PCI},
820 {2, 0, MCA, 1, PCI}
821};
822
823
824/* the bus data */
825static bus_datum *bus_data;
826
827/* the IO INT data, one entry per possible APIC INTerrupt */
828static io_int *io_apic_ints;
984263bc 829static int nintrs;
4f6a8b30 830
97359a5b 831#endif
984263bc 832
a0873f07 833static int processor_entry (const struct PROCENTRY *entry, int cpu);
97359a5b 834#ifdef APIC_IO
c4717d5c
SZ
835static int bus_entry (const struct BUSENTRY *entry, int bus);
836static int io_apic_entry (const struct IOAPICENTRY *entry, int apic);
837static int int_entry (const struct INTENTRY *entry, int intr);
3ae0cd58 838static int lookup_bus_type (char *name);
29bb1d92 839#endif
984263bc 840
8658b5be
SZ
841#ifdef APIC_IO
842
843static int
844mptable_ioapic_pass1_callback(void *xarg, const void *pos, int type)
845{
846 const struct IOAPICENTRY *ioapic_ent;
847
848 switch (type) {
849 case 1: /* bus_entry */
850 ++mp_nbusses;
851 break;
852
853 case 2: /* io_apic_entry */
854 ioapic_ent = pos;
855 if (ioapic_ent->apic_flags & IOAPICENTRY_FLAG_EN) {
856 io_apic_address[mp_napics++] =
857 (vm_offset_t)ioapic_ent->apic_address;
858 }
859 break;
860
861 case 3: /* int_entry */
862 ++nintrs;
863 break;
864 }
865 return 0;
866}
867
984263bc
MD
868/*
869 * 1st pass on motherboard's Intel MP specification table.
870 *
984263bc 871 * determines:
984263bc 872 * io_apic_address[N]
984263bc
MD
873 * mp_nbusses
874 * mp_napics
875 * nintrs
876 */
877static void
981bebd1 878mptable_pass1(struct mptable_pos *mpt)
984263bc 879{
981bebd1 880 mpfps_t fps;
8658b5be 881 int x;
984263bc
MD
882
883 POSTCODE(MPTABLE_PASS1_POST);
884
981bebd1
SZ
885 fps = mpt->mp_fps;
886 KKASSERT(fps != NULL);
f9c3b04f 887
984263bc 888 /* clear various tables */
8658b5be 889 for (x = 0; x < NAPICID; ++x)
984263bc 890 io_apic_address[x] = ~0; /* IO APIC address table */
984263bc 891
4f6a8b30 892 mp_nbusses = 0;
984263bc
MD
893 mp_napics = 0;
894 nintrs = 0;
984263bc
MD
895
896 /* check for use of 'default' configuration */
981bebd1 897 if (fps->mpfb1 != 0) {
984263bc 898 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
4f6a8b30 899 mp_nbusses = default_data[fps->mpfb1 - 1][0];
984263bc
MD
900 mp_napics = 1;
901 nintrs = 16;
8658b5be
SZ
902 } else {
903 int error;
904
905 error = mptable_iterate_entries(mpt->mp_cth,
906 mptable_ioapic_pass1_callback, NULL);
907 if (error)
908 panic("mptable_iterate_entries(ioapic_pass1) failed\n");
984263bc 909 }
984263bc
MD
910}
911
c4717d5c
SZ
/* Next free slot of each kind while mptable_pass2() walks the table. */
struct mptable_ioapic2_cbarg {
	int	bus;	/* index handed to bus_entry() */
	int	apic;	/* index handed to io_apic_entry() */
	int	intr;	/* index handed to int_entry() */
};

/*
 * mptable_iterate_entries() callback for mptable_pass2().
 *
 * Dispatches bus (1), I/O APIC (2) and I/O interrupt (3) entries to
 * their handlers and advances the matching index whenever the handler
 * returns non-zero.  Other entry types are ignored.  Always returns 0.
 */
static int
mptable_ioapic_pass2_callback(void *xarg, const void *pos, int type)
{
	struct mptable_ioapic2_cbarg *next = xarg;

	if (type == 1) {
		if (bus_entry(pos, next->bus))
			++next->bus;
	} else if (type == 2) {
		if (io_apic_entry(pos, next->apic))
			++next->apic;
	} else if (type == 3) {
		if (int_entry(pos, next->intr))
			++next->intr;
	}
	return 0;
}
941
984263bc
MD
942/*
943 * 2nd pass on motherboard's Intel MP specification table.
944 *
945 * sets:
984263bc 946 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
984263bc
MD
947 * IO_TO_ID(N), logical IO to APIC ID table
948 * bus_data[N]
949 * io_apic_ints[N]
950 */
390b18b0 951static void
981bebd1 952mptable_pass2(struct mptable_pos *mpt)
984263bc 953{
c4717d5c 954 struct mptable_ioapic2_cbarg arg;
981bebd1 955 mpfps_t fps;
c4717d5c 956 int error, x;
984263bc
MD
957
958 POSTCODE(MPTABLE_PASS2_POST);
959
981bebd1
SZ
960 fps = mpt->mp_fps;
961 KKASSERT(fps != NULL);
962
984263bc
MD
963 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
964 M_DEVBUF, M_WAITOK);
965 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
e7b4468c 966 M_DEVBUF, M_WAITOK | M_ZERO);
1439c090 967 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + FIXUP_EXTRA_APIC_INTS),
984263bc
MD
968 M_DEVBUF, M_WAITOK);
969 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
970 M_DEVBUF, M_WAITOK);
971
c4717d5c
SZ
972 for (x = 0; x < mp_napics; x++)
973 ioapic[x] = permanent_io_mapping(io_apic_address[x]);
984263bc
MD
974
975 /* clear various tables */
976 for (x = 0; x < NAPICID; ++x) {
97359a5b 977 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
984263bc
MD
978 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
979 }
980
981 /* clear bus data table */
982 for (x = 0; x < mp_nbusses; ++x)
983 bus_data[x].bus_id = 0xff;
984
985 /* clear IO APIC INT table */
986 for (x = 0; x < (nintrs + 1); ++x) {
987 io_apic_ints[x].int_type = 0xff;
988 io_apic_ints[x].int_vector = 0xff;
989 }
990
984263bc 991 /* check for use of 'default' configuration */
390b18b0
SZ
992 if (fps->mpfb1 != 0) {
993 mptable_default(fps->mpfb1);
994 return;
995 }
984263bc 996
c4717d5c
SZ
997 bzero(&arg, sizeof(arg));
998 error = mptable_iterate_entries(mpt->mp_cth,
999 mptable_ioapic_pass2_callback, &arg);
1000 if (error)
1001 panic("mptable_iterate_entries(ioapic_pass2) failed\n");
984263bc
MD
1002}
1003
29bb1d92
SW
1004#endif /* APIC_IO */
1005
984263bc
MD
1006/*
1007 * Check if we should perform a hyperthreading "fix-up" to
1008 * enumerate any logical CPU's that aren't already listed
1009 * in the table.
1010 *
1011 * XXX: We assume that all of the physical CPUs in the
1012 * system have the same number of logical CPUs.
1013 *
1014 * XXX: We assume that APIC ID's are allocated such that
1015 * the APIC ID's for a physical processor are aligned
1016 * with the number of logical CPU's in the processor.
1017 */
44c36320
SZ
1018static int
1019mptable_hyperthread_fixup(u_int id_mask, int cpu_count)
984263bc 1020{
44c36320 1021 int i, id, lcpus_max, logical_cpus;
984263bc 1022
984263bc 1023 if ((cpu_feature & CPUID_HTT) == 0)
44c36320 1024 return 0;
7ea07fd2
SZ
1025
1026 lcpus_max = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
1027 if (lcpus_max <= 1)
44c36320 1028 return 0;
984263bc 1029
90e8a35b 1030 if (cpu_vendor_id == CPU_VENDOR_INTEL) {
7ea07fd2
SZ
1031 /*
1032 * INSTRUCTION SET REFERENCE, A-M (#253666)
1033 * Page 3-181, Table 3-20
1034 * "The nearest power-of-2 integer that is not smaller
1035 * than EBX[23:16] is the number of unique initial APIC
1036 * IDs reserved for addressing different logical
1037 * processors in a physical package."
1038 */
1039 for (i = 0; ; ++i) {
1040 if ((1 << i) >= lcpus_max) {
1041 lcpus_max = 1 << i;
1042 break;
1043 }
1044 }
1045 }
1046
44c36320
SZ
1047 KKASSERT(cpu_count != 0);
1048 if (cpu_count == lcpus_max) {
7ea07fd2 1049 /* We have nothing to fix */
44c36320
SZ
1050 return 0;
1051 } else if (cpu_count == 1) {
7ea07fd2
SZ
1052 /* XXX this may be incorrect */
1053 logical_cpus = lcpus_max;
1054 } else {
1055 int cur, prev, dist;
1056
1057 /*
1058 * Calculate the distances between two nearest
1059 * APIC IDs. If all such distances are same,
1060 * then it is the number of missing cpus that
1061 * we are going to fill later.
1062 */
1063 dist = cur = prev = -1;
1064 for (id = 0; id < MAXCPU; ++id) {
1065 if ((id_mask & 1 << id) == 0)
1066 continue;
1067
1068 cur = id;
1069 if (prev >= 0) {
1070 int new_dist = cur - prev;
1071
1072 if (dist < 0)
1073 dist = new_dist;
1074
1075 /*
1076 * Make sure that all distances
1077 * between two nearest APIC IDs
1078 * are same.
1079 */
1080 if (dist != new_dist)
44c36320 1081 return 0;
7ea07fd2
SZ
1082 }
1083 prev = cur;
1084 }
1085 if (dist == 1)
44c36320 1086 return 0;
7ea07fd2
SZ
1087
1088 /* Must be power of 2 */
1089 if (dist & (dist - 1))
44c36320 1090 return 0;
7ea07fd2
SZ
1091
1092 /* Can't exceed CPU package capacity */
1093 if (dist > lcpus_max)
1094 logical_cpus = lcpus_max;
1095 else
1096 logical_cpus = dist;
1097 }
1098
984263bc
MD
1099 /*
1100 * For each APIC ID of a CPU that is set in the mask,
1101 * scan the other candidate APIC ID's for this
1102 * physical processor. If any of those ID's are
1103 * already in the table, then kill the fixup.
1104 */
7ea07fd2 1105 for (id = 0; id < MAXCPU; id++) {
984263bc
MD
1106 if ((id_mask & 1 << id) == 0)
1107 continue;
1108 /* First, make sure we are on a logical_cpus boundary. */
1109 if (id % logical_cpus != 0)
44c36320 1110 return 0;
984263bc
MD
1111 for (i = id + 1; i < id + logical_cpus; i++)
1112 if ((id_mask & 1 << i) != 0)
44c36320 1113 return 0;
984263bc 1114 }
44c36320 1115 return logical_cpus;
984263bc 1116}
984263bc 1117
fa058384 1118static int
981bebd1
SZ
1119mptable_map(struct mptable_pos *mpt, vm_paddr_t mpfps_paddr)
1120{
1121 mpfps_t fps = NULL;
1122 mpcth_t cth = NULL;
1123 vm_size_t cth_mapsz = 0;
1124
fa058384
SZ
1125 bzero(mpt, sizeof(*mpt));
1126
981bebd1
SZ
1127 fps = pmap_mapdev(mpfps_paddr, sizeof(*fps));
1128 if (fps->pap != 0) {
1129 /*
1130 * Map configuration table header to get
1131 * the base table size
1132 */
1133 cth = pmap_mapdev(fps->pap, sizeof(*cth));
1134 cth_mapsz = cth->base_table_length;
1135 pmap_unmapdev((vm_offset_t)cth, sizeof(*cth));
1136
fa058384
SZ
1137 if (cth_mapsz < sizeof(*cth)) {
1138 kprintf("invalid base MP table length %d\n",
1139 (int)cth_mapsz);
1140 pmap_unmapdev((vm_offset_t)fps, sizeof(*fps));
1141 return EINVAL;
1142 }
1143
981bebd1
SZ
1144 /*
1145 * Map the base table
1146 */
1147 cth = pmap_mapdev(fps->pap, cth_mapsz);
1148 }
1149
1150 mpt->mp_fps = fps;
1151 mpt->mp_cth = cth;
1152 mpt->mp_cth_mapsz = cth_mapsz;
fa058384
SZ
1153
1154 return 0;
981bebd1
SZ
1155}
1156
1157static void
1158mptable_unmap(struct mptable_pos *mpt)
1159{
1160 if (mpt->mp_cth != NULL) {
1161 pmap_unmapdev((vm_offset_t)mpt->mp_cth, mpt->mp_cth_mapsz);
1162 mpt->mp_cth = NULL;
1163 mpt->mp_cth_mapsz = 0;
1164 }
1165 if (mpt->mp_fps != NULL) {
1166 pmap_unmapdev((vm_offset_t)mpt->mp_fps, sizeof(*mpt->mp_fps));
1167 mpt->mp_fps = NULL;
1168 }
1169}
1170
97359a5b
MD
1171#ifdef APIC_IO
1172
984263bc
MD
1173void
1174assign_apic_irq(int apic, int intpin, int irq)
1175{
1176 int x;
1177
1178 if (int_to_apicintpin[irq].ioapic != -1)
1179 panic("assign_apic_irq: inconsistent table");
1180
1181 int_to_apicintpin[irq].ioapic = apic;
1182 int_to_apicintpin[irq].int_pin = intpin;
1183 int_to_apicintpin[irq].apic_address = ioapic[apic];
1184 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1185
1186 for (x = 0; x < nintrs; x++) {
1187 if ((io_apic_ints[x].int_type == 0 ||
1188 io_apic_ints[x].int_type == 3) &&
1189 io_apic_ints[x].int_vector == 0xff &&
1190 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1191 io_apic_ints[x].dst_apic_int == intpin)
1192 io_apic_ints[x].int_vector = irq;
1193 }
1194}
1195
1196void
1197revoke_apic_irq(int irq)
1198{
1199 int x;
1200 int oldapic;
1201 int oldintpin;
1202
1203 if (int_to_apicintpin[irq].ioapic == -1)
1204 panic("revoke_apic_irq: inconsistent table");
1205
1206 oldapic = int_to_apicintpin[irq].ioapic;
1207 oldintpin = int_to_apicintpin[irq].int_pin;
1208
1209 int_to_apicintpin[irq].ioapic = -1;
1210 int_to_apicintpin[irq].int_pin = 0;
1211 int_to_apicintpin[irq].apic_address = NULL;
1212 int_to_apicintpin[irq].redirindex = 0;
1213
1214 for (x = 0; x < nintrs; x++) {
1215 if ((io_apic_ints[x].int_type == 0 ||
1216 io_apic_ints[x].int_type == 3) &&
1217 io_apic_ints[x].int_vector != 0xff &&
1218 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1219 io_apic_ints[x].dst_apic_int == oldintpin)
1220 io_apic_ints[x].int_vector = 0xff;
1221 }
1222}
1223
1439c090
MD
1224/*
1225 * Allocate an IRQ
1226 */
984263bc
MD
1227static void
1228allocate_apic_irq(int intr)
1229{
1230 int apic;
1231 int intpin;
1232 int irq;
1233
1234 if (io_apic_ints[intr].int_vector != 0xff)
1235 return; /* Interrupt handler already assigned */
1236
1237 if (io_apic_ints[intr].int_type != 0 &&
1238 (io_apic_ints[intr].int_type != 3 ||
1239 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1240 io_apic_ints[intr].dst_apic_int == 0)))
1241 return; /* Not INT or ExtInt on != (0, 0) */
1242
1243 irq = 0;
1244 while (irq < APIC_INTMAPSIZE &&
1245 int_to_apicintpin[irq].ioapic != -1)
1246 irq++;
1247
1248 if (irq >= APIC_INTMAPSIZE)
1249 return; /* No free interrupt handlers */
1250
1251 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1252 intpin = io_apic_ints[intr].dst_apic_int;
1253
1254 assign_apic_irq(apic, intpin, irq);
4612bd39 1255 io_apic_setup_intpin(apic, intpin);
984263bc
MD
1256}
1257
1258
1259static void
1260swap_apic_id(int apic, int oldid, int newid)
1261{
1262 int x;
1263 int oapic;
1264
1265
1266 if (oldid == newid)
1267 return; /* Nothing to do */
1268
26be20a0 1269 kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
984263bc
MD
1270 apic, oldid, newid);
1271
1272 /* Swap physical APIC IDs in interrupt entries */
1273 for (x = 0; x < nintrs; x++) {
1274 if (io_apic_ints[x].dst_apic_id == oldid)
1275 io_apic_ints[x].dst_apic_id = newid;
1276 else if (io_apic_ints[x].dst_apic_id == newid)
1277 io_apic_ints[x].dst_apic_id = oldid;
1278 }
1279
1280 /* Swap physical APIC IDs in IO_TO_ID mappings */
1281 for (oapic = 0; oapic < mp_napics; oapic++)
1282 if (IO_TO_ID(oapic) == newid)
1283 break;
1284
1285 if (oapic < mp_napics) {
26be20a0 1286 kprintf("Changing APIC ID for IO APIC #%d from "
984263bc
MD
1287 "%d to %d in MP table\n",
1288 oapic, newid, oldid);
1289 IO_TO_ID(oapic) = oldid;
1290 }
1291 IO_TO_ID(apic) = newid;
1292}
1293
1294
1295static void
1296fix_id_to_io_mapping(void)
1297{
1298 int x;
1299
1300 for (x = 0; x < NAPICID; x++)
1301 ID_TO_IO(x) = -1;
1302
1303 for (x = 0; x <= mp_naps; x++)
1304 if (CPU_TO_ID(x) < NAPICID)
1305 ID_TO_IO(CPU_TO_ID(x)) = x;
1306
1307 for (x = 0; x < mp_napics; x++)
1308 if (IO_TO_ID(x) < NAPICID)
1309 ID_TO_IO(IO_TO_ID(x)) = x;
1310}
1311
1312
1313static int
1314first_free_apic_id(void)
1315{
1316 int freeid, x;
1317
1318 for (freeid = 0; freeid < NAPICID; freeid++) {
1319 for (x = 0; x <= mp_naps; x++)
1320 if (CPU_TO_ID(x) == freeid)
1321 break;
1322 if (x <= mp_naps)
1323 continue;
1324 for (x = 0; x < mp_napics; x++)
1325 if (IO_TO_ID(x) == freeid)
1326 break;
1327 if (x < mp_napics)
1328 continue;
1329 return freeid;
1330 }
1331 return freeid;
1332}
1333
1334
1335static int
1336io_apic_id_acceptable(int apic, int id)
1337{
1338 int cpu; /* Logical CPU number */
1339 int oapic; /* Logical IO APIC number for other IO APIC */
1340
1341 if (id >= NAPICID)
1342 return 0; /* Out of range */
1343
1344 for (cpu = 0; cpu <= mp_naps; cpu++)
1345 if (CPU_TO_ID(cpu) == id)
1346 return 0; /* Conflict with CPU */
1347
1348 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1349 if (IO_TO_ID(oapic) == id)
1350 return 0; /* Conflict with other APIC */
1351
1352 return 1; /* ID is acceptable for IO APIC */
1353}
1354
1439c090
MD
1355static
1356io_int *
1357io_apic_find_int_entry(int apic, int pin)
1358{
1359 int x;
1360
1361 /* search each of the possible INTerrupt sources */
1362 for (x = 0; x < nintrs; ++x) {
1363 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1364 (pin == io_apic_ints[x].dst_apic_int))
1365 return (&io_apic_ints[x]);
1366 }
1367 return NULL;
1368}
1369
984263bc
MD
1370/*
1371 * parse an Intel MP specification table
1372 */
1373static void
3aba8f73 1374mptable_fix(void)
984263bc 1375{
4f6a8b30 1376 int x;
984263bc 1377 int id;
984263bc
MD
1378 int apic; /* IO APIC unit number */
1379 int freeid; /* Free physical APIC ID */
1380 int physid; /* Current physical IO APIC ID */
1439c090 1381 io_int *io14;
97359a5b
MD
1382 int bus_0 = 0; /* Stop GCC warning */
1383 int bus_pci = 0; /* Stop GCC warning */
1384 int num_pci_bus;
984263bc
MD
1385
1386 /*
1387 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1388 * did it wrong. The MP spec says that when more than 1 PCI bus
1389 * exists the BIOS must begin with bus entries for the PCI bus and use
1390 * actual PCI bus numbering. This implies that when only 1 PCI bus
1391 * exists the BIOS can choose to ignore this ordering, and indeed many
1392 * MP motherboards do ignore it. This causes a problem when the PCI
1393 * sub-system makes requests of the MP sub-system based on PCI bus
1394 * numbers. So here we look for the situation and renumber the
1395 * busses and associated INTs in an effort to "make it right".
1396 */
1397
1398 /* find bus 0, PCI bus, count the number of PCI busses */
1399 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1400 if (bus_data[x].bus_id == 0) {
1401 bus_0 = x;
1402 }
1403 if (bus_data[x].bus_type == PCI) {
1404 ++num_pci_bus;
1405 bus_pci = x;
1406 }
1407 }
1408 /*
1409 * bus_0 == slot of bus with ID of 0
1410 * bus_pci == slot of last PCI bus encountered
1411 */
1412
1413 /* check the 1 PCI bus case for sanity */
1414 /* if it is number 0 all is well */
1415 if (num_pci_bus == 1 &&
1416 bus_data[bus_pci].bus_id != 0) {
1417
1418 /* mis-numbered, swap with whichever bus uses slot 0 */
1419
1420 /* swap the bus entry types */
1421 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1422 bus_data[bus_0].bus_type = PCI;
1423
1424 /* swap each relavant INTerrupt entry */
1425 id = bus_data[bus_pci].bus_id;
1426 for (x = 0; x < nintrs; ++x) {
1427 if (io_apic_ints[x].src_bus_id == id) {
1428 io_apic_ints[x].src_bus_id = 0;
1429 }
1430 else if (io_apic_ints[x].src_bus_id == 0) {
1431 io_apic_ints[x].src_bus_id = id;
1432 }
1433 }
1434 }
1435
1436 /* Assign IO APIC IDs.
1437 *
1438 * First try the existing ID. If a conflict is detected, try
1439 * the ID in the MP table. If a conflict is still detected, find
1440 * a free id.
1441 *
1442 * We cannot use the ID_TO_IO table before all conflicts has been
1443 * resolved and the table has been corrected.
1444 */
1445 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1446
1447 /* First try to use the value set by the BIOS */
1448 physid = io_apic_get_id(apic);
1449 if (io_apic_id_acceptable(apic, physid)) {
1450 if (IO_TO_ID(apic) != physid)
1451 swap_apic_id(apic, IO_TO_ID(apic), physid);
1452 continue;
1453 }
1454
1455 /* Then check if the value in the MP table is acceptable */
1456 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1457 continue;
1458
1459 /* Last resort, find a free APIC ID and use it */
1460 freeid = first_free_apic_id();
1461 if (freeid >= NAPICID)
1462 panic("No free physical APIC IDs found");
1463
1464 if (io_apic_id_acceptable(apic, freeid)) {
1465 swap_apic_id(apic, IO_TO_ID(apic), freeid);
1466 continue;
1467 }
1468 panic("Free physical APIC ID not usable");
1469 }
1470 fix_id_to_io_mapping();
1471
1472 /* detect and fix broken Compaq MP table */
1473 if (apic_int_type(0, 0) == -1) {
26be20a0 1474 kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
984263bc
MD
1475 io_apic_ints[nintrs].int_type = 3; /* ExtInt */
1476 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
1477 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1478 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1479 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */
1480 nintrs++;
ad12f88c 1481 } else if (apic_int_type(0, 0) == 0) {
26be20a0 1482 kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
ad12f88c
HP
1483 for (x = 0; x < nintrs; ++x)
1484 if ((0 == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1485 (0 == io_apic_ints[x].dst_apic_int)) {
1486 io_apic_ints[x].int_type = 3;
1487 io_apic_ints[x].int_vector = 0xff;
1488 break;
1489 }
984263bc 1490 }
1439c090
MD
1491
1492 /*
1493 * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt. IDE
1494 * controllers universally come in pairs. If IRQ 14 is specified
1495 * as an ISA interrupt, then IRQ 15 had better be too.
1496 *
1497 * [ Shuttle XPC / AMD Athlon X2 ]
1498 * The MPTable is missing an entry for IRQ 15. Note that the
1499 * ACPI table has an entry for both 14 and 15.
1500 */
1501 if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) {
26be20a0 1502 kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n");
1439c090
MD
1503 io14 = io_apic_find_int_entry(0, 14);
1504 io_apic_ints[nintrs] = *io14;
1505 io_apic_ints[nintrs].src_bus_irq = 15;
1506 io_apic_ints[nintrs].dst_apic_int = 15;
1507 nintrs++;
1508 }
984263bc
MD
1509}
1510
984263bc
MD
1511/* Assign low level interrupt handlers */
1512static void
1513setup_apic_irq_mapping(void)
1514{
1515 int x;
1516 int int_vector;
1517
1518 /* Clear array */
1519 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1520 int_to_apicintpin[x].ioapic = -1;
1521 int_to_apicintpin[x].int_pin = 0;
1522 int_to_apicintpin[x].apic_address = NULL;
1523 int_to_apicintpin[x].redirindex = 0;
ea689d1c
SZ
1524
1525 /* Default to masked */
1526 int_to_apicintpin[x].flags = IOAPIC_IM_FLAG_MASKED;
984263bc
MD
1527 }
1528
1529 /* First assign ISA/EISA interrupts */
1530 for (x = 0; x < nintrs; x++) {
1531 int_vector = io_apic_ints[x].src_bus_irq;
1532 if (int_vector < APIC_INTMAPSIZE &&
1533 io_apic_ints[x].int_vector == 0xff &&
1534 int_to_apicintpin[int_vector].ioapic == -1 &&
1535 (apic_int_is_bus_type(x, ISA) ||
1536 apic_int_is_bus_type(x, EISA)) &&
1537 io_apic_ints[x].int_type == 0) {
1538 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1539 io_apic_ints[x].dst_apic_int,
1540 int_vector);
1541 }
1542 }
1543
1544 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1545 for (x = 0; x < nintrs; x++) {
1546 if (io_apic_ints[x].dst_apic_int == 0 &&
1547 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1548 io_apic_ints[x].int_vector == 0xff &&
1549 int_to_apicintpin[0].ioapic == -1 &&
1550 io_apic_ints[x].int_type == 3) {
1551 assign_apic_irq(0, 0, 0);
1552 break;
1553 }
1554 }
4612bd39 1555 /* PCI interrupt assignment is deferred */
984263bc
MD
1556}
1557
97359a5b 1558#endif
984263bc 1559
a9112655
SZ
1560void
1561mp_set_cpuids(int cpu_id, int apic_id)
1562{
1563 CPU_TO_ID(cpu_id) = apic_id;
1564 ID_TO_CPU(apic_id) = cpu_id;
1565}
1566
984263bc 1567static int
a0873f07 1568processor_entry(const struct PROCENTRY *entry, int cpu)
984263bc 1569{
bd8aa7e2
SZ
1570 KKASSERT(cpu > 0);
1571
984263bc
MD
1572 /* check for usability */
1573 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1574 return 0;
1575
984263bc
MD
1576 /* check for BSP flag */
1577 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
a9112655 1578 mp_set_cpuids(0, entry->apic_id);
984263bc
MD
1579 return 0; /* its already been counted */
1580 }
1581
1582 /* add another AP to list, if less than max number of CPUs */
1583 else if (cpu < MAXCPU) {
a9112655 1584 mp_set_cpuids(cpu, entry->apic_id);
984263bc
MD
1585 return 1;
1586 }
1587
1588 return 0;
1589}
1590
4f6a8b30 1591#ifdef APIC_IO
984263bc
MD
1592
1593static int
c4717d5c 1594bus_entry(const struct BUSENTRY *entry, int bus)
984263bc
MD
1595{
1596 int x;
1597 char c, name[8];
1598
1599 /* encode the name into an index */
1600 for (x = 0; x < 6; ++x) {
1601 if ((c = entry->bus_type[x]) == ' ')
1602 break;
1603 name[x] = c;
1604 }
1605 name[x] = '\0';
1606
1607 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1608 panic("unknown bus type: '%s'", name);
1609
1610 bus_data[bus].bus_id = entry->bus_id;
1611 bus_data[bus].bus_type = x;
1612
1613 return 1;
1614}
1615
984263bc 1616static int
c4717d5c 1617io_apic_entry(const struct IOAPICENTRY *entry, int apic)
984263bc
MD
1618{
1619 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1620 return 0;
1621
1622 IO_TO_ID(apic) = entry->apic_id;
c163176b 1623 ID_TO_IO(entry->apic_id) = apic;
984263bc
MD
1624
1625 return 1;
1626}
1627
984263bc
MD
1628static int
1629lookup_bus_type(char *name)
1630{
1631 int x;
1632
1633 for (x = 0; x < MAX_BUSTYPE; ++x)
1634 if (strcmp(bus_type_table[x].name, name) == 0)
1635 return bus_type_table[x].type;
1636
1637 return UNKNOWN_BUSTYPE;
1638}
1639
984263bc 1640static int
c4717d5c 1641int_entry(const struct INTENTRY *entry, int intr)
984263bc
MD
1642{
1643 int apic;
1644
1645 io_apic_ints[intr].int_type = entry->int_type;
1646 io_apic_ints[intr].int_flags = entry->int_flags;
1647 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1648 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1649 if (entry->dst_apic_id == 255) {
1650 /* This signal goes to all IO APICS. Select an IO APIC
1651 with sufficient number of interrupt pins */
1652 for (apic = 0; apic < mp_napics; apic++)
1653 if (((io_apic_read(apic, IOAPIC_VER) &
1654 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1655 entry->dst_apic_int)
1656 break;
1657 if (apic < mp_napics)
1658 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1659 else
1660 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1661 } else
1662 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1663 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1664
1665 return 1;
1666}
1667
984263bc
MD
1668static int
1669apic_int_is_bus_type(int intr, int bus_type)
1670{
1671 int bus;
1672
1673 for (bus = 0; bus < mp_nbusses; ++bus)
1674 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1675 && ((int) bus_data[bus].bus_type == bus_type))
1676 return 1;
1677
1678 return 0;
1679}
1680
984263bc
MD
1681/*
1682 * Given a traditional ISA INT mask, return an APIC mask.
1683 */
1684u_int
1685isa_apic_mask(u_int isa_mask)
1686{
1687 int isa_irq;
1688 int apic_pin;
1689
1690#if defined(SKIP_IRQ15_REDIRECT)
1691 if (isa_mask == (1 << 15)) {
26be20a0 1692 kprintf("skipping ISA IRQ15 redirect\n");
984263bc
MD
1693 return isa_mask;
1694 }
1695#endif /* SKIP_IRQ15_REDIRECT */
1696
1697 isa_irq = ffs(isa_mask); /* find its bit position */
1698 if (isa_irq == 0) /* doesn't exist */
1699 return 0;
1700 --isa_irq; /* make it zero based */
1701
1702 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1703 if (apic_pin == -1)
1704 return 0;
1705
1706 return (1 << apic_pin); /* convert pin# to a mask */
1707}
1708
984263bc
MD
1709/*
1710 * Determine which APIC pin an ISA/EISA INT is attached to.
1711 */
1712#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1713#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1714#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1715#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1716
1717#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1718int
1719isa_apic_irq(int isa_irq)
1720{
1721 int intr;
1722
1723 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1724 if (INTTYPE(intr) == 0) { /* standard INT */
1725 if (SRCBUSIRQ(intr) == isa_irq) {
1726 if (apic_int_is_bus_type(intr, ISA) ||
1727 apic_int_is_bus_type(intr, EISA)) {
1728 if (INTIRQ(intr) == 0xff)
1729 return -1; /* unassigned */
1730 return INTIRQ(intr); /* found */
1731 }
1732 }
1733 }
1734 }
1735 return -1; /* NOT found */
1736}
1737
1738
1739/*
1740 * Determine which APIC pin a PCI INT is attached to.
1741 */
1742#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1743#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1744#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1745int
1746pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1747{
1748 int intr;
1749
1750 --pciInt; /* zero based */
1751
1439c090 1752 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
984263bc
MD
1753 if ((INTTYPE(intr) == 0) /* standard INT */
1754 && (SRCBUSID(intr) == pciBus)
1755 && (SRCBUSDEVICE(intr) == pciDevice)
1439c090 1756 && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */
984263bc 1757 if (apic_int_is_bus_type(intr, PCI)) {
4612bd39
AP
1758 if (INTIRQ(intr) == 0xff)
1759 allocate_apic_irq(intr);
1760 if (INTIRQ(intr) == 0xff)
984263bc
MD
1761 return -1; /* unassigned */
1762 return INTIRQ(intr); /* exact match */
1763 }
1439c090
MD
1764 }
1765 }
984263bc
MD
1766
1767 return -1; /* NOT found */
1768}
1769
1770int
1771next_apic_irq(int irq)
1772{
1773 int intr, ointr;
1774 int bus, bustype;
1775
1776 bus = 0;
1777 bustype = 0;
1778 for (intr = 0; intr < nintrs; intr++) {
1779 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1780 continue;
1781 bus = SRCBUSID(intr);
1782 bustype = apic_bus_type(bus);
1783 if (bustype != ISA &&
1784 bustype != EISA &&
1785 bustype != PCI)
1786 continue;
1787 break;
1788 }
1789 if (intr >= nintrs) {
1790 return -1;
1791 }
1792 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1793 if (INTTYPE(ointr) != 0)
1794 continue;
1795 if (bus != SRCBUSID(ointr))
1796 continue;
1797 if (bustype == PCI) {
1798 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1799 continue;
1800 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1801 continue;
1802 }
1803 if (bustype == ISA || bustype == EISA) {
1804 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1805 continue;
1806 }
1807 if (INTPIN(intr) == INTPIN(ointr))
1808 continue;
1809 break;
1810 }
1811 if (ointr >= nintrs) {
1812 return -1;
1813 }
1814 return INTIRQ(ointr);
1815}
1816#undef SRCBUSLINE
1817#undef SRCBUSDEVICE
1818#undef SRCBUSID
1819#undef SRCBUSIRQ
1820
1821#undef INTPIN
1822#undef INTIRQ
1823#undef INTAPIC
1824#undef INTTYPE
1825
97359a5b 1826#endif
984263bc
MD
1827
1828/*
1829 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1830 *
1831 * XXX FIXME:
1832 * Exactly what this means is unclear at this point. It is a solution
1833 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1834 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1835 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1836 * option.
1837 */
1838int
1839undirect_isa_irq(int rirq)
1840{
1841#if defined(READY)
1842 if (bootverbose)
26be20a0 1843 kprintf("Freeing redirected ISA irq %d.\n", rirq);
984263bc 1844 /** FIXME: tickle the MB redirector chip */
c044141b 1845 return /* XXX */;
984263bc
MD
1846#else
1847 if (bootverbose)
26be20a0 1848 kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
984263bc
MD
1849 return 0;
1850#endif /* READY */
1851}
1852
1853
1854/*
1855 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1856 */
1857int
1858undirect_pci_irq(int rirq)
1859{
1860#if defined(READY)
1861 if (bootverbose)
26be20a0 1862 kprintf("Freeing redirected PCI irq %d.\n", rirq);
984263bc
MD
1863
1864 /** FIXME: tickle the MB redirector chip */
c044141b 1865 return /* XXX */;
984263bc
MD
1866#else
1867 if (bootverbose)
26be20a0 1868 kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n",
984263bc
MD
1869 rirq);
1870 return 0;
1871#endif /* READY */
1872}
1873
1874
4f6a8b30
SZ
1875#ifdef APIC_IO
1876
984263bc
MD
1877/*
1878 * given a bus ID, return:
1879 * the bus type if found
1880 * -1 if NOT found
1881 */
1882int
1883apic_bus_type(int id)
1884{
1885 int x;
1886
1887 for (x = 0; x < mp_nbusses; ++x)
1888 if (bus_data[x].bus_id == id)
1889 return bus_data[x].bus_type;
1890
1891 return -1;
1892}
1893
984263bc
MD
1894/*
1895 * given a LOGICAL APIC# and pin#, return:
1896 * the associated src bus ID if found
1897 * -1 if NOT found
1898 */
1899int
1900apic_src_bus_id(int apic, int pin)
1901{
1902 int x;
1903
1904 /* search each of the possible INTerrupt sources */
1905 for (x = 0; x < nintrs; ++x)
1906 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1907 (pin == io_apic_ints[x].dst_apic_int))
1908 return (io_apic_ints[x].src_bus_id);
1909
1910 return -1; /* NOT found */
1911}
1912
984263bc
MD
1913/*
1914 * given a LOGICAL APIC# and pin#, return:
1915 * the associated src bus IRQ if found
1916 * -1 if NOT found
1917 */
1918int
1919apic_src_bus_irq(int apic, int pin)
1920{
1921 int x;
1922
1923 for (x = 0; x < nintrs; x++)
1924 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1925 (pin == io_apic_ints[x].dst_apic_int))
1926 return (io_apic_ints[x].src_bus_irq);
1927
1928 return -1; /* NOT found */
1929}
1930
1931
1932/*
1933 * given a LOGICAL APIC# and pin#, return:
1934 * the associated INTerrupt type if found
1935 * -1 if NOT found
1936 */
1937int
1938apic_int_type(int apic, int pin)
1939{
1940 int x;
1941
1942 /* search each of the possible INTerrupt sources */
1439c090 1943 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1944 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1945 (pin == io_apic_ints[x].dst_apic_int))
1946 return (io_apic_ints[x].int_type);
1439c090 1947 }
984263bc
MD
1948 return -1; /* NOT found */
1949}
1950
1439c090
MD
1951/*
1952 * Return the IRQ associated with an APIC pin
1953 */
984263bc
MD
1954int
1955apic_irq(int apic, int pin)
1956{
1957 int x;
1958 int res;
1959
1439c090 1960 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1961 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1962 (pin == io_apic_ints[x].dst_apic_int)) {
1963 res = io_apic_ints[x].int_vector;
1964 if (res == 0xff)
1965 return -1;
1966 if (apic != int_to_apicintpin[res].ioapic)
1439c090 1967 panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic);
984263bc
MD
1968 if (pin != int_to_apicintpin[res].int_pin)
1969 panic("apic_irq inconsistent table (2)");
1970 return res;
1971 }
1439c090 1972 }
984263bc
MD
1973 return -1;
1974}
1975
1976
1977/*
1978 * given a LOGICAL APIC# and pin#, return:
1979 * the associated trigger mode if found
1980 * -1 if NOT found
1981 */
1982int
1983apic_trigger(int apic, int pin)
1984{
1985 int x;
1986
1987 /* search each of the possible INTerrupt sources */
1988 for (x = 0; x < nintrs; ++x)
1989 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1990 (pin == io_apic_ints[x].dst_apic_int))
1991 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1992
1993 return -1; /* NOT found */
1994}
1995
1996
1997/*
1998 * given a LOGICAL APIC# and pin#, return:
1999 * the associated 'active' level if found
2000 * -1 if NOT found
2001 */
2002int
2003apic_polarity(int apic, int pin)
2004{
2005 int x;
2006
2007 /* search each of the possible INTerrupt sources */
2008 for (x = 0; x < nintrs; ++x)
2009 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
2010 (pin == io_apic_ints[x].dst_apic_int))
2011 return (io_apic_ints[x].int_flags & 0x03);
2012
2013 return -1; /* NOT found */
2014}
2015
984263bc
MD
2016/*
2017 * set data according to MP defaults
2018 * FIXME: probably not complete yet...
2019 */
2020static void
3aba8f73 2021mptable_default(int type)
984263bc 2022{
984263bc
MD
2023 int io_apic_id;
2024 int pin;
984263bc
MD
2025
2026#if 0
26be20a0 2027 kprintf(" MP default config type: %d\n", type);
984263bc
MD
2028 switch (type) {
2029 case 1:
26be20a0 2030 kprintf(" bus: ISA, APIC: 82489DX\n");
984263bc
MD
2031 break;
2032 case 2:
26be20a0 2033 kprintf(" bus: EISA, APIC: 82489DX\n");
984263bc
MD
2034 break;
2035 case 3:
26be20a0 2036 kprintf(" bus: EISA, APIC: 82489DX\n");
984263bc
MD
2037 break;
2038 case 4:
26be20a0 2039 kprintf(" bus: MCA, APIC: 82489DX\n");
984263bc
MD
2040 break;
2041 case 5:
26be20a0 2042 kprintf(" bus: ISA+PCI, APIC: Integrated\n");
984263bc
MD
2043 break;
2044 case 6:
26be20a0 2045 kprintf(" bus: EISA+PCI, APIC: Integrated\n");
984263bc
MD
2046 break;
2047 case 7:
26be20a0 2048 kprintf(" bus: MCA+PCI, APIC: Integrated\n");
984263bc
MD
2049 break;
2050 default:
26be20a0 2051 kprintf(" future type\n");
984263bc
MD
2052 break;
2053 /* NOTREACHED */
2054 }
2055#endif /* 0 */
2056
984263bc
MD
2057 /* one and only IO APIC */
2058 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
2059
2060 /*
2061 * sanity check, refer to MP spec section 3.6.6, last paragraph
2062 * necessary as some hardware isn't properly setting up the IO APIC
2063 */
2064#if defined(REALLY_ANAL_IOAPICID_VALUE)
2065 if (io_apic_id != 2) {
2066#else
2067 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
2068#endif /* REALLY_ANAL_IOAPICID_VALUE */
2069 io_apic_set_id(0, 2);
2070 io_apic_id = 2;
2071 }
2072 IO_TO_ID(0) = io_apic_id;
2073 ID_TO_IO(io_apic_id) = 0;
984263bc
MD
2074
2075 /* fill out bus entries */
2076 switch (type) {
2077 case 1:
2078 case 2:
2079 case 3:
2080 case 4:
2081 case 5:
2082 case 6:
2083 case 7:
2084 bus_data[0].bus_id = default_data[type - 1][1];
2085 bus_data[0].bus_type = default_data[type - 1][2];
2086 bus_data[1].bus_id = default_data[type - 1][3];
2087 bus_data[1].bus_type = default_data[type - 1][4];
2088 break;
2089
2090 /* case 4: case 7: MCA NOT supported */
2091 default: /* illegal/reserved */
2092 panic("BAD default MP config: %d", type);
2093 /* NOTREACHED */
2094 }
2095
984263bc
MD
2096 /* general cases from MP v1.4, table 5-2 */
2097 for (pin = 0; pin < 16; ++pin) {
2098 io_apic_ints[pin].int_type = 0;
2099 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */
2100 io_apic_ints[pin].src_bus_id = 0;
2101 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */
2102 io_apic_ints[pin].dst_apic_id = io_apic_id;
2103 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */
2104 }
2105
2106 /* special cases from MP v1.4, table 5-2 */
2107 if (type == 2) {
2108 io_apic_ints[2].int_type = 0xff; /* N/C */
2109 io_apic_ints[13].int_type = 0xff; /* N/C */
2110#if !defined(APIC_MIXED_MODE)
2111 /** FIXME: ??? */
2112 panic("sorry, can't support type 2 default yet");
2113#endif /* APIC_MIXED_MODE */
2114 }
2115 else
2116 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */
2117
2118 if (type == 7)
2119 io_apic_ints[0].int_type = 0xff; /* N/C */
2120 else
2121 io_apic_ints[0].int_type = 3; /* vectored 8259 */
984263bc
MD
2122}
2123
29bb1d92
SW
2124#endif /* APIC_IO */
2125
984263bc 2126/*
f13b5eec
MD
2127 * Map a physical memory address representing I/O into KVA. The I/O
2128 * block is assumed not to cross a page boundary.
2129 */
2130void *
2131permanent_io_mapping(vm_paddr_t pa)
2132{
2133 vm_offset_t vaddr;
2134 int pgeflag;
2135 int i;
2136
2137 KKASSERT(pa < 0x100000000LL);
2138
2139 pgeflag = 0; /* not used for SMP yet */
2140
2141 /*
2142 * If the requested physical address has already been incidently
2143 * mapped, just use the existing mapping. Otherwise create a new
2144 * mapping.
2145 */
2146 for (i = IO_MAPPING_START_INDEX; i < SMPpt_alloc_index; ++i) {
2147 if (((vm_offset_t)SMPpt[i] & PG_FRAME) ==
2148 ((vm_offset_t)pa & PG_FRAME)) {
2149 break;
2150 }
2151 }
2152 if (i == SMPpt_alloc_index) {
2153 if (i == NPTEPG - 2) {
2154 panic("permanent_io_mapping: We ran out of space"
2155 " in SMPpt[]!");
2156 }
5277b9f6 2157 SMPpt[i] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
f13b5eec
MD
2158 ((vm_offset_t)pa & PG_FRAME));
2159 ++SMPpt_alloc_index;
2160 }
2161 vaddr = (vm_offset_t)CPU_prvspace + (i * PAGE_SIZE) +
2162 ((vm_offset_t)pa & PAGE_MASK);
2163 return ((void *)vaddr);
2164}
2165
2166/*
984263bc
MD
2167 * start each AP in our list
2168 */
2169static int
2170start_all_aps(u_int boot_addr)
2171{
b45759e1
MD
2172 int x, i, pg;
2173 int shift;
bb467734
MD
2174 int smicount;
2175 int smibest;
2176 int smilast;
984263bc
MD
2177 u_char mpbiosreason;
2178 u_long mpbioswarmvec;
8a8d5d85 2179 struct mdglobaldata *gd;
0f7a3396 2180 struct privatespace *ps;
984263bc
MD
2181 char *stack;
2182 uintptr_t kptbase;
2183
2184 POSTCODE(START_ALL_APS_POST);
2185
b52c8db0
SZ
2186 /* Initialize BSP's local APIC */
2187 apic_initialize(TRUE);
984263bc
MD
2188
2189 /* install the AP 1st level boot code */
2190 install_ap_tramp(boot_addr);
2191
2192
2193 /* save the current value of the warm-start vector */
2194 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
984263bc
MD
2195 outb(CMOS_REG, BIOS_RESET);
2196 mpbiosreason = inb(CMOS_DATA);
984263bc 2197
bb467734
MD
2198 /* setup a vector to our boot code */
2199 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2200 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
2201 outb(CMOS_REG, BIOS_RESET);
2202 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
2203
2204 /*
2205 * If we have a TSC we can figure out the SMI interrupt rate.
2206 * The SMI does not necessarily use a constant rate. Spend
2207 * up to 250ms trying to figure it out.
2208 */
2209 smibest = 0;
2210 if (cpu_feature & CPUID_TSC) {
2211 set_apic_timer(275000);
2212 smilast = read_apic_timer();
2213 for (x = 0; x < 20 && read_apic_timer(); ++x) {
2214 smicount = smitest();
2215 if (smibest == 0 || smilast - smicount < smibest)
2216 smibest = smilast - smicount;
2217 smilast = smicount;
2218 }
2219 if (smibest > 250000)
2220 smibest = 0;
2221 if (smibest) {
2222 smibest = smibest * (int64_t)1000000 /
2223 get_apic_timer_frequency();
2224 }
2225 }
2226 if (smibest)
2227 kprintf("SMI Frequency (worst case): %d Hz (%d us)\n",
2228 1000000 / smibest, smibest);
2229
2230
984263bc
MD
2231 /* set up temporary P==V mapping for AP boot */
2232 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
2233 kptbase = (uintptr_t)(void *)KPTphys;
a44bdeec 2234 for (x = 0; x < NKPT; x++) {
984263bc
MD
2235 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
2236 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
a44bdeec 2237 }
0f7a3396 2238 cpu_invltlb();
984263bc
MD
2239
2240 /* start each AP */
2241 for (x = 1; x <= mp_naps; ++x) {
2242
2243 /* This is a bit verbose, it will go away soon. */
2244
2245 /* first page of AP's private space */
2246 pg = x * i386_btop(sizeof(struct privatespace));
2247
81c04d07 2248 /* allocate new private data page(s) */
e4846942 2249 gd = (struct mdglobaldata *)kmem_alloc(&kernel_map,
81c04d07 2250 MDGLOBALDATA_BASEALLOC_SIZE);
984263bc 2251 /* wire it into the private page table page */
81c04d07
MD
2252 for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) {
2253 SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t)
2254 (PG_V | PG_RW | vtophys_pte((char *)gd + i));
2255 }
2256 pg += MDGLOBALDATA_BASEALLOC_PAGES;
2257
2258 SMPpt[pg + 0] = 0; /* *gd_CMAP1 */
2259 SMPpt[pg + 1] = 0; /* *gd_CMAP2 */
2260 SMPpt[pg + 2] = 0; /* *gd_CMAP3 */
2261 SMPpt[pg + 3] = 0; /* *gd_PMAP1 */
984263bc
MD
2262
2263 /* allocate and set up an idle stack data page */
e4846942 2264 stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE);
8a8d5d85 2265 for (i = 0; i < UPAGES; i++) {
81c04d07 2266 SMPpt[pg + 4 + i] = (pt_entry_t)
b5b32410 2267 (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack));
8a8d5d85 2268 }
984263bc 2269
8a8d5d85
MD
2270 gd = &CPU_prvspace[x].mdglobaldata; /* official location */
2271 bzero(gd, sizeof(*gd));
0f7a3396 2272 gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
8a8d5d85 2273
984263bc 2274 /* prime data page for it to use */
8a8d5d85 2275 mi_gdinit(&gd->mi, x);
8ad65e08 2276 cpu_gdinit(gd, x);
81c04d07
MD
2277 gd->gd_CMAP1 = &SMPpt[pg + 0];
2278 gd->gd_CMAP2 = &SMPpt[pg + 1];
2279 gd->gd_CMAP3 = &SMPpt[pg + 2];
2280 gd->gd_PMAP1 = &SMPpt[pg + 3];
d2fd2a60 2281 gd->gd_GDMAP1 = &PTD[KGDTDI+x];
0f7a3396
MD
2282 gd->gd_CADDR1 = ps->CPAGE1;
2283 gd->gd_CADDR2 = ps->CPAGE2;
2284 gd->gd_CADDR3 = ps->CPAGE3;
2285 gd->gd_PADDR1 = (unsigned *)ps->PPAGE1;
d2fd2a60 2286 gd->gd_GDADDR1= (unsigned *)VADDR(KGDTDI+x, 0);
e4846942 2287 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
96728c05 2288 bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
984263bc 2289
8a8d5d85
MD
2290 /*
2291 * Setup the AP boot stack
2292 */
0f7a3396 2293 bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
984263bc
MD
2294 bootAP = x;
2295
2296 /* attempt to start the Application Processor */
2297 CHECK_INIT(99); /* setup checkpoints */
bb467734 2298 if (!start_ap(gd, boot_addr, smibest)) {
26be20a0 2299 kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
984263bc
MD
2300 CHECK_PRINT("trace"); /* show checkpoints */
2301 /* better panic as the AP may be running loose */
26be20a0 2302 kprintf("panic y/n? [y] ");
984263bc
MD
2303 if (cngetc() != 'n')
2304 panic("bye-bye");
2305 }
2306 CHECK_PRINT("trace"); /* show checkpoints */
2307
2308 /* record its version info */
2309 cpu_apic_versions[x] = cpu_apic_versions[0];
984263bc
MD
2310 }
2311
0f7a3396
MD
2312 /* set ncpus to 1 + highest logical cpu. Not all may have come up */
2313 ncpus = x;
2314
b45759e1
MD
2315 /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
2316 for (shift = 0; (1 << shift) <= ncpus; ++shift)
2317 ;
2318 --shift;
2319 ncpus2_shift = shift;
2320 ncpus2 = 1 << shift;
90100055
JH
2321 ncpus2_mask = ncpus2 - 1;
2322
b45759e1
MD
2323 /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
2324 if ((1 << shift) < ncpus)
2325 ++shift;
2326 ncpus_fit = 1 << shift;
2327 ncpus_fit_mask = ncpus_fit - 1;
2328
984263bc 2329 /* build our map of 'other' CPUs */
0f7a3396 2330 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
e4846942 2331 mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus);
96728c05 2332 bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
984263bc
MD
2333
2334 /* fill in our (BSP) APIC version */
2335 cpu_apic_versions[0] = lapic.version;
2336
2337 /* restore the warmstart vector */
2338 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
984263bc
MD
2339 outb(CMOS_REG, BIOS_RESET);
2340 outb(CMOS_DATA, mpbiosreason);
984263bc
MD
2341
2342 /*
8a8d5d85
MD
2343 * NOTE! The idlestack for the BSP was setup by locore. Finish
2344 * up, clean out the P==V mapping we did earlier.
984263bc 2345 */
984263bc
MD
2346 for (x = 0; x < NKPT; x++)
2347 PTD[x] = 0;
2348 pmap_set_opt();
2349
2350 /* number of APs actually started */
8a8d5d85 2351 return ncpus - 1;
984263bc
MD
2352}
2353
2354
2355/*
2356 * load the 1st level AP boot code into base memory.
2357 */
2358
2359/* targets for relocation */
2360extern void bigJump(void);
2361extern void bootCodeSeg(void);
2362extern void bootDataSeg(void);
2363extern void MPentry(void);
2364extern u_int MP_GDT;
2365extern u_int mp_gdtbase;
2366
2367static void
2368install_ap_tramp(u_int boot_addr)
2369{
2370 int x;
2371 int size = *(int *) ((u_long) & bootMP_size);
2372 u_char *src = (u_char *) ((u_long) bootMP);
2373 u_char *dst = (u_char *) boot_addr + KERNBASE;
2374 u_int boot_base = (u_int) bootMP;
2375 u_int8_t *dst8;
2376 u_int16_t *dst16;
2377 u_int32_t *dst32;
2378
2379 POSTCODE(INSTALL_AP_TRAMP_POST);
2380
2381 for (x = 0; x < size; ++x)
2382 *dst++ = *src++;
2383
2384 /*
2385 * modify addresses in code we just moved to basemem. unfortunately we
2386 * need fairly detailed info about mpboot.s for this to work. changes
2387 * to mpboot.s might require changes here.
2388 */
2389
2390 /* boot code is located in KERNEL space */
2391 dst = (u_char *) boot_addr + KERNBASE;
2392
2393 /* modify the lgdt arg */
2394 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2395 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2396
2397 /* modify the ljmp target for MPentry() */
2398 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2399 *dst32 = ((u_int) MPentry - KERNBASE);
2400
2401 /* modify the target for boot code segment */
2402 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2403 dst8 = (u_int8_t *) (dst16 + 1);
2404 *dst16 = (u_int) boot_addr & 0xffff;
2405 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2406
2407 /* modify the target for boot data segment */
2408 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2409 dst8 = (u_int8_t *) (dst16 + 1);
2410 *dst16 = (u_int) boot_addr & 0xffff;
2411 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2412}
2413
2414
2415/*
bb467734 2416 * This function starts the AP (application processor) identified
984263bc
MD
2417 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
2418 * to accomplish this. This is necessary because of the nuances
2419 * of the different hardware we might encounter. It ain't pretty,
2420 * but it seems to work.
a108bf71
MD
2421 *
2422 * NOTE: eventually an AP gets to ap_init(), which is called just
2423 * before the AP goes into the LWKT scheduler's idle loop.
984263bc
MD
2424 */
2425static int
bb467734 2426start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest)
984263bc
MD
2427{
2428 int physical_cpu;
2429 int vector;
984263bc
MD
2430 u_long icr_lo, icr_hi;
2431
2432 POSTCODE(START_AP_POST);
2433
2434 /* get the PHYSICAL APIC ID# */
0f7a3396 2435 physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
984263bc
MD
2436
2437 /* calculate the vector */
2438 vector = (boot_addr >> 12) & 0xff;
2439
bb467734
MD
2440 /* We don't want anything interfering */
2441 cpu_disable_intr();
2442
8a8d5d85
MD
2443 /* Make sure the target cpu sees everything */
2444 wbinvd();
984263bc
MD
2445
2446 /*
bb467734
MD
2447 * Try to detect when a SMI has occurred, wait up to 200ms.
2448 *
2449 * If a SMI occurs during an AP reset but before we issue
2450 * the STARTUP command, the AP may brick. To work around
2451 * this problem we hold off doing the AP startup until
2452 * after we have detected the SMI. Hopefully another SMI
2453 * will not occur before we finish the AP startup.
2454 *
2455 * Retries don't seem to help. SMIs have a window of opportunity
2456 * and if USB->legacy keyboard emulation is enabled in the BIOS
2457 * the interrupt rate can be quite high.
2458 *
2459 * NOTE: Don't worry about the L1 cache load, it might bloat
2460 * ldelta a little but ndelta will be so huge when the SMI
2461 * occurs the detection logic will still work fine.
2462 */
2463 if (smibest) {
2464 set_apic_timer(200000);
2465 smitest();
2466 }
2467
2468 /*
984263bc
MD
2469 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2470 * and running the target CPU. OR this INIT IPI might be latched (P5
2471 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2472 * ignored.
bb467734
MD
2473 *
2474 * see apic/apicreg.h for icr bit definitions.
2475 *
2476 * TIME CRITICAL CODE, DO NOT DO ANY KPRINTFS IN THE HOT PATH.
984263bc
MD
2477 */
2478
bb467734
MD
2479 /*
2480 * Setup the address for the target AP. We can setup
2481 * icr_hi once and then just trigger operations with
2482 * icr_lo.
2483 */
984263bc
MD
2484 icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2485 icr_hi |= (physical_cpu << 24);
bb467734 2486 icr_lo = lapic.icr_lo & 0xfff00000;
984263bc
MD
2487 lapic.icr_hi = icr_hi;
2488
bb467734
MD
2489 /*
2490 * Do an INIT IPI: assert RESET
2491 *
2492 * Use edge triggered mode to assert INIT
2493 */
984263bc 2494 lapic.icr_lo = icr_lo | 0x0000c500;
984263bc
MD
2495 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2496 /* spin */ ;
2497
bb467734
MD
2498 /*
2499 * The spec calls for a 10ms delay but we may have to use a
2500 * MUCH lower delay to avoid bricking an AP due to a fast SMI
2501 * interrupt. We have other loops here too and dividing by 2
2502 * doesn't seem to be enough even after subtracting 350us,
2503 * so we divide by 4.
2504 *
2505 * Our minimum delay is 150uS, maximum is 10ms. If no SMI
2506 * interrupt was detected we use the full 10ms.
2507 */
2508 if (smibest == 0)
2509 u_sleep(10000);
2510 else if (smibest < 150 * 4 + 350)
2511 u_sleep(150);
2512 else if ((smibest - 350) / 4 < 10000)
2513 u_sleep((smibest - 350) / 4);
2514 else
2515 u_sleep(10000);
984263bc 2516
bb467734
MD
2517 /*
2518 * Do an INIT IPI: deassert RESET
2519 *
2520 * Use level triggered mode to deassert. It is unclear
2521 * why we need to do this.
2522 */
2523 lapic.icr_lo = icr_lo | 0x00008500;
984263bc
MD
2524 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2525 /* spin */ ;
bb467734 2526 u_sleep(150); /* wait 150us */
984263bc
MD
2527
2528 /*
bb467734 2529 * Next we do a STARTUP IPI: the previous INIT IPI might still be
984263bc
MD
2530 * latched, (P5 bug) this 1st STARTUP would then terminate
2531 * immediately, and the previously started INIT IPI would continue. OR
2532 * the previous INIT IPI has already run. and this STARTUP IPI will
2533 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2534 * will run.
2535 */
984263bc
MD
2536 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2537 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2538 /* spin */ ;
2539 u_sleep(200); /* wait ~200uS */
2540
2541 /*
bb467734 2542 * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
984263bc
MD
2543 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2544 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2545 * recognized after hardware RESET or INIT IPI.
2546 */
984263bc
MD
2547 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2548 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2549 /* spin */ ;
bb467734
MD
2550
2551 /* Resume normal operation */
2552 cpu_enable_intr();
984263bc 2553
8a8d5d85 2554 /* wait for it to start, see ap_init() */
984263bc 2555 set_apic_timer(5000000);/* == 5 seconds */
8a8d5d85 2556 while (read_apic_timer()) {
0f7a3396 2557 if (smp_startup_mask & (1 << gd->mi.gd_cpuid))
984263bc 2558 return 1; /* return SUCCESS */
8a8d5d85 2559 }
bb467734 2560
984263bc
MD
2561 return 0; /* return FAILURE */
2562}
2563
bb467734
MD
2564static
2565int
2566smitest(void)
2567{
2568 int64_t ltsc;
2569 int64_t ntsc;
2570 int64_t ldelta;
2571 int64_t ndelta;
2572 int count;
2573
2574 ldelta = 0;
2575 ndelta = 0;
2576 while (read_apic_timer()) {
2577 ltsc = rdtsc();
2578 for (count = 0; count < 100; ++count)
2579 ntsc = rdtsc(); /* force loop to occur */
2580 if (ldelta) {
2581 ndelta = ntsc - ltsc;
2582 if (ldelta > ndelta)
2583 ldelta = ndelta;
2584 if (ndelta > ldelta * 2)
2585 break;
2586 } else {
2587 ldelta = ntsc - ltsc;
2588 }
2589 }
2590 return(read_apic_timer());
2591}
984263bc
MD
2592
/*
 * Lazy flush the TLB on all other CPU's.  DEPRECATED.
 *
 * A broadcast IPI is only used when every cpu that was started is also
 * active.  If for some reason we were unable to start all cpus we
 * cannot safely use broadcast IPIs, so the IPI is sent to the cpus in
 * smp_active_mask instead.
 *
 * Does nothing when the kernel is built without SMP.
 */
void
smp_invltlb(void)
{
#ifdef SMP
	if (smp_startup_mask != smp_active_mask) {
		selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET,
				  APIC_DELMODE_FIXED);
		return;
	}
	all_but_self_ipi(XINVLTLB_OFFSET);
#endif
}
2611
984263bc
MD
2612/*
2613 * When called the executing CPU will send an IPI to all other CPUs
2614 * requesting that they halt execution.
2615 *
2616 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2617 *
2618 * - Signals all CPUs in map to stop.
2619 * - Waits for each to stop.
2620 *
2621 * Returns:
2622 * -1: error
2623 * 0: NA
2624 * 1: ok
2625 *
2626 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2627 * from executing at same time.
2628 */
2629int
2630stop_cpus(u_int map)
2631{
0f7a3396 2632 map &= smp_active_mask;
984263bc
MD
2633
2634 /* send the Xcpustop IPI to all CPUs in map */
2635 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2636
2637 while ((stopped_cpus & map) != map)
2638 /* spin */ ;
2639
2640 return 1;
2641}
2642
2643
2644/*
2645 * Called by a CPU to restart stopped CPUs.
2646 *
2647 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2648 *
2649 * - Signals all CPUs in map to restart.
2650 * - Waits for each to restart.
2651 *
2652 * Returns:
2653 * -1: error
2654 * 0: NA
2655 * 1: ok
2656 */
2657int
2658restart_cpus(u_int map)
2659{
0f7a3396
MD
2660 /* signal other cpus to restart */
2661 started_cpus = map & smp_active_mask;
984263bc
MD
2662
2663 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2664 /* spin */ ;
2665
2666 return 1;
2667}
2668
984263bc 2669/*
8a8d5d85
MD
2670 * This is called once the mpboot code has gotten us properly relocated
2671 * and the MMU turned on, etc. ap_init() is actually the idle thread,
2672 * and when it returns the scheduler will call the real cpu_idle() main
2673 * loop for the idlethread. Interrupts are disabled on entry and should
2674 * remain disabled at return.
984263bc 2675 */
984263bc 2676void
8a8d5d85 2677ap_init(void)
984263bc
MD
2678{
2679 u_int apic_id;
2680
8a8d5d85 2681 /*
0f7a3396
MD
2682 * Adjust smp_startup_mask to signal the BSP that we have started
2683 * up successfully. Note that we do not yet hold the BGL. The BSP
2684 * is waiting for our signal.
2685 *
2686 * We can't set our bit in smp_active_mask yet because we are holding
2687 * interrupts physically disabled and remote cpus could deadlock
2688 * trying to send us an IPI.
8a8d5d85 2689 */
0f7a3396 2690 smp_startup_mask |= 1 << mycpu->gd_cpuid;
35238fa5 2691 cpu_mfence();
8a8d5d85
MD
2692
2693 /*
41a01a4d
MD
2694 * Interlock for finalization. Wait until mp_finish is non-zero,
2695 * then get the MP lock.
2696 *
2697 * Note: We are in a critical section.
2698 *
2699 * Note: We have to synchronize td_mpcount to our desired MP state
2700 * before calling cpu_try_mplock().
2701 *
2702 * Note: we are the idle thread, we can only spin.
2703 *
35238fa5
MD
2704 * Note: The load fence is memory volatile and prevents the compiler
2705 * from improperly caching mp_finish, and the cpu from improperly
2706 * caching it.
8a8d5d85 2707 */
41a01a4d 2708 while (mp_finish == 0)
35238fa5 2709 cpu_lfence();
d9ebdce5 2710 ++curthread->td_mpcount;
8a8d5d85
MD
2711 while (cpu_try_mplock() == 0)
2712 ;
2713
374133e3
MD
2714 if (cpu_feature & CPUID_TSC) {
2715 /*
2716 * The BSP is constantly updating tsc0_offset, figure out the
2717 * relative difference to synchronize ktrdump.
2718 */
2719 tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
2720 }
2721
984263bc
MD
2722 /* BSP may have changed PTD while we're waiting for the lock */
2723 cpu_invltlb();
2724
984263bc
MD
2725#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2726 lidt(&r_idt);
2727#endif
2728
2729 /* Build our map of 'other' CPUs. */
0f7a3396 2730 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
984263bc 2731
26be20a0 2732 kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
984263bc 2733
984263bc
MD
2734 /* A quick check from sanity claus */
2735 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
8a8d5d85 2736 if (mycpu->gd_cpuid != apic_id) {
26be20a0
SW
2737 kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
2738 kprintf("SMP: apic_id = %d\n", apic_id);
2739 kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
984263bc
MD
2740 panic("cpuid mismatch! boom!!");
2741 }
2742
b52c8db0
SZ
2743 /* Initialize AP's local APIC for irq's */
2744 apic_initialize(FALSE);
984263bc
MD
2745
2746 /* Set memory range attributes for this CPU to match the BSP */
2747 mem_range_AP_init();
2748
a2a5ad0d 2749 /*
4c9f5a7f
MD
2750 * Once we go active we must process any IPIQ messages that may
2751 * have been queued, because no actual IPI will occur until we
2752 * set our bit in the smp_active_mask. If we don't the IPI
2753 * message interlock could be left set which would also prevent
2754 * further IPIs.
2755 *
8a8d5d85
MD
2756 * The idle loop doesn't expect the BGL to be held and while
2757 * lwkt_switch() normally cleans things up this is a special case
2758 * because we returning almost directly into the idle loop.
41a01a4d
MD
2759 *
2760 * The idle thread is never placed on the runq, make sure
4c9f5a7f 2761 * nothing we've done put it there.
8a8d5d85 2762 */
96728c05 2763 KKASSERT(curthread->td_mpcount == 1);
41a01a4d 2764 smp_active_mask |= 1 << mycpu->gd_cpuid;
d19f6edf
MD
2765
2766 /*
2767 * Enable interrupts here. idle_restore will also do it, but
2768 * doing it here lets us clean up any strays that got posted to
2769 * the CPU during the AP boot while we are still in a critical
2770 * section.
2771 */
2772 __asm __volatile("sti; pause; pause"::);
2773 mdcpu->gd_fpending = 0;
d19f6edf 2774
4a19580d 2775 initclocks_pcpu(); /* clock interrupts (via IPIs) */
4c9f5a7f 2776 lwkt_process_ipiq();
d19f6edf
MD
2777
2778 /*
2779 * Releasing the mp lock lets the BSP finish up the SMP init
2780 */
96728c05 2781 rel_mplock();
41a01a4d 2782 KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
984263bc
MD
2783}
2784
41a01a4d
MD
2785/*
2786 * Get SMP fully working before we start initializing devices.
2787 */
2788static
2789void
2790ap_finish(void)
2791{
2792 mp_finish = 1;
2793 if (bootverbose)
26be20a0 2794 kprintf("Finish MP startup\n");
374133e3
MD
2795 if (cpu_feature & CPUID_TSC)
2796 tsc0_offset = rdtsc();
2797 tsc_offsets[0] = 0;
41a01a4d 2798 rel_mplock();
374133e3 2799 while (smp_active_mask != smp_startup_mask) {
35238fa5 2800 cpu_lfence();
374133e3
MD
2801 if (cpu_feature & CPUID_TSC)
2802 tsc0_offset = rdtsc();
2803 }
4da43e1f 2804 while (try_mplock() == 0)
41a01a4d
MD
2805 ;
2806 if (bootverbose)
26be20a0 2807 kprintf("Active CPU Mask: %08x\n", smp_active_mask);
41a01a4d
MD
2808}
2809
ba39e2e0 2810SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
41a01a4d 2811
96728c05
MD
2812void
2813cpu_send_ipiq(int dcpu)
2814{
41a01a4d
MD
2815 if ((1 << dcpu) & smp_active_mask)
2816 single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
96728c05 2817}
41a01a4d
MD
2818
#if 0	/* single_apic_ipi_passive() not working yet */
/*
 * Passive variant of cpu_send_ipiq().
 *
 * Returns 0 on failure, 1 on success.  A cpu that is not in
 * smp_active_mask counts as a failure.
 */
int
cpu_send_ipiq_passive(int dcpu)
{
	if (((1 << dcpu) & smp_active_mask) == 0)
		return(0);
	return(single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
				       APIC_DELMODE_FIXED));
}
#endif
2834
a0873f07
SZ
2835struct mptable_lapic_cbarg1 {
2836 int cpu_count;
44c36320
SZ
2837 int ht_fixup;
2838 u_int ht_apicid_mask;
a0873f07
SZ
2839};
2840
2841static int
2842mptable_lapic_pass1_callback(void *xarg, const void *pos, int type)
2843{
2844 const struct PROCENTRY *ent;
2845 struct mptable_lapic_cbarg1 *arg = xarg;
2846
2847 if (type != 0)
2848 return 0;
2849 ent = pos;
2850
2851 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
2852 return 0;
2853
2854 arg->cpu_count++;
44c36320
SZ
2855 if (ent->apic_id < 32) {
2856 arg->ht_apicid_mask |= 1 << ent->apic_id;
2857 } else if (arg->ht_fixup) {
2858 kprintf("MPTABLE: lapic id > 32, disable HTT fixup\n");
2859 arg->ht_fixup = 0;
2860 }
a0873f07
SZ
2861 return 0;
2862}
2863
/*
 * State for the second pass over the MP table processor entries,
 * see mptable_lapic_pass2_callback().
 */
struct mptable_lapic_cbarg2 {
	int	cpu;		/* logical cpu id handed to processor_entry() */
	int	logical_cpus;	/* logical cpus per entry, 0: no HT fixup */
	int	found_bsp;	/* set once the BSP entry has been seen */
};
2869
2870static int
2871mptable_lapic_pass2_callback(void *xarg, const void *pos, int type)
2872{
2873 const struct PROCENTRY *ent;
2874 struct mptable_lapic_cbarg2 *arg = xarg;
2875
2876 if (type != 0)
2877 return 0;
2878 ent = pos;
2879
2880 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
2881 KKASSERT(!arg->found_bsp);
2882 arg->found_bsp = 1;
2883 }
2884
2885 if (processor_entry(ent, arg->cpu))
2886 arg->cpu++;
2887
44c36320 2888 if (arg->logical_cpus) {
a0873f07
SZ
2889 struct PROCENTRY proc;
2890 int i;
2891
2892 /*
2893 * Create fake mptable processor entries
2894 * and feed them to processor_entry() to
2895 * enumerate the logical CPUs.
2896 */
2897 bzero(&proc, sizeof(proc));
2898 proc.type = 0;
2899 proc.cpu_flags = PROCENTRY_FLAG_EN;
2900 proc.apic_id = ent->apic_id;
2901
44c36320 2902 for (i = 1; i < arg->logical_cpus; i++) {
a0873f07
SZ
2903 proc.apic_id++;
2904 processor_entry(&proc, arg->cpu);
a0873f07
SZ
2905 arg->cpu++;
2906 }
2907 }
2908 return 0;
2909}
2910
322abba7 2911static void
281d9482
SZ
2912mptable_imcr(struct mptable_pos *mpt)
2913{
2914 /* record whether PIC or virtual-wire mode */
2915 machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT,
2916 mpt->mp_fps->mpfb2 & 0x80);
2917}
2918
2919struct mptable_lapic_enumerator {
2920 struct lapic_enumerator enumerator;
2921 vm_paddr_t mpfps_paddr;
2922};
2923
2924static void
322abba7
SZ
2925mptable_lapic_default(void)
2926{
2927 int ap_apicid, bsp_apicid;
2928
2929 mp_naps = 1; /* exclude BSP */
2930
2931 /* Map local apic before the id field is accessed */
84cc808b 2932 lapic_map(DEFAULT_APIC_BASE);
322abba7
SZ
2933
2934 bsp_apicid = APIC_ID(lapic.id);
2935 ap_apicid = (bsp_apicid == 0) ? 1 : 0;
2936
2937 /* BSP */
2938 mp_set_cpuids(0, bsp_apicid);
2939 /* one and only AP */
2940 mp_set_cpuids(1, ap_apicid);
2941}
2942
a0873f07
SZ
2943/*
2944 * Configure:
a0873f07 2945 * mp_naps
d787e80c 2946 * ID_TO_CPU(N), APIC ID to logical CPU table
a0873f07
SZ
2947 * CPU_TO_ID(N), logical CPU to APIC ID table
2948 */
2949static void
281d9482 2950mptable_lapic_enumerate(struct lapic_enumerator *e)
a0873f07 2951{
281d9482 2952 struct mptable_pos mpt;
322abba7
SZ
2953 struct mptable_lapic_cbarg1 arg1;
2954 struct mptable_lapic_cbarg2 arg2;
2955 mpcth_t cth;
44c36320 2956 int error, logical_cpus = 0;
5a16ccc3 2957 vm_offset_t lapic_addr;
281d9482
SZ
2958 vm_paddr_t mpfps_paddr;
2959
2960 mpfps_paddr = ((struct mptable_lapic_enumerator *)e)->mpfps_paddr;
2961 KKASSERT(mpfps_paddr != 0);
a0873f07 2962
281d9482
SZ
2963 error = mptable_map(&mpt, mpfps_paddr);
2964 if (error)
2965 panic("mptable_lapic_enumerate mptable_map failed\n");
2966
2967 KKASSERT(mpt.mp_fps != NULL);
a0873f07 2968
322abba7
SZ
2969 /*
2970 * Check for use of 'default' configuration
2971 */
281d9482 2972 if (mpt.mp_fps->mpfb1 != 0) {
322abba7 2973 mptable_lapic_default();
281d9482 2974 mptable_unmap(&mpt);
322abba7
SZ
2975 return;
2976 }
a0873f07 2977
281d9482 2978 cth = mpt.mp_cth;
322abba7 2979 KKASSERT(cth != NULL);
a0873f07 2980
322abba7
SZ
2981 /* Save local apic address */
2982 lapic_addr = (vm_offset_t)cth->apic_address;
2983 KKASSERT(lapic_addr != 0);
a0873f07 2984
322abba7
SZ
2985 /*
2986 * Find out how many CPUs do we have
2987 */
2988 bzero(&arg1, sizeof(arg1));
44c36320
SZ
2989 arg1.ht_fixup = 1; /* Apply ht fixup by default */
2990
322abba7
SZ
2991 error = mptable_iterate_entries(cth,
2992 mptable_lapic_pass1_callback, &arg1);
2993 if (error)
2994 panic("mptable_iterate_entries(lapic_pass1) failed\n");
322abba7 2995 KKASSERT(arg1.cpu_count != 0);
a0873f07 2996
322abba7 2997 /* See if we need to fixup HT logical CPUs. */
44c36320
SZ
2998 if (arg1.ht_fixup) {
2999 logical_cpus = mptable_hyperthread_fixup(arg1.ht_apicid_mask,
3000 arg1.cpu_count);
3001 if (logical_cpus != 0)
3002 arg1.cpu_count *= logical_cpus;
3003 }
3004 mp_naps = arg1.cpu_count;
a0873f07 3005
44c36320 3006 /* Qualify the numbers again, after possible HT fixup */
322abba7
SZ
3007 if (mp_naps > MAXCPU) {
3008 kprintf("Warning: only using %d of %d available CPUs!\n",
3009 MAXCPU, mp_naps);
3010 mp_naps = MAXCPU;
a0873f07
SZ
3011 }
3012
322abba7 3013 --mp_naps; /* subtract the BSP */
a0873f07 3014
322abba7
SZ
3015 /*
3016 * Link logical CPU id to local apic id
3017 */
3018 bzero(&arg2, sizeof(arg2));
3019 arg2.cpu = 1;
44c36320 3020 arg2.logical_cpus = logical_cpus;
a0873f07 3021
322abba7
SZ
3022 error = mptable_iterate_entries(cth,
3023 mptable_lapic_pass2_callback, &arg2);
3024 if (error)
3025 panic("mptable_iterate_entries(lapic_pass2) failed\n");
3026 KKASSERT(arg2.found_bsp);
a0873f07 3027
322abba7 3028 /* Map local apic */
84cc808b 3029 lapic_map(lapic_addr);
281d9482
SZ
3030
3031 mptable_unmap(&mpt);
3032}
3033
3034static int
3035mptable_lapic_probe(struct lapic_enumerator *e)
3036{
3037 vm_paddr_t mpfps_paddr;
3038
3039 mpfps_paddr = mptable_probe();
3040 if (mpfps_paddr == 0)
3041 return ENXIO;
3042
3043 ((struct mptable_lapic_enumerator *)e)->mpfps_paddr = mpfps_paddr;
3044 return 0;
a0873f07 3045}
5a16ccc3 3046
281d9482
SZ
3047static struct mptable_lapic_enumerator mptable_lapic_enumerator = {
3048 .enumerator = {
3049 .lapic_prio = LAPIC_ENUM_PRIO_MPTABLE,
3050 .lapic_probe = mptable_lapic_probe,
3051 .lapic_enumerate = mptable_lapic_enumerate
3052 }
3053};
3054
5a16ccc3 3055static void
281d9482 3056mptable_apic_register(void)
a0eaef71 3057{
281d9482 3058 lapic_enumerator_register(&mptable_lapic_enumerator.enumerator);
a0eaef71 3059}
281d9482 3060SYSINIT(madt, SI_BOOT2_PRESMP, SI_ORDER_ANY, mptable_apic_register, 0);