IO APIC: Get rid of apic_imen
[dragonfly.git] / sys / platform / pc32 / i386 / mp_machdep.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $
c0c5de70 26 * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $
984263bc
MD
27 */
28
29#include "opt_cpu.h"
984263bc 30
984263bc
MD
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
984263bc
MD
34#include <sys/sysctl.h>
35#include <sys/malloc.h>
36#include <sys/memrange.h>
984263bc 37#include <sys/cons.h> /* cngetc() */
37e7efec 38#include <sys/machintr.h>
984263bc
MD
39
40#include <vm/vm.h>
41#include <vm/vm_param.h>
42#include <vm/pmap.h>
43#include <vm/vm_kern.h>
44#include <vm/vm_extern.h>
984263bc
MD
45#include <sys/lock.h>
46#include <vm/vm_map.h>
47#include <sys/user.h>
48#ifdef GPROF
49#include <sys/gmon.h>
50#endif
984263bc
MD
51
52#include <machine/smp.h>
a9295349 53#include <machine_base/apic/apicreg.h>
984263bc
MD
54#include <machine/atomic.h>
55#include <machine/cpufunc.h>
a9295349 56#include <machine_base/apic/mpapic.h>
984263bc
MD
57#include <machine/psl.h>
58#include <machine/segments.h>
984263bc
MD
59#include <machine/tss.h>
60#include <machine/specialreg.h>
61#include <machine/globaldata.h>
62
984263bc 63#include <machine/md_var.h> /* setidt() */
a9295349
MD
64#include <machine_base/icu/icu.h> /* IPIs */
65#include <machine_base/isa/intr_machdep.h> /* IPIs */
984263bc 66
1439c090
MD
67#define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */
68
984263bc
MD
69#define WARMBOOT_TARGET 0
70#define WARMBOOT_OFF (KERNBASE + 0x0467)
71#define WARMBOOT_SEG (KERNBASE + 0x0469)
72
984263bc 73#define BIOS_BASE (0xf0000)
1df86978 74#define BIOS_BASE2 (0xe0000)
984263bc 75#define BIOS_SIZE (0x10000)
984263bc
MD
76#define BIOS_COUNT (BIOS_SIZE/4)
77
78#define CMOS_REG (0x70)
79#define CMOS_DATA (0x71)
80#define BIOS_RESET (0x0f)
81#define BIOS_WARM (0x0a)
82
83#define PROCENTRY_FLAG_EN 0x01
84#define PROCENTRY_FLAG_BP 0x02
85#define IOAPICENTRY_FLAG_EN 0x01
86
87
88/* MP Floating Pointer Structure */
89typedef struct MPFPS {
90 char signature[4];
981bebd1 91 u_int32_t pap;
984263bc
MD
92 u_char length;
93 u_char spec_rev;
94 u_char checksum;
95 u_char mpfb1;
96 u_char mpfb2;
97 u_char mpfb3;
98 u_char mpfb4;
99 u_char mpfb5;
100} *mpfps_t;
101
102/* MP Configuration Table Header */
103typedef struct MPCTH {
104 char signature[4];
105 u_short base_table_length;
106 u_char spec_rev;
107 u_char checksum;
108 u_char oem_id[8];
109 u_char product_id[12];
110 void *oem_table_pointer;
111 u_short oem_table_size;
112 u_short entry_count;
113 void *apic_address;
114 u_short extended_table_length;
115 u_char extended_table_checksum;
116 u_char reserved;
117} *mpcth_t;
118
119
120typedef struct PROCENTRY {
121 u_char type;
122 u_char apic_id;
123 u_char apic_version;
124 u_char cpu_flags;
125 u_long cpu_signature;
126 u_long feature_flags;
127 u_long reserved1;
128 u_long reserved2;
129} *proc_entry_ptr;
130
131typedef struct BUSENTRY {
132 u_char type;
133 u_char bus_id;
134 char bus_type[6];
135} *bus_entry_ptr;
136
137typedef struct IOAPICENTRY {
138 u_char type;
139 u_char apic_id;
140 u_char apic_version;
141 u_char apic_flags;
142 void *apic_address;
143} *io_apic_entry_ptr;
144
145typedef struct INTENTRY {
146 u_char type;
147 u_char int_type;
148 u_short int_flags;
149 u_char src_bus_id;
150 u_char src_bus_irq;
151 u_char dst_apic_id;
152 u_char dst_apic_int;
153} *int_entry_ptr;
154
155/* descriptions of MP basetable entries */
156typedef struct BASETABLE_ENTRY {
157 u_char type;
158 u_char length;
159 char name[16];
160} basetable_entry;
161
981bebd1
SZ
162struct mptable_pos {
163 mpfps_t mp_fps;
164 mpcth_t mp_cth;
165 vm_size_t mp_cth_mapsz;
166};
167
fa058384
SZ
/*
 * Callback for mptable_iterate_entries(): receives the caller's opaque
 * argument, a pointer to the current base table entry and its type.
 * A non-zero return stops the iteration and is passed back to the caller.
 */
typedef int	(*mptable_iter_func)(void *, const void *, int);
169
984263bc
MD
170/*
171 * this code MUST be enabled here and in mpboot.s.
172 * it follows the very early stages of AP boot by placing values in CMOS ram.
173 * it NORMALLY will never be needed and thus the primitive method for enabling.
174 *
984263bc 175 */
7d34994c 176#if defined(CHECK_POINTS)
984263bc
MD
177#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
178#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
179
180#define CHECK_INIT(D); \
181 CHECK_WRITE(0x34, (D)); \
182 CHECK_WRITE(0x35, (D)); \
183 CHECK_WRITE(0x36, (D)); \
184 CHECK_WRITE(0x37, (D)); \
185 CHECK_WRITE(0x38, (D)); \
186 CHECK_WRITE(0x39, (D));
187
188#define CHECK_PRINT(S); \
26be20a0 189 kprintf("%s: %d, %d, %d, %d, %d, %d\n", \
984263bc
MD
190 (S), \
191 CHECK_READ(0x34), \
192 CHECK_READ(0x35), \
193 CHECK_READ(0x36), \
194 CHECK_READ(0x37), \
195 CHECK_READ(0x38), \
196 CHECK_READ(0x39));
197
198#else /* CHECK_POINTS */
199
200#define CHECK_INIT(D)
201#define CHECK_PRINT(S)
202
203#endif /* CHECK_POINTS */
204
205/*
206 * Values to send to the POST hardware.
207 */
208#define MP_BOOTADDRESS_POST 0x10
209#define MP_PROBE_POST 0x11
210#define MPTABLE_PASS1_POST 0x12
211
212#define MP_START_POST 0x13
213#define MP_ENABLE_POST 0x14
214#define MPTABLE_PASS2_POST 0x15
215
216#define START_ALL_APS_POST 0x16
217#define INSTALL_AP_TRAMP_POST 0x17
218#define START_AP_POST 0x18
219
220#define MP_ANNOUNCE_POST 0x19
221
984263bc
MD
222/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
223int current_postcode;
224
225/** XXX FIXME: what system files declare these??? */
226extern struct region_descriptor r_gdt, r_idt;
227
984263bc 228int mp_naps; /* # of Applications processors */
97359a5b 229#ifdef APIC_IO
4f6a8b30 230static int mp_nbusses; /* # of busses */
984263bc 231int mp_napics; /* # of IO APICs */
97359a5b 232#endif
97359a5b 233#ifdef APIC_IO
984263bc 234vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
97359a5b
MD
235u_int32_t *io_apic_versions;
236#endif
984263bc
MD
237extern int nkpt;
238
239u_int32_t cpu_apic_versions[MAXCPU];
374133e3 240int64_t tsc0_offset;
0b698dca 241extern int64_t tsc_offsets[];
984263bc 242
1876681a
SZ
243extern u_long ebda_addr;
244
97359a5b 245#ifdef APIC_IO
8a8d5d85 246struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
97359a5b 247#endif
984263bc 248
984263bc
MD
249/*
250 * APIC ID logical/physical mapping structures.
251 * We oversize these to simplify boot-time config.
252 */
253int cpu_num_to_apic_id[NAPICID];
97359a5b 254#ifdef APIC_IO
984263bc 255int io_num_to_apic_id[NAPICID];
97359a5b 256#endif
984263bc
MD
257int apic_id_to_logical[NAPICID];
258
984263bc
MD
259/* AP uses this during bootstrap. Do not staticize. */
260char *bootSTK;
261static int bootAP;
262
263/* Hotwire a 0->4MB V==P mapping */
264extern pt_entry_t *KPTphys;
265
f13b5eec
MD
266/*
267 * SMP page table page. Set up by locore to point to a page table
268 * page from which we allocate per-cpu privatespace areas, io_apics,
269 * and so forth.
270 */
271
272#define IO_MAPPING_START_INDEX \
273 (SMP_MAXCPU * sizeof(struct privatespace) / PAGE_SIZE)
274
984263bc 275extern pt_entry_t *SMPpt;
f13b5eec 276static int SMPpt_alloc_index = IO_MAPPING_START_INDEX;
984263bc
MD
277
278struct pcb stoppcbs[MAXCPU];
279
fa058384
SZ
280static basetable_entry basetable_entry_types[] =
281{
282 {0, 20, "Processor"},
283 {1, 8, "Bus"},
284 {2, 8, "I/O APIC"},
285 {3, 8, "I/O INT"},
286 {4, 8, "Local INT"}
287};
288
984263bc
MD
289/*
290 * Local data and functions.
291 */
292
984263bc
MD
293static u_int boot_address;
294static u_int base_memory;
41a01a4d 295static int mp_finish;
984263bc 296
984263bc
MD
297static void mp_enable(u_int boot_addr);
298
fa058384
SZ
299static int mptable_iterate_entries(const mpcth_t,
300 mptable_iter_func, void *);
cb00b5c4 301static int mptable_probe(void);
34e6fa63 302static int mptable_search(void);
fa058384 303static int mptable_check(vm_paddr_t);
3aba8f73 304static int mptable_search_sig(u_int32_t target, int count);
44c36320 305static int mptable_hyperthread_fixup(u_int, int);
981bebd1 306static void mptable_pass1(struct mptable_pos *);
390b18b0 307static void mptable_pass2(struct mptable_pos *);
3aba8f73
SZ
308static void mptable_default(int type);
309static void mptable_fix(void);
fa058384 310static int mptable_map(struct mptable_pos *, vm_paddr_t);
981bebd1 311static void mptable_unmap(struct mptable_pos *);
a0eaef71 312static void mptable_imcr(struct mptable_pos *);
3aba8f73 313
281d9482
SZ
314static int mptable_lapic_probe(struct lapic_enumerator *);
315static void mptable_lapic_enumerate(struct lapic_enumerator *);
316static void mptable_lapic_default(void);
317
97359a5b 318#ifdef APIC_IO
984263bc 319static void setup_apic_irq_mapping(void);
97359a5b
MD
320static int apic_int_is_bus_type(int intr, int bus_type);
321#endif
984263bc
MD
322static int start_all_aps(u_int boot_addr);
323static void install_ap_tramp(u_int boot_addr);
0f7a3396 324static int start_ap(struct mdglobaldata *gd, u_int boot_addr);
984263bc 325
41a01a4d 326static cpumask_t smp_startup_mask = 1; /* which cpus have been started */
0f7a3396
MD
327cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */
328SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, "");
329
984263bc
MD
330/*
331 * Calculate usable address in base memory for AP trampoline code.
332 */
333u_int
334mp_bootaddress(u_int basemem)
335{
336 POSTCODE(MP_BOOTADDRESS_POST);
337
c0c5de70 338 base_memory = basemem;
984263bc
MD
339
340 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
341 if ((base_memory - boot_address) < bootMP_size)
342 boot_address -= 4096; /* not enough, lower by 4k */
343
344 return boot_address;
345}
346
347
34e6fa63
SZ
/*
 * Locate the MP floating pointer structure and validate the table.
 *
 * Returns the physical address reported by mptable_search() when
 * mptable_check() accepts it, 0 otherwise.
 */
static int
mptable_probe(void)
{
	int paddr = mptable_search();

	return (mptable_check(paddr) == 0) ? paddr : 0;
}
359
984263bc
MD
360/*
361 * Look for an Intel MP spec table (ie, SMP capable hardware).
362 */
cb00b5c4 363static int
34e6fa63 364mptable_search(void)
984263bc
MD
365{
366 int x;
984263bc 367 u_int32_t target;
f13b5eec
MD
368
369 /*
370 * Make sure our SMPpt[] page table is big enough to hold all the
371 * mappings we need.
372 */
373 KKASSERT(IO_MAPPING_START_INDEX < NPTEPG - 2);
984263bc
MD
374
375 POSTCODE(MP_PROBE_POST);
376
377 /* see if EBDA exists */
1876681a 378 if (ebda_addr != 0) {
984263bc 379 /* search first 1K of EBDA */
1876681a 380 target = (u_int32_t)ebda_addr;
3aba8f73 381 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 382 return x;
984263bc
MD
383 } else {
384 /* last 1K of base memory, effective 'top of base' passed in */
aeb48299 385 target = (u_int32_t)(base_memory - 0x400);
3aba8f73 386 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 387 return x;
984263bc
MD
388 }
389
390 /* search the BIOS */
aeb48299 391 target = (u_int32_t)BIOS_BASE;
3aba8f73 392 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
aeb48299 393 return x;
984263bc 394
1df86978
SZ
395 /* search the extended BIOS */
396 target = (u_int32_t)BIOS_BASE2;
397 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
398 return x;
399
984263bc 400 /* nothing found */
984263bc 401 return 0;
984263bc
MD
402}
403
fa058384
SZ
/* Accumulator filled in by mptable_check_callback() */
struct mptable_check_cbarg {
	int	cpu_count;	/* # of enabled processor entries seen */
	int	found_bsp;	/* set once an enabled BSP entry was seen */
};
408
409static int
410mptable_check_callback(void *xarg, const void *pos, int type)
411{
412 const struct PROCENTRY *ent;
413 struct mptable_check_cbarg *arg = xarg;
414
415 if (type != 0)
416 return 0;
417 ent = pos;
418
419 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
420 return 0;
421 arg->cpu_count++;
422
423 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
424 if (arg->found_bsp) {
425 kprintf("more than one BSP in base MP table\n");
426 return EINVAL;
427 }
428 arg->found_bsp = 1;
429 }
430 return 0;
431}
432
433static int
434mptable_check(vm_paddr_t mpfps_paddr)
435{
436 struct mptable_pos mpt;
437 struct mptable_check_cbarg arg;
438 mpcth_t cth;
439 int error;
440
441 if (mpfps_paddr == 0)
442 return EOPNOTSUPP;
443
444 error = mptable_map(&mpt, mpfps_paddr);
445 if (error)
446 return error;
447
448 if (mpt.mp_fps->mpfb1 != 0)
449 goto done;
450
451 error = EINVAL;
452
453 cth = mpt.mp_cth;
454 if (cth == NULL)
455 goto done;
456 if (cth->apic_address == 0)
457 goto done;
458
459 bzero(&arg, sizeof(arg));
460 error = mptable_iterate_entries(cth, mptable_check_callback, &arg);
461 if (!error) {
462 if (arg.cpu_count == 0) {
463 kprintf("MP table contains no processor entries\n");
464 error = EINVAL;
465 } else if (!arg.found_bsp) {
466 kprintf("MP table does not contains BSP entry\n");
467 error = EINVAL;
468 }
469 }
470done:
471 mptable_unmap(&mpt);
472 return error;
473}
474
475static int
476mptable_iterate_entries(const mpcth_t cth, mptable_iter_func func, void *arg)
477{
478 int count, total_size;
479 const void *position;
480
481 KKASSERT(cth->base_table_length >= sizeof(struct MPCTH));
482 total_size = cth->base_table_length - sizeof(struct MPCTH);
483 position = (const uint8_t *)cth + sizeof(struct MPCTH);
484 count = cth->entry_count;
485
486 while (count--) {
487 int type, error;
488
489 KKASSERT(total_size >= 0);
490 if (total_size == 0) {
491 kprintf("invalid base MP table, "
492 "entry count and length mismatch\n");
493 return EINVAL;
494 }
495
496 type = *(const uint8_t *)position;
497 switch (type) {
498 case 0: /* processor_entry */
499 case 1: /* bus_entry */
500 case 2: /* io_apic_entry */
501 case 3: /* int_entry */
502 case 4: /* int_entry */
503 break;
504 default:
505 kprintf("unknown base MP table entry type %d\n", type);
506 return EINVAL;
507 }
508
509 if (total_size < basetable_entry_types[type].length) {
510 kprintf("invalid base MP table length, "
511 "does not contain all entries\n");
512 return EINVAL;
513 }
514 total_size -= basetable_entry_types[type].length;
515
516 error = func(arg, position, type);
517 if (error)
518 return error;
519
520 position = (const uint8_t *)position +
521 basetable_entry_types[type].length;
522 }
523 return 0;
524}
525
984263bc
MD
526
527/*
528 * Startup the SMP processors.
529 */
530void
531mp_start(void)
532{
533 POSTCODE(MP_START_POST);
50bc991e 534 mp_enable(boot_address);
984263bc
MD
535}
536
537
538/*
539 * Print various information about the SMP system hardware and setup.
540 */
541void
542mp_announce(void)
543{
544 int x;
545
546 POSTCODE(MP_ANNOUNCE_POST);
547
26be20a0
SW
548 kprintf("DragonFly/MP: Multiprocessor motherboard\n");
549 kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
8629c4ea 550 kprintf(", version: 0x%08x\n", cpu_apic_versions[0]);
984263bc 551 for (x = 1; x <= mp_naps; ++x) {
26be20a0 552 kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
8629c4ea 553 kprintf(", version: 0x%08x\n", cpu_apic_versions[x]);
984263bc
MD
554 }
555
556#if defined(APIC_IO)
557 for (x = 0; x < mp_napics; ++x) {
26be20a0
SW
558 kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
559 kprintf(", version: 0x%08x", io_apic_versions[x]);
560 kprintf(", at 0x%08x\n", io_apic_address[x]);
984263bc
MD
561 }
562#else
26be20a0 563 kprintf(" Warning: APIC I/O disabled\n");
984263bc
MD
564#endif /* APIC_IO */
565}
566
567/*
568 * AP cpu's call this to sync up protected mode.
7160572f
MD
569 *
570 * WARNING! We must ensure that the cpu is sufficiently initialized to
571 * be able to use to the FP for our optimized bzero/bcopy code before
572 * we enter more mainstream C code.
a44bdeec
MD
573 *
574 * WARNING! %fs is not set up on entry. This routine sets up %fs.
984263bc
MD
575 */
576void
577init_secondary(void)
578{
579 int gsel_tss;
580 int x, myid = bootAP;
581 u_int cr0;
8a8d5d85 582 struct mdglobaldata *md;
0f7a3396 583 struct privatespace *ps;
984263bc 584
0f7a3396
MD
585 ps = &CPU_prvspace[myid];
586
587 gdt_segs[GPRIV_SEL].ssd_base = (int)ps;
984263bc 588 gdt_segs[GPROC0_SEL].ssd_base =
0f7a3396
MD
589 (int) &ps->mdglobaldata.gd_common_tss;
590 ps->mdglobaldata.mi.gd_prvspace = ps;
984263bc
MD
591
592 for (x = 0; x < NGDT; x++) {
593 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
594 }
595
596 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
597 r_gdt.rd_base = (int) &gdt[myid * NGDT];
598 lgdt(&r_gdt); /* does magic intra-segment return */
599
600 lidt(&r_idt);
601
602 lldt(_default_ldt);
7b95be2a 603 mdcpu->gd_currentldt = _default_ldt;
984263bc
MD
604
605 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
606 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
8a8d5d85 607
0f7a3396 608 md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/
8a8d5d85
MD
609
610 md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */
611 md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
612 md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
613 md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
614 md->gd_common_tssd = *md->gd_tss_gdt;
984263bc
MD
615 ltr(gsel_tss);
616
617 /*
618 * Set to a known state:
619 * Set by mpboot.s: CR0_PG, CR0_PE
620 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
621 */
622 cr0 = rcr0();
623 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
624 load_cr0(cr0);
7160572f 625 pmap_set_opt(); /* PSE/4MB pages, etc */
984263bc 626
7160572f
MD
627 /* set up CPU registers and state */
628 cpu_setregs();
629
630 /* set up FPU state on the AP */
631 npxinit(__INITIAL_NPXCW__);
632
633 /* set up SSE registers */
634 enable_sse();
984263bc
MD
635}
636
984263bc
MD
637/*******************************************************************
638 * local functions and data
639 */
640
641/*
642 * start the SMP system
643 */
644static void
645mp_enable(u_int boot_addr)
646{
984263bc
MD
647#if defined(APIC_IO)
648 int apic;
649 u_int ux;
650#endif /* APIC_IO */
981bebd1 651 vm_paddr_t mpfps_paddr;
f65c10b6 652 struct mptable_pos mpt;
984263bc
MD
653
654 POSTCODE(MP_ENABLE_POST);
655
281d9482 656 lapic_config();
984263bc 657
f65c10b6
SZ
658 mpfps_paddr = mptable_probe();
659 if (mpfps_paddr) {
660 mptable_map(&mpt, mpfps_paddr);
661 mptable_imcr(&mpt);
662 mptable_unmap(&mpt);
663 }
984263bc
MD
664#if defined(APIC_IO)
665
f65c10b6
SZ
666 if (!mpfps_paddr)
667 panic("no MP table, disable APIC_IO!\n");
668
669 mptable_map(&mpt, mpfps_paddr);
670
671 /*
672 * Examine the MP table for needed info
673 */
674 mptable_pass1(&mpt);
675 mptable_pass2(&mpt);
676
677 mptable_unmap(&mpt);
678
679 /* Post scan cleanup */
680 mptable_fix();
681
97359a5b
MD
682 setup_apic_irq_mapping();
683
984263bc
MD
684 /* fill the LOGICAL io_apic_versions table */
685 for (apic = 0; apic < mp_napics; ++apic) {
686 ux = io_apic_read(apic, IOAPIC_VER);
687 io_apic_versions[apic] = ux;
688 io_apic_set_id(apic, IO_TO_ID(apic));
689 }
690
691 /* program each IO APIC in the system */
692 for (apic = 0; apic < mp_napics; ++apic)
693 if (io_apic_setup(apic) < 0)
694 panic("IO APIC setup failure");
695
97359a5b
MD
696#endif /* APIC_IO */
697
698 /*
699 * These are required for SMP operation
700 */
701
984263bc
MD
702 /* install a 'Spurious INTerrupt' vector */
703 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
704 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
705
706 /* install an inter-CPU IPI for TLB invalidation */
707 setidt(XINVLTLB_OFFSET, Xinvltlb,
708 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
709
96728c05
MD
710 /* install an inter-CPU IPI for IPIQ messaging */
711 setidt(XIPIQ_OFFSET, Xipiq,
712 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
78ea5a2a
SZ
713
714 /* install a timer vector */
715 setidt(XTIMER_OFFSET, Xtimer,
716 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
984263bc 717
984263bc
MD
718 /* install an inter-CPU IPI for CPU stop/restart */
719 setidt(XCPUSTOP_OFFSET, Xcpustop,
720 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
721
984263bc
MD
722 /* start each Application Processor */
723 start_all_aps(boot_addr);
724}
725
726
727/*
728 * look for the MP spec signature
729 */
730
731/* string defined by the Intel MP Spec as identifying the MP table */
732#define MP_SIG 0x5f504d5f /* _MP_ */
733#define NEXT(X) ((X) += 4)
734static int
3aba8f73 735mptable_search_sig(u_int32_t target, int count)
984263bc 736{
0f85efa2
SZ
737 vm_size_t map_size;
738 u_int32_t *addr;
739 int x, ret;
984263bc 740
aeb48299
SZ
741 KKASSERT(target != 0);
742
0f85efa2
SZ
743 map_size = count * sizeof(u_int32_t);
744 addr = pmap_mapdev((vm_paddr_t)target, map_size);
984263bc 745
aeb48299 746 ret = 0;
0f85efa2
SZ
747 for (x = 0; x < count; NEXT(x)) {
748 if (addr[x] == MP_SIG) {
749 /* make array index a byte index */
750 ret = target + (x * sizeof(u_int32_t));
751 break;
752 }
753 }
aeb48299 754
0f85efa2
SZ
755 pmap_unmapdev((vm_offset_t)addr, map_size);
756 return ret;
984263bc
MD
757}
758
759
984263bc
MD
760typedef struct BUSDATA {
761 u_char bus_id;
762 enum busTypes bus_type;
763} bus_datum;
764
765typedef struct INTDATA {
766 u_char int_type;
767 u_short int_flags;
768 u_char src_bus_id;
769 u_char src_bus_irq;
770 u_char dst_apic_id;
771 u_char dst_apic_int;
772 u_char int_vector;
773} io_int, local_int;
774
775typedef struct BUSTYPENAME {
776 u_char type;
777 char name[7];
778} bus_type_name;
779
780static bus_type_name bus_type_table[] =
781{
782 {CBUS, "CBUS"},
783 {CBUSII, "CBUSII"},
784 {EISA, "EISA"},
785 {MCA, "MCA"},
786 {UNKNOWN_BUSTYPE, "---"},
787 {ISA, "ISA"},
788 {MCA, "MCA"},
789 {UNKNOWN_BUSTYPE, "---"},
790 {UNKNOWN_BUSTYPE, "---"},
791 {UNKNOWN_BUSTYPE, "---"},
792 {UNKNOWN_BUSTYPE, "---"},
793 {UNKNOWN_BUSTYPE, "---"},
794 {PCI, "PCI"},
795 {UNKNOWN_BUSTYPE, "---"},
796 {UNKNOWN_BUSTYPE, "---"},
797 {UNKNOWN_BUSTYPE, "---"},
798 {UNKNOWN_BUSTYPE, "---"},
799 {XPRESS, "XPRESS"},
800 {UNKNOWN_BUSTYPE, "---"}
801};
802/* from MP spec v1.4, table 5-1 */
803static int default_data[7][5] =
804{
805/* nbus, id0, type0, id1, type1 */
806 {1, 0, ISA, 255, 255},
807 {1, 0, EISA, 255, 255},
808 {1, 0, EISA, 255, 255},
809 {1, 0, MCA, 255, 255},
810 {2, 0, ISA, 1, PCI},
811 {2, 0, EISA, 1, PCI},
812 {2, 0, MCA, 1, PCI}
813};
814
815
4f6a8b30
SZ
816#ifdef APIC_IO
817
984263bc
MD
818/* the bus data */
819static bus_datum *bus_data;
820
821/* the IO INT data, one entry per possible APIC INTerrupt */
822static io_int *io_apic_ints;
984263bc 823static int nintrs;
4f6a8b30 824
97359a5b 825#endif
984263bc 826
a0873f07 827static int processor_entry (const struct PROCENTRY *entry, int cpu);
97359a5b 828#ifdef APIC_IO
c4717d5c
SZ
829static int bus_entry (const struct BUSENTRY *entry, int bus);
830static int io_apic_entry (const struct IOAPICENTRY *entry, int apic);
831static int int_entry (const struct INTENTRY *entry, int intr);
97359a5b 832#endif
3ae0cd58 833static int lookup_bus_type (char *name);
984263bc 834
8658b5be
SZ
835#ifdef APIC_IO
836
837static int
838mptable_ioapic_pass1_callback(void *xarg, const void *pos, int type)
839{
840 const struct IOAPICENTRY *ioapic_ent;
841
842 switch (type) {
843 case 1: /* bus_entry */
844 ++mp_nbusses;
845 break;
846
847 case 2: /* io_apic_entry */
848 ioapic_ent = pos;
849 if (ioapic_ent->apic_flags & IOAPICENTRY_FLAG_EN) {
850 io_apic_address[mp_napics++] =
851 (vm_offset_t)ioapic_ent->apic_address;
852 }
853 break;
854
855 case 3: /* int_entry */
856 ++nintrs;
857 break;
858 }
859 return 0;
860}
861
862#endif /* APIC_IO */
984263bc
MD
863
/*
 * 1st pass on motherboard's Intel MP specification table.
 *
 * determines:
 *	io_apic_address[N]
 *	mp_nbusses
 *	mp_napics
 *	nintrs
 *
 * For a 'default' configuration (mpfb1 != 0) the values come from
 * default_data[]; mpfb1 is validated against the size of that table
 * first, since it is a firmware supplied byte.  Otherwise the base
 * table is walked with mptable_ioapic_pass1_callback().  Panics when
 * either source is unusable.  Does nothing without APIC_IO.
 */
static void
mptable_pass1(struct mptable_pos *mpt)
{
#ifdef APIC_IO
	mpfps_t fps;
	int x;

	POSTCODE(MPTABLE_PASS1_POST);

	fps = mpt->mp_fps;
	KKASSERT(fps != NULL);

	/* clear various tables */
	for (x = 0; x < NAPICID; ++x)
		io_apic_address[x] = ~0;	/* IO APIC address table */

	mp_nbusses = 0;
	mp_napics = 0;
	nintrs = 0;

	/* check for use of 'default' configuration */
	if (fps->mpfb1 != 0) {
		/* default_data[] only describes types 1..7 */
		if (fps->mpfb1 >
		    sizeof(default_data) / sizeof(default_data[0])) {
			panic("unknown default MP configuration type %d\n",
			      fps->mpfb1);
		}

		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
		mp_nbusses = default_data[fps->mpfb1 - 1][0];
		mp_napics = 1;
		nintrs = 16;
	} else {
		int error;

		error = mptable_iterate_entries(mpt->mp_cth,
			    mptable_ioapic_pass1_callback, NULL);
		if (error)
			panic("mptable_iterate_entries(ioapic_pass1) failed\n");
	}
#endif	/* APIC_IO */
}
909
c4717d5c
SZ
910#ifdef APIC_IO
911
/*
 * Running indices handed to bus_entry(), io_apic_entry() and
 * int_entry() by mptable_ioapic_pass2_callback().
 */
struct mptable_ioapic2_cbarg {
	int	bus;	/* next bus index */
	int	apic;	/* next I/O APIC index */
	int	intr;	/* next interrupt index */
};
917
918static int
919mptable_ioapic_pass2_callback(void *xarg, const void *pos, int type)
920{
921 struct mptable_ioapic2_cbarg *arg = xarg;
922
923 switch (type) {
924 case 1:
925 if (bus_entry(pos, arg->bus))
926 ++arg->bus;
927 break;
928
929 case 2:
930 if (io_apic_entry(pos, arg->apic))
931 ++arg->apic;
932 break;
933
934 case 3:
935 if (int_entry(pos, arg->intr))
936 ++arg->intr;
937 break;
938 }
939 return 0;
940}
941
942#endif /* APIC_IO */
984263bc
MD
943
/*
 * 2nd pass on motherboard's Intel MP specification table.
 *
 * sets:
 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
 *	IO_TO_ID(N), logical IO to APIC ID table
 *	bus_data[N]
 *	io_apic_ints[N]
 *
 * Allocates the tables sized by mptable_pass1(), maps every I/O APIC
 * and then either fills in a 'default' configuration or walks the
 * base table with mptable_ioapic_pass2_callback().
 *
 * io_apic_ints[] is allocated with FIXUP_EXTRA_APIC_INTS spare
 * entries; every allocated entry is marked unused so that no entry
 * is ever read with uninitialized contents.
 *
 * Does nothing without APIC_IO.
 */
static void
mptable_pass2(struct mptable_pos *mpt)
{
#ifdef APIC_IO
	struct mptable_ioapic2_cbarg arg;
	mpfps_t fps;
	int error, x;
	int nints_alloc;

	POSTCODE(MPTABLE_PASS2_POST);

	fps = mpt->mp_fps;
	KKASSERT(fps != NULL);

	nints_alloc = nintrs + FIXUP_EXTRA_APIC_INTS;

	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
	    M_DEVBUF, M_WAITOK);
	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * nints_alloc,
	    M_DEVBUF, M_WAITOK);
	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
	    M_DEVBUF, M_WAITOK);

	for (x = 0; x < mp_napics; x++)
		ioapic[x] = permanent_io_mapping(io_apic_address[x]);

	/* clear various tables */
	for (x = 0; x < NAPICID; ++x) {
		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
	}

	/* clear bus data table */
	for (x = 0; x < mp_nbusses; ++x)
		bus_data[x].bus_id = 0xff;

	/* clear IO APIC INT table, including the spare fixup entries */
	for (x = 0; x < nints_alloc; ++x) {
		io_apic_ints[x].int_type = 0xff;
		io_apic_ints[x].int_vector = 0xff;
	}

	/* check for use of 'default' configuration */
	if (fps->mpfb1 != 0) {
		mptable_default(fps->mpfb1);
		return;
	}

	bzero(&arg, sizeof(arg));
	error = mptable_iterate_entries(mpt->mp_cth,
		    mptable_ioapic_pass2_callback, &arg);
	if (error)
		panic("mptable_iterate_entries(ioapic_pass2) failed\n");
#endif
}
1007
984263bc
MD
1008/*
1009 * Check if we should perform a hyperthreading "fix-up" to
1010 * enumerate any logical CPU's that aren't already listed
1011 * in the table.
1012 *
1013 * XXX: We assume that all of the physical CPUs in the
1014 * system have the same number of logical CPUs.
1015 *
1016 * XXX: We assume that APIC ID's are allocated such that
1017 * the APIC ID's for a physical processor are aligned
1018 * with the number of logical CPU's in the processor.
1019 */
44c36320
SZ
1020static int
1021mptable_hyperthread_fixup(u_int id_mask, int cpu_count)
984263bc 1022{
44c36320 1023 int i, id, lcpus_max, logical_cpus;
984263bc 1024
984263bc 1025 if ((cpu_feature & CPUID_HTT) == 0)
44c36320 1026 return 0;
7ea07fd2
SZ
1027
1028 lcpus_max = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
1029 if (lcpus_max <= 1)
44c36320 1030 return 0;
984263bc 1031
7ea07fd2
SZ
1032 if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
1033 /*
1034 * INSTRUCTION SET REFERENCE, A-M (#253666)
1035 * Page 3-181, Table 3-20
1036 * "The nearest power-of-2 integer that is not smaller
1037 * than EBX[23:16] is the number of unique initial APIC
1038 * IDs reserved for addressing different logical
1039 * processors in a physical package."
1040 */
1041 for (i = 0; ; ++i) {
1042 if ((1 << i) >= lcpus_max) {
1043 lcpus_max = 1 << i;
1044 break;
1045 }
1046 }
1047 }
1048
44c36320
SZ
1049 KKASSERT(cpu_count != 0);
1050 if (cpu_count == lcpus_max) {
7ea07fd2 1051 /* We have nothing to fix */
44c36320
SZ
1052 return 0;
1053 } else if (cpu_count == 1) {
7ea07fd2
SZ
1054 /* XXX this may be incorrect */
1055 logical_cpus = lcpus_max;
1056 } else {
1057 int cur, prev, dist;
1058
1059 /*
1060 * Calculate the distances between two nearest
1061 * APIC IDs. If all such distances are same,
1062 * then it is the number of missing cpus that
1063 * we are going to fill later.
1064 */
1065 dist = cur = prev = -1;
1066 for (id = 0; id < MAXCPU; ++id) {
1067 if ((id_mask & 1 << id) == 0)
1068 continue;
1069
1070 cur = id;
1071 if (prev >= 0) {
1072 int new_dist = cur - prev;
1073
1074 if (dist < 0)
1075 dist = new_dist;
1076
1077 /*
1078 * Make sure that all distances
1079 * between two nearest APIC IDs
1080 * are same.
1081 */
1082 if (dist != new_dist)
44c36320 1083 return 0;
7ea07fd2
SZ
1084 }
1085 prev = cur;
1086 }
1087 if (dist == 1)
44c36320 1088 return 0;
7ea07fd2
SZ
1089
1090 /* Must be power of 2 */
1091 if (dist & (dist - 1))
44c36320 1092 return 0;
7ea07fd2
SZ
1093
1094 /* Can't exceed CPU package capacity */
1095 if (dist > lcpus_max)
1096 logical_cpus = lcpus_max;
1097 else
1098 logical_cpus = dist;
1099 }
1100
984263bc
MD
1101 /*
1102 * For each APIC ID of a CPU that is set in the mask,
1103 * scan the other candidate APIC ID's for this
1104 * physical processor. If any of those ID's are
1105 * already in the table, then kill the fixup.
1106 */
7ea07fd2 1107 for (id = 0; id < MAXCPU; id++) {
984263bc
MD
1108 if ((id_mask & 1 << id) == 0)
1109 continue;
1110 /* First, make sure we are on a logical_cpus boundary. */
1111 if (id % logical_cpus != 0)
44c36320 1112 return 0;
984263bc
MD
1113 for (i = id + 1; i < id + logical_cpus; i++)
1114 if ((id_mask & 1 << i) != 0)
44c36320 1115 return 0;
984263bc 1116 }
44c36320 1117 return logical_cpus;
984263bc 1118}
984263bc 1119
fa058384 1120static int
981bebd1
SZ
1121mptable_map(struct mptable_pos *mpt, vm_paddr_t mpfps_paddr)
1122{
1123 mpfps_t fps = NULL;
1124 mpcth_t cth = NULL;
1125 vm_size_t cth_mapsz = 0;
1126
fa058384
SZ
1127 bzero(mpt, sizeof(*mpt));
1128
981bebd1
SZ
1129 fps = pmap_mapdev(mpfps_paddr, sizeof(*fps));
1130 if (fps->pap != 0) {
1131 /*
1132 * Map configuration table header to get
1133 * the base table size
1134 */
1135 cth = pmap_mapdev(fps->pap, sizeof(*cth));
1136 cth_mapsz = cth->base_table_length;
1137 pmap_unmapdev((vm_offset_t)cth, sizeof(*cth));
1138
fa058384
SZ
1139 if (cth_mapsz < sizeof(*cth)) {
1140 kprintf("invalid base MP table length %d\n",
1141 (int)cth_mapsz);
1142 pmap_unmapdev((vm_offset_t)fps, sizeof(*fps));
1143 return EINVAL;
1144 }
1145
981bebd1
SZ
1146 /*
1147 * Map the base table
1148 */
1149 cth = pmap_mapdev(fps->pap, cth_mapsz);
1150 }
1151
1152 mpt->mp_fps = fps;
1153 mpt->mp_cth = cth;
1154 mpt->mp_cth_mapsz = cth_mapsz;
fa058384
SZ
1155
1156 return 0;
981bebd1
SZ
1157}
1158
1159static void
1160mptable_unmap(struct mptable_pos *mpt)
1161{
1162 if (mpt->mp_cth != NULL) {
1163 pmap_unmapdev((vm_offset_t)mpt->mp_cth, mpt->mp_cth_mapsz);
1164 mpt->mp_cth = NULL;
1165 mpt->mp_cth_mapsz = 0;
1166 }
1167 if (mpt->mp_fps != NULL) {
1168 pmap_unmapdev((vm_offset_t)mpt->mp_fps, sizeof(*mpt->mp_fps));
1169 mpt->mp_fps = NULL;
1170 }
1171}
1172
97359a5b
MD
1173#ifdef APIC_IO
1174
984263bc
MD
1175void
1176assign_apic_irq(int apic, int intpin, int irq)
1177{
1178 int x;
1179
1180 if (int_to_apicintpin[irq].ioapic != -1)
1181 panic("assign_apic_irq: inconsistent table");
1182
1183 int_to_apicintpin[irq].ioapic = apic;
1184 int_to_apicintpin[irq].int_pin = intpin;
1185 int_to_apicintpin[irq].apic_address = ioapic[apic];
1186 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1187
1188 for (x = 0; x < nintrs; x++) {
1189 if ((io_apic_ints[x].int_type == 0 ||
1190 io_apic_ints[x].int_type == 3) &&
1191 io_apic_ints[x].int_vector == 0xff &&
1192 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1193 io_apic_ints[x].dst_apic_int == intpin)
1194 io_apic_ints[x].int_vector = irq;
1195 }
1196}
1197
1198void
1199revoke_apic_irq(int irq)
1200{
1201 int x;
1202 int oldapic;
1203 int oldintpin;
1204
1205 if (int_to_apicintpin[irq].ioapic == -1)
1206 panic("revoke_apic_irq: inconsistent table");
1207
1208 oldapic = int_to_apicintpin[irq].ioapic;
1209 oldintpin = int_to_apicintpin[irq].int_pin;
1210
1211 int_to_apicintpin[irq].ioapic = -1;
1212 int_to_apicintpin[irq].int_pin = 0;
1213 int_to_apicintpin[irq].apic_address = NULL;
1214 int_to_apicintpin[irq].redirindex = 0;
1215
1216 for (x = 0; x < nintrs; x++) {
1217 if ((io_apic_ints[x].int_type == 0 ||
1218 io_apic_ints[x].int_type == 3) &&
1219 io_apic_ints[x].int_vector != 0xff &&
1220 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1221 io_apic_ints[x].dst_apic_int == oldintpin)
1222 io_apic_ints[x].int_vector = 0xff;
1223 }
1224}
1225
1439c090
MD
1226/*
1227 * Allocate an IRQ
1228 */
984263bc
MD
1229static void
1230allocate_apic_irq(int intr)
1231{
1232 int apic;
1233 int intpin;
1234 int irq;
1235
1236 if (io_apic_ints[intr].int_vector != 0xff)
1237 return; /* Interrupt handler already assigned */
1238
1239 if (io_apic_ints[intr].int_type != 0 &&
1240 (io_apic_ints[intr].int_type != 3 ||
1241 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1242 io_apic_ints[intr].dst_apic_int == 0)))
1243 return; /* Not INT or ExtInt on != (0, 0) */
1244
1245 irq = 0;
1246 while (irq < APIC_INTMAPSIZE &&
1247 int_to_apicintpin[irq].ioapic != -1)
1248 irq++;
1249
1250 if (irq >= APIC_INTMAPSIZE)
1251 return; /* No free interrupt handlers */
1252
1253 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1254 intpin = io_apic_ints[intr].dst_apic_int;
1255
1256 assign_apic_irq(apic, intpin, irq);
1257 io_apic_setup_intpin(apic, intpin);
1258}
1259
1260
1261static void
1262swap_apic_id(int apic, int oldid, int newid)
1263{
1264 int x;
1265 int oapic;
1266
1267
1268 if (oldid == newid)
1269 return; /* Nothing to do */
1270
26be20a0 1271 kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
984263bc
MD
1272 apic, oldid, newid);
1273
1274 /* Swap physical APIC IDs in interrupt entries */
1275 for (x = 0; x < nintrs; x++) {
1276 if (io_apic_ints[x].dst_apic_id == oldid)
1277 io_apic_ints[x].dst_apic_id = newid;
1278 else if (io_apic_ints[x].dst_apic_id == newid)
1279 io_apic_ints[x].dst_apic_id = oldid;
1280 }
1281
1282 /* Swap physical APIC IDs in IO_TO_ID mappings */
1283 for (oapic = 0; oapic < mp_napics; oapic++)
1284 if (IO_TO_ID(oapic) == newid)
1285 break;
1286
1287 if (oapic < mp_napics) {
26be20a0 1288 kprintf("Changing APIC ID for IO APIC #%d from "
984263bc
MD
1289 "%d to %d in MP table\n",
1290 oapic, newid, oldid);
1291 IO_TO_ID(oapic) = oldid;
1292 }
1293 IO_TO_ID(apic) = newid;
1294}
1295
1296
1297static void
1298fix_id_to_io_mapping(void)
1299{
1300 int x;
1301
1302 for (x = 0; x < NAPICID; x++)
1303 ID_TO_IO(x) = -1;
1304
1305 for (x = 0; x <= mp_naps; x++)
1306 if (CPU_TO_ID(x) < NAPICID)
1307 ID_TO_IO(CPU_TO_ID(x)) = x;
1308
1309 for (x = 0; x < mp_napics; x++)
1310 if (IO_TO_ID(x) < NAPICID)
1311 ID_TO_IO(IO_TO_ID(x)) = x;
1312}
1313
1314
1315static int
1316first_free_apic_id(void)
1317{
1318 int freeid, x;
1319
1320 for (freeid = 0; freeid < NAPICID; freeid++) {
1321 for (x = 0; x <= mp_naps; x++)
1322 if (CPU_TO_ID(x) == freeid)
1323 break;
1324 if (x <= mp_naps)
1325 continue;
1326 for (x = 0; x < mp_napics; x++)
1327 if (IO_TO_ID(x) == freeid)
1328 break;
1329 if (x < mp_napics)
1330 continue;
1331 return freeid;
1332 }
1333 return freeid;
1334}
1335
1336
1337static int
1338io_apic_id_acceptable(int apic, int id)
1339{
1340 int cpu; /* Logical CPU number */
1341 int oapic; /* Logical IO APIC number for other IO APIC */
1342
1343 if (id >= NAPICID)
1344 return 0; /* Out of range */
1345
1346 for (cpu = 0; cpu <= mp_naps; cpu++)
1347 if (CPU_TO_ID(cpu) == id)
1348 return 0; /* Conflict with CPU */
1349
1350 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1351 if (IO_TO_ID(oapic) == id)
1352 return 0; /* Conflict with other APIC */
1353
1354 return 1; /* ID is acceptable for IO APIC */
1355}
1356
1439c090
MD
1357static
1358io_int *
1359io_apic_find_int_entry(int apic, int pin)
1360{
1361 int x;
1362
1363 /* search each of the possible INTerrupt sources */
1364 for (x = 0; x < nintrs; ++x) {
1365 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1366 (pin == io_apic_ints[x].dst_apic_int))
1367 return (&io_apic_ints[x]);
1368 }
1369 return NULL;
1370}
1371
97359a5b 1372#endif
984263bc
MD
1373
/*
 * Repair known defects in the bus / IO APIC / interrupt data gathered
 * from the MP table.  With APIC_IO undefined this is a no-op.
 *
 * In order:
 *  1. renumber a lone PCI bus that the BIOS did not number 0;
 *  2. resolve physical APIC ID conflicts for every IO APIC and rebuild
 *     the ID_TO_IO() map;
 *  3. add or correct the ExtINT entry on pin 0 of IO APIC 0;
 *  4. synthesize an entry for pin 15 when pin 14 is a standard INT but
 *     pin 15 has no entry at all.
 */
static void
mptable_fix(void)
{
#ifdef APIC_IO
	io_int *ent;
	io_int *io14;
	int i;
	int id;
	int apic;		/* IO APIC unit number */
	int freeid;		/* Free physical APIC ID */
	int physid;		/* Current physical IO APIC ID */
	int bus_0 = 0;		/* Stop GCC warning */
	int bus_pci = 0;	/* Stop GCC warning */
	int num_pci_bus = 0;

	/*
	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
	 * did it wrong.  The MP spec says that when more than 1 PCI bus
	 * exists the BIOS must begin with bus entries for the PCI bus and
	 * use actual PCI bus numbering.  This implies that when only 1 PCI
	 * bus exists the BIOS can choose to ignore this ordering, and
	 * indeed many MP motherboards do ignore it.  This causes a problem
	 * when the PCI sub-system makes requests of the MP sub-system based
	 * on PCI bus numbers.  So here we look for the situation and
	 * renumber the busses and associated INTs in an effort to "make it
	 * right".
	 */

	/*
	 * Locate the slot holding bus ID 0 (bus_0) and the slot of the last
	 * PCI bus (bus_pci), counting the PCI busses along the way.
	 */
	for (i = 0; i < mp_nbusses; ++i) {
		if (bus_data[i].bus_id == 0)
			bus_0 = i;
		if (bus_data[i].bus_type == PCI) {
			++num_pci_bus;
			bus_pci = i;
		}
	}

	/* A single PCI bus that is not numbered 0 is mis-numbered */
	if (num_pci_bus == 1 && bus_data[bus_pci].bus_id != 0) {
		/* swap the bus entry types with whichever bus has ID 0 */
		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
		bus_data[bus_0].bus_type = PCI;

		/* swap each relevant INTerrupt entry */
		id = bus_data[bus_pci].bus_id;
		for (i = 0; i < nintrs; ++i) {
			ent = &io_apic_ints[i];
			if (ent->src_bus_id == id)
				ent->src_bus_id = 0;
			else if (ent->src_bus_id == 0)
				ent->src_bus_id = id;
		}
	}

	/*
	 * Assign IO APIC IDs.
	 *
	 * First try the existing ID.  If a conflict is detected, try
	 * the ID in the MP table.  If a conflict is still detected, find
	 * a free id.
	 *
	 * We cannot use the ID_TO_IO table before all conflicts have been
	 * resolved and the table has been corrected.
	 */
	for (apic = 0; apic < mp_napics; ++apic) {
		/* First try to use the value set by the BIOS */
		physid = io_apic_get_id(apic);
		if (io_apic_id_acceptable(apic, physid)) {
			if (IO_TO_ID(apic) != physid)
				swap_apic_id(apic, IO_TO_ID(apic), physid);
			continue;
		}

		/* Then check if the value in the MP table is acceptable */
		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
			continue;

		/* Last resort, find a free APIC ID and use it */
		freeid = first_free_apic_id();
		if (freeid >= NAPICID)
			panic("No free physical APIC IDs found");
		if (!io_apic_id_acceptable(apic, freeid))
			panic("Free physical APIC ID not usable");
		swap_apic_id(apic, IO_TO_ID(apic), freeid);
	}
	fix_id_to_io_mapping();

	/* detect and fix broken Compaq MP table */
	switch (apic_int_type(0, 0)) {
	case -1:
		kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
		ent = &io_apic_ints[nintrs];
		ent->int_type = 3;		/* ExtInt */
		ent->int_vector = 0xff;		/* Unassigned */
		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
		ent->dst_apic_id = IO_TO_ID(0);
		ent->dst_apic_int = 0;		/* Pin 0 */
		nintrs++;
		break;
	case 0:
		kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
		/* Turn the first entry on (APIC 0, pin 0) into an ExtINT */
		for (i = 0; i < nintrs; ++i) {
			ent = &io_apic_ints[i];
			if (0 == ID_TO_IO(ent->dst_apic_id) &&
			    0 == ent->dst_apic_int) {
				ent->int_type = 3;
				ent->int_vector = 0xff;
				break;
			}
		}
		break;
	default:
		break;
	}

	/*
	 * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt.  IDE
	 * controllers universally come in pairs.  If IRQ 14 is specified
	 * as an ISA interrupt, then IRQ 15 had better be too.
	 *
	 * [ Shuttle XPC / AMD Athlon X2 ]
	 *	The MPTable is missing an entry for IRQ 15.  Note that the
	 *	ACPI table has an entry for both 14 and 15.
	 */
	if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) {
		kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n");
		/* Clone the pin 14 entry and retarget the copy at 15 */
		io14 = io_apic_find_int_entry(0, 14);
		ent = &io_apic_ints[nintrs];
		*ent = *io14;
		ent->src_bus_irq = 15;
		ent->dst_apic_int = 15;
		nintrs++;
	}
#endif
}
1516
97359a5b 1517#ifdef APIC_IO
984263bc
MD
1518
1519/* Assign low level interrupt handlers */
1520static void
1521setup_apic_irq_mapping(void)
1522{
1523 int x;
1524 int int_vector;
1525
1526 /* Clear array */
1527 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1528 int_to_apicintpin[x].ioapic = -1;
1529 int_to_apicintpin[x].int_pin = 0;
1530 int_to_apicintpin[x].apic_address = NULL;
1531 int_to_apicintpin[x].redirindex = 0;
ea689d1c
SZ
1532
1533 /* Default to masked */
1534 int_to_apicintpin[x].flags = IOAPIC_IM_FLAG_MASKED;
984263bc
MD
1535 }
1536
1537 /* First assign ISA/EISA interrupts */
1538 for (x = 0; x < nintrs; x++) {
1539 int_vector = io_apic_ints[x].src_bus_irq;
1540 if (int_vector < APIC_INTMAPSIZE &&
1541 io_apic_ints[x].int_vector == 0xff &&
1542 int_to_apicintpin[int_vector].ioapic == -1 &&
1543 (apic_int_is_bus_type(x, ISA) ||
1544 apic_int_is_bus_type(x, EISA)) &&
1545 io_apic_ints[x].int_type == 0) {
1546 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1547 io_apic_ints[x].dst_apic_int,
1548 int_vector);
1549 }
1550 }
1551
1552 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1553 for (x = 0; x < nintrs; x++) {
1554 if (io_apic_ints[x].dst_apic_int == 0 &&
1555 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1556 io_apic_ints[x].int_vector == 0xff &&
1557 int_to_apicintpin[0].ioapic == -1 &&
1558 io_apic_ints[x].int_type == 3) {
1559 assign_apic_irq(0, 0, 0);
1560 break;
1561 }
1562 }
1563 /* PCI interrupt assignment is deferred */
1564}
1565
97359a5b 1566#endif
984263bc 1567
a9112655
SZ
1568void
1569mp_set_cpuids(int cpu_id, int apic_id)
1570{
1571 CPU_TO_ID(cpu_id) = apic_id;
1572 ID_TO_CPU(apic_id) = cpu_id;
1573}
1574
984263bc 1575static int
a0873f07 1576processor_entry(const struct PROCENTRY *entry, int cpu)
984263bc 1577{
bd8aa7e2
SZ
1578 KKASSERT(cpu > 0);
1579
984263bc
MD
1580 /* check for usability */
1581 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1582 return 0;
1583
984263bc
MD
1584 /* check for BSP flag */
1585 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
a9112655 1586 mp_set_cpuids(0, entry->apic_id);
984263bc
MD
1587 return 0; /* its already been counted */
1588 }
1589
1590 /* add another AP to list, if less than max number of CPUs */
1591 else if (cpu < MAXCPU) {
a9112655 1592 mp_set_cpuids(cpu, entry->apic_id);
984263bc
MD
1593 return 1;
1594 }
1595
1596 return 0;
1597}
1598
4f6a8b30 1599#ifdef APIC_IO
984263bc
MD
1600
1601static int
c4717d5c 1602bus_entry(const struct BUSENTRY *entry, int bus)
984263bc
MD
1603{
1604 int x;
1605 char c, name[8];
1606
1607 /* encode the name into an index */
1608 for (x = 0; x < 6; ++x) {
1609 if ((c = entry->bus_type[x]) == ' ')
1610 break;
1611 name[x] = c;
1612 }
1613 name[x] = '\0';
1614
1615 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1616 panic("unknown bus type: '%s'", name);
1617
1618 bus_data[bus].bus_id = entry->bus_id;
1619 bus_data[bus].bus_type = x;
1620
1621 return 1;
1622}
1623
984263bc 1624static int
c4717d5c 1625io_apic_entry(const struct IOAPICENTRY *entry, int apic)
984263bc
MD
1626{
1627 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1628 return 0;
1629
1630 IO_TO_ID(apic) = entry->apic_id;
c163176b 1631 ID_TO_IO(entry->apic_id) = apic;
984263bc
MD
1632
1633 return 1;
1634}
1635
97359a5b 1636#endif
984263bc
MD
1637
1638static int
1639lookup_bus_type(char *name)
1640{
1641 int x;
1642
1643 for (x = 0; x < MAX_BUSTYPE; ++x)
1644 if (strcmp(bus_type_table[x].name, name) == 0)
1645 return bus_type_table[x].type;
1646
1647 return UNKNOWN_BUSTYPE;
1648}
1649
97359a5b 1650#ifdef APIC_IO
984263bc
MD
1651
1652static int
c4717d5c 1653int_entry(const struct INTENTRY *entry, int intr)
984263bc
MD
1654{
1655 int apic;
1656
1657 io_apic_ints[intr].int_type = entry->int_type;
1658 io_apic_ints[intr].int_flags = entry->int_flags;
1659 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1660 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1661 if (entry->dst_apic_id == 255) {
1662 /* This signal goes to all IO APICS. Select an IO APIC
1663 with sufficient number of interrupt pins */
1664 for (apic = 0; apic < mp_napics; apic++)
1665 if (((io_apic_read(apic, IOAPIC_VER) &
1666 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1667 entry->dst_apic_int)
1668 break;
1669 if (apic < mp_napics)
1670 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1671 else
1672 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1673 } else
1674 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1675 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1676
1677 return 1;
1678}
1679
984263bc
MD
1680static int
1681apic_int_is_bus_type(int intr, int bus_type)
1682{
1683 int bus;
1684
1685 for (bus = 0; bus < mp_nbusses; ++bus)
1686 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1687 && ((int) bus_data[bus].bus_type == bus_type))
1688 return 1;
1689
1690 return 0;
1691}
1692
984263bc
MD
1693/*
1694 * Given a traditional ISA INT mask, return an APIC mask.
1695 */
1696u_int
1697isa_apic_mask(u_int isa_mask)
1698{
1699 int isa_irq;
1700 int apic_pin;
1701
1702#if defined(SKIP_IRQ15_REDIRECT)
1703 if (isa_mask == (1 << 15)) {
26be20a0 1704 kprintf("skipping ISA IRQ15 redirect\n");
984263bc
MD
1705 return isa_mask;
1706 }
1707#endif /* SKIP_IRQ15_REDIRECT */
1708
1709 isa_irq = ffs(isa_mask); /* find its bit position */
1710 if (isa_irq == 0) /* doesn't exist */
1711 return 0;
1712 --isa_irq; /* make it zero based */
1713
1714 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1715 if (apic_pin == -1)
1716 return 0;
1717
1718 return (1 << apic_pin); /* convert pin# to a mask */
1719}
1720
984263bc
MD
1721/*
1722 * Determine which APIC pin an ISA/EISA INT is attached to.
1723 */
1724#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1725#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1726#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1727#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1728
1729#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1730int
1731isa_apic_irq(int isa_irq)
1732{
1733 int intr;
1734
1735 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1736 if (INTTYPE(intr) == 0) { /* standard INT */
1737 if (SRCBUSIRQ(intr) == isa_irq) {
1738 if (apic_int_is_bus_type(intr, ISA) ||
1739 apic_int_is_bus_type(intr, EISA)) {
1740 if (INTIRQ(intr) == 0xff)
1741 return -1; /* unassigned */
1742 return INTIRQ(intr); /* found */
1743 }
1744 }
1745 }
1746 }
1747 return -1; /* NOT found */
1748}
1749
1750
1751/*
1752 * Determine which APIC pin a PCI INT is attached to.
1753 */
1754#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1755#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1756#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1757int
1758pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1759{
1760 int intr;
1761
1762 --pciInt; /* zero based */
1763
1439c090 1764 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
984263bc
MD
1765 if ((INTTYPE(intr) == 0) /* standard INT */
1766 && (SRCBUSID(intr) == pciBus)
1767 && (SRCBUSDEVICE(intr) == pciDevice)
1439c090 1768 && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */
984263bc
MD
1769 if (apic_int_is_bus_type(intr, PCI)) {
1770 if (INTIRQ(intr) == 0xff)
1771 allocate_apic_irq(intr);
1772 if (INTIRQ(intr) == 0xff)
1773 return -1; /* unassigned */
1774 return INTIRQ(intr); /* exact match */
1775 }
1439c090
MD
1776 }
1777 }
984263bc
MD
1778
1779 return -1; /* NOT found */
1780}
1781
1782int
1783next_apic_irq(int irq)
1784{
1785 int intr, ointr;
1786 int bus, bustype;
1787
1788 bus = 0;
1789 bustype = 0;
1790 for (intr = 0; intr < nintrs; intr++) {
1791 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1792 continue;
1793 bus = SRCBUSID(intr);
1794 bustype = apic_bus_type(bus);
1795 if (bustype != ISA &&
1796 bustype != EISA &&
1797 bustype != PCI)
1798 continue;
1799 break;
1800 }
1801 if (intr >= nintrs) {
1802 return -1;
1803 }
1804 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1805 if (INTTYPE(ointr) != 0)
1806 continue;
1807 if (bus != SRCBUSID(ointr))
1808 continue;
1809 if (bustype == PCI) {
1810 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1811 continue;
1812 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1813 continue;
1814 }
1815 if (bustype == ISA || bustype == EISA) {
1816 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1817 continue;
1818 }
1819 if (INTPIN(intr) == INTPIN(ointr))
1820 continue;
1821 break;
1822 }
1823 if (ointr >= nintrs) {
1824 return -1;
1825 }
1826 return INTIRQ(ointr);
1827}
1828#undef SRCBUSLINE
1829#undef SRCBUSDEVICE
1830#undef SRCBUSID
1831#undef SRCBUSIRQ
1832
1833#undef INTPIN
1834#undef INTIRQ
1835#undef INTAPIC
1836#undef INTTYPE
1837
97359a5b 1838#endif
984263bc
MD
1839
1840/*
1841 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1842 *
1843 * XXX FIXME:
1844 * Exactly what this means is unclear at this point. It is a solution
1845 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1846 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1847 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1848 * option.
1849 */
1850int
1851undirect_isa_irq(int rirq)
1852{
1853#if defined(READY)
1854 if (bootverbose)
26be20a0 1855 kprintf("Freeing redirected ISA irq %d.\n", rirq);
984263bc 1856 /** FIXME: tickle the MB redirector chip */
c044141b 1857 return /* XXX */;
984263bc
MD
1858#else
1859 if (bootverbose)
26be20a0 1860 kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
984263bc
MD
1861 return 0;
1862#endif /* READY */
1863}
1864
1865
1866/*
1867 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1868 */
1869int
1870undirect_pci_irq(int rirq)
1871{
1872#if defined(READY)
1873 if (bootverbose)
26be20a0 1874 kprintf("Freeing redirected PCI irq %d.\n", rirq);
984263bc
MD
1875
1876 /** FIXME: tickle the MB redirector chip */
c044141b 1877 return /* XXX */;
984263bc
MD
1878#else
1879 if (bootverbose)
26be20a0 1880 kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n",
984263bc
MD
1881 rirq);
1882 return 0;
1883#endif /* READY */
1884}
1885
1886
4f6a8b30
SZ
1887#ifdef APIC_IO
1888
984263bc
MD
1889/*
1890 * given a bus ID, return:
1891 * the bus type if found
1892 * -1 if NOT found
1893 */
1894int
1895apic_bus_type(int id)
1896{
1897 int x;
1898
1899 for (x = 0; x < mp_nbusses; ++x)
1900 if (bus_data[x].bus_id == id)
1901 return bus_data[x].bus_type;
1902
1903 return -1;
1904}
1905
984263bc
MD
1906/*
1907 * given a LOGICAL APIC# and pin#, return:
1908 * the associated src bus ID if found
1909 * -1 if NOT found
1910 */
1911int
1912apic_src_bus_id(int apic, int pin)
1913{
1914 int x;
1915
1916 /* search each of the possible INTerrupt sources */
1917 for (x = 0; x < nintrs; ++x)
1918 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1919 (pin == io_apic_ints[x].dst_apic_int))
1920 return (io_apic_ints[x].src_bus_id);
1921
1922 return -1; /* NOT found */
1923}
1924
984263bc
MD
1925/*
1926 * given a LOGICAL APIC# and pin#, return:
1927 * the associated src bus IRQ if found
1928 * -1 if NOT found
1929 */
1930int
1931apic_src_bus_irq(int apic, int pin)
1932{
1933 int x;
1934
1935 for (x = 0; x < nintrs; x++)
1936 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1937 (pin == io_apic_ints[x].dst_apic_int))
1938 return (io_apic_ints[x].src_bus_irq);
1939
1940 return -1; /* NOT found */
1941}
1942
1943
1944/*
1945 * given a LOGICAL APIC# and pin#, return:
1946 * the associated INTerrupt type if found
1947 * -1 if NOT found
1948 */
1949int
1950apic_int_type(int apic, int pin)
1951{
1952 int x;
1953
1954 /* search each of the possible INTerrupt sources */
1439c090 1955 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1956 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1957 (pin == io_apic_ints[x].dst_apic_int))
1958 return (io_apic_ints[x].int_type);
1439c090 1959 }
984263bc
MD
1960 return -1; /* NOT found */
1961}
1962
1439c090
MD
1963/*
1964 * Return the IRQ associated with an APIC pin
1965 */
984263bc
MD
1966int
1967apic_irq(int apic, int pin)
1968{
1969 int x;
1970 int res;
1971
1439c090 1972 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1973 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1974 (pin == io_apic_ints[x].dst_apic_int)) {
1975 res = io_apic_ints[x].int_vector;
1976 if (res == 0xff)
1977 return -1;
1978 if (apic != int_to_apicintpin[res].ioapic)
1439c090 1979 panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic);
984263bc
MD
1980 if (pin != int_to_apicintpin[res].int_pin)
1981 panic("apic_irq inconsistent table (2)");
1982 return res;
1983 }
1439c090 1984 }
984263bc
MD
1985 return -1;
1986}
1987
1988
1989/*
1990 * given a LOGICAL APIC# and pin#, return:
1991 * the associated trigger mode if found
1992 * -1 if NOT found
1993 */
1994int
1995apic_trigger(int apic, int pin)
1996{
1997 int x;
1998
1999 /* search each of the possible INTerrupt sources */
2000 for (x = 0; x < nintrs; ++x)
2001 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
2002 (pin == io_apic_ints[x].dst_apic_int))
2003 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
2004
2005 return -1; /* NOT found */
2006}
2007
2008
2009/*
2010 * given a LOGICAL APIC# and pin#, return:
2011 * the associated 'active' level if found
2012 * -1 if NOT found
2013 */
2014int
2015apic_polarity(int apic, int pin)
2016{
2017 int x;
2018
2019 /* search each of the possible INTerrupt sources */
2020 for (x = 0; x < nintrs; ++x)
2021 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
2022 (pin == io_apic_ints[x].dst_apic_int))
2023 return (io_apic_ints[x].int_flags & 0x03);
2024
2025 return -1; /* NOT found */
2026}
2027
97359a5b 2028#endif
984263bc
MD
2029
/*
 * set data according to MP defaults
 * FIXME: probably not complete yet...
 *
 * `type' is the MP default configuration type; values outside 1..7
 * cause a panic.  With APIC_IO undefined this function does nothing.
 * Otherwise it records the ID of the single IO APIC, fills bus_data[0]
 * and bus_data[1] from default_data[], and sets up io_apic_ints[0..15]
 * with a 1-to-1 pin mapping plus the per-type special cases.
 */
static void
mptable_default(int type)
{
#if defined(APIC_IO)
	io_int *ent;
	int io_apic_id;
	int pin;
	int bad_id;

#if 0
	kprintf("  MP default config type: %d\n", type);
	switch (type) {
	case 1:
		kprintf("   bus: ISA, APIC: 82489DX\n");
		break;
	case 2:
		kprintf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 3:
		kprintf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 4:
		kprintf("   bus: MCA, APIC: 82489DX\n");
		break;
	case 5:
		kprintf("   bus: ISA+PCI, APIC: Integrated\n");
		break;
	case 6:
		kprintf("   bus: EISA+PCI, APIC: Integrated\n");
		break;
	case 7:
		kprintf("   bus: MCA+PCI, APIC: Integrated\n");
		break;
	default:
		kprintf("   future type\n");
		break;
		/* NOTREACHED */
	}
#endif	/* 0 */

	/* one and only IO APIC */
	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;

	/*
	 * sanity check, refer to MP spec section 3.6.6, last paragraph
	 * necessary as some hardware isn't properly setting up the IO APIC
	 */
#if defined(REALLY_ANAL_IOAPICID_VALUE)
	bad_id = (io_apic_id != 2);
#else
	bad_id = (io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15);
#endif	/* REALLY_ANAL_IOAPICID_VALUE */
	if (bad_id) {
		/* force the ID to 2 */
		io_apic_set_id(0, 2);
		io_apic_id = 2;
	}
	IO_TO_ID(0) = io_apic_id;
	ID_TO_IO(io_apic_id) = 0;

	/* fill out bus entries; only types 1..7 have a default_data[] row */
	if (type < 1 || type > 7) {
		/* illegal/reserved */
		panic("BAD default MP config: %d", type);
		/* NOTREACHED */
	}
	bus_data[0].bus_id = default_data[type - 1][1];
	bus_data[0].bus_type = default_data[type - 1][2];
	bus_data[1].bus_id = default_data[type - 1][3];
	bus_data[1].bus_type = default_data[type - 1][4];

	/* general cases from MP v1.4, table 5-2 */
	for (pin = 0; pin < 16; ++pin) {
		ent = &io_apic_ints[pin];
		ent->int_type = 0;
		ent->int_flags = 0x05;		/* edge/active-hi */
		ent->src_bus_id = 0;
		ent->src_bus_irq = pin;		/* IRQ2 caught below */
		ent->dst_apic_id = io_apic_id;
		ent->dst_apic_int = pin;	/* 1-to-1 */
	}

	/* special cases from MP v1.4, table 5-2 */
	if (type != 2) {
		/* ISA IRQ0 is on APIC INT 2 */
		io_apic_ints[2].src_bus_irq = 0;
	} else {
		io_apic_ints[2].int_type = 0xff;	/* N/C */
		io_apic_ints[13].int_type = 0xff;	/* N/C */
#if !defined(APIC_MIXED_MODE)
		/** FIXME: ??? */
		panic("sorry, can't support type 2 default yet");
#endif	/* APIC_MIXED_MODE */
	}

	if (type == 7)
		io_apic_ints[0].int_type = 0xff;	/* N/C */
	else
		io_apic_ints[0].int_type = 3;		/* vectored 8259 */
#endif	/* APIC_IO */
}
2139
984263bc 2140/*
f13b5eec
MD
2141 * Map a physical memory address representing I/O into KVA. The I/O
2142 * block is assumed not to cross a page boundary.
2143 */
2144void *
2145permanent_io_mapping(vm_paddr_t pa)
2146{
2147 vm_offset_t vaddr;
2148 int pgeflag;
2149 int i;
2150
2151 KKASSERT(pa < 0x100000000LL);
2152
2153 pgeflag = 0; /* not used for SMP yet */
2154
2155 /*
2156 * If the requested physical address has already been incidently
2157 * mapped, just use the existing mapping. Otherwise create a new
2158 * mapping.
2159 */
2160 for (i = IO_MAPPING_START_INDEX; i < SMPpt_alloc_index; ++i) {
2161 if (((vm_offset_t)SMPpt[i] & PG_FRAME) ==
2162 ((vm_offset_t)pa & PG_FRAME)) {
2163 break;
2164 }
2165 }
2166 if (i == SMPpt_alloc_index) {
2167 if (i == NPTEPG - 2) {
2168 panic("permanent_io_mapping: We ran out of space"
2169 " in SMPpt[]!");
2170 }
5277b9f6 2171 SMPpt[i] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
f13b5eec
MD
2172 ((vm_offset_t)pa & PG_FRAME));
2173 ++SMPpt_alloc_index;
2174 }
2175 vaddr = (vm_offset_t)CPU_prvspace + (i * PAGE_SIZE) +
2176 ((vm_offset_t)pa & PAGE_MASK);
2177 return ((void *)vaddr);
2178}
2179
2180/*
984263bc
MD
2181 * start each AP in our list
2182 */
2183static int
2184start_all_aps(u_int boot_addr)
2185{
b45759e1
MD
2186 int x, i, pg;
2187 int shift;
984263bc
MD
2188 u_char mpbiosreason;
2189 u_long mpbioswarmvec;
8a8d5d85 2190 struct mdglobaldata *gd;
0f7a3396 2191 struct privatespace *ps;
984263bc
MD
2192 char *stack;
2193 uintptr_t kptbase;
2194
2195 POSTCODE(START_ALL_APS_POST);
2196
b52c8db0
SZ
2197 /* Initialize BSP's local APIC */
2198 apic_initialize(TRUE);
984263bc
MD
2199
2200 /* install the AP 1st level boot code */
2201 install_ap_tramp(boot_addr);
2202
2203
2204 /* save the current value of the warm-start vector */
2205 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
984263bc
MD
2206 outb(CMOS_REG, BIOS_RESET);
2207 mpbiosreason = inb(CMOS_DATA);
984263bc 2208
984263bc
MD
2209 /* set up temporary P==V mapping for AP boot */
2210 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
2211 kptbase = (uintptr_t)(void *)KPTphys;
a44bdeec 2212 for (x = 0; x < NKPT; x++) {
984263bc
MD
2213 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
2214 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
a44bdeec 2215 }
0f7a3396 2216 cpu_invltlb();
984263bc
MD
2217
2218 /* start each AP */
2219 for (x = 1; x <= mp_naps; ++x) {
2220
2221 /* This is a bit verbose, it will go away soon. */
2222
2223 /* first page of AP's private space */
2224 pg = x * i386_btop(sizeof(struct privatespace));
2225
81c04d07 2226 /* allocate new private data page(s) */
e4846942 2227 gd = (struct mdglobaldata *)kmem_alloc(&kernel_map,
81c04d07 2228 MDGLOBALDATA_BASEALLOC_SIZE);
984263bc 2229 /* wire it into the private page table page */
81c04d07
MD
2230 for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) {
2231 SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t)
2232 (PG_V | PG_RW | vtophys_pte((char *)gd + i));
2233 }
2234 pg += MDGLOBALDATA_BASEALLOC_PAGES;
2235
2236 SMPpt[pg + 0] = 0; /* *gd_CMAP1 */
2237 SMPpt[pg + 1] = 0; /* *gd_CMAP2 */
2238 SMPpt[pg + 2] = 0; /* *gd_CMAP3 */
2239 SMPpt[pg + 3] = 0; /* *gd_PMAP1 */
984263bc
MD
2240
2241 /* allocate and set up an idle stack data page */
e4846942 2242 stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE);
8a8d5d85 2243 for (i = 0; i < UPAGES; i++) {
81c04d07 2244 SMPpt[pg + 4 + i] = (pt_entry_t)
b5b32410 2245 (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack));
8a8d5d85 2246 }
984263bc 2247
8a8d5d85
MD
2248 gd = &CPU_prvspace[x].mdglobaldata; /* official location */
2249 bzero(gd, sizeof(*gd));
0f7a3396 2250 gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
8a8d5d85 2251
984263bc 2252 /* prime data page for it to use */
8a8d5d85 2253 mi_gdinit(&gd->mi, x);
8ad65e08 2254 cpu_gdinit(gd, x);
81c04d07
MD
2255 gd->gd_CMAP1 = &SMPpt[pg + 0];
2256 gd->gd_CMAP2 = &SMPpt[pg + 1];
2257 gd->gd_CMAP3 = &SMPpt[pg + 2];
2258 gd->gd_PMAP1 = &SMPpt[pg + 3];
0f7a3396
MD
2259 gd->gd_CADDR1 = ps->CPAGE1;
2260 gd->gd_CADDR2 = ps->CPAGE2;
2261 gd->gd_CADDR3 = ps->CPAGE3;
2262 gd->gd_PADDR1 = (unsigned *)ps->PPAGE1;
e4846942 2263 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
96728c05 2264 bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
984263bc
MD
2265
2266 /* setup a vector to our boot code */
2267 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2268 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
984263bc
MD
2269 outb(CMOS_REG, BIOS_RESET);
2270 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
984263bc 2271
8a8d5d85
MD
2272 /*
2273 * Setup the AP boot stack
2274 */
0f7a3396 2275 bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
984263bc
MD
2276 bootAP = x;
2277
2278 /* attempt to start the Application Processor */
2279 CHECK_INIT(99); /* setup checkpoints */
0f7a3396 2280 if (!start_ap(gd, boot_addr)) {
26be20a0 2281 kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
984263bc
MD
2282 CHECK_PRINT("trace"); /* show checkpoints */
2283 /* better panic as the AP may be running loose */
26be20a0 2284 kprintf("panic y/n? [y] ");
984263bc
MD
2285 if (cngetc() != 'n')
2286 panic("bye-bye");
2287 }
2288 CHECK_PRINT("trace"); /* show checkpoints */
2289
2290 /* record its version info */
2291 cpu_apic_versions[x] = cpu_apic_versions[0];
984263bc
MD
2292 }
2293
0f7a3396
MD
2294 /* set ncpus to 1 + highest logical cpu. Not all may have come up */
2295 ncpus = x;
2296
b45759e1
MD
2297 /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
2298 for (shift = 0; (1 << shift) <= ncpus; ++shift)
2299 ;
2300 --shift;
2301 ncpus2_shift = shift;
2302 ncpus2 = 1 << shift;
90100055
JH
2303 ncpus2_mask = ncpus2 - 1;
2304
b45759e1
MD
2305 /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
2306 if ((1 << shift) < ncpus)
2307 ++shift;
2308 ncpus_fit = 1 << shift;
2309 ncpus_fit_mask = ncpus_fit - 1;
2310
984263bc 2311 /* build our map of 'other' CPUs */
0f7a3396 2312 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
e4846942 2313 mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus);
96728c05 2314 bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
984263bc
MD
2315
2316 /* fill in our (BSP) APIC version */
2317 cpu_apic_versions[0] = lapic.version;
2318
2319 /* restore the warmstart vector */
2320 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
984263bc
MD
2321 outb(CMOS_REG, BIOS_RESET);
2322 outb(CMOS_DATA, mpbiosreason);
984263bc
MD
2323
2324 /*
8a8d5d85
MD
2325 * NOTE! The idlestack for the BSP was setup by locore. Finish
2326 * up, clean out the P==V mapping we did earlier.
984263bc 2327 */
984263bc
MD
2328 for (x = 0; x < NKPT; x++)
2329 PTD[x] = 0;
2330 pmap_set_opt();
2331
2332 /* number of APs actually started */
8a8d5d85 2333 return ncpus - 1;
984263bc
MD
2334}
2335
2336
2337/*
2338 * load the 1st level AP boot code into base memory.
2339 */
2340
2341/* targets for relocation */
2342extern void bigJump(void);
2343extern void bootCodeSeg(void);
2344extern void bootDataSeg(void);
2345extern void MPentry(void);
2346extern u_int MP_GDT;
2347extern u_int mp_gdtbase;
2348
2349static void
2350install_ap_tramp(u_int boot_addr)
2351{
2352 int x;
2353 int size = *(int *) ((u_long) & bootMP_size);
2354 u_char *src = (u_char *) ((u_long) bootMP);
2355 u_char *dst = (u_char *) boot_addr + KERNBASE;
2356 u_int boot_base = (u_int) bootMP;
2357 u_int8_t *dst8;
2358 u_int16_t *dst16;
2359 u_int32_t *dst32;
2360
2361 POSTCODE(INSTALL_AP_TRAMP_POST);
2362
2363 for (x = 0; x < size; ++x)
2364 *dst++ = *src++;
2365
2366 /*
2367 * modify addresses in code we just moved to basemem. unfortunately we
2368 * need fairly detailed info about mpboot.s for this to work. changes
2369 * to mpboot.s might require changes here.
2370 */
2371
2372 /* boot code is located in KERNEL space */
2373 dst = (u_char *) boot_addr + KERNBASE;
2374
2375 /* modify the lgdt arg */
2376 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2377 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2378
2379 /* modify the ljmp target for MPentry() */
2380 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2381 *dst32 = ((u_int) MPentry - KERNBASE);
2382
2383 /* modify the target for boot code segment */
2384 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2385 dst8 = (u_int8_t *) (dst16 + 1);
2386 *dst16 = (u_int) boot_addr & 0xffff;
2387 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2388
2389 /* modify the target for boot data segment */
2390 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2391 dst8 = (u_int8_t *) (dst16 + 1);
2392 *dst16 = (u_int) boot_addr & 0xffff;
2393 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2394}
2395
2396
2397/*
2398 * this function starts the AP (application processor) identified
2399 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
2400 * to accomplish this. This is necessary because of the nuances
2401 * of the different hardware we might encounter. It ain't pretty,
2402 * but it seems to work.
a108bf71
MD
2403 *
2404 * NOTE: eventually an AP gets to ap_init(), which is called just
2405 * before the AP goes into the LWKT scheduler's idle loop.
984263bc
MD
2406 */
2407static int
0f7a3396 2408start_ap(struct mdglobaldata *gd, u_int boot_addr)
984263bc
MD
2409{
2410 int physical_cpu;
2411 int vector;
984263bc
MD
2412 u_long icr_lo, icr_hi;
2413
2414 POSTCODE(START_AP_POST);
2415
2416 /* get the PHYSICAL APIC ID# */
0f7a3396 2417 physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
984263bc
MD
2418
2419 /* calculate the vector */
2420 vector = (boot_addr >> 12) & 0xff;
2421
8a8d5d85
MD
2422 /* Make sure the target cpu sees everything */
2423 wbinvd();
984263bc
MD
2424
2425 /*
2426 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2427 * and running the target CPU. OR this INIT IPI might be latched (P5
2428 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2429 * ignored.
2430 */
2431
2432 /* setup the address for the target AP */
2433 icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2434 icr_hi |= (physical_cpu << 24);
2435 lapic.icr_hi = icr_hi;
2436
2437 /* do an INIT IPI: assert RESET */
2438 icr_lo = lapic.icr_lo & 0xfff00000;
2439 lapic.icr_lo = icr_lo | 0x0000c500;
2440
2441 /* wait for pending status end */
2442 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2443 /* spin */ ;
2444
2445 /* do an INIT IPI: deassert RESET */
2446 lapic.icr_lo = icr_lo | 0x00008500;
2447
2448 /* wait for pending status end */
2449 u_sleep(10000); /* wait ~10mS */
2450 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2451 /* spin */ ;
2452
2453 /*
2454 * next we do a STARTUP IPI: the previous INIT IPI might still be
2455 * latched, (P5 bug) this 1st STARTUP would then terminate
2456 * immediately, and the previously started INIT IPI would continue. OR
2457 * the previous INIT IPI has already run. and this STARTUP IPI will
2458 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2459 * will run.
2460 */
2461
2462 /* do a STARTUP IPI */
2463 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2464 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2465 /* spin */ ;
2466 u_sleep(200); /* wait ~200uS */
2467
2468 /*
2469 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2470 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2471 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2472 * recognized after hardware RESET or INIT IPI.
2473 */
2474
2475 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2476 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2477 /* spin */ ;
2478 u_sleep(200); /* wait ~200uS */
2479
8a8d5d85 2480 /* wait for it to start, see ap_init() */
984263bc 2481 set_apic_timer(5000000);/* == 5 seconds */
8a8d5d85 2482 while (read_apic_timer()) {
0f7a3396 2483 if (smp_startup_mask & (1 << gd->mi.gd_cpuid))
984263bc 2484 return 1; /* return SUCCESS */
8a8d5d85 2485 }
984263bc
MD
2486 return 0; /* return FAILURE */
2487}
2488
2489
/*
 * Lazy flush the TLB on all other CPU's.  DEPRECATED.
 *
 * A broadcast IPI is only used when every cpu that started is also
 * active (smp_startup_mask == smp_active_mask).  If for some reason we
 * were unable to bring all cpus up, broadcast IPIs are not safe and the
 * IPI is sent to the active cpus only.
 *
 * Compiles to an empty function when SMP is not defined.
 */
void
smp_invltlb(void)
{
#ifdef SMP
	if (smp_startup_mask != smp_active_mask) {
		selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET,
		    APIC_DELMODE_FIXED);
		return;
	}
	all_but_self_ipi(XINVLTLB_OFFSET);
#endif
}
2508
984263bc
MD
2509/*
2510 * When called the executing CPU will send an IPI to all other CPUs
2511 * requesting that they halt execution.
2512 *
2513 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2514 *
2515 * - Signals all CPUs in map to stop.
2516 * - Waits for each to stop.
2517 *
2518 * Returns:
2519 * -1: error
2520 * 0: NA
2521 * 1: ok
2522 *
2523 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2524 * from executing at same time.
2525 */
2526int
2527stop_cpus(u_int map)
2528{
0f7a3396 2529 map &= smp_active_mask;
984263bc
MD
2530
2531 /* send the Xcpustop IPI to all CPUs in map */
2532 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2533
2534 while ((stopped_cpus & map) != map)
2535 /* spin */ ;
2536
2537 return 1;
2538}
2539
2540
2541/*
2542 * Called by a CPU to restart stopped CPUs.
2543 *
2544 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2545 *
2546 * - Signals all CPUs in map to restart.
2547 * - Waits for each to restart.
2548 *
2549 * Returns:
2550 * -1: error
2551 * 0: NA
2552 * 1: ok
2553 */
2554int
2555restart_cpus(u_int map)
2556{
0f7a3396
MD
2557 /* signal other cpus to restart */
2558 started_cpus = map & smp_active_mask;
984263bc
MD
2559
2560 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2561 /* spin */ ;
2562
2563 return 1;
2564}
2565
984263bc 2566/*
8a8d5d85
MD
2567 * This is called once the mpboot code has gotten us properly relocated
2568 * and the MMU turned on, etc. ap_init() is actually the idle thread,
2569 * and when it returns the scheduler will call the real cpu_idle() main
2570 * loop for the idlethread. Interrupts are disabled on entry and should
2571 * remain disabled at return.
984263bc 2572 */
984263bc 2573void
8a8d5d85 2574ap_init(void)
984263bc
MD
2575{
2576 u_int apic_id;
2577
8a8d5d85 2578 /*
0f7a3396
MD
2579 * Adjust smp_startup_mask to signal the BSP that we have started
2580 * up successfully. Note that we do not yet hold the BGL. The BSP
2581 * is waiting for our signal.
2582 *
2583 * We can't set our bit in smp_active_mask yet because we are holding
2584 * interrupts physically disabled and remote cpus could deadlock
2585 * trying to send us an IPI.
8a8d5d85 2586 */
0f7a3396 2587 smp_startup_mask |= 1 << mycpu->gd_cpuid;
35238fa5 2588 cpu_mfence();
8a8d5d85
MD
2589
2590 /*
41a01a4d
MD
2591 * Interlock for finalization. Wait until mp_finish is non-zero,
2592 * then get the MP lock.
2593 *
2594 * Note: We are in a critical section.
2595 *
2596 * Note: We have to synchronize td_mpcount to our desired MP state
2597 * before calling cpu_try_mplock().
2598 *
2599 * Note: we are the idle thread, we can only spin.
2600 *
35238fa5
MD
2601 * Note: The load fence is memory volatile and prevents the compiler
2602 * from improperly caching mp_finish, and the cpu from improperly
2603 * caching it.
8a8d5d85 2604 */
41a01a4d 2605 while (mp_finish == 0)
35238fa5 2606 cpu_lfence();
d9ebdce5 2607 ++curthread->td_mpcount;
8a8d5d85
MD
2608 while (cpu_try_mplock() == 0)
2609 ;
2610
374133e3
MD
2611 if (cpu_feature & CPUID_TSC) {
2612 /*
2613 * The BSP is constantly updating tsc0_offset, figure out the
2614 * relative difference to synchronize ktrdump.
2615 */
2616 tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
2617 }
2618
984263bc
MD
2619 /* BSP may have changed PTD while we're waiting for the lock */
2620 cpu_invltlb();
2621
984263bc
MD
2622#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2623 lidt(&r_idt);
2624#endif
2625
2626 /* Build our map of 'other' CPUs. */
0f7a3396 2627 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
984263bc 2628
26be20a0 2629 kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
984263bc 2630
984263bc
MD
2631 /* A quick check from sanity claus */
2632 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
8a8d5d85 2633 if (mycpu->gd_cpuid != apic_id) {
26be20a0
SW
2634 kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
2635 kprintf("SMP: apic_id = %d\n", apic_id);
2636 kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
984263bc
MD
2637 panic("cpuid mismatch! boom!!");
2638 }
2639
b52c8db0
SZ
2640 /* Initialize AP's local APIC for irq's */
2641 apic_initialize(FALSE);
984263bc
MD
2642
2643 /* Set memory range attributes for this CPU to match the BSP */
2644 mem_range_AP_init();
2645
a2a5ad0d 2646 /*
4c9f5a7f
MD
2647 * Once we go active we must process any IPIQ messages that may
2648 * have been queued, because no actual IPI will occur until we
2649 * set our bit in the smp_active_mask. If we don't the IPI
2650 * message interlock could be left set which would also prevent
2651 * further IPIs.
2652 *
8a8d5d85
MD
2653 * The idle loop doesn't expect the BGL to be held and while
2654 * lwkt_switch() normally cleans things up this is a special case
2655 * because we returning almost directly into the idle loop.
41a01a4d
MD
2656 *
2657 * The idle thread is never placed on the runq, make sure
4c9f5a7f 2658 * nothing we've done put it there.
8a8d5d85 2659 */
96728c05 2660 KKASSERT(curthread->td_mpcount == 1);
41a01a4d 2661 smp_active_mask |= 1 << mycpu->gd_cpuid;
d19f6edf
MD
2662
2663 /*
2664 * Enable interrupts here. idle_restore will also do it, but
2665 * doing it here lets us clean up any strays that got posted to
2666 * the CPU during the AP boot while we are still in a critical
2667 * section.
2668 */
2669 __asm __volatile("sti; pause; pause"::);
2670 mdcpu->gd_fpending = 0;
2671 mdcpu->gd_ipending = 0;
2672
4a19580d 2673 initclocks_pcpu(); /* clock interrupts (via IPIs) */
4c9f5a7f 2674 lwkt_process_ipiq();
d19f6edf
MD
2675
2676 /*
2677 * Releasing the mp lock lets the BSP finish up the SMP init
2678 */
96728c05 2679 rel_mplock();
41a01a4d 2680 KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
984263bc
MD
2681}
2682
41a01a4d
MD
2683/*
2684 * Get SMP fully working before we start initializing devices.
2685 */
2686static
2687void
2688ap_finish(void)
2689{
2690 mp_finish = 1;
2691 if (bootverbose)
26be20a0 2692 kprintf("Finish MP startup\n");
374133e3
MD
2693 if (cpu_feature & CPUID_TSC)
2694 tsc0_offset = rdtsc();
2695 tsc_offsets[0] = 0;
41a01a4d 2696 rel_mplock();
374133e3 2697 while (smp_active_mask != smp_startup_mask) {
35238fa5 2698 cpu_lfence();
374133e3
MD
2699 if (cpu_feature & CPUID_TSC)
2700 tsc0_offset = rdtsc();
2701 }
4da43e1f 2702 while (try_mplock() == 0)
41a01a4d
MD
2703 ;
2704 if (bootverbose)
26be20a0 2705 kprintf("Active CPU Mask: %08x\n", smp_active_mask);
41a01a4d
MD
2706}
2707
ba39e2e0 2708SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
41a01a4d 2709
96728c05
MD
2710void
2711cpu_send_ipiq(int dcpu)
2712{
41a01a4d
MD
2713 if ((1 << dcpu) & smp_active_mask)
2714 single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
96728c05 2715}
41a01a4d
MD
2716
#if 0	/* single_apic_ipi_passive() not working yet */
/*
 * Passive variant of cpu_send_ipiq(): a cpu that is not in
 * smp_active_mask is reported as a failure.
 *
 * Returns 0 on failure, 1 on success
 */
int
cpu_send_ipiq_passive(int dcpu)
{
	if (((1 << dcpu) & smp_active_mask) == 0)
		return(0);
	return(single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
	    APIC_DELMODE_FIXED));
}
#endif
2732
a0873f07
SZ
2733struct mptable_lapic_cbarg1 {
2734 int cpu_count;
44c36320
SZ
2735 int ht_fixup;
2736 u_int ht_apicid_mask;
a0873f07
SZ
2737};
2738
2739static int
2740mptable_lapic_pass1_callback(void *xarg, const void *pos, int type)
2741{
2742 const struct PROCENTRY *ent;
2743 struct mptable_lapic_cbarg1 *arg = xarg;
2744
2745 if (type != 0)
2746 return 0;
2747 ent = pos;
2748
2749 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
2750 return 0;
2751
2752 arg->cpu_count++;
44c36320
SZ
2753 if (ent->apic_id < 32) {
2754 arg->ht_apicid_mask |= 1 << ent->apic_id;
2755 } else if (arg->ht_fixup) {
2756 kprintf("MPTABLE: lapic id > 32, disable HTT fixup\n");
2757 arg->ht_fixup = 0;
2758 }
a0873f07
SZ
2759 return 0;
2760}
2761
2762struct mptable_lapic_cbarg2 {
2763 int cpu;
44c36320 2764 int logical_cpus;
a0873f07
SZ
2765 int found_bsp;
2766};
2767
2768static int
2769mptable_lapic_pass2_callback(void *xarg, const void *pos, int type)
2770{
2771 const struct PROCENTRY *ent;
2772 struct mptable_lapic_cbarg2 *arg = xarg;
2773
2774 if (type != 0)
2775 return 0;
2776 ent = pos;
2777
2778 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
2779 KKASSERT(!arg->found_bsp);
2780 arg->found_bsp = 1;
2781 }
2782
2783 if (processor_entry(ent, arg->cpu))
2784 arg->cpu++;
2785
44c36320 2786 if (arg->logical_cpus) {
a0873f07
SZ
2787 struct PROCENTRY proc;
2788 int i;
2789
2790 /*
2791 * Create fake mptable processor entries
2792 * and feed them to processor_entry() to
2793 * enumerate the logical CPUs.
2794 */
2795 bzero(&proc, sizeof(proc));
2796 proc.type = 0;
2797 proc.cpu_flags = PROCENTRY_FLAG_EN;
2798 proc.apic_id = ent->apic_id;
2799
44c36320 2800 for (i = 1; i < arg->logical_cpus; i++) {
a0873f07
SZ
2801 proc.apic_id++;
2802 processor_entry(&proc, arg->cpu);
a0873f07
SZ
2803 arg->cpu++;
2804 }
2805 }
2806 return 0;
2807}
2808
322abba7 2809static void
281d9482
SZ
2810mptable_imcr(struct mptable_pos *mpt)
2811{
2812 /* record whether PIC or virtual-wire mode */
2813 machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT,
2814 mpt->mp_fps->mpfb2 & 0x80);
2815}
2816
2817struct mptable_lapic_enumerator {
2818 struct lapic_enumerator enumerator;
2819 vm_paddr_t mpfps_paddr;
2820};
2821
2822static void
322abba7
SZ
2823mptable_lapic_default(void)
2824{
2825 int ap_apicid, bsp_apicid;
2826
2827 mp_naps = 1; /* exclude BSP */
2828
2829 /* Map local apic before the id field is accessed */
2830 lapic_init(DEFAULT_APIC_BASE);
2831
2832 bsp_apicid = APIC_ID(lapic.id);
2833 ap_apicid = (bsp_apicid == 0) ? 1 : 0;
2834
2835 /* BSP */
2836 mp_set_cpuids(0, bsp_apicid);
2837 /* one and only AP */
2838 mp_set_cpuids(1, ap_apicid);
2839}
2840
a0873f07
SZ
2841/*
2842 * Configure:
a0873f07 2843 * mp_naps
d787e80c 2844 * ID_TO_CPU(N), APIC ID to logical CPU table
a0873f07
SZ
2845 * CPU_TO_ID(N), logical CPU to APIC ID table
2846 */
2847static void
281d9482 2848mptable_lapic_enumerate(struct lapic_enumerator *e)
a0873f07 2849{
281d9482 2850 struct mptable_pos mpt;
322abba7
SZ
2851 struct mptable_lapic_cbarg1 arg1;
2852 struct mptable_lapic_cbarg2 arg2;
2853 mpcth_t cth;
44c36320 2854 int error, logical_cpus = 0;
5a16ccc3 2855 vm_offset_t lapic_addr;
281d9482
SZ
2856 vm_paddr_t mpfps_paddr;
2857
2858 mpfps_paddr = ((struct mptable_lapic_enumerator *)e)->mpfps_paddr;
2859 KKASSERT(mpfps_paddr != 0);
a0873f07 2860
281d9482
SZ
2861 error = mptable_map(&mpt, mpfps_paddr);
2862 if (error)
2863 panic("mptable_lapic_enumerate mptable_map failed\n");
2864
2865 KKASSERT(mpt.mp_fps != NULL);
a0873f07 2866
322abba7
SZ
2867 /*
2868 * Check for use of 'default' configuration
2869 */
281d9482 2870 if (mpt.mp_fps->mpfb1 != 0) {
322abba7 2871 mptable_lapic_default();
281d9482 2872 mptable_unmap(&mpt);
322abba7
SZ
2873 return;
2874 }
a0873f07 2875
281d9482 2876 cth = mpt.mp_cth;
322abba7 2877 KKASSERT(cth != NULL);
a0873f07 2878
322abba7
SZ
2879 /* Save local apic address */
2880 lapic_addr = (vm_offset_t)cth->apic_address;
2881 KKASSERT(lapic_addr != 0);
a0873f07 2882
322abba7
SZ
2883 /*
2884 * Find out how many CPUs do we have
2885 */
2886 bzero(&arg1, sizeof(arg1));
44c36320
SZ
2887 arg1.ht_fixup = 1; /* Apply ht fixup by default */
2888
322abba7
SZ
2889 error = mptable_iterate_entries(cth,
2890 mptable_lapic_pass1_callback, &arg1);
2891 if (error)
2892 panic("mptable_iterate_entries(lapic_pass1) failed\n");
322abba7 2893 KKASSERT(arg1.cpu_count != 0);
a0873f07 2894
322abba7 2895 /* See if we need to fixup HT logical CPUs. */
44c36320
SZ
2896 if (arg1.ht_fixup) {
2897 logical_cpus = mptable_hyperthread_fixup(arg1.ht_apicid_mask,
2898 arg1.cpu_count);
2899 if (logical_cpus != 0)
2900 arg1.cpu_count *= logical_cpus;
2901 }
2902 mp_naps = arg1.cpu_count;
a0873f07 2903
44c36320 2904 /* Qualify the numbers again, after possible HT fixup */
322abba7
SZ
2905 if (mp_naps > MAXCPU) {
2906 kprintf("Warning: only using %d of %d available CPUs!\n",
2907 MAXCPU, mp_naps);
2908 mp_naps = MAXCPU;
a0873f07
SZ
2909 }
2910
322abba7 2911 --mp_naps; /* subtract the BSP */
a0873f07 2912
322abba7
SZ
2913 /*
2914 * Link logical CPU id to local apic id
2915 */
2916 bzero(&arg2, sizeof(arg2));
2917 arg2.cpu = 1;
44c36320 2918 arg2.logical_cpus = logical_cpus;
a0873f07 2919
322abba7
SZ
2920 error = mptable_iterate_entries(cth,
2921 mptable_lapic_pass2_callback, &arg2);
2922 if (error)
2923 panic("mptable_iterate_entries(lapic_pass2) failed\n");
2924 KKASSERT(arg2.found_bsp);
a0873f07 2925
322abba7
SZ
2926 /* Map local apic */
2927 lapic_init(lapic_addr);
281d9482
SZ
2928
2929 mptable_unmap(&mpt);
2930}
2931
2932static int
2933mptable_lapic_probe(struct lapic_enumerator *e)
2934{
2935 vm_paddr_t mpfps_paddr;
2936
2937 mpfps_paddr = mptable_probe();
2938 if (mpfps_paddr == 0)
2939 return ENXIO;
2940
2941 ((struct mptable_lapic_enumerator *)e)->mpfps_paddr = mpfps_paddr;
2942 return 0;
a0873f07 2943}
5a16ccc3 2944
281d9482
SZ
2945static struct mptable_lapic_enumerator mptable_lapic_enumerator = {
2946 .enumerator = {
2947 .lapic_prio = LAPIC_ENUM_PRIO_MPTABLE,
2948 .lapic_probe = mptable_lapic_probe,
2949 .lapic_enumerate = mptable_lapic_enumerate
2950 }
2951};
2952
5a16ccc3 2953static void
281d9482 2954mptable_apic_register(void)
a0eaef71 2955{
281d9482 2956 lapic_enumerator_register(&mptable_lapic_enumerator.enumerator);
a0eaef71 2957}
281d9482 2958SYSINIT(madt, SI_BOOT2_PRESMP, SI_ORDER_ANY, mptable_apic_register, 0);