Turn on PG_N (nocache) when mapping ioapic
[dragonfly.git] / sys / platform / pc32 / i386 / mp_machdep.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $
c0c5de70 26 * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $
984263bc
MD
27 */
28
29#include "opt_cpu.h"
984263bc 30
984263bc
MD
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
984263bc
MD
34#include <sys/sysctl.h>
35#include <sys/malloc.h>
36#include <sys/memrange.h>
984263bc 37#include <sys/cons.h> /* cngetc() */
37e7efec 38#include <sys/machintr.h>
984263bc
MD
39
40#include <vm/vm.h>
41#include <vm/vm_param.h>
42#include <vm/pmap.h>
43#include <vm/vm_kern.h>
44#include <vm/vm_extern.h>
984263bc
MD
45#include <sys/lock.h>
46#include <vm/vm_map.h>
47#include <sys/user.h>
48#ifdef GPROF
49#include <sys/gmon.h>
50#endif
984263bc
MD
51
52#include <machine/smp.h>
a9295349 53#include <machine_base/apic/apicreg.h>
984263bc
MD
54#include <machine/atomic.h>
55#include <machine/cpufunc.h>
a9295349 56#include <machine_base/apic/mpapic.h>
984263bc
MD
57#include <machine/psl.h>
58#include <machine/segments.h>
984263bc
MD
59#include <machine/tss.h>
60#include <machine/specialreg.h>
61#include <machine/globaldata.h>
62
984263bc 63#include <machine/md_var.h> /* setidt() */
a9295349
MD
64#include <machine_base/icu/icu.h> /* IPIs */
65#include <machine_base/isa/intr_machdep.h> /* IPIs */
984263bc 66
1439c090
MD
67#define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */
68
984263bc
MD
69#define WARMBOOT_TARGET 0
70#define WARMBOOT_OFF (KERNBASE + 0x0467)
71#define WARMBOOT_SEG (KERNBASE + 0x0469)
72
984263bc 73#define BIOS_BASE (0xf0000)
1df86978 74#define BIOS_BASE2 (0xe0000)
984263bc 75#define BIOS_SIZE (0x10000)
984263bc
MD
76#define BIOS_COUNT (BIOS_SIZE/4)
77
78#define CMOS_REG (0x70)
79#define CMOS_DATA (0x71)
80#define BIOS_RESET (0x0f)
81#define BIOS_WARM (0x0a)
82
83#define PROCENTRY_FLAG_EN 0x01
84#define PROCENTRY_FLAG_BP 0x02
85#define IOAPICENTRY_FLAG_EN 0x01
86
87
/*
 * MP Floating Pointer Structure.
 *
 * This is the structure mptable_search_sig() locates by scanning for the
 * MP_SIG ("_MP_") dword and that mptable_map() maps as mp_fps.
 */
typedef struct MPFPS {
	char	signature[4];
	u_int32_t pap;		/* physical address that mptable_map() maps
				 * as the configuration table header; 0 means
				 * no configuration table is mapped */
	u_char	length;
	u_char	spec_rev;
	u_char	checksum;
	u_char	mpfb1;		/* non-zero selects a 'default' configuration
				 * (see mptable_pass1()/mptable_pass2()) */
	u_char	mpfb2;
	u_char	mpfb3;
	u_char	mpfb4;
	u_char	mpfb5;
} *mpfps_t;
101
/*
 * MP Configuration Table Header.
 *
 * The base table entries follow this header directly in memory;
 * mptable_iterate_entries() starts walking at (header + sizeof(header)).
 *
 * NOTE(review): the two 'void *' members only match the firmware layout
 * where pointers are 32 bits wide -- presumably always true for this
 * pc32 file, confirm before reusing the structure elsewhere.
 */
typedef struct MPCTH {
	char	signature[4];
	u_short	base_table_length;	/* header + entries, in bytes; used as
					 * the mapping size by mptable_map() */
	u_char	spec_rev;
	u_char	checksum;
	u_char	oem_id[8];
	u_char	product_id[12];
	void	*oem_table_pointer;
	u_short	oem_table_size;
	u_short	entry_count;		/* number of base table entries walked
					 * by mptable_iterate_entries() */
	void	*apic_address;		/* mptable_check() rejects a table
					 * where this is 0 */
	u_short	extended_table_length;
	u_char	extended_table_checksum;
	u_char	reserved;
} *mpcth_t;
118
119
/*
 * Base table entry, type 0 (processor).  basetable_entry_types[] lists
 * its length as 20 bytes.
 */
typedef struct PROCENTRY {
	u_char	type;
	u_char	apic_id;
	u_char	apic_version;
	u_char	cpu_flags;	/* tested against PROCENTRY_FLAG_EN and
				 * PROCENTRY_FLAG_BP by mptable_check_callback() */
	u_long	cpu_signature;
	u_long	feature_flags;
	u_long	reserved1;
	u_long	reserved2;
} *proc_entry_ptr;
130
/*
 * Base table entry, type 1 (bus).  basetable_entry_types[] lists its
 * length as 8 bytes.
 */
typedef struct BUSENTRY {
	u_char	type;
	u_char	bus_id;
	char	bus_type[6];
} *bus_entry_ptr;
136
/*
 * Base table entry, type 2 (I/O APIC).  basetable_entry_types[] lists
 * its length as 8 bytes.
 */
typedef struct IOAPICENTRY {
	u_char	type;
	u_char	apic_id;
	u_char	apic_version;
	u_char	apic_flags;	/* IOAPICENTRY_FLAG_EN: entry is counted by
				 * mptable_ioapic_pass1_callback() */
	void	*apic_address;	/* recorded in io_apic_address[] during pass 1 */
} *io_apic_entry_ptr;
144
/*
 * Base table interrupt entry.  The iteration code treats both type 3 and
 * type 4 entries as "int_entry"; basetable_entry_types[] lists each as
 * 8 bytes long.
 */
typedef struct INTENTRY {
	u_char	type;
	u_char	int_type;
	u_short	int_flags;
	u_char	src_bus_id;
	u_char	src_bus_irq;
	u_char	dst_apic_id;
	u_char	dst_apic_int;
} *int_entry_ptr;
154
/*
 * descriptions of MP basetable entries
 *
 * One record per entry type; see basetable_entry_types[].
 */
typedef struct BASETABLE_ENTRY {
	u_char	type;		/* entry type code */
	u_char	length;		/* size of that entry in bytes; used by
				 * mptable_iterate_entries() to step */
	char	name[16];	/* human readable name */
} basetable_entry;
161
981bebd1
SZ
/*
 * Temporary kernel mappings of the MP table, filled in by mptable_map()
 * and released by mptable_unmap().
 */
struct mptable_pos {
	mpfps_t		mp_fps;		/* mapped floating pointer structure */
	mpcth_t		mp_cth;		/* mapped configuration table, or NULL
					 * when mp_fps->pap is 0 */
	vm_size_t	mp_cth_mapsz;	/* size that mp_cth was mapped with */
};
167
fa058384
SZ
168typedef int (*mptable_iter_func)(void *, const void *, int);
169
984263bc
MD
170/*
171 * this code MUST be enabled here and in mpboot.s.
172 * it follows the very early stages of AP boot by placing values in CMOS ram.
173 * it NORMALLY will never be needed and thus the primitive method for enabling.
174 *
984263bc 175 */
7d34994c 176#if defined(CHECK_POINTS)
984263bc
MD
177#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
178#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
179
180#define CHECK_INIT(D); \
181 CHECK_WRITE(0x34, (D)); \
182 CHECK_WRITE(0x35, (D)); \
183 CHECK_WRITE(0x36, (D)); \
184 CHECK_WRITE(0x37, (D)); \
185 CHECK_WRITE(0x38, (D)); \
186 CHECK_WRITE(0x39, (D));
187
188#define CHECK_PRINT(S); \
26be20a0 189 kprintf("%s: %d, %d, %d, %d, %d, %d\n", \
984263bc
MD
190 (S), \
191 CHECK_READ(0x34), \
192 CHECK_READ(0x35), \
193 CHECK_READ(0x36), \
194 CHECK_READ(0x37), \
195 CHECK_READ(0x38), \
196 CHECK_READ(0x39));
197
198#else /* CHECK_POINTS */
199
200#define CHECK_INIT(D)
201#define CHECK_PRINT(S)
202
203#endif /* CHECK_POINTS */
204
205/*
206 * Values to send to the POST hardware.
207 */
208#define MP_BOOTADDRESS_POST 0x10
209#define MP_PROBE_POST 0x11
210#define MPTABLE_PASS1_POST 0x12
211
212#define MP_START_POST 0x13
213#define MP_ENABLE_POST 0x14
214#define MPTABLE_PASS2_POST 0x15
215
216#define START_ALL_APS_POST 0x16
217#define INSTALL_AP_TRAMP_POST 0x17
218#define START_AP_POST 0x18
219
220#define MP_ANNOUNCE_POST 0x19
221
984263bc
MD
222/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
223int current_postcode;
224
225/** XXX FIXME: what system files declare these??? */
226extern struct region_descriptor r_gdt, r_idt;
227
984263bc 228int mp_naps; /* # of Applications processors */
97359a5b 229#ifdef APIC_IO
4f6a8b30 230static int mp_nbusses; /* # of busses */
984263bc 231int mp_napics; /* # of IO APICs */
97359a5b 232#endif
97359a5b 233#ifdef APIC_IO
984263bc 234vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
97359a5b
MD
235u_int32_t *io_apic_versions;
236#endif
984263bc
MD
237extern int nkpt;
238
239u_int32_t cpu_apic_versions[MAXCPU];
374133e3 240int64_t tsc0_offset;
0b698dca 241extern int64_t tsc_offsets[];
984263bc 242
1876681a
SZ
243extern u_long ebda_addr;
244
97359a5b 245#ifdef APIC_IO
8a8d5d85 246struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
97359a5b 247#endif
984263bc 248
984263bc
MD
249/*
250 * APIC ID logical/physical mapping structures.
251 * We oversize these to simplify boot-time config.
252 */
253int cpu_num_to_apic_id[NAPICID];
97359a5b 254#ifdef APIC_IO
984263bc 255int io_num_to_apic_id[NAPICID];
97359a5b 256#endif
984263bc
MD
257int apic_id_to_logical[NAPICID];
258
984263bc
MD
259/* AP uses this during bootstrap. Do not staticize. */
260char *bootSTK;
261static int bootAP;
262
263/* Hotwire a 0->4MB V==P mapping */
264extern pt_entry_t *KPTphys;
265
f13b5eec
MD
266/*
267 * SMP page table page. Setup by locore to point to a page table
268 * page from which we allocate per-cpu privatespace areas io_apics,
269 * and so forth.
270 */
271
272#define IO_MAPPING_START_INDEX \
273 (SMP_MAXCPU * sizeof(struct privatespace) / PAGE_SIZE)
274
984263bc 275extern pt_entry_t *SMPpt;
f13b5eec 276static int SMPpt_alloc_index = IO_MAPPING_START_INDEX;
984263bc
MD
277
278struct pcb stoppcbs[MAXCPU];
279
fa058384
SZ
/*
 * Base table entry descriptions, indexed by entry type (0..4).
 * mptable_iterate_entries() uses the length column to bounds-check and
 * step over each entry.
 */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
288
984263bc
MD
289/*
290 * Local data and functions.
291 */
292
984263bc
MD
293static u_int boot_address;
294static u_int base_memory;
41a01a4d 295static int mp_finish;
984263bc 296
984263bc
MD
297static void mp_enable(u_int boot_addr);
298
fa058384
SZ
299static int mptable_iterate_entries(const mpcth_t,
300 mptable_iter_func, void *);
cb00b5c4 301static int mptable_probe(void);
34e6fa63 302static int mptable_search(void);
fa058384 303static int mptable_check(vm_paddr_t);
3aba8f73 304static int mptable_search_sig(u_int32_t target, int count);
44c36320 305static int mptable_hyperthread_fixup(u_int, int);
981bebd1 306static void mptable_pass1(struct mptable_pos *);
390b18b0 307static void mptable_pass2(struct mptable_pos *);
3aba8f73
SZ
308static void mptable_default(int type);
309static void mptable_fix(void);
fa058384 310static int mptable_map(struct mptable_pos *, vm_paddr_t);
981bebd1 311static void mptable_unmap(struct mptable_pos *);
a0eaef71 312static void mptable_imcr(struct mptable_pos *);
3aba8f73 313
281d9482
SZ
314static int mptable_lapic_probe(struct lapic_enumerator *);
315static void mptable_lapic_enumerate(struct lapic_enumerator *);
316static void mptable_lapic_default(void);
317
97359a5b 318#ifdef APIC_IO
984263bc 319static void setup_apic_irq_mapping(void);
97359a5b
MD
320static int apic_int_is_bus_type(int intr, int bus_type);
321#endif
984263bc
MD
322static int start_all_aps(u_int boot_addr);
323static void install_ap_tramp(u_int boot_addr);
0f7a3396 324static int start_ap(struct mdglobaldata *gd, u_int boot_addr);
984263bc 325
41a01a4d 326static cpumask_t smp_startup_mask = 1; /* which cpus have been started */
0f7a3396
MD
327cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */
328SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, "");
329
984263bc
MD
330/*
331 * Calculate usable address in base memory for AP trampoline code.
332 */
333u_int
334mp_bootaddress(u_int basemem)
335{
336 POSTCODE(MP_BOOTADDRESS_POST);
337
c0c5de70 338 base_memory = basemem;
984263bc
MD
339
340 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
341 if ((base_memory - boot_address) < bootMP_size)
342 boot_address -= 4096; /* not enough, lower by 4k */
343
344 return boot_address;
345}
346
347
34e6fa63
SZ
/*
 * Find the MP floating pointer structure and sanity check the table it
 * describes.
 *
 * Returns the physical address reported by mptable_search(), or 0 when
 * mptable_check() returns an error for it (which includes the case where
 * nothing was found).
 */
static int
mptable_probe(void)
{
	int paddr = mptable_search();

	return (mptable_check(paddr) != 0) ? 0 : paddr;
}
359
984263bc
MD
360/*
361 * Look for an Intel MP spec table (ie, SMP capable hardware).
362 */
cb00b5c4 363static int
34e6fa63 364mptable_search(void)
984263bc
MD
365{
366 int x;
984263bc 367 u_int32_t target;
f13b5eec
MD
368
369 /*
370 * Make sure our SMPpt[] page table is big enough to hold all the
371 * mappings we need.
372 */
373 KKASSERT(IO_MAPPING_START_INDEX < NPTEPG - 2);
984263bc
MD
374
375 POSTCODE(MP_PROBE_POST);
376
377 /* see if EBDA exists */
1876681a 378 if (ebda_addr != 0) {
984263bc 379 /* search first 1K of EBDA */
1876681a 380 target = (u_int32_t)ebda_addr;
3aba8f73 381 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 382 return x;
984263bc
MD
383 } else {
384 /* last 1K of base memory, effective 'top of base' passed in */
aeb48299 385 target = (u_int32_t)(base_memory - 0x400);
3aba8f73 386 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
aeb48299 387 return x;
984263bc
MD
388 }
389
390 /* search the BIOS */
aeb48299 391 target = (u_int32_t)BIOS_BASE;
3aba8f73 392 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
aeb48299 393 return x;
984263bc 394
1df86978
SZ
395 /* search the extended BIOS */
396 target = (u_int32_t)BIOS_BASE2;
397 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
398 return x;
399
984263bc 400 /* nothing found */
984263bc 401 return 0;
984263bc
MD
402}
403
fa058384
SZ
/* Accumulator handed to mptable_check_callback() by mptable_check(). */
struct mptable_check_cbarg {
	int	cpu_count;	/* number of enabled processor entries seen */
	int	found_bsp;	/* non-zero once an enabled entry carried
				 * PROCENTRY_FLAG_BP */
};
408
409static int
410mptable_check_callback(void *xarg, const void *pos, int type)
411{
412 const struct PROCENTRY *ent;
413 struct mptable_check_cbarg *arg = xarg;
414
415 if (type != 0)
416 return 0;
417 ent = pos;
418
419 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
420 return 0;
421 arg->cpu_count++;
422
423 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
424 if (arg->found_bsp) {
425 kprintf("more than one BSP in base MP table\n");
426 return EINVAL;
427 }
428 arg->found_bsp = 1;
429 }
430 return 0;
431}
432
433static int
434mptable_check(vm_paddr_t mpfps_paddr)
435{
436 struct mptable_pos mpt;
437 struct mptable_check_cbarg arg;
438 mpcth_t cth;
439 int error;
440
441 if (mpfps_paddr == 0)
442 return EOPNOTSUPP;
443
444 error = mptable_map(&mpt, mpfps_paddr);
445 if (error)
446 return error;
447
448 if (mpt.mp_fps->mpfb1 != 0)
449 goto done;
450
451 error = EINVAL;
452
453 cth = mpt.mp_cth;
454 if (cth == NULL)
455 goto done;
456 if (cth->apic_address == 0)
457 goto done;
458
459 bzero(&arg, sizeof(arg));
460 error = mptable_iterate_entries(cth, mptable_check_callback, &arg);
461 if (!error) {
462 if (arg.cpu_count == 0) {
463 kprintf("MP table contains no processor entries\n");
464 error = EINVAL;
465 } else if (!arg.found_bsp) {
466 kprintf("MP table does not contains BSP entry\n");
467 error = EINVAL;
468 }
469 }
470done:
471 mptable_unmap(&mpt);
472 return error;
473}
474
475static int
476mptable_iterate_entries(const mpcth_t cth, mptable_iter_func func, void *arg)
477{
478 int count, total_size;
479 const void *position;
480
481 KKASSERT(cth->base_table_length >= sizeof(struct MPCTH));
482 total_size = cth->base_table_length - sizeof(struct MPCTH);
483 position = (const uint8_t *)cth + sizeof(struct MPCTH);
484 count = cth->entry_count;
485
486 while (count--) {
487 int type, error;
488
489 KKASSERT(total_size >= 0);
490 if (total_size == 0) {
491 kprintf("invalid base MP table, "
492 "entry count and length mismatch\n");
493 return EINVAL;
494 }
495
496 type = *(const uint8_t *)position;
497 switch (type) {
498 case 0: /* processor_entry */
499 case 1: /* bus_entry */
500 case 2: /* io_apic_entry */
501 case 3: /* int_entry */
502 case 4: /* int_entry */
503 break;
504 default:
505 kprintf("unknown base MP table entry type %d\n", type);
506 return EINVAL;
507 }
508
509 if (total_size < basetable_entry_types[type].length) {
510 kprintf("invalid base MP table length, "
511 "does not contain all entries\n");
512 return EINVAL;
513 }
514 total_size -= basetable_entry_types[type].length;
515
516 error = func(arg, position, type);
517 if (error)
518 return error;
519
520 position = (const uint8_t *)position +
521 basetable_entry_types[type].length;
522 }
523 return 0;
524}
525
984263bc
MD
526
/*
 * Startup the SMP processors.
 *
 * Posts MP_START_POST and hands the trampoline address previously
 * computed by mp_bootaddress() (boot_address) to mp_enable().
 */
void
mp_start(void)
{
	POSTCODE(MP_START_POST);
	mp_enable(boot_address);
}
536
537
538/*
539 * Print various information about the SMP system hardware and setup.
540 */
541void
542mp_announce(void)
543{
544 int x;
545
546 POSTCODE(MP_ANNOUNCE_POST);
547
26be20a0
SW
548 kprintf("DragonFly/MP: Multiprocessor motherboard\n");
549 kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
8629c4ea 550 kprintf(", version: 0x%08x\n", cpu_apic_versions[0]);
984263bc 551 for (x = 1; x <= mp_naps; ++x) {
26be20a0 552 kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
8629c4ea 553 kprintf(", version: 0x%08x\n", cpu_apic_versions[x]);
984263bc
MD
554 }
555
556#if defined(APIC_IO)
557 for (x = 0; x < mp_napics; ++x) {
26be20a0
SW
558 kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
559 kprintf(", version: 0x%08x", io_apic_versions[x]);
560 kprintf(", at 0x%08x\n", io_apic_address[x]);
984263bc
MD
561 }
562#else
26be20a0 563 kprintf(" Warning: APIC I/O disabled\n");
984263bc
MD
564#endif /* APIC_IO */
565}
566
567/*
568 * AP cpu's call this to sync up protected mode.
7160572f
MD
569 *
570 * WARNING! We must ensure that the cpu is sufficiently initialized to
 * be able to use the FP for our optimized bzero/bcopy code before
572 * we enter more mainstream C code.
a44bdeec
MD
573 *
574 * WARNING! %fs is not set up on entry. This routine sets up %fs.
984263bc
MD
575 */
void
init_secondary(void)
{
	int	gsel_tss;
	/*
	 * bootAP selects which per-cpu private space and GDT slice this
	 * processor uses.
	 * NOTE(review): presumably set by the BSP before it starts this
	 * AP -- the writer is not visible in this part of the file.
	 */
	int	x, myid = bootAP;
	u_int	cr0;
	struct mdglobaldata *md;
	struct privatespace *ps;

	ps = &CPU_prvspace[myid];

	/*
	 * Point the template private-data and TSS segment descriptors at
	 * this cpu's private space before they are copied into the GDT.
	 */
	gdt_segs[GPRIV_SEL].ssd_base = (int)ps;
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &ps->mdglobaldata.gd_common_tss;
	ps->mdglobaldata.mi.gd_prvspace = ps;

	/* Build this cpu's slice of the GDT (NGDT entries per cpu). */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	mdcpu->gd_currentldt = _default_ldt;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;

	md = mdcpu;	/* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/

	/* Fill in the TSS and remember its descriptor, then load it. */
	md->gd_common_tss.tss_esp0 = 0;	/* not used until after switch */
	md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
	md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
	md->gd_common_tssd = *md->gd_tss_gdt;
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	pmap_set_opt();		/* PSE/4MB pages, etc */

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);

	/* set up SSE registers */
	enable_sse();
}
636
984263bc
MD
637/*******************************************************************
638 * local functions and data
639 */
640
/*
 * start the SMP system
 *
 * Configures the local APIC, runs mptable_imcr() when an MP table is
 * found, performs I/O APIC discovery and programming on APIC_IO kernels,
 * installs the SMP related IDT vectors, and finally starts the
 * application processors using the trampoline at boot_addr.
 *
 * On APIC_IO kernels a missing MP table or an I/O APIC setup failure
 * panics.
 */
static void
mp_enable(u_int boot_addr)
{
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */
	vm_paddr_t mpfps_paddr;
	struct mptable_pos mpt;

	POSTCODE(MP_ENABLE_POST);

	lapic_config();

	/*
	 * mptable_probe() only returns a non-zero address for a table that
	 * mptable_check() accepted, which includes mapping it once already.
	 * NOTE(review): the mptable_map() return values below are ignored
	 * on that basis.
	 */
	mpfps_paddr = mptable_probe();
	if (mpfps_paddr) {
		mptable_map(&mpt, mpfps_paddr);
		mptable_imcr(&mpt);
		mptable_unmap(&mpt);
	}
#if defined(APIC_IO)

	if (!mpfps_paddr)
		panic("no MP table, disable APIC_IO!\n");

	mptable_map(&mpt, mpfps_paddr);

	/*
	 * Examine the MP table for needed info
	 */
	mptable_pass1(&mpt);
	mptable_pass2(&mpt);

	mptable_unmap(&mpt);

	/* Post scan cleanup */
	mptable_fix();

	setup_apic_irq_mapping();

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

#endif	/* APIC_IO */

	/*
	 * These are required for SMP operation
	 */

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for IPIQ messaging */
	setidt(XIPIQ_OFFSET, Xipiq,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install a timer vector */
	setidt(XTIMER_OFFSET, Xtimer,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* start each Application Processor */
	start_all_aps(boot_addr);
}
725
726
727/*
728 * look for the MP spec signature
729 */
730
731/* string defined by the Intel MP Spec as identifying the MP table */
732#define MP_SIG 0x5f504d5f /* _MP_ */
733#define NEXT(X) ((X) += 4)
734static int
3aba8f73 735mptable_search_sig(u_int32_t target, int count)
984263bc 736{
0f85efa2
SZ
737 vm_size_t map_size;
738 u_int32_t *addr;
739 int x, ret;
984263bc 740
aeb48299
SZ
741 KKASSERT(target != 0);
742
0f85efa2
SZ
743 map_size = count * sizeof(u_int32_t);
744 addr = pmap_mapdev((vm_paddr_t)target, map_size);
984263bc 745
aeb48299 746 ret = 0;
0f85efa2
SZ
747 for (x = 0; x < count; NEXT(x)) {
748 if (addr[x] == MP_SIG) {
749 /* make array index a byte index */
750 ret = target + (x * sizeof(u_int32_t));
751 break;
752 }
753 }
aeb48299 754
0f85efa2
SZ
755 pmap_unmapdev((vm_offset_t)addr, map_size);
756 return ret;
984263bc
MD
757}
758
759
984263bc
MD
/* One record of the bus_data[] table allocated by mptable_pass2(). */
typedef struct BUSDATA {
	u_char bus_id;		/* 0xff marks a cleared slot (mptable_pass2) */
	enum busTypes bus_type;
} bus_datum;
764
/*
 * One record of the io_apic_ints[] table.  The first six members mirror
 * struct INTENTRY (without its type byte); int_vector is added on top.
 */
typedef struct INTDATA {
	u_char int_type;	/* 0xff marks a cleared slot (mptable_pass2) */
	u_short int_flags;
	u_char src_bus_id;
	u_char src_bus_irq;
	u_char dst_apic_id;
	u_char dst_apic_int;
	u_char int_vector;	/* 0xff until assign_apic_irq() stores an irq */
} io_int, local_int;
774
/* Pairs a bus type code with its name; element type of bus_type_table[]. */
typedef struct BUSTYPENAME {
	u_char type;
	char name[7];		/* room for 6 characters plus the NUL */
} bus_type_name;
779
/*
 * Known bus type names.  "---" entries are placeholders mapped to
 * UNKNOWN_BUSTYPE.
 * NOTE(review): MCA appears twice and the ordering looks positional --
 * presumably consumed by lookup_bus_type(), which is not visible here;
 * confirm before reordering or de-duplicating.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 *
 * Bus layout of the seven 'default' configurations.  Row N-1 describes
 * default configuration type N; mptable_pass1() indexes it with
 * (mpfb1 - 1) and reads column 0 as the number of busses.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
814
815
4f6a8b30
SZ
816#ifdef APIC_IO
817
984263bc
MD
818/* the bus data */
819static bus_datum *bus_data;
820
821/* the IO INT data, one entry per possible APIC INTerrupt */
822static io_int *io_apic_ints;
984263bc 823static int nintrs;
4f6a8b30 824
97359a5b 825#endif
984263bc 826
a0873f07 827static int processor_entry (const struct PROCENTRY *entry, int cpu);
97359a5b 828#ifdef APIC_IO
c4717d5c
SZ
829static int bus_entry (const struct BUSENTRY *entry, int bus);
830static int io_apic_entry (const struct IOAPICENTRY *entry, int apic);
831static int int_entry (const struct INTENTRY *entry, int intr);
97359a5b 832#endif
3ae0cd58 833static int lookup_bus_type (char *name);
984263bc 834
8658b5be
SZ
835#ifdef APIC_IO
836
837static int
838mptable_ioapic_pass1_callback(void *xarg, const void *pos, int type)
839{
840 const struct IOAPICENTRY *ioapic_ent;
841
842 switch (type) {
843 case 1: /* bus_entry */
844 ++mp_nbusses;
845 break;
846
847 case 2: /* io_apic_entry */
848 ioapic_ent = pos;
849 if (ioapic_ent->apic_flags & IOAPICENTRY_FLAG_EN) {
850 io_apic_address[mp_napics++] =
851 (vm_offset_t)ioapic_ent->apic_address;
852 }
853 break;
854
855 case 3: /* int_entry */
856 ++nintrs;
857 break;
858 }
859 return 0;
860}
861
862#endif /* APIC_IO */
984263bc
MD
863
/*
 * 1st pass on motherboard's Intel MP specification table.
 *
 * determines:
 *	io_apic_address[N]
 *	mp_nbusses
 *	mp_napics
 *	nintrs
 *
 * Does nothing unless the kernel is built with APIC_IO.  For a 'default'
 * configuration (mpfb1 != 0) the values come from default_data[];
 * otherwise the base table is walked with
 * mptable_ioapic_pass1_callback().
 *
 * Panics when the table walk fails, or when mpfb1 names a default
 * configuration type that default_data[] has no row for (the byte comes
 * straight from firmware and is used as an array index).
 */
static void
mptable_pass1(struct mptable_pos *mpt)
{
#ifdef APIC_IO
	mpfps_t fps;
	int x;

	POSTCODE(MPTABLE_PASS1_POST);

	fps = mpt->mp_fps;
	KKASSERT(fps != NULL);

	/* clear various tables */
	for (x = 0; x < NAPICID; ++x)
		io_apic_address[x] = ~0;	/* IO APIC address table */

	mp_nbusses = 0;
	mp_napics = 0;
	nintrs = 0;

	/* check for use of 'default' configuration */
	if (fps->mpfb1 != 0) {
		/* default_data[] is indexed by (type - 1); reject the rest */
		if (fps->mpfb1 >
		    sizeof(default_data) / sizeof(default_data[0])) {
			panic("unsupported MP default configuration type %d\n",
			      fps->mpfb1);
		}
		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
		mp_nbusses = default_data[fps->mpfb1 - 1][0];
		mp_napics = 1;
		nintrs = 16;
	} else {
		int error;

		error = mptable_iterate_entries(mpt->mp_cth,
			    mptable_ioapic_pass1_callback, NULL);
		if (error)
			panic("mptable_iterate_entries(ioapic_pass1) failed\n");
	}
#endif	/* APIC_IO */
}
909
c4717d5c
SZ
910#ifdef APIC_IO
911
/*
 * Running slot counters for mptable_ioapic_pass2_callback(); each one is
 * passed to the matching *_entry() helper and advanced when that helper
 * returns non-zero.
 */
struct mptable_ioapic2_cbarg {
	int	bus;	/* next index handed to bus_entry() */
	int	apic;	/* next index handed to io_apic_entry() */
	int	intr;	/* next index handed to int_entry() */
};
917
918static int
919mptable_ioapic_pass2_callback(void *xarg, const void *pos, int type)
920{
921 struct mptable_ioapic2_cbarg *arg = xarg;
922
923 switch (type) {
924 case 1:
925 if (bus_entry(pos, arg->bus))
926 ++arg->bus;
927 break;
928
929 case 2:
930 if (io_apic_entry(pos, arg->apic))
931 ++arg->apic;
932 break;
933
934 case 3:
935 if (int_entry(pos, arg->intr))
936 ++arg->intr;
937 break;
938 }
939 return 0;
940}
941
942#endif /* APIC_IO */
984263bc
MD
943
/*
 * 2nd pass on motherboard's Intel MP specification table.
 *
 * sets:
 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
 *	IO_TO_ID(N), logical IO to APIC ID table
 *	bus_data[N]
 *	io_apic_ints[N]
 *
 * Does nothing unless the kernel is built with APIC_IO.  Relies on the
 * counts (mp_napics, mp_nbusses, nintrs) and io_apic_address[] computed
 * by mptable_pass1() to size and fill its tables.  Panics when the base
 * table walk fails.
 */
static void
mptable_pass2(struct mptable_pos *mpt)
{
#ifdef APIC_IO
	struct mptable_ioapic2_cbarg arg;
	mpfps_t fps;
	int error, x;

	POSTCODE(MPTABLE_PASS2_POST);

	fps = mpt->mp_fps;
	KKASSERT(fps != NULL);

	/* Allocate the tables sized by the pass 1 counts. */
	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
	    M_DEVBUF, M_WAITOK);
	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + FIXUP_EXTRA_APIC_INTS),
	    M_DEVBUF, M_WAITOK);
	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
	    M_DEVBUF, M_WAITOK);

	/* Map each I/O APIC found in pass 1. */
	for (x = 0; x < mp_napics; x++)
		ioapic[x] = permanent_io_mapping(io_apic_address[x]);

	/* clear various tables */
	for (x = 0; x < NAPICID; ++x) {
		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
	}

	/* clear bus data table */
	for (x = 0; x < mp_nbusses; ++x)
		bus_data[x].bus_id = 0xff;

	/*
	 * clear IO APIC INT table
	 *
	 * NOTE(review): only nintrs + 1 slots are cleared although
	 * nintrs + FIXUP_EXTRA_APIC_INTS were allocated without M_ZERO;
	 * presumably whoever uses the extra slots initializes them --
	 * confirm against mptable_fix().
	 */
	for (x = 0; x < (nintrs + 1); ++x) {
		io_apic_ints[x].int_type = 0xff;
		io_apic_ints[x].int_vector = 0xff;
	}

	/* check for use of 'default' configuration */
	if (fps->mpfb1 != 0) {
		mptable_default(fps->mpfb1);
		return;
	}

	bzero(&arg, sizeof(arg));
	error = mptable_iterate_entries(mpt->mp_cth,
		    mptable_ioapic_pass2_callback, &arg);
	if (error)
		panic("mptable_iterate_entries(ioapic_pass2) failed\n");
#endif
}
1007
984263bc
MD
1008/*
1009 * Check if we should perform a hyperthreading "fix-up" to
1010 * enumerate any logical CPU's that aren't already listed
1011 * in the table.
1012 *
1013 * XXX: We assume that all of the physical CPUs in the
1014 * system have the same number of logical CPUs.
1015 *
1016 * XXX: We assume that APIC ID's are allocated such that
1017 * the APIC ID's for a physical processor are aligned
1018 * with the number of logical CPU's in the processor.
1019 */
44c36320
SZ
1020static int
1021mptable_hyperthread_fixup(u_int id_mask, int cpu_count)
984263bc 1022{
44c36320 1023 int i, id, lcpus_max, logical_cpus;
984263bc 1024
984263bc 1025 if ((cpu_feature & CPUID_HTT) == 0)
44c36320 1026 return 0;
7ea07fd2
SZ
1027
1028 lcpus_max = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
1029 if (lcpus_max <= 1)
44c36320 1030 return 0;
984263bc 1031
7ea07fd2
SZ
1032 if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
1033 /*
1034 * INSTRUCTION SET REFERENCE, A-M (#253666)
1035 * Page 3-181, Table 3-20
1036 * "The nearest power-of-2 integer that is not smaller
1037 * than EBX[23:16] is the number of unique initial APIC
1038 * IDs reserved for addressing different logical
1039 * processors in a physical package."
1040 */
1041 for (i = 0; ; ++i) {
1042 if ((1 << i) >= lcpus_max) {
1043 lcpus_max = 1 << i;
1044 break;
1045 }
1046 }
1047 }
1048
44c36320
SZ
1049 KKASSERT(cpu_count != 0);
1050 if (cpu_count == lcpus_max) {
7ea07fd2 1051 /* We have nothing to fix */
44c36320
SZ
1052 return 0;
1053 } else if (cpu_count == 1) {
7ea07fd2
SZ
1054 /* XXX this may be incorrect */
1055 logical_cpus = lcpus_max;
1056 } else {
1057 int cur, prev, dist;
1058
1059 /*
1060 * Calculate the distances between two nearest
1061 * APIC IDs. If all such distances are same,
1062 * then it is the number of missing cpus that
1063 * we are going to fill later.
1064 */
1065 dist = cur = prev = -1;
1066 for (id = 0; id < MAXCPU; ++id) {
1067 if ((id_mask & 1 << id) == 0)
1068 continue;
1069
1070 cur = id;
1071 if (prev >= 0) {
1072 int new_dist = cur - prev;
1073
1074 if (dist < 0)
1075 dist = new_dist;
1076
1077 /*
1078 * Make sure that all distances
1079 * between two nearest APIC IDs
1080 * are same.
1081 */
1082 if (dist != new_dist)
44c36320 1083 return 0;
7ea07fd2
SZ
1084 }
1085 prev = cur;
1086 }
1087 if (dist == 1)
44c36320 1088 return 0;
7ea07fd2
SZ
1089
1090 /* Must be power of 2 */
1091 if (dist & (dist - 1))
44c36320 1092 return 0;
7ea07fd2
SZ
1093
1094 /* Can't exceed CPU package capacity */
1095 if (dist > lcpus_max)
1096 logical_cpus = lcpus_max;
1097 else
1098 logical_cpus = dist;
1099 }
1100
984263bc
MD
1101 /*
1102 * For each APIC ID of a CPU that is set in the mask,
1103 * scan the other candidate APIC ID's for this
1104 * physical processor. If any of those ID's are
1105 * already in the table, then kill the fixup.
1106 */
7ea07fd2 1107 for (id = 0; id < MAXCPU; id++) {
984263bc
MD
1108 if ((id_mask & 1 << id) == 0)
1109 continue;
1110 /* First, make sure we are on a logical_cpus boundary. */
1111 if (id % logical_cpus != 0)
44c36320 1112 return 0;
984263bc
MD
1113 for (i = id + 1; i < id + logical_cpus; i++)
1114 if ((id_mask & 1 << i) != 0)
44c36320 1115 return 0;
984263bc 1116 }
44c36320 1117 return logical_cpus;
984263bc 1118}
984263bc 1119
fa058384 1120static int
981bebd1
SZ
1121mptable_map(struct mptable_pos *mpt, vm_paddr_t mpfps_paddr)
1122{
1123 mpfps_t fps = NULL;
1124 mpcth_t cth = NULL;
1125 vm_size_t cth_mapsz = 0;
1126
fa058384
SZ
1127 bzero(mpt, sizeof(*mpt));
1128
981bebd1
SZ
1129 fps = pmap_mapdev(mpfps_paddr, sizeof(*fps));
1130 if (fps->pap != 0) {
1131 /*
1132 * Map configuration table header to get
1133 * the base table size
1134 */
1135 cth = pmap_mapdev(fps->pap, sizeof(*cth));
1136 cth_mapsz = cth->base_table_length;
1137 pmap_unmapdev((vm_offset_t)cth, sizeof(*cth));
1138
fa058384
SZ
1139 if (cth_mapsz < sizeof(*cth)) {
1140 kprintf("invalid base MP table length %d\n",
1141 (int)cth_mapsz);
1142 pmap_unmapdev((vm_offset_t)fps, sizeof(*fps));
1143 return EINVAL;
1144 }
1145
981bebd1
SZ
1146 /*
1147 * Map the base table
1148 */
1149 cth = pmap_mapdev(fps->pap, cth_mapsz);
1150 }
1151
1152 mpt->mp_fps = fps;
1153 mpt->mp_cth = cth;
1154 mpt->mp_cth_mapsz = cth_mapsz;
fa058384
SZ
1155
1156 return 0;
981bebd1
SZ
1157}
1158
1159static void
1160mptable_unmap(struct mptable_pos *mpt)
1161{
1162 if (mpt->mp_cth != NULL) {
1163 pmap_unmapdev((vm_offset_t)mpt->mp_cth, mpt->mp_cth_mapsz);
1164 mpt->mp_cth = NULL;
1165 mpt->mp_cth_mapsz = 0;
1166 }
1167 if (mpt->mp_fps != NULL) {
1168 pmap_unmapdev((vm_offset_t)mpt->mp_fps, sizeof(*mpt->mp_fps));
1169 mpt->mp_fps = NULL;
1170 }
1171}
1172
97359a5b
MD
1173#ifdef APIC_IO
1174
984263bc
MD
1175void
1176assign_apic_irq(int apic, int intpin, int irq)
1177{
1178 int x;
1179
1180 if (int_to_apicintpin[irq].ioapic != -1)
1181 panic("assign_apic_irq: inconsistent table");
1182
1183 int_to_apicintpin[irq].ioapic = apic;
1184 int_to_apicintpin[irq].int_pin = intpin;
1185 int_to_apicintpin[irq].apic_address = ioapic[apic];
1186 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1187
1188 for (x = 0; x < nintrs; x++) {
1189 if ((io_apic_ints[x].int_type == 0 ||
1190 io_apic_ints[x].int_type == 3) &&
1191 io_apic_ints[x].int_vector == 0xff &&
1192 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1193 io_apic_ints[x].dst_apic_int == intpin)
1194 io_apic_ints[x].int_vector = irq;
1195 }
1196}
1197
1198void
1199revoke_apic_irq(int irq)
1200{
1201 int x;
1202 int oldapic;
1203 int oldintpin;
1204
1205 if (int_to_apicintpin[irq].ioapic == -1)
1206 panic("revoke_apic_irq: inconsistent table");
1207
1208 oldapic = int_to_apicintpin[irq].ioapic;
1209 oldintpin = int_to_apicintpin[irq].int_pin;
1210
1211 int_to_apicintpin[irq].ioapic = -1;
1212 int_to_apicintpin[irq].int_pin = 0;
1213 int_to_apicintpin[irq].apic_address = NULL;
1214 int_to_apicintpin[irq].redirindex = 0;
1215
1216 for (x = 0; x < nintrs; x++) {
1217 if ((io_apic_ints[x].int_type == 0 ||
1218 io_apic_ints[x].int_type == 3) &&
1219 io_apic_ints[x].int_vector != 0xff &&
1220 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1221 io_apic_ints[x].dst_apic_int == oldintpin)
1222 io_apic_ints[x].int_vector = 0xff;
1223 }
1224}
1225
1439c090
MD
1226/*
1227 * Allocate an IRQ
1228 */
984263bc
MD
1229static void
1230allocate_apic_irq(int intr)
1231{
1232 int apic;
1233 int intpin;
1234 int irq;
1235
1236 if (io_apic_ints[intr].int_vector != 0xff)
1237 return; /* Interrupt handler already assigned */
1238
1239 if (io_apic_ints[intr].int_type != 0 &&
1240 (io_apic_ints[intr].int_type != 3 ||
1241 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1242 io_apic_ints[intr].dst_apic_int == 0)))
1243 return; /* Not INT or ExtInt on != (0, 0) */
1244
1245 irq = 0;
1246 while (irq < APIC_INTMAPSIZE &&
1247 int_to_apicintpin[irq].ioapic != -1)
1248 irq++;
1249
1250 if (irq >= APIC_INTMAPSIZE)
1251 return; /* No free interrupt handlers */
1252
1253 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1254 intpin = io_apic_ints[intr].dst_apic_int;
1255
1256 assign_apic_irq(apic, intpin, irq);
1257 io_apic_setup_intpin(apic, intpin);
1258}
1259
1260
1261static void
1262swap_apic_id(int apic, int oldid, int newid)
1263{
1264 int x;
1265 int oapic;
1266
1267
1268 if (oldid == newid)
1269 return; /* Nothing to do */
1270
26be20a0 1271 kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
984263bc
MD
1272 apic, oldid, newid);
1273
1274 /* Swap physical APIC IDs in interrupt entries */
1275 for (x = 0; x < nintrs; x++) {
1276 if (io_apic_ints[x].dst_apic_id == oldid)
1277 io_apic_ints[x].dst_apic_id = newid;
1278 else if (io_apic_ints[x].dst_apic_id == newid)
1279 io_apic_ints[x].dst_apic_id = oldid;
1280 }
1281
1282 /* Swap physical APIC IDs in IO_TO_ID mappings */
1283 for (oapic = 0; oapic < mp_napics; oapic++)
1284 if (IO_TO_ID(oapic) == newid)
1285 break;
1286
1287 if (oapic < mp_napics) {
26be20a0 1288 kprintf("Changing APIC ID for IO APIC #%d from "
984263bc
MD
1289 "%d to %d in MP table\n",
1290 oapic, newid, oldid);
1291 IO_TO_ID(oapic) = oldid;
1292 }
1293 IO_TO_ID(apic) = newid;
1294}
1295
1296
1297static void
1298fix_id_to_io_mapping(void)
1299{
1300 int x;
1301
1302 for (x = 0; x < NAPICID; x++)
1303 ID_TO_IO(x) = -1;
1304
1305 for (x = 0; x <= mp_naps; x++)
1306 if (CPU_TO_ID(x) < NAPICID)
1307 ID_TO_IO(CPU_TO_ID(x)) = x;
1308
1309 for (x = 0; x < mp_napics; x++)
1310 if (IO_TO_ID(x) < NAPICID)
1311 ID_TO_IO(IO_TO_ID(x)) = x;
1312}
1313
1314
1315static int
1316first_free_apic_id(void)
1317{
1318 int freeid, x;
1319
1320 for (freeid = 0; freeid < NAPICID; freeid++) {
1321 for (x = 0; x <= mp_naps; x++)
1322 if (CPU_TO_ID(x) == freeid)
1323 break;
1324 if (x <= mp_naps)
1325 continue;
1326 for (x = 0; x < mp_napics; x++)
1327 if (IO_TO_ID(x) == freeid)
1328 break;
1329 if (x < mp_napics)
1330 continue;
1331 return freeid;
1332 }
1333 return freeid;
1334}
1335
1336
1337static int
1338io_apic_id_acceptable(int apic, int id)
1339{
1340 int cpu; /* Logical CPU number */
1341 int oapic; /* Logical IO APIC number for other IO APIC */
1342
1343 if (id >= NAPICID)
1344 return 0; /* Out of range */
1345
1346 for (cpu = 0; cpu <= mp_naps; cpu++)
1347 if (CPU_TO_ID(cpu) == id)
1348 return 0; /* Conflict with CPU */
1349
1350 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1351 if (IO_TO_ID(oapic) == id)
1352 return 0; /* Conflict with other APIC */
1353
1354 return 1; /* ID is acceptable for IO APIC */
1355}
1356
1439c090
MD
1357static
1358io_int *
1359io_apic_find_int_entry(int apic, int pin)
1360{
1361 int x;
1362
1363 /* search each of the possible INTerrupt sources */
1364 for (x = 0; x < nintrs; ++x) {
1365 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1366 (pin == io_apic_ints[x].dst_apic_int))
1367 return (&io_apic_ints[x]);
1368 }
1369 return NULL;
1370}
1371
97359a5b 1372#endif
984263bc
MD
1373
/*
 * Repair known BIOS mistakes in the data gathered from the MP table.
 * Everything here is compiled only with APIC_IO; otherwise the function
 * is empty.  The fixups, in order:
 *
 *  1. A lone PCI bus that is not numbered 0 is swapped with bus 0.
 *  2. IO APIC IDs are made conflict free and ID_TO_IO is rebuilt.
 *  3. A missing or corrupt ExtINT entry for pin 0 of IO APIC 0 is
 *     created/repaired.
 *  4. A missing IRQ 15 entry is cloned from the IRQ 14 entry.
 */
static void
mptable_fix(void)
{
#ifdef APIC_IO
	int n;
	int pci_id;
	int apic;		/* IO APIC unit number */
	int spare_id;		/* Free physical APIC ID */
	int hw_id;		/* Current physical IO APIC ID */
	int pin0_type;
	io_int *io14;
	int slot_id0 = 0;	/* Stop GCC warning */
	int slot_pci = 0;	/* Stop GCC warning */
	int num_pci;

	/*
	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
	 * did it wrong.  The MP spec says that when more than 1 PCI bus
	 * exists the BIOS must begin with bus entries for the PCI bus and
	 * use actual PCI bus numbering.  This implies that when only 1 PCI
	 * bus exists the BIOS can choose to ignore this ordering, and
	 * indeed many MP motherboards do ignore it.  This causes a problem
	 * when the PCI sub-system makes requests of the MP sub-system based
	 * on PCI bus numbers.  So here we look for the situation and
	 * renumber the busses and associated INTs in an effort to "make it
	 * right".
	 */

	/*
	 * slot_id0 == slot of the bus with ID 0
	 * slot_pci == slot of the last PCI bus encountered
	 */
	num_pci = 0;
	for (n = 0; n < mp_nbusses; ++n) {
		if (bus_data[n].bus_id == 0)
			slot_id0 = n;
		if (bus_data[n].bus_type == PCI) {
			++num_pci;
			slot_pci = n;
		}
	}

	/* A single PCI bus that is not number 0 is mis-numbered */
	if (num_pci == 1 && bus_data[slot_pci].bus_id != 0) {
		/* swap the bus entry types with whichever bus has ID 0 */
		bus_data[slot_pci].bus_type = bus_data[slot_id0].bus_type;
		bus_data[slot_id0].bus_type = PCI;

		/* swap each relevant INTerrupt entry */
		pci_id = bus_data[slot_pci].bus_id;
		for (n = 0; n < nintrs; ++n) {
			if (io_apic_ints[n].src_bus_id == pci_id)
				io_apic_ints[n].src_bus_id = 0;
			else if (io_apic_ints[n].src_bus_id == 0)
				io_apic_ints[n].src_bus_id = pci_id;
		}
	}

	/*
	 * Assign IO APIC IDs.
	 *
	 * First try the existing ID.  If a conflict is detected, try
	 * the ID in the MP table.  If a conflict is still detected, find
	 * a free id.
	 *
	 * We cannot use the ID_TO_IO table before all conflicts have been
	 * resolved and the table has been corrected.
	 */
	for (apic = 0; apic < mp_napics; ++apic) {
		/* First try to use the value set by the BIOS */
		hw_id = io_apic_get_id(apic);
		if (io_apic_id_acceptable(apic, hw_id)) {
			if (IO_TO_ID(apic) != hw_id)
				swap_apic_id(apic, IO_TO_ID(apic), hw_id);
			continue;
		}

		/* Then check if the value in the MP table is acceptable */
		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
			continue;

		/* Last resort, find a free APIC ID and use it */
		spare_id = first_free_apic_id();
		if (spare_id >= NAPICID)
			panic("No free physical APIC IDs found");
		if (!io_apic_id_acceptable(apic, spare_id))
			panic("Free physical APIC ID not usable");
		swap_apic_id(apic, IO_TO_ID(apic), spare_id);
	}
	fix_id_to_io_mapping();

	/* detect and fix broken Compaq MP table */
	pin0_type = apic_int_type(0, 0);
	if (pin0_type == -1) {
		kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
		nintrs++;
	} else if (pin0_type == 0) {
		kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
		/* Turn the first entry for (0, 0) into an unassigned ExtINT */
		for (n = 0; n < nintrs; ++n) {
			if (ID_TO_IO(io_apic_ints[n].dst_apic_id) != 0)
				continue;
			if (io_apic_ints[n].dst_apic_int != 0)
				continue;
			io_apic_ints[n].int_type = 3;
			io_apic_ints[n].int_vector = 0xff;
			break;
		}
	}

	/*
	 * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt.  IDE
	 * controllers universally come in pairs.  If IRQ 14 is specified
	 * as an ISA interrupt, then IRQ 15 had better be too.
	 *
	 * [ Shuttle XPC / AMD Athlon X2 ]
	 *	The MPTable is missing an entry for IRQ 15.  Note that the
	 *	ACPI table has an entry for both 14 and 15.
	 */
	if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) {
		kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n");
		/* Clone the pin 14 entry and retarget the copy at pin 15 */
		io14 = io_apic_find_int_entry(0, 14);
		io_apic_ints[nintrs] = *io14;
		io_apic_ints[nintrs].src_bus_irq = 15;
		io_apic_ints[nintrs].dst_apic_int = 15;
		nintrs++;
	}
#endif
}
1516
97359a5b 1517#ifdef APIC_IO
984263bc
MD
1518
1519/* Assign low level interrupt handlers */
1520static void
1521setup_apic_irq_mapping(void)
1522{
1523 int x;
1524 int int_vector;
1525
1526 /* Clear array */
1527 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1528 int_to_apicintpin[x].ioapic = -1;
1529 int_to_apicintpin[x].int_pin = 0;
1530 int_to_apicintpin[x].apic_address = NULL;
1531 int_to_apicintpin[x].redirindex = 0;
1532 }
1533
1534 /* First assign ISA/EISA interrupts */
1535 for (x = 0; x < nintrs; x++) {
1536 int_vector = io_apic_ints[x].src_bus_irq;
1537 if (int_vector < APIC_INTMAPSIZE &&
1538 io_apic_ints[x].int_vector == 0xff &&
1539 int_to_apicintpin[int_vector].ioapic == -1 &&
1540 (apic_int_is_bus_type(x, ISA) ||
1541 apic_int_is_bus_type(x, EISA)) &&
1542 io_apic_ints[x].int_type == 0) {
1543 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1544 io_apic_ints[x].dst_apic_int,
1545 int_vector);
1546 }
1547 }
1548
1549 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1550 for (x = 0; x < nintrs; x++) {
1551 if (io_apic_ints[x].dst_apic_int == 0 &&
1552 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1553 io_apic_ints[x].int_vector == 0xff &&
1554 int_to_apicintpin[0].ioapic == -1 &&
1555 io_apic_ints[x].int_type == 3) {
1556 assign_apic_irq(0, 0, 0);
1557 break;
1558 }
1559 }
1560 /* PCI interrupt assignment is deferred */
1561}
1562
97359a5b 1563#endif
984263bc 1564
a9112655
SZ
1565void
1566mp_set_cpuids(int cpu_id, int apic_id)
1567{
1568 CPU_TO_ID(cpu_id) = apic_id;
1569 ID_TO_CPU(apic_id) = cpu_id;
1570}
1571
984263bc 1572static int
a0873f07 1573processor_entry(const struct PROCENTRY *entry, int cpu)
984263bc 1574{
bd8aa7e2
SZ
1575 KKASSERT(cpu > 0);
1576
984263bc
MD
1577 /* check for usability */
1578 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1579 return 0;
1580
984263bc
MD
1581 /* check for BSP flag */
1582 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
a9112655 1583 mp_set_cpuids(0, entry->apic_id);
984263bc
MD
1584 return 0; /* its already been counted */
1585 }
1586
1587 /* add another AP to list, if less than max number of CPUs */
1588 else if (cpu < MAXCPU) {
a9112655 1589 mp_set_cpuids(cpu, entry->apic_id);
984263bc
MD
1590 return 1;
1591 }
1592
1593 return 0;
1594}
1595
4f6a8b30 1596#ifdef APIC_IO
984263bc
MD
1597
1598static int
c4717d5c 1599bus_entry(const struct BUSENTRY *entry, int bus)
984263bc
MD
1600{
1601 int x;
1602 char c, name[8];
1603
1604 /* encode the name into an index */
1605 for (x = 0; x < 6; ++x) {
1606 if ((c = entry->bus_type[x]) == ' ')
1607 break;
1608 name[x] = c;
1609 }
1610 name[x] = '\0';
1611
1612 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1613 panic("unknown bus type: '%s'", name);
1614
1615 bus_data[bus].bus_id = entry->bus_id;
1616 bus_data[bus].bus_type = x;
1617
1618 return 1;
1619}
1620
984263bc 1621static int
c4717d5c 1622io_apic_entry(const struct IOAPICENTRY *entry, int apic)
984263bc
MD
1623{
1624 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1625 return 0;
1626
1627 IO_TO_ID(apic) = entry->apic_id;
c163176b 1628 ID_TO_IO(entry->apic_id) = apic;
984263bc
MD
1629
1630 return 1;
1631}
1632
97359a5b 1633#endif
984263bc
MD
1634
1635static int
1636lookup_bus_type(char *name)
1637{
1638 int x;
1639
1640 for (x = 0; x < MAX_BUSTYPE; ++x)
1641 if (strcmp(bus_type_table[x].name, name) == 0)
1642 return bus_type_table[x].type;
1643
1644 return UNKNOWN_BUSTYPE;
1645}
1646
97359a5b 1647#ifdef APIC_IO
984263bc
MD
1648
1649static int
c4717d5c 1650int_entry(const struct INTENTRY *entry, int intr)
984263bc
MD
1651{
1652 int apic;
1653
1654 io_apic_ints[intr].int_type = entry->int_type;
1655 io_apic_ints[intr].int_flags = entry->int_flags;
1656 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1657 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1658 if (entry->dst_apic_id == 255) {
1659 /* This signal goes to all IO APICS. Select an IO APIC
1660 with sufficient number of interrupt pins */
1661 for (apic = 0; apic < mp_napics; apic++)
1662 if (((io_apic_read(apic, IOAPIC_VER) &
1663 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1664 entry->dst_apic_int)
1665 break;
1666 if (apic < mp_napics)
1667 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1668 else
1669 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1670 } else
1671 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1672 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1673
1674 return 1;
1675}
1676
984263bc
MD
1677static int
1678apic_int_is_bus_type(int intr, int bus_type)
1679{
1680 int bus;
1681
1682 for (bus = 0; bus < mp_nbusses; ++bus)
1683 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1684 && ((int) bus_data[bus].bus_type == bus_type))
1685 return 1;
1686
1687 return 0;
1688}
1689
984263bc
MD
1690/*
1691 * Given a traditional ISA INT mask, return an APIC mask.
1692 */
1693u_int
1694isa_apic_mask(u_int isa_mask)
1695{
1696 int isa_irq;
1697 int apic_pin;
1698
1699#if defined(SKIP_IRQ15_REDIRECT)
1700 if (isa_mask == (1 << 15)) {
26be20a0 1701 kprintf("skipping ISA IRQ15 redirect\n");
984263bc
MD
1702 return isa_mask;
1703 }
1704#endif /* SKIP_IRQ15_REDIRECT */
1705
1706 isa_irq = ffs(isa_mask); /* find its bit position */
1707 if (isa_irq == 0) /* doesn't exist */
1708 return 0;
1709 --isa_irq; /* make it zero based */
1710
1711 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1712 if (apic_pin == -1)
1713 return 0;
1714
1715 return (1 << apic_pin); /* convert pin# to a mask */
1716}
1717
984263bc
MD
1718/*
1719 * Determine which APIC pin an ISA/EISA INT is attached to.
1720 */
1721#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1722#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1723#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1724#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1725
1726#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1727int
1728isa_apic_irq(int isa_irq)
1729{
1730 int intr;
1731
1732 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1733 if (INTTYPE(intr) == 0) { /* standard INT */
1734 if (SRCBUSIRQ(intr) == isa_irq) {
1735 if (apic_int_is_bus_type(intr, ISA) ||
1736 apic_int_is_bus_type(intr, EISA)) {
1737 if (INTIRQ(intr) == 0xff)
1738 return -1; /* unassigned */
1739 return INTIRQ(intr); /* found */
1740 }
1741 }
1742 }
1743 }
1744 return -1; /* NOT found */
1745}
1746
1747
1748/*
1749 * Determine which APIC pin a PCI INT is attached to.
1750 */
1751#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1752#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1753#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1754int
1755pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1756{
1757 int intr;
1758
1759 --pciInt; /* zero based */
1760
1439c090 1761 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
984263bc
MD
1762 if ((INTTYPE(intr) == 0) /* standard INT */
1763 && (SRCBUSID(intr) == pciBus)
1764 && (SRCBUSDEVICE(intr) == pciDevice)
1439c090 1765 && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */
984263bc
MD
1766 if (apic_int_is_bus_type(intr, PCI)) {
1767 if (INTIRQ(intr) == 0xff)
1768 allocate_apic_irq(intr);
1769 if (INTIRQ(intr) == 0xff)
1770 return -1; /* unassigned */
1771 return INTIRQ(intr); /* exact match */
1772 }
1439c090
MD
1773 }
1774 }
984263bc
MD
1775
1776 return -1; /* NOT found */
1777}
1778
1779int
1780next_apic_irq(int irq)
1781{
1782 int intr, ointr;
1783 int bus, bustype;
1784
1785 bus = 0;
1786 bustype = 0;
1787 for (intr = 0; intr < nintrs; intr++) {
1788 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1789 continue;
1790 bus = SRCBUSID(intr);
1791 bustype = apic_bus_type(bus);
1792 if (bustype != ISA &&
1793 bustype != EISA &&
1794 bustype != PCI)
1795 continue;
1796 break;
1797 }
1798 if (intr >= nintrs) {
1799 return -1;
1800 }
1801 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1802 if (INTTYPE(ointr) != 0)
1803 continue;
1804 if (bus != SRCBUSID(ointr))
1805 continue;
1806 if (bustype == PCI) {
1807 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1808 continue;
1809 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1810 continue;
1811 }
1812 if (bustype == ISA || bustype == EISA) {
1813 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1814 continue;
1815 }
1816 if (INTPIN(intr) == INTPIN(ointr))
1817 continue;
1818 break;
1819 }
1820 if (ointr >= nintrs) {
1821 return -1;
1822 }
1823 return INTIRQ(ointr);
1824}
1825#undef SRCBUSLINE
1826#undef SRCBUSDEVICE
1827#undef SRCBUSID
1828#undef SRCBUSIRQ
1829
1830#undef INTPIN
1831#undef INTIRQ
1832#undef INTAPIC
1833#undef INTTYPE
1834
97359a5b 1835#endif
984263bc
MD
1836
1837/*
1838 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1839 *
1840 * XXX FIXME:
1841 * Exactly what this means is unclear at this point. It is a solution
1842 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1843 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1844 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1845 * option.
1846 */
1847int
1848undirect_isa_irq(int rirq)
1849{
1850#if defined(READY)
1851 if (bootverbose)
26be20a0 1852 kprintf("Freeing redirected ISA irq %d.\n", rirq);
984263bc 1853 /** FIXME: tickle the MB redirector chip */
c044141b 1854 return /* XXX */;
984263bc
MD
1855#else
1856 if (bootverbose)
26be20a0 1857 kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
984263bc
MD
1858 return 0;
1859#endif /* READY */
1860}
1861
1862
1863/*
1864 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1865 */
1866int
1867undirect_pci_irq(int rirq)
1868{
1869#if defined(READY)
1870 if (bootverbose)
26be20a0 1871 kprintf("Freeing redirected PCI irq %d.\n", rirq);
984263bc
MD
1872
1873 /** FIXME: tickle the MB redirector chip */
c044141b 1874 return /* XXX */;
984263bc
MD
1875#else
1876 if (bootverbose)
26be20a0 1877 kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n",
984263bc
MD
1878 rirq);
1879 return 0;
1880#endif /* READY */
1881}
1882
1883
4f6a8b30
SZ
1884#ifdef APIC_IO
1885
984263bc
MD
1886/*
1887 * given a bus ID, return:
1888 * the bus type if found
1889 * -1 if NOT found
1890 */
1891int
1892apic_bus_type(int id)
1893{
1894 int x;
1895
1896 for (x = 0; x < mp_nbusses; ++x)
1897 if (bus_data[x].bus_id == id)
1898 return bus_data[x].bus_type;
1899
1900 return -1;
1901}
1902
984263bc
MD
1903/*
1904 * given a LOGICAL APIC# and pin#, return:
1905 * the associated src bus ID if found
1906 * -1 if NOT found
1907 */
1908int
1909apic_src_bus_id(int apic, int pin)
1910{
1911 int x;
1912
1913 /* search each of the possible INTerrupt sources */
1914 for (x = 0; x < nintrs; ++x)
1915 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1916 (pin == io_apic_ints[x].dst_apic_int))
1917 return (io_apic_ints[x].src_bus_id);
1918
1919 return -1; /* NOT found */
1920}
1921
984263bc
MD
1922/*
1923 * given a LOGICAL APIC# and pin#, return:
1924 * the associated src bus IRQ if found
1925 * -1 if NOT found
1926 */
1927int
1928apic_src_bus_irq(int apic, int pin)
1929{
1930 int x;
1931
1932 for (x = 0; x < nintrs; x++)
1933 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1934 (pin == io_apic_ints[x].dst_apic_int))
1935 return (io_apic_ints[x].src_bus_irq);
1936
1937 return -1; /* NOT found */
1938}
1939
1940
1941/*
1942 * given a LOGICAL APIC# and pin#, return:
1943 * the associated INTerrupt type if found
1944 * -1 if NOT found
1945 */
1946int
1947apic_int_type(int apic, int pin)
1948{
1949 int x;
1950
1951 /* search each of the possible INTerrupt sources */
1439c090 1952 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1953 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1954 (pin == io_apic_ints[x].dst_apic_int))
1955 return (io_apic_ints[x].int_type);
1439c090 1956 }
984263bc
MD
1957 return -1; /* NOT found */
1958}
1959
1439c090
MD
1960/*
1961 * Return the IRQ associated with an APIC pin
1962 */
984263bc
MD
1963int
1964apic_irq(int apic, int pin)
1965{
1966 int x;
1967 int res;
1968
1439c090 1969 for (x = 0; x < nintrs; ++x) {
984263bc
MD
1970 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1971 (pin == io_apic_ints[x].dst_apic_int)) {
1972 res = io_apic_ints[x].int_vector;
1973 if (res == 0xff)
1974 return -1;
1975 if (apic != int_to_apicintpin[res].ioapic)
1439c090 1976 panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic);
984263bc
MD
1977 if (pin != int_to_apicintpin[res].int_pin)
1978 panic("apic_irq inconsistent table (2)");
1979 return res;
1980 }
1439c090 1981 }
984263bc
MD
1982 return -1;
1983}
1984
1985
1986/*
1987 * given a LOGICAL APIC# and pin#, return:
1988 * the associated trigger mode if found
1989 * -1 if NOT found
1990 */
1991int
1992apic_trigger(int apic, int pin)
1993{
1994 int x;
1995
1996 /* search each of the possible INTerrupt sources */
1997 for (x = 0; x < nintrs; ++x)
1998 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1999 (pin == io_apic_ints[x].dst_apic_int))
2000 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
2001
2002 return -1; /* NOT found */
2003}
2004
2005
2006/*
2007 * given a LOGICAL APIC# and pin#, return:
2008 * the associated 'active' level if found
2009 * -1 if NOT found
2010 */
2011int
2012apic_polarity(int apic, int pin)
2013{
2014 int x;
2015
2016 /* search each of the possible INTerrupt sources */
2017 for (x = 0; x < nintrs; ++x)
2018 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
2019 (pin == io_apic_ints[x].dst_apic_int))
2020 return (io_apic_ints[x].int_flags & 0x03);
2021
2022 return -1; /* NOT found */
2023}
2024
97359a5b 2025#endif
984263bc
MD
2026
/*
 * set data according to MP defaults
 * FIXME: probably not complete yet...
 *
 * 'type' is the MP default configuration type; values 1 through 7 are
 * accepted and anything else panics.  With APIC_IO the single IO APIC's
 * ID is sanitized and recorded, two bus entries are taken from
 * default_data[], and 16 interrupt entries are generated.  Without
 * APIC_IO the function is empty.
 */
static void
mptable_default(int type)
{
#if defined(APIC_IO)
	int io_apic_id;
	int pin;
	int bad_id;

#if 0
	kprintf("  MP default config type: %d\n", type);
	switch (type) {
	case 1:
		kprintf("   bus: ISA, APIC: 82489DX\n");
		break;
	case 2:
		kprintf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 3:
		kprintf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 4:
		kprintf("   bus: MCA, APIC: 82489DX\n");
		break;
	case 5:
		kprintf("   bus: ISA+PCI, APIC: Integrated\n");
		break;
	case 6:
		kprintf("   bus: EISA+PCI, APIC: Integrated\n");
		break;
	case 7:
		kprintf("   bus: MCA+PCI, APIC: Integrated\n");
		break;
	default:
		kprintf("   future type\n");
		break;
		/* NOTREACHED */
	}
#endif	/* 0 */

	/* one and only IO APIC */
	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;

	/*
	 * sanity check, refer to MP spec section 3.6.6, last paragraph
	 * necessary as some hardware isn't properly setting up the IO APIC
	 */
#if defined(REALLY_ANAL_IOAPICID_VALUE)
	bad_id = (io_apic_id != 2);
#else
	bad_id = ((io_apic_id == 0) || (io_apic_id == 1) ||
		  (io_apic_id == 15));
#endif	/* REALLY_ANAL_IOAPICID_VALUE */
	if (bad_id) {
		/* Force the IO APIC to ID 2 */
		io_apic_set_id(0, 2);
		io_apic_id = 2;
	}
	IO_TO_ID(0) = io_apic_id;
	ID_TO_IO(io_apic_id) = 0;

	/* fill out bus entries; only types 1-7 are defined */
	if (type < 1 || type > 7)
		panic("BAD default MP config: %d", type);
	bus_data[0].bus_id = default_data[type - 1][1];
	bus_data[0].bus_type = default_data[type - 1][2];
	bus_data[1].bus_id = default_data[type - 1][3];
	bus_data[1].bus_type = default_data[type - 1][4];

	/* general cases from MP v1.4, table 5-2 */
	for (pin = 0; pin < 16; ++pin) {
		io_apic_ints[pin].int_type = 0;
		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
		io_apic_ints[pin].src_bus_id = 0;
		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
		io_apic_ints[pin].dst_apic_id = io_apic_id;
		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
	}

	/* special cases from MP v1.4, table 5-2 */
	if (type != 2) {
		io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */
	} else {
		io_apic_ints[2].int_type = 0xff;	/* N/C */
		io_apic_ints[13].int_type = 0xff;	/* N/C */
#if !defined(APIC_MIXED_MODE)
		/** FIXME: ??? */
		panic("sorry, can't support type 2 default yet");
#endif	/* APIC_MIXED_MODE */
	}

	/* pin 0: N/C for type 7, vectored 8259 otherwise */
	io_apic_ints[0].int_type = (type == 7) ? 0xff : 3;
#endif	/* APIC_IO */
}
2136
f13b5eec
MD
2137/*
2138 * Map a physical memory address representing I/O into KVA. The I/O
2139 * block is assumed not to cross a page boundary.
2140 */
2141void *
2142permanent_io_mapping(vm_paddr_t pa)
2143{
2144 vm_offset_t vaddr;
2145 int pgeflag;
2146 int i;
2147
2148 KKASSERT(pa < 0x100000000LL);
2149
2150 pgeflag = 0; /* not used for SMP yet */
2151
2152 /*
2153 * If the requested physical address has already been incidently
2154 * mapped, just use the existing mapping. Otherwise create a new
2155 * mapping.
2156 */
2157 for (i = IO_MAPPING_START_INDEX; i < SMPpt_alloc_index; ++i) {
2158 if (((vm_offset_t)SMPpt[i] & PG_FRAME) ==
2159 ((vm_offset_t)pa & PG_FRAME)) {
2160 break;
2161 }
2162 }
2163 if (i == SMPpt_alloc_index) {
2164 if (i == NPTEPG - 2) {
2165 panic("permanent_io_mapping: We ran out of space"
2166 " in SMPpt[]!");
2167 }
5277b9f6 2168 SMPpt[i] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
f13b5eec
MD
2169 ((vm_offset_t)pa & PG_FRAME));
2170 ++SMPpt_alloc_index;
2171 }
2172 vaddr = (vm_offset_t)CPU_prvspace + (i * PAGE_SIZE) +
2173 ((vm_offset_t)pa & PAGE_MASK);
2174 return ((void *)vaddr);
2175}
2176
984263bc
MD
2177/*
2178 * start each AP in our list
2179 */
2180static int
2181start_all_aps(u_int boot_addr)
2182{
b45759e1
MD
2183 int x, i, pg;
2184 int shift;
984263bc
MD
2185 u_char mpbiosreason;
2186 u_long mpbioswarmvec;
8a8d5d85 2187 struct mdglobaldata *gd;
0f7a3396 2188 struct privatespace *ps;
984263bc
MD
2189 char *stack;
2190 uintptr_t kptbase;
2191
2192 POSTCODE(START_ALL_APS_POST);
2193
b52c8db0
SZ
2194 /* Initialize BSP's local APIC */
2195 apic_initialize(TRUE);
984263bc
MD
2196
2197 /* install the AP 1st level boot code */
2198 install_ap_tramp(boot_addr);
2199
2200
2201 /* save the current value of the warm-start vector */
2202 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
984263bc
MD
2203 outb(CMOS_REG, BIOS_RESET);
2204 mpbiosreason = inb(CMOS_DATA);
984263bc 2205
984263bc
MD
2206 /* set up temporary P==V mapping for AP boot */
2207 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
2208 kptbase = (uintptr_t)(void *)KPTphys;
a44bdeec 2209 for (x = 0; x < NKPT; x++) {
984263bc
MD
2210 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
2211 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
a44bdeec 2212 }
0f7a3396 2213 cpu_invltlb();
984263bc
MD
2214
2215 /* start each AP */
2216 for (x = 1; x <= mp_naps; ++x) {
2217
2218 /* This is a bit verbose, it will go away soon. */
2219
2220 /* first page of AP's private space */
2221 pg = x * i386_btop(sizeof(struct privatespace));
2222
81c04d07 2223 /* allocate new private data page(s) */
e4846942 2224 gd = (struct mdglobaldata *)kmem_alloc(&kernel_map,
81c04d07 2225 MDGLOBALDATA_BASEALLOC_SIZE);
984263bc 2226 /* wire it into the private page table page */
81c04d07
MD
2227 for (i = 0; i < MDGLOBALDATA_BASEALLOC_SIZE; i += PAGE_SIZE) {
2228 SMPpt[pg + i / PAGE_SIZE] = (pt_entry_t)
2229 (PG_V | PG_RW | vtophys_pte((char *)gd + i));
2230 }
2231 pg += MDGLOBALDATA_BASEALLOC_PAGES;
2232
2233 SMPpt[pg + 0] = 0; /* *gd_CMAP1 */
2234 SMPpt[pg + 1] = 0; /* *gd_CMAP2 */
2235 SMPpt[pg + 2] = 0; /* *gd_CMAP3 */
2236 SMPpt[pg + 3] = 0; /* *gd_PMAP1 */
984263bc
MD
2237
2238 /* allocate and set up an idle stack data page */
e4846942 2239 stack = (char *)kmem_alloc(&kernel_map, UPAGES*PAGE_SIZE);
8a8d5d85 2240 for (i = 0; i < UPAGES; i++) {
81c04d07 2241 SMPpt[pg + 4 + i] = (pt_entry_t)
b5b32410 2242 (PG_V | PG_RW | vtophys_pte(PAGE_SIZE * i + stack));
8a8d5d85 2243 }
984263bc 2244
8a8d5d85
MD
2245 gd = &CPU_prvspace[x].mdglobaldata; /* official location */
2246 bzero(gd, sizeof(*gd));
0f7a3396 2247 gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
8a8d5d85 2248
984263bc 2249 /* prime data page for it to use */
8a8d5d85 2250 mi_gdinit(&gd->mi, x);
8ad65e08 2251 cpu_gdinit(gd, x);
81c04d07
MD
2252 gd->gd_CMAP1 = &SMPpt[pg + 0];
2253 gd->gd_CMAP2 = &SMPpt[pg + 1];
2254 gd->gd_CMAP3 = &SMPpt[pg + 2];
2255 gd->gd_PMAP1 = &SMPpt[pg + 3];
0f7a3396
MD
2256 gd->gd_CADDR1 = ps->CPAGE1;
2257 gd->gd_CADDR2 = ps->CPAGE2;
2258 gd->gd_CADDR3 = ps->CPAGE3;
2259 gd->gd_PADDR1 = (unsigned *)ps->PPAGE1;
e4846942 2260 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
96728c05 2261 bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
984263bc
MD
2262
2263 /* setup a vector to our boot code */
2264 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2265 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
984263bc
MD
2266 outb(CMOS_REG, BIOS_RESET);
2267 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
984263bc 2268
8a8d5d85
MD
2269 /*
2270 * Setup the AP boot stack
2271 */
0f7a3396 2272 bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
984263bc
MD
2273 bootAP = x;
2274
2275 /* attempt to start the Application Processor */
2276 CHECK_INIT(99); /* setup checkpoints */
0f7a3396 2277 if (!start_ap(gd, boot_addr)) {
26be20a0 2278 kprintf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
984263bc
MD
2279 CHECK_PRINT("trace"); /* show checkpoints */
2280 /* better panic as the AP may be running loose */
26be20a0 2281 kprintf("panic y/n? [y] ");
984263bc
MD
2282 if (cngetc() != 'n')
2283 panic("bye-bye");
2284 }
2285 CHECK_PRINT("trace"); /* show checkpoints */
2286
2287 /* record its version info */
2288 cpu_apic_versions[x] = cpu_apic_versions[0];
984263bc
MD
2289 }
2290
0f7a3396
MD
2291 /* set ncpus to 1 + highest logical cpu. Not all may have come up */
2292 ncpus = x;
2293
b45759e1
MD
2294 /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
2295 for (shift = 0; (1 << shift) <= ncpus; ++shift)
2296 ;
2297 --shift;
2298 ncpus2_shift = shift;
2299 ncpus2 = 1 << shift;
90100055
JH
2300 ncpus2_mask = ncpus2 - 1;
2301
b45759e1
MD
2302 /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
2303 if ((1 << shift) < ncpus)
2304 ++shift;
2305 ncpus_fit = 1 << shift;
2306 ncpus_fit_mask = ncpus_fit - 1;
2307
984263bc 2308 /* build our map of 'other' CPUs */
0f7a3396 2309 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
e4846942 2310 mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus);
96728c05 2311 bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
984263bc
MD
2312
2313 /* fill in our (BSP) APIC version */
2314 cpu_apic_versions[0] = lapic.version;
2315
2316 /* restore the warmstart vector */
2317 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
984263bc
MD
2318 outb(CMOS_REG, BIOS_RESET);
2319 outb(CMOS_DATA, mpbiosreason);
984263bc
MD
2320
2321 /*
8a8d5d85
MD
2322 * NOTE! The idlestack for the BSP was setup by locore. Finish
2323 * up, clean out the P==V mapping we did earlier.
984263bc 2324 */
984263bc
MD
2325 for (x = 0; x < NKPT; x++)
2326 PTD[x] = 0;
2327 pmap_set_opt();
2328
2329 /* number of APs actually started */
8a8d5d85 2330 return ncpus - 1;
984263bc
MD
2331}
2332
2333
2334/*
2335 * load the 1st level AP boot code into base memory.
2336 */
2337
2338/* targets for relocation */
2339extern void bigJump(void);
2340extern void bootCodeSeg(void);
2341extern void bootDataSeg(void);
2342extern void MPentry(void);
2343extern u_int MP_GDT;
2344extern u_int mp_gdtbase;
2345
2346static void
2347install_ap_tramp(u_int boot_addr)
2348{
2349 int x;
2350 int size = *(int *) ((u_long) & bootMP_size);
2351 u_char *src = (u_char *) ((u_long) bootMP);
2352 u_char *dst = (u_char *) boot_addr + KERNBASE;
2353 u_int boot_base = (u_int) bootMP;
2354 u_int8_t *dst8;
2355 u_int16_t *dst16;
2356 u_int32_t *dst32;
2357
2358 POSTCODE(INSTALL_AP_TRAMP_POST);
2359
2360 for (x = 0; x < size; ++x)
2361 *dst++ = *src++;
2362
2363 /*
2364 * modify addresses in code we just moved to basemem. unfortunately we
2365 * need fairly detailed info about mpboot.s for this to work. changes
2366 * to mpboot.s might require changes here.
2367 */
2368
2369 /* boot code is located in KERNEL space */
2370 dst = (u_char *) boot_addr + KERNBASE;
2371
2372 /* modify the lgdt arg */
2373 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2374 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2375
2376 /* modify the ljmp target for MPentry() */
2377 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2378 *dst32 = ((u_int) MPentry - KERNBASE);
2379
2380 /* modify the target for boot code segment */
2381 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2382 dst8 = (u_int8_t *) (dst16 + 1);
2383 *dst16 = (u_int) boot_addr & 0xffff;
2384 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2385
2386 /* modify the target for boot data segment */
2387 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2388 dst8 = (u_int8_t *) (dst16 + 1);
2389 *dst16 = (u_int) boot_addr & 0xffff;
2390 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2391}
2392
2393
2394/*
2395 * this function starts the AP (application processor) identified
2396 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
2397 * to accomplish this. This is necessary because of the nuances
2398 * of the different hardware we might encounter. It ain't pretty,
2399 * but it seems to work.
a108bf71
MD
2400 *
2401 * NOTE: eventually an AP gets to ap_init(), which is called just
2402 * before the AP goes into the LWKT scheduler's idle loop.
984263bc
MD
2403 */
2404static int
0f7a3396 2405start_ap(struct mdglobaldata *gd, u_int boot_addr)
984263bc
MD
2406{
2407 int physical_cpu;
2408 int vector;
984263bc
MD
2409 u_long icr_lo, icr_hi;
2410
2411 POSTCODE(START_AP_POST);
2412
2413 /* get the PHYSICAL APIC ID# */
0f7a3396 2414 physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
984263bc
MD
2415
2416 /* calculate the vector */
2417 vector = (boot_addr >> 12) & 0xff;
2418
8a8d5d85
MD
2419 /* Make sure the target cpu sees everything */
2420 wbinvd();
984263bc
MD
2421
2422 /*
2423 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2424 * and running the target CPU. OR this INIT IPI might be latched (P5
2425 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2426 * ignored.
2427 */
2428
2429 /* setup the address for the target AP */
2430 icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2431 icr_hi |= (physical_cpu << 24);
2432 lapic.icr_hi = icr_hi;
2433
2434 /* do an INIT IPI: assert RESET */
2435 icr_lo = lapic.icr_lo & 0xfff00000;
2436 lapic.icr_lo = icr_lo | 0x0000c500;
2437
2438 /* wait for pending status end */
2439 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2440 /* spin */ ;
2441
2442 /* do an INIT IPI: deassert RESET */
2443 lapic.icr_lo = icr_lo | 0x00008500;
2444
2445 /* wait for pending status end */
2446 u_sleep(10000); /* wait ~10mS */
2447 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2448 /* spin */ ;
2449
2450 /*
2451 * next we do a STARTUP IPI: the previous INIT IPI might still be
2452 * latched, (P5 bug) this 1st STARTUP would then terminate
2453 * immediately, and the previously started INIT IPI would continue. OR
2454 * the previous INIT IPI has already run. and this STARTUP IPI will
2455 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2456 * will run.
2457 */
2458
2459 /* do a STARTUP IPI */
2460 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2461 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2462 /* spin */ ;
2463 u_sleep(200); /* wait ~200uS */
2464
2465 /*
2466 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2467 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2468 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2469 * recognized after hardware RESET or INIT IPI.
2470 */
2471
2472 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2473 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2474 /* spin */ ;
2475 u_sleep(200); /* wait ~200uS */
2476
8a8d5d85 2477 /* wait for it to start, see ap_init() */
984263bc 2478 set_apic_timer(5000000);/* == 5 seconds */
8a8d5d85 2479 while (read_apic_timer()) {
0f7a3396 2480 if (smp_startup_mask & (1 << gd->mi.gd_cpuid))
984263bc 2481 return 1; /* return SUCCESS */
8a8d5d85 2482 }
984263bc
MD
2483 return 0; /* return FAILURE */
2484}
2485
2486
/*
 * Lazy flush the TLB on all other CPU's.  DEPRECATED.
 *
 * The broadcast (all-but-self) IPI is only used when every cpu that
 * started is also active.  If for some reason we were unable to start
 * all cpus we cannot safely use broadcast IPIs, so the IPI is sent to
 * the active cpus only.  Without SMP this is a no-op.
 */
void
smp_invltlb(void)
{
#ifdef SMP
	if (smp_startup_mask != smp_active_mask) {
		selected_apic_ipi(smp_active_mask, XINVLTLB_OFFSET,
		    APIC_DELMODE_FIXED);
		return;
	}
	all_but_self_ipi(XINVLTLB_OFFSET);
#endif
}
2505
984263bc
MD
2506/*
2507 * When called the executing CPU will send an IPI to all other CPUs
2508 * requesting that they halt execution.
2509 *
2510 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2511 *
2512 * - Signals all CPUs in map to stop.
2513 * - Waits for each to stop.
2514 *
2515 * Returns:
2516 * -1: error
2517 * 0: NA
2518 * 1: ok
2519 *
2520 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2521 * from executing at same time.
2522 */
2523int
2524stop_cpus(u_int map)
2525{
0f7a3396 2526 map &= smp_active_mask;
984263bc
MD
2527
2528 /* send the Xcpustop IPI to all CPUs in map */
2529 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2530
2531 while ((stopped_cpus & map) != map)
2532 /* spin */ ;
2533
2534 return 1;
2535}
2536
2537
2538/*
2539 * Called by a CPU to restart stopped CPUs.
2540 *
2541 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2542 *
2543 * - Signals all CPUs in map to restart.
2544 * - Waits for each to restart.
2545 *
2546 * Returns:
2547 * -1: error
2548 * 0: NA
2549 * 1: ok
2550 */
2551int
2552restart_cpus(u_int map)
2553{
0f7a3396
MD
2554 /* signal other cpus to restart */
2555 started_cpus = map & smp_active_mask;
984263bc
MD
2556
2557 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2558 /* spin */ ;
2559
2560 return 1;
2561}
2562
984263bc 2563/*
8a8d5d85
MD
2564 * This is called once the mpboot code has gotten us properly relocated
2565 * and the MMU turned on, etc. ap_init() is actually the idle thread,
2566 * and when it returns the scheduler will call the real cpu_idle() main
2567 * loop for the idlethread. Interrupts are disabled on entry and should
2568 * remain disabled at return.
984263bc 2569 */
984263bc 2570void
8a8d5d85 2571ap_init(void)
984263bc
MD
2572{
2573 u_int apic_id;
2574
8a8d5d85 2575 /*
0f7a3396
MD
2576 * Adjust smp_startup_mask to signal the BSP that we have started
2577 * up successfully. Note that we do not yet hold the BGL. The BSP
2578 * is waiting for our signal.
2579 *
2580 * We can't set our bit in smp_active_mask yet because we are holding
2581 * interrupts physically disabled and remote cpus could deadlock
2582 * trying to send us an IPI.
8a8d5d85 2583 */
0f7a3396 2584 smp_startup_mask |= 1 << mycpu->gd_cpuid;
35238fa5 2585 cpu_mfence();
8a8d5d85
MD
2586
2587 /*
41a01a4d
MD
2588 * Interlock for finalization. Wait until mp_finish is non-zero,
2589 * then get the MP lock.
2590 *
2591 * Note: We are in a critical section.
2592 *
2593 * Note: We have to synchronize td_mpcount to our desired MP state
2594 * before calling cpu_try_mplock().
2595 *
2596 * Note: we are the idle thread, we can only spin.
2597 *
35238fa5
MD
2598 * Note: The load fence is memory volatile and prevents the compiler
2599 * from improperly caching mp_finish, and the cpu from improperly
2600 * caching it.
8a8d5d85 2601 */
41a01a4d 2602 while (mp_finish == 0)
35238fa5 2603 cpu_lfence();
d9ebdce5 2604 ++curthread->td_mpcount;
8a8d5d85
MD
2605 while (cpu_try_mplock() == 0)
2606 ;
2607
374133e3
MD
2608 if (cpu_feature & CPUID_TSC) {
2609 /*
2610 * The BSP is constantly updating tsc0_offset, figure out the
2611 * relative difference to synchronize ktrdump.
2612 */
2613 tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
2614 }
2615
984263bc
MD
2616 /* BSP may have changed PTD while we're waiting for the lock */
2617 cpu_invltlb();
2618
984263bc
MD
2619#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2620 lidt(&r_idt);
2621#endif
2622
2623 /* Build our map of 'other' CPUs. */
0f7a3396 2624 mycpu->gd_other_cpus = smp_startup_mask & ~(1 << mycpu->gd_cpuid);
984263bc 2625
26be20a0 2626 kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);
984263bc 2627
984263bc
MD
2628 /* A quick check from sanity claus */
2629 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
8a8d5d85 2630 if (mycpu->gd_cpuid != apic_id) {
26be20a0
SW
2631 kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
2632 kprintf("SMP: apic_id = %d\n", apic_id);
2633 kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
984263bc
MD
2634 panic("cpuid mismatch! boom!!");
2635 }
2636
b52c8db0
SZ
2637 /* Initialize AP's local APIC for irq's */
2638 apic_initialize(FALSE);
984263bc
MD
2639
2640 /* Set memory range attributes for this CPU to match the BSP */
2641 mem_range_AP_init();
2642
8a8d5d85 2643 /*
4c9f5a7f
MD
2644 * Once we go active we must process any IPIQ messages that may
2645 * have been queued, because no actual IPI will occur until we
2646 * set our bit in the smp_active_mask. If we don't the IPI
2647 * message interlock could be left set which would also prevent
2648 * further IPIs.
2649 *
8a8d5d85
MD
2650 * The idle loop doesn't expect the BGL to be held and while
2651 * lwkt_switch() normally cleans things up this is a special case
2652 * because we returning almost directly into the idle loop.
41a01a4d
MD
2653 *
2654 * The idle thread is never placed on the runq, make sure
4c9f5a7f 2655 * nothing we've done put it there.
8a8d5d85 2656 */
96728c05 2657 KKASSERT(curthread->td_mpcount == 1);
41a01a4d 2658 smp_active_mask |= 1 << mycpu->gd_cpuid;
d19f6edf
MD
2659
2660 /*
2661 * Enable interrupts here. idle_restore will also do it, but
2662 * doing it here lets us clean up any strays that got posted to
2663 * the CPU during the AP boot while we are still in a critical
2664 * section.
2665 */
2666 __asm __volatile("sti; pause; pause"::);
2667 mdcpu->gd_fpending = 0;
2668 mdcpu->gd_ipending = 0;
2669
4a19580d 2670 initclocks_pcpu(); /* clock interrupts (via IPIs) */
4c9f5a7f 2671 lwkt_process_ipiq();
d19f6edf
MD
2672
2673 /*
2674 * Releasing the mp lock lets the BSP finish up the SMP init
2675 */
96728c05 2676 rel_mplock();
41a01a4d 2677 KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
984263bc
MD
2678}
2679
41a01a4d
MD
2680/*
2681 * Get SMP fully working before we start initializing devices.
2682 */
2683static
2684void
2685ap_finish(void)
2686{
2687 mp_finish = 1;
2688 if (bootverbose)
26be20a0 2689 kprintf("Finish MP startup\n");
374133e3
MD
2690 if (cpu_feature & CPUID_TSC)
2691 tsc0_offset = rdtsc();
2692 tsc_offsets[0] = 0;
41a01a4d 2693 rel_mplock();
374133e3 2694 while (smp_active_mask != smp_startup_mask) {
35238fa5 2695 cpu_lfence();
374133e3
MD
2696 if (cpu_feature & CPUID_TSC)
2697 tsc0_offset = rdtsc();
2698 }
4da43e1f 2699 while (try_mplock() == 0)
41a01a4d
MD
2700 ;
2701 if (bootverbose)
26be20a0 2702 kprintf("Active CPU Mask: %08x\n", smp_active_mask);
41a01a4d
MD
2703}
2704
ba39e2e0 2705SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
41a01a4d 2706
96728c05
MD
2707void
2708cpu_send_ipiq(int dcpu)
2709{
41a01a4d
MD
2710 if ((1 << dcpu) & smp_active_mask)
2711 single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
96728c05 2712}
41a01a4d
MD
2713
#if 0	/* single_apic_ipi_passive() not working yet */
/*
 * Passive variant of cpu_send_ipiq().
 *
 * Returns 0 on failure (including the case where 'dcpu' is not in
 * smp_active_mask), 1 on success
 */
int
cpu_send_ipiq_passive(int dcpu)
{
	int rv;

	if (((1 << dcpu) & smp_active_mask) == 0)
		return(0);
	rv = single_apic_ipi_passive(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
	return(rv);
}
#endif
2729
a0873f07
SZ
2730struct mptable_lapic_cbarg1 {
2731 int cpu_count;
44c36320
SZ
2732 int ht_fixup;
2733 u_int ht_apicid_mask;
a0873f07
SZ
2734};
2735
2736static int
2737mptable_lapic_pass1_callback(void *xarg, const void *pos, int type)
2738{
2739 const struct PROCENTRY *ent;
2740 struct mptable_lapic_cbarg1 *arg = xarg;
2741
2742 if (type != 0)
2743 return 0;
2744 ent = pos;
2745
2746 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
2747 return 0;
2748
2749 arg->cpu_count++;
44c36320
SZ
2750 if (ent->apic_id < 32) {
2751 arg->ht_apicid_mask |= 1 << ent->apic_id;
2752 } else if (arg->ht_fixup) {
2753 kprintf("MPTABLE: lapic id > 32, disable HTT fixup\n");
2754 arg->ht_fixup = 0;
2755 }
a0873f07
SZ
2756 return 0;
2757}
2758
2759struct mptable_lapic_cbarg2 {
2760 int cpu;
44c36320 2761 int logical_cpus;
a0873f07
SZ
2762 int found_bsp;
2763};
2764
2765static int
2766mptable_lapic_pass2_callback(void *xarg, const void *pos, int type)
2767{
2768 const struct PROCENTRY *ent;
2769 struct mptable_lapic_cbarg2 *arg = xarg;
2770
2771 if (type != 0)
2772 return 0;
2773 ent = pos;
2774
2775 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
2776 KKASSERT(!arg->found_bsp);
2777 arg->found_bsp = 1;
2778 }
2779
2780 if (processor_entry(ent, arg->cpu))
2781 arg->cpu++;
2782
44c36320 2783 if (arg->logical_cpus) {
a0873f07
SZ
2784 struct PROCENTRY proc;
2785 int i;
2786
2787 /*
2788 * Create fake mptable processor entries
2789 * and feed them to processor_entry() to
2790 * enumerate the logical CPUs.
2791 */
2792 bzero(&proc, sizeof(proc));
2793 proc.type = 0;
2794 proc.cpu_flags = PROCENTRY_FLAG_EN;
2795 proc.apic_id = ent->apic_id;
2796
44c36320 2797 for (i = 1; i < arg->logical_cpus; i++) {
a0873f07
SZ
2798 proc.apic_id++;
2799 processor_entry(&proc, arg->cpu);
a0873f07
SZ
2800 arg->cpu++;
2801 }
2802 }
2803 return 0;
2804}
2805
281d9482
SZ
2806static void
2807mptable_imcr(struct mptable_pos *mpt)
2808{
2809 /* record whether PIC or virtual-wire mode */
2810 machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT,
2811 mpt->mp_fps->mpfb2 & 0x80);
2812}
2813
2814struct mptable_lapic_enumerator {
2815 struct lapic_enumerator enumerator;
2816 vm_paddr_t mpfps_paddr;
2817};
2818
322abba7
SZ
2819static void
2820mptable_lapic_default(void)
2821{
2822 int ap_apicid, bsp_apicid;
2823
2824 mp_naps = 1; /* exclude BSP */
2825
2826 /* Map local apic before the id field is accessed */
2827 lapic_init(DEFAULT_APIC_BASE);
2828
2829 bsp_apicid = APIC_ID(lapic.id);
2830 ap_apicid = (bsp_apicid == 0) ? 1 : 0;
2831
2832 /* BSP */
2833 mp_set_cpuids(0, bsp_apicid);
2834 /* one and only AP */
2835 mp_set_cpuids(1, ap_apicid);
2836}
2837
a0873f07
SZ
2838/*
2839 * Configure:
a0873f07 2840 * mp_naps
d787e80c 2841 * ID_TO_CPU(N), APIC ID to logical CPU table
a0873f07
SZ
2842 * CPU_TO_ID(N), logical CPU to APIC ID table
2843 */
2844static void
281d9482 2845mptable_lapic_enumerate(struct lapic_enumerator *e)
a0873f07 2846{
281d9482 2847 struct mptable_pos mpt;
322abba7
SZ
2848 struct mptable_lapic_cbarg1 arg1;
2849 struct mptable_lapic_cbarg2 arg2;
2850 mpcth_t cth;
44c36320 2851 int error, logical_cpus = 0;
5a16ccc3 2852 vm_offset_t lapic_addr;
281d9482
SZ
2853 vm_paddr_t mpfps_paddr;
2854
2855 mpfps_paddr = ((struct mptable_lapic_enumerator *)e)->mpfps_paddr;
2856 KKASSERT(mpfps_paddr != 0);
a0873f07 2857
281d9482
SZ
2858 error = mptable_map(&mpt, mpfps_paddr);
2859 if (error)
2860 panic("mptable_lapic_enumerate mptable_map failed\n");
2861
2862 KKASSERT(mpt.mp_fps != NULL);
a0873f07 2863
322abba7
SZ
2864 /*
2865 * Check for use of 'default' configuration
2866 */
281d9482 2867 if (mpt.mp_fps->mpfb1 != 0) {
322abba7 2868 mptable_lapic_default();
281d9482 2869 mptable_unmap(&mpt);
322abba7
SZ
2870 return;
2871 }
a0873f07 2872
281d9482 2873 cth = mpt.mp_cth;
322abba7 2874 KKASSERT(cth != NULL);
a0873f07 2875
322abba7
SZ
2876 /* Save local apic address */
2877 lapic_addr = (vm_offset_t)cth->apic_address;
2878 KKASSERT(lapic_addr != 0);
a0873f07 2879
322abba7
SZ
2880 /*
2881 * Find out how many CPUs do we have
2882 */
2883 bzero(&arg1, sizeof(arg1));
44c36320
SZ
2884 arg1.ht_fixup = 1; /* Apply ht fixup by default */
2885
322abba7
SZ
2886 error = mptable_iterate_entries(cth,
2887 mptable_lapic_pass1_callback, &arg1);
2888 if (error)
2889 panic("mptable_iterate_entries(lapic_pass1) failed\n");
322abba7 2890 KKASSERT(arg1.cpu_count != 0);
a0873f07 2891
322abba7 2892 /* See if we need to fixup HT logical CPUs. */
44c36320
SZ
2893 if (arg1.ht_fixup) {
2894 logical_cpus = mptable_hyperthread_fixup(arg1.ht_apicid_mask,
2895 arg1.cpu_count);
2896 if (logical_cpus != 0)
2897 arg1.cpu_count *= logical_cpus;
2898 }
2899 mp_naps = arg1.cpu_count;
a0873f07 2900
44c36320 2901 /* Qualify the numbers again, after possible HT fixup */
322abba7
SZ
2902 if (mp_naps > MAXCPU) {
2903 kprintf("Warning: only using %d of %d available CPUs!\n",
2904 MAXCPU, mp_naps);
2905 mp_naps = MAXCPU;
a0873f07
SZ
2906 }
2907
322abba7 2908 --mp_naps; /* subtract the BSP */
a0873f07 2909
322abba7
SZ
2910 /*
2911 * Link logical CPU id to local apic id
2912 */
2913 bzero(&arg2, sizeof(arg2));
2914 arg2.cpu = 1;
44c36320 2915 arg2.logical_cpus = logical_cpus;
a0873f07 2916
322abba7
SZ
2917 error = mptable_iterate_entries(cth,
2918 mptable_lapic_pass2_callback, &arg2);
2919 if (error)
2920 panic("mptable_iterate_entries(lapic_pass2) failed\n");
2921 KKASSERT(arg2.found_bsp);
a0873f07 2922
322abba7
SZ
2923 /* Map local apic */
2924 lapic_init(lapic_addr);
281d9482
SZ
2925
2926 mptable_unmap(&mpt);
2927}
2928
2929static int
2930mptable_lapic_probe(struct lapic_enumerator *e)
2931{
2932 vm_paddr_t mpfps_paddr;
2933
2934 mpfps_paddr = mptable_probe();
2935 if (mpfps_paddr == 0)
2936 return ENXIO;
2937
2938 ((struct mptable_lapic_enumerator *)e)->mpfps_paddr = mpfps_paddr;
2939 return 0;
a0873f07 2940}
5a16ccc3 2941
281d9482
SZ
2942static struct mptable_lapic_enumerator mptable_lapic_enumerator = {
2943 .enumerator = {
2944 .lapic_prio = LAPIC_ENUM_PRIO_MPTABLE,
2945 .lapic_probe = mptable_lapic_probe,
2946 .lapic_enumerate = mptable_lapic_enumerate
2947 }
2948};
2949
a0eaef71 2950static void
281d9482 2951mptable_apic_register(void)
a0eaef71 2952{
281d9482 2953 lapic_enumerator_register(&mptable_lapic_enumerator.enumerator);
a0eaef71 2954}
281d9482 2955SYSINIT(madt, SI_BOOT2_PRESMP, SI_ORDER_ANY, mptable_apic_register, 0);