x86_64: Remove old IOAPIC code
[dragonfly.git] / sys / platform / pc64 / x86_64 / mp_machdep.c
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $
26 */
27
28#include "opt_cpu.h"
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/malloc.h>
35#include <sys/memrange.h>
36#include <sys/cons.h> /* cngetc() */
37#include <sys/machintr.h>
38
39#include <sys/mplock2.h>
40
41#include <vm/vm.h>
42#include <vm/vm_param.h>
43#include <vm/pmap.h>
44#include <vm/vm_kern.h>
45#include <vm/vm_extern.h>
46#include <sys/lock.h>
47#include <vm/vm_map.h>
48#include <sys/user.h>
49#ifdef GPROF
50#include <sys/gmon.h>
51#endif
52
53#include <machine/smp.h>
54#include <machine_base/apic/apicreg.h>
55#include <machine/atomic.h>
56#include <machine/cpufunc.h>
57#include <machine_base/apic/mpapic.h>
58#include <machine/psl.h>
59#include <machine/segments.h>
60#include <machine/tss.h>
61#include <machine/specialreg.h>
62#include <machine/globaldata.h>
63#include <machine/pmap_inval.h>
64
65#include <machine/md_var.h> /* setidt() */
66#include <machine_base/icu/icu.h> /* IPIs */
67#include <machine_base/apic/ioapic_abi.h>
68#include <machine/intr_machdep.h> /* IPIs */
69
70#define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */
71
72#define WARMBOOT_TARGET 0
73#define WARMBOOT_OFF (KERNBASE + 0x0467)
74#define WARMBOOT_SEG (KERNBASE + 0x0469)
75
76#define BIOS_BASE (0xf0000)
77#define BIOS_BASE2 (0xe0000)
78#define BIOS_SIZE (0x10000)
79#define BIOS_COUNT (BIOS_SIZE/4)
80
81#define CMOS_REG (0x70)
82#define CMOS_DATA (0x71)
83#define BIOS_RESET (0x0f)
84#define BIOS_WARM (0x0a)
85
86#define PROCENTRY_FLAG_EN 0x01
87#define PROCENTRY_FLAG_BP 0x02
88#define IOAPICENTRY_FLAG_EN 0x01
89
90
91/* MP Floating Pointer Structure */
92typedef struct MPFPS {
93 char signature[4];
94 u_int32_t pap;
95 u_char length;
96 u_char spec_rev;
97 u_char checksum;
98 u_char mpfb1;
99 u_char mpfb2;
100 u_char mpfb3;
101 u_char mpfb4;
102 u_char mpfb5;
103} *mpfps_t;
104
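/*
 * The MP Floating Pointer Structure is located by its "_MP_" signature.
 * 'pap' holds the physical address of the MP configuration table header
 * (0 if none is provided), a non-zero 'mpfb1' selects one of the MP
 * spec's default configurations, and bit 7 of 'mpfb2' indicates that an
 * IMCR is present and the system starts out in PIC mode.
 */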
105/* MP Configuration Table Header */
106typedef struct MPCTH {
107 char signature[4];
108 u_short base_table_length;
109 u_char spec_rev;
110 u_char checksum;
111 u_char oem_id[8];
112 u_char product_id[12];
113 u_int32_t oem_table_pointer;
114 u_short oem_table_size;
115 u_short entry_count;
116 u_int32_t apic_address;
117 u_short extended_table_length;
118 u_char extended_table_checksum;
119 u_char reserved;
120} *mpcth_t;
121
122
123typedef struct PROCENTRY {
124 u_char type;
125 u_char apic_id;
126 u_char apic_version;
127 u_char cpu_flags;
128 u_int32_t cpu_signature;
129 u_int32_t feature_flags;
130 u_int32_t reserved1;
131 u_int32_t reserved2;
132} *proc_entry_ptr;
133
134typedef struct BUSENTRY {
135 u_char type;
136 u_char bus_id;
137 char bus_type[6];
138} *bus_entry_ptr;
139
140typedef struct IOAPICENTRY {
141 u_char type;
142 u_char apic_id;
143 u_char apic_version;
144 u_char apic_flags;
145 u_int32_t apic_address;
146} *io_apic_entry_ptr;
147
148typedef struct INTENTRY {
149 u_char type;
150 u_char int_type;
151 u_short int_flags;
152 u_char src_bus_id;
153 u_char src_bus_irq;
154 u_char dst_apic_id;
155 u_char dst_apic_int;
156} *int_entry_ptr;
157
158/* descriptions of MP basetable entries */
159typedef struct BASETABLE_ENTRY {
160 u_char type;
161 u_char length;
162 char name[16];
163} basetable_entry;
164
165struct mptable_pos {
166 mpfps_t mp_fps;
167 mpcth_t mp_cth;
168 vm_size_t mp_cth_mapsz;
169};
170
171#define MPTABLE_POS_USE_DEFAULT(mpt) \
172 ((mpt)->mp_fps->mpfb1 != 0 || (mpt)->mp_cth == NULL)
173
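/*
 * A non-zero feature byte 1 means the board implements one of the MP
 * spec's pre-defined default configurations and supplies no
 * configuration table; the default setup is likewise used when no
 * configuration table header could be found (mp_cth == NULL).
 */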
174struct mptable_bus {
175 int mb_id;
176 int mb_type; /* MPTABLE_BUS_ */
177 TAILQ_ENTRY(mptable_bus) mb_link;
178};
179
180#define MPTABLE_BUS_ISA 0
181#define MPTABLE_BUS_PCI 1
182
183struct mptable_bus_info {
184 TAILQ_HEAD(, mptable_bus) mbi_list;
185};
186
187struct mptable_pci_int {
188 int mpci_bus;
189 int mpci_dev;
190 int mpci_pin;
191
192 int mpci_ioapic_idx;
193 int mpci_ioapic_pin;
194 TAILQ_ENTRY(mptable_pci_int) mpci_link;
195};
196
197struct mptable_ioapic {
198 int mio_idx;
199 int mio_apic_id;
200 uint32_t mio_addr;
201 int mio_gsi_base;
202 int mio_npin;
203 TAILQ_ENTRY(mptable_ioapic) mio_link;
204};
205
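/*
 * Each enumerated I/O APIC records its physical register base
 * (mio_addr), the number of interrupt pins it provides (mio_npin) and
 * mio_gsi_base, taken here to be the global system interrupt number of
 * its first pin, so an interrupt can later be resolved to an
 * (APIC, pin) pair.
 */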
206typedef int (*mptable_iter_func)(void *, const void *, int);
207
208/*
209 * this code MUST be enabled here and in mpboot.s.
210 * it follows the very early stages of AP boot by placing values in CMOS ram.
211 * it NORMALLY will never be needed and thus the primitive method for enabling.
212 *
213 */
214#if defined(CHECK_POINTS)
215#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
216#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
217
218#define CHECK_INIT(D); \
219 CHECK_WRITE(0x34, (D)); \
220 CHECK_WRITE(0x35, (D)); \
221 CHECK_WRITE(0x36, (D)); \
222 CHECK_WRITE(0x37, (D)); \
223 CHECK_WRITE(0x38, (D)); \
224 CHECK_WRITE(0x39, (D));
225
226#define CHECK_PRINT(S); \
227 kprintf("%s: %d, %d, %d, %d, %d, %d\n", \
228 (S), \
229 CHECK_READ(0x34), \
230 CHECK_READ(0x35), \
231 CHECK_READ(0x36), \
232 CHECK_READ(0x37), \
233 CHECK_READ(0x38), \
234 CHECK_READ(0x39));
235
236#else /* CHECK_POINTS */
237
238#define CHECK_INIT(D)
239#define CHECK_PRINT(S)
240
241#endif /* CHECK_POINTS */
242
243/*
244 * Values to send to the POST hardware.
245 */
246#define MP_BOOTADDRESS_POST 0x10
247#define MP_PROBE_POST 0x11
248#define MPTABLE_PASS1_POST 0x12
249
250#define MP_START_POST 0x13
251#define MP_ENABLE_POST 0x14
252#define MPTABLE_PASS2_POST 0x15
253
254#define START_ALL_APS_POST 0x16
255#define INSTALL_AP_TRAMP_POST 0x17
256#define START_AP_POST 0x18
257
258#define MP_ANNOUNCE_POST 0x19
259
260/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
261int current_postcode;
262
263/** XXX FIXME: what system files declare these??? */
264extern struct region_descriptor r_gdt, r_idt;
265
266 int mp_naps; /* # of Application processors */
267#ifdef SMP /* APIC-IO */
268static int mp_nbusses; /* # of busses */
269int mp_napics; /* # of IO APICs */
270vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
271u_int32_t *io_apic_versions;
272#endif
273extern int nkpt;
274
275u_int32_t cpu_apic_versions[NAPICID]; /* populated during mptable scan */
276int64_t tsc0_offset;
277extern int64_t tsc_offsets[];
278
279extern u_long ebda_addr;
280
281#ifdef SMP /* APIC-IO */
282struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
283#endif
284
285/*
286 * APIC ID logical/physical mapping structures.
287 * We oversize these to simplify boot-time config.
288 */
289int cpu_num_to_apic_id[NAPICID];
290#ifdef SMP /* APIC-IO */
291int io_num_to_apic_id[NAPICID];
292#endif
293int apic_id_to_logical[NAPICID];
294
295/* AP uses this during bootstrap. Do not staticize. */
296char *bootSTK;
297static int bootAP;
298
299struct pcb stoppcbs[MAXCPU];
300
301extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
302
303static basetable_entry basetable_entry_types[] =
304{
305 {0, 20, "Processor"},
306 {1, 8, "Bus"},
307 {2, 8, "I/O APIC"},
308 {3, 8, "I/O INT"},
309 {4, 8, "Local INT"}
310};
311
312/*
313 * Local data and functions.
314 */
315
316static u_int boot_address;
317static u_int base_memory;
318static int mp_finish;
319static int mp_finish_lapic;
320
321static void mp_enable(u_int boot_addr);
322
323static int mptable_iterate_entries(const mpcth_t,
324 mptable_iter_func, void *);
325static int mptable_search(void);
326static long mptable_search_sig(u_int32_t target, int count);
327static int mptable_hyperthread_fixup(cpumask_t, int);
328static int mptable_map(struct mptable_pos *);
329static void mptable_unmap(struct mptable_pos *);
330static void mptable_bus_info_alloc(const mpcth_t,
331 struct mptable_bus_info *);
332static void mptable_bus_info_free(struct mptable_bus_info *);
333
334static int mptable_lapic_probe(struct lapic_enumerator *);
335static void mptable_lapic_enumerate(struct lapic_enumerator *);
336static void mptable_lapic_default(void);
337
338static int mptable_ioapic_probe(struct ioapic_enumerator *);
339static void mptable_ioapic_enumerate(struct ioapic_enumerator *);
340
341#ifdef SMP /* APIC-IO */
342static int apic_int_is_bus_type(int intr, int bus_type);
343#endif
344static int start_all_aps(u_int boot_addr);
345#if 0
346static void install_ap_tramp(u_int boot_addr);
347#endif
348static int start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest);
349static int smitest(void);
350
351static cpumask_t smp_startup_mask = 1; /* which cpus have been started */
352static cpumask_t smp_lapic_mask = 1; /* which cpus have had their lapic initialized */
353cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */
354SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, "");
355static u_int bootMP_size;
356
357int imcr_present;
358
359static vm_paddr_t mptable_fps_phyaddr;
360static int mptable_use_default;
361static TAILQ_HEAD(mptable_pci_int_list, mptable_pci_int) mptable_pci_int_list =
362 TAILQ_HEAD_INITIALIZER(mptable_pci_int_list);
363static TAILQ_HEAD(mptable_ioapic_list, mptable_ioapic) mptable_ioapic_list =
364 TAILQ_HEAD_INITIALIZER(mptable_ioapic_list);
365
366/*
367 * Calculate usable address in base memory for AP trampoline code.
368 */
369u_int
370mp_bootaddress(u_int basemem)
371{
372 POSTCODE(MP_BOOTADDRESS_POST);
373
374 base_memory = basemem;
375
376 bootMP_size = mptramp_end - mptramp_start;
377 boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
378 if (((basemem * 1024) - boot_address) < bootMP_size)
379 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
380 /* 3 levels of page table pages */
381 mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
382
383 return mptramp_pagetables;
384}
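/*
 * The resulting low-memory layout is three page table pages followed
 * immediately by the AP trampoline code, all placed just below the top
 * of base memory; mptramp_pagetables marks the start of that region.
 */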
385
386
387static void
388mptable_probe(void)
389{
390 struct mptable_pos mpt;
391 int error;
392
393 KKASSERT(mptable_fps_phyaddr == 0);
394
395 mptable_fps_phyaddr = mptable_search();
396 if (mptable_fps_phyaddr == 0)
397 return;
398
399 error = mptable_map(&mpt);
400 if (error) {
401 mptable_fps_phyaddr = 0;
402 return;
403 }
404
405 if (MPTABLE_POS_USE_DEFAULT(&mpt)) {
406 kprintf("MPTABLE: use default configuration\n");
407 mptable_use_default = 1;
408 }
409 if (mpt.mp_fps->mpfb2 & 0x80)
410 imcr_present = 1;
411
412 mptable_unmap(&mpt);
413}
414SYSINIT(mptable_probe, SI_BOOT2_PRESMP, SI_ORDER_FIRST, mptable_probe, 0);
415
416/*
417 * Look for an Intel MP spec table (ie, SMP capable hardware).
418 */
419static int
420mptable_search(void)
421{
422 long x;
423 u_int32_t target;
424
425 POSTCODE(MP_PROBE_POST);
426
427 /* see if EBDA exists */
428 if (ebda_addr != 0) {
429 /* search first 1K of EBDA */
430 target = (u_int32_t)ebda_addr;
431 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
432 return x;
433 } else {
434 /* last 1K of base memory, effective 'top of base' passed in */
435 target = (u_int32_t)(base_memory - 0x400);
436 if ((x = mptable_search_sig(target, 1024 / 4)) > 0)
437 return x;
438 }
439
440 /* search the BIOS */
441 target = (u_int32_t)BIOS_BASE;
442 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
443 return x;
444
445 /* search the extended BIOS */
446 target = (u_int32_t)BIOS_BASE2;
447 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0)
448 return x;
449
450 /* nothing found */
451 return 0;
452}
453
454static int
455mptable_iterate_entries(const mpcth_t cth, mptable_iter_func func, void *arg)
456{
457 int count, total_size;
458 const void *position;
459
460 KKASSERT(cth->base_table_length >= sizeof(struct MPCTH));
461 total_size = cth->base_table_length - sizeof(struct MPCTH);
462 position = (const uint8_t *)cth + sizeof(struct MPCTH);
463 count = cth->entry_count;
464
465 while (count--) {
466 int type, error;
467
468 KKASSERT(total_size >= 0);
469 if (total_size == 0) {
470 kprintf("invalid base MP table, "
471 "entry count and length mismatch\n");
472 return EINVAL;
473 }
474
475 type = *(const uint8_t *)position;
476 switch (type) {
477 case 0: /* processor_entry */
478 case 1: /* bus_entry */
479 case 2: /* io_apic_entry */
480 case 3: /* int_entry */
481 case 4: /* int_entry */
482 break;
483 default:
484 kprintf("unknown base MP table entry type %d\n", type);
485 return EINVAL;
486 }
487
488 if (total_size < basetable_entry_types[type].length) {
489 kprintf("invalid base MP table length, "
490 "does not contain all entries\n");
491 return EINVAL;
492 }
493 total_size -= basetable_entry_types[type].length;
494
495 error = func(arg, position, type);
496 if (error)
497 return error;
498
499 position = (const uint8_t *)position +
500 basetable_entry_types[type].length;
501 }
502 return 0;
503}
504
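/*
 * A callback handed to mptable_iterate_entries() is invoked once per
 * base-table entry with a pointer to the entry and its type (0-4);
 * returning a non-zero errno aborts the walk.  An illustrative counter
 * of processor entries might look like:
 *
 *	static int
 *	count_procs(void *arg, const void *pos, int type)
 *	{
 *		if (type == 0)		/* processor_entry */
 *			++*(int *)arg;
 *		return 0;
 *	}
 */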
505
506/*
507 * Startup the SMP processors.
508 */
509void
510mp_start(void)
511{
512 POSTCODE(MP_START_POST);
513 mp_enable(boot_address);
514}
515
516
517/*
518 * Print various information about the SMP system hardware and setup.
519 */
520void
521mp_announce(void)
522{
523 int x;
524
525 POSTCODE(MP_ANNOUNCE_POST);
526
527 kprintf("DragonFly/MP: Multiprocessor motherboard\n");
528 kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
529 kprintf(", version: 0x%08x\n", cpu_apic_versions[0]);
530 for (x = 1; x <= mp_naps; ++x) {
531 kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
532 kprintf(", version: 0x%08x\n", cpu_apic_versions[x]);
533 }
534
535 if (!apic_io_enable)
536 kprintf(" Warning: APIC I/O disabled\n");
537}
538
539/*
540 * AP cpu's call this to sync up protected mode.
541 *
542 * WARNING! %gs is not set up on entry. This routine sets up %gs.
543 */
544void
545init_secondary(void)
546{
547 int gsel_tss;
548 int x, myid = bootAP;
549 u_int64_t msr, cr0;
550 struct mdglobaldata *md;
551 struct privatespace *ps;
552
553 ps = &CPU_prvspace[myid];
554
555 gdt_segs[GPROC0_SEL].ssd_base =
556 (long) &ps->mdglobaldata.gd_common_tss;
557 ps->mdglobaldata.mi.gd_prvspace = ps;
558
559 /* We fill the 32-bit segment descriptors */
560 for (x = 0; x < NGDT; x++) {
561 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
562 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x]);
563 }
564 /* And now a 64-bit one */
565 ssdtosyssd(&gdt_segs[GPROC0_SEL],
566 (struct system_segment_descriptor *)&gdt[myid * NGDT + GPROC0_SEL]);
567
568 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
569 r_gdt.rd_base = (long) &gdt[myid * NGDT];
570 lgdt(&r_gdt); /* does magic intra-segment return */
571
572 /* lgdt() destroys the GSBASE value, so we load GSBASE after lgdt() */
573 wrmsr(MSR_FSBASE, 0); /* User value */
574 wrmsr(MSR_GSBASE, (u_int64_t)ps);
575 wrmsr(MSR_KGSBASE, 0); /* XXX User value while we're in the kernel */
576
577 lidt(&r_idt);
578
579#if 0
580 lldt(_default_ldt);
581 mdcpu->gd_currentldt = _default_ldt;
582#endif
583
584 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
585 gdt[myid * NGDT + GPROC0_SEL].sd_type = SDT_SYSTSS;
586
587 md = mdcpu; /* loaded through %gs:0 (mdglobaldata.mi.gd_prvspace)*/
588
589 md->gd_common_tss.tss_rsp0 = 0; /* not used until after switch */
590#if 0 /* JG XXX */
591 md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;
592#endif
593 md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL];
594 md->gd_common_tssd = *md->gd_tss_gdt;
595
596 /* double fault stack */
597 md->gd_common_tss.tss_ist1 =
598 (long)&md->mi.gd_prvspace->idlestack[
599 sizeof(md->mi.gd_prvspace->idlestack)];
600
601 ltr(gsel_tss);
602
603 /*
604 * Set to a known state:
605 * Set by mpboot.s: CR0_PG, CR0_PE
606 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
607 */
608 cr0 = rcr0();
609 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
610 load_cr0(cr0);
611
612 /* Set up the fast syscall stuff */
613 msr = rdmsr(MSR_EFER) | EFER_SCE;
614 wrmsr(MSR_EFER, msr);
615 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
616 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
617 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
618 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
619 wrmsr(MSR_STAR, msr);
620 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
621
622 pmap_set_opt(); /* PSE/4MB pages, etc */
623#if JGXXX
624 /* Initialize the PAT MSR. */
625 pmap_init_pat();
626#endif
627
628 /* set up CPU registers and state */
629 cpu_setregs();
630
631 /* set up SSE/NX registers */
632 initializecpu();
633
634 /* set up FPU state on the AP */
635 npxinit(__INITIAL_NPXCW__);
636
637 /* disable the APIC, just to be SURE */
638 lapic->svr &= ~APIC_SVR_ENABLE;
639
640 /* data returned to BSP */
641 cpu_apic_versions[0] = lapic->version;
642}
643
644/*******************************************************************
645 * local functions and data
646 */
647
648/*
649 * start the SMP system
650 */
651static void
652mp_enable(u_int boot_addr)
653{
654 POSTCODE(MP_ENABLE_POST);
655
656 lapic_config();
657
658 /* Initialize BSP's local APIC */
659 lapic_init(TRUE);
660
661 /* start each Application Processor */
662 start_all_aps(boot_addr);
663
664 if (apic_io_enable)
665 ioapic_config();
666
667 /* Finalize PIC */
668 MachIntrABI.finalize();
669}
670
671
672/*
673 * look for the MP spec signature
674 */
675
676/* string defined by the Intel MP Spec as identifying the MP table */
677#define MP_SIG 0x5f504d5f /* _MP_ */
678#define NEXT(X) ((X) += 4)
679static long
680mptable_search_sig(u_int32_t target, int count)
681{
682 vm_size_t map_size;
683 u_int32_t *addr;
684 int x, ret;
685
686 KKASSERT(target != 0);
687
688 map_size = count * sizeof(u_int32_t);
689 addr = pmap_mapdev((vm_paddr_t)target, map_size);
690
691 ret = 0;
692 for (x = 0; x < count; NEXT(x)) {
693 if (addr[x] == MP_SIG) {
694 /* make array index a byte index */
695 ret = target + (x * sizeof(u_int32_t));
696 break;
697 }
698 }
699
700 pmap_unmapdev((vm_offset_t)addr, map_size);
701 return ret;
702}
703
704
705typedef struct BUSDATA {
706 u_char bus_id;
707 enum busTypes bus_type;
708} bus_datum;
709
710typedef struct INTDATA {
711 u_char int_type;
712 u_short int_flags;
713 u_char src_bus_id;
714 u_char src_bus_irq;
715 u_char dst_apic_id;
716 u_char dst_apic_int;
717 u_char int_vector;
718} io_int, local_int;
719
720typedef struct BUSTYPENAME {
721 u_char type;
722 char name[7];
723} bus_type_name;
724
725/* the bus data */
726static bus_datum *bus_data;
727
728/* the IO INT data, one entry per possible APIC INTerrupt */
729static io_int *io_apic_ints;
730static int nintrs;
731
732static int processor_entry (const struct PROCENTRY *entry, int cpu);
733
734/*
735 * Check if we should perform a hyperthreading "fix-up" to
736 * enumerate any logical CPU's that aren't already listed
737 * in the table.
738 *
739 * XXX: We assume that all of the physical CPUs in the
740 * system have the same number of logical CPUs.
741 *
742 * XXX: We assume that APIC ID's are allocated such that
743 * the APIC ID's for a physical processor are aligned
744 * with the number of logical CPU's in the processor.
745 */
746static int
747mptable_hyperthread_fixup(cpumask_t id_mask, int cpu_count)
748{
749 int i, id, lcpus_max, logical_cpus;
750
751 if ((cpu_feature & CPUID_HTT) == 0)
752 return 0;
753
754 lcpus_max = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
755 if (lcpus_max <= 1)
756 return 0;
757
758 if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
759 /*
760 * INSTRUCTION SET REFERENCE, A-M (#253666)
761 * Page 3-181, Table 3-20
762 * "The nearest power-of-2 integer that is not smaller
763 * than EBX[23:16] is the number of unique initial APIC
764 * IDs reserved for addressing different logical
765 * processors in a physical package."
766 */
767 for (i = 0; ; ++i) {
768 if ((1 << i) >= lcpus_max) {
769 lcpus_max = 1 << i;
770 break;
771 }
772 }
773 }
774
775 KKASSERT(cpu_count != 0);
776 if (cpu_count == lcpus_max) {
777 /* We have nothing to fix */
778 return 0;
779 } else if (cpu_count == 1) {
780 /* XXX this may be incorrect */
781 logical_cpus = lcpus_max;
782 } else {
783 int cur, prev, dist;
784
785 /*
786 * Calculate the distances between two nearest
787 * APIC IDs. If all such distances are same,
788 * then it is the number of missing cpus that
789 * we are going to fill later.
790 */
791 dist = cur = prev = -1;
792 for (id = 0; id < MAXCPU; ++id) {
793 if ((id_mask & CPUMASK(id)) == 0)
794 continue;
795
796 cur = id;
797 if (prev >= 0) {
798 int new_dist = cur - prev;
799
800 if (dist < 0)
801 dist = new_dist;
802
803 /*
804 * Make sure that all distances
805 * between two nearest APIC IDs
806 * are same.
807 */
808 if (dist != new_dist)
809 return 0;
810 }
811 prev = cur;
812 }
813 if (dist == 1)
814 return 0;
815
816 /* Must be power of 2 */
817 if (dist & (dist - 1))
818 return 0;
819
820 /* Can't exceed CPU package capacity */
821 if (dist > lcpus_max)
822 logical_cpus = lcpus_max;
823 else
824 logical_cpus = dist;
825 }
826
827 /*
828 * For each APIC ID of a CPU that is set in the mask,
829 * scan the other candidate APIC ID's for this
830 * physical processor. If any of those ID's are
831 * already in the table, then kill the fixup.
832 */
833 for (id = 0; id < MAXCPU; id++) {
834 if ((id_mask & CPUMASK(id)) == 0)
835 continue;
836 /* First, make sure we are on a logical_cpus boundary. */
837 if (id % logical_cpus != 0)
838 return 0;
839 for (i = id + 1; i < id + logical_cpus; i++)
840 if ((id_mask & CPUMASK(i)) != 0)
841 return 0;
842 }
843 return logical_cpus;
844}
845
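/*
 * Example: with hyperthreading reporting two logical cpus per package
 * and an MP table that lists only APIC IDs 0, 2, 4 and 6, the constant
 * spacing of 2 between the listed IDs is returned as logical_cpus so
 * the caller can enumerate the missing sibling IDs 1, 3, 5 and 7.
 * Irregular spacing, a non-power-of-2 distance, or an already-listed
 * sibling ID causes the fixup to be abandoned (return 0).
 */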
846static int
847mptable_map(struct mptable_pos *mpt)
848{
849 mpfps_t fps = NULL;
850 mpcth_t cth = NULL;
851 vm_size_t cth_mapsz = 0;
852
853 KKASSERT(mptable_fps_phyaddr != 0);
854
855 bzero(mpt, sizeof(*mpt));
856
857 fps = pmap_mapdev(mptable_fps_phyaddr, sizeof(*fps));
858 if (fps->pap != 0) {
859 /*
860 * Map configuration table header to get
861 * the base table size
862 */
863 cth = pmap_mapdev(fps->pap, sizeof(*cth));
864 cth_mapsz = cth->base_table_length;
865 pmap_unmapdev((vm_offset_t)cth, sizeof(*cth));
866
867 if (cth_mapsz < sizeof(*cth)) {
868 kprintf("invalid base MP table length %d\n",
869 (int)cth_mapsz);
870 pmap_unmapdev((vm_offset_t)fps, sizeof(*fps));
871 return EINVAL;
872 }
873
874 /*
875 * Map the base table
876 */
877 cth = pmap_mapdev(fps->pap, cth_mapsz);
878 }
879
880 mpt->mp_fps = fps;
881 mpt->mp_cth = cth;
882 mpt->mp_cth_mapsz = cth_mapsz;
883
884 return 0;
885}
886
887static void
888mptable_unmap(struct mptable_pos *mpt)
889{
890 if (mpt->mp_cth != NULL) {
891 pmap_unmapdev((vm_offset_t)mpt->mp_cth, mpt->mp_cth_mapsz);
892 mpt->mp_cth = NULL;
893 mpt->mp_cth_mapsz = 0;
894 }
895 if (mpt->mp_fps != NULL) {
896 pmap_unmapdev((vm_offset_t)mpt->mp_fps, sizeof(*mpt->mp_fps));
897 mpt->mp_fps = NULL;
898 }
899}
900
901void
902assign_apic_irq(int apic, int intpin, int irq)
903{
904 int x;
905
906 if (int_to_apicintpin[irq].ioapic != -1)
907 panic("assign_apic_irq: inconsistent table");
908
909 int_to_apicintpin[irq].ioapic = apic;
910 int_to_apicintpin[irq].int_pin = intpin;
911 int_to_apicintpin[irq].apic_address = ioapic[apic];
912 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
913
914 for (x = 0; x < nintrs; x++) {
915 if ((io_apic_ints[x].int_type == 0 ||
916 io_apic_ints[x].int_type == 3) &&
917 io_apic_ints[x].int_vector == 0xff &&
918 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
919 io_apic_ints[x].dst_apic_int == intpin)
920 io_apic_ints[x].int_vector = irq;
921 }
922}
923
924void
925revoke_apic_irq(int irq)
926{
927 int x;
928 int oldapic;
929 int oldintpin;
930
931 if (int_to_apicintpin[irq].ioapic == -1)
932 panic("revoke_apic_irq: inconsistent table");
933
934 oldapic = int_to_apicintpin[irq].ioapic;
935 oldintpin = int_to_apicintpin[irq].int_pin;
936
937 int_to_apicintpin[irq].ioapic = -1;
938 int_to_apicintpin[irq].int_pin = 0;
939 int_to_apicintpin[irq].apic_address = NULL;
940 int_to_apicintpin[irq].redirindex = 0;
941
942 for (x = 0; x < nintrs; x++) {
943 if ((io_apic_ints[x].int_type == 0 ||
944 io_apic_ints[x].int_type == 3) &&
945 io_apic_ints[x].int_vector != 0xff &&
946 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
947 io_apic_ints[x].dst_apic_int == oldintpin)
948 io_apic_ints[x].int_vector = 0xff;
949 }
950}
951
952void
953mp_set_cpuids(int cpu_id, int apic_id)
954{
955 CPU_TO_ID(cpu_id) = apic_id;
956 ID_TO_CPU(apic_id) = cpu_id;
957
958 if (apic_id > lapic_id_max)
959 lapic_id_max = apic_id;
960}
961
962static int
963processor_entry(const struct PROCENTRY *entry, int cpu)
964{
965 KKASSERT(cpu > 0);
966
967 /* check for usability */
968 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
969 return 0;
970
971 /* check for BSP flag */
972 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
973 mp_set_cpuids(0, entry->apic_id);
974 return 0; /* it's already been counted */
975 }
976
977 /* add another AP to list, if less than max number of CPUs */
978 else if (cpu < MAXCPU) {
979 mp_set_cpuids(cpu, entry->apic_id);
980 return 1;
981 }
982
983 return 0;
984}
985
986static int
987apic_int_is_bus_type(int intr, int bus_type)
988{
989 int bus;
990
991 for (bus = 0; bus < mp_nbusses; ++bus)
992 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
993 && ((int) bus_data[bus].bus_type == bus_type))
994 return 1;
995
996 return 0;
997}
998
999/*
1000 * Given a traditional ISA INT mask, return an APIC mask.
1001 */
1002u_int
1003isa_apic_mask(u_int isa_mask)
1004{
1005 int isa_irq;
1006 int apic_pin;
1007
1008#if defined(SKIP_IRQ15_REDIRECT)
1009 if (isa_mask == (1 << 15)) {
1010 kprintf("skipping ISA IRQ15 redirect\n");
1011 return isa_mask;
1012 }
1013#endif /* SKIP_IRQ15_REDIRECT */
1014
1015 isa_irq = ffs(isa_mask); /* find its bit position */
1016 if (isa_irq == 0) /* doesn't exist */
1017 return 0;
1018 --isa_irq; /* make it zero based */
1019
1020 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1021 if (apic_pin == -1)
1022 return 0;
1023
1024 return (1 << apic_pin); /* convert pin# to a mask */
1025}
1026
1027/*
1028 * Determine which APIC pin an ISA/EISA INT is attached to.
1029 */
1030#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1031#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1032#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1033#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1034
1035#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1036int
1037isa_apic_irq(int isa_irq)
1038{
1039 int intr;
1040
1041 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1042 if (INTTYPE(intr) == 0) { /* standard INT */
1043 if (SRCBUSIRQ(intr) == isa_irq) {
1044 if (apic_int_is_bus_type(intr, ISA) ||
1045 apic_int_is_bus_type(intr, EISA)) {
1046 if (INTIRQ(intr) == 0xff)
1047 return -1; /* unassigned */
1048 return INTIRQ(intr); /* found */
1049 }
1050 }
1051 }
1052 }
1053 return -1; /* NOT found */
1054}
1055
1056
1057/*
1058 * Determine which APIC pin a PCI INT is attached to.
1059 */
1060#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1061#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1062#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1063int
1064pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1065{
1066 int intr;
1067
1068 --pciInt; /* zero based */
1069
1070 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1071 if ((INTTYPE(intr) == 0) /* standard INT */
1072 && (SRCBUSID(intr) == pciBus)
1073 && (SRCBUSDEVICE(intr) == pciDevice)
1074 && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */
1075 if (apic_int_is_bus_type(intr, PCI)) {
1076 if (INTIRQ(intr) == 0xff) {
1077 kprintf("IOAPIC: pci_apic_irq() "
1078 "failed\n");
1079 return -1; /* unassigned */
1080 }
1081 return INTIRQ(intr); /* exact match */
1082 }
1083 }
1084 }
1085
1086 return -1; /* NOT found */
1087}
1088
1089int
1090next_apic_irq(int irq)
1091{
1092 int intr, ointr;
1093 int bus, bustype;
1094
1095 bus = 0;
1096 bustype = 0;
1097 for (intr = 0; intr < nintrs; intr++) {
1098 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1099 continue;
1100 bus = SRCBUSID(intr);
1101 bustype = apic_bus_type(bus);
1102 if (bustype != ISA &&
1103 bustype != EISA &&
1104 bustype != PCI)
1105 continue;
1106 break;
1107 }
1108 if (intr >= nintrs) {
1109 return -1;
1110 }
1111 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1112 if (INTTYPE(ointr) != 0)
1113 continue;
1114 if (bus != SRCBUSID(ointr))
1115 continue;
1116 if (bustype == PCI) {
1117 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1118 continue;
1119 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1120 continue;
1121 }
1122 if (bustype == ISA || bustype == EISA) {
1123 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1124 continue;
1125 }
1126 if (INTPIN(intr) == INTPIN(ointr))
1127 continue;
1128 break;
1129 }
1130 if (ointr >= nintrs) {
1131 return -1;
1132 }
1133 return INTIRQ(ointr);
1134}
1135#undef SRCBUSLINE
1136#undef SRCBUSDEVICE
1137#undef SRCBUSID
1138#undef SRCBUSIRQ
1139
1140#undef INTPIN
1141#undef INTIRQ
1142#undef INTAPIC
1143#undef INTTYPE
1144
1145/*
1146 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1147 *
1148 * XXX FIXME:
1149 * Exactly what this means is unclear at this point. It is a solution
1150 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1151 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1152 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1153 * option.
1154 */
1155int
1156undirect_isa_irq(int rirq)
1157{
1158#if defined(READY)
1159 if (bootverbose)
1160 kprintf("Freeing redirected ISA irq %d.\n", rirq);
1161 /** FIXME: tickle the MB redirector chip */
1162 return /* XXX */;
1163#else
1164 if (bootverbose)
1165 kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1166 return 0;
1167#endif /* READY */
1168}
1169
1170
1171/*
1172 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1173 */
1174int
1175undirect_pci_irq(int rirq)
1176{
1177#if defined(READY)
1178 if (bootverbose)
1179 kprintf("Freeing redirected PCI irq %d.\n", rirq);
1180
1181 /** FIXME: tickle the MB redirector chip */
1182 return /* XXX */;
1183#else
1184 if (bootverbose)
1185 kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1186 rirq);
1187 return 0;
1188#endif /* READY */
1189}
1190
1191
1192/*
1193 * given a bus ID, return:
1194 * the bus type if found
1195 * -1 if NOT found
1196 */
1197int
1198apic_bus_type(int id)
1199{
1200 int x;
1201
1202 for (x = 0; x < mp_nbusses; ++x)
1203 if (bus_data[x].bus_id == id)
1204 return bus_data[x].bus_type;
1205
1206 return -1;
1207}
1208
1209/*
1210 * given a LOGICAL APIC# and pin#, return:
1211 * the associated src bus ID if found
1212 * -1 if NOT found
1213 */
1214int
1215apic_src_bus_id(int apic, int pin)
1216{
1217 int x;
1218
1219 /* search each of the possible INTerrupt sources */
1220 for (x = 0; x < nintrs; ++x)
1221 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1222 (pin == io_apic_ints[x].dst_apic_int))
1223 return (io_apic_ints[x].src_bus_id);
1224
1225 return -1; /* NOT found */
1226}
1227
1228/*
1229 * given a LOGICAL APIC# and pin#, return:
1230 * the associated src bus IRQ if found
1231 * -1 if NOT found
1232 */
1233int
1234apic_src_bus_irq(int apic, int pin)
1235{
1236 int x;
1237
1238 for (x = 0; x < nintrs; x++)
1239 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1240 (pin == io_apic_ints[x].dst_apic_int))
1241 return (io_apic_ints[x].src_bus_irq);
1242
1243 return -1; /* NOT found */
1244}
1245
1246
1247/*
1248 * given a LOGICAL APIC# and pin#, return:
1249 * the associated INTerrupt type if found
1250 * -1 if NOT found
1251 */
1252int
1253apic_int_type(int apic, int pin)
1254{
1255 int x;
1256
1257 /* search each of the possible INTerrupt sources */
1258 for (x = 0; x < nintrs; ++x) {
1259 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1260 (pin == io_apic_ints[x].dst_apic_int))
1261 return (io_apic_ints[x].int_type);
1262 }
1263 return -1; /* NOT found */
1264}
1265
1266/*
1267 * Return the IRQ associated with an APIC pin
1268 */
1269int
1270apic_irq(int apic, int pin)
1271{
1272 int x;
1273 int res;
1274
1275 for (x = 0; x < nintrs; ++x) {
1276 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1277 (pin == io_apic_ints[x].dst_apic_int)) {
1278 res = io_apic_ints[x].int_vector;
1279 if (res == 0xff)
1280 return -1;
1281 if (apic != int_to_apicintpin[res].ioapic)
1282 panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic);
1283 if (pin != int_to_apicintpin[res].int_pin)
1284 panic("apic_irq inconsistent table (2)");
1285 return res;
1286 }
1287 }
1288 return -1;
1289}
1290
1291
1292/*
1293 * given a LOGICAL APIC# and pin#, return:
1294 * the associated trigger mode if found
1295 * -1 if NOT found
1296 */
1297int
1298apic_trigger(int apic, int pin)
1299{
1300 int x;
1301
1302 /* search each of the possible INTerrupt sources */
1303 for (x = 0; x < nintrs; ++x)
1304 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1305 (pin == io_apic_ints[x].dst_apic_int))
1306 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1307
1308 return -1; /* NOT found */
1309}
1310
1311
1312/*
1313 * given a LOGICAL APIC# and pin#, return:
1314 * the associated 'active' level if found
1315 * -1 if NOT found
1316 */
1317int
1318apic_polarity(int apic, int pin)
1319{
1320 int x;
1321
1322 /* search each of the possible INTerrupt sources */
1323 for (x = 0; x < nintrs; ++x)
1324 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1325 (pin == io_apic_ints[x].dst_apic_int))
1326 return (io_apic_ints[x].int_flags & 0x03);
1327
1328 return -1; /* NOT found */
1329}
1330
1331/*
1332 * Map a physical memory address representing I/O into KVA. The I/O
1333 * block is assumed not to cross a page boundary.
1334 */
1335void *
1336ioapic_map(vm_paddr_t pa)
1337{
1338 KKASSERT(pa < 0x100000000LL);
1339
1340 return pmap_mapdev_uncacheable(pa, PAGE_SIZE);
1341}
1342
1343/*
1344 * start each AP in our list
1345 */
1346static int
1347start_all_aps(u_int boot_addr)
1348{
1349 vm_offset_t va = boot_address + KERNBASE;
1350 u_int64_t *pt4, *pt3, *pt2;
1351 int x, i, pg;
1352 int shift;
1353 int smicount;
1354 int smibest;
1355 int smilast;
1356 u_char mpbiosreason;
1357 u_long mpbioswarmvec;
1358 struct mdglobaldata *gd;
1359 struct privatespace *ps;
1360
1361 POSTCODE(START_ALL_APS_POST);
1362
1363 /* install the AP 1st level boot code */
1364 pmap_kenter(va, boot_address);
1365 cpu_invlpg((void *)va); /* JG XXX */
1366 bcopy(mptramp_start, (void *)va, bootMP_size);
1367
1368 /* Locate the page tables, they'll be below the trampoline */
1369 pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
1370 pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
1371 pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
1372
1373 /* Create the initial 1GB replicated page tables */
1374 for (i = 0; i < 512; i++) {
1375 /* Each slot of the level 4 pages points to the same level 3 page */
1376 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
1377 pt4[i] |= PG_V | PG_RW | PG_U;
1378
1379 /* Each slot of the level 3 pages points to the same level 2 page */
1380 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
1381 pt3[i] |= PG_V | PG_RW | PG_U;
1382
1383 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
1384 pt2[i] = i * (2 * 1024 * 1024);
1385 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
1386 }
1387
1388 /* save the current value of the warm-start vector */
1389 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
1390 outb(CMOS_REG, BIOS_RESET);
1391 mpbiosreason = inb(CMOS_DATA);
1392
1393 /* setup a vector to our boot code */
1394 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1395 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
1396 outb(CMOS_REG, BIOS_RESET);
1397 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
1398
1399 /*
1400 * If we have a TSC we can figure out the SMI interrupt rate.
1401 * The SMI does not necessarily use a constant rate. Spend
1402 * up to 250ms trying to figure it out.
1403 */
1404 smibest = 0;
1405 if (cpu_feature & CPUID_TSC) {
1406 set_apic_timer(275000);
1407 smilast = read_apic_timer();
1408 for (x = 0; x < 20 && read_apic_timer(); ++x) {
1409 smicount = smitest();
1410 if (smibest == 0 || smilast - smicount < smibest)
1411 smibest = smilast - smicount;
1412 smilast = smicount;
1413 }
1414 if (smibest > 250000)
1415 smibest = 0;
1416 if (smibest) {
1417 smibest = smibest * (int64_t)1000000 /
1418 get_apic_timer_frequency();
1419 }
1420 }
1421 if (smibest)
1422 kprintf("SMI Frequency (worst case): %d Hz (%d us)\n",
1423 1000000 / smibest, smibest);
1424
1425 /* start each AP */
1426 for (x = 1; x <= mp_naps; ++x) {
1427
1428 /* This is a bit verbose, it will go away soon. */
1429
1430 /* first page of AP's private space */
1431 pg = x * x86_64_btop(sizeof(struct privatespace));
1432
1433 /* allocate new private data page(s) */
1434 gd = (struct mdglobaldata *)kmem_alloc(&kernel_map,
1435 MDGLOBALDATA_BASEALLOC_SIZE);
1436
1437 gd = &CPU_prvspace[x].mdglobaldata; /* official location */
1438 bzero(gd, sizeof(*gd));
1439 gd->mi.gd_prvspace = ps = &CPU_prvspace[x];
1440
1441 /* prime data page for it to use */
1442 mi_gdinit(&gd->mi, x);
1443 cpu_gdinit(gd, x);
1444 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1));
1445 bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1));
1446
1447 /* setup a vector to our boot code */
1448 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1449 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1450 outb(CMOS_REG, BIOS_RESET);
1451 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
1452
1453 /*
1454 * Setup the AP boot stack
1455 */
1456 bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
1457 bootAP = x;
1458
1459 /* attempt to start the Application Processor */
1460 CHECK_INIT(99); /* setup checkpoints */
1461 if (!start_ap(gd, boot_addr, smibest)) {
1462 kprintf("\nAP #%d (PHY# %d) failed!\n",
1463 x, CPU_TO_ID(x));
1464 CHECK_PRINT("trace"); /* show checkpoints */
1465 /* better panic as the AP may be running loose */
1466 kprintf("panic y/n? [y] ");
1467 if (cngetc() != 'n')
1468 panic("bye-bye");
1469 }
1470 CHECK_PRINT("trace"); /* show checkpoints */
1471
1472 /* record its version info */
1473 cpu_apic_versions[x] = cpu_apic_versions[0];
1474 }
1475
1476 /* set ncpus to 1 + highest logical cpu. Not all may have come up */
1477 ncpus = x;
1478
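	/*
	 * Example: with ncpus == 6 the loops below yield ncpus2 == 4
	 * (rounded down to a power of 2) and ncpus_fit == 8 (rounded up).
	 */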
1479 /* ncpus2 -- ncpus rounded down to the nearest power of 2 */
1480 for (shift = 0; (1 << shift) <= ncpus; ++shift)
1481 ;
1482 --shift;
1483 ncpus2_shift = shift;
1484 ncpus2 = 1 << shift;
1485 ncpus2_mask = ncpus2 - 1;
1486
1487 /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
1488 if ((1 << shift) < ncpus)
1489 ++shift;
1490 ncpus_fit = 1 << shift;
1491 ncpus_fit_mask = ncpus_fit - 1;
1492
1493 /* build our map of 'other' CPUs */
1494 mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid);
1495 mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus);
1496 bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus);
1497
1498 /* fill in our (BSP) APIC version */
1499 cpu_apic_versions[0] = lapic->version;
1500
1501 /* restore the warmstart vector */
1502 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1503 outb(CMOS_REG, BIOS_RESET);
1504 outb(CMOS_DATA, mpbiosreason);
1505
1506 /*
1507 * NOTE! The idlestack for the BSP was setup by locore. Finish
1508 * up, clean out the P==V mapping we did earlier.
1509 */
1510 pmap_set_opt();
1511
1512 /*
1513 * Wait for all APs to finish initializing their LAPIC
1514 */
1515 mp_finish_lapic = 1;
1516 if (bootverbose)
1517 kprintf("SMP: Waiting APs LAPIC initialization\n");
1518 if (cpu_feature & CPUID_TSC)
1519 tsc0_offset = rdtsc();
1520 tsc_offsets[0] = 0;
1521 rel_mplock();
1522 while (smp_lapic_mask != smp_startup_mask) {
1523 cpu_lfence();
1524 if (cpu_feature & CPUID_TSC)
1525 tsc0_offset = rdtsc();
1526 }
1527 while (try_mplock() == 0)
1528 ;
1529
1530 /* number of APs actually started */
1531 return ncpus - 1;
1532}
1533
1534
1535/*
1536 * load the 1st level AP boot code into base memory.
1537 */
1538
1539/* targets for relocation */
1540extern void bigJump(void);
1541extern void bootCodeSeg(void);
1542extern void bootDataSeg(void);
1543extern void MPentry(void);
1544extern u_int MP_GDT;
1545extern u_int mp_gdtbase;
1546
1547#if 0
1548
1549static void
1550install_ap_tramp(u_int boot_addr)
1551{
1552 int x;
1553 int size = *(int *) ((u_long) & bootMP_size);
1554 u_char *src = (u_char *) ((u_long) bootMP);
1555 u_char *dst = (u_char *) boot_addr + KERNBASE;
1556 u_int boot_base = (u_int) bootMP;
1557 u_int8_t *dst8;
1558 u_int16_t *dst16;
1559 u_int32_t *dst32;
1560
1561 POSTCODE(INSTALL_AP_TRAMP_POST);
1562
1563 for (x = 0; x < size; ++x)
1564 *dst++ = *src++;
1565
1566 /*
1567 * modify addresses in code we just moved to basemem. unfortunately we
1568 * need fairly detailed info about mpboot.s for this to work. changes
1569 * to mpboot.s might require changes here.
1570 */
1571
1572 /* boot code is located in KERNEL space */
1573 dst = (u_char *) boot_addr + KERNBASE;
1574
1575 /* modify the lgdt arg */
1576 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1577 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1578
1579 /* modify the ljmp target for MPentry() */
1580 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1581 *dst32 = ((u_int) MPentry - KERNBASE);
1582
1583 /* modify the target for boot code segment */
1584 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1585 dst8 = (u_int8_t *) (dst16 + 1);
1586 *dst16 = (u_int) boot_addr & 0xffff;
1587 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
1588
1589 /* modify the target for boot data segment */
1590 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1591 dst8 = (u_int8_t *) (dst16 + 1);
1592 *dst16 = (u_int) boot_addr & 0xffff;
1593 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
1594}
1595
1596#endif
1597
1598/*
1599 * This function starts the AP (application processor) identified
1600 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
1601 * to accomplish this. This is necessary because of the nuances
1602 * of the different hardware we might encounter. It ain't pretty,
1603 * but it seems to work.
1604 *
1605 * NOTE: eventually an AP gets to ap_init(), which is called just
1606 * before the AP goes into the LWKT scheduler's idle loop.
1607 */
1608static int
1609start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest)
1610{
1611 int physical_cpu;
1612 int vector;
1613 u_long icr_lo, icr_hi;
1614
1615 POSTCODE(START_AP_POST);
1616
1617 /* get the PHYSICAL APIC ID# */
1618 physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid);
1619
1620 /* calculate the vector */
1621 vector = (boot_addr >> 12) & 0xff;
1622
1623 /* We don't want anything interfering */
1624 cpu_disable_intr();
1625
1626 /* Make sure the target cpu sees everything */
1627 wbinvd();
1628
1629 /*
1630 * Try to detect when a SMI has occurred, wait up to 200ms.
1631 *
1632 * If a SMI occurs during an AP reset but before we issue
1633 * the STARTUP command, the AP may brick. To work around
1634 * this problem we hold off doing the AP startup until
1635 * after we have detected the SMI. Hopefully another SMI
1636 * will not occur before we finish the AP startup.
1637 *
1638 * Retries don't seem to help. SMIs have a window of opportunity
1639 * and if USB->legacy keyboard emulation is enabled in the BIOS
1640 * the interrupt rate can be quite high.
1641 *
1642 * NOTE: Don't worry about the L1 cache load, it might bloat
1643 * ldelta a little but ndelta will be so huge when the SMI
1644 * occurs the detection logic will still work fine.
1645 */
1646 if (smibest) {
1647 set_apic_timer(200000);
1648 smitest();
1649 }
1650
46d4e165
JG
1651 /*
1652 * first we do an INIT/RESET IPI. This INIT IPI might be run, resetting
1653 * and running the target CPU. OR this INIT IPI might be latched (P5
1654 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
1655 * ignored.
1656 *
1657 * see apic/apicreg.h for icr bit definitions.
1658 *
1659 * TIME CRITICAL CODE, DO NOT DO ANY KPRINTFS IN THE HOT PATH.
1660 */
1661
1662 /*
1663 * Setup the address for the target AP. We can setup
1664 * icr_hi once and then just trigger operations with
1665 * icr_lo.
1666 */
1667 icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
1668 icr_hi |= (physical_cpu << 24);
1669 icr_lo = lapic->icr_lo & 0xfff00000;
1670 lapic->icr_hi = icr_hi;
1671
1672 /*
1673 * Do an INIT IPI: assert RESET
1674 *
1675 * Use edge triggered mode to assert INIT
1676 */
1677 lapic->icr_lo = icr_lo | 0x00004500;
1678 while (lapic->icr_lo & APIC_DELSTAT_MASK)
1679 /* spin */ ;
1680
1681 /*
1682 * The spec calls for a 10ms delay but we may have to use a
1683 * MUCH lower delay to avoid bricking an AP due to a fast SMI
1684 * interrupt. We have other loops here too and dividing by 2
1685 * doesn't seem to be enough even after subtracting 350us,
1686 * so we divide by 4.
1687 *
1688 * Our minimum delay is 150uS, maximum is 10ms. If no SMI
1689 * interrupt was detected we use the full 10ms.
1690 */
1691 if (smibest == 0)
1692 u_sleep(10000);
1693 else if (smibest < 150 * 4 + 350)
1694 u_sleep(150);
1695 else if ((smibest - 350) / 4 < 10000)
1696 u_sleep((smibest - 350) / 4);
1697 else
1698 u_sleep(10000);
1699
1700 /*
1701 * Do an INIT IPI: deassert RESET
1702 *
1703 * Use level triggered mode to deassert. It is unclear
1704 * why we need to do this.
1705 */
1706 lapic->icr_lo = icr_lo | 0x00008500;
1707 while (lapic->icr_lo & APIC_DELSTAT_MASK)
1708 /* spin */ ;
1709 u_sleep(150); /* wait 150us */
1710
1711 /*
1712 * Next we do a STARTUP IPI: the previous INIT IPI might still be
1713 * latched, (P5 bug) this 1st STARTUP would then terminate
1714 * immediately, and the previously started INIT IPI would continue. OR
1715 * the previous INIT IPI has already run. and this STARTUP IPI will
1716 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
1717 * will run.
1718 */
1719 lapic->icr_lo = icr_lo | 0x00000600 | vector;
1720 while (lapic->icr_lo & APIC_DELSTAT_MASK)
1721 /* spin */ ;
1722 u_sleep(200); /* wait ~200uS */
1723
1724 /*
1725 * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
1726 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
1727 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
1728 * recognized after hardware RESET or INIT IPI.
1729 */
1730 lapic->icr_lo = icr_lo | 0x00000600 | vector;
1731 while (lapic->icr_lo & APIC_DELSTAT_MASK)
1732 /* spin */ ;
1733
1734 /* Resume normal operation */
1735 cpu_enable_intr();
1736
1737 /* wait for it to start, see ap_init() */
1738 set_apic_timer(5000000);/* == 5 seconds */
1739 while (read_apic_timer()) {
1740 if (smp_startup_mask & CPUMASK(gd->mi.gd_cpuid))
1741 return 1; /* return SUCCESS */
1742 }
1743
1744 return 0; /* return FAILURE */
1745}
1746
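/*
 * The sequence above implements the MP spec's "universal start-up
 * algorithm": assert INIT, delay, deassert INIT, then issue up to two
 * STARTUP IPIs whose vector is the page number of the trampoline code,
 * waiting for the ICR delivery-status bit to clear after each step.
 */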
1747static
1748int
1749smitest(void)
1750{
1751 int64_t ltsc;
1752 int64_t ntsc;
1753 int64_t ldelta;
1754 int64_t ndelta;
1755 int count;
1756
1757 ldelta = 0;
1758 ndelta = 0;
1759 while (read_apic_timer()) {
1760 ltsc = rdtsc();
1761 for (count = 0; count < 100; ++count)
1762 ntsc = rdtsc(); /* force loop to occur */
1763 if (ldelta) {
1764 ndelta = ntsc - ltsc;
1765 if (ldelta > ndelta)
1766 ldelta = ndelta;
1767 if (ndelta > ldelta * 2)
1768 break;
1769 } else {
1770 ldelta = ntsc - ltsc;
1771 }
1772 }
1773 return(read_apic_timer());
1774}
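/*
 * smitest() spins on rdtsc() while the APIC timer counts down, tracking
 * the smallest TSC delta observed between iterations; a delta more than
 * twice that minimum is taken as evidence that an SMI stole the cpu, at
 * which point the remaining APIC timer count is returned.
 */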
1775
1776/*
1777 * Synchronously flush the TLB on all other CPU's. The current cpu's
1778 * TLB is not flushed. If the caller wishes to flush the current cpu's
1779 * TLB the caller must call cpu_invltlb() in addition to smp_invltlb().
1780 *
1781 * NOTE: If for some reason we were unable to start all cpus we cannot
1782 * safely use broadcast IPIs.
1783 */
1784
1785static cpumask_t smp_invltlb_req;
1786
1787#define SMP_INVLTLB_DEBUG
1788
1789void
1790smp_invltlb(void)
1791{
1792#ifdef SMP
1793 struct mdglobaldata *md = mdcpu;
1794#ifdef SMP_INVLTLB_DEBUG
1795 long count = 0;
1796 long xcount = 0;
1797#endif
1798
1799 crit_enter_gd(&md->mi);
1800 md->gd_invltlb_ret = 0;
1801 ++md->mi.gd_cnt.v_smpinvltlb;
1802 atomic_set_cpumask(&smp_invltlb_req, md->mi.gd_cpumask);
1803#ifdef SMP_INVLTLB_DEBUG
1804again:
1805#endif
1806 if (smp_startup_mask == smp_active_mask) {
1807 all_but_self_ipi(XINVLTLB_OFFSET);
1808 } else {
1809 selected_apic_ipi(smp_active_mask & ~md->mi.gd_cpumask,
1810 XINVLTLB_OFFSET, APIC_DELMODE_FIXED);
1811 }
1812
1813#ifdef SMP_INVLTLB_DEBUG
1814 if (xcount)
1815 kprintf("smp_invltlb: ipi sent\n");
1816#endif
1817 while ((md->gd_invltlb_ret & smp_active_mask & ~md->mi.gd_cpumask) !=
1818 (smp_active_mask & ~md->mi.gd_cpumask)) {
1819 cpu_mfence();
1820 cpu_pause();
1821#ifdef SMP_INVLTLB_DEBUG
1822 /* DEBUGGING */
1823 if (++count == 400000000) {
1824 print_backtrace(-1);
1825 kprintf("smp_invltlb: endless loop %08lx %08lx, "
1826 "rflags %016jx retry",
1827 (long)md->gd_invltlb_ret,
1828 (long)smp_invltlb_req,
1829 (intmax_t)read_rflags());
1830 __asm __volatile ("sti");
1831 ++xcount;
1832 if (xcount > 2)
1833 lwkt_process_ipiq();
1834 if (xcount > 3) {
da23a592
MD
1835 int bcpu = BSFCPUMASK(~md->gd_invltlb_ret &
1836 ~md->mi.gd_cpumask &
1837 smp_active_mask);
1838 globaldata_t xgd;
1839
1840 kprintf("bcpu %d\n", bcpu);
1841 xgd = globaldata_find(bcpu);
1842 kprintf("thread %p %s\n", xgd->gd_curthread, xgd->gd_curthread->td_comm);
1843 }
1844 if (xcount > 5)
1845 Debugger("giving up");
1846 count = 0;
1847 goto again;
1848 }
1849#endif
1850 }
1851 atomic_clear_cpumask(&smp_invltlb_req, md->mi.gd_cpumask);
1852 crit_exit_gd(&md->mi);
1853#endif
1854}
1855
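/*
 * The shootdown handshake runs on two cpu masks: the initiator sets its
 * bit in smp_invltlb_req and sends XINVLTLB IPIs, and each target's
 * smp_invltlb_intr() flushes its own TLB and acknowledges by setting
 * its bit in the initiator's gd_invltlb_ret mask, which the initiator
 * polls until every other active cpu has answered.
 */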
1856#ifdef SMP
1857
1858/*
1859 * Called from Xinvltlb assembly with interrupts disabled. We didn't
1860 * bother to bump the critical section count or nested interrupt count
1861 * so only do very low level operations here.
1862 */
1863void
1864smp_invltlb_intr(void)
1865{
1866 struct mdglobaldata *md = mdcpu;
1867 struct mdglobaldata *omd;
1868 cpumask_t mask;
1869 int cpu;
1870
1871 cpu_mfence();
1872 mask = smp_invltlb_req;
1873 cpu_invltlb();
1874 while (mask) {
1875 cpu = BSFCPUMASK(mask);
1876 mask &= ~CPUMASK(cpu);
1877 omd = (struct mdglobaldata *)globaldata_find(cpu);
1878 atomic_set_cpumask(&omd->gd_invltlb_ret, md->mi.gd_cpumask);
1879 }
1880}
1881
1882#endif
1883
1884/*
1885 * When called the executing CPU will send an IPI to all other CPUs
1886 * requesting that they halt execution.
1887 *
1888 * Usually (but not necessarily) called with 'other_cpus' as its arg.
1889 *
1890 * - Signals all CPUs in map to stop.
1891 * - Waits for each to stop.
1892 *
1893 * Returns:
1894 * -1: error
1895 * 0: NA
1896 * 1: ok
1897 *
1898 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
1899 * from executing at same time.
1900 */
1901int
1902stop_cpus(cpumask_t map)
1903{
1904 map &= smp_active_mask;
1905
1906 /* send the Xcpustop IPI to all CPUs in map */
1907 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
1908
1909 while ((stopped_cpus & map) != map)
1910 /* spin */ ;
1911
1912 return 1;
1913}
1914
1915
1916/*
1917 * Called by a CPU to restart stopped CPUs.
1918 *
1919 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
1920 *
1921 * - Signals all CPUs in map to restart.
1922 * - Waits for each to restart.
1923 *
1924 * Returns:
1925 * -1: error
1926 * 0: NA
1927 * 1: ok
1928 */
1929int
1930restart_cpus(cpumask_t map)
1931{
1932 /* signal other cpus to restart */
1933 started_cpus = map & smp_active_mask;
1934
1935 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
1936 /* spin */ ;
1937
1938 return 1;
1939}
1940
1941/*
1942 * This is called once the mpboot code has gotten us properly relocated
1943 * and the MMU turned on, etc. ap_init() is actually the idle thread,
1944 * and when it returns the scheduler will call the real cpu_idle() main
1945 * loop for the idlethread. Interrupts are disabled on entry and should
1946 * remain disabled at return.
1947 */
1948void
1949ap_init(void)
1950{
1951 u_int apic_id;
1952
1953 /*
1954 * Adjust smp_startup_mask to signal the BSP that we have started
1955 * up successfully. Note that we do not yet hold the BGL. The BSP
1956 * is waiting for our signal.
1957 *
1958 * We can't set our bit in smp_active_mask yet because we are holding
1959 * interrupts physically disabled and remote cpus could deadlock
1960 * trying to send us an IPI.
1961 */
da23a592 1962 smp_startup_mask |= CPUMASK(mycpu->gd_cpuid);
46d4e165
JG
1963 cpu_mfence();
1964
1965 /*
c6b1591c
SZ
1966 * Interlock for LAPIC initialization. Wait until mp_finish_lapic is
1967 * non-zero, then get the MP lock.
46d4e165
JG
1968 *
1969 * Note: We are in a critical section.
1970 *
46d4e165
JG
1971 * Note: we are the idle thread, we can only spin.
1972 *
1973 * Note: The load fence is memory volatile and prevents the compiler
c6b1591c 1974 * from improperly caching mp_finish_lapic, and the cpu from improperly
46d4e165
JG
1975 * caching it.
1976 */
c6b1591c 1977 while (mp_finish_lapic == 0)
b5d16701
MD
1978 cpu_lfence();
1979 while (try_mplock() == 0)
1980 ;
46d4e165
JG
1981
1982 if (cpu_feature & CPUID_TSC) {
b5d16701
MD
1983 /*
1984 * The BSP is constantly updating tsc0_offset, figure out
1985 * the relative difference to synchronize ktrdump.
1986 */
1987 tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset;
46d4e165
JG
1988 }
1989
1990 /* BSP may have changed PTD while we're waiting for the lock */
1991 cpu_invltlb();
1992
46d4e165 1993 /* Build our map of 'other' CPUs. */
da23a592 1994 mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid);
46d4e165 1995
46d4e165 1996 /* A quick check from sanity claus */
d53907dd 1997 apic_id = (apic_id_to_logical[(lapic->id & 0xff000000) >> 24]);
46d4e165
JG
1998 if (mycpu->gd_cpuid != apic_id) {
1999 kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid);
d53907dd
MD
2000 kprintf("SMP: apic_id = %d lapicid %d\n",
2001 apic_id, (lapic->id & 0xff000000) >> 24);
46d4e165
JG
2002#if JGXXX
2003 kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2004#endif
2005 panic("cpuid mismatch! boom!!");
2006 }
2007
2008 /* Initialize AP's local APIC for irq's */
5ddeabb9 2009 lapic_init(FALSE);
46d4e165 2010
c6b1591c
SZ
2011 /* LAPIC initialization is done */
2012 smp_lapic_mask |= CPUMASK(mycpu->gd_cpuid);
2013 cpu_mfence();
2014
2015 /* Let BSP move onto the next initialization stage */
2016 rel_mplock();
2017
2018 /*
2019 * Interlock for finalization. Wait until mp_finish is non-zero,
2020 * then get the MP lock.
2021 *
2022 * Note: We are in a critical section.
2023 *
2024 * Note: we are the idle thread, we can only spin.
2025 *
2026 * Note: The load fence is memory volatile and prevents the compiler
2027 * from improperly caching mp_finish, and the cpu from improperly
2028 * caching it.
2029 */
2030 while (mp_finish == 0)
2031 cpu_lfence();
2032 while (try_mplock() == 0)
2033 ;
2034
2035 /* BSP may have changed PTD while we're waiting for the lock */
2036 cpu_invltlb();
2037
46d4e165
JG
2038 /* Set memory range attributes for this CPU to match the BSP */
2039 mem_range_AP_init();
2040
2041 /*
2042 * Once we go active we must process any IPIQ messages that may
2043 * have been queued, because no actual IPI will occur until we
2044 * set our bit in the smp_active_mask. If we don't the IPI
2045 * message interlock could be left set which would also prevent
2046 * further IPIs.
2047 *
2048 * The idle loop doesn't expect the BGL to be held, and while
2049 * lwkt_switch() normally cleans things up, this is a special case
2050 * because we are returning almost directly into the idle loop.
2051 *
2052 * The idle thread is never placed on the runq, make sure
2053 * nothing we've done put it there.
2054 */
b5d16701 2055 KKASSERT(get_mplock_count(curthread) == 1);
da23a592 2056 smp_active_mask |= CPUMASK(mycpu->gd_cpuid);
46d4e165
JG
2057
2058 /*
2059 * Enable interrupts here. idle_restore will also do it, but
2060 * doing it here lets us clean up any strays that got posted to
2061 * the CPU during the AP boot while we are still in a critical
2062 * section.
2063 */
2064 __asm __volatile("sti; pause; pause"::);
9611ff20 2065 bzero(mdcpu->gd_ipending, sizeof(mdcpu->gd_ipending));
46d4e165
JG
2066
2067 initclocks_pcpu(); /* clock interrupts (via IPIs) */
2068 lwkt_process_ipiq();
2069
2070 /*
2071 * Releasing the mp lock lets the BSP finish up the SMP init
2072 */
2073 rel_mplock();
2074 KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
2075}
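/*
 * A condensed view of the BSP/AP startup interlock used above, as far
 * as it is visible in this file (the BSP side lives in the startup code
 * and in ap_finish() below); illustrative only:
 *
 *   AP:  sets its bit in smp_startup_mask	(we have booted)
 *   BSP: sets mp_finish_lapic; the APs race for the mplock
 *   AP:  lapic_init(FALSE), sets its bit in smp_lapic_mask, rel_mplock()
 *   BSP: sets mp_finish; the APs race for the mplock again
 *   AP:  mem_range_AP_init(), sets its bit in smp_active_mask,
 *        enables interrupts, initclocks_pcpu(), lwkt_process_ipiq(),
 *        rel_mplock()
 *   BSP: ap_finish() waits for smp_active_mask == smp_startup_mask
 */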
2076
2077/*
2078 * Get SMP fully working before we start initializing devices.
2079 */
2080static
2081void
2082ap_finish(void)
2083{
2084 mp_finish = 1;
2085 if (bootverbose)
2086 kprintf("Finish MP startup\n");
46d4e165 2087 rel_mplock();
c6b1591c 2088 while (smp_active_mask != smp_startup_mask)
46d4e165 2089 cpu_lfence();
46d4e165
JG
2090 while (try_mplock() == 0)
2091 ;
da23a592
MD
2092 if (bootverbose) {
2093 kprintf("Active CPU Mask: %016jx\n",
2094 (uintmax_t)smp_active_mask);
2095 }
46d4e165
JG
2096}
2097
2098SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL)
2099
2100void
2101cpu_send_ipiq(int dcpu)
2102{
da23a592 2103 if (CPUMASK(dcpu) & smp_active_mask)
46d4e165
JG
2104 single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED);
2105}
2106
2107#if 0 /* single_apic_ipi_passive() not working yet */
2108/*
2109 * Returns 0 on failure, 1 on success
2110 */
2111int
2112cpu_send_ipiq_passive(int dcpu)
2113{
2114 int r = 0;
da23a592 2115 if (CPUMASK(dcpu) & smp_active_mask) {
46d4e165
JG
2116 r = single_apic_ipi_passive(dcpu, XIPIQ_OFFSET,
2117 APIC_DELMODE_FIXED);
2118 }
2119 return(r);
2120}
2121#endif
2122
e0fd357f
SZ
2123static int
2124mptable_bus_info_callback(void *xarg, const void *pos, int type)
2125{
2126 struct mptable_bus_info *bus_info = xarg;
2127 const struct BUSENTRY *ent;
2128 struct mptable_bus *bus;
2129
2130 if (type != 1)
2131 return 0;
c715f062 2132
e0fd357f 2133 ent = pos;
c715f062
SZ
2134 TAILQ_FOREACH(bus, &bus_info->mbi_list, mb_link) {
2135 if (bus->mb_id == ent->bus_id) {
2136 kprintf("mptable_bus_info_alloc: duplicated bus id "
2137 "(%d)\n", bus->mb_id);
2138 return EINVAL;
2139 }
2140 }
e0fd357f
SZ
2141
2142 bus = NULL;
2143 if (strncmp(ent->bus_type, "PCI", 3) == 0) {
2144 bus = kmalloc(sizeof(*bus), M_TEMP, M_WAITOK | M_ZERO);
2145 bus->mb_type = MPTABLE_BUS_PCI;
2146 } else if (strncmp(ent->bus_type, "ISA", 3) == 0) {
2147 bus = kmalloc(sizeof(*bus), M_TEMP, M_WAITOK | M_ZERO);
2148 bus->mb_type = MPTABLE_BUS_ISA;
2149 }
2150
2151 if (bus != NULL) {
c715f062
SZ
2152 bus->mb_id = ent->bus_id;
2153 TAILQ_INSERT_TAIL(&bus_info->mbi_list, bus, mb_link);
e0fd357f
SZ
2154 }
2155 return 0;
2156}
2157
2158static void
2159mptable_bus_info_alloc(const mpcth_t cth, struct mptable_bus_info *bus_info)
2160{
2161 int error;
2162
2163 bzero(bus_info, sizeof(*bus_info));
2164 TAILQ_INIT(&bus_info->mbi_list);
2165
2166 error = mptable_iterate_entries(cth, mptable_bus_info_callback, bus_info);
2167 if (error)
2168 mptable_bus_info_free(bus_info);
2169}
2170
2171static void
2172mptable_bus_info_free(struct mptable_bus_info *bus_info)
2173{
2174 struct mptable_bus *bus;
2175
2176 while ((bus = TAILQ_FIRST(&bus_info->mbi_list)) != NULL) {
2177 TAILQ_REMOVE(&bus_info->mbi_list, bus, mb_link);
2178 kfree(bus, M_TEMP);
2179 }
2180}
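/*
 * The helpers above follow an alloc/iterate/free pattern that the later
 * registration routines repeat; a minimal (not compiled) sketch of that
 * pattern, assuming the MP table has already been located:
 */
#if 0
static void
example_walk_buses(void)
{
	struct mptable_bus_info bus_info;
	const struct mptable_bus *bus;
	struct mptable_pos mpt;

	if (mptable_map(&mpt))
		return;
	mptable_bus_info_alloc(mpt.mp_cth, &bus_info);
	TAILQ_FOREACH(bus, &bus_info.mbi_list, mb_link) {
		/*
		 * bus->mb_id is the MP table bus id, bus->mb_type is
		 * MPTABLE_BUS_PCI or MPTABLE_BUS_ISA.
		 */
	}
	mptable_bus_info_free(&bus_info);
	mptable_unmap(&mpt);
}
#endif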
2181
16794646
MN
2182struct mptable_lapic_cbarg1 {
2183 int cpu_count;
7f310ea1
MN
2184 int ht_fixup;
2185 u_int ht_apicid_mask;
16794646
MN
2186};
2187
2188static int
2189mptable_lapic_pass1_callback(void *xarg, const void *pos, int type)
2190{
2191 const struct PROCENTRY *ent;
2192 struct mptable_lapic_cbarg1 *arg = xarg;
2193
2194 if (type != 0)
2195 return 0;
2196 ent = pos;
2197
2198 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
2199 return 0;
2200
2201 arg->cpu_count++;
7f310ea1
MN
2202 if (ent->apic_id < 32) {
2203 arg->ht_apicid_mask |= 1 << ent->apic_id;
2204 } else if (arg->ht_fixup) {
2205 kprintf("MPTABLE: lapic id > 32, disable HTT fixup\n");
2206 arg->ht_fixup = 0;
2207 }
16794646
MN
2208 return 0;
2209}
2210
2211struct mptable_lapic_cbarg2 {
2212 int cpu;
7f310ea1 2213 int logical_cpus;
16794646
MN
2214 int found_bsp;
2215};
2216
2217static int
2218mptable_lapic_pass2_callback(void *xarg, const void *pos, int type)
2219{
2220 const struct PROCENTRY *ent;
2221 struct mptable_lapic_cbarg2 *arg = xarg;
2222
2223 if (type != 0)
2224 return 0;
2225 ent = pos;
2226
2227 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
2228 KKASSERT(!arg->found_bsp);
2229 arg->found_bsp = 1;
2230 }
2231
2232 if (processor_entry(ent, arg->cpu))
2233 arg->cpu++;
2234
7f310ea1 2235 if (arg->logical_cpus) {
16794646
MN
2236 struct PROCENTRY proc;
2237 int i;
2238
2239 /*
2240 * Create fake mptable processor entries
2241 * and feed them to processor_entry() to
2242 * enumerate the logical CPUs.
2243 */
2244 bzero(&proc, sizeof(proc));
2245 proc.type = 0;
2246 proc.cpu_flags = PROCENTRY_FLAG_EN;
2247 proc.apic_id = ent->apic_id;
2248
7f310ea1 2249 for (i = 1; i < arg->logical_cpus; i++) {
16794646
MN
2250 proc.apic_id++;
2251 processor_entry(&proc, arg->cpu);
16794646
MN
2252 arg->cpu++;
2253 }
2254 }
2255 return 0;
2256}
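/*
 * Worked example of the fake-entry generation above, assuming the HT
 * fixup decided on 2 logical CPUs per package: a physical PROCENTRY
 * with apic_id 0 is registered as-is, then one fake PROCENTRY with
 * apic_id 1 is synthesized and fed to processor_entry(), so apic ids
 * 0 and 1 end up as consecutive logical cpus.  With logical_cpus == 0
 * no fake entries are generated at all.
 */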
2257
afcb64b2
MN
2258static void
2259mptable_lapic_default(void)
2260{
2261 int ap_apicid, bsp_apicid;
2262
2263 mp_naps = 1; /* exclude BSP */
2264
2265 /* Map local apic before the id field is accessed */
b44f1d28 2266 lapic_map(DEFAULT_APIC_BASE);
afcb64b2
MN
2267
2268 bsp_apicid = APIC_ID(lapic->id);
2269 ap_apicid = (bsp_apicid == 0) ? 1 : 0;
2270
2271 /* BSP */
2272 mp_set_cpuids(0, bsp_apicid);
2273 /* one and only AP */
2274 mp_set_cpuids(1, ap_apicid);
2275}
2276
16794646
MN
2277/*
2278 * Configure:
16794646 2279 * mp_naps
7159723d 2280 * ID_TO_CPU(N), APIC ID to logical CPU table
16794646
MN
2281 * CPU_TO_ID(N), logical CPU to APIC ID table
2282 */
2283static void
91903a05 2284mptable_lapic_enumerate(struct lapic_enumerator *e)
16794646 2285{
91903a05 2286 struct mptable_pos mpt;
afcb64b2
MN
2287 struct mptable_lapic_cbarg1 arg1;
2288 struct mptable_lapic_cbarg2 arg2;
2289 mpcth_t cth;
7f310ea1 2290 int error, logical_cpus = 0;
f2fc5f9b 2291 vm_offset_t lapic_addr;
16794646 2292
c455a23f 2293 if (mptable_use_default) {
afcb64b2
MN
2294 mptable_lapic_default();
2295 return;
16794646 2296 }
afcb64b2 2297
c455a23f
SZ
2298 error = mptable_map(&mpt);
2299 if (error)
2300 panic("mptable_lapic_enumerate mptable_map failed\n");
2301 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
2302
91903a05 2303 cth = mpt.mp_cth;
afcb64b2
MN
2304
2305 /* Save local apic address */
2306 lapic_addr = (vm_offset_t)cth->apic_address;
2307 KKASSERT(lapic_addr != 0);
2308
2309 /*
2310 * Find out how many CPUs we have
2311 */
2312 bzero(&arg1, sizeof(arg1));
7f310ea1
MN
2313 arg1.ht_fixup = 1; /* Apply ht fixup by default */
2314
afcb64b2
MN
2315 error = mptable_iterate_entries(cth,
2316 mptable_lapic_pass1_callback, &arg1);
2317 if (error)
2318 panic("mptable_iterate_entries(lapic_pass1) failed\n");
afcb64b2 2319 KKASSERT(arg1.cpu_count != 0);
afcb64b2
MN
2320
2321 /* See if we need to fixup HT logical CPUs. */
7f310ea1
MN
2322 if (arg1.ht_fixup) {
2323 logical_cpus = mptable_hyperthread_fixup(arg1.ht_apicid_mask,
2324 arg1.cpu_count);
2325 if (logical_cpus != 0)
2326 arg1.cpu_count *= logical_cpus;
2327 }
2328 mp_naps = arg1.cpu_count;
afcb64b2 2329
7f310ea1 2330 /* Qualify the numbers again, after possible HT fixup */
afcb64b2
MN
2331 if (mp_naps > MAXCPU) {
2332 kprintf("Warning: only using %d of %d available CPUs!\n",
2333 MAXCPU, mp_naps);
8e5ea5f7 2334 DELAY(1000000);
afcb64b2
MN
2335 mp_naps = MAXCPU;
2336 }
16794646 2337
afcb64b2 2338 --mp_naps; /* subtract the BSP */
16794646 2339
afcb64b2
MN
2340 /*
2341 * Link logical CPU id to local apic id
2342 */
2343 bzero(&arg2, sizeof(arg2));
2344 arg2.cpu = 1;
7f310ea1 2345 arg2.logical_cpus = logical_cpus;
16794646 2346
afcb64b2
MN
2347 error = mptable_iterate_entries(cth,
2348 mptable_lapic_pass2_callback, &arg2);
2349 if (error)
2350 panic("mptable_iterate_entries(lapic_pass2) failed\n");
2351 KKASSERT(arg2.found_bsp);
16794646 2352
afcb64b2 2353 /* Map local apic */
b44f1d28 2354 lapic_map(lapic_addr);
91903a05
MN
2355
2356 mptable_unmap(&mpt);
2357}
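/*
 * Worked example of the counting above (illustrative numbers): with two
 * enabled PROCENTRYs (apic ids 0 and 2) and a hyperthreading fixup that
 * reports 2 logical CPUs per package, pass 1 counts cpu_count = 2, the
 * fixup multiplies it to 4, mp_naps becomes 4, is clamped against
 * MAXCPU, and finally drops to 3 once the BSP is subtracted.  Pass 2
 * then assigns logical cpu ids 1..3 to the APs.
 */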
2358
fe423084
SZ
2359struct mptable_lapic_probe_cbarg {
2360 int cpu_count;
2361 int found_bsp;
2362};
2363
91903a05 2364static int
fe423084 2365mptable_lapic_probe_callback(void *xarg, const void *pos, int type)
91903a05 2366{
fe423084
SZ
2367 const struct PROCENTRY *ent;
2368 struct mptable_lapic_probe_cbarg *arg = xarg;
91903a05 2369
fe423084
SZ
2370 if (type != 0)
2371 return 0;
2372 ent = pos;
91903a05 2373
fe423084
SZ
2374 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0)
2375 return 0;
2376 arg->cpu_count++;
2377
2378 if (ent->cpu_flags & PROCENTRY_FLAG_BP) {
2379 if (arg->found_bsp) {
2380 kprintf("more than one BSP in base MP table\n");
2381 return EINVAL;
2382 }
2383 arg->found_bsp = 1;
2384 }
91903a05 2385 return 0;
16794646 2386}
f2fc5f9b 2387
fe423084
SZ
2388static int
2389mptable_lapic_probe(struct lapic_enumerator *e)
2390{
2391 struct mptable_pos mpt;
2392 struct mptable_lapic_probe_cbarg arg;
2393 mpcth_t cth;
2394 int error;
2395
2396 if (mptable_fps_phyaddr == 0)
2397 return ENXIO;
2398
c455a23f
SZ
2399 if (mptable_use_default)
2400 return 0;
2401
fe423084
SZ
2402 error = mptable_map(&mpt);
2403 if (error)
2404 return error;
c455a23f 2405 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
fe423084
SZ
2406
2407 error = EINVAL;
fe423084 2408 cth = mpt.mp_cth;
c455a23f 2409
fe423084
SZ
2410 if (cth->apic_address == 0)
2411 goto done;
2412
2413 bzero(&arg, sizeof(arg));
2414 error = mptable_iterate_entries(cth,
2415 mptable_lapic_probe_callback, &arg);
2416 if (!error) {
2417 if (arg.cpu_count == 0) {
2418 kprintf("MP table contains no processor entries\n");
2419 error = EINVAL;
2420 } else if (!arg.found_bsp) {
2421 kprintf("MP table does not contains BSP entry\n");
2422 error = EINVAL;
2423 }
91903a05 2424 }
fe423084
SZ
2425done:
2426 mptable_unmap(&mpt);
2427 return error;
2428}
2429
2430static struct lapic_enumerator mptable_lapic_enumerator = {
2431 .lapic_prio = LAPIC_ENUM_PRIO_MPTABLE,
2432 .lapic_probe = mptable_lapic_probe,
2433 .lapic_enumerate = mptable_lapic_enumerate
91903a05
MN
2434};
2435
e6a7270f 2436static void
becce73f 2437mptable_lapic_enum_register(void)
e6a7270f 2438{
fe423084 2439 lapic_enumerator_register(&mptable_lapic_enumerator);
e6a7270f 2440}
becce73f
SZ
2441SYSINIT(mptable_lapic, SI_BOOT2_PRESMP, SI_ORDER_ANY,
2442 mptable_lapic_enum_register, 0);
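/*
 * Any other LAPIC source can plug into the same framework; a
 * hypothetical (not compiled) enumerator would mirror the MP table one
 * above, differing only in its probe/enumerate callbacks and in the
 * priority it registers with:
 */
#if 0
static struct lapic_enumerator example_lapic_enumerator = {
	.lapic_prio = LAPIC_ENUM_PRIO_MPTABLE,	/* a real one supplies its own priority */
	.lapic_probe = example_lapic_probe,
	.lapic_enumerate = example_lapic_enumerate
};

static void
example_lapic_enum_register(void)
{
	lapic_enumerator_register(&example_lapic_enumerator);
}
SYSINIT(example_lapic, SI_BOOT2_PRESMP, SI_ORDER_ANY,
	example_lapic_enum_register, 0);
#endif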
e0fd357f 2443
6b881b58
SZ
2444static int
2445mptable_ioapic_list_callback(void *xarg, const void *pos, int type)
2446{
2447 const struct IOAPICENTRY *ent;
2448 struct mptable_ioapic *nioapic, *ioapic;
2449
2450 if (type != 2)
2451 return 0;
2452 ent = pos;
2453
2454 if ((ent->apic_flags & IOAPICENTRY_FLAG_EN) == 0)
2455 return 0;
2456
2457 if (ent->apic_address == 0) {
2458 kprintf("mptable_ioapic_create_list: zero IOAPIC addr\n");
2459 return EINVAL;
2460 }
2461
2462 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
2463 if (ioapic->mio_apic_id == ent->apic_id) {
2464 kprintf("mptable_ioapic_create_list: duplicated "
2465 "apic id %d\n", ioapic->mio_apic_id);
2466 return EINVAL;
2467 }
2468 if (ioapic->mio_addr == ent->apic_address) {
2469 kprintf("mptable_ioapic_create_list: overlapped "
2470 "IOAPIC addr 0x%08x", ioapic->mio_addr);
2471 return EINVAL;
2472 }
2473 }
2474
2475 nioapic = kmalloc(sizeof(*nioapic), M_DEVBUF, M_WAITOK | M_ZERO);
2476 nioapic->mio_apic_id = ent->apic_id;
2477 nioapic->mio_addr = ent->apic_address;
2478
2479 /*
2480 * Create IOAPIC list in ascending order of APIC ID
2481 */
2482 TAILQ_FOREACH_REVERSE(ioapic, &mptable_ioapic_list,
2483 mptable_ioapic_list, mio_link) {
2484 if (nioapic->mio_apic_id > ioapic->mio_apic_id) {
2485 TAILQ_INSERT_AFTER(&mptable_ioapic_list,
2486 ioapic, nioapic, mio_link);
2487 break;
2488 }
2489 }
2490 if (ioapic == NULL)
2491 TAILQ_INSERT_HEAD(&mptable_ioapic_list, nioapic, mio_link);
2492
2493 return 0;
2494}
2495
2496static void
2497mptable_ioapic_create_list(void)
2498{
2499 struct mptable_ioapic *ioapic;
2500 struct mptable_pos mpt;
2501 int idx, error;
2502
2503 if (mptable_fps_phyaddr == 0)
2504 return;
2505
2506 if (mptable_use_default) {
2507 ioapic = kmalloc(sizeof(*ioapic), M_DEVBUF, M_WAITOK | M_ZERO);
2508 ioapic->mio_idx = 0;
2509 ioapic->mio_apic_id = 0; /* NOTE: any value is ok here */
2510 ioapic->mio_addr = 0xfec00000; /* XXX magic number */
2511
2512 TAILQ_INSERT_HEAD(&mptable_ioapic_list, ioapic, mio_link);
2513 return;
2514 }
2515
2516 error = mptable_map(&mpt);
2517 if (error)
2518 panic("mptable_ioapic_create_list: mptable_map failed\n");
2519 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
2520
2521 error = mptable_iterate_entries(mpt.mp_cth,
2522 mptable_ioapic_list_callback, NULL);
2523 if (error) {
2524 while ((ioapic = TAILQ_FIRST(&mptable_ioapic_list)) != NULL) {
2525 TAILQ_REMOVE(&mptable_ioapic_list, ioapic, mio_link);
2526 kfree(ioapic, M_DEVBUF);
2527 }
2528 goto done;
2529 }
2530
2531 /*
2532 * Assign index number for each IOAPIC
2533 */
2534 idx = 0;
2535 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
2536 ioapic->mio_idx = idx;
2537 ++idx;
2538 }
2539done:
2540 mptable_unmap(&mpt);
2541}
2542SYSINIT(mptable_ioapic_list, SI_BOOT2_PRESMP, SI_ORDER_SECOND,
2543 mptable_ioapic_create_list, 0);
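/*
 * Worked example of the list construction above (illustrative ids):
 * IOAPICENTRYs arriving in the order apic id 8 then apic id 2 are
 * inserted so the list reads {2, 8}; the pass at the end then assigns
 * mio_idx 0 to apic id 2 and mio_idx 1 to apic id 8.  In the
 * mptable_use_default case a single IOAPIC is assumed at the
 * conventional 0xfec00000 address with idx 0.
 */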
2544
e0fd357f
SZ
2545static int
2546mptable_pci_int_callback(void *xarg, const void *pos, int type)
2547{
2548 const struct mptable_bus_info *bus_info = xarg;
6b881b58 2549 const struct mptable_ioapic *ioapic;
e0fd357f
SZ
2550 const struct mptable_bus *bus;
2551 struct mptable_pci_int *pci_int;
2552 const struct INTENTRY *ent;
2553 int pci_pin, pci_dev;
2554
2555 if (type != 3)
2556 return 0;
2557 ent = pos;
2558
2559 if (ent->int_type != 0)
2560 return 0;
2561
2562 TAILQ_FOREACH(bus, &bus_info->mbi_list, mb_link) {
2563 if (bus->mb_type == MPTABLE_BUS_PCI &&
2564 bus->mb_id == ent->src_bus_id)
2565 break;
2566 }
2567 if (bus == NULL)
2568 return 0;
2569
6b881b58
SZ
2570 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
2571 if (ioapic->mio_apic_id == ent->dst_apic_id)
2572 break;
2573 }
2574 if (ioapic == NULL) {
2575 kprintf("MPTABLE: warning PCI int dst apic id %d "
2576 "does not exist\n", ent->dst_apic_id);
2577 return 0;
2578 }
2579
e0fd357f
SZ
2580 pci_pin = ent->src_bus_irq & 0x3;
2581 pci_dev = (ent->src_bus_irq >> 2) & 0x1f;
2582
2583 TAILQ_FOREACH(pci_int, &mptable_pci_int_list, mpci_link) {
2584 if (pci_int->mpci_bus == ent->src_bus_id &&
2585 pci_int->mpci_dev == pci_dev &&
2586 pci_int->mpci_pin == pci_pin) {
6b881b58 2587 if (pci_int->mpci_ioapic_idx == ioapic->mio_idx &&
e0fd357f
SZ
2588 pci_int->mpci_ioapic_pin == ent->dst_apic_int) {
2589 kprintf("MPTABLE: warning duplicated "
2590 "PCI int entry for "
2591 "bus %d, dev %d, pin %d\n",
2592 pci_int->mpci_bus,
2593 pci_int->mpci_dev,
2594 pci_int->mpci_pin);
2595 return 0;
2596 } else {
2597 kprintf("mptable_pci_int_register: "
2598 "conflict PCI int entry for "
2599 "bus %d, dev %d, pin %d, "
2600 "IOAPIC %d.%d -> %d.%d\n",
2601 pci_int->mpci_bus,
2602 pci_int->mpci_dev,
2603 pci_int->mpci_pin,
6b881b58 2604 pci_int->mpci_ioapic_idx,
e0fd357f 2605 pci_int->mpci_ioapic_pin,
6b881b58 2606 ioapic->mio_idx,
e0fd357f
SZ
2607 ent->dst_apic_int);
2608 return EINVAL;
2609 }
2610 }
2611 }
2612
2619977b 2613 pci_int = kmalloc(sizeof(*pci_int), M_DEVBUF, M_WAITOK | M_ZERO);
e0fd357f
SZ
2614
2615 pci_int->mpci_bus = ent->src_bus_id;
2616 pci_int->mpci_dev = pci_dev;
2617 pci_int->mpci_pin = pci_pin;
6b881b58 2618 pci_int->mpci_ioapic_idx = ioapic->mio_idx;
e0fd357f
SZ
2619 pci_int->mpci_ioapic_pin = ent->dst_apic_int;
2620
2621 TAILQ_INSERT_TAIL(&mptable_pci_int_list, pci_int, mpci_link);
2622
2623 return 0;
2624}
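/*
 * Worked decode of the src_bus_irq packing used above (illustrative
 * value): for a PCI interrupt entry with src_bus_irq = 0x31,
 * pci_pin = 0x31 & 0x3 = 1 (INTB) and pci_dev = (0x31 >> 2) & 0x1f = 12,
 * so the entry describes device 12, INTB on bus src_bus_id and is
 * recorded against IOAPIC mio_idx pin dst_apic_int.
 */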
2625
2626static void
2627mptable_pci_int_register(void)
2628{
2629 struct mptable_bus_info bus_info;
2630 const struct mptable_bus *bus;
2631 struct mptable_pci_int *pci_int;
2632 struct mptable_pos mpt;
2633 int error, force_pci0, npcibus;
2634 mpcth_t cth;
2635
2636 if (mptable_fps_phyaddr == 0)
2637 return;
2638
2639 if (mptable_use_default)
2640 return;
2641
6b881b58
SZ
2642 if (TAILQ_EMPTY(&mptable_ioapic_list))
2643 return;
2644
e0fd357f
SZ
2645 error = mptable_map(&mpt);
2646 if (error)
2647 panic("mptable_pci_int_register: mptable_map failed\n");
2648 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
2649
2650 cth = mpt.mp_cth;
2651
2652 mptable_bus_info_alloc(cth, &bus_info);
2653 if (TAILQ_EMPTY(&bus_info.mbi_list))
2654 goto done;
2655
8d905764 2656 force_pci0 = 0;
e0fd357f
SZ
2657 npcibus = 0;
2658 TAILQ_FOREACH(bus, &bus_info.mbi_list, mb_link) {
2659 if (bus->mb_type == MPTABLE_BUS_PCI)
2660 ++npcibus;
2661 }
2662 if (npcibus == 0) {
2663 mptable_bus_info_free(&bus_info);
2664 goto done;
2665 } else if (npcibus == 1) {
2666 force_pci0 = 1;
2667 }
2668
2669 error = mptable_iterate_entries(cth,
2670 mptable_pci_int_callback, &bus_info);
2671
2672 mptable_bus_info_free(&bus_info);
2673
2674 if (error) {
2675 while ((pci_int = TAILQ_FIRST(&mptable_pci_int_list)) != NULL) {
2676 TAILQ_REMOVE(&mptable_pci_int_list, pci_int, mpci_link);
2677 kfree(pci_int, M_DEVBUF);
2678 }
2679 goto done;
2680 }
2681
2682 if (force_pci0) {
2683 TAILQ_FOREACH(pci_int, &mptable_pci_int_list, mpci_link)
2684 pci_int->mpci_bus = 0;
2685 }
2686done:
2687 mptable_unmap(&mpt);
2688}
2689SYSINIT(mptable_pci, SI_BOOT2_PRESMP, SI_ORDER_ANY,
2690 mptable_pci_int_register, 0);
7da2706b
SZ
2691
2692struct mptable_ioapic_probe_cbarg {
2693 const struct mptable_bus_info *bus_info;
7da2706b
SZ
2694};
2695
2696static int
2697mptable_ioapic_probe_callback(void *xarg, const void *pos, int type)
2698{
2699 struct mptable_ioapic_probe_cbarg *arg = xarg;
6b881b58
SZ
2700 const struct mptable_ioapic *ioapic;
2701 const struct mptable_bus *bus;
2702 const struct INTENTRY *ent;
7da2706b 2703
6b881b58
SZ
2704 if (type != 3)
2705 return 0;
2706 ent = pos;
7da2706b 2707
6b881b58
SZ
2708 if (ent->int_type != 0)
2709 return 0;
7da2706b 2710
6b881b58
SZ
2711 TAILQ_FOREACH(bus, &arg->bus_info->mbi_list, mb_link) {
2712 if (bus->mb_type == MPTABLE_BUS_ISA &&
2713 bus->mb_id == ent->src_bus_id)
2714 break;
2715 }
2716 if (bus == NULL)
2717 return 0;
7da2706b 2718
6b881b58
SZ
2719 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
2720 if (ioapic->mio_apic_id == ent->dst_apic_id)
2721 break;
2722 }
2723 if (ioapic == NULL) {
2724 kprintf("MPTABLE: warning ISA int dst apic id %d "
2725 "does not exist\n", ent->dst_apic_id);
2726 return 0;
2727 }
7da2706b 2728
6b881b58
SZ
2729 /* XXX magic number */
2730 if (ent->src_bus_irq >= 16) {
2731 kprintf("mptable_ioapic_probe: invalid ISA irq (%d)\n",
2732 ent->src_bus_irq);
2733 return EINVAL;
7da2706b
SZ
2734 }
2735 return 0;
2736}
2737
2738static int
2739mptable_ioapic_probe(struct ioapic_enumerator *e)
2740{
2741 struct mptable_ioapic_probe_cbarg arg;
2742 struct mptable_bus_info bus_info;
2743 struct mptable_pos mpt;
2744 mpcth_t cth;
2745 int error;
2746
2747 if (mptable_fps_phyaddr == 0)
2748 return ENXIO;
2749
2750 if (mptable_use_default)
2751 return 0;
2752
6b881b58
SZ
2753 if (TAILQ_EMPTY(&mptable_ioapic_list))
2754 return ENXIO;
2755
7da2706b
SZ
2756 error = mptable_map(&mpt);
2757 if (error)
2758 panic("mptable_ioapic_probe: mptable_map failed\n");
2759 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
2760
2761 cth = mpt.mp_cth;
2762
2763 mptable_bus_info_alloc(cth, &bus_info);
2764
2765 bzero(&arg, sizeof(arg));
2766 arg.bus_info = &bus_info;
2767
2768 error = mptable_iterate_entries(cth,
2769 mptable_ioapic_probe_callback, &arg);
7da2706b
SZ
2770
2771 mptable_bus_info_free(&bus_info);
2772 mptable_unmap(&mpt);
2773
2774 return error;
2775}
2776
7da2706b
SZ
2777struct mptable_ioapic_int_cbarg {
2778 const struct mptable_bus_info *bus_info;
2779 int ioapic_nint;
2780};
2781
2782static int
2783mptable_ioapic_int_callback(void *xarg, const void *pos, int type)
2784{
2785 struct mptable_ioapic_int_cbarg *arg = xarg;
512fb675 2786 const struct mptable_ioapic *ioapic;
7da2706b
SZ
2787 const struct mptable_bus *bus;
2788 const struct INTENTRY *ent;
7a603b36 2789 int gsi;
7da2706b
SZ
2790
2791 if (type != 3)
2792 return 0;
2793
2794 arg->ioapic_nint++;
2795
2796 ent = pos;
2797 if (ent->int_type != 0)
2798 return 0;
2799
2800 TAILQ_FOREACH(bus, &arg->bus_info->mbi_list, mb_link) {
2801 if (bus->mb_type == MPTABLE_BUS_ISA &&
2802 bus->mb_id == ent->src_bus_id)
2803 break;
2804 }
2805 if (bus == NULL)
2806 return 0;
2807
512fb675
SZ
2808 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
2809 if (ioapic->mio_apic_id == ent->dst_apic_id)
2810 break;
2811 }
2812 if (ioapic == NULL) {
2813 kprintf("MPTABLE: warning ISA int dst apic id %d "
2814 "does not exist\n", ent->dst_apic_id);
2815 return 0;
2816 }
2817
7a603b36
SZ
2818 if (ent->dst_apic_int >= ioapic->mio_npin) {
2819 panic("mptable_ioapic_enumerate: invalid I/O APIC "
2820 "pin %d, should be < %d",
2821 ent->dst_apic_int, ioapic->mio_npin);
2822 }
2823 gsi = ioapic->mio_gsi_base + ent->dst_apic_int;
512fb675 2824
7a603b36
SZ
2825 if (ent->src_bus_irq != gsi) {
2826 if (bootverbose) {
2827 kprintf("MPTABLE: INTSRC irq %d -> GSI %d\n",
2828 ent->src_bus_irq, gsi);
7da2706b 2829 }
7a603b36
SZ
2830 ioapic_intsrc(ent->src_bus_irq, gsi,
2831 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
7da2706b
SZ
2832 }
2833 return 0;
2834}
2835
2836static void
2837mptable_ioapic_enumerate(struct ioapic_enumerator *e)
2838{
2839 struct mptable_bus_info bus_info;
0471bb0e 2840 struct mptable_ioapic *ioapic;
7da2706b
SZ
2841 struct mptable_pos mpt;
2842 mpcth_t cth;
2843 int error;
2844
2845 KKASSERT(mptable_fps_phyaddr != 0);
6b881b58 2846 KKASSERT(!TAILQ_EMPTY(&mptable_ioapic_list));
7da2706b 2847
6b881b58 2848 TAILQ_FOREACH(ioapic, &mptable_ioapic_list, mio_link) {
7a603b36
SZ
2849 const struct mptable_ioapic *prev_ioapic;
2850 uint32_t ver;
2851 void *addr;
0471bb0e 2852
7a603b36 2853 addr = ioapic_map(ioapic->mio_addr);
0471bb0e 2854
7a603b36
SZ
2855 ver = ioapic_read(addr, IOAPIC_VER);
2856 ioapic->mio_npin = ((ver & IOART_VER_MAXREDIR)
2857 >> MAXREDIRSHIFT) + 1;
0471bb0e 2858
7a603b36
SZ
2859 prev_ioapic = TAILQ_PREV(ioapic,
2860 mptable_ioapic_list, mio_link);
2861 if (prev_ioapic == NULL) {
2862 ioapic->mio_gsi_base = 0;
2863 } else {
2864 ioapic->mio_gsi_base =
2865 prev_ioapic->mio_gsi_base +
2866 prev_ioapic->mio_npin;
0471bb0e 2867 }
7a603b36
SZ
2868 ioapic_add(addr, ioapic->mio_gsi_base, ioapic->mio_npin);
2869
7da2706b 2870 if (bootverbose) {
6b881b58 2871 kprintf("MPTABLE: IOAPIC addr 0x%08x, "
0471bb0e 2872 "apic id %d, idx %d, gsi base %d, npin %d\n",
6b881b58 2873 ioapic->mio_addr,
0471bb0e
SZ
2874 ioapic->mio_apic_id,
2875 ioapic->mio_idx,
2876 ioapic->mio_gsi_base,
2877 ioapic->mio_npin);
7da2706b 2878 }
6b881b58
SZ
2879 }
2880
2881 if (mptable_use_default) {
2882 if (bootverbose)
2883 kprintf("MPTABLE: INTSRC irq 0 -> GSI 2 (default)\n");
ae80be10 2884 ioapic_intsrc(0, 2, INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
7da2706b
SZ
2885 return;
2886 }
2887
2888 error = mptable_map(&mpt);
2889 if (error)
2890 panic("mptable_ioapic_probe: mptable_map failed\n");
2891 KKASSERT(!MPTABLE_POS_USE_DEFAULT(&mpt));
2892
2893 cth = mpt.mp_cth;
2894
7da2706b
SZ
2895 mptable_bus_info_alloc(cth, &bus_info);
2896
2897 if (TAILQ_EMPTY(&bus_info.mbi_list)) {
2898 if (bootverbose)
2899 kprintf("MPTABLE: INTSRC irq 0 -> GSI 2 (no bus)\n");
ae80be10 2900 ioapic_intsrc(0, 2, INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
7da2706b
SZ
2901 } else {
2902 struct mptable_ioapic_int_cbarg arg;
2903
2904 bzero(&arg, sizeof(arg));
2905 arg.bus_info = &bus_info;
2906
2907 error = mptable_iterate_entries(cth,
2908 mptable_ioapic_int_callback, &arg);
2909 if (error)
2910 panic("mptable_ioapic_int failed\n");
2911
2912 if (arg.ioapic_nint == 0) {
2913 if (bootverbose) {
2914 kprintf("MPTABLE: INTSRC irq 0 -> GSI 2 "
2915 "(no int)\n");
2916 }
ae80be10
SZ
2917 ioapic_intsrc(0, 2, INTR_TRIGGER_EDGE,
2918 INTR_POLARITY_HIGH);
7da2706b
SZ
2919 }
2920 }
2921
2922 mptable_bus_info_free(&bus_info);
2923
2924 mptable_unmap(&mpt);
2925}
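/*
 * Worked example of the GSI base assignment above (illustrative pin
 * counts): with two IOAPICs in the list, the first reporting 24 pins
 * and the second 16, the first gets mio_gsi_base 0 (GSIs 0-23) and the
 * second gets mio_gsi_base 24 (GSIs 24-39), matching the order the
 * IOAPICs were sorted into by mptable_ioapic_create_list().
 */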
2926
2927static struct ioapic_enumerator mptable_ioapic_enumerator = {
2928 .ioapic_prio = IOAPIC_ENUM_PRIO_MPTABLE,
2929 .ioapic_probe = mptable_ioapic_probe,
2930 .ioapic_enumerate = mptable_ioapic_enumerate
2931};
2932
2933static void
2934mptable_ioapic_enum_register(void)
2935{
2936 ioapic_enumerator_register(&mptable_ioapic_enumerator);
2937}
2938SYSINIT(mptable_ioapic, SI_BOOT2_PRESMP, SI_ORDER_ANY,
2939 mptable_ioapic_enum_register, 0);
e90e7ac4
SZ
2940
2941void
2942mptable_pci_int_dump(void)
2943{
2944 const struct mptable_pci_int *pci_int;
2945
2946 TAILQ_FOREACH(pci_int, &mptable_pci_int_list, mpci_link) {
eab22b0b 2947 kprintf("MPTABLE: %d:%d INT%c -> IOAPIC %d.%d\n",
e90e7ac4
SZ
2948 pci_int->mpci_bus,
2949 pci_int->mpci_dev,
eab22b0b 2950 pci_int->mpci_pin + 'A',
e90e7ac4
SZ
2951 pci_int->mpci_ioapic_idx,
2952 pci_int->mpci_ioapic_pin);
2953 }
2954}
2955
2956int
2957mptable_pci_int_route(int bus, int dev, int pin, int intline)
2958{
2959 const struct mptable_pci_int *pci_int;
2960 int irq = -1;
2961
2962 KKASSERT(pin >= 1);
2963 --pin; /* zero based */
2964
2965 TAILQ_FOREACH(pci_int, &mptable_pci_int_list, mpci_link) {
2966 if (pci_int->mpci_bus == bus &&
2967 pci_int->mpci_dev == dev &&
2968 pci_int->mpci_pin == pin)
2969 break;
2970 }
2971 if (pci_int != NULL) {
2972 int gsi;
2973
2974 gsi = ioapic_gsi(pci_int->mpci_ioapic_idx,
2975 pci_int->mpci_ioapic_pin);
2976 if (gsi >= 0) {
2977 irq = ioapic_abi_find_gsi(gsi,
2978 INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2979 }
2980 }
2981
2982 if (irq < 0) {
40fd5939
SZ
2983 if (bootverbose) {
2984 kprintf("MPTABLE: fixed interrupt routing "
eab22b0b 2985 "for %d:%d INT%c\n", bus, dev, pin + 'A');
40fd5939 2986 }
e90e7ac4
SZ
2987
2988 irq = ioapic_abi_find_irq(intline,
2989 INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2990 }
eab22b0b
SZ
2991
2992 if (irq >= 0 && bootverbose) {
2993 kprintf("MPTABLE: %d:%d INT%c routed to irq %d\n",
2994 bus, dev, pin + 'A', irq);
2995 }
e90e7ac4
SZ
2996 return irq;
2997}
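/*
 * Worked example of the routing above (illustrative values): a caller
 * asking for bus 0, dev 12, pin 2 (INTB) first has the pin converted to
 * the zero-based 1, then a matching mptable_pci_int entry is looked up;
 * if one points at IOAPIC 0 pin 17, the GSI comes from ioapic_gsi(0, 17)
 * and the irq from ioapic_abi_find_gsi() with level trigger / low
 * polarity.  Only if no entry or no GSI mapping exists does the code
 * fall back to the caller-supplied intline via ioapic_abi_find_irq().
 */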