Remove upc_{control,register} syscalls and everything related to them.
[dragonfly.git] / sys/platform/pc64/x86_64/machdep.c
1/*-
2 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
3 * Copyright (c) 1992 Terrence R. Lambert.
4 * Copyright (c) 2003 Peter Wemm.
5 * Copyright (c) 2008 The DragonFly Project.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
40 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
41 */
42
43//#include "use_npx.h"
44#include "use_isa.h"
45#include "opt_compat.h"
46#include "opt_cpu.h"
47#include "opt_ddb.h"
48#include "opt_directio.h"
49#include "opt_inet.h"
50#include "opt_ipx.h"
51#include "opt_msgbuf.h"
52#include "opt_swap.h"
53
54#include <sys/param.h>
55#include <sys/systm.h>
56#include <sys/sysproto.h>
57#include <sys/signalvar.h>
58#include <sys/kernel.h>
59#include <sys/linker.h>
60#include <sys/malloc.h>
61#include <sys/proc.h>
62#include <sys/priv.h>
63#include <sys/buf.h>
64#include <sys/reboot.h>
65#include <sys/mbuf.h>
66#include <sys/msgbuf.h>
67#include <sys/sysent.h>
68#include <sys/sysctl.h>
69#include <sys/vmmeter.h>
70#include <sys/bus.h>
71#include <sys/usched.h>
72#include <sys/reg.h>
73
74#include <vm/vm.h>
75#include <vm/vm_param.h>
76#include <sys/lock.h>
77#include <vm/vm_kern.h>
78#include <vm/vm_object.h>
79#include <vm/vm_page.h>
80#include <vm/vm_map.h>
81#include <vm/vm_pager.h>
82#include <vm/vm_extern.h>
83
84#include <sys/thread2.h>
85#include <sys/mplock2.h>
86#include <sys/mutex2.h>
87
88#include <sys/user.h>
89#include <sys/exec.h>
90#include <sys/cons.h>
91
92#include <ddb/ddb.h>
93
94#include <machine/cpu.h>
95#include <machine/clock.h>
96#include <machine/specialreg.h>
97#if JG
98#include <machine/bootinfo.h>
99#endif
100#include <machine/md_var.h>
101#include <machine/metadata.h>
102#include <machine/pc/bios.h>
103#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
104#include <machine/globaldata.h> /* CPU_prvspace */
105#include <machine/smp.h>
106#ifdef PERFMON
107#include <machine/perfmon.h>
108#endif
109#include <machine/cputypes.h>
110#include <machine/intr_machdep.h>
111
112#ifdef OLD_BUS_ARCH
113#include <bus/isa/isa_device.h>
114#endif
115#include <machine_base/isa/isa_intr.h>
116#include <bus/isa/rtc.h>
117#include <sys/random.h>
118#include <sys/ptrace.h>
119#include <machine/sigframe.h>
120
121#include <sys/machintr.h>
122#include <machine_base/icu/icu_abi.h>
123#include <machine_base/icu/elcr_var.h>
124#include <machine_base/apic/lapic.h>
125#include <machine_base/apic/ioapic.h>
126#include <machine_base/apic/ioapic_abi.h>
127#include <machine/mptable.h>
128
129#define PHYSMAP_ENTRIES 10
130
131extern u_int64_t hammer_time(u_int64_t, u_int64_t);
132
133extern void printcpuinfo(void); /* XXX header file */
134extern void identify_cpu(void);
135#if JG
136extern void finishidentcpu(void);
137#endif
138extern void panicifcpuunsupported(void);
139
140static void cpu_startup(void *);
141static void pic_finish(void *);
142static void cpu_finish(void *);
143
144#ifndef CPU_DISABLE_SSE
145static void set_fpregs_xmm(struct save87 *, struct savexmm *);
146static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
147#endif /* CPU_DISABLE_SSE */
148#ifdef DIRECTIO
149extern void ffs_rawread_setup(void);
150#endif /* DIRECTIO */
151static void init_locks(void);
152
153SYSINIT(cpu, SI_BOOT2_START_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
154SYSINIT(pic_finish, SI_BOOT2_FINISH_PIC, SI_ORDER_FIRST, pic_finish, NULL)
155SYSINIT(cpu_finish, SI_BOOT2_FINISH_CPU, SI_ORDER_FIRST, cpu_finish, NULL)
156
157#ifdef DDB
158extern vm_offset_t ksym_start, ksym_end;
159#endif
160
161struct privatespace CPU_prvspace[MAXCPU] __aligned(4096); /* XXX */
162
163int _udatasel, _ucodesel, _ucode32sel;
164u_long atdevbase;
165int64_t tsc_offsets[MAXCPU];
166
167#if defined(SWTCH_OPTIM_STATS)
168extern int swtch_optim_stats;
169SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
170 CTLFLAG_RD, &swtch_optim_stats, 0, "");
171SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
172 CTLFLAG_RD, &tlb_flush_count, 0, "");
173#endif
174
175long physmem = 0;
176
177u_long ebda_addr = 0;
178
179int imcr_present = 0;
180
181int naps = 0; /* # of Application processors */
182
183u_int base_memory;
184struct mtx dt_lock; /* lock for GDT and LDT */
185
186static int
187sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
188{
189 u_long pmem = ctob(physmem);
190
191 int error = sysctl_handle_long(oidp, &pmem, 0, req);
192 return (error);
193}
194
195SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG|CTLFLAG_RD,
196 0, 0, sysctl_hw_physmem, "LU", "Total system memory in bytes (number of pages * page size)");
197
198static int
199sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
200{
201 int error = sysctl_handle_int(oidp, 0,
202 ctob(physmem - vmstats.v_wire_count), req);
203 return (error);
204}
205
206SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
207 0, 0, sysctl_hw_usermem, "IU", "");
208
209static int
210sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
211{
212 int error = sysctl_handle_int(oidp, 0,
213 x86_64_btop(avail_end - avail_start), req);
214 return (error);
215}
216
217SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
218 0, 0, sysctl_hw_availpages, "I", "");
219
220vm_paddr_t Maxmem;
221vm_paddr_t Realmem;
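/* Realmem accumulates the raw BIOS SMAP total; Maxmem is one past the highest usable physical page (see getmemsize() below) */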
222
223/*
224 * The number of PHYSMAP entries must be one less than the number of
225 * PHYSSEG entries because the PHYSMAP entry that spans the largest
226 * physical address that is accessible by ISA DMA is split into two
227 * PHYSSEG entries.
228 */
229#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
230
231vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
232vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
233
234/* must be 2 less so 0 0 can signal end of chunks */
235#define PHYS_AVAIL_ARRAY_END (NELEM(phys_avail) - 2)
236#define DUMP_AVAIL_ARRAY_END (NELEM(dump_avail) - 2)
237
238static vm_offset_t buffer_sva, buffer_eva;
239vm_offset_t clean_sva, clean_eva;
240static vm_offset_t pager_sva, pager_eva;
241static struct trapframe proc0_tf;
242
243static void
244cpu_startup(void *dummy)
245{
246 caddr_t v;
247 vm_size_t size = 0;
248 vm_offset_t firstaddr;
249
250 /*
251 * Good {morning,afternoon,evening,night}.
252 */
253 kprintf("%s", version);
254 startrtclock();
255 printcpuinfo();
256 panicifcpuunsupported();
257#ifdef PERFMON
258 perfmon_init();
259#endif
260 kprintf("real memory = %ju (%ju MB)\n",
261 (intmax_t)Realmem,
262 (intmax_t)Realmem / 1024 / 1024);
263 /*
264 * Display any holes after the first chunk of extended memory.
265 */
266 if (bootverbose) {
267 int indx;
268
269 kprintf("Physical memory chunk(s):\n");
270 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
271 vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx];
272
273 kprintf("0x%08jx - 0x%08jx, %ju bytes (%ju pages)\n",
274 (intmax_t)phys_avail[indx],
275 (intmax_t)phys_avail[indx + 1] - 1,
276 (intmax_t)size1,
277 (intmax_t)(size1 / PAGE_SIZE));
278 }
279 }
280
281 /*
282 * Allocate space for system data structures.
283 * The first available kernel virtual address is in "v".
284 * As pages of kernel virtual memory are allocated, "v" is incremented.
285 * As pages of memory are allocated and cleared,
286 * "firstaddr" is incremented.
287 * An index into the kernel page table corresponding to the
288 * virtual memory address maintained in "v" is kept in "mapaddr".
289 */
290
291 /*
292 * Make two passes. The first pass calculates how much memory is
293 * needed and allocates it. The second pass assigns virtual
294 * addresses to the various data structures.
295 */
296 firstaddr = 0;
297again:
298 v = (caddr_t)firstaddr;
299
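/*
 * valloc()/valloclim() are simple bump allocators: they hand out space by
 * advancing "v" past "num" objects. During the sizing pass (firstaddr == 0)
 * nothing is touched; the second pass assigns the real addresses.
 */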
300#define valloc(name, type, num) \
301 (name) = (type *)v; v = (caddr_t)((name)+(num))
302#define valloclim(name, type, num, lim) \
303 (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
304
305 /*
306 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
307 * For the first 64MB of ram nominally allocate sufficient buffers to
308 * cover 1/4 of our ram. Beyond the first 64MB allocate additional
309 * buffers to cover 1/20 of our ram over 64MB. When auto-sizing
310 * the buffer cache we limit the eventual kva reservation to
311 * maxbcache bytes.
312 *
313 * factor represents the 1/4 x ram conversion.
314 */
315 if (nbuf == 0) {
316 long factor = 4 * BKVASIZE / 1024;
317 long kbytes = physmem * (PAGE_SIZE / 1024);
318
319 nbuf = 50;
320 if (kbytes > 4096)
321 nbuf += min((kbytes - 4096) / factor, 65536 / factor);
322 if (kbytes > 65536)
323 nbuf += (kbytes - 65536) * 2 / (factor * 5);
324 if (maxbcache && nbuf > maxbcache / BKVASIZE)
325 nbuf = maxbcache / BKVASIZE;
326 }
327
328 /*
329 * Do not allow the buffer_map to be more than 1/2 the size of the
330 * kernel_map.
331 */
332 if (nbuf > (virtual_end - virtual_start +
333 virtual2_end - virtual2_start) / (BKVASIZE * 2)) {
334 nbuf = (virtual_end - virtual_start +
335 virtual2_end - virtual2_start) / (BKVASIZE * 2);
336 kprintf("Warning: nbufs capped at %ld due to kvm\n", nbuf);
337 }
338
339 /*
340 * Do not allow the buffer_map to use more than 50% of available
341 * physical-equivalent memory. Since the VM pages which back
342 * individual buffers are typically wired, having too many bufs
343 * can prevent the system from paging properly.
344 */
345 if (nbuf > physmem * PAGE_SIZE / (BKVASIZE * 2)) {
346 nbuf = physmem * PAGE_SIZE / (BKVASIZE * 2);
347 kprintf("Warning: nbufs capped at %ld due to physmem\n", nbuf);
348 }
349
350 /*
351 * Do not allow the sizeof(struct buf) * nbuf to exceed half of
352 * the valloc space which is just the virtual_end - virtual_start
353 * section. We use valloc() to allocate the buf header array.
354 */
355 if (nbuf > (virtual_end - virtual_start) / sizeof(struct buf) / 2) {
356 nbuf = (virtual_end - virtual_start) /
357 sizeof(struct buf) / 2;
358 kprintf("Warning: nbufs capped at %ld due to valloc "
359 "considerations", nbuf);
360 }
361
362 nswbuf = lmax(lmin(nbuf / 4, 256), 16);
363#ifdef NSWBUF_MIN
364 if (nswbuf < NSWBUF_MIN)
365 nswbuf = NSWBUF_MIN;
366#endif
367#ifdef DIRECTIO
368 ffs_rawread_setup();
369#endif
370
371 valloc(swbuf, struct buf, nswbuf);
372 valloc(buf, struct buf, nbuf);
373
374 /*
375 * End of first pass, size has been calculated so allocate memory
376 */
377 if (firstaddr == 0) {
378 size = (vm_size_t)(v - firstaddr);
379 firstaddr = kmem_alloc(&kernel_map, round_page(size));
380 if (firstaddr == 0)
381 panic("startup: no room for tables");
382 goto again;
383 }
384
385 /*
386 * End of second pass, addresses have been assigned
387 *
388 * nbuf is an int, make sure we don't overflow the field.
389 */
390 if ((vm_size_t)(v - firstaddr) != size)
391 panic("startup: table size inconsistency");
392
393 kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva,
394 ((vm_offset_t)nbuf * BKVASIZE) +
395 (nswbuf * MAXPHYS) + pager_map_size);
396 kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva,
397 ((vm_offset_t)nbuf * BKVASIZE));
398 buffer_map.system_map = 1;
399 kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva,
400 ((vm_offset_t)nswbuf * MAXPHYS) + pager_map_size);
401 pager_map.system_map = 1;
402
403#if defined(USERCONFIG)
404 userconfig();
405 cninit(); /* the preferred console may have changed */
406#endif
407
408 kprintf("avail memory = %ju (%ju MB)\n",
409 (uintmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages),
410 (uintmax_t)ptoa(vmstats.v_free_count + vmstats.v_dma_pages) /
411 1024 / 1024);
412
413 /*
414 * Set up buffers, so they can be used to read disk labels.
415 */
416 bufinit();
417 vm_pager_bufferinit();
418}
419
420static void
421cpu_finish(void *dummy __unused)
422{
423 cpu_setregs();
424}
425
426static void
427pic_finish(void *dummy __unused)
428{
429 /* Log ELCR information */
430 elcr_dump();
431
432 /* Log MPTABLE information */
433 mptable_pci_int_dump();
434
435 /* Finalize PCI */
436 MachIntrABI.finalize();
437}
438
439/*
440 * Send an interrupt to process.
441 *
442 * Stack is set up to allow sigcode stored
443 * at top to call routine, followed by kcall
444 * to sigreturn routine below. After sigreturn
445 * resets the signal mask, the stack, and the
446 * frame pointer, it returns to the user
447 * specified pc, psl.
448 */
449void
450sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
451{
452 struct lwp *lp = curthread->td_lwp;
453 struct proc *p = lp->lwp_proc;
454 struct trapframe *regs;
455 struct sigacts *psp = p->p_sigacts;
456 struct sigframe sf, *sfp;
457 int oonstack;
458 char *sp;
459
460 regs = lp->lwp_md.md_regs;
461 oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;
462
463 /* Save user context */
464 bzero(&sf, sizeof(struct sigframe));
465 sf.sf_uc.uc_sigmask = *mask;
466 sf.sf_uc.uc_stack = lp->lwp_sigstk;
467 sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
468 KKASSERT(__offsetof(struct trapframe, tf_rdi) == 0);
469 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(struct trapframe));
470
471 /* Make the size of the saved context visible to userland */
472 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext);
473
474 /* Allocate and validate space for the signal handler context. */
475 if ((lp->lwp_flags & LWP_ALTSTACK) != 0 && !oonstack &&
476 SIGISMEMBER(psp->ps_sigonstack, sig)) {
477 sp = (char *)(lp->lwp_sigstk.ss_sp + lp->lwp_sigstk.ss_size -
478 sizeof(struct sigframe));
479 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
480 } else {
481 /* We take red zone into account */
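 /* (the 128-byte red zone below %rsp is reserved by the x86_64 ABI) */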
482 sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
483 }
484
485 /* Align to 16 bytes */
486 sfp = (struct sigframe *)((intptr_t)sp & ~(intptr_t)0xF);
487
488 /* Translate the signal if appropriate */
489 if (p->p_sysent->sv_sigtbl) {
490 if (sig <= p->p_sysent->sv_sigsize)
491 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
492 }
493
494 /*
495 * Build the argument list for the signal handler.
496 *
497 * Arguments are in registers (%rdi, %rsi, %rdx, %rcx)
498 */
499 regs->tf_rdi = sig; /* argument 1 */
500 regs->tf_rdx = (register_t)&sfp->sf_uc; /* argument 3 */
501
502 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
503 /*
504 * Signal handler installed with SA_SIGINFO.
505 *
506 * action(signo, siginfo, ucontext)
507 */
508 regs->tf_rsi = (register_t)&sfp->sf_si; /* argument 2 */
509 regs->tf_rcx = (register_t)regs->tf_addr; /* argument 4 */
510 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
511
512 /* fill siginfo structure */
513 sf.sf_si.si_signo = sig;
514 sf.sf_si.si_code = code;
515 sf.sf_si.si_addr = (void *)regs->tf_addr;
516 } else {
517 /*
518 * Old FreeBSD-style arguments.
519 *
520 * handler (signo, code, [uc], addr)
521 */
522 regs->tf_rsi = (register_t)code; /* argument 2 */
523 regs->tf_rcx = (register_t)regs->tf_addr; /* argument 4 */
524 sf.sf_ahu.sf_handler = catcher;
525 }
526
527 /*
528 * If we're a vm86 process, we want to save the segment registers.
529 * We also change eflags to be our emulated eflags, not the actual
530 * eflags.
531 */
532#if JG
533 if (regs->tf_eflags & PSL_VM) {
534 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
535 struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
536
537 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
538 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
539 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
540 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
541
542 if (vm86->vm86_has_vme == 0)
543 sf.sf_uc.uc_mcontext.mc_eflags =
544 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
545 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
546
547 /*
548 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
549 * syscalls made by the signal handler. This just avoids
550 * wasting time for our lazy fixup of such faults. PSL_NT
551 * does nothing in vm86 mode, but vm86 programs can set it
552 * almost legitimately in probes for old cpu types.
553 */
554 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
555 }
556#endif
557
558 /*
559 * Save the FPU state and reinit the FP unit
560 */
561 npxpush(&sf.sf_uc.uc_mcontext);
562
563 /*
564 * Copy the sigframe out to the user's stack.
565 */
566 if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
567 /*
568 * Something is wrong with the stack pointer.
569 * ...Kill the process.
570 */
571 sigexit(lp, SIGILL);
572 }
573
574 regs->tf_rsp = (register_t)sfp;
575 regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
576
577 /*
578 * i386 abi specifies that the direction flag must be cleared
579 * on function entry
580 */
581 regs->tf_rflags &= ~(PSL_T|PSL_D);
582
583 /*
584 * 64 bit mode has a code and stack selector but
585 * no data or extra selector. %fs and %gs are not
586 * stored in-context.
587 */
588 regs->tf_cs = _ucodesel;
589 regs->tf_ss = _udatasel;
590 clear_quickret();
591}
592
593/*
594 * Sanitize the trapframe for a virtual kernel passing control to a custom
595 * VM context. Remove any items that would otherwise create a privilege
596 * issue.
597 *
598 * XXX at the moment we allow userland to set the resume flag. Is this a
599 * bad idea?
600 */
601int
602cpu_sanitize_frame(struct trapframe *frame)
603{
604 frame->tf_cs = _ucodesel;
605 frame->tf_ss = _udatasel;
606 /* XXX VM (8086) mode not supported? */
607 frame->tf_rflags &= (PSL_RF | PSL_USERCHANGE | PSL_VM_UNSUPP);
608 frame->tf_rflags |= PSL_RESERVED_DEFAULT | PSL_I;
609
610 return(0);
611}
612
613/*
614 * Sanitize the tls so loading the descriptor does not blow up
615 * on us. For x86_64 we don't have to do anything.
616 */
617int
618cpu_sanitize_tls(struct savetls *tls)
619{
620 return(0);
621}
622
623/*
624 * sigreturn(ucontext_t *sigcntxp)
625 *
626 * System call to cleanup state after a signal
627 * has been taken. Reset signal mask and
628 * stack state from context left by sendsig (above).
629 * Return to previous pc and psl as specified by
630 * context left by sendsig. Check carefully to
631 * make sure that the user has not modified the
632 * state to gain improper privileges.
633 *
634 * MPSAFE
635 */
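/*
 * EFL_SECURE() is true only when no bits outside the user-changeable rflags
 * set differ between the new and old values; CS_SECURE() is true only when
 * the code selector requests user privilege (ring 3).
 */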
636#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
637#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
638
639int
640sys_sigreturn(struct sigreturn_args *uap)
641{
642 struct lwp *lp = curthread->td_lwp;
643 struct trapframe *regs;
644 ucontext_t uc;
645 ucontext_t *ucp;
646 register_t rflags;
647 int cs;
648 int error;
649
650 /*
651 * We have to copy the information into kernel space so userland
652 * can't modify it while we are sniffing it.
653 */
654 regs = lp->lwp_md.md_regs;
655 error = copyin(uap->sigcntxp, &uc, sizeof(uc));
656 if (error)
657 return (error);
658 ucp = &uc;
659 rflags = ucp->uc_mcontext.mc_rflags;
660
661 /* VM (8086) mode not supported */
662 rflags &= ~PSL_VM_UNSUPP;
663
664#if JG
665 if (eflags & PSL_VM) {
666 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
667 struct vm86_kernel *vm86;
668
669 /*
670 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
671 * set up the vm86 area, and we can't enter vm86 mode.
672 */
673 if (lp->lwp_thread->td_pcb->pcb_ext == 0)
674 return (EINVAL);
675 vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
676 if (vm86->vm86_inited == 0)
677 return (EINVAL);
678
679 /* go back to user mode if both flags are set */
680 if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
681 trapsignal(lp, SIGBUS, 0);
682
683 if (vm86->vm86_has_vme) {
684 eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
685 (eflags & VME_USERCHANGE) | PSL_VM;
686 } else {
687 vm86->vm86_eflags = eflags; /* save VIF, VIP */
688 eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
689 (eflags & VM_USERCHANGE) | PSL_VM;
690 }
691 bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe));
692 tf->tf_eflags = eflags;
693 tf->tf_vm86_ds = tf->tf_ds;
694 tf->tf_vm86_es = tf->tf_es;
695 tf->tf_vm86_fs = tf->tf_fs;
696 tf->tf_vm86_gs = tf->tf_gs;
697 tf->tf_ds = _udatasel;
698 tf->tf_es = _udatasel;
699 tf->tf_fs = _udatasel;
700 tf->tf_gs = _udatasel;
701 } else
702#endif
703 {
704 /*
705 * Don't allow users to change privileged or reserved flags.
706 */
707 /*
708 * XXX do allow users to change the privileged flag PSL_RF.
709 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
710 * should sometimes set it there too. tf_eflags is kept in
711 * the signal context during signal handling and there is no
712 * other place to remember it, so the PSL_RF bit may be
713 * corrupted by the signal handler without us knowing.
714 * Corruption of the PSL_RF bit at worst causes one more or
715 * one less debugger trap, so allowing it is fairly harmless.
716 */
717 if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
718 kprintf("sigreturn: rflags = 0x%lx\n", (long)rflags);
719 return(EINVAL);
720 }
721
722 /*
723 * Don't allow users to load a valid privileged %cs. Let the
724 * hardware check for invalid selectors, excess privilege in
725 * other selectors, invalid %eip's and invalid %esp's.
726 */
727 cs = ucp->uc_mcontext.mc_cs;
728 if (!CS_SECURE(cs)) {
729 kprintf("sigreturn: cs = 0x%x\n", cs);
730 trapsignal(lp, SIGBUS, T_PROTFLT);
731 return(EINVAL);
732 }
733 bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(struct trapframe));
734 }
735
736 /*
737 * Restore the FPU state from the frame
738 */
739 crit_enter();
740 npxpop(&ucp->uc_mcontext);
741
742 if (ucp->uc_mcontext.mc_onstack & 1)
743 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
744 else
745 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
746
747 lp->lwp_sigmask = ucp->uc_sigmask;
748 SIG_CANTMASK(lp->lwp_sigmask);
749 clear_quickret();
750 crit_exit();
751 return(EJUSTRETURN);
752}
753
754/*
755 * Machine dependent boot() routine
756 *
757 * I haven't seen anything to put here yet
758 * Possibly some stuff might be grafted back here from boot()
759 */
760void
761cpu_boot(int howto)
762{
763}
764
765/*
766 * Shutdown the CPU as much as possible
767 */
768void
769cpu_halt(void)
770{
771 for (;;)
772 __asm__ __volatile("hlt");
773}
774
775/*
776 * cpu_idle() represents the idle LWKT. You cannot return from this function
777 * (unless you want to blow things up!). Instead we look for runnable threads
778 * and loop or halt as appropriate. Giant is not held on entry to the thread.
779 *
780 * The main loop is entered with a critical section held, we must release
781 * the critical section before doing anything else. lwkt_switch() will
782 * check for pending interrupts due to entering and exiting its own
783 * critical section.
784 *
785 * NOTE: On an SMP system we rely on a scheduler IPI to wake a HLTed cpu up.
786 * However, there are cases where the idlethread will be entered with
787 * the possibility that no IPI will occur and in such cases
788 * lwkt_switch() sets TDF_IDLE_NOHLT.
789 *
790 * NOTE: cpu_idle_hlt again defaults to 2 (use ACPI sleep states). Set to
791 * 1 to just use hlt and for debugging purposes.
792 *
793 * NOTE: cpu_idle_repeat determines how many entries into the idle thread
794 * must occur before it starts using ACPI halt.
795 */
796static int cpu_idle_hlt = 2;
797static int cpu_idle_hltcnt;
798static int cpu_idle_spincnt;
799static u_int cpu_idle_repeat = 750;
800SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
801 &cpu_idle_hlt, 0, "Idle loop HLT enable");
802SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
803 &cpu_idle_hltcnt, 0, "Idle loop entry halts");
804SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
805 &cpu_idle_spincnt, 0, "Idle loop entry spins");
806SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_repeat, CTLFLAG_RW,
807 &cpu_idle_repeat, 0, "Idle entries before acpi hlt");
808
809static void
810cpu_idle_default_hook(void)
811{
812 /*
813 * We must guarantee that hlt is exactly the instruction
814 * following the sti.
815 */
816 __asm __volatile("sti; hlt");
817}
818
819/* Other subsystems (e.g., ACPI) can hook this later. */
820void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
821
822void
823cpu_idle(void)
824{
825 globaldata_t gd = mycpu;
826 struct thread *td __debugvar = gd->gd_curthread;
827 int reqflags;
828 int quick;
829
830 crit_exit();
831 KKASSERT(td->td_critcount == 0);
832 for (;;) {
833 /*
834 * See if there are any LWKTs ready to go.
835 */
836 lwkt_switch();
837
838 /*
839 * When halting inside a cli we must check for reqflags
840 * races, particularly [re]schedule requests. Running
841 * splz() does the job.
842 *
843 * cpu_idle_hlt:
844 * 0 Never halt, just spin
845 *
846 * 1 Always use HLT (or MONITOR/MWAIT if avail).
847 * This typically eats more power than the
848 * ACPI halt.
849 *
850 * 2 Use HLT/MONITOR/MWAIT up to a point and then
851 * use the ACPI halt (default). This is a hybrid
852 * approach. See machdep.cpu_idle_repeat.
853 *
854 * 3 Always use the ACPI halt. This typically
855 * eats the least amount of power but the cpu
856 * will be slow waking up. Slows down e.g.
857 * compiles and other pipe/event oriented stuff.
858 *
859 * NOTE: Interrupts are enabled and we are not in a critical
860 * section.
861 *
862 * NOTE: Preemptions do not reset gd_idle_repeat. Also we
863 * don't bother capping gd_idle_repeat, it is ok if
864 * it overflows.
865 */
866 ++gd->gd_idle_repeat;
867 reqflags = gd->gd_reqflags;
868 quick = (cpu_idle_hlt == 1) ||
869 (cpu_idle_hlt < 3 &&
870 gd->gd_idle_repeat < cpu_idle_repeat);
871
872 if (quick && (cpu_mi_feature & CPU_MI_MONITOR) &&
873 (reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
874 splz(); /* XXX */
875 cpu_mmw_pause_int(&gd->gd_reqflags, reqflags);
876 ++cpu_idle_hltcnt;
877 } else if (cpu_idle_hlt) {
878 __asm __volatile("cli");
879 splz();
880 if ((gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
881 if (quick)
882 cpu_idle_default_hook();
883 else
884 cpu_idle_hook();
885 }
886 __asm __volatile("sti");
887 ++cpu_idle_hltcnt;
888 } else {
889 splz();
890 __asm __volatile("sti");
891 ++cpu_idle_spincnt;
892 }
893 }
894}
895
896/*
897 * This routine is called if a spinlock has been held through the
898 * exponential backoff period and is seriously contested. On a real cpu
899 * we let it spin.
900 */
901void
902cpu_spinlock_contested(void)
903{
904 cpu_pause();
905}
906
907/*
908 * Clear registers on exec
909 */
910void
911exec_setregs(u_long entry, u_long stack, u_long ps_strings)
912{
913 struct thread *td = curthread;
914 struct lwp *lp = td->td_lwp;
915 struct pcb *pcb = td->td_pcb;
916 struct trapframe *regs = lp->lwp_md.md_regs;
917
918 /* was i386_user_cleanup() in NetBSD */
919 user_ldt_free(pcb);
920
921 clear_quickret();
922 bzero((char *)regs, sizeof(struct trapframe));
923 regs->tf_rip = entry;
924 regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; /* align the stack */
925 regs->tf_rdi = stack; /* argv */
926 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
927 regs->tf_ss = _udatasel;
928 regs->tf_cs = _ucodesel;
929 regs->tf_rbx = ps_strings;
930
931 /*
932 * Reset the hardware debug registers if they were in use.
933 * They won't have any meaning for the newly exec'd process.
934 */
935 if (pcb->pcb_flags & PCB_DBREGS) {
936 pcb->pcb_dr0 = 0;
937 pcb->pcb_dr1 = 0;
938 pcb->pcb_dr2 = 0;
939 pcb->pcb_dr3 = 0;
940 pcb->pcb_dr6 = 0;
0855a2af 941 pcb->pcb_dr7 = 0; /* JG set bit 10? */
c8fe38ae
MD
942 if (pcb == td->td_pcb) {
943 /*
944 * Clear the debug registers on the running
945 * CPU, otherwise they will end up affecting
946 * the next process we switch to.
947 */
948 reset_dbregs();
949 }
950 pcb->pcb_flags &= ~PCB_DBREGS;
951 }
952
953 /*
954 * Initialize the math emulator (if any) for the current process.
955 * Actually, just clear the bit that says that the emulator has
956 * been initialized. Initialization is delayed until the process
957 * traps to the emulator (if it is done at all) mainly because
958 * emulators don't provide an entry point for initialization.
959 */
960 pcb->pcb_flags &= ~FP_SOFTFP;
961
962 /*
963 * NOTE: do not set CR0_TS here. npxinit() must do it after clearing
964 * gd_npxthread. Otherwise a preemptive interrupt thread
965 * may panic in npxdna().
966 */
967 crit_enter();
968 load_cr0(rcr0() | CR0_MP);
969
970 /*
971 * NOTE: The MSR values must be correct so we can return to
972 * userland. gd_user_fs/gs must be correct so the switch
973 * code knows what the current MSR values are.
974 */
975 pcb->pcb_fsbase = 0; /* Values loaded from PCB on switch */
976 pcb->pcb_gsbase = 0;
977 mdcpu->gd_user_fs = 0; /* Cache of current MSR values */
978 mdcpu->gd_user_gs = 0;
979 wrmsr(MSR_FSBASE, 0); /* Set MSR values for return to userland */
980 wrmsr(MSR_KGSBASE, 0);
981
982 /* Initialize the npx (if any) for the current process. */
983 npxinit(__INITIAL_FPUCW__);
984 crit_exit();
985
986 pcb->pcb_ds = _udatasel;
987 pcb->pcb_es = _udatasel;
988 pcb->pcb_fs = _udatasel;
989 pcb->pcb_gs = _udatasel;
990}
991
992void
993cpu_setregs(void)
994{
995 register_t cr0;
996
997 cr0 = rcr0();
998 cr0 |= CR0_NE; /* Done by npxinit() */
999 cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */
1000 cr0 |= CR0_WP | CR0_AM;
1001 load_cr0(cr0);
1002 load_gs(_udatasel);
1003}
1004
1005static int
1006sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
1007{
1008 int error;
1009 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1010 req);
1011 if (!error && req->newptr)
1012 resettodr();
1013 return (error);
1014}
1015
1016SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1017 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1018
1019SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1020 CTLFLAG_RW, &disable_rtc_set, 0, "");
1021
1022#if JG
1023SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1024 CTLFLAG_RD, &bootinfo, bootinfo, "");
1025#endif
1026
1027SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
1028 CTLFLAG_RW, &wall_cmos_clock, 0, "");
1029
1030extern u_long bootdev; /* not a cdev_t - encoding is different */
1031SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
1032 CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
1033
1034/*
1035 * Initialize x86_64 and configure to run kernel
1036 */
1037
1038/*
1039 * Initialize segments & interrupt table
1040 */
1041
1042int _default_ldt;
1043struct user_segment_descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
1044struct gate_descriptor idt_arr[MAXCPU][NIDT];
1045#if JG
1046union descriptor ldt[NLDT]; /* local descriptor table */
1047#endif
1048
1049/* table descriptors - used to load tables by cpu */
1050struct region_descriptor r_gdt;
1051struct region_descriptor r_idt_arr[MAXCPU];
1052
1053/* JG proc0paddr is a virtual address */
1054void *proc0paddr;
1055/* JG alignment? */
1056char proc0paddr_buff[LWKT_THREAD_STACK];
1057
1058
1059/* software prototypes -- in more palatable form */
1060struct soft_segment_descriptor gdt_segs[] = {
1061/* GNULL_SEL 0 Null Descriptor */
1062{ 0x0, /* segment base address */
1063 0x0, /* length */
1064 0, /* segment type */
1065 0, /* segment descriptor priority level */
1066 0, /* segment descriptor present */
1067 0, /* long */
1068 0, /* default 32 vs 16 bit size */
1069 0 /* limit granularity (byte/page units)*/ },
1070/* GCODE_SEL 1 Code Descriptor for kernel */
1071{ 0x0, /* segment base address */
1072 0xfffff, /* length - all address space */
1073 SDT_MEMERA, /* segment type */
1074 SEL_KPL, /* segment descriptor priority level */
1075 1, /* segment descriptor present */
1076 1, /* long */
1077 0, /* default 32 vs 16 bit size */
1078 1 /* limit granularity (byte/page units)*/ },
1079/* GDATA_SEL 2 Data Descriptor for kernel */
1080{ 0x0, /* segment base address */
1081 0xfffff, /* length - all address space */
1082 SDT_MEMRWA, /* segment type */
1083 SEL_KPL, /* segment descriptor priority level */
1084 1, /* segment descriptor present */
1085 1, /* long */
1086 0, /* default 32 vs 16 bit size */
1087 1 /* limit granularity (byte/page units)*/ },
1088/* GUCODE32_SEL 3 32 bit Code Descriptor for user */
1089{ 0x0, /* segment base address */
1090 0xfffff, /* length - all address space */
1091 SDT_MEMERA, /* segment type */
1092 SEL_UPL, /* segment descriptor priority level */
1093 1, /* segment descriptor present */
1094 0, /* long */
1095 1, /* default 32 vs 16 bit size */
1096 1 /* limit granularity (byte/page units)*/ },
1097/* GUDATA_SEL 4 32/64 bit Data Descriptor for user */
1098{ 0x0, /* segment base address */
1099 0xfffff, /* length - all address space */
1100 SDT_MEMRWA, /* segment type */
1101 SEL_UPL, /* segment descriptor priority level */
1102 1, /* segment descriptor present */
1103 0, /* long */
1104 1, /* default 32 vs 16 bit size */
1105 1 /* limit granularity (byte/page units)*/ },
1106/* GUCODE_SEL 5 64 bit Code Descriptor for user */
1107{ 0x0, /* segment base address */
1108 0xfffff, /* length - all address space */
1109 SDT_MEMERA, /* segment type */
1110 SEL_UPL, /* segment descriptor priority level */
1111 1, /* segment descriptor present */
1112 1, /* long */
1113 0, /* default 32 vs 16 bit size */
1114 1 /* limit granularity (byte/page units)*/ },
1115/* GPROC0_SEL 6 Proc 0 Tss Descriptor */
1116{
1117 0x0, /* segment base address */
1118 sizeof(struct x86_64tss)-1,/* length - all address space */
1119 SDT_SYSTSS, /* segment type */
1120 SEL_KPL, /* segment descriptor priority level */
1121 1, /* segment descriptor present */
1122 0, /* long */
1123 0, /* unused - default 32 vs 16 bit size */
1124 0 /* limit granularity (byte/page units)*/ },
1125/* Actually, the TSS is a system descriptor which is double size */
1126{ 0x0, /* segment base address */
1127 0x0, /* length */
1128 0, /* segment type */
1129 0, /* segment descriptor priority level */
1130 0, /* segment descriptor present */
1131 0, /* long */
1132 0, /* default 32 vs 16 bit size */
1133 0 /* limit granularity (byte/page units)*/ },
1134/* GUGS32_SEL 8 32 bit GS Descriptor for user */
1135{ 0x0, /* segment base address */
1136 0xfffff, /* length - all address space */
1137 SDT_MEMRWA, /* segment type */
1138 SEL_UPL, /* segment descriptor priority level */
1139 1, /* segment descriptor present */
1140 0, /* long */
1141 1, /* default 32 vs 16 bit size */
1142 1 /* limit granularity (byte/page units)*/ },
1143};
1144
1145void
1146setidt_global(int idx, inthand_t *func, int typ, int dpl, int ist)
1147{
1148 int cpu;
1149
1150 for (cpu = 0; cpu < MAXCPU; ++cpu) {
1151 struct gate_descriptor *ip = &idt_arr[cpu][idx];
1152
1153 ip->gd_looffset = (uintptr_t)func;
1154 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
1155 ip->gd_ist = ist;
1156 ip->gd_xx = 0;
1157 ip->gd_type = typ;
1158 ip->gd_dpl = dpl;
1159 ip->gd_p = 1;
1160 ip->gd_hioffset = ((uintptr_t)func)>>16 ;
1161 }
1162}
1163
1164void
1165setidt(int idx, inthand_t *func, int typ, int dpl, int ist, int cpu)
1166{
1167 struct gate_descriptor *ip;
1168
1169 KASSERT(cpu >= 0 && cpu < ncpus, ("invalid cpu %d", cpu));
1170
1171 ip = &idt_arr[cpu][idx];
1172 ip->gd_looffset = (uintptr_t)func;
1173 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
1174 ip->gd_ist = ist;
1175 ip->gd_xx = 0;
1176 ip->gd_type = typ;
1177 ip->gd_dpl = dpl;
1178 ip->gd_p = 1;
1179 ip->gd_hioffset = ((uintptr_t)func)>>16 ;
1180}
1181
1182#define IDTVEC(name) __CONCAT(X,name)
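/* IDTVEC(foo) expands to Xfoo, the low-level assembly entry stub for that vector */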
1183
1184extern inthand_t
1185 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1186 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1187 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1188 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1189 IDTVEC(xmm), IDTVEC(dblfault),
1190 IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
1191
1192#ifdef DEBUG_INTERRUPTS
1193extern inthand_t *Xrsvdary[256];
1194#endif
1195
1196void
1197sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
1198{
1199 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
1200 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1201 ssd->ssd_type = sd->sd_type;
1202 ssd->ssd_dpl = sd->sd_dpl;
1203 ssd->ssd_p = sd->sd_p;
1204 ssd->ssd_def32 = sd->sd_def32;
1205 ssd->ssd_gran = sd->sd_gran;
1206}
1207
1208void
1209ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
1210{
1211
1212 sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
1213 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
1214 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
1215 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
1216 sd->sd_type = ssd->ssd_type;
1217 sd->sd_dpl = ssd->ssd_dpl;
1218 sd->sd_p = ssd->ssd_p;
1219 sd->sd_long = ssd->ssd_long;
1220 sd->sd_def32 = ssd->ssd_def32;
1221 sd->sd_gran = ssd->ssd_gran;
1222}
1223
1224void
1225ssdtosyssd(struct soft_segment_descriptor *ssd,
1226 struct system_segment_descriptor *sd)
1227{
1228
1229 sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
1230 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
1231 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
1232 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
1233 sd->sd_type = ssd->ssd_type;
1234 sd->sd_dpl = ssd->ssd_dpl;
1235 sd->sd_p = ssd->ssd_p;
1236 sd->sd_gran = ssd->ssd_gran;
1237}
1238
1239/*
1240 * Populate the (physmap) array with base/bound pairs describing the
1241 * available physical memory in the system, then test this memory and
1242 * build the phys_avail array describing the actually-available memory.
1243 *
1244 * If we cannot accurately determine the physical memory map, then use
1245 * value from the 0xE801 call, and failing that, the RTC.
1246 *
1247 * Total memory size may be set by the kernel environment variable
1248 * hw.physmem or the compile-time define MAXMEM.
1249 *
1250 * Memory is aligned to PHYSMAP_ALIGN which must be a multiple
1251 * of PAGE_SIZE. This also greatly reduces the memory test time
1252 * which would otherwise be excessive on machines with > 8G of ram.
1253 *
1254 * XXX first should be vm_paddr_t.
1255 */
1256
1257#define PHYSMAP_ALIGN (vm_paddr_t)(128 * 1024)
1258#define PHYSMAP_ALIGN_MASK (vm_paddr_t)(PHYSMAP_ALIGN - 1)
1259
1260static void
1261getmemsize(caddr_t kmdp, u_int64_t first)
1262{
1263 int off, physmap_idx, pa_indx, da_indx;
1264 int i, j;
1265 vm_paddr_t physmap[PHYSMAP_SIZE];
1266 vm_paddr_t pa;
1267 vm_paddr_t msgbuf_size;
1268 u_long physmem_tunable;
1269 pt_entry_t *pte;
1270 struct bios_smap *smapbase, *smap, *smapend;
1271 u_int32_t smapsize;
1272 quad_t dcons_addr, dcons_size;
1273
1274 bzero(physmap, sizeof(physmap));
1275 physmap_idx = 0;
1276
1277 /*
1278 * get memory map from INT 15:E820, kindly supplied by the loader.
1279 *
1280 * subr_module.c says:
1281 * "Consumer may safely assume that size value precedes data."
1282 * ie: an int32_t immediately precedes smap.
1283 */
1284 smapbase = (struct bios_smap *)preload_search_info(kmdp,
1285 MODINFO_METADATA | MODINFOMD_SMAP);
1286 if (smapbase == NULL)
1287 panic("No BIOS smap info from loader!");
1288
1289 smapsize = *((u_int32_t *)smapbase - 1);
1290 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
1291
1292 for (smap = smapbase; smap < smapend; smap++) {
1293 if (boothowto & RB_VERBOSE)
1294 kprintf("SMAP type=%02x base=%016lx len=%016lx\n",
1295 smap->type, smap->base, smap->length);
1296
1297 if (smap->type != SMAP_TYPE_MEMORY)
1298 continue;
1299
1300 if (smap->length == 0)
1301 continue;
1302
1303 for (i = 0; i <= physmap_idx; i += 2) {
1304 if (smap->base < physmap[i + 1]) {
1305 if (boothowto & RB_VERBOSE) {
1306 kprintf("Overlapping or non-monotonic "
1307 "memory region, ignoring "
1308 "second region\n");
1309 }
1310 break;
1311 }
1312 }
1313 if (i <= physmap_idx)
1314 continue;
1315
1316 Realmem += smap->length;
1317
1318 if (smap->base == physmap[physmap_idx + 1]) {
1319 physmap[physmap_idx + 1] += smap->length;
1320 continue;
1321 }
1322
1323 physmap_idx += 2;
1324 if (physmap_idx == PHYSMAP_SIZE) {
1325 kprintf("Too many segments in the physical "
1326 "address map, giving up\n");
1327 break;
1328 }
1329 physmap[physmap_idx] = smap->base;
1330 physmap[physmap_idx + 1] = smap->base + smap->length;
1331 }
1332
1333 base_memory = physmap[1] / 1024;
1334 /* make hole for AP bootstrap code */
1335 physmap[1] = mp_bootaddress(base_memory);
1336
1337 /* Save EBDA address, if any */
1338 ebda_addr = (u_long)(*(u_short *)(KERNBASE + 0x40e));
1339 ebda_addr <<= 4;
1340
1341 /*
1342 * Maxmem isn't the "maximum memory", it's one larger than the
1343 * highest page of the physical address space. It should be
1344 * called something like "Maxphyspage". We may adjust this
1345 * based on ``hw.physmem'' and the results of the memory test.
1346 */
1347 Maxmem = atop(physmap[physmap_idx + 1]);
1348
1349#ifdef MAXMEM
1350 Maxmem = MAXMEM / 4;
1351#endif
1352
1353 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
1354 Maxmem = atop(physmem_tunable);
1355
1356 /*
1357 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
1358 * in the system.
1359 */
1360 if (Maxmem > atop(physmap[physmap_idx + 1]))
1361 Maxmem = atop(physmap[physmap_idx + 1]);
1362
1363 /*
1364 * Blowing out the DMAP will blow up the system.
1365 */
1366 if (Maxmem > atop(DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS)) {
1367 kprintf("Limiting Maxmem due to DMAP size\n");
1368 Maxmem = atop(DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS);
1369 }
1370
1371 if (atop(physmap[physmap_idx + 1]) != Maxmem &&
1372 (boothowto & RB_VERBOSE)) {
1373 kprintf("Physical memory use set to %ldK\n", Maxmem * 4);
1374 }
1375
1376 /*
1377 * Call pmap initialization to make new kernel address space
1378 *
1379 * Mask off page 0.
1380 */
1381 pmap_bootstrap(&first);
1382 physmap[0] = PAGE_SIZE;
1383
1384 /*
1385 * Align the physmap to PHYSMAP_ALIGN and cut out anything
1386 * exceeding Maxmem.
1387 */
1388 for (i = j = 0; i <= physmap_idx; i += 2) {
1389 if (physmap[i+1] > ptoa(Maxmem))
1390 physmap[i+1] = ptoa(Maxmem);
1391 physmap[i] = (physmap[i] + PHYSMAP_ALIGN_MASK) &
1392 ~PHYSMAP_ALIGN_MASK;
1393 physmap[i+1] = physmap[i+1] & ~PHYSMAP_ALIGN_MASK;
1394
1395 physmap[j] = physmap[i];
1396 physmap[j+1] = physmap[i+1];
1397
1398 if (physmap[i] < physmap[i+1])
1399 j += 2;
1400 }
1401 physmap_idx = j - 2;
1402
1403 /*
1404 * Align anything else used in the validation loop.
1405 */
1406 first = (first + PHYSMAP_ALIGN_MASK) & ~PHYSMAP_ALIGN_MASK;
1407
1408 /*
1409 * Size up each available chunk of physical memory.
1410 */
1411 pa_indx = 0;
1412 da_indx = 1;
1413 phys_avail[pa_indx++] = physmap[0];
1414 phys_avail[pa_indx] = physmap[0];
1415 dump_avail[da_indx] = physmap[0];
1416 pte = CMAP1;
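 /* CMAP1/CADDR1 is a reserved kernel pte/va pair used to temporarily map each page tested below */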
1417
1418 /*
1419 * Get dcons buffer address
1420 */
1421 if (kgetenv_quad("dcons.addr", &dcons_addr) == 0 ||
1422 kgetenv_quad("dcons.size", &dcons_size) == 0)
1423 dcons_addr = 0;
1424
1425 /*
b4d9abe2
MD
1426 * Validate the physical memory. The physical memory segments
1427 * have already been aligned to PHYSMAP_ALIGN which is a multiple
1428 * of PAGE_SIZE.
c8fe38ae
MD
1429 */
1430 for (i = 0; i <= physmap_idx; i += 2) {
1431 vm_paddr_t end;
1432
1433 end = physmap[i + 1];
1434
1435 for (pa = physmap[i]; pa < end; pa += PHYSMAP_ALIGN) {
1436 int tmp, page_bad, full;
1437 int *ptr = (int *)CADDR1;
1438
1439 full = FALSE;
1440 /*
1441 * block out kernel memory as not available.
1442 */
1443 if (pa >= 0x200000 && pa < first)
1444 goto do_dump_avail;
1445
1446 /*
1447 * block out dcons buffer
1448 */
1449 if (dcons_addr > 0
1450 && pa >= trunc_page(dcons_addr)
1451 && pa < dcons_addr + dcons_size) {
1452 goto do_dump_avail;
1453 }
1454
1455 page_bad = FALSE;
1456
1457 /*
1458 * map page into kernel: valid, read/write,non-cacheable
1459 */
1460 *pte = pa | PG_V | PG_RW | PG_N;
1461 cpu_invltlb();
1462
1463 tmp = *ptr;
1464 /*
1465 * Test for alternating 1's and 0's
1466 */
1467 *(volatile int *)ptr = 0xaaaaaaaa;
1468 cpu_mfence();
1469 if (*(volatile int *)ptr != 0xaaaaaaaa)
1470 page_bad = TRUE;
1471 /*
1472 * Test for alternating 0's and 1's
1473 */
1474 *(volatile int *)ptr = 0x55555555;
1475 cpu_mfence();
1476 if (*(volatile int *)ptr != 0x55555555)
1477 page_bad = TRUE;
1478 /*
1479 * Test for all 1's
1480 */
1481 *(volatile int *)ptr = 0xffffffff;
1482 cpu_mfence();
1483 if (*(volatile int *)ptr != 0xffffffff)
1484 page_bad = TRUE;
1485 /*
1486 * Test for all 0's
1487 */
1488 *(volatile int *)ptr = 0x0;
1489 cpu_mfence();
1490 if (*(volatile int *)ptr != 0x0)
1491 page_bad = TRUE;
1492 /*
1493 * Restore original value.
1494 */
1495 *ptr = tmp;
1496
1497 /*
1498 * Adjust array of valid/good pages.
1499 */
1500 if (page_bad == TRUE)
1501 continue;
1502 /*
1503 * If this good page is a continuation of the
1504 * previous set of good pages, then just increase
1505 * the end pointer. Otherwise start a new chunk.
1506 * Note that "end" points one higher than end,
1507 * making the range >= start and < end.
1508 * If we're also doing a speculative memory
1509 * test and we at or past the end, bump up Maxmem
1510 * so that we keep going. The first bad page
1511 * will terminate the loop.
1512 */
1513 if (phys_avail[pa_indx] == pa) {
1514 phys_avail[pa_indx] += PHYSMAP_ALIGN;
1515 } else {
1516 pa_indx++;
1517 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1518 kprintf(
1519 "Too many holes in the physical address space, giving up\n");
1520 pa_indx--;
1521 full = TRUE;
1522 goto do_dump_avail;
1523 }
1524 phys_avail[pa_indx++] = pa;
1525 phys_avail[pa_indx] = pa + PHYSMAP_ALIGN;
1526 }
1527 physmem += PHYSMAP_ALIGN / PAGE_SIZE;
1528do_dump_avail:
1529 if (dump_avail[da_indx] == pa) {
1530 dump_avail[da_indx] += PHYSMAP_ALIGN;
1531 } else {
1532 da_indx++;
1533 if (da_indx == DUMP_AVAIL_ARRAY_END) {
1534 da_indx--;
1535 goto do_next;
1536 }
1537 dump_avail[da_indx++] = pa;
1538 dump_avail[da_indx] = pa + PHYSMAP_ALIGN;
1539 }
1540do_next:
1541 if (full)
1542 break;
1543 }
1544 }
1545 *pte = 0;
1546 cpu_invltlb();
1547
1548 /*
c8fe38ae
MD
1549 * The last chunk must contain at least one page plus the message
1550 * buffer to avoid complicating other code (message buffer address
1551 * calculation, etc.).
1552 */
1553 msgbuf_size = (MSGBUF_SIZE + PHYSMAP_ALIGN_MASK) & ~PHYSMAP_ALIGN_MASK;
1554
1555 while (phys_avail[pa_indx - 1] + PHYSMAP_ALIGN +
1556 msgbuf_size >= phys_avail[pa_indx]) {
1557 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1558 phys_avail[pa_indx--] = 0;
1559 phys_avail[pa_indx--] = 0;
1560 }
1561
1562 Maxmem = atop(phys_avail[pa_indx]);
1563
1564 /* Trim off space for the message buffer. */
1565 phys_avail[pa_indx] -= msgbuf_size;
1566
1567 avail_end = phys_avail[pa_indx];
1568
1569 /* Map the message buffer. */
1570 for (off = 0; off < msgbuf_size; off += PAGE_SIZE) {
1571 pmap_kenter((vm_offset_t)msgbufp + off,
1572 phys_avail[pa_indx] + off);
1573 }
1574}
1575
1576struct machintr_abi MachIntrABI;
1577
1578/*
1579 * IDT VECTORS:
1580 * 0 Divide by zero
1581 * 1 Debug
1582 * 2 NMI
1583 * 3 BreakPoint
1584 * 4 OverFlow
1585 * 5 Bound-Range
1586 * 6 Invalid OpCode
1587 * 7 Device Not Available (x87)
1588 * 8 Double-Fault
1589 * 9 Coprocessor Segment overrun (unsupported, reserved)
1590 * 10 Invalid-TSS
1591 * 11 Segment not present
1592 * 12 Stack
1593 * 13 General Protection
1594 * 14 Page Fault
1595 * 15 Reserved
1596 * 16 x87 FP Exception pending
1597 * 17 Alignment Check
1598 * 18 Machine Check
1599 * 19 SIMD floating point
1600 * 20-31 reserved
1601 * 32-255 INTn/external sources
1602 */
1603u_int64_t
1604hammer_time(u_int64_t modulep, u_int64_t physfree)
1605{
1606 caddr_t kmdp;
1607 int gsel_tss, x, cpu;
1608#if JG
1609 int metadata_missing, off;
1610#endif
1611 struct mdglobaldata *gd;
1612 u_int64_t msr;
1613
1614 /*
1615 * Prevent lowering of the ipl if we call tsleep() early.
1616 */
1617 gd = &CPU_prvspace[0].mdglobaldata;
1618 bzero(gd, sizeof(*gd));
1619
1620 /*
1621 * Note: on both UP and SMP curthread must be set non-NULL
1622 * early in the boot sequence because the system assumes
1623 * that 'curthread' is never NULL.
1624 */
1625
1626 gd->mi.gd_curthread = &thread0;
1627 thread0.td_gd = &gd->mi;
1628
1629 atdevbase = ISA_HOLE_START + PTOV_OFFSET;
1630
1631#if JG
1632 metadata_missing = 0;
1633 if (bootinfo.bi_modulep) {
1634 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
1635 preload_bootstrap_relocate(KERNBASE);
1636 } else {
1637 metadata_missing = 1;
1638 }
1639 if (bootinfo.bi_envp)
1640 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
1641#endif
1642
1643 preload_metadata = (caddr_t)(uintptr_t)(modulep + PTOV_OFFSET);
1644 preload_bootstrap_relocate(PTOV_OFFSET);
1645 kmdp = preload_search_by_type("elf kernel");
1646 if (kmdp == NULL)
1647 kmdp = preload_search_by_type("elf64 kernel");
1648 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
1649 kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + PTOV_OFFSET;
1650#ifdef DDB
1651 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
1652 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
1653#endif
1654
1655 if (boothowto & RB_VERBOSE)
1656 bootverbose++;
1657
1658 /*
1659 * Default MachIntrABI to ICU
1660 */
1661 MachIntrABI = MachIntrABI_ICU;
1662
1663 /*
1664 * start with one cpu. Note: with one cpu, ncpus2_shift, ncpus2_mask,
1665 * and ncpus_fit_mask remain 0.
1666 */
1667 ncpus = 1;
1668 ncpus2 = 1;
1669 ncpus_fit = 1;
1670 /* Init basic tunables, hz etc */
1671 init_param1();
1672
1673 /*
1674 * make gdt memory segments
1675 */
1676 gdt_segs[GPROC0_SEL].ssd_base =
1677 (uintptr_t) &CPU_prvspace[0].mdglobaldata.gd_common_tss;
1678
1679 gd->mi.gd_prvspace = &CPU_prvspace[0];
1680
1681 for (x = 0; x < NGDT; x++) {
1682 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
1683 ssdtosd(&gdt_segs[x], &gdt[x]);
1684 }
1685 ssdtosyssd(&gdt_segs[GPROC0_SEL],
1686 (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
1687
1688 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
1689 r_gdt.rd_base = (long) gdt;
1690 lgdt(&r_gdt);
1691
1692 wrmsr(MSR_FSBASE, 0); /* User value */
1693 wrmsr(MSR_GSBASE, (u_int64_t)&gd->mi);
1694 wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */
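	/* %gs-relative accesses in the kernel now resolve to this cpu's globaldata */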
1695
1696 mi_gdinit(&gd->mi, 0);
1697 cpu_gdinit(gd, 0);
1698 proc0paddr = proc0paddr_buff;
1699 mi_proc0init(&gd->mi, proc0paddr);
1700 safepri = TDPRI_MAX;
1701
1702 /* spinlocks and the BGL */
1703 init_locks();
1704
1705 /* exceptions */
1706 for (x = 0; x < NIDT; x++)
1707 setidt_global(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
1708 setidt_global(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
1709 setidt_global(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
1710 setidt_global(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1);
1711 setidt_global(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
1712 setidt_global(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
1713 setidt_global(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
1714 setidt_global(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
1715 setidt_global(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
1716 setidt_global(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
1717 setidt_global(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
1718 setidt_global(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
1719 setidt_global(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
1720 setidt_global(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
1721 setidt_global(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
1722 setidt_global(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
1723 setidt_global(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
1724 setidt_global(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
1725 setidt_global(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
1726 setidt_global(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
1727
1728 for (cpu = 0; cpu < MAXCPU; ++cpu) {
1729 r_idt_arr[cpu].rd_limit = sizeof(idt_arr[cpu]) - 1;
1730 r_idt_arr[cpu].rd_base = (long) &idt_arr[cpu][0];
1731 }
1732
1733 lidt(&r_idt_arr[0]);
1734
1735 /*
1736 * Initialize the console before we print anything out.
1737 */
1738 cninit();
1739
1740#if JG
1741 if (metadata_missing)
1742 kprintf("WARNING: loader(8) metadata is missing!\n");
1743#endif
1744
 1745#if NISA > 0
e24dd6e0 1746 elcr_probe();
1747 isa_defaultirq();
1748#endif
1749 rand_initialize();
1750
1751 /*
1752 * Initialize IRQ mapping
1753 *
1754 * NOTE:
1755 * SHOULD be after elcr_probe()
1756 */
1757 MachIntrABI_ICU.initmap();
a3dd9120 1758 MachIntrABI_IOAPIC.initmap();
a3dd9120 1759
1760#ifdef DDB
1761 kdb_init();
1762 if (boothowto & RB_KDB)
1763 Debugger("Boot flags requested debugger");
1764#endif
1765
1766#if JG
1767 finishidentcpu(); /* Final stage of CPU initialization */
1768 setidt(6, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1769 setidt(13, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1770#endif
1771 identify_cpu(); /* Final stage of CPU initialization */
1772 initializecpu(); /* Initialize CPU registers */
1773
1774 TUNABLE_INT_FETCH("hw.apic_io_enable", &ioapic_enable); /* for compat */
1775 TUNABLE_INT_FETCH("hw.ioapic_enable", &ioapic_enable);
1776 TUNABLE_INT_FETCH("hw.lapic_enable", &lapic_enable);
1777
1778 /*
08771751 1779 * Some virtual machines do not work with the I/O APIC
 1780 * enabled. If the user does not explicitly enable or
 1781 * disable the I/O APIC (ioapic_enable < 0), then we
 1782 * disable the I/O APIC on all virtual machines.
1783 *
1784 * NOTE:
1785 * This must be done after identify_cpu(), which sets
1786 * 'cpu_feature2'
1787 */
1788 if (ioapic_enable < 0) {
1789 if (cpu_feature2 & CPUID2_VMM)
1790 ioapic_enable = 0;
1791 else
1792 ioapic_enable = 1;
1793 }
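/*
 * [Illustrative sketch -- not part of machdep.c] CPUID2_VMM tested above is
 * the "hypervisor present" feature bit (CPUID leaf 1, ECX bit 31), which
 * hypervisors set and bare hardware leaves clear.  Extracted on its own,
 * the fragment below compiles as a small userland test program; the
 * function and program structure are invented for the example.
 */
#if 0	/* editorial example only */
#include <stdint.h>
#include <stdio.h>

static int
running_under_hypervisor(void)
{
	uint32_t eax, ebx, ecx, edx;

	__asm __volatile("cpuid"
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "a" (1), "c" (0));
	return ((ecx >> 31) & 1);	/* bit 31: running under a VMM */
}

int
main(void)
{
	printf("hypervisor: %s\n", running_under_hypervisor() ? "yes" : "no");
	return (0);
}
#endif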
1794
c8fe38ae 1795 /* make an initial tss so cpu can get interrupt stack on syscall! */
1796 gd->gd_common_tss.tss_rsp0 =
1797 (register_t)(thread0.td_kstack +
1798 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb));
c8fe38ae 1799 /* Ensure the stack is aligned to 16 bytes */
2883d2d8 1800 gd->gd_common_tss.tss_rsp0 &= ~(register_t)0xF;
c8fe38ae 1801
1802 /* double fault stack */
1803 gd->gd_common_tss.tss_ist1 =
1804 (long)&gd->mi.gd_prvspace->idlestack[
1805 sizeof(gd->mi.gd_prvspace->idlestack)];
1806
1807 /* Set the IO permission bitmap (empty due to tss seg limit) */
b2b3ffcd 1808 gd->gd_common_tss.tss_iobase = sizeof(struct x86_64tss);
1809
1810 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1811 gd->gd_tss_gdt = &gdt[GPROC0_SEL];
1812 gd->gd_common_tssd = *gd->gd_tss_gdt;
1813 ltr(gsel_tss);
1814
1815 /* Set up the fast syscall stuff */
1816 msr = rdmsr(MSR_EFER) | EFER_SCE;
1817 wrmsr(MSR_EFER, msr);
1818 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
1819 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
1820 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
1821 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
1822 wrmsr(MSR_STAR, msr);
3338cc67 1823 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_IOPL);
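/*
 * [Illustrative sketch -- not part of machdep.c] MSR_STAR, programmed just
 * above, packs the kernel SYSCALL selector base into bits 47:32 and the
 * user SYSRET selector base into bits 63:48, while LSTAR/CSTAR hold the
 * 64-bit and 32-bit entry points.  A hypothetical helper restating the
 * STAR packing:
 */
#if 0	/* editorial example only */
#include <stdint.h>

static inline uint64_t
star_pack(uint16_t syscall_sel_base, uint16_t sysret_sel_base)
{
	/* bits 47:32 select the SYSCALL CS/SS, bits 63:48 the SYSRET CS/SS */
	return (((uint64_t)syscall_sel_base << 32) |
	    ((uint64_t)sysret_sel_base << 48));
}
#endif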
1824
1825 getmemsize(kmdp, physfree);
1826 init_param2(physmem);
1827
 1828 /* now running on new page tables, configured, and u/iom is accessible */
1829
1830 /* Map the message buffer. */
1831#if JG
1832 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
1833 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
1834#endif
1835
1836 msgbufinit(msgbufp, MSGBUF_SIZE);
1837
1838
1839 /* transfer to user mode */
1840
1841 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
1842 _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
1843 _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
1844
1845 load_ds(_udatasel);
1846 load_es(_udatasel);
1847 load_fs(_udatasel);
1848
1849 /* setup proc 0's pcb */
1850 thread0.td_pcb->pcb_flags = 0;
c8fe38ae 1851 thread0.td_pcb->pcb_cr3 = KPML4phys;
d8061892 1852 thread0.td_pcb->pcb_ext = NULL;
d1368d1a 1853 lwp0.lwp_md.md_regs = &proc0_tf; /* XXX needed? */
1854
1855 /* Location of kernel stack for locore */
1856 return ((u_int64_t)thread0.td_pcb);
1857}
1858
1859/*
 1860 * Initialize machine-dependent portions of the global data structure.
1861 * Note that the global data area and cpu0's idlestack in the private
1862 * data space were allocated in locore.
1863 *
1864 * Note: the idlethread's cpl is 0
1865 *
1866 * WARNING! Called from early boot, 'mycpu' may not work yet.
1867 */
1868void
1869cpu_gdinit(struct mdglobaldata *gd, int cpu)
1870{
1871 if (cpu)
1872 gd->mi.gd_curthread = &gd->mi.gd_idlethread;
1873
1874 lwkt_init_thread(&gd->mi.gd_idlethread,
1875 gd->mi.gd_prvspace->idlestack,
1876 sizeof(gd->mi.gd_prvspace->idlestack),
fdce8919 1877 0, &gd->mi);
1878 lwkt_set_comm(&gd->mi.gd_idlethread, "idle_%d", cpu);
1879 gd->mi.gd_idlethread.td_switch = cpu_lwkt_switch;
1880 gd->mi.gd_idlethread.td_sp -= sizeof(void *);
1881 *(void **)gd->mi.gd_idlethread.td_sp = cpu_idle_restore;
1882}
1883
1884int
1885is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr)
1886{
1887 if (saddr >= (vm_offset_t)&CPU_prvspace[0] &&
1888 eaddr <= (vm_offset_t)&CPU_prvspace[MAXCPU]) {
1889 return (TRUE);
1890 }
1891 if (saddr >= DMAP_MIN_ADDRESS && eaddr <= DMAP_MAX_ADDRESS)
1892 return (TRUE);
1893 return (FALSE);
1894}
1895
1896struct globaldata *
1897globaldata_find(int cpu)
1898{
1899 KKASSERT(cpu >= 0 && cpu < ncpus);
1900 return(&CPU_prvspace[cpu].mdglobaldata.mi);
1901}
1902
1903int
1904ptrace_set_pc(struct lwp *lp, unsigned long addr)
1905{
5b9f6cc4 1906 lp->lwp_md.md_regs->tf_rip = addr;
1907 return (0);
1908}
1909
1910int
1911ptrace_single_step(struct lwp *lp)
1912{
5b9f6cc4 1913 lp->lwp_md.md_regs->tf_rflags |= PSL_T;
1914 return (0);
1915}
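/*
 * [Illustrative sketch -- not part of machdep.c] ptrace_set_pc() and
 * ptrace_single_step() are the machine-dependent hooks behind ptrace(2)
 * requests such as PT_STEP.  Extracted on its own, the hedged fragment
 * below is a minimal userland tracer that single-steps a child once
 * (error handling omitted):
 */
#if 0	/* editorial example only */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <signal.h>
#include <unistd.h>

int
main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PT_TRACE_ME, 0, NULL, 0);	/* child: become a tracee */
		raise(SIGSTOP);				/* let the parent take over */
		_exit(0);
	}
	waitpid(pid, NULL, 0);			/* child stopped by SIGSTOP */
	ptrace(PT_STEP, pid, (caddr_t)1, 0);	/* PSL_T set via ptrace_single_step() */
	waitpid(pid, NULL, 0);			/* stopped again after one instruction */
	ptrace(PT_CONTINUE, pid, (caddr_t)1, 0);
	waitpid(pid, NULL, 0);			/* reap the exited child */
	return (0);
}
#endif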
1916
1917int
1918fill_regs(struct lwp *lp, struct reg *regs)
1919{
1920 struct trapframe *tp;
1921
1922 if ((tp = lp->lwp_md.md_regs) == NULL)
1923 return EINVAL;
5b9f6cc4 1924 bcopy(&tp->tf_rdi, &regs->r_rdi, sizeof(*regs));
1925 return (0);
1926}
1927
1928int
1929set_regs(struct lwp *lp, struct reg *regs)
1930{
1931 struct trapframe *tp;
1932
1933 tp = lp->lwp_md.md_regs;
5b9f6cc4 1934 if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) ||
1935 !CS_SECURE(regs->r_cs))
1936 return (EINVAL);
5b9f6cc4 1937 bcopy(&regs->r_rdi, &tp->tf_rdi, sizeof(*regs));
f2081646 1938 clear_quickret();
1939 return (0);
1940}
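/*
 * [Illustrative sketch -- not part of machdep.c] fill_regs() and set_regs()
 * service PT_GETREGS/PT_SETREGS for a stopped tracee; set_regs() refuses
 * unsafe %rflags/%cs values via EFL_SECURE()/CS_SECURE().  A hypothetical
 * debugger fragment rewriting the tracee's program counter:
 */
#if 0	/* editorial example only */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>

static int
set_tracee_pc(pid_t pid, unsigned long new_rip)
{
	struct reg r;

	if (ptrace(PT_GETREGS, pid, (caddr_t)&r, 0) == -1)
		return (-1);
	r.r_rip = new_rip;		/* lands in tf_rip via set_regs() */
	return (ptrace(PT_SETREGS, pid, (caddr_t)&r, 0));
}
#endif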
1941
1942#ifndef CPU_DISABLE_SSE
1943static void
1944fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
1945{
1946 struct env87 *penv_87 = &sv_87->sv_env;
1947 struct envxmm *penv_xmm = &sv_xmm->sv_env;
1948 int i;
1949
1950 /* FPU control/status */
1951 penv_87->en_cw = penv_xmm->en_cw;
1952 penv_87->en_sw = penv_xmm->en_sw;
1953 penv_87->en_tw = penv_xmm->en_tw;
1954 penv_87->en_fip = penv_xmm->en_fip;
1955 penv_87->en_fcs = penv_xmm->en_fcs;
1956 penv_87->en_opcode = penv_xmm->en_opcode;
1957 penv_87->en_foo = penv_xmm->en_foo;
1958 penv_87->en_fos = penv_xmm->en_fos;
1959
1960 /* FPU registers */
1961 for (i = 0; i < 8; ++i)
1962 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
1963}
1964
1965static void
1966set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
1967{
1968 struct env87 *penv_87 = &sv_87->sv_env;
1969 struct envxmm *penv_xmm = &sv_xmm->sv_env;
1970 int i;
1971
1972 /* FPU control/status */
1973 penv_xmm->en_cw = penv_87->en_cw;
1974 penv_xmm->en_sw = penv_87->en_sw;
1975 penv_xmm->en_tw = penv_87->en_tw;
1976 penv_xmm->en_fip = penv_87->en_fip;
1977 penv_xmm->en_fcs = penv_87->en_fcs;
1978 penv_xmm->en_opcode = penv_87->en_opcode;
1979 penv_xmm->en_foo = penv_87->en_foo;
1980 penv_xmm->en_fos = penv_87->en_fos;
1981
1982 /* FPU registers */
1983 for (i = 0; i < 8; ++i)
1984 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
1985}
1986#endif /* CPU_DISABLE_SSE */
1987
1988int
1989fill_fpregs(struct lwp *lp, struct fpreg *fpregs)
1990{
1991 if (lp->lwp_thread == NULL || lp->lwp_thread->td_pcb == NULL)
1992 return EINVAL;
1993#ifndef CPU_DISABLE_SSE
1994 if (cpu_fxsr) {
1995 fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm,
1996 (struct save87 *)fpregs);
1997 return (0);
1998 }
1999#endif /* CPU_DISABLE_SSE */
2000 bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
2001 return (0);
2002}
2003
2004int
2005set_fpregs(struct lwp *lp, struct fpreg *fpregs)
2006{
2007#ifndef CPU_DISABLE_SSE
2008 if (cpu_fxsr) {
2009 set_fpregs_xmm((struct save87 *)fpregs,
2010 &lp->lwp_thread->td_pcb->pcb_save.sv_xmm);
2011 return (0);
2012 }
2013#endif /* CPU_DISABLE_SSE */
2014 bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs);
2015 return (0);
2016}
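/*
 * [Illustrative sketch -- not part of machdep.c] fill_fpregs()/set_fpregs()
 * back PT_GETFPREGS/PT_SETFPREGS; on fxsr-capable CPUs the xmm save area is
 * converted to and from the legacy save87 layout by the helpers above.  A
 * hedged userland fragment fetching a tracee's FPU state:
 */
#if 0	/* editorial example only */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>

static int
fetch_fpu_state(pid_t pid, struct fpreg *fp)
{
	/* the kernel fills *fp via fill_fpregs() */
	return (ptrace(PT_GETFPREGS, pid, (caddr_t)fp, 0));
}
#endif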
2017
2018int
2019fill_dbregs(struct lwp *lp, struct dbreg *dbregs)
2020{
2021 struct pcb *pcb;
2022
c8fe38ae 2023 if (lp == NULL) {
2024 dbregs->dr[0] = rdr0();
2025 dbregs->dr[1] = rdr1();
2026 dbregs->dr[2] = rdr2();
2027 dbregs->dr[3] = rdr3();
2028 dbregs->dr[4] = rdr4();
2029 dbregs->dr[5] = rdr5();
2030 dbregs->dr[6] = rdr6();
2031 dbregs->dr[7] = rdr7();
d64d3805 2032 return (0);
c8fe38ae 2033 }
2034 if (lp->lwp_thread == NULL || (pcb = lp->lwp_thread->td_pcb) == NULL)
2035 return EINVAL;
2036 dbregs->dr[0] = pcb->pcb_dr0;
2037 dbregs->dr[1] = pcb->pcb_dr1;
2038 dbregs->dr[2] = pcb->pcb_dr2;
2039 dbregs->dr[3] = pcb->pcb_dr3;
2040 dbregs->dr[4] = 0;
2041 dbregs->dr[5] = 0;
2042 dbregs->dr[6] = pcb->pcb_dr6;
2043 dbregs->dr[7] = pcb->pcb_dr7;
2044 return (0);
2045}
2046
2047int
2048set_dbregs(struct lwp *lp, struct dbreg *dbregs)
2049{
2050 if (lp == NULL) {
2051 load_dr0(dbregs->dr[0]);
2052 load_dr1(dbregs->dr[1]);
2053 load_dr2(dbregs->dr[2]);
2054 load_dr3(dbregs->dr[3]);
2055 load_dr4(dbregs->dr[4]);
2056 load_dr5(dbregs->dr[5]);
2057 load_dr6(dbregs->dr[6]);
2058 load_dr7(dbregs->dr[7]);
2059 } else {
2060 struct pcb *pcb;
2061 struct ucred *ucred;
2062 int i;
0855a2af 2063 uint64_t mask1, mask2;
2064
2065 /*
2066 * Don't let an illegal value for dr7 get set. Specifically,
2067 * check for undefined settings. Setting these bit patterns
 2068 * results in undefined behaviour and can lead to an unexpected
2069 * TRCTRAP.
2070 */
2071 /* JG this loop looks unreadable */
2072 /* Check 4 2-bit fields for invalid patterns.
2073 * These fields are R/Wi, for i = 0..3
2074 */
2075 /* Is 10 in LENi allowed when running in compatibility mode? */
2076 /* Pattern 10 in R/Wi might be used to indicate
2077 * breakpoint on I/O. Further analysis should be
 2078 * carried out to decide whether it is safe and useful to
 2079 * provide access to that capability.
2080 */
2081 for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4;
2082 i++, mask1 <<= 4, mask2 <<= 4)
2083 if ((dbregs->dr[7] & mask1) == mask2)
c8fe38ae 2084 return (EINVAL);
2085
2086 pcb = lp->lwp_thread->td_pcb;
2087 ucred = lp->lwp_proc->p_ucred;
2088
2089 /*
2090 * Don't let a process set a breakpoint that is not within the
2091 * process's address space. If a process could do this, it
2092 * could halt the system by setting a breakpoint in the kernel
2093 * (if ddb was enabled). Thus, we need to check to make sure
2094 * that no breakpoints are being enabled for addresses outside
 2095 * the process's address space, unless, perhaps, we were called by
2096 * uid 0.
2097 *
2098 * XXX - what about when the watched area of the user's
2099 * address space is written into from within the kernel
2100 * ... wouldn't that still cause a breakpoint to be generated
2101 * from within kernel mode?
2102 */
2103
895c1f85 2104 if (priv_check_cred(ucred, PRIV_ROOT, 0) != 0) {
0855a2af 2105 if (dbregs->dr[7] & 0x3) {
c8fe38ae 2106 /* dr0 is enabled */
0855a2af 2107 if (dbregs->dr[0] >= VM_MAX_USER_ADDRESS)
2108 return (EINVAL);
2109 }
2110
0855a2af 2111 if (dbregs->dr[7] & (0x3<<2)) {
c8fe38ae 2112 /* dr1 is enabled */
0855a2af 2113 if (dbregs->dr[1] >= VM_MAX_USER_ADDRESS)
2114 return (EINVAL);
2115 }
2116
0855a2af 2117 if (dbregs->dr[7] & (0x3<<4)) {
c8fe38ae 2118 /* dr2 is enabled */
0855a2af 2119 if (dbregs->dr[2] >= VM_MAX_USER_ADDRESS)
2120 return (EINVAL);
2121 }
2122
0855a2af 2123 if (dbregs->dr[7] & (0x3<<6)) {
c8fe38ae 2124 /* dr3 is enabled */
0855a2af 2125 if (dbregs->dr[3] >= VM_MAX_USER_ADDRESS)
2126 return (EINVAL);
2127 }
2128 }
2129
2130 pcb->pcb_dr0 = dbregs->dr[0];
2131 pcb->pcb_dr1 = dbregs->dr[1];
2132 pcb->pcb_dr2 = dbregs->dr[2];
2133 pcb->pcb_dr3 = dbregs->dr[3];
2134 pcb->pcb_dr6 = dbregs->dr[6];
2135 pcb->pcb_dr7 = dbregs->dr[7];
2136
2137 pcb->pcb_flags |= PCB_DBREGS;
2138 }
2139
2140 return (0);
2141}
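/*
 * [Illustrative sketch -- not part of machdep.c] set_dbregs() is reached
 * via PT_SETDBREGS and enforces the dr7 pattern and user-address checks
 * above.  A hypothetical tracer arming hardware breakpoint 0 as an execute
 * breakpoint (R/W0 = 00, LEN0 = 00, L0 = 1) on a tracee address:
 */
#if 0	/* editorial example only */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>
#include <string.h>

static int
arm_exec_breakpoint(pid_t pid, unsigned long addr)
{
	struct dbreg db;

	memset(&db, 0, sizeof(db));
	db.dr[0] = addr;	/* linear address to break on */
	db.dr[7] = 0x1;		/* L0 set, R/W0 = LEN0 = 00 (execute, 1 byte) */
	return (ptrace(PT_SETDBREGS, pid, (caddr_t)&db, 0));
}
#endif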
2142
2143/*
2144 * Return > 0 if a hardware breakpoint has been hit, and the
 2145 * breakpoint was in user space. Return 0 otherwise.
2146 */
2147int
2148user_dbreg_trap(void)
2149{
2150 u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */
2151 u_int64_t bp; /* breakpoint bits extracted from dr6 */
2152 int nbp; /* number of breakpoints that triggered */
2153 caddr_t addr[4]; /* breakpoint addresses */
2154 int i;
2155
2156 dr7 = rdr7();
0855a2af 2157 if ((dr7 & 0xff) == 0) {
2158 /*
 2159 * none of the breakpoint enable bits (L0-L3, G0-G3) in the
 2160 * dr7 register are set, thus the trap couldn't have been
 2161 * caused by the hardware debug registers
2162 */
2163 return 0;
2164 }
2165
2166 nbp = 0;
2167 dr6 = rdr6();
0855a2af 2168 bp = dr6 & 0xf;
c8fe38ae 2169
0855a2af 2170 if (bp == 0) {
2171 /*
 2172 * None of the breakpoint bits are set, meaning this
2173 * trap was not caused by any of the debug registers
2174 */
2175 return 0;
2176 }
2177
2178 /*
 2179 * at least one of the breakpoints was hit, check to see
 2180 * which ones and whether any of them are user space addresses
2181 */
2182
2183 if (bp & 0x01) {
2184 addr[nbp++] = (caddr_t)rdr0();
2185 }
2186 if (bp & 0x02) {
2187 addr[nbp++] = (caddr_t)rdr1();
2188 }
2189 if (bp & 0x04) {
2190 addr[nbp++] = (caddr_t)rdr2();
2191 }
2192 if (bp & 0x08) {
2193 addr[nbp++] = (caddr_t)rdr3();
2194 }
2195
 2196 for (i = 0; i < nbp; i++) {
2197 if (addr[i] <
2198 (caddr_t)VM_MAX_USER_ADDRESS) {
2199 /*
2200 * addr[i] is in user space
2201 */
2202 return nbp;
2203 }
2204 }
2205
2206 /*
2207 * None of the breakpoints are in user space.
2208 */
2209 return 0;
2210}
2211
2212
2213#ifndef DDB
2214void
2215Debugger(const char *msg)
2216{
2217 kprintf("Debugger(\"%s\") called.\n", msg);
2218}
2219#endif /* no DDB */
2220
2221#ifdef DDB
2222
2223/*
2224 * Provide inb() and outb() as functions. They are normally only
2225 * available as macros calling inlined functions, thus cannot be
2226 * called inside DDB.
2227 *
2228 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
2229 */
2230
2231#undef inb
2232#undef outb
2233
2234/* silence compiler warnings */
2235u_char inb(u_int);
2236void outb(u_int, u_char);
2237
2238u_char
2239inb(u_int port)
2240{
2241 u_char data;
2242 /*
2243 * We use %%dx and not %1 here because i/o is done at %dx and not at
2244 * %edx, while gcc generates inferior code (movw instead of movl)
2245 * if we tell it to load (u_short) port.
2246 */
2247 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
2248 return (data);
2249}
2250
2251void
2252outb(u_int port, u_char data)
2253{
2254 u_char al;
2255 /*
2256 * Use an unnecessary assignment to help gcc's register allocator.
 2257 * This makes a large difference for gcc-1.40 and a tiny difference
2258 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
2259 * best results. gcc-2.6.0 can't handle this.
2260 */
2261 al = data;
2262 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
2263}
2264
2265#endif /* DDB */
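/*
 * [Editorial note, hedged] The out-of-line inb()/outb() above exist mainly
 * so they can be invoked by name from the ddb prompt; the exact syntax can
 * vary, but a session might look roughly like:
 *
 *	db> call inb(0x21)		read the master PIC interrupt mask
 *	db> call outb(0x21, 0xff)	mask all IRQs on the master 8259
 */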
2266
2267
2268
2269#include "opt_cpu.h"
2270
2271
2272/*
2273 * initialize all the SMP locks
2274 */
2275
 2276 /* critical region when masking or unmasking interrupts */
2277struct spinlock_deprecated imen_spinlock;
2278
2279/* critical region for old style disable_intr/enable_intr */
2280struct spinlock_deprecated mpintr_spinlock;
2281
2282/* critical region around INTR() routines */
2283struct spinlock_deprecated intr_spinlock;
2284
2285/* lock region used by kernel profiling */
2286struct spinlock_deprecated mcount_spinlock;
2287
2288/* locks com (tty) data/hardware accesses: a FASTINTR() */
2289struct spinlock_deprecated com_spinlock;
2290
2291/* lock regions around the clock hardware */
2292struct spinlock_deprecated clock_spinlock;
2293
2294static void
2295init_locks(void)
2296{
2297 /*
b5d16701 2298 * Get the initial mplock with a count of 1 for the BSP.
 2299 * This uses a LOGICAL cpu ID, i.e. BSP == 0.
2300 */
c8fe38ae 2301 cpu_get_initial_mplock();
2302 /* DEPRECATED */
2303 spin_lock_init(&mcount_spinlock);
2304 spin_lock_init(&intr_spinlock);
2305 spin_lock_init(&mpintr_spinlock);
2306 spin_lock_init(&imen_spinlock);
2307 spin_lock_init(&com_spinlock);
2308 spin_lock_init(&clock_spinlock);
2309
2310 /* our token pool needs to work early */
2311 lwkt_token_pool_init();
2312}
2313