From ee5e79e30bc80107d436c388e623e8a164287c0d Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Tue, 23 Jun 2015 22:41:44 +0200 Subject: [PATCH] i386 removal, part 9/x: Remove the 32 bit vkernel platform code. --- sys/Makefile.modules | 2 +- sys/conf/kern.paths.mk | 2 +- sys/conf/kern.post.mk | 2 +- sys/conf/kern.pre.mk | 2 - sys/platform/vkernel/Makefile.inc | 4 - sys/platform/vkernel/conf/Makefile | 35 - sys/platform/vkernel/conf/files | 89 - sys/platform/vkernel/conf/kern.mk | 25 - sys/platform/vkernel/conf/ldscript.i386 | 216 -- sys/platform/vkernel/conf/options | 12 - sys/platform/vkernel/i386/autoconf.c | 559 --- sys/platform/vkernel/i386/cpu_regs.c | 935 ----- sys/platform/vkernel/i386/db_interface.c | 331 -- sys/platform/vkernel/i386/db_trace.c | 438 --- sys/platform/vkernel/i386/exception.c | 213 -- sys/platform/vkernel/i386/fork_tramp.s | 103 - sys/platform/vkernel/i386/genassym.c | 219 -- sys/platform/vkernel/i386/global.s | 97 - sys/platform/vkernel/i386/locore.s | 99 - sys/platform/vkernel/i386/mp.c | 491 --- sys/platform/vkernel/i386/npx.c | 711 ---- sys/platform/vkernel/i386/procfs_machdep.c | 129 - sys/platform/vkernel/i386/swtch.s | 625 ---- sys/platform/vkernel/i386/tls.c | 211 -- sys/platform/vkernel/i386/trap.c | 1425 -------- sys/platform/vkernel/i386/userldt.c | 61 - sys/platform/vkernel/i386/vm_machdep.c | 386 -- sys/platform/vkernel/include/clock.h | 45 - sys/platform/vkernel/include/cothread.h | 65 - sys/platform/vkernel/include/cpu.h | 64 - sys/platform/vkernel/include/cpufunc.h | 79 - sys/platform/vkernel/include/globaldata.h | 161 - sys/platform/vkernel/include/lock.h | 48 - sys/platform/vkernel/include/md_var.h | 126 - sys/platform/vkernel/include/param.h | 33 - sys/platform/vkernel/include/pcb.h | 82 - sys/platform/vkernel/include/pcb_ext.h | 68 - sys/platform/vkernel/include/pmap.h | 203 -- sys/platform/vkernel/include/pmap_inval.h | 72 - sys/platform/vkernel/include/proc.h | 50 - sys/platform/vkernel/include/ptrace.h | 52 - sys/platform/vkernel/include/smp.h | 51 - sys/platform/vkernel/include/thread.h | 95 - sys/platform/vkernel/include/types.h | 46 - sys/platform/vkernel/include/vmm.h | 74 - sys/platform/vkernel/include/vmparam.h | 92 - .../vkernel/platform/busdma_machdep.c | 1277 ------- sys/platform/vkernel/platform/console.c | 434 --- sys/platform/vkernel/platform/copyio.c | 279 -- sys/platform/vkernel/platform/cothread.c | 204 -- sys/platform/vkernel/platform/globaldata.c | 128 - sys/platform/vkernel/platform/init.c | 1450 -------- sys/platform/vkernel/platform/ipl_funcs.c | 72 - sys/platform/vkernel/platform/kqueue.c | 206 -- sys/platform/vkernel/platform/machintr.c | 210 -- sys/platform/vkernel/platform/pmap.c | 3131 ----------------- sys/platform/vkernel/platform/pmap_inval.c | 224 -- sys/platform/vkernel/platform/shutdown.c | 102 - sys/platform/vkernel/platform/sysarch.c | 60 - sys/platform/vkernel/platform/systimer.c | 274 -- 60 files changed, 3 insertions(+), 16976 deletions(-) delete mode 100644 sys/platform/vkernel/Makefile.inc delete mode 100644 sys/platform/vkernel/conf/Makefile delete mode 100644 sys/platform/vkernel/conf/files delete mode 100644 sys/platform/vkernel/conf/kern.mk delete mode 100644 sys/platform/vkernel/conf/ldscript.i386 delete mode 100644 sys/platform/vkernel/conf/options delete mode 100644 sys/platform/vkernel/i386/autoconf.c delete mode 100644 sys/platform/vkernel/i386/cpu_regs.c delete mode 100644 sys/platform/vkernel/i386/db_interface.c delete mode 100644 sys/platform/vkernel/i386/db_trace.c delete mode 100644 sys/platform/vkernel/i386/exception.c delete mode 100644 sys/platform/vkernel/i386/fork_tramp.s delete mode 100644 sys/platform/vkernel/i386/genassym.c delete mode 100644 sys/platform/vkernel/i386/global.s delete mode 100644 sys/platform/vkernel/i386/locore.s delete mode 100644 sys/platform/vkernel/i386/mp.c delete mode 100644 sys/platform/vkernel/i386/npx.c delete mode 100644 sys/platform/vkernel/i386/procfs_machdep.c delete mode 100644 sys/platform/vkernel/i386/swtch.s delete mode 100644 sys/platform/vkernel/i386/tls.c delete mode 100644 sys/platform/vkernel/i386/trap.c delete mode 100644 sys/platform/vkernel/i386/userldt.c delete mode 100644 sys/platform/vkernel/i386/vm_machdep.c delete mode 100644 sys/platform/vkernel/include/clock.h delete mode 100644 sys/platform/vkernel/include/cothread.h delete mode 100644 sys/platform/vkernel/include/cpu.h delete mode 100644 sys/platform/vkernel/include/cpufunc.h delete mode 100644 sys/platform/vkernel/include/globaldata.h delete mode 100644 sys/platform/vkernel/include/lock.h delete mode 100644 sys/platform/vkernel/include/md_var.h delete mode 100644 sys/platform/vkernel/include/param.h delete mode 100644 sys/platform/vkernel/include/pcb.h delete mode 100644 sys/platform/vkernel/include/pcb_ext.h delete mode 100644 sys/platform/vkernel/include/pmap.h delete mode 100644 sys/platform/vkernel/include/pmap_inval.h delete mode 100644 sys/platform/vkernel/include/proc.h delete mode 100644 sys/platform/vkernel/include/ptrace.h delete mode 100644 sys/platform/vkernel/include/smp.h delete mode 100644 sys/platform/vkernel/include/thread.h delete mode 100644 sys/platform/vkernel/include/types.h delete mode 100644 sys/platform/vkernel/include/vmm.h delete mode 100644 sys/platform/vkernel/include/vmparam.h delete mode 100644 sys/platform/vkernel/platform/busdma_machdep.c delete mode 100644 sys/platform/vkernel/platform/console.c delete mode 100644 sys/platform/vkernel/platform/copyio.c delete mode 100644 sys/platform/vkernel/platform/cothread.c delete mode 100644 sys/platform/vkernel/platform/globaldata.c delete mode 100644 sys/platform/vkernel/platform/init.c delete mode 100644 sys/platform/vkernel/platform/ipl_funcs.c delete mode 100644 sys/platform/vkernel/platform/kqueue.c delete mode 100644 sys/platform/vkernel/platform/machintr.c delete mode 100644 sys/platform/vkernel/platform/pmap.c delete mode 100644 sys/platform/vkernel/platform/pmap_inval.c delete mode 100644 sys/platform/vkernel/platform/shutdown.c delete mode 100644 sys/platform/vkernel/platform/sysarch.c delete mode 100644 sys/platform/vkernel/platform/systimer.c diff --git a/sys/Makefile.modules b/sys/Makefile.modules index 8bfa7aad52..dfada64604 100644 --- a/sys/Makefile.modules +++ b/sys/Makefile.modules @@ -4,7 +4,7 @@ .if defined(MODULES_OVERRIDE) SUBDIR=${MODULES_OVERRIDE} .else -.if ${MACHINE_PLATFORM} != "vkernel" && ${MACHINE_PLATFORM} != "vkernel64" +.if ${MACHINE_PLATFORM} != "vkernel64" SUBDIR=bus .endif SUBDIR+=crypto emulation dev gnu kern net netbt netproto vfs diff --git a/sys/conf/kern.paths.mk b/sys/conf/kern.paths.mk index c78a560d89..9bdb998123 100644 --- a/sys/conf/kern.paths.mk +++ b/sys/conf/kern.paths.mk @@ -17,6 +17,6 @@ OLDMODULESDIR?= /boot/modules # Set DESTDIR to /var/vkernel by default for vkernel platform so as # not to shoot the real kernel installation. -.if ${MACHINE_PLATFORM} == vkernel || ${MACHINE_PLATFORM} == vkernel64 +.if ${MACHINE_PLATFORM} == vkernel64 DESTDIR?= /var/vkernel .endif diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 8bfb66edf7..597873e446 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -158,7 +158,7 @@ kernel-installable: /usr/bin/false; \ fi # Skip this step for vkernels -.if ${MACHINE_PLATFORM} != vkernel && ${MACHINE_PLATFORM} != vkernel64 +.if ${MACHINE_PLATFORM} != vkernel64 @if [ ! -f ${DESTDIR}/boot/dloader.rc ]; then \ echo "You need to install a new ${DESTDIR}/boot before you"; \ echo "can install a new kernel, kernels are now installed"; \ diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 8a66a93029..6077e4e9e2 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -108,8 +108,6 @@ SYSTEM_LD= @${LD} -Bdynamic -T $S/platform/$P/conf/ldscript.$M \ # .if ${P} == "pc64" || ${P} == "vkernel64" SYSTEM_LD+= -z max-page-size=0x200000 -.elif ${P} == "pc32" || ${P} == "vkernel" -SYSTEM_LD+= -z max-page-size=0x1000 .endif SYSTEM_LD_TAIL= @${OBJCOPY} --strip-symbol gcc2_compiled. ${.TARGET} ; \ diff --git a/sys/platform/vkernel/Makefile.inc b/sys/platform/vkernel/Makefile.inc deleted file mode 100644 index d7ab0b05d8..0000000000 --- a/sys/platform/vkernel/Makefile.inc +++ /dev/null @@ -1,4 +0,0 @@ -# Used by the device build to check for device support -# - -DEV_SUPPORT= virtual/vkernel diff --git a/sys/platform/vkernel/conf/Makefile b/sys/platform/vkernel/conf/Makefile deleted file mode 100644 index eb78ae8161..0000000000 --- a/sys/platform/vkernel/conf/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# Which version of config(8) is required. -# -%VERSREQ= 400026 - -.if !defined(S) -.if exists(./@/.) -S= ./@ -.else -S= ../.. -.endif -.endif - -.include "$S/conf/kern.pre.mk" - -# Override the normal kernel link and link as a normal user program -# -SYSTEM_LD= @${CC} ${DEBUG} ${PROF} -export-dynamic -o ${.TARGET} ${SYSTEM_OBJS} vers.o -pthread - -%BEFORE_DEPEND - -%OBJS - -%CFILES - -%SFILES - -%MFILES - -%CLEAN - -.include "$S/conf/kern.post.mk" - -%RULES - -# DO NOT DELETE THIS LINE -- make depend uses it diff --git a/sys/platform/vkernel/conf/files b/sys/platform/vkernel/conf/files deleted file mode 100644 index 4d3db4cf98..0000000000 --- a/sys/platform/vkernel/conf/files +++ /dev/null @@ -1,89 +0,0 @@ -# This file tells config what files go into building a kernel, -# files marked standard are always included. -# -bf_enc.o optional ipsec ipsec_esp \ - dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ - compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ - no-implicit-rule -crypto/des/arch/i386/des_enc.S optional ipsec ipsec_esp -crypto/des/des_ecb.c optional netsmbcrypto -crypto/des/arch/i386/des_enc.S optional netsmbcrypto -crypto/des/des_setkey.c optional netsmbcrypto -bf_enc.o optional crypto \ - dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ - compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ - no-implicit-rule -crypto/des/arch/i386/des_enc.S optional crypto -crypto/des/des_ecb.c optional crypto -crypto/des/des_setkey.c optional crypto -emulation/dragonfly12/dfbsd12_getdirentries.c optional compat_df12 -emulation/dragonfly12/dfbsd12_stat.c optional compat_df12 -emulation/linux/i386/linprocfs/linprocfs_misc.c optional linprocfs -emulation/linux/i386/linprocfs/linprocfs_subr.c optional linprocfs -emulation/linux/i386/linprocfs/linprocfs_vfsops.c optional linprocfs -emulation/linux/i386/linprocfs/linprocfs_vnops.c optional linprocfs -vfs/smbfs/smbfs_io.c optional smbfs -vfs/smbfs/smbfs_node.c optional smbfs -vfs/smbfs/smbfs_smb.c optional smbfs -vfs/smbfs/smbfs_subr.c optional smbfs -vfs/smbfs/smbfs_vfsops.c optional smbfs -vfs/smbfs/smbfs_vnops.c optional smbfs -cpu/i386/misc/atomic.c standard \ - compile-with "${CC} -c ${CFLAGS} ${WERROR} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" -platform/vkernel/i386/autoconf.c standard -platform/vkernel/i386/mp.c standard \ - compile-with "${CC} -c -pthread ${CFLAGS} ${WERROR} -I/usr/include ${.IMPSRC}" -# -# DDB XXX -cpu/i386/misc/elf_machdep.c standard -cpu/i386/misc/lwbuf.c standard -cpu/i386/misc/in_cksum2.s optional inet -cpu/i386/misc/in_cksum2.s optional inet6 -cpu/i386/misc/monitor.s standard -cpu/i386/misc/ktr.c optional ktr -cpu/i386/misc/db_disasm.c optional ddb -cpu/i386/misc/i386-gdbstub.c optional ddb -cpu/i386/misc/bzeront.s standard -# -# DOS mbr and gpt -kern/subr_diskmbr.c standard -kern/subr_diskgpt.c standard - -# DEVICES -# -dev/virtual/vkernel/cdrom/vcd.c optional vcd -dev/virtual/vkernel/disk/vdisk.c optional vkd -dev/virtual/vkernel/net/if_vke.c optional vke -vfs/dirfs/dirfs_vnops.c optional dirfs -vfs/dirfs/dirfs_vfsops.c optional dirfs -vfs/dirfs/dirfs_subr.c optional dirfs - -# PLATFORM FILES -# -platform/vkernel/i386/global.s standard -platform/vkernel/i386/swtch.s standard -platform/vkernel/i386/npx.c mandatory npx -platform/vkernel/i386/db_interface.c optional ddb -platform/vkernel/i386/db_trace.c optional ddb -platform/vkernel/i386/vm_machdep.c standard -platform/vkernel/i386/cpu_regs.c standard -platform/vkernel/i386/userldt.c standard -platform/vkernel/i386/tls.c standard -platform/vkernel/i386/trap.c standard -platform/vkernel/i386/exception.c standard -platform/vkernel/i386/procfs_machdep.c standard -platform/vkernel/i386/fork_tramp.s standard -platform/vkernel/platform/init.c standard -platform/vkernel/platform/globaldata.c standard -platform/vkernel/platform/kqueue.c standard -platform/vkernel/platform/shutdown.c standard -platform/vkernel/platform/machintr.c standard -platform/vkernel/platform/copyio.c standard -platform/vkernel/platform/pmap.c standard -platform/vkernel/platform/pmap_inval.c standard -platform/vkernel/platform/busdma_machdep.c standard -platform/vkernel/platform/sysarch.c standard -platform/vkernel/platform/systimer.c standard -platform/vkernel/platform/console.c standard -platform/vkernel/platform/ipl_funcs.c standard -platform/vkernel/platform/cothread.c standard diff --git a/sys/platform/vkernel/conf/kern.mk b/sys/platform/vkernel/conf/kern.mk deleted file mode 100644 index 8ceb4d7c01..0000000000 --- a/sys/platform/vkernel/conf/kern.mk +++ /dev/null @@ -1,25 +0,0 @@ -# -# On the i386, do not align the stack to 16-byte boundaries. Otherwise GCC -# adds code to the entry and exit point of every function to align the -# stack to 16-byte boundaries -- thus wasting approximately 12 bytes of stack -# per function call. While the 16-byte alignment may benefit micro benchmarks, -# it is probably an overall loss as it makes the code bigger (less efficient -# use of code cache tag lines) and uses more stack (less efficient use of data -# cache tag lines) -# -# Prohibit the use of FP registers in the kernel. The user FP state is -# only saved and restored under strictly managed conditions and mainline -# kernel code cannot safely use the FP system. -# -.if ${CCVER:Mgcc*} -CFLAGS+= -mpreferred-stack-boundary=2 -.endif -CFLAGS+= -fno-stack-protector -CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2 -mno-sse3 -CFLAGS+= -D_KERNEL_VIRTUAL - -# Remove the dynamic library hack for now -# -SYSTEM_OBJS:= ${SYSTEM_OBJS:Nhack.So} - -INLINE_LIMIT= 8000 diff --git a/sys/platform/vkernel/conf/ldscript.i386 b/sys/platform/vkernel/conf/ldscript.i386 deleted file mode 100644 index a0db817d15..0000000000 --- a/sys/platform/vkernel/conf/ldscript.i386 +++ /dev/null @@ -1,216 +0,0 @@ -/* Default linker script, for normal executables */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", - "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(btext) -SEARCH_DIR(/usr/lib); - -PHDRS -{ - headers PT_PHDR PHDRS ; - interp PT_INTERP ; - text PT_LOAD FILEHDR PHDRS ; - data PT_LOAD ; - dynamic PT_DYNAMIC ; -} - -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - kernphys = 0x100000; - kernmxps = CONSTANT (MAXPAGESIZE); - kernpage = CONSTANT (COMMONPAGESIZE); - . = kernbase + kernphys + SIZEOF_HEADERS; - .interp : { *(.interp) } :text :interp - .note.gnu.build-id : { *(.note.gnu.build-id) } :text - .hash : { *(.hash) } - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - .rel.init : { *(.rel.init) } - .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } - .rel.fini : { *(.rel.fini) } - .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } - .rel.data.rel.ro : { *(.rel.data.rel.ro* .rel.gnu.linkonce.d.rel.ro.*) } - .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } - .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } - .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } - .rel.ctors : { *(.rel.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rel.got : { *(.rel.got) } - .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } - .rel.ifunc : { *(.rel.ifunc) } - .rel.plt : - { - *(.rel.plt) - PROVIDE_HIDDEN (__rel_iplt_start = .); - *(.rel.iplt) - PROVIDE_HIDDEN (__rel_iplt_end = .); - } - .init : - { - KEEP (*(.init)) - } =0x90909090 - .plt : { *(.plt) *(.iplt) } - .text : - { - *(.text.unlikely .text.*_unlikely) - *(.text.exit .text.exit.*) - *(.text.startup .text.startup.*) - *(.text.hot .text.hot.*) - *(.text .stub .text.* .gnu.linkonce.t.*) - /* .gnu.warning sections are handled specially by elf32.em. */ - *(.gnu.warning) - } =0x90909090 - .fini : - { - KEEP (*(.fini)) - } =0x90909090 - PROVIDE (__etext = .); - PROVIDE (_etext = .); - PROVIDE (etext = .); - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - .eh_frame_hdr : { *(.eh_frame_hdr) } - .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } - .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table - .gcc_except_table.*) } - /* These sections are generated by the Sun/Oracle C++ compiler. */ - .exception_ranges : ONLY_IF_RO { *(.exception_ranges - .exception_ranges*) } - /* Adjust the address for the data segment. We want to adjust up to - the same address within the page on the next page up. */ - . = ALIGN (kernmxps) - ((kernmxps - .) & (kernmxps - 1)); - . = DATA_SEGMENT_ALIGN (kernmxps, kernpage); - /* Exception handling */ - .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) } - .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } - .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } - /* Thread Local Storage sections */ - .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } - .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } - .preinit_array : - { - PROVIDE_HIDDEN (__preinit_array_start = .); - KEEP (*(.preinit_array)) - PROVIDE_HIDDEN (__preinit_array_end = .); - } - .init_array : - { - PROVIDE_HIDDEN (__init_array_start = .); - KEEP (*(SORT(.init_array.*))) - KEEP (*(.init_array)) - PROVIDE_HIDDEN (__init_array_end = .); - } - .fini_array : - { - PROVIDE_HIDDEN (__fini_array_start = .); - KEEP (*(SORT(.fini_array.*))) - KEEP (*(.fini_array)) - PROVIDE_HIDDEN (__fini_array_end = .); - } - _start_ctors = .; - PROVIDE (start_ctors = .); - .ctors : - { - /* gcc uses crtbegin.o to find the start of - the constructors, so we make sure it is - first. Because this is a wildcard, it - doesn't matter if the user does not - actually link against crtbegin.o; the - linker won't look for a file to match a - wildcard. The wildcard also means that it - doesn't matter which directory crtbegin.o - is in. */ - KEEP (*crtbegin.o(.ctors)) - KEEP (*crtbegin?.o(.ctors)) - /* We don't want to include the .ctor section from - the crtend.o file until after the sorted ctors. - The .ctor section from the crtend file contains the - end of ctors marker and it must be last */ - KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) - KEEP (*(SORT(.ctors.*))) - KEEP (*(.ctors)) - } - _stop_ctors = .; - PROVIDE (stop_ctors = .); - .dtors : - { - KEEP (*crtbegin.o(.dtors)) - KEEP (*crtbegin?.o(.dtors)) - KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors)) - KEEP (*(SORT(.dtors.*))) - KEEP (*(.dtors)) - } - .jcr : { KEEP (*(.jcr)) } - .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*) } :data - .dynamic : { *(.dynamic) } :data :dynamic - .got : { *(.got) *(.igot) } :data - . = DATA_SEGMENT_RELRO_END (12, .); - .got.plt : { *(.got.plt) *(.igot.plt) } - .data : - { - *(.data .data.* .gnu.linkonce.d.*) - SORT(CONSTRUCTORS) - } - .data1 : { *(.data1) } - _edata = .; PROVIDE (edata = .); - __bss_start = .; - .bss : - { - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(COMMON) - /* Align here to ensure that the .bss section occupies space up to - _end. Align after .bss to ensure correct alignment even if the - .bss section disappears because there are no input sections. - FIXME: Why do we need it? When there is no .bss section, we don't - pad the .data section. */ - . = ALIGN(. != 0 ? 32 / 8 : 1); - } - . = ALIGN(32 / 8); - . = ALIGN(32 / 8); - _end = .; PROVIDE (end = .); - . = DATA_SEGMENT_END (.); - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } -} diff --git a/sys/platform/vkernel/conf/options b/sys/platform/vkernel/conf/options deleted file mode 100644 index 77a7d72db3..0000000000 --- a/sys/platform/vkernel/conf/options +++ /dev/null @@ -1,12 +0,0 @@ -# The cpu type -# -I586_CPU opt_global.h -I686_CPU opt_global.h - -COMPAT_DF12 opt_compatdf12.h - -# Static filesystems -DIRFS opt_dontuse.h - -# KTR options -KTR_DIRFS opt_ktr.h diff --git a/sys/platform/vkernel/i386/autoconf.c b/sys/platform/vkernel/i386/autoconf.c deleted file mode 100644 index 2cff1b1341..0000000000 --- a/sys/platform/vkernel/i386/autoconf.c +++ /dev/null @@ -1,559 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 - * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - */ - -/* - * Setup the system to run on the current machine. - * - * Configure() is called at boot time and initializes the vba - * device tables and the memory controller monitoring. Available - * devices are determined (from possibilities mentioned in ioconf.c), - * and the drivers are initialized. - */ -#include "opt_bootp.h" -#include "opt_ffs.h" -#include "opt_cd9660.h" -#include "opt_nfs.h" -#include "opt_nfsroot.h" -#include "opt_rootdevname.h" - -#include "use_isa.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#if 0 -#include -#include -#include -#endif -#include -#include -#include - -#if NISA > 0 -#include - -device_t isa_bus_device = NULL; -#endif - -static void cpu_startup (void *); -static void configure_first (void *); -static void configure (void *); -static void configure_final (void *); - -#if defined(FFS) && defined(FFS_ROOT) -static void setroot (void); -#endif - -#if defined(NFS) && defined(NFS_ROOT) -#if !defined(BOOTP_NFSROOT) -static void pxe_setup_nfsdiskless(void); -#endif -#endif - -SYSINIT(cpu, SI_BOOT2_START_CPU, SI_ORDER_FIRST, cpu_startup, NULL); -SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL); -/* SI_ORDER_SECOND is hookable */ -SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL); -/* SI_ORDER_MIDDLE is hookable */ -SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); - -cdev_t rootdev = NULL; -cdev_t dumpdev = NULL; - -/* - * - */ -static void -cpu_startup(void *dummy) -{ - vm_offset_t buffer_sva; - vm_offset_t buffer_eva; - vm_offset_t pager_sva; - vm_offset_t pager_eva; - - kprintf("%s", version); - kprintf("real memory = %ju (%ju MB)\n", - (uintmax_t)ptoa(Maxmem), - (uintmax_t)ptoa(Maxmem) / 1024 / 1024); - - if (nbuf == 0) { - int factor = 4 * BKVASIZE / 1024; - int kbytes = Maxmem * (PAGE_SIZE / 1024); - - nbuf = 50; - if (kbytes > 4096) - nbuf += min((kbytes - 4096) / factor, 65536 / factor); - if (kbytes > 65536) - nbuf += (kbytes - 65536) * 2 / (factor * 5); - if (maxbcache && nbuf > maxbcache / BKVASIZE) - nbuf = maxbcache / BKVASIZE; - } - if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) { - nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2); - kprintf("Warning: nbufs capped at %ld\n", nbuf); - } - - nswbuf = lmax(lmin(nbuf / 4, 256), 16); -#ifdef NSWBUF_MIN - if (nswbuf < NSWBUF_MIN) - nswbuf = NSWBUF_MIN; -#endif - - /* - * Allocate memory for the buffer cache - */ - buf = (void *)kmem_alloc(&kernel_map, nbuf * sizeof(struct buf)); - swbuf = (void *)kmem_alloc(&kernel_map, nswbuf * sizeof(struct buf)); - - -#ifdef DIRECTIO - ffs_rawread_setup(); -#endif - kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva, - (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); - kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva, - (nbuf*BKVASIZE)); - buffer_map.system_map = 1; - kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva, - (nswbuf*MAXPHYS) + pager_map_size); - pager_map.system_map = 1; -#if defined(USERCONFIG) - userconfig(); - cninit(); /* the preferred console may have changed */ -#endif - kprintf("avail memory = %ju (%ju MB)\n", - (uintmax_t)ptoa(vmstats.v_free_count), - (uintmax_t)ptoa(vmstats.v_free_count) / 1024 / 1024); - mp_start(); - mp_announce(); - cpu_setregs(); -} - -/* - * Determine i/o configuration for a machine. - */ -static void -configure_first(void *dummy) -{ -} - -static void -configure(void *dummy) -{ - /* - * Final interrupt support acviation, then enable hardware interrupts. - */ - MachIntrABI.finalize(); - cpu_enable_intr(); - - /* - * This will configure all devices, generally starting with the - * nexus (i386/i386/nexus.c). The nexus ISA code explicitly - * dummies up the attach in order to delay legacy initialization - * until after all other busses/subsystems have had a chance - * at those resources. - */ - root_bus_configure(); - -#if NISA > 0 - /* - * Explicitly probe and attach ISA last. The isa bus saves - * it's device node at attach time for us here. - */ - if (isa_bus_device) - isa_probe_children(isa_bus_device); -#endif - - /* - * Allow lowering of the ipl to the lowest kernel level if we - * panic (or call tsleep() before clearing `cold'). No level is - * completely safe (since a panic may occur in a critical region - * at splhigh()), but we want at least bio interrupts to work. - */ - safepri = TDPRI_KERN_USER; -} - -static void -configure_final(void *dummy) -{ - cninit_finish(); - - if (bootverbose) - kprintf("Device configuration finished.\n"); -} - -#ifdef BOOTP -void bootpc_init(void); -#endif -/* - * Do legacy root filesystem discovery. - */ -void -cpu_rootconf(void) -{ -#ifdef BOOTP - bootpc_init(); -#endif -#if defined(NFS) && defined(NFS_ROOT) -#if !defined(BOOTP_NFSROOT) - pxe_setup_nfsdiskless(); - if (nfs_diskless_valid) -#endif - rootdevnames[0] = "nfs:"; -#endif -#if defined(FFS) && defined(FFS_ROOT) - if (!rootdevnames[0]) - setroot(); -#endif -} -SYSINIT(cpu_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, cpu_rootconf, NULL); - -u_long bootdev = 0; /* not a cdev_t - encoding is different */ - -#if defined(FFS) && defined(FFS_ROOT) - -/* - * The boot code uses old block device major numbers to pass bootdev to - * us. We have to translate these to character device majors because - * we don't have block devices any more. - */ -static int -boot_translate_majdev(int bmajor) -{ - static int conv[] = { BOOTMAJOR_CONVARY }; - - if (bmajor >= 0 && bmajor < NELEM(conv)) - return(conv[bmajor]); - return(-1); -} - -/* - * Attempt to find the device from which we were booted. - * If we can do so, and not instructed not to do so, - * set rootdevs[] and rootdevnames[] to correspond to the - * boot device(s). - * - * This code survives in order to allow the system to be - * booted from legacy environments that do not correctly - * populate the kernel environment. There are significant - * restrictions on the bootability of the system in this - * situation; it can only be mounting root from a 'da' - * 'wd' or 'fd' device, and the root filesystem must be ufs. - */ -static void -setroot(void) -{ - int majdev, mindev, unit, slice, part; - cdev_t newrootdev, dev; - char partname[2]; - char *sname; - - if ((bootdev & B_MAGICMASK) != B_DEVMAGIC) { - kprintf("no B_DEVMAGIC (bootdev=%#lx)\n", bootdev); - return; - } - majdev = boot_translate_majdev(B_TYPE(bootdev)); - if (bootverbose) { - kprintf("bootdev: %08lx type=%ld unit=%ld " - "slice=%ld part=%ld major=%d\n", - bootdev, B_TYPE(bootdev), B_UNIT(bootdev), - B_SLICE(bootdev), B_PARTITION(bootdev), majdev); - } - dev = udev2dev(makeudev(majdev, 0), 0); - if (!dev_is_good(dev)) - return; - unit = B_UNIT(bootdev); - slice = B_SLICE(bootdev); - if (slice == WHOLE_DISK_SLICE) - slice = COMPATIBILITY_SLICE; - if (slice < 0 || slice >= MAX_SLICES) { - kprintf("bad slice\n"); - return; - } - - part = B_PARTITION(bootdev); - mindev = dkmakeminor(unit, slice, part); - newrootdev = udev2dev(makeudev(majdev, mindev), 0); - if (!dev_is_good(newrootdev)) - return; - sname = dsname(newrootdev, unit, slice, part, partname); - rootdevnames[0] = kmalloc(strlen(sname) + 6, M_DEVBUF, M_WAITOK); - ksprintf(rootdevnames[0], "ufs:%s%s", sname, partname); - - /* - * For properly dangerously dedicated disks (ones with a historical - * bogus partition table), the boot blocks will give slice = 4, but - * the kernel will only provide the compatibility slice since it - * knows that slice 4 is not a real slice. Arrange to try mounting - * the compatibility slice as root if mounting the slice passed by - * the boot blocks fails. This handles the dangerously dedicated - * case and perhaps others. - */ - if (slice == COMPATIBILITY_SLICE) - return; - slice = COMPATIBILITY_SLICE; - sname = dsname(newrootdev, unit, slice, part, partname); - rootdevnames[1] = kmalloc(strlen(sname) + 6, M_DEVBUF, M_WAITOK); - ksprintf(rootdevnames[1], "ufs:%s%s", sname, partname); -} -#endif - -#if defined(NFS) && defined(NFS_ROOT) -#if !defined(BOOTP_NFSROOT) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct nfs_diskless nfs_diskless; - -/* - * Convert a kenv variable to a sockaddr. If the kenv variable does not - * exist the sockaddr will remain zerod out (callers typically just check - * sin_len). A network address of 0.0.0.0 is equivalent to failure. - */ -static int -inaddr_to_sockaddr(char *ev, struct sockaddr_in *sa) -{ - u_int32_t a[4]; - char *cp; - - bzero(sa, sizeof(*sa)); - - if ((cp = kgetenv(ev)) == NULL) - return(1); - if (ksscanf(cp, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]) != 4) - return(1); - if (a[0] == 0 && a[1] == 0 && a[2] == 0 && a[3] == 0) - return(1); - /* XXX is this ordering correct? */ - sa->sin_addr.s_addr = (a[3] << 24) + (a[2] << 16) + (a[1] << 8) + a[0]; - sa->sin_len = sizeof(*sa); - sa->sin_family = AF_INET; - return(0); -} - -static int -hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa) -{ - char *cp; - u_int32_t a[6]; - - bzero(sa, sizeof(*sa)); - sa->sdl_len = sizeof(*sa); - sa->sdl_family = AF_LINK; - sa->sdl_type = IFT_ETHER; - sa->sdl_alen = ETHER_ADDR_LEN; - if ((cp = kgetenv(ev)) == NULL) - return(1); - if (ksscanf(cp, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]) != 6) - return(1); - sa->sdl_data[0] = a[0]; - sa->sdl_data[1] = a[1]; - sa->sdl_data[2] = a[2]; - sa->sdl_data[3] = a[3]; - sa->sdl_data[4] = a[4]; - sa->sdl_data[5] = a[5]; - return(0); -} - -static int -decode_nfshandle(char *ev, u_char *fh) -{ - u_char *cp; - int len, val; - - if (((cp = kgetenv(ev)) == NULL) || (strlen(cp) < 2) || (*cp != 'X')) - return(0); - len = 0; - cp++; - for (;;) { - if (*cp == 'X') - return(len); - if ((ksscanf(cp, "%2x", &val) != 1) || (val > 0xff)) - return(0); - *(fh++) = val; - len++; - cp += 2; - if (len > NFSX_V2FH) - return(0); - } -} - -/* - * Populate the essential fields in the nfsv3_diskless structure. - * - * The loader is expected to export the following environment variables: - * - * boot.netif.ip IP address on boot interface - * boot.netif.netmask netmask on boot interface - * boot.netif.gateway default gateway (optional) - * boot.netif.hwaddr hardware address of boot interface - * boot.netif.name name of boot interface (instead of hw addr) - * boot.nfsroot.server IP address of root filesystem server - * boot.nfsroot.path path of the root filesystem on server - * boot.nfsroot.nfshandle NFS handle for root filesystem on server - */ -static void -pxe_setup_nfsdiskless(void) -{ - struct nfs_diskless *nd = &nfs_diskless; - struct ifnet *ifp; - struct ifaddr *ifa; - struct sockaddr_dl *sdl, ourdl; - struct sockaddr_in myaddr, netmask; - char *cp; - - /* set up interface */ - if (inaddr_to_sockaddr("boot.netif.ip", &myaddr)) - return; - if (inaddr_to_sockaddr("boot.netif.netmask", &netmask)) { - kprintf("PXE: no netmask\n"); - return; - } - bcopy(&myaddr, &nd->myif.ifra_addr, sizeof(myaddr)); - bcopy(&myaddr, &nd->myif.ifra_broadaddr, sizeof(myaddr)); - ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = - myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr; - bcopy(&netmask, &nd->myif.ifra_mask, sizeof(netmask)); - - if ((cp = kgetenv("boot.netif.name")) != NULL) { - ifnet_lock(); - ifp = ifunit(cp); - if (ifp) { - strlcpy(nd->myif.ifra_name, ifp->if_xname, - sizeof(nd->myif.ifra_name)); - ifnet_unlock(); - goto match_done; - } - ifnet_unlock(); - kprintf("PXE: cannot find interface %s\n", cp); - return; - } - - if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) { - kprintf("PXE: no hardware address\n"); - return; - } - ifa = NULL; - ifnet_lock(); - TAILQ_FOREACH(ifp, &ifnetlist, if_link) { - struct ifaddr_container *ifac; - - TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { - ifa = ifac->ifa; - - if ((ifa->ifa_addr->sa_family == AF_LINK) && - (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) { - if ((sdl->sdl_type == ourdl.sdl_type) && - (sdl->sdl_alen == ourdl.sdl_alen) && - !bcmp(sdl->sdl_data + sdl->sdl_nlen, - ourdl.sdl_data + ourdl.sdl_nlen, - sdl->sdl_alen)) { - strlcpy(nd->myif.ifra_name, - ifp->if_xname, - sizeof(nd->myif.ifra_name)); - ifnet_unlock(); - goto match_done; - } - } - } - } - ifnet_unlock(); - kprintf("PXE: no interface\n"); - return; /* no matching interface */ -match_done: - /* set up gateway */ - inaddr_to_sockaddr("boot.netif.gateway", &nd->mygateway); - - /* XXX set up swap? */ - - /* set up root mount */ - nd->root_args.rsize = 8192; /* XXX tunable? */ - nd->root_args.wsize = 8192; - nd->root_args.sotype = SOCK_STREAM; - nd->root_args.flags = NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT; - if (inaddr_to_sockaddr("boot.nfsroot.server", &nd->root_saddr)) { - kprintf("PXE: no server\n"); - return; - } - nd->root_saddr.sin_port = htons(NFS_PORT); - - /* - * A tftp-only loader may pass NFS path information without a - * root handle. Generate a warning but continue configuring. - */ - if (decode_nfshandle("boot.nfsroot.nfshandle", &nd->root_fh[0]) == 0) { - kprintf("PXE: Warning, no NFS handle passed from loader\n"); - } - if ((cp = kgetenv("boot.nfsroot.path")) != NULL) - strncpy(nd->root_hostnam, cp, MNAMELEN - 1); - - nfs_diskless_valid = 1; -} - -#endif -#endif diff --git a/sys/platform/vkernel/i386/cpu_regs.c b/sys/platform/vkernel/i386/cpu_regs.c deleted file mode 100644 index b21e5d39c9..0000000000 --- a/sys/platform/vkernel/i386/cpu_regs.c +++ /dev/null @@ -1,935 +0,0 @@ -/*- - * Copyright (c) 1992 Terrence R. Lambert. - * Copyright (C) 1994, David Greenman - * Copyright (c) 1982, 1987, 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - */ - -#include "use_npx.h" -#include "opt_compat.h" -#include "opt_ddb.h" -#include "opt_directio.h" -#include "opt_inet.h" -#include "opt_msgbuf.h" -#include "opt_swap.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include /* pcb.h included via sys/user.h */ -#include /* CPU_prvspace */ -#include -#ifdef PERFMON -#include -#endif -#include - -#include -#include -#include -#include -#include -#include /* umtx_* functions */ -#include /* pthread_yield */ - -extern void dblfault_handler (void); - -#ifndef CPU_DISABLE_SSE -static void set_fpregs_xmm (struct save87 *, struct savexmm *); -static void fill_fpregs_xmm (struct savexmm *, struct save87 *); -#endif /* CPU_DISABLE_SSE */ -#ifdef DIRECTIO -extern void ffs_rawread_setup(void); -#endif /* DIRECTIO */ - -int64_t tsc_offsets[MAXCPU]; - -#if defined(SWTCH_OPTIM_STATS) -extern int swtch_optim_stats; -SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, - CTLFLAG_RD, &swtch_optim_stats, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, - CTLFLAG_RD, &tlb_flush_count, 0, ""); -#endif - -static int -sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) -{ - u_long pmem = ctob(physmem); - - int error = sysctl_handle_long(oidp, &pmem, 0, req); - return (error); -} - -SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG|CTLFLAG_RD, - 0, 0, sysctl_hw_physmem, "LU", "Total system memory in bytes (number of pages * page size)"); - -static int -sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) -{ - int error = sysctl_handle_int(oidp, 0, - ctob((int)Maxmem - vmstats.v_wire_count), req); - return (error); -} - -SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, - 0, 0, sysctl_hw_usermem, "IU", ""); - -SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &Maxmem, 0, ""); - -/* - * Send an interrupt to process. - * - * Stack is set up to allow sigcode stored - * at top to call routine, followed by kcall - * to sigreturn routine below. After sigreturn - * resets the signal mask, the stack, and the - * frame pointer, it returns to the user - * specified pc, psl. - */ -void -sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) -{ - struct lwp *lp = curthread->td_lwp; - struct proc *p = lp->lwp_proc; - struct trapframe *regs; - struct sigacts *psp = p->p_sigacts; - struct sigframe sf, *sfp; - int oonstack; - - regs = lp->lwp_md.md_regs; - oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; - - /* save user context */ - bzero(&sf, sizeof(struct sigframe)); - sf.sf_uc.uc_sigmask = *mask; - sf.sf_uc.uc_stack = lp->lwp_sigstk; - sf.sf_uc.uc_mcontext.mc_onstack = oonstack; - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe)); - - /* make the size of the saved context visible to userland */ - sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); - - /* Allocate and validate space for the signal handler context. */ - if ((lp->lwp_flags & LWP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp + - lp->lwp_sigstk.ss_size - sizeof(struct sigframe)); - lp->lwp_sigstk.ss_flags |= SS_ONSTACK; - } - else - sfp = (struct sigframe *)regs->tf_esp - 1; - - /* Translate the signal is appropriate */ - if (p->p_sysent->sv_sigtbl) { - if (sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - } - - /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; - if (SIGISMEMBER(psp->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; - sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; - - /* fill siginfo structure */ - sf.sf_si.si_signo = sig; - sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void*)regs->tf_err; - } - else { - /* Old FreeBSD-style arguments. */ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - -#if 0 - /* - * If we're a vm86 process, we want to save the segment registers. - * We also change eflags to be our emulated eflags, not the actual - * eflags. - */ - if (regs->tf_eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; - - sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; - sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; - sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; - sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; - - if (vm86->vm86_has_vme == 0) - sf.sf_uc.uc_mcontext.mc_eflags = - (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | - (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); - - /* - * Clear PSL_NT to inhibit T_TSSFLT faults on return from - * syscalls made by the signal handler. This just avoids - * wasting time for our lazy fixup of such faults. PSL_NT - * does nothing in vm86 mode, but vm86 programs can set it - * almost legitimately in probes for old cpu types. - */ - tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); - } -#endif - - /* - * Save the FPU state and reinit the FP unit - */ - npxpush(&sf.sf_uc.uc_mcontext); - - /* - * Copy the sigframe out to the user's stack. - */ - if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { - /* - * Something is wrong with the stack pointer. - * ...Kill the process. - */ - sigexit(lp, SIGILL); - } - - regs->tf_esp = (int)sfp; - regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); - - /* - * i386 abi specifies that the direction flag must be cleared - * on function entry - */ - regs->tf_eflags &= ~(PSL_T|PSL_D); - - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - if (regs->tf_trapno == T_PROTFLT) { - regs->tf_fs = _udatasel; - regs->tf_gs = _udatasel; - } - regs->tf_ss = _udatasel; -} - -/* - * Sanitize the trapframe for a virtual kernel passing control to a custom - * VM context. - * - * Allow userland to set or maintain PSL_RF, the resume flag. This flag - * basically controls whether the return PC should skip the first instruction - * (as in an explicit system call) or re-execute it (as in an exception). - */ -int -cpu_sanitize_frame(struct trapframe *frame) -{ - frame->tf_cs = _ucodesel; - frame->tf_ds = _udatasel; - frame->tf_es = _udatasel; -#if 0 - frame->tf_fs = _udatasel; - frame->tf_gs = _udatasel; -#endif - frame->tf_ss = _udatasel; - frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE); - frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; - return(0); -} - -int -cpu_sanitize_tls(struct savetls *tls) -{ - struct segment_descriptor *desc; - int i; - - for (i = 0; i < NGTLS; ++i) { - desc = &tls->tls[i]; - if (desc->sd_dpl == 0 && desc->sd_type == 0) - continue; - if (desc->sd_def32 == 0) - return(ENXIO); - if (desc->sd_type != SDT_MEMRWA) - return(ENXIO); - if (desc->sd_dpl != SEL_UPL) - return(ENXIO); - if (desc->sd_xx != 0 || desc->sd_p != 1) - return(ENXIO); - } - return(0); -} - -/* - * sigreturn(ucontext_t *sigcntxp) - * - * System call to cleanup state after a signal - * has been taken. Reset signal mask and - * stack state from context left by sendsig (above). - * Return to previous pc and psl as specified by - * context left by sendsig. Check carefully to - * make sure that the user has not modified the - * state to gain improper privileges. - * - * MPSAFE - */ -#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) -#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) - -int -sys_sigreturn(struct sigreturn_args *uap) -{ - struct lwp *lp = curthread->td_lwp; - struct trapframe *regs; - ucontext_t ucp; - int cs; - int eflags; - int error; - - error = copyin(uap->sigcntxp, &ucp, sizeof(ucp)); - if (error) - return (error); - - regs = lp->lwp_md.md_regs; - eflags = ucp.uc_mcontext.mc_eflags; - -#if 0 - if (eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86; - - /* - * if pcb_ext == 0 or vm86_inited == 0, the user hasn't - * set up the vm86 area, and we can't enter vm86 mode. - */ - if (lp->lwp_thread->td_pcb->pcb_ext == 0) - return (EINVAL); - vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; - if (vm86->vm86_inited == 0) - return (EINVAL); - - /* go back to user mode if both flags are set */ - if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) - trapsignal(lp->lwp_proc, SIGBUS, 0); - - if (vm86->vm86_has_vme) { - eflags = (tf->tf_eflags & ~VME_USERCHANGE) | - (eflags & VME_USERCHANGE) | PSL_VM; - } else { - vm86->vm86_eflags = eflags; /* save VIF, VIP */ - eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; - } - bcopy(&ucp.uc_mcontext.mc_gs, tf, sizeof(struct trapframe)); - tf->tf_eflags = eflags; - tf->tf_vm86_ds = tf->tf_ds; - tf->tf_vm86_es = tf->tf_es; - tf->tf_vm86_fs = tf->tf_fs; - tf->tf_vm86_gs = tf->tf_gs; - tf->tf_ds = _udatasel; - tf->tf_es = _udatasel; -#if 0 - tf->tf_fs = _udatasel; - tf->tf_gs = _udatasel; -#endif - } else -#endif - { - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - kprintf("sigreturn: eflags = 0x%x\n", eflags); - return(EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - cs = ucp.uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - kprintf("sigreturn: cs = 0x%x\n", cs); - trapsignal(lp, SIGBUS, T_PROTFLT); - return(EINVAL); - } - bcopy(&ucp.uc_mcontext.mc_gs, regs, sizeof(struct trapframe)); - } - - /* - * Restore the FPU state from the frame - */ - crit_enter(); - npxpop(&ucp.uc_mcontext); - - if (ucp.uc_mcontext.mc_onstack & 1) - lp->lwp_sigstk.ss_flags |= SS_ONSTACK; - else - lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK; - - lp->lwp_sigmask = ucp.uc_sigmask; - SIG_CANTMASK(lp->lwp_sigmask); - crit_exit(); - return(EJUSTRETURN); -} - -/* - * cpu_idle() represents the idle LWKT. You cannot return from this function - * (unless you want to blow things up!). Instead we look for runnable threads - * and loop or halt as appropriate. Giant is not held on entry to the thread. - * - * The main loop is entered with a critical section held, we must release - * the critical section before doing anything else. lwkt_switch() will - * check for pending interrupts due to entering and exiting its own - * critical section. - * - * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI - * to wake a HLTed cpu up. - */ -static int cpu_idle_hlt = 1; -static int cpu_idle_hltcnt; -static int cpu_idle_spincnt; -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW, - &cpu_idle_hltcnt, 0, "Idle loop entry halts"); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW, - &cpu_idle_spincnt, 0, "Idle loop entry spins"); - -void -cpu_idle(void) -{ - struct thread *td = curthread; - struct mdglobaldata *gd = mdcpu; - int reqflags; - - crit_exit(); - KKASSERT(td->td_critcount == 0); - cpu_enable_intr(); - for (;;) { - /* - * See if there are any LWKTs ready to go. - */ - lwkt_switch(); - - /* - * The idle loop halts only if no threads are scheduleable - * and no signals have occured. - */ - if (cpu_idle_hlt && - (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) { - splz(); - KKASSERT(MP_LOCK_HELD() == 0); - if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) { -#ifdef DEBUGIDLE - struct timeval tv1, tv2; - gettimeofday(&tv1, NULL); -#endif - reqflags = gd->mi.gd_reqflags & - ~RQF_IDLECHECK_WK_MASK; - umtx_sleep(&gd->mi.gd_reqflags, reqflags, - 1000000); -#ifdef DEBUGIDLE - gettimeofday(&tv2, NULL); - if (tv2.tv_usec - tv1.tv_usec + - (tv2.tv_sec - tv1.tv_sec) * 1000000 - > 500000) { - kprintf("cpu %d idlelock %08x %08x\n", - gd->mi.gd_cpuid, - gd->mi.gd_reqflags, - gd->gd_fpending); - } -#endif - } - ++cpu_idle_hltcnt; - } else { - splz(); - __asm __volatile("pause"); - ++cpu_idle_spincnt; - } - } -} - -/* - * Called by the spinlock code with or without a critical section held - * when a spinlock is found to be seriously constested. - * - * We need to enter a critical section to prevent signals from recursing - * into pthreads. - */ -void -cpu_spinlock_contested(void) -{ - cpu_pause(); -} - -/* - * Clear registers on exec - */ -void -exec_setregs(u_long entry, u_long stack, u_long ps_strings) -{ - struct thread *td = curthread; - struct lwp *lp = td->td_lwp; - struct trapframe *regs = lp->lwp_md.md_regs; - struct pcb *pcb = lp->lwp_thread->td_pcb; - - /* was i386_user_cleanup() in NetBSD */ - user_ldt_free(pcb); - - bzero((char *)regs, sizeof(struct trapframe)); - regs->tf_eip = entry; - regs->tf_esp = stack; - regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); - regs->tf_ss = 0; - regs->tf_ds = 0; - regs->tf_es = 0; - regs->tf_fs = 0; - regs->tf_gs = 0; - regs->tf_cs = 0; - - /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ - regs->tf_ebx = ps_strings; - - /* - * Reset the hardware debug registers if they were in use. - * They won't have any meaning for the newly exec'd process. - */ - if (pcb->pcb_flags & PCB_DBREGS) { - pcb->pcb_dr0 = 0; - pcb->pcb_dr1 = 0; - pcb->pcb_dr2 = 0; - pcb->pcb_dr3 = 0; - pcb->pcb_dr6 = 0; - pcb->pcb_dr7 = 0; - if (pcb == td->td_pcb) { - /* - * Clear the debug registers on the running - * CPU, otherwise they will end up affecting - * the next process we switch to. - */ - reset_dbregs(); - } - pcb->pcb_flags &= ~PCB_DBREGS; - } - - /* - * Initialize the math emulator (if any) for the current process. - * Actually, just clear the bit that says that the emulator has - * been initialized. Initialization is delayed until the process - * traps to the emulator (if it is done at all) mainly because - * emulators don't provide an entry point for initialization. - */ - pcb->pcb_flags &= ~FP_SOFTFP; - - /* - * note: do not set CR0_TS here. npxinit() must do it after clearing - * gd_npxthread. Otherwise a preemptive interrupt thread may panic - * in npxdna(). - */ - crit_enter(); -#if 0 - load_cr0(rcr0() | CR0_MP); -#endif - -#if NNPX > 0 - /* Initialize the npx (if any) for the current process. */ - npxinit(); -#endif - crit_exit(); - - /* - * note: linux emulator needs edx to be 0x0 on entry, which is - * handled in execve simply by setting the 64 bit syscall - * return value to 0. - */ -} - -void -cpu_setregs(void) -{ -#if 0 - unsigned int cr0; - - cr0 = rcr0(); - cr0 |= CR0_NE; /* Done by npxinit() */ - cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ - cr0 |= CR0_WP | CR0_AM; - load_cr0(cr0); - load_gs(_udatasel); -#endif -} - -static int -sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) -{ - int error; - error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, - req); - if (!error && req->newptr) - resettodr(); - return (error); -} - -SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, - &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); - -extern u_long bootdev; /* not a cdev_t - encoding is different */ -SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, - CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)"); - -/* - * Initialize 386 and configure to run kernel - */ - -/* - * Initialize segments & interrupt table - */ - -extern struct user *proc0paddr; - -#if 0 - -extern inthand_t - IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), - IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), - IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), - IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(syscall), - IDTVEC(rsvd0); -extern inthand_t - IDTVEC(int0x80_syscall); - -#endif - -#ifdef DEBUG_INTERRUPTS -extern inthand_t *Xrsvdary[256]; -#endif - -int -ptrace_set_pc(struct lwp *lp, unsigned long addr) -{ - lp->lwp_md.md_regs->tf_eip = addr; - return (0); -} - -int -ptrace_single_step(struct lwp *lp) -{ - lp->lwp_md.md_regs->tf_eflags |= PSL_T; - return (0); -} - -int -fill_regs(struct lwp *lp, struct reg *regs) -{ - struct trapframe *tp; - - if ((tp = lp->lwp_md.md_regs) == NULL) - return EINVAL; - regs->r_gs = tp->tf_gs; - regs->r_fs = tp->tf_fs; - regs->r_es = tp->tf_es; - regs->r_ds = tp->tf_ds; - regs->r_edi = tp->tf_edi; - regs->r_esi = tp->tf_esi; - regs->r_ebp = tp->tf_ebp; - regs->r_ebx = tp->tf_ebx; - regs->r_edx = tp->tf_edx; - regs->r_ecx = tp->tf_ecx; - regs->r_eax = tp->tf_eax; - regs->r_eip = tp->tf_eip; - regs->r_cs = tp->tf_cs; - regs->r_eflags = tp->tf_eflags; - regs->r_esp = tp->tf_esp; - regs->r_ss = tp->tf_ss; - return (0); -} - -int -set_regs(struct lwp *lp, struct reg *regs) -{ - struct trapframe *tp; - - tp = lp->lwp_md.md_regs; - if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || - !CS_SECURE(regs->r_cs)) - return (EINVAL); - tp->tf_gs = regs->r_gs; - tp->tf_fs = regs->r_fs; - tp->tf_es = regs->r_es; - tp->tf_ds = regs->r_ds; - tp->tf_edi = regs->r_edi; - tp->tf_esi = regs->r_esi; - tp->tf_ebp = regs->r_ebp; - tp->tf_ebx = regs->r_ebx; - tp->tf_edx = regs->r_edx; - tp->tf_ecx = regs->r_ecx; - tp->tf_eax = regs->r_eax; - tp->tf_eip = regs->r_eip; - tp->tf_cs = regs->r_cs; - tp->tf_eflags = regs->r_eflags; - tp->tf_esp = regs->r_esp; - tp->tf_ss = regs->r_ss; - return (0); -} - -#ifndef CPU_DISABLE_SSE -static void -fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) -{ - struct env87 *penv_87 = &sv_87->sv_env; - struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - /* FPU control/status */ - penv_87->en_cw = penv_xmm->en_cw; - penv_87->en_sw = penv_xmm->en_sw; - penv_87->en_tw = penv_xmm->en_tw; - penv_87->en_fip = penv_xmm->en_fip; - penv_87->en_fcs = penv_xmm->en_fcs; - penv_87->en_opcode = penv_xmm->en_opcode; - penv_87->en_foo = penv_xmm->en_foo; - penv_87->en_fos = penv_xmm->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; -} - -static void -set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) -{ - struct env87 *penv_87 = &sv_87->sv_env; - struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - /* FPU control/status */ - penv_xmm->en_cw = penv_87->en_cw; - penv_xmm->en_sw = penv_87->en_sw; - penv_xmm->en_tw = penv_87->en_tw; - penv_xmm->en_fip = penv_87->en_fip; - penv_xmm->en_fcs = penv_87->en_fcs; - penv_xmm->en_opcode = penv_87->en_opcode; - penv_xmm->en_foo = penv_87->en_foo; - penv_xmm->en_fos = penv_87->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; -} -#endif /* CPU_DISABLE_SSE */ - -int -fill_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ - if (lp->lwp_thread == NULL || lp->lwp_thread->td_pcb == NULL) - return EINVAL; -#ifndef CPU_DISABLE_SSE - if (cpu_fxsr) { - fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm, - (struct save87 *)fpregs); - return (0); - } -#endif /* CPU_DISABLE_SSE */ - bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); - return (0); -} - -int -set_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ -#ifndef CPU_DISABLE_SSE - if (cpu_fxsr) { - set_fpregs_xmm((struct save87 *)fpregs, - &lp->lwp_thread->td_pcb->pcb_save.sv_xmm); - return (0); - } -#endif /* CPU_DISABLE_SSE */ - bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs); - return (0); -} - -int -fill_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - return (ENOSYS); -} - -int -set_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - return (ENOSYS); -} - -#if 0 -/* - * Return > 0 if a hardware breakpoint has been hit, and the - * breakpoint was in user space. Return 0, otherwise. - */ -int -user_dbreg_trap(void) -{ - u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ - u_int32_t bp; /* breakpoint bits extracted from dr6 */ - int nbp; /* number of breakpoints that triggered */ - caddr_t addr[4]; /* breakpoint addresses */ - int i; - - dr7 = rdr7(); - if ((dr7 & 0x000000ff) == 0) { - /* - * all GE and LE bits in the dr7 register are zero, - * thus the trap couldn't have been caused by the - * hardware debug registers - */ - return 0; - } - - nbp = 0; - dr6 = rdr6(); - bp = dr6 & 0x0000000f; - - if (!bp) { - /* - * None of the breakpoint bits are set meaning this - * trap was not caused by any of the debug registers - */ - return 0; - } - - /* - * at least one of the breakpoints were hit, check to see - * which ones and if any of them are user space addresses - */ - - if (bp & 0x01) { - addr[nbp++] = (caddr_t)rdr0(); - } - if (bp & 0x02) { - addr[nbp++] = (caddr_t)rdr1(); - } - if (bp & 0x04) { - addr[nbp++] = (caddr_t)rdr2(); - } - if (bp & 0x08) { - addr[nbp++] = (caddr_t)rdr3(); - } - - for (i=0; i -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include - -#include - -#include - -static jmp_buf *db_nofault = NULL; -extern jmp_buf db_jmpbuf; - -extern void gdb_handle_exception (db_regs_t *, int, int); - -int db_active; -db_regs_t ddb_regs; - -static jmp_buf db_global_jmpbuf; -static int db_global_jmpbuf_valid; - -#ifdef __GNUC__ -#define rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;}) -#endif - -/* - * kdb_trap - field a TRACE or BPT trap - */ -int -kdb_trap(int type, int code, struct i386_saved_state *regs) -{ - volatile int ddb_mode = !(boothowto & RB_GDB); - - /* - * XXX try to do nothing if the console is in graphics mode. - * Handle trace traps (and hardware breakpoints...) by ignoring - * them except for forgetting about them. Return 0 for other - * traps to say that we haven't done anything. The trap handler - * will usually panic. We should handle breakpoint traps for - * our breakpoints by disarming our breakpoints and fixing up - * %eip. - */ - if (cons_unavail && ddb_mode) { - if (type == T_TRCTRAP) { - regs->tf_eflags &= ~PSL_T; - return (1); - } - return (0); - } - - switch (type) { - case T_BPTFLT: /* breakpoint */ - case T_TRCTRAP: /* debug exception */ - break; - - default: - /* - * XXX this is almost useless now. In most cases, - * trap_fatal() has already printed a much more verbose - * message. However, it is dangerous to print things in - * trap_fatal() - kprintf() might be reentered and trap. - * The debugger should be given control first. - */ - if (ddb_mode) - db_printf("kernel: type %d trap, code=%x\n", type, code); - - if (db_nofault) { - jmp_buf *no_fault = db_nofault; - db_nofault = NULL; - longjmp(*no_fault, 1); - } - } - - /* - * This handles unexpected traps in ddb commands, including calls to - * non-ddb functions. db_nofault only applies to memory accesses by - * internal ddb commands. - */ - if (db_global_jmpbuf_valid) - longjmp(db_global_jmpbuf, 1); - - /* - * XXX We really should switch to a local stack here. - */ - ddb_regs = *regs; - - /* - * If in kernel mode, esp and ss are not saved, so dummy them up. - */ - if (ISPL(regs->tf_cs) == 0) { - ddb_regs.tf_esp = (int)®s->tf_esp; - ddb_regs.tf_ss = rss(); - } - - crit_enter(); - db_printf("\nCPU%d stopping CPUs: 0x%08x\n", - mycpu->gd_cpuid, mycpu->gd_other_cpus); - - /* We stop all CPUs except ourselves (obviously) */ - stop_cpus(mycpu->gd_other_cpus); - - db_printf(" stopped\n"); - - setjmp(db_global_jmpbuf); - db_global_jmpbuf_valid = TRUE; - db_active++; - vcons_set_mode(1); - if (ddb_mode) { - cndbctl(TRUE); - db_trap(type, code); - cndbctl(FALSE); - } else - gdb_handle_exception(&ddb_regs, type, code); - db_active--; - vcons_set_mode(0); - db_global_jmpbuf_valid = FALSE; - - db_printf("\nCPU%d restarting CPUs: 0x%016jx\n", - mycpu->gd_cpuid, (uintmax_t)stopped_cpus); - - /* Restart all the CPUs we previously stopped */ - if (stopped_cpus != mycpu->gd_other_cpus) { - db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%016jx\n", - mycpu->gd_other_cpus, (uintmax_t)stopped_cpus); - panic("stop_cpus() failed"); - } - restart_cpus(stopped_cpus); - - db_printf(" restarted\n"); - crit_exit(); - - regs->tf_eip = ddb_regs.tf_eip; - regs->tf_eflags = ddb_regs.tf_eflags; - regs->tf_eax = ddb_regs.tf_eax; - regs->tf_ecx = ddb_regs.tf_ecx; - regs->tf_edx = ddb_regs.tf_edx; - regs->tf_ebx = ddb_regs.tf_ebx; - - /* - * If in user mode, the saved ESP and SS were valid, restore them. - */ - if (ISPL(regs->tf_cs)) { - regs->tf_esp = ddb_regs.tf_esp; - regs->tf_ss = ddb_regs.tf_ss & 0xffff; - } - - regs->tf_ebp = ddb_regs.tf_ebp; - regs->tf_esi = ddb_regs.tf_esi; - regs->tf_edi = ddb_regs.tf_edi; - regs->tf_es = ddb_regs.tf_es & 0xffff; - regs->tf_fs = ddb_regs.tf_fs & 0xffff; - regs->tf_gs = ddb_regs.tf_gs & 0xffff; - regs->tf_cs = ddb_regs.tf_cs & 0xffff; - regs->tf_ds = ddb_regs.tf_ds & 0xffff; - return (1); -} - -/* - * Read bytes from kernel address space for debugger. - */ -void -db_read_bytes(vm_offset_t addr, size_t size, char *data) -{ - char *src; - - db_nofault = &db_jmpbuf; - - src = (char *)addr; - while (size-- > 0) - *data++ = *src++; - - db_nofault = NULL; -} - -/* - * Write bytes to kernel address space for debugger. - */ -void -db_write_bytes(vm_offset_t addr, size_t size, char *data) -{ - char *dst; -#if 0 - vpte_t *ptep0 = NULL; - vpte_t oldmap0 = 0; - vm_offset_t addr1; - vpte_t *ptep1 = NULL; - vpte_t oldmap1 = 0; -#endif - - db_nofault = &db_jmpbuf; -#if 0 - if (addr > trunc_page((vm_offset_t)btext) - size && - addr < round_page((vm_offset_t)etext)) { - - ptep0 = pmap_kpte(addr); - oldmap0 = *ptep0; - *ptep0 |= VPTE_RW; - - /* Map another page if the data crosses a page boundary. */ - if ((*ptep0 & PG_PS) == 0) { - addr1 = trunc_page(addr + size - 1); - if (trunc_page(addr) != addr1) { - ptep1 = pmap_kpte(addr1); - oldmap1 = *ptep1; - *ptep1 |= VPTE_RW; - } - } else { - addr1 = trunc_4mpage(addr + size - 1); - if (trunc_4mpage(addr) != addr1) { - ptep1 = pmap_kpte(addr1); - oldmap1 = *ptep1; - *ptep1 |= VPTE_RW; - } - } - - cpu_invltlb(); - } -#endif - - dst = (char *)addr; - - while (size-- > 0) - *dst++ = *data++; - - db_nofault = NULL; - -#if 0 - if (ptep0) { - *ptep0 = oldmap0; - - if (ptep1) - *ptep1 = oldmap1; - - cpu_invltlb(); - } -#endif -} - -/* - * The debugger sometimes needs to know the actual KVM address represented - * by the instruction pointer, stack pointer, or base pointer. Normally - * the actual KVM address is simply the contents of the register. However, - * if the debugger is entered from the BIOS or VM86 we need to figure out - * the offset from the segment register. - */ -db_addr_t -PC_REGS(db_regs_t *regs) -{ - return(regs->tf_eip); -} - -db_addr_t -SP_REGS(db_regs_t *regs) -{ - return(regs->tf_esp); -} - -db_addr_t -BP_REGS(db_regs_t *regs) -{ - return(regs->tf_ebp); -} - -/* - * XXX - * Move this to machdep.c and allow it to be called if any debugger is - * installed. - */ -void -Debugger(const char *msg) -{ - static volatile u_char in_Debugger; - - /* - * XXX - * Do nothing if the console is in graphics mode. This is - * OK if the call is for the debugger hotkey but not if the call - * is a weak form of panicing. - */ - if (cons_unavail && !(boothowto & RB_GDB)) - return; - - if (!in_Debugger) { - in_Debugger = 1; - db_printf("Debugger(\"%s\")\n", msg); - breakpoint(); - in_Debugger = 0; - } -} diff --git a/sys/platform/vkernel/i386/db_trace.c b/sys/platform/vkernel/i386/db_trace.c deleted file mode 100644 index 8501d0f886..0000000000 --- a/sys/platform/vkernel/i386/db_trace.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - * - * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include /* DLL */ - -#include - -#include -#include -#include - -static int db_dr(struct db_variable *vp, db_expr_t *valuep, int op); - -/* - * Machine register set. - */ -struct db_variable db_regs[] = { - { "cs", &ddb_regs.tf_cs, NULL }, - { "ds", &ddb_regs.tf_ds, NULL }, - { "es", &ddb_regs.tf_es, NULL }, - { "fs", &ddb_regs.tf_fs, NULL }, - { "gs", &ddb_regs.tf_gs, NULL }, - { "ss", &ddb_regs.tf_ss, NULL }, - { "eax", &ddb_regs.tf_eax, NULL }, - { "ecx", &ddb_regs.tf_ecx, NULL }, - { "edx", &ddb_regs.tf_edx, NULL }, - { "ebx", &ddb_regs.tf_ebx, NULL }, - { "esp", &ddb_regs.tf_esp, NULL }, - { "ebp", &ddb_regs.tf_ebp, NULL }, - { "esi", &ddb_regs.tf_esi, NULL }, - { "edi", &ddb_regs.tf_edi, NULL }, - { "eip", &ddb_regs.tf_eip, NULL }, - { "efl", &ddb_regs.tf_eflags, NULL }, - { "dr0", NULL, db_dr }, - { "dr1", NULL, db_dr }, - { "dr2", NULL, db_dr }, - { "dr3", NULL, db_dr }, - { "dr4", NULL, db_dr }, - { "dr5", NULL, db_dr }, - { "dr6", NULL, db_dr }, - { "dr7", NULL, db_dr }, -}; -struct db_variable *db_eregs = db_regs + NELEM(db_regs); - -/* - * Stack trace. - */ -#define INKERNEL(va) (((vm_offset_t)(va)) >= USRSTACK) - -struct i386_frame { - struct i386_frame *f_frame; - int f_retaddr; - int f_arg0; -}; - -#define NORMAL 0 -#define TRAP 1 -#define INTERRUPT 2 -#define SYSCALL 3 - -static void db_nextframe(struct i386_frame **, db_addr_t *); -static int db_numargs(struct i386_frame *); -static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t); -static void dl_symbol_values(int callpc, const char **name); - - -int db_md_set_watchpoint(db_expr_t addr, db_expr_t size); -int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size); -void db_md_list_watchpoints(void); - - -/* - * Figure out how many arguments were passed into the frame at "fp". - */ -static int -db_numargs(struct i386_frame *fp) -{ - int args; -#if 0 - int *argp; - int inst; - - argp = (int *)db_get_value((int)&fp->f_retaddr, 4, FALSE); - /* - * XXX etext is wrong for LKMs. We should attempt to interpret - * the instruction at the return address in all cases. This - * may require better fault handling. - */ - if (argp < (int *)btext || argp >= (int *)etext) { - args = 5; - } else { - inst = db_get_value((int)argp, 4, FALSE); - if ((inst & 0xff) == 0x59) /* popl %ecx */ - args = 1; - else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */ - args = ((inst >> 16) & 0xff) / 4; - else - args = 5; - } -#endif - args = 5; - return(args); -} - -static void -db_print_stack_entry(const char *name, int narg, char **argnp, int *argp, - db_addr_t callpc) -{ - db_printf("%s(", name); - while (narg) { - if (argnp) - db_printf("%s=", *argnp++); - db_printf("%r", db_get_value((int)argp, 4, FALSE)); - argp++; - if (--narg != 0) - db_printf(","); - } - db_printf(") at "); - db_printsym(callpc, DB_STGY_PROC); - db_printf("\n"); -} - -/* - * Figure out the next frame up in the call stack. - */ -static void -db_nextframe(struct i386_frame **fp, db_addr_t *ip) -{ - struct trapframe *tf; - int frame_type; - int eip, esp, ebp; - db_expr_t offset; - const char *sym, *name; - - eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE); - ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE); - - /* - * Figure out frame type. - */ - - frame_type = NORMAL; - - sym = db_search_symbol(eip, DB_STGY_ANY, &offset); - db_symbol_values(sym, &name, NULL); - dl_symbol_values(eip, &name); - if (name != NULL) { - if (!strcmp(name, "calltrap")) { - frame_type = TRAP; - } else if (!strncmp(name, "Xresume", 7)) { - frame_type = INTERRUPT; - } else if (!strcmp(name, "_Xsyscall")) { - frame_type = SYSCALL; - } - } - - /* - * Normal frames need no special processing. - */ - if (frame_type == NORMAL) { - *ip = (db_addr_t) eip; - *fp = (struct i386_frame *) ebp; - return; - } - - db_print_stack_entry(name, 0, 0, 0, eip); - - /* - * Point to base of trapframe which is just above the - * current frame. - */ - tf = (struct trapframe *) ((int)*fp + 8); - -#if 0 - esp = (ISPL(tf->tf_cs) == SEL_UPL) ? tf->tf_esp : (int)&tf->tf_esp; -#endif - esp = (int)&tf->tf_esp; - - switch (frame_type) { - case TRAP: - { - eip = tf->tf_eip; - ebp = tf->tf_ebp; - db_printf( - "--- trap %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", - tf->tf_trapno, eip, esp, ebp); - } - break; - case SYSCALL: - { - eip = tf->tf_eip; - ebp = tf->tf_ebp; - db_printf( - "--- syscall %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", - tf->tf_eax, eip, esp, ebp); - } - break; - case INTERRUPT: - tf = (struct trapframe *)((int)*fp + 16); - { - eip = tf->tf_eip; - ebp = tf->tf_ebp; - db_printf( - "--- interrupt, eip = %#r, esp = %#r, ebp = %#r ---\n", - eip, esp, ebp); - } - break; - default: - break; - } - - *ip = (db_addr_t) eip; - *fp = (struct i386_frame *) ebp; -} - -void -db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, - char *modif) -{ - struct i386_frame *frame; - int *argp; - db_addr_t callpc; - boolean_t first; - int i; - - if (count == -1) - count = 1024; - - if (!have_addr) { - frame = (struct i386_frame *)BP_REGS(&ddb_regs); - if (frame == NULL) - frame = (struct i386_frame *)(SP_REGS(&ddb_regs) - 4); - callpc = PC_REGS(&ddb_regs); - } else { - /* - * Look for something that might be a frame pointer, just as - * a convenience. - */ - frame = (struct i386_frame *)addr; - for (i = 0; i < 4096; i += 4) { - struct i386_frame *check; - - check = (struct i386_frame *)db_get_value((int)((char *)&frame->f_frame + i), 4, FALSE); - if ((char *)check - (char *)frame >= 0 && - (char *)check - (char *)frame < 4096 - ) { - break; - } - db_printf("%p does not look like a stack frame, skipping\n", (char *)&frame->f_frame + i); - } - if (i == 4096) { - db_printf("Unable to find anything that looks like a stack frame\n"); - return; - } - frame = (void *)((char *)frame + i); - db_printf("Trace beginning at frame %p\n", frame); - callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); - } - - first = TRUE; - while (count--) { - struct i386_frame *actframe; - int narg; - const char * name; - db_expr_t offset; - c_db_sym_t sym; -#define MAXNARG 16 - char *argnames[MAXNARG], **argnp = NULL; - - sym = db_search_symbol(callpc, DB_STGY_ANY, &offset); - db_symbol_values(sym, &name, NULL); - dl_symbol_values(callpc, &name); - - /* - * Attempt to determine a (possibly fake) frame that gives - * the caller's pc. It may differ from `frame' if the - * current function never sets up a standard frame or hasn't - * set one up yet or has just discarded one. The last two - * cases can be guessed fairly reliably for code generated - * by gcc. The first case is too much trouble to handle in - * general because the amount of junk on the stack depends - * on the pc (the special handling of "calltrap", etc. in - * db_nextframe() works because the `next' pc is special). - */ - actframe = frame; - if (first) { - if (!have_addr) { - int instr; - - instr = db_get_value(callpc, 4, FALSE); - if ((instr & 0x00ffffff) == 0x00e58955) { - /* pushl %ebp; movl %esp, %ebp */ - actframe = (struct i386_frame *) - (SP_REGS(&ddb_regs) - 4); - } else if ((instr & 0x0000ffff) == 0x0000e589) { - /* movl %esp, %ebp */ - actframe = (struct i386_frame *) - SP_REGS(&ddb_regs); - if (ddb_regs.tf_ebp == 0) { - /* Fake caller's frame better. */ - frame = actframe; - } - } else if ((instr & 0x000000ff) == 0x000000c3) { - /* ret */ - actframe = (struct i386_frame *) - (SP_REGS(&ddb_regs) - 4); - } else if (offset == 0) { - /* Probably a symbol in assembler code. */ - actframe = (struct i386_frame *) - (SP_REGS(&ddb_regs) - 4); - } - } else if (name != NULL && - strcmp(name, "fork_trampoline") == 0) { - /* - * Don't try to walk back on a stack for a - * process that hasn't actually been run yet. - */ - db_print_stack_entry(name, 0, 0, 0, callpc); - break; - } - first = FALSE; - } - - argp = &actframe->f_arg0; - narg = MAXNARG; - if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { - argnp = argnames; - } else { - narg = db_numargs(frame); - } - - db_print_stack_entry(name, narg, argnp, argp, callpc); - - if (actframe != frame) { - /* `frame' belongs to caller. */ - callpc = (db_addr_t) - db_get_value((int)&actframe->f_retaddr, 4, FALSE); - continue; - } - - db_nextframe(&frame, &callpc); - if (frame == NULL) - break; - } -} - -void -print_backtrace(int count) -{ - register_t ebp; - - __asm __volatile("movl %%ebp, %0" : "=r" (ebp)); - db_stack_trace_cmd(ebp, 1, count, NULL); -} - -static int -db_dr(struct db_variable *vp, db_expr_t *valuep, int op) -{ - if (op == DB_VAR_GET) - *valuep = 0; - return(-1); -} - -int -db_md_set_watchpoint(db_expr_t addr, db_expr_t size) -{ - return(-1); -} - -int -db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) -{ - return(-1); -} - -void -db_md_list_watchpoints(void) -{ - /* no hardware watchpoints in vkernel */ -} - -/* - * See if dladdr() can get the symbol name via the standard dynamic loader. - */ -static -void -dl_symbol_values(int callpc, const char **name) -{ - Dl_info info; - - if (*name == NULL) { - if (dladdr((const void *)callpc, &info) != 0) { - if (info.dli_saddr <= (const void *)callpc) - *name = info.dli_sname; - } - } -} - diff --git a/sys/platform/vkernel/i386/exception.c b/sys/platform/vkernel/i386/exception.c deleted file mode 100644 index 54994c0b81..0000000000 --- a/sys/platform/vkernel/i386/exception.c +++ /dev/null @@ -1,213 +0,0 @@ - -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "opt_ddb.h" -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -int _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); -int _udatasel = LSEL(LUDATA_SEL, SEL_UPL); - -static void exc_segfault(int signo, siginfo_t *info, void *ctx); -#ifdef DDB -static void exc_debugger(int signo, siginfo_t *info, void *ctx); -#endif - -/* - * IPIs are 'fast' interrupts, so we deal with them directly from our - * signal handler. - * - * WARNING: Signals are not physically disabled here so we have to enter - * our critical section before bumping gd_intr_nesting_level or another - * interrupt can come along and get really confused. - */ -static -void -ipisig(int nada, siginfo_t *info, void *ctxp) -{ - if (curthread->td_critcount == 0) { - ++curthread->td_critcount; - ++mycpu->gd_intr_nesting_level; - lwkt_process_ipiq(); - --mycpu->gd_intr_nesting_level; - --curthread->td_critcount; - } else { - need_ipiq(); - } -} - -/* - * Unconditionally stop or restart a cpu. - * - * Note: cpu_mask_all_signals() masks all signals except SIGXCPU itself. - * SIGXCPU itself is blocked on entry to stopsig() by the signal handler - * itself. - * - * WARNING: Signals are not physically disabled here so we have to enter - * our critical section before bumping gd_intr_nesting_level or another - * interrupt can come along and get really confused. - */ -static -void -stopsig(int nada, siginfo_t *info, void *ctxp) -{ - sigset_t ss; - - sigemptyset(&ss); - sigaddset(&ss, SIGALRM); - sigaddset(&ss, SIGIO); - sigaddset(&ss, SIGQUIT); - sigaddset(&ss, SIGUSR1); - sigaddset(&ss, SIGUSR2); - sigaddset(&ss, SIGTERM); - sigaddset(&ss, SIGWINCH); - - ++curthread->td_critcount; - ++mycpu->gd_intr_nesting_level; - while (CPUMASK_TESTMASK(stopped_cpus, mycpu->gd_cpumask)) { - sigsuspend(&ss); - } - --mycpu->gd_intr_nesting_level; - --curthread->td_critcount; -} - -#if 0 - -/* - * SIGIO is used by cothreads to signal back into the virtual kernel. - */ -static -void -iosig(int nada, siginfo_t *info, void *ctxp) -{ - signalintr(4); -} - -#endif - -static -void -infosig(int nada, siginfo_t *info, void *ctxp) -{ - ucontext_t *ctx = ctxp; - char buf[256]; - - snprintf(buf, sizeof(buf), "lwp %d pc=%p sp=%p\n", - (int)lwp_gettid(), - (void *)(intptr_t)ctx->uc_mcontext.mc_eip, - (void *)(intptr_t)ctx->uc_mcontext.mc_esp); - write(2, buf, strlen(buf)); -} - -void -init_exceptions(void) -{ - struct sigaction sa; - - bzero(&sa, sizeof(sa)); - sa.sa_sigaction = exc_segfault; - sa.sa_flags |= SA_SIGINFO | SA_NODEFER; - sigemptyset(&sa.sa_mask); - sigaction(SIGBUS, &sa, NULL); - sigaction(SIGSEGV, &sa, NULL); - sigaction(SIGTRAP, &sa, NULL); - sigaction(SIGFPE, &sa, NULL); - - sa.sa_flags &= ~SA_NODEFER; - -#ifdef DDB - sa.sa_sigaction = exc_debugger; - sigaction(SIGQUIT, &sa, NULL); -#endif - sa.sa_sigaction = ipisig; - sigaction(SIGUSR1, &sa, NULL); - sa.sa_sigaction = stopsig; - sigaction(SIGXCPU, &sa, NULL); -#if 0 - sa.sa_sigaction = iosig; - sigaction(SIGIO, &sa, NULL); -#endif - sa.sa_sigaction = infosig; - sigaction(SIGINFO, &sa, NULL); -} - -/* - * This function handles a segmentation fault. - * - * XXX We assume that trapframe is a subset of ucontext. It is as of - * this writing. - */ -static void -exc_segfault(int signo, siginfo_t *info, void *ctxp) -{ - ucontext_t *ctx = ctxp; - -#if 0 - kprintf("CAUGHT SEGFAULT EIP %08x ERR %08x TRAPNO %d err %d\n", - ctx->uc_mcontext.mc_eip, - ctx->uc_mcontext.mc_err, - ctx->uc_mcontext.mc_trapno & 0xFFFF, - ctx->uc_mcontext.mc_trapno >> 16); -#endif - kern_trap((struct trapframe *)&ctx->uc_mcontext.mc_gs); - splz(); -} - -#ifdef DDB - -static void -exc_debugger(int signo, siginfo_t *info, void *ctx) -{ - Debugger("interrupt from console"); -} - -#endif diff --git a/sys/platform/vkernel/i386/fork_tramp.s b/sys/platform/vkernel/i386/fork_tramp.s deleted file mode 100644 index a21a7a905f..0000000000 --- a/sys/platform/vkernel/i386/fork_tramp.s +++ /dev/null @@ -1,103 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - */ - -#include -#include -#include -#include -#include - -#include "assym.s" - - .text - - .globl lwkt_switch_return - -/* - * This function is what cpu_heavy_restore jumps to after a new process - * is created. The LWKT subsystem switches while holding a critical - * section and we maintain that abstraction here (e.g. because - * cpu_heavy_restore needs it due to PCB_*() manipulation), then get out of - * it before calling the initial function (typically fork_return()) and/or - * returning to user mode. - * - * The MP lock is not held at any point but the critcount is bumped - * on entry to prevent interruption of the trampoline at a bad point. - * - * This is effectively what td->td_switch() returns to. It 'returns' the - * old thread in %eax and since this is not returning to a td->td_switch() - * call from lwkt_switch() we must handle the cleanup for the old thread - * by calling lwkt_switch_return(). - * - * fork_trampoline(%eax:otd, %esi:func, %ebx:arg) - */ -ENTRY(fork_trampoline) - pushl %eax - call lwkt_switch_return - addl $4,%esp - movl PCPU(curthread),%eax - decl TD_CRITCOUNT(%eax) - - /* - * cpu_set_fork_handler intercepts this function call to - * have this call a non-return function to stay in kernel mode. - * - * initproc has its own fork handler, start_init(), which DOES - * return. - * - * The function (set in pcb_esi) gets passed two arguments, - * the primary parameter set in pcb_ebx and a pointer to the - * trapframe. - * void (func)(int arg, struct trapframe *frame); - */ - pushl %esp /* pass frame by reference */ - pushl %ebx /* arg1 */ - call *%esi /* function */ - addl $8,%esp - /* cut from syscall */ - - call splz - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - pushl $0 /* if_ppl */ - pushl $0 /* if_vec */ - pushl %esp /* pass by reference */ - call go_user - /* NOT REACHED */ - - diff --git a/sys/platform/vkernel/i386/genassym.c b/sys/platform/vkernel/i386/genassym.c deleted file mode 100644 index 09df35beeb..0000000000 --- a/sys/platform/vkernel/i386/genassym.c +++ /dev/null @@ -1,219 +0,0 @@ -/*- - * Copyright (c) 1982, 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 - * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/*#include */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); -ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); -ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); -ASSYM(PM_ACTIVE_LOCK, offsetof(struct pmap, pm_active_lock)); - -ASSYM(LWP_VMSPACE, offsetof(struct lwp, lwp_vmspace)); - -ASSYM(TD_PROC, offsetof(struct thread, td_proc)); -ASSYM(TD_LWP, offsetof(struct thread, td_lwp)); -ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); -ASSYM(TD_SP, offsetof(struct thread, td_sp)); -ASSYM(TD_PRI, offsetof(struct thread, td_pri)); -ASSYM(TD_CRITCOUNT, offsetof(struct thread, td_critcount)); -ASSYM(TD_MACH, offsetof(struct thread, td_mach)); -ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); -ASSYM(TD_NEST_COUNT, offsetof(struct thread, td_nest_count)); -ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); -ASSYM(TDF_RUNNING, TDF_RUNNING); - -ASSYM(TD_SAVEFPU, offsetof(struct thread, td_mach) + offsetof(struct md_thread, mtd_savefpu)); - -ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT); -ASSYM(CPULOCK_EXCLBIT, CPULOCK_EXCLBIT); -ASSYM(CPULOCK_EXCL, CPULOCK_EXCL); -ASSYM(CPULOCK_INCR, CPULOCK_INCR); -ASSYM(CPULOCK_CNTMASK, CPULOCK_CNTMASK); - -ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); -ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); -ASSYM(V_SENDSYS, offsetof(struct vmmeter, v_sendsys)); -ASSYM(V_WAITSYS, offsetof(struct vmmeter, v_waitsys)); -ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); -ASSYM(V_FORWARDED_INTS, offsetof(struct vmmeter, v_forwarded_ints)); -ASSYM(V_FORWARDED_HITS, offsetof(struct vmmeter, v_forwarded_hits)); -ASSYM(V_FORWARDED_MISSES, offsetof(struct vmmeter, v_forwarded_misses)); - -ASSYM(PAGE_SIZE, PAGE_SIZE); -ASSYM(PAGE_SHIFT, PAGE_SHIFT); -ASSYM(PAGE_MASK, PAGE_MASK); -ASSYM(USRSTACK, USRSTACK); -ASSYM(VM_MAX_USER_ADDRESS, VM_MAX_USER_ADDRESS); -ASSYM(MCLBYTES, MCLBYTES); - -/* PCB_CR3 */ -ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); -ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); -ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); -ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); -ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); -ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); -ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); - -/*ASSYM(PCB_USERLDT, offsetof(struct pcb, pcb_ldt));*/ - -#if 1 -ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); -ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); -ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); -ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); -ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); -ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); -ASSYM(PCB_DBREGS, PCB_DBREGS); -ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); -#endif - -ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); -ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); -ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); -ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); -ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); -ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); - -ASSYM(PCB_SIZE, sizeof(struct pcb)); - -ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); -ASSYM(TF_XFLAGS, offsetof(struct trapframe, tf_xflags)); -ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); -ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); -ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); -ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); -ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); -ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); -ASSYM(ENOENT, ENOENT); -ASSYM(EFAULT, EFAULT); -ASSYM(ENAMETOOLONG, ENAMETOOLONG); -ASSYM(MAXPATHLEN, MAXPATHLEN); - -#if 0 -ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); -ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); -ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); -ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); -ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); -ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); -ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); -ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); -ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); -ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); -#endif - -ASSYM(GD_CURTHREAD, offsetof(struct mdglobaldata, mi.gd_curthread)); -ASSYM(GD_CPUID, offsetof(struct mdglobaldata, mi.gd_cpuid)); -ASSYM(GD_CPUMASK, offsetof(struct mdglobaldata, mi.gd_cpumask)); -ASSYM(GD_CNT, offsetof(struct mdglobaldata, mi.gd_cnt)); -ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct mdglobaldata, mi.gd_intr_nesting_level)); -ASSYM(GD_REQFLAGS, offsetof(struct mdglobaldata, mi.gd_reqflags)); - -ASSYM(GD_CURRENTLDT, offsetof(struct mdglobaldata, gd_currentldt)); - -ASSYM(RQF_IPIQ, RQF_IPIQ); -ASSYM(RQF_INTPEND, RQF_INTPEND); -ASSYM(RQF_AST_OWEUPC, RQF_AST_OWEUPC); -ASSYM(RQF_AST_SIGNAL, RQF_AST_SIGNAL); -ASSYM(RQF_AST_USER_RESCHED, RQF_AST_USER_RESCHED); -ASSYM(RQF_AST_LWKT_RESCHED, RQF_AST_LWKT_RESCHED); -ASSYM(RQF_AST_MASK, RQF_AST_MASK); - -ASSYM(FIRST_SOFTINT, FIRST_SOFTINT); -ASSYM(MDGLOBALDATA_BASEALLOC_PAGES, MDGLOBALDATA_BASEALLOC_PAGES); - -ASSYM(GD_FPENDING, offsetof(struct mdglobaldata, gd_fpending)); -ASSYM(GD_IPENDING, offsetof(struct mdglobaldata, gd_ipending)); -ASSYM(GD_SPENDING, offsetof(struct mdglobaldata, gd_spending)); -ASSYM(GD_COMMON_TSS, offsetof(struct mdglobaldata, gd_common_tss)); -ASSYM(GD_COMMON_TSSD, offsetof(struct mdglobaldata, gd_common_tssd)); -ASSYM(GD_TSS_GDT, offsetof(struct mdglobaldata, gd_tss_gdt)); -ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread)); -ASSYM(GD_FPU_LOCK, offsetof(struct mdglobaldata, gd_fpu_lock)); -ASSYM(GD_SAVEFPU, offsetof(struct mdglobaldata, gd_savefpu)); -ASSYM(GD_OTHER_CPUS, offsetof(struct mdglobaldata, mi.gd_other_cpus)); -ASSYM(GD_SS_EFLAGS, offsetof(struct mdglobaldata, gd_ss_eflags)); - -ASSYM(GD_CMAP1, offsetof(struct mdglobaldata, gd_CMAP1)); -ASSYM(GD_CMAP2, offsetof(struct mdglobaldata, gd_CMAP2)); -ASSYM(GD_CMAP3, offsetof(struct mdglobaldata, gd_CMAP3)); -ASSYM(GD_PMAP1, offsetof(struct mdglobaldata, gd_PMAP1)); -ASSYM(GD_CADDR1, offsetof(struct mdglobaldata, gd_CADDR1)); -ASSYM(GD_CADDR2, offsetof(struct mdglobaldata, gd_CADDR2)); -ASSYM(GD_CADDR3, offsetof(struct mdglobaldata, gd_CADDR3)); -ASSYM(GD_PADDR1, offsetof(struct mdglobaldata, gd_PADDR1)); - -ASSYM(PS_IDLESTACK, offsetof(struct privatespace, idlestack)); -ASSYM(PS_IDLESTACK_PAGE, offsetof(struct privatespace, idlestack) / PAGE_SIZE); -ASSYM(PS_IDLESTACK_TOP, sizeof(struct privatespace)); -ASSYM(PS_SIZEOF, sizeof(struct privatespace)); - diff --git a/sys/platform/vkernel/i386/global.s b/sys/platform/vkernel/i386/global.s deleted file mode 100644 index c81783fa79..0000000000 --- a/sys/platform/vkernel/i386/global.s +++ /dev/null @@ -1,97 +0,0 @@ -/*- - * Copyright (c) Peter Wemm - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.2 2007/01/05 22:18:18 dillon Exp $ - */ - -#include -#include - -#include "assym.s" - - /* - * Define the layout of the per-cpu address space. This is - * "constructed" in locore.s on the BSP and in mp_machdep.c for - * each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! - * - * On UP the per-cpu address space is simply placed in the data - * segment. - */ - .data - - .globl globaldata - .set globaldata,0 - - /* - * Define layout of the global data. On SMP this lives in - * the per-cpu address space, otherwise it's in the data segment. - */ - .globl gd_curthread, gd_npxthread, gd_reqflags, gd_common_tss - .set gd_curthread,globaldata + GD_CURTHREAD - .set gd_npxthread,globaldata + GD_NPXTHREAD - .set gd_reqflags,globaldata + GD_REQFLAGS - .set gd_common_tss,globaldata + GD_COMMON_TSS - - .globl gd_common_tssd, gd_tss_gdt - .set gd_common_tssd,globaldata + GD_COMMON_TSSD - .set gd_tss_gdt,globaldata + GD_TSS_GDT - - .globl gd_currentldt - .set gd_currentldt,globaldata + GD_CURRENTLDT - - .globl gd_fpu_lock, gd_savefpu - .set gd_fpu_lock, globaldata + GD_FPU_LOCK - .set gd_savefpu, globaldata + GD_SAVEFPU - - /* - * The BSP version of these get setup in locore.s and pmap.c, while - * the AP versions are setup in mp_machdep.c. - */ - .globl gd_cpuid, gd_cpumask, gd_other_cpus - .globl gd_ss_eflags, gd_intr_nesting_level - .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 - .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 - .globl gd_spending, gd_ipending, gd_fpending - .globl gd_cnt - - .set gd_cpuid,globaldata + GD_CPUID - .set gd_cpumask,globaldata + GD_CPUMASK - .set gd_other_cpus,globaldata + GD_OTHER_CPUS - .set gd_ss_eflags,globaldata + GD_SS_EFLAGS - .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL - .set gd_CMAP1,globaldata + GD_PRV_CMAP1 - .set gd_CMAP2,globaldata + GD_PRV_CMAP2 - .set gd_CMAP3,globaldata + GD_PRV_CMAP3 - .set gd_PMAP1,globaldata + GD_PRV_PMAP1 - .set gd_CADDR1,globaldata + GD_PRV_CADDR1 - .set gd_CADDR2,globaldata + GD_PRV_CADDR2 - .set gd_CADDR3,globaldata + GD_PRV_CADDR3 - .set gd_PADDR1,globaldata + GD_PRV_PADDR1 - .set gd_fpending,globaldata + GD_FPENDING - .set gd_ipending,globaldata + GD_IPENDING - .set gd_spending,globaldata + GD_SPENDING - .set gd_cnt,globaldata + GD_CNT - diff --git a/sys/platform/vkernel/i386/locore.s b/sys/platform/vkernel/i386/locore.s deleted file mode 100644 index 4b3c30dad6..0000000000 --- a/sys/platform/vkernel/i386/locore.s +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.6 2007/01/08 03:33:43 dillon Exp $ - */ - -#include -#include -#include -#include "assym.s" - - .globl kernbase - .set kernbase,KERNBASE - - /* - * The sysinit and sysuninit sections have to be read-write data - * sections, not read-only data sections. - */ - .section set_sysinit_set, "aw" - .section set_sysuninit_set, "aw" - - .data - ALIGN_DATA /* just to be sure */ - - /* - * Normally the startup code would begin here, but this is a - * virtual kernel so we just have a main() in platform/init.c - */ - - .text - -/* - * Signal trampoline, copied to top of user stack - */ -NON_GPROF_ENTRY(sigcode) - call *SIGF_HANDLER(%esp) /* call signal handler */ - lea SIGF_UC(%esp),%eax /* get ucontext_t */ - pushl %eax -#if 0 - testl $PSL_VM,UC_EFLAGS(%eax) - jne 9f -#endif -#if 0 -9: -#endif - movl $SYS_sigreturn,%eax - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ -0: jmp 0b - - ALIGN_TEXT -esigcode: - -/* void reset_dbregs() */ -ENTRY(reset_dbregs) - movl $0,%eax - movl %eax,%dr7 /* disable all breapoints first */ - movl %eax,%dr0 - movl %eax,%dr1 - movl %eax,%dr2 - movl %eax,%dr3 - movl %eax,%dr6 - ret - - .data - .globl szsigcode -szsigcode: - .long esigcode - sigcode - diff --git a/sys/platform/vkernel/i386/mp.c b/sys/platform/vkernel/i386/mp.c deleted file mode 100644 index 60ae4be2d3..0000000000 --- a/sys/platform/vkernel/i386/mp.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright (c) 2007 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -extern pt_entry_t *KPTphys; - -volatile cpumask_t stopped_cpus; -cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ -static int boot_address; -static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ -int mp_naps; /* # of Applications processors */ -static int mp_finish; - -/* Local data for detecting CPU TOPOLOGY */ -static int core_bits = 0; -static int logical_CPU_bits = 0; - -/* function prototypes XXX these should go elsewhere */ -void bootstrap_idle(void); -void single_cpu_ipi(int, int, int); -void selected_cpu_ipi(cpumask_t, int, int); -#if 0 -void ipi_handler(int); -#endif - -pt_entry_t *SMPpt; - -/* AP uses this during bootstrap. Do not staticize. */ -char *bootSTK; -static int bootAP; - - -/* XXX these need to go into the appropriate header file */ -static int start_all_aps(u_int); -void init_secondary(void); -void *start_ap(void *); - -/* - * Get SMP fully working before we start initializing devices. - */ -static -void -ap_finish(void) -{ - mp_finish = 1; - if (bootverbose) - kprintf("Finish MP startup\n"); - - /* build our map of 'other' CPUs */ - mycpu->gd_other_cpus = smp_startup_mask; - CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid); - - /* - * Let the other cpu's finish initializing and build their map - * of 'other' CPUs. - */ - rel_mplock(); - while (CPUMASK_CMPMASKNEQ(smp_active_mask, smp_startup_mask)) { - DELAY(100000); - cpu_lfence(); - } - - while (try_mplock() == 0) - DELAY(100000); - if (bootverbose) - kprintf("Active CPU Mask: %08x\n", smp_active_mask); -} - -SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL); - - -void * -start_ap(void *arg __unused) -{ - init_secondary(); - setrealcpu(); - bootstrap_idle(); - - return(NULL); /* NOTREACHED */ -} - -/* storage for AP thread IDs */ -pthread_t ap_tids[MAXCPU]; - -void -mp_start(void) -{ - int shift; - size_t ipiq_size; - - ncpus = optcpus; - - mp_naps = ncpus - 1; - - /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ - for (shift = 0; (1 << shift) <= ncpus; ++shift) - ; - --shift; - ncpus2_shift = shift; - ncpus2 = 1 << shift; - ncpus2_mask = ncpus2 - 1; - - /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ - if ((1 << shift) < ncpus) - ++shift; - ncpus_fit = 1 << shift; - ncpus_fit_mask = ncpus_fit - 1; - - /* - * cpu0 initialization - */ - ipiq_size = sizeof(struct lwkt_ipiq) * ncpus; - mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size); - bzero(mycpu->gd_ipiq, ipiq_size); - - /* - * cpu 1-(n-1) - */ - start_all_aps(boot_address); - -} - -void -mp_announce(void) -{ - int x; - - kprintf("DragonFly/MP: Multiprocessor\n"); - kprintf(" cpu0 (BSP)\n"); - - for (x = 1; x <= mp_naps; ++x) - kprintf(" cpu%d (AP)\n", x); -} - -void -cpu_send_ipiq(int dcpu) -{ - if (CPUMASK_TESTBIT(smp_active_mask, dcpu)) { - if (pthread_kill(ap_tids[dcpu], SIGUSR1) != 0) - panic("pthread_kill failed in cpu_send_ipiq"); - } -#if 0 - panic("XXX cpu_send_ipiq()"); -#endif -} - -void -smp_invltlb(void) -{ -} - -void -single_cpu_ipi(int cpu, int vector, int delivery_mode) -{ - kprintf("XXX single_cpu_ipi\n"); -} - -void -selected_cpu_ipi(cpumask_t target, int vector, int delivery_mode) -{ - crit_enter(); - while (CPUMASK_TESTNZERO(target)) { - int n = BSFCPUMASK(target); - CPUMASK_NANDBIT(target, n); - single_cpu_ipi(n, vector, delivery_mode); - } - crit_exit(); -} - -int -stop_cpus(cpumask_t map) -{ - CPUMASK_ANDMASK(map, smp_active_mask); - - crit_enter(); - while (CPUMASK_TESTNZERO(map)) { - int n = BSFCPUMASK(map); - CPUMASK_NANDBIT(map, n); - ATOMIC_CPUMASK_ORBIT(stopped_cpus, n); - if (pthread_kill(ap_tids[n], SIGXCPU) != 0) - panic("stop_cpus: pthread_kill failed"); - } - crit_exit(); -#if 0 - panic("XXX stop_cpus()"); -#endif - - return(1); -} - -int -restart_cpus(cpumask_t map) -{ - CPUMASK_ANDMASK(map, smp_active_mask); - - crit_enter(); - while (CPUMASK_TESTNZERO(map)) { - int n = BSFCPUMASK(map); - CPUMASK_NANDBIT(map, n); - ATOMIC_CPUMASK_NANDBIT(stopped_cpus, n); - if (pthread_kill(ap_tids[n], SIGXCPU) != 0) - panic("restart_cpus: pthread_kill failed"); - } - crit_exit(); -#if 0 - panic("XXX restart_cpus()"); -#endif - - return(1); -} - -void -ap_init(void) -{ - /* - * Adjust smp_startup_mask to signal the BSP that we have started - * up successfully. Note that we do not yet hold the BGL. The BSP - * is waiting for our signal. - * - * We can't set our bit in smp_active_mask yet because we are holding - * interrupts physically disabled and remote cpus could deadlock - * trying to send us an IPI. - */ - ATOMIC_CPUMASK_ORBIT(smp_startup_mask, mycpu->gd_cpuid); - cpu_mfence(); - - /* - * Interlock for finalization. Wait until mp_finish is non-zero, - * then get the MP lock. - * - * Note: We are in a critical section. - * - * Note: we are the idle thread, we can only spin. - * - * Note: The load fence is memory volatile and prevents the compiler - * from improperly caching mp_finish, and the cpu from improperly - * caching it. - */ - - while (mp_finish == 0) { - cpu_lfence(); - DELAY(500000); - } - while (try_mplock() == 0) - DELAY(100000); - - /* BSP may have changed PTD while we're waiting for the lock */ - cpu_invltlb(); - - /* Build our map of 'other' CPUs. */ - mycpu->gd_other_cpus = smp_startup_mask; - CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid); - - kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid); - - - /* Set memory range attributes for this CPU to match the BSP */ - mem_range_AP_init(); - /* - * Once we go active we must process any IPIQ messages that may - * have been queued, because no actual IPI will occur until we - * set our bit in the smp_active_mask. If we don't the IPI - * message interlock could be left set which would also prevent - * further IPIs. - * - * The idle loop doesn't expect the BGL to be held and while - * lwkt_switch() normally cleans things up this is a special case - * because we returning almost directly into the idle loop. - * - * The idle thread is never placed on the runq, make sure - * nothing we've done put it there. - */ - KKASSERT(get_mplock_count(curthread) == 1); - ATOMIC_CPUMASK_ORBIT(smp_active_mask, mycpu->gd_cpuid); - - mdcpu->gd_fpending = 0; - mdcpu->gd_ipending = 0; - initclocks_pcpu(); /* clock interrupts (via IPIs) */ - lwkt_process_ipiq(); - - /* - * Releasing the mp lock lets the BSP finish up the SMP init - */ - rel_mplock(); - KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); -} - -void -init_secondary(void) -{ - int myid = bootAP; - struct mdglobaldata *md; - struct privatespace *ps; - - ps = &CPU_prvspace[myid]; - - KKASSERT(ps->mdglobaldata.mi.gd_prvspace == ps); - - /* - * Setup the %fs for cpu #n. The mycpu macro works after this - * point. Note that %gs is used by pthreads. - */ - tls_set_fs(&CPU_prvspace[myid], sizeof(struct privatespace)); - - md = mdcpu; /* loaded through %fs:0 (mdglobaldata.mi.gd_prvspace)*/ - - md->gd_common_tss.tss_esp0 = 0; /* not used until after switch */ - md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; - - /* - * Set to a known state: - * Set by mpboot.s: CR0_PG, CR0_PE - * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM - */ -} - -static int -start_all_aps(u_int boot_addr) -{ - int x, i; - struct mdglobaldata *gd; - struct privatespace *ps; - vm_page_t m; - vm_offset_t va; - size_t ipiq_size; -#if 0 - struct lwp_params params; -#endif - - /* - * needed for ipis to initial thread - * FIXME: rename ap_tids? - */ - ap_tids[0] = pthread_self(); - - vm_object_hold(&kernel_object); - for (x = 1; x <= mp_naps; x++) - { - /* Allocate space for the CPU's private space. */ - for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) { - va =(vm_offset_t)&CPU_prvspace[x].mdglobaldata + i; - m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM); - pmap_kenter_quick(va, m->phys_addr); - } - - for (i = 0; i < sizeof(CPU_prvspace[x].idlestack); i += PAGE_SIZE) { - va =(vm_offset_t)&CPU_prvspace[x].idlestack + i; - m = vm_page_alloc(&kernel_object, va, VM_ALLOC_SYSTEM); - pmap_kenter_quick(va, m->phys_addr); - } - - gd = &CPU_prvspace[x].mdglobaldata; /* official location */ - bzero(gd, sizeof(*gd)); - gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; - - /* prime data page for it to use */ - mi_gdinit(&gd->mi, x); - cpu_gdinit(gd, x); - -#if 0 - gd->gd_CMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE1); - gd->gd_CMAP2 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE2); - gd->gd_CMAP3 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE3); - gd->gd_PMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].PPAGE1); - gd->gd_CADDR1 = ps->CPAGE1; - gd->gd_CADDR2 = ps->CPAGE2; - gd->gd_CADDR3 = ps->CPAGE3; - gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1; -#endif - - ipiq_size = sizeof(struct lwkt_ipiq) * (mp_naps + 1); - gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size); - bzero(gd->mi.gd_ipiq, ipiq_size); - - /* - * Setup the AP boot stack - */ - bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; - bootAP = x; - - /* - * Setup the AP's lwp, this is the 'cpu' - * - * We have to make sure our signals are masked or the new LWP - * may pick up a signal that it isn't ready for yet. SMP - * startup occurs after SI_BOOT2_LEAVE_CRIT so interrupts - * have already been enabled. - */ - cpu_disable_intr(); - pthread_create(&ap_tids[x], NULL, start_ap, NULL); - cpu_enable_intr(); - - while (CPUMASK_TESTBIT(smp_startup_mask, x) == 0) { - cpu_lfence(); /* XXX spin until the AP has started */ - DELAY(1000); - } - } - vm_object_drop(&kernel_object); - - return(ncpus - 1); -} - -/* - * CPU TOPOLOGY DETECTION FUNCTIONS. - */ - -void -detect_cpu_topology(void) -{ - logical_CPU_bits = vkernel_b_arg; - core_bits = vkernel_B_arg; -} - -int -get_chip_ID(int cpuid) -{ - return get_apicid_from_cpuid(cpuid) >> - (logical_CPU_bits + core_bits); -} - -int -get_core_number_within_chip(int cpuid) -{ - return (get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) & - ( (1 << core_bits) -1); -} - -int -get_logical_CPU_number_within_core(int cpuid) -{ - return get_apicid_from_cpuid(cpuid) & - ( (1 << logical_CPU_bits) -1); -} diff --git a/sys/platform/vkernel/i386/npx.c b/sys/platform/vkernel/i386/npx.c deleted file mode 100644 index a19218c9d6..0000000000 --- a/sys/platform/vkernel/i386/npx.c +++ /dev/null @@ -1,711 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * Copyright (c) 1990 William Jolitz. - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) -#define fnclex() __asm("fnclex") -#define fninit() __asm("fninit") -#define fnop() __asm("fnop") -#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) -#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) -#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) -#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) -#ifndef CPU_DISABLE_SSE -#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) -#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) -#endif - -typedef u_char bool_t; -#ifndef CPU_DISABLE_SSE -static void fpu_clean_state(void); -#endif - -int cpu_fxsr = 0; - -static struct krate badfprate = { 1 }; - -/*static int npx_attach (device_t dev);*/ -static void fpusave (union savefpu *); -static void fpurstor (union savefpu *); - -uint32_t npx_mxcsr_mask = 0xFFBF; /* this is the default */ - -#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(CPU_DISABLE_SSE) -int mmxopt = 1; -SYSCTL_INT(_kern, OID_AUTO, mmxopt, CTLFLAG_RD, &mmxopt, 0, - "MMX/XMM optimized bcopy/copyin/copyout support"); -#endif - -static int hw_instruction_sse; -SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, - &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); - -#if 0 -/* - * Attach routine - announce which it is, and wire into system - */ -int -npx_attach(device_t dev) -{ - npxinit(); - return (0); -} -#endif - -/* - * Probe the npx_mxcsr_mask - */ -void npxprobemask(void) -{ - static union savefpu dummy __aligned(64); - - crit_enter(); - /*stop_emulating();*/ - fxsave(&dummy); - npx_mxcsr_mask = ((uint32_t *)&dummy)[7]; - /*start_emulating();*/ - crit_exit(); -} - -void -init_fpu(int supports_sse) -{ - cpu_fxsr = hw_instruction_sse = supports_sse; - if(supports_sse) { - npxprobemask(); - } -} - -/* - * Initialize the floating point unit. - */ -void npxinit(void) -{ - static union savefpu dummy __aligned(16); - u_short control = __INITIAL_NPXCW__; - - /* - * fninit has the same h/w bugs as fnsave. Use the detoxified - * fnsave to throw away any junk in the fpu. npxsave() initializes - * the fpu and sets npxthread = NULL as important side effects. - */ - npxsave(&dummy); - crit_enter(); - /*stop_emulating();*/ - fldcw(&control); - fpusave(curthread->td_savefpu); - mdcpu->gd_npxthread = NULL; - /*start_emulating();*/ - crit_exit(); -} - -/* - * Free coprocessor (if we have it). - */ -void -npxexit(void) -{ - if (curthread == mdcpu->gd_npxthread) - npxsave(curthread->td_savefpu); -} - -#if 0 -/* - * The following mechanism is used to ensure that the FPE_... value - * that is passed as a trapcode to the signal handler of the user - * process does not have more than one bit set. - * - * Multiple bits may be set if the user process modifies the control - * word while a status word bit is already set. While this is a sign - * of bad coding, we have no choise than to narrow them down to one - * bit, since we must not send a trapcode that is not exactly one of - * the FPE_ macros. - * - * The mechanism has a static table with 127 entries. Each combination - * of the 7 FPU status word exception bits directly translates to a - * position in this table, where a single FPE_... value is stored. - * This FPE_... value stored there is considered the "most important" - * of the exception bits and will be sent as the signal code. The - * precedence of the bits is based upon Intel Document "Numerical - * Applications", Chapter "Special Computational Situations". - * - * The macro to choose one of these values does these steps: 1) Throw - * away status word bits that cannot be masked. 2) Throw away the bits - * currently masked in the control word, assuming the user isn't - * interested in them anymore. 3) Reinsert status word bit 7 (stack - * fault) if it is set, which cannot be masked but must be presered. - * 4) Use the remaining bits to point into the trapcode table. - * - * The 6 maskable bits in order of their preference, as stated in the - * above referenced Intel manual: - * 1 Invalid operation (FP_X_INV) - * 1a Stack underflow - * 1b Stack overflow - * 1c Operand of unsupported format - * 1d SNaN operand. - * 2 QNaN operand (not an exception, irrelavant here) - * 3 Any other invalid-operation not mentioned above or zero divide - * (FP_X_INV, FP_X_DZ) - * 4 Denormal operand (FP_X_DNML) - * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) - * 6 Inexact result (FP_X_IMP) - */ -static char fpetable[128] = { - 0, - FPE_FLTINV, /* 1 - INV */ - FPE_FLTUND, /* 2 - DNML */ - FPE_FLTINV, /* 3 - INV | DNML */ - FPE_FLTDIV, /* 4 - DZ */ - FPE_FLTINV, /* 5 - INV | DZ */ - FPE_FLTDIV, /* 6 - DNML | DZ */ - FPE_FLTINV, /* 7 - INV | DNML | DZ */ - FPE_FLTOVF, /* 8 - OFL */ - FPE_FLTINV, /* 9 - INV | OFL */ - FPE_FLTUND, /* A - DNML | OFL */ - FPE_FLTINV, /* B - INV | DNML | OFL */ - FPE_FLTDIV, /* C - DZ | OFL */ - FPE_FLTINV, /* D - INV | DZ | OFL */ - FPE_FLTDIV, /* E - DNML | DZ | OFL */ - FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ - FPE_FLTUND, /* 10 - UFL */ - FPE_FLTINV, /* 11 - INV | UFL */ - FPE_FLTUND, /* 12 - DNML | UFL */ - FPE_FLTINV, /* 13 - INV | DNML | UFL */ - FPE_FLTDIV, /* 14 - DZ | UFL */ - FPE_FLTINV, /* 15 - INV | DZ | UFL */ - FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ - FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ - FPE_FLTOVF, /* 18 - OFL | UFL */ - FPE_FLTINV, /* 19 - INV | OFL | UFL */ - FPE_FLTUND, /* 1A - DNML | OFL | UFL */ - FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ - FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ - FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ - FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ - FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ - FPE_FLTRES, /* 20 - IMP */ - FPE_FLTINV, /* 21 - INV | IMP */ - FPE_FLTUND, /* 22 - DNML | IMP */ - FPE_FLTINV, /* 23 - INV | DNML | IMP */ - FPE_FLTDIV, /* 24 - DZ | IMP */ - FPE_FLTINV, /* 25 - INV | DZ | IMP */ - FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ - FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ - FPE_FLTOVF, /* 28 - OFL | IMP */ - FPE_FLTINV, /* 29 - INV | OFL | IMP */ - FPE_FLTUND, /* 2A - DNML | OFL | IMP */ - FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ - FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ - FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ - FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ - FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ - FPE_FLTUND, /* 30 - UFL | IMP */ - FPE_FLTINV, /* 31 - INV | UFL | IMP */ - FPE_FLTUND, /* 32 - DNML | UFL | IMP */ - FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ - FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ - FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ - FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ - FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ - FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ - FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ - FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ - FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ - FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ - FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ - FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ - FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ - FPE_FLTSUB, /* 40 - STK */ - FPE_FLTSUB, /* 41 - INV | STK */ - FPE_FLTUND, /* 42 - DNML | STK */ - FPE_FLTSUB, /* 43 - INV | DNML | STK */ - FPE_FLTDIV, /* 44 - DZ | STK */ - FPE_FLTSUB, /* 45 - INV | DZ | STK */ - FPE_FLTDIV, /* 46 - DNML | DZ | STK */ - FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ - FPE_FLTOVF, /* 48 - OFL | STK */ - FPE_FLTSUB, /* 49 - INV | OFL | STK */ - FPE_FLTUND, /* 4A - DNML | OFL | STK */ - FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ - FPE_FLTDIV, /* 4C - DZ | OFL | STK */ - FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ - FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ - FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ - FPE_FLTUND, /* 50 - UFL | STK */ - FPE_FLTSUB, /* 51 - INV | UFL | STK */ - FPE_FLTUND, /* 52 - DNML | UFL | STK */ - FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ - FPE_FLTDIV, /* 54 - DZ | UFL | STK */ - FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ - FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ - FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ - FPE_FLTOVF, /* 58 - OFL | UFL | STK */ - FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ - FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ - FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ - FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ - FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ - FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ - FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ - FPE_FLTRES, /* 60 - IMP | STK */ - FPE_FLTSUB, /* 61 - INV | IMP | STK */ - FPE_FLTUND, /* 62 - DNML | IMP | STK */ - FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ - FPE_FLTDIV, /* 64 - DZ | IMP | STK */ - FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ - FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ - FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ - FPE_FLTOVF, /* 68 - OFL | IMP | STK */ - FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ - FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ - FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ - FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ - FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ - FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ - FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ - FPE_FLTUND, /* 70 - UFL | IMP | STK */ - FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ - FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ - FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ - FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ - FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ - FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ - FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ - FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ - FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ - FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ - FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ -}; -#endif - -#if 0 - -/* - * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. - * - * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now - * depend on longjmp() restoring a usable state. Restoring the state - * or examining it might fail if we didn't clear exceptions. - * - * The error code chosen will be one of the FPE_... macros. It will be - * sent as the second argument to old BSD-style signal handlers and as - * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. - * - * XXX the FP state is not preserved across signal handlers. So signal - * handlers cannot afford to do FP unless they preserve the state or - * longjmp() out. Both preserving the state and longjmp()ing may be - * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable - * solution for signals other than SIGFPE. - * - * The MP lock is not held on entry (see i386/i386/exception.s) and - * should not be held on exit. Interrupts are enabled. We must enter - * a critical section to stabilize the FP system and prevent an interrupt - * or preemption from changing the FP state out from under us. - */ -void -npx_intr(void *dummy) -{ - int code; - u_short control; - u_short status; - struct intrframe *frame; - - crit_enter(); - - /* - * This exception can only occur with CR0_TS clear, otherwise we - * would get a DNA exception. However, since interrupts were - * enabled a preemption could have sneaked in and used the FP system - * before we entered our critical section. If that occured, the - * TS bit will be set and npxthread will be NULL. - */ - panic("npx_intr: not coded"); - /* XXX FP STATE FLAG MUST BE PART OF CONTEXT SUPPLIED BY REAL KERNEL */ -#if 0 - if (rcr0() & CR0_TS) { - KASSERT(mdcpu->gd_npxthread == NULL, ("gd_npxthread was %p with TS set!", mdcpu->gd_npxthread)); - npxdna(); - crit_exit(); - return; - } -#endif - if (mdcpu->gd_npxthread == NULL) { - get_mplock(); - kprintf("npxintr: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, curthread); - panic("npxintr from nowhere"); - } - if (mdcpu->gd_npxthread != curthread) { - get_mplock(); - kprintf("npxintr: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, curthread); - panic("npxintr from non-current process"); - } - - outb(0xf0, 0); - fnstsw(&status); - fnstcw(&control); - fnclex(); - - get_mplock(); - - /* - * Pass exception to process. - */ - frame = (struct intrframe *)&dummy; /* XXX */ - if ((ISPL(frame->if_cs) == SEL_UPL) /*||(frame->if_eflags&PSL_VM)*/) { - /* - * Interrupt is essentially a trap, so we can afford to call - * the SIGFPE handler (if any) as soon as the interrupt - * returns. - * - * XXX little or nothing is gained from this, and plenty is - * lost - the interrupt frame has to contain the trap frame - * (this is otherwise only necessary for the rescheduling trap - * in doreti, and the frame for that could easily be set up - * just before it is used). - */ - curthread->td_lwp->lwp_md.md_regs = INTR_TO_TRAPFRAME(frame); - /* - * Encode the appropriate code for detailed information on - * this exception. - */ - code = - fpetable[(status & ~control & 0x3f) | (status & 0x40)]; - trapsignal(curthread->td_lwp, SIGFPE, code); - } else { - /* - * Nested interrupt. These losers occur when: - * o an IRQ13 is bogusly generated at a bogus time, e.g.: - * o immediately after an fnsave or frstor of an - * error state. - * o a couple of 386 instructions after - * "fstpl _memvar" causes a stack overflow. - * These are especially nasty when combined with a - * trace trap. - * o an IRQ13 occurs at the same time as another higher- - * priority interrupt. - * - * Treat them like a true async interrupt. - */ - lwpsignal(curproc, curthread->td_lwp, SIGFPE); - } - rel_mplock(); - crit_exit(); -} - -#endif - -/* - * Implement the device not available (DNA) exception. gd_npxthread had - * better be NULL. Restore the current thread's FP state and set gd_npxthread - * to curthread. - * - * Interrupts are enabled and preemption can occur. Enter a critical - * section to stabilize the FP state. - */ -int -npxdna(struct trapframe *frame) -{ - thread_t td = curthread; - int didinit = 0; - - if (mdcpu->gd_npxthread != NULL) { - kprintf("npxdna: npxthread = %p, curthread = %p\n", - mdcpu->gd_npxthread, td); - panic("npxdna"); - } - - /* - * Setup the initial saved state if the thread has never before - * used the FP unit. This also occurs when a thread pushes a - * signal handler and uses FP in the handler. - */ - if ((curthread->td_flags & TDF_USINGFP) == 0) { - curthread->td_flags |= TDF_USINGFP; - npxinit(); - didinit = 1; - } - - /* - * The setting of gd_npxthread and the call to fpurstor() must not - * be preempted by an interrupt thread or we will take an npxdna - * trap and potentially save our current fpstate (which is garbage) - * and then restore the garbage rather then the originally saved - * fpstate. - */ - crit_enter(); - /*stop_emulating();*/ - /* - * Record new context early in case frstor causes an IRQ13. - */ - mdcpu->gd_npxthread = td; - /* - * The following frstor may cause an IRQ13 when the state being - * restored has a pending error. The error will appear to have been - * triggered by the current (npx) user instruction even when that - * instruction is a no-wait instruction that should not trigger an - * error (e.g., fnclex). On at least one 486 system all of the - * no-wait instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least one - * 386/Cyrix 387 system, fnclex works correctly while frstor and - * fnsave are broken, so our treatment breaks fnclex if it is the - * first FPU instruction after a context switch. - */ - if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) && cpu_fxsr) { - krateprintf(&badfprate, - "FXRSTR: illegal FP MXCSR %08x didinit = %d\n", - td->td_savefpu->sv_xmm.sv_env.en_mxcsr, didinit); - td->td_savefpu->sv_xmm.sv_env.en_mxcsr &= 0xFFBF; - lwpsignal(curproc, curthread->td_lwp, SIGFPE); - } - fpurstor(curthread->td_savefpu); - crit_exit(); - - return (1); -} - -/* - * Wrapper for the fnsave instruction to handle h/w bugs. If there is an error - * pending, then fnsave generates a bogus IRQ13 on some systems. Force - * any IRQ13 to be handled immediately, and then ignore it. This routine is - * often called at splhigh so it must not use many system services. In - * particular, it's much easier to install a special handler than to - * guarantee that it's safe to use npxintr() and its supporting code. - * - * WARNING! This call is made during a switch and the MP lock will be - * setup for the new target thread rather then the current thread, so we - * cannot do anything here that depends on the *_mplock() functions as - * we may trip over their assertions. - * - * WARNING! When using fxsave we MUST fninit after saving the FP state. The - * kernel will always assume that the FP state is 'safe' (will not cause - * exceptions) for mmx/xmm use if npxthread is NULL. The kernel must still - * setup a custom save area before actually using the FP unit, but it will - * not bother calling fninit. This greatly improves kernel performance when - * it wishes to use the FP unit. - */ -void -npxsave(union savefpu *addr) -{ - crit_enter(); - /*stop_emulating();*/ - fpusave(addr); - mdcpu->gd_npxthread = NULL; - fninit(); - /*start_emulating();*/ - crit_exit(); -} - -static void -fpusave(union savefpu *addr) -{ - if (cpu_fxsr) - fxsave(addr); - else - fnsave(addr); -} - -/* - * Save the FP state to the mcontext structure. - * - * WARNING: If you want to try to npxsave() directly to mctx->mc_fpregs, - * then it MUST be 16-byte aligned. Currently this is not guarenteed. - */ -void -npxpush(mcontext_t *mctx) -{ - thread_t td = curthread; - - if (td->td_flags & TDF_USINGFP) { - if (mdcpu->gd_npxthread == td) { - /* - * XXX Note: This is a bit inefficient if the signal - * handler uses floating point, extra faults will - * occur. - */ - mctx->mc_ownedfp = _MC_FPOWNED_FPU; - npxsave(td->td_savefpu); - } else { - mctx->mc_ownedfp = _MC_FPOWNED_PCB; - } - bcopy(td->td_savefpu, mctx->mc_fpregs, sizeof(mctx->mc_fpregs)); - td->td_flags &= ~TDF_USINGFP; - mctx->mc_fpformat = -#ifndef CPU_DISABLE_SSE - (cpu_fxsr) ? _MC_FPFMT_XMM : -#endif - _MC_FPFMT_387; - } else { - mctx->mc_ownedfp = _MC_FPOWNED_NONE; - mctx->mc_fpformat = _MC_FPFMT_NODEV; - } -} - -/* - * Restore the FP state from the mcontext structure. - */ -void -npxpop(mcontext_t *mctx) -{ - thread_t td = curthread; - - switch(mctx->mc_ownedfp) { - case _MC_FPOWNED_NONE: - /* - * If the signal handler used the FP unit but the interrupted - * code did not, release the FP unit. Clear TDF_USINGFP will - * force the FP unit to reinit so the interrupted code sees - * a clean slate. - */ - if (td->td_flags & TDF_USINGFP) { - if (td == mdcpu->gd_npxthread) - npxsave(td->td_savefpu); - td->td_flags &= ~TDF_USINGFP; - } - break; - case _MC_FPOWNED_FPU: - case _MC_FPOWNED_PCB: - /* - * Clear ownership of the FP unit and restore our saved state. - * - * NOTE: The signal handler may have set-up some FP state and - * enabled the FP unit, so we have to restore no matter what. - * - * XXX: This is bit inefficient, if the code being returned - * to is actively using the FP this results in multiple - * kernel faults. - * - * WARNING: The saved state was exposed to userland and may - * have to be sanitized to avoid a GP fault in the kernel. - */ - if (td == mdcpu->gd_npxthread) - npxsave(td->td_savefpu); - bcopy(mctx->mc_fpregs, td->td_savefpu, sizeof(*td->td_savefpu)); - if ((td->td_savefpu->sv_xmm.sv_env.en_mxcsr & ~0xFFBF) && - cpu_fxsr) { - krateprintf(&badfprate, - "pid %d (%s) signal return from user: " - "illegal FP MXCSR %08x\n", - td->td_proc->p_pid, - td->td_proc->p_comm, - td->td_savefpu->sv_xmm.sv_env.en_mxcsr); - td->td_savefpu->sv_xmm.sv_env.en_mxcsr &= 0xFFBF; - } - td->td_flags |= TDF_USINGFP; - break; - } -} - - -#ifndef CPU_DISABLE_SSE -/* - * On AuthenticAMD processors, the fxrstor instruction does not restore - * the x87's stored last instruction pointer, last data pointer, and last - * opcode values, except in the rare case in which the exception summary - * (ES) bit in the x87 status word is set to 1. - * - * In order to avoid leaking this information across processes, we clean - * these values by performing a dummy load before executing fxrstor(). - */ -static double dummy_variable = 0.0; -static void -fpu_clean_state(void) -{ - u_short status; - - /* - * Clear the ES bit in the x87 status word if it is currently - * set, in order to avoid causing a fault in the upcoming load. - */ - fnstsw(&status); - if (status & 0x80) - fnclex(); - - /* - * Load the dummy variable into the x87 stack. This mangles - * the x87 stack, but we don't care since we're about to call - * fxrstor() anyway. - */ - __asm __volatile("ffree %%st(7); fld %0" : : "m" (dummy_variable)); -} -#endif /* CPU_DISABLE_SSE */ - -static void -fpurstor(union savefpu *addr) -{ -#ifndef CPU_DISABLE_SSE - if (cpu_fxsr) { - fpu_clean_state(); - fxrstor(addr); - } else { - frstor(addr); - } -#else - frstor(addr); -#endif -} - diff --git a/sys/platform/vkernel/i386/procfs_machdep.c b/sys/platform/vkernel/i386/procfs_machdep.c deleted file mode 100644 index f929baf63e..0000000000 --- a/sys/platform/vkernel/i386/procfs_machdep.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 1993 - * The Regents of the University of California. All rights reserved. - * Copyright (c) 1993 Jan-Simon Pendry - * - * This code is derived from software contributed to Berkeley by - * Jan-Simon Pendry. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)procfs_machdep.c 8.3 (Berkeley) 1/27/94 - * - * From: - * $FreeBSD: src/sys/i386/i386/procfs_machdep.c,v 1.14 1999/10/11 14:50:03 peter Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/procfs_machdep.c,v 1.3 2007/02/19 01:14:23 corecode Exp $ - */ - -/* - * Functions to be implemented here are: - * - * procfs_read_regs(lwp, regs) - * Get the current user-visible register set from the process - * and copy it into the regs structure (). - * The process is stopped at the time read_regs is called. - * - * procfs_write_regs(lwp, regs) - * Update the current register set from the passed in regs - * structure. Take care to avoid clobbering special CPU - * registers or privileged bits in the PSL. - * Depending on the architecture this may have fix-up work to do, - * especially if the IAR or PCW are modified. - * The process is stopped at the time write_regs is called. - * - * procfs_read_fpregs, procfs_write_fpregs - * deal with the floating point register set, otherwise as above. - * - * procfs_read_dbregs, procfs_write_dbregs - * deal with the processor debug register set, otherwise as above. - * - * procfs_sstep(lwp) - * Arrange for the process to trap after executing a single instruction. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -int -procfs_read_regs(struct lwp *lp, struct reg *regs) -{ - return (fill_regs(lp, regs)); -} - -int -procfs_write_regs(struct lwp *lp, struct reg *regs) -{ - return (set_regs(lp, regs)); -} - -int -procfs_read_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - return (fill_dbregs(lp, dbregs)); -} - -int -procfs_write_dbregs(struct lwp *lp, struct dbreg *dbregs) -{ - return (set_dbregs(lp, dbregs)); -} - -/* - * Ptrace doesn't support fpregs at all, and there are no security holes - * or translations for fpregs, so we can just copy them. - */ - -int -procfs_read_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ - return (fill_fpregs(lp, fpregs)); -} - -int -procfs_write_fpregs(struct lwp *lp, struct fpreg *fpregs) -{ - return (set_fpregs(lp, fpregs)); -} - -int -procfs_sstep(struct lwp *lp) -{ - return (ptrace_single_step(lp)); -} diff --git a/sys/platform/vkernel/i386/swtch.s b/sys/platform/vkernel/i386/swtch.s deleted file mode 100644 index 1201b7ad7f..0000000000 --- a/sys/platform/vkernel/i386/swtch.s +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - */ - -#include "use_npx.h" - -#include - -#include -#include - -#include -#include - -#include "assym.s" - -#define MPLOCKED lock ; - - .data - - .globl panic - .globl lwkt_switch_return - -#if defined(SWTCH_OPTIM_STATS) - .globl swtch_optim_stats, tlb_flush_count -swtch_optim_stats: .long 0 /* number of _swtch_optims */ -tlb_flush_count: .long 0 -#endif - - .text - - -/* - * cpu_heavy_switch(next_thread) - * - * Switch from the current thread to a new thread. This entry - * is normally called via the thread->td_switch function, and will - * only be called when the current thread is a heavy weight process. - * - * Some instructions have been reordered to reduce pipeline stalls. - * - * YYY disable interrupts once giant is removed. - */ -ENTRY(cpu_heavy_switch) - /* - * Save general regs - */ - movl PCPU(curthread),%ecx - movl (%esp),%eax /* (reorder optimization) */ - movl TD_PCB(%ecx),%edx /* EDX = PCB */ - movl %eax,PCB_EIP(%edx) /* return PC may be modified */ - movl %ebx,PCB_EBX(%edx) - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - movl %esi,PCB_ESI(%edx) - movl %edi,PCB_EDI(%edx) - - movl %ecx,%ebx /* EBX = curthread */ - movl TD_LWP(%ecx),%ecx - movl PCPU(cpuid), %eax - movl LWP_VMSPACE(%ecx), %ecx /* ECX = vmspace */ - MPLOCKED btrl %eax, VM_PMAP+PM_ACTIVE(%ecx) - - /* - * Push the LWKT switch restore function, which resumes a heavy - * weight process. Note that the LWKT switcher is based on - * TD_SP, while the heavy weight process switcher is based on - * PCB_ESP. TD_SP is usually two ints pushed relative to - * PCB_ESP. We push the flags for later restore by cpu_heavy_restore. - */ - pushfl - pushl $cpu_heavy_restore - movl %esp,TD_SP(%ebx) - - /* - * Save debug regs if necessary - */ - movb PCB_FLAGS(%edx),%al - andb $PCB_DBREGS,%al - jz 1f /* no, skip over */ - movl %dr7,%eax /* yes, do the save */ - movl %eax,PCB_DR7(%edx) - andl $0x0000fc00, %eax /* disable all watchpoints */ - movl %eax,%dr7 - movl %dr6,%eax - movl %eax,PCB_DR6(%edx) - movl %dr3,%eax - movl %eax,PCB_DR3(%edx) - movl %dr2,%eax - movl %eax,PCB_DR2(%edx) - movl %dr1,%eax - movl %eax,PCB_DR1(%edx) - movl %dr0,%eax - movl %eax,PCB_DR0(%edx) -1: - -#if NNPX > 0 - /* - * Save the FP state if we have used the FP. Note that calling - * npxsave will NULL out PCPU(npxthread). - */ - cmpl %ebx,PCPU(npxthread) - jne 1f - pushl TD_SAVEFPU(%ebx) - call npxsave /* do it in a big C function */ - addl $4,%esp /* EAX, ECX, EDX trashed */ -1: -#endif /* NNPX > 0 */ - - /* - * Switch to the next thread, which was passed as an argument - * to cpu_heavy_switch(). Due to the eflags and switch-restore - * function we pushed, the argument is at 12(%esp). Set the current - * thread, load the stack pointer, and 'ret' into the switch-restore - * function. - * - * The switch restore function expects the new thread to be in %eax - * and the old one to be in %ebx. - * - * There is a one-instruction window where curthread is the new - * thread but %esp still points to the old thread's stack, but - * we are protected by a critical section so it is ok. - */ - movl 12(%esp),%eax /* EAX = newtd, EBX = oldtd */ - movl %eax,PCPU(curthread) - movl TD_SP(%eax),%esp - ret - -/* - * cpu_exit_switch() - * - * The switch function is changed to this when a thread is going away - * for good. We have to ensure that the MMU state is not cached, and - * we don't bother saving the existing thread state before switching. - * - * At this point we are in a critical section and this cpu owns the - * thread's token, which serves as an interlock until the switchout is - * complete. - */ -ENTRY(cpu_exit_switch) - /* - * Get us out of the vmspace - */ -#if 0 - movl IdlePTD,%ecx - movl %cr3,%eax - cmpl %ecx,%eax - je 1f - movl %ecx,%cr3 -1: -#endif - movl PCPU(curthread),%ebx - - /* - * If this is a process/lwp, deactivate the pmap after we've - * switched it out. - */ - movl TD_LWP(%ebx),%ecx - testl %ecx,%ecx - jz 2f - movl PCPU(cpuid), %eax - movl LWP_VMSPACE(%ecx), %ecx /* ECX = vmspace */ - MPLOCKED btrl %eax, VM_PMAP+PM_ACTIVE(%ecx) -2: - /* - * Switch to the next thread. RET into the restore function, which - * expects the new thread in EAX and the old in EBX. - * - * There is a one-instruction window where curthread is the new - * thread but %esp still points to the old thread's stack, but - * we are protected by a critical section so it is ok. - */ - movl 4(%esp),%eax - movl %eax,PCPU(curthread) - movl TD_SP(%eax),%esp - ret - -/* - * cpu_heavy_restore() (current thread in %eax on entry) - * - * Restore the thread after an LWKT switch. This entry is normally - * called via the LWKT switch restore function, which was pulled - * off the thread stack and jumped to. - * - * This entry is only called if the thread was previously saved - * using cpu_heavy_switch() (the heavy weight process thread switcher), - * or when a new process is initially scheduled. - * - * NOTE: The lwp may be in any state, not necessarily LSRUN, because - * a preemption switch may interrupt the process and then return via - * cpu_heavy_restore. - * - * YYY theoretically we do not have to restore everything here, a lot - * of this junk can wait until we return to usermode. But for now - * we restore everything. - * - * YYY the PCB crap is really crap, it makes startup a bitch because - * we can't switch away. - * - * YYY note: spl check is done in mi_switch when it splx()'s. - */ - -ENTRY(cpu_heavy_restore) - popfl - movl TD_PCB(%eax),%edx /* EDX = PCB */ - movl TD_LWP(%eax),%ecx - -#if defined(SWTCH_OPTIM_STATS) - incl _swtch_optim_stats -#endif - /* - * Tell the pmap that our cpu is using the VMSPACE now. We cannot - * safely test/reload %cr3 until after we have set the bit in the - * pmap (remember, we do not hold the MP lock in the switch code). - */ - movl LWP_VMSPACE(%ecx), %ecx /* ECX = vmspace */ - movl PCPU(cpuid), %esi - MPLOCKED btsl %esi, VM_PMAP+PM_ACTIVE(%ecx) - - /* - * Restore the MMU address space. If it is the same as the last - * thread we don't have to invalidate the tlb (i.e. reload cr3). - * YYY which naturally also means that the PM_ACTIVE bit had better - * already have been set before we set it above, check? YYY - */ -#if 0 - movl %cr3,%esi - movl PCB_CR3(%edx),%ecx - cmpl %esi,%ecx - je 4f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx,%cr3 -4: -#endif - /* - * NOTE: %ebx is the previous thread and %eax is the new thread. - * %ebx is retained throughout so we can return it. - * - * lwkt_switch[_return] is responsible for handling TDF_RUNNING. - */ -#if 0 - /* - * Deal with the PCB extension, restore the private tss - */ - movl PCB_EXT(%edx),%edi /* check for a PCB extension */ - movl $1,%ecx /* maybe mark use of a private tss */ - testl %edi,%edi - jnz 2f - - /* - * Going back to the common_tss. We may need to update TSS_ESP0 - * which sets the top of the supervisor stack when entering from - * usermode. The PCB is at the top of the stack but we need another - * 16 bytes to take vm86 into account. - */ - leal -16(%edx),%ecx - movl %ecx, PCPU(common_tss) + TSS_ESP0 - - cmpl $0,PCPU(private_tss) /* don't have to reload if */ - je 3f /* already using the common TSS */ - - subl %ecx,%ecx /* unmark use of private tss */ - - /* - * Get the address of the common TSS descriptor for the ltr. - * There is no way to get the address of a segment-accessed variable - * so we store a self-referential pointer at the base of the per-cpu - * data area and add the appropriate offset. - */ - movl $gd_common_tssd, %edi - addl %fs:0, %edi - - /* - * Move the correct TSS descriptor into the GDT slot, then reload - * ltr. - */ -2: - movl %ecx,PCPU(private_tss) /* mark/unmark private tss */ - movl PCPU(tss_gdt), %ecx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ecx) - movl 4(%edi), %eax - movl %eax, 4(%ecx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -3: -#endif - /* - * Restore general registers. %ebx is restored later. - */ - movl PCB_ESP(%edx),%esp - movl PCB_EBP(%edx),%ebp - movl PCB_ESI(%edx),%esi - movl PCB_EDI(%edx),%edi - movl PCB_EIP(%edx),%eax - movl %eax,(%esp) - -#if 0 - /* - * Restore the user LDT if we have one - */ - cmpl $0, PCB_USERLDT(%edx) - jnz 1f - movl _default_ldt,%eax - cmpl PCPU(currentldt),%eax - je 2f - lldt _default_ldt - movl %eax,PCPU(currentldt) - jmp 2f -1: pushl %edx - call set_user_ldt - popl %edx -2: -#endif -#if 0 - /* - * Restore the user TLS if we have one - */ - pushl %edx - call set_user_TLS - popl %edx -#endif - - /* - * Restore the DEBUG register state if necessary. - */ - movb PCB_FLAGS(%edx),%al - andb $PCB_DBREGS,%al - jz 1f /* no, skip over */ - movl PCB_DR6(%edx),%eax /* yes, do the restore */ - movl %eax,%dr6 - movl PCB_DR3(%edx),%eax - movl %eax,%dr3 - movl PCB_DR2(%edx),%eax - movl %eax,%dr2 - movl PCB_DR1(%edx),%eax - movl %eax,%dr1 - movl PCB_DR0(%edx),%eax - movl %eax,%dr0 - movl %dr7,%eax /* load dr7 so as not to disturb */ - andl $0x0000fc00,%eax /* reserved bits */ - movl PCB_DR7(%edx),%ecx - andl $~0x0000fc00,%ecx - orl %ecx,%eax - movl %eax,%dr7 -1: - movl %ebx,%eax /* return previous thread */ - movl PCB_EBX(%edx),%ebx - ret - -/* - * savectx(pcb) - * - * Update pcb, saving current processor state. - */ -ENTRY(savectx) - /* fetch PCB */ - movl 4(%esp),%ecx - - /* caller's return address - child won't execute this routine */ - movl (%esp),%eax - movl %eax,PCB_EIP(%ecx) - movl %ebx,PCB_EBX(%ecx) - movl %esp,PCB_ESP(%ecx) - movl %ebp,PCB_EBP(%ecx) - movl %esi,PCB_ESI(%ecx) - movl %edi,PCB_EDI(%ecx) - -#if NNPX > 0 - /* - * If npxthread == NULL, then the npx h/w state is irrelevant and the - * state had better already be in the pcb. This is true for forks - * but not for dumps (the old book-keeping with FP flags in the pcb - * always lost for dumps because the dump pcb has 0 flags). - * - * If npxthread != NULL, then we have to save the npx h/w state to - * npxthread's pcb and copy it to the requested pcb, or save to the - * requested pcb and reload. Copying is easier because we would - * have to handle h/w bugs for reloading. We used to lose the - * parent's npx state for forks by forgetting to reload. - */ - movl PCPU(npxthread),%eax - testl %eax,%eax - je 1f - - pushl %ecx /* target pcb */ - movl TD_SAVEFPU(%eax),%eax /* originating savefpu area */ - pushl %eax - - pushl %eax - call npxsave - addl $4,%esp - - popl %eax - popl %ecx - - pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx - pushl %ecx - pushl %eax - call bcopy - addl $12,%esp -#endif /* NNPX > 0 */ - -1: - ret - -/* - * cpu_idle_restore() (current thread in %eax on entry) (one-time execution) - * - * Don't bother setting up any regs other then %ebp so backtraces - * don't die. This restore function is used to bootstrap into the - * cpu_idle() LWKT only, after that cpu_lwkt_*() will be used for - * switching. - * - * Clear TDF_RUNNING in old thread only after we've cleaned up %cr3. - * This only occurs during system boot so no special handling is - * required for migration. - * - * If we are an AP we have to call ap_init() before jumping to - * cpu_idle(). ap_init() will synchronize with the BP and finish - * setting up various ncpu-dependant globaldata fields. This may - * happen on UP as well as SMP if we happen to be simulating multiple - * cpus. - */ -ENTRY(cpu_idle_restore) - /* cli */ - movl $0,%ebp - pushl $0 - andl $~TDF_RUNNING,TD_FLAGS(%ebx) - orl $TDF_RUNNING,TD_FLAGS(%eax) /* manual, no switch_return */ - cmpl $0,PCPU(cpuid) - je 1f - call ap_init -1: - /* sti */ - jmp cpu_idle - -/* - * cpu_kthread_restore() (current thread is %eax on entry) (one-time execution) - * - * Don't bother setting up any regs other then %ebp so backtraces - * don't die. This restore function is used to bootstrap into an - * LWKT based kernel thread only. cpu_lwkt_switch() will be used - * after this. - * - * Since all of our context is on the stack we are reentrant and - * we can release our critical section and enable interrupts early. - * - * Because this switch target does not 'return' to lwkt_switch() - * we have to call lwkt_switch_return(otd) to clean up otd. - * otd is in %ebx. - */ -ENTRY(cpu_kthread_restore) - /*sti*/ - movl TD_PCB(%eax),%esi - movl $0,%ebp - - pushl %eax - pushl %ebx /* argument to lwkt_switch_return */ - call lwkt_switch_return - addl $4,%esp - popl %eax - decl TD_CRITCOUNT(%eax) - popl %eax /* kthread exit function */ - pushl PCB_EBX(%esi) /* argument to ESI function */ - pushl %eax /* set exit func as return address */ - movl PCB_ESI(%esi),%eax - jmp *%eax - -/* - * cpu_lwkt_switch() - * - * Standard LWKT switching function. Only non-scratch registers are - * saved and we don't bother with the MMU state or anything else. - * - * This function is always called while in a critical section. - * - * There is a one-instruction window where curthread is the new - * thread but %esp still points to the old thread's stack, but - * we are protected by a critical section so it is ok. - * - * YYY BGL, SPL - */ -ENTRY(cpu_lwkt_switch) - pushl %ebp /* note: GDB hacked to locate ebp relative to td_sp */ - pushl %ebx - movl PCPU(curthread),%ebx - pushl %esi - pushl %edi - pushfl - /* warning: adjust movl into %eax below if you change the pushes */ - -#if NNPX > 0 - /* - * Save the FP state if we have used the FP. Note that calling - * npxsave will NULL out PCPU(npxthread). - * - * We have to deal with the FP state for LWKT threads in case they - * happen to get preempted or block while doing an optimized - * bzero/bcopy/memcpy. - */ - cmpl %ebx,PCPU(npxthread) - jne 1f - pushl TD_SAVEFPU(%ebx) - call npxsave /* do it in a big C function */ - addl $4,%esp /* EAX, ECX, EDX trashed */ -1: -#endif /* NNPX > 0 */ - - movl 4+20(%esp),%eax /* switch to this thread */ - pushl $cpu_lwkt_restore - movl %esp,TD_SP(%ebx) - movl %eax,PCPU(curthread) - movl TD_SP(%eax),%esp - - /* - * eax contains new thread, ebx contains old thread. - */ - ret - -/* - * cpu_lwkt_restore() (current thread in %eax on entry) - * - * Standard LWKT restore function. This function is always called - * while in a critical section. - * - * Warning: due to preemption the restore function can be used to - * 'return' to the original thread. Interrupt disablement must be - * protected through the switch so we cannot run splz here. - */ -ENTRY(cpu_lwkt_restore) - /* - * NOTE: %ebx is the previous thread and %eax is the new thread. - * %ebx is retained throughout so we can return it. - * - * lwkt_switch[_return] is responsible for handling TDF_RUNNING. - */ - movl %ebx,%eax - popfl - popl %edi - popl %esi - popl %ebx - popl %ebp - ret - -/* - * bootstrap_idle() - * - * Make AP become the idle loop. - */ -ENTRY(bootstrap_idle) - movl PCPU(curthread),%eax - movl %eax,%ebx - movl TD_SP(%eax),%esp - ret diff --git a/sys/platform/vkernel/i386/tls.c b/sys/platform/vkernel/i386/tls.c deleted file mode 100644 index 13195f8f7c..0000000000 --- a/sys/platform/vkernel/i386/tls.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by David Xu and Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include /* pcb.h included via sys/user.h */ -#include /* CPU_prvspace */ -#include - -/* - * set a TLS descriptor and resync the GDT. A descriptor may be cleared - * by passing info=NULL and infosize=0. Note that hardware limitations may - * cause the size passed in tls_info to be approximated. - * - * Returns the value userland needs to load into %gs representing the - * TLS descriptor or -1 on error. - * - * (struct tls_info *info, int infosize, int which) - * - * MPSAFE - */ -int -sys_set_tls_area(struct set_tls_area_args *uap) -{ - struct tls_info info; - struct segment_descriptor *desc; - int error; - int i; - - /* - * Sanity checks - */ - i = uap->which; - if (i < 0 || i >= NGTLS) - return (ERANGE); - if (uap->infosize < 0) - return (EINVAL); - - /* - * Maintain forwards compatibility with future extensions. - */ - if (uap->infosize != sizeof(info)) { - bzero(&info, sizeof(info)); - error = copyin(uap->info, &info, - min(sizeof(info), uap->infosize)); - } else { - error = copyin(uap->info, &info, sizeof(info)); - } - if (error) - return (error); - if (info.size < -1) - return (EINVAL); - if (info.size > (1 << 20)) - info.size = (info.size + PAGE_MASK) & ~PAGE_MASK; - - /* - * Load the descriptor. A critical section is required in case - * an interrupt thread comes along and switches us out and then back - * in. - */ - desc = &curthread->td_tls.tls[i]; - crit_enter(); - if (info.size == 0) { - bzero(desc, sizeof(*desc)); - } else { - desc->sd_lobase = (intptr_t)info.base; - desc->sd_hibase = (intptr_t)info.base >> 24; - desc->sd_def32 = 1; - desc->sd_type = SDT_MEMRWA; - desc->sd_dpl = SEL_UPL; - desc->sd_xx = 0; - desc->sd_p = 1; - if (info.size == -1) { - /* - * A descriptor size of -1 is a hack to map the - * whole address space. This type of mapping is - * required for direct-tls accesses of variable - * data, e.g. %gs:OFFSET where OFFSET is negative. - */ - desc->sd_lolimit = -1; - desc->sd_hilimit = -1; - desc->sd_gran = 1; - } else if (info.size >= (1 << 20)) { - /* - * A descriptor size greater then 1MB requires page - * granularity (the lo+hilimit field is only 20 bits) - */ - desc->sd_lolimit = info.size >> PAGE_SHIFT; - desc->sd_hilimit = info.size >> (PAGE_SHIFT + 16); - desc->sd_gran = 1; - } else { - /* - * Otherwise a byte-granular size is supported. - */ - desc->sd_lolimit = info.size; - desc->sd_hilimit = info.size >> 16; - desc->sd_gran = 0; - } - } - crit_exit(); - uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); - set_user_TLS(); - return(0); -} - -/* - * Return the specified TLS descriptor to userland. - * - * Returns the value userland needs to load into %gs representing the - * TLS descriptor or -1 on error. - * - * (struct tls_info *info, int infosize, int which) - * - * MPSAFE - */ -int -sys_get_tls_area(struct get_tls_area_args *uap) -{ - struct tls_info info; - struct segment_descriptor *desc; - int error; - int i; - - /* - * Sanity checks - */ - i = uap->which; - if (i < 0 || i >= NGTLS) - return (ERANGE); - if (uap->infosize < 0) - return (EINVAL); - - /* - * unpack the descriptor, ENOENT is returned for any descriptor - * which has not been loaded. uap->info may be NULL. - */ - desc = &curthread->td_tls.tls[i]; - if (desc->sd_p) { - if (uap->info && uap->infosize > 0) { - bzero(&info, sizeof(info)); - info.base = (void *)(intptr_t) - ((desc->sd_hibase << 24) | desc->sd_lobase); - info.size = (desc->sd_hilimit << 16) | desc->sd_lolimit; - if (desc->sd_gran) - info.size <<= PAGE_SHIFT; - error = copyout(&info, uap->info, - min(sizeof(info), uap->infosize)); - } else { - error = 0; - } - uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); - } else { - error = ENOENT; - } - return(error); -} - -/* - * This function is a NOP because the TLS segments are proactively copied - * by vmspace_ctl() when we switch to the (emulated) user process. - */ -void -set_user_TLS(void) -{ -} diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c deleted file mode 100644 index 39ee04ffd1..0000000000 --- a/sys/platform/vkernel/i386/trap.c +++ /dev/null @@ -1,1425 +0,0 @@ -/*- - * Copyright (C) 1994, David Greenman - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the University of Utah, and William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - */ - -/* - * 386 Trap and System call handling - */ - -#include "use_isa.h" -#include "use_npx.h" - -#include "opt_ddb.h" -#include "opt_ktrace.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef KTRACE -#include -#endif -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include - -#define MAKEMPSAFE(have_mplock) \ - if (have_mplock == 0) { \ - get_mplock(); \ - have_mplock = 1; \ - } - -int (*pmath_emulate) (struct trapframe *); - -static int trap_pfault (struct trapframe *, int, vm_offset_t); -static void trap_fatal (struct trapframe *, int, vm_offset_t); -void dblfault_handler (void); - -#if 0 -extern inthand_t IDTVEC(syscall); -#endif - -#define MAX_TRAP_MSG 28 -static char *trap_msg[] = { - "", /* 0 unused */ - "privileged instruction fault", /* 1 T_PRIVINFLT */ - "", /* 2 unused */ - "breakpoint instruction fault", /* 3 T_BPTFLT */ - "", /* 4 unused */ - "", /* 5 unused */ - "arithmetic trap", /* 6 T_ARITHTRAP */ - "system forced exception", /* 7 T_ASTFLT */ - "", /* 8 unused */ - "general protection fault", /* 9 T_PROTFLT */ - "trace trap", /* 10 T_TRCTRAP */ - "", /* 11 unused */ - "page fault", /* 12 T_PAGEFLT */ - "", /* 13 unused */ - "alignment fault", /* 14 T_ALIGNFLT */ - "", /* 15 unused */ - "", /* 16 unused */ - "", /* 17 unused */ - "integer divide fault", /* 18 T_DIVIDE */ - "non-maskable interrupt trap", /* 19 T_NMI */ - "overflow trap", /* 20 T_OFLOW */ - "FPU bounds check fault", /* 21 T_BOUND */ - "FPU device not available", /* 22 T_DNA */ - "double fault", /* 23 T_DOUBLEFLT */ - "FPU operand fetch fault", /* 24 T_FPOPFLT */ - "invalid TSS fault", /* 25 T_TSSFLT */ - "segment not present fault", /* 26 T_SEGNPFLT */ - "stack fault", /* 27 T_STKFLT */ - "machine check trap", /* 28 T_MCHK */ -}; - -#ifdef DDB -static int ddb_on_nmi = 1; -SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, - &ddb_on_nmi, 0, "Go to DDB on NMI"); -#endif -static int panic_on_nmi = 1; -SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, - &panic_on_nmi, 0, "Panic on NMI"); -static int fast_release; -SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, - &fast_release, 0, "Passive Release was optimal"); -static int slow_release; -SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, - &slow_release, 0, "Passive Release was nonoptimal"); - -/* - * Passively intercepts the thread switch function to increase the thread - * priority from a user priority to a kernel priority, reducing - * syscall and trap overhead for the case where no switch occurs. - * - * Synchronizes td_ucred with p_ucred. This is used by system calls, - * signal handling, faults, AST traps, and anything else that enters the - * kernel from userland and provides the kernel with a stable read-only - * copy of the process ucred. - */ -static __inline void -userenter(struct thread *curtd, struct proc *curp) -{ - struct ucred *ocred; - struct ucred *ncred; - - curtd->td_release = lwkt_passive_release; - - if (curtd->td_ucred != curp->p_ucred) { - ncred = crhold(curp->p_ucred); - ocred = curtd->td_ucred; - curtd->td_ucred = ncred; - if (ocred) - crfree(ocred); - } - -} - -/* - * Handle signals, profiling, and other AST's and/or tasks that - * must be completed before we can return to or try to return to userland. - * - * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 - * arithmatic on the delta calculation so the absolute tick values are - * truncated to an integer. - */ -static void -userret(struct lwp *lp, struct trapframe *frame, int sticks) -{ - struct proc *p = lp->lwp_proc; - int sig; - - /* - * Charge system time if profiling. Note: times are in microseconds. - * This may do a copyout and block, so do it first even though it - * means some system time will be charged as user time. - */ - if (p->p_flags & P_PROFIL) { - addupc_task(p, frame->tf_eip, - (u_int)((int)lp->lwp_thread->td_sticks - sticks)); - } - -recheck: - /* - * Specific on-return-to-usermode checks (LWP_MP_WEXIT, - * LWP_MP_VNLRU, etc). - */ - if (lp->lwp_mpflags & LWP_MP_URETMASK) - lwpuserret(lp); - - /* - * Block here if we are in a stopped state. - */ - if (p->p_stat == SSTOP) { - lwkt_gettoken(&p->p_token); - tstop(); - lwkt_reltoken(&p->p_token); - goto recheck; - } - - /* - * Post any pending upcalls. If running a virtual kernel be sure - * to restore the virtual kernel's vmspace before posting the upcall. - */ - if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) { - lwkt_gettoken(&p->p_token); - if (p->p_flags & P_SIGVTALRM) { - p->p_flags &= ~P_SIGVTALRM; - ksignal(p, SIGVTALRM); - } - if (p->p_flags & P_SIGPROF) { - p->p_flags &= ~P_SIGPROF; - ksignal(p, SIGPROF); - } - lwkt_reltoken(&p->p_token); - goto recheck; - } - - /* - * Post any pending signals - * - * WARNING! postsig() can exit and not return. - */ - if ((sig = CURSIG_TRACE(lp)) != 0) { - lwkt_gettoken(&p->p_token); - postsig(sig); - lwkt_reltoken(&p->p_token); - goto recheck; - } - - /* - * block here if we are swapped out, but still process signals - * (such as SIGKILL). proc0 (the swapin scheduler) is already - * aware of our situation, we do not have to wake it up. - */ - if (p->p_flags & P_SWAPPEDOUT) { - lwkt_gettoken(&p->p_token); - get_mplock(); - p->p_flags |= P_SWAPWAIT; - swapin_request(); - if (p->p_flags & P_SWAPWAIT) - tsleep(p, PCATCH, "SWOUT", 0); - p->p_flags &= ~P_SWAPWAIT; - rel_mplock(); - lwkt_reltoken(&p->p_token); - goto recheck; - } - - /* - * In a multi-threaded program it is possible for a thread to change - * signal state during a system call which temporarily changes the - * signal mask. In this case postsig() might not be run and we - * have to restore the mask ourselves. - */ - if (lp->lwp_flags & LWP_OLDMASK) { - lp->lwp_flags &= ~LWP_OLDMASK; - lp->lwp_sigmask = lp->lwp_oldsigmask; - goto recheck; - } -} - -/* - * Cleanup from userenter and any passive release that might have occured. - * We must reclaim the current-process designation before we can return - * to usermode. We also handle both LWKT and USER reschedule requests. - */ -static __inline void -userexit(struct lwp *lp) -{ - struct thread *td = lp->lwp_thread; - /* globaldata_t gd = td->td_gd; */ - - /* - * Handle stop requests at kernel priority. Any requests queued - * after this loop will generate another AST. - */ - while (lp->lwp_proc->p_stat == SSTOP) { - lwkt_gettoken(&lp->lwp_proc->p_token); - tstop(); - lwkt_reltoken(&lp->lwp_proc->p_token); - } - - /* - * Become the current user scheduled process if we aren't already, - * and deal with reschedule requests and other factors. - */ - lp->lwp_proc->p_usched->acquire_curproc(lp); - /* WARNING: we may have migrated cpu's */ - /* gd = td->td_gd; */ - - /* - * Reduce our priority in preparation for a return to userland. If - * our passive release function was still in place, our priority was - * never raised and does not need to be reduced. - */ - lwkt_passive_recover(td); -} - -#if !defined(KTR_KERNENTRY) -#define KTR_KERNENTRY KTR_ALL -#endif -KTR_INFO_MASTER(kernentry); -KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0, - "TRAP(pid %d, tid %d, trapno %d, eva %lu)", - pid_t pid, lwpid_t tid, register_t trapno, vm_offset_t eva); -KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "TRAP_RET(pid %d, tid %d)", - pid_t pid, lwpid_t tid); -KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "SYSC(pid %d, tid %d, nr %d)", - pid_t pid, lwpid_t tid, register_t trapno); -KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "SYSRET(pid %d, tid %d, err %d)", - pid_t pid, lwpid_t tid, int err); -KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "FORKRET(pid %d, tid %d)", - pid_t pid, lwpid_t tid); - -/* - * Exception, fault, and trap interface to the kernel. - * This common code is called from assembly language IDT gate entry - * routines that prepare a suitable stack frame, and restore this - * frame after the exception has been processed. - * - * This function is also called from doreti in an interlock to handle ASTs. - * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap - * - * NOTE! We have to retrieve the fault address prior to obtaining the - * MP lock because get_mplock() may switch out. YYY cr2 really ought - * to be retrieved by the assembly code, not here. - * - * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing - * if an attempt is made to switch from a fast interrupt or IPI. This is - * necessary to properly take fatal kernel traps on SMP machines if - * get_mplock() has to block. - */ - -void -user_trap(struct trapframe *frame) -{ - struct globaldata *gd = mycpu; - struct thread *td = gd->gd_curthread; - struct lwp *lp = td->td_lwp; - struct proc *p; - int sticks = 0; - int i = 0, ucode = 0, type, code; - int have_mplock = 0; -#ifdef INVARIANTS - int crit_count = td->td_critcount; - lwkt_tokref_t curstop = td->td_toks_stop; -#endif - vm_offset_t eva; - - p = td->td_proc; - - /* - * This is a bad kludge to avoid changing the various trapframe - * structures. Because we are enabled as a virtual kernel, - * the original tf_err field will be passed to us shifted 16 - * over in the tf_trapno field for T_PAGEFLT. - */ - if (frame->tf_trapno == T_PAGEFLT) - eva = frame->tf_err; - else - eva = 0; -#if 0 - kprintf("USER_TRAP AT %08x xflags %d trapno %d eva %08x\n", - frame->tf_eip, frame->tf_xflags, frame->tf_trapno, eva); -#endif - - /* - * Everything coming from user mode runs through user_trap, - * including system calls. - */ - if (frame->tf_trapno == T_SYSCALL80) { - syscall2(frame); - return; - } - - KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid, - frame->tf_trapno, eva); - -#ifdef DDB - if (db_active) { - eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0); - ++gd->gd_trap_nesting_level; - MAKEMPSAFE(have_mplock); - trap_fatal(frame, TRUE, eva); - --gd->gd_trap_nesting_level; - goto out2; - } -#endif - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -restart: -#endif - type = frame->tf_trapno; - code = frame->tf_err; - - userenter(td, p); - - sticks = (int)td->td_sticks; - lp->lwp_md.md_regs = frame; - - switch (type) { - case T_PRIVINFLT: /* privileged instruction fault */ - i = SIGILL; - ucode = ILL_PRVOPC; - break; - - case T_BPTFLT: /* bpt instruction fault */ - case T_TRCTRAP: /* trace trap */ - frame->tf_eflags &= ~PSL_T; - i = SIGTRAP; - ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); - break; - - case T_ARITHTRAP: /* arithmetic trap */ - ucode = code; - i = SIGFPE; - break; - - case T_ASTFLT: /* Allow process switch */ - mycpu->gd_cnt.v_soft++; - if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { - atomic_clear_int(&mycpu->gd_reqflags, - RQF_AST_OWEUPC); - addupc_task(p, p->p_prof.pr_addr, - p->p_prof.pr_ticks); - } - goto out; - - /* - * The following two traps can happen in - * vm86 mode, and, if so, we want to handle - * them specially. - */ - case T_PROTFLT: /* general protection fault */ - case T_STKFLT: /* stack fault */ -#if 0 - if (frame->tf_eflags & PSL_VM) { - i = vm86_emulate((struct vm86frame *)frame); - if (i == 0) - goto out; - break; - } -#endif - i = SIGBUS; - ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; - break; - case T_SEGNPFLT: /* segment not present fault */ - i = SIGBUS; - ucode = BUS_ADRERR; - break; - case T_TSSFLT: /* invalid TSS fault */ - case T_DOUBLEFLT: /* double fault */ - default: - i = SIGBUS; - ucode = BUS_OBJERR; - break; - - case T_PAGEFLT: /* page fault */ - MAKEMPSAFE(have_mplock); - i = trap_pfault(frame, TRUE, eva); - if (i == -1) - goto out; -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (i == -2) - goto restart; -#endif - if (i == 0) - goto out; - - if (i == SIGSEGV) - ucode = SEGV_MAPERR; - else { - i = SIGSEGV; - ucode = SEGV_ACCERR; - } - break; - - case T_DIVIDE: /* integer divide fault */ - ucode = FPE_INTDIV; - i = SIGFPE; - break; - -#if NISA > 0 - case T_NMI: - MAKEMPSAFE(have_mplock); - /* machine/parity/power fail/"kitchen sink" faults */ - if (isa_nmi(code) == 0) { -#ifdef DDB - /* - * NMI can be hooked up to a pushbutton - * for debugging. - */ - if (ddb_on_nmi) { - kprintf ("NMI ... going to debugger\n"); - kdb_trap (type, 0, frame); - } -#endif /* DDB */ - goto out2; - } else if (panic_on_nmi) - panic("NMI indicates hardware failure"); - break; -#endif /* NISA > 0 */ - - case T_OFLOW: /* integer overflow fault */ - ucode = FPE_INTOVF; - i = SIGFPE; - break; - - case T_BOUND: /* bounds check fault */ - ucode = FPE_FLTSUB; - i = SIGFPE; - break; - - case T_DNA: - /* - * Virtual kernel intercept - pass the DNA exception - * to the (emulated) virtual kernel if it asked to handle - * it. This occurs when the virtual kernel is holding - * onto the FP context for a different emulated - * process then the one currently running. - * - * We must still call npxdna() since we may have - * saved FP state that the (emulated) virtual kernel - * needs to hand over to a different emulated process. - */ - if (lp->lwp_vkernel && lp->lwp_vkernel->ve && - (td->td_pcb->pcb_flags & FP_VIRTFP) - ) { - npxdna(frame); - break; - } - -#if NNPX > 0 - /* - * The kernel may have switched out the FP unit's - * state, causing the user process to take a fault - * when it tries to use the FP unit. Restore the - * state here - */ - if (npxdna(frame)) - goto out; -#endif - if (!pmath_emulate) { - i = SIGFPE; - ucode = FPE_FPU_NP_TRAP; - break; - } - i = (*pmath_emulate)(frame); - if (i == 0) { - if (!(frame->tf_eflags & PSL_T)) - goto out2; - frame->tf_eflags &= ~PSL_T; - i = SIGTRAP; - } - /* else ucode = emulator_only_knows() XXX */ - break; - - case T_FPOPFLT: /* FPU operand fetch fault */ - ucode = ILL_COPROC; - i = SIGILL; - break; - - case T_XMMFLT: /* SIMD floating-point exception */ - ucode = 0; /* XXX */ - i = SIGFPE; - break; - } - - /* - * Virtual kernel intercept - if the fault is directly related to a - * VM context managed by a virtual kernel then let the virtual kernel - * handle it. - */ - if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { - vkernel_trap(lp, frame); - goto out; - } - - /* - * Translate fault for emulators (e.g. Linux) - */ - if (*p->p_sysent->sv_transtrap) - i = (*p->p_sysent->sv_transtrap)(i, type); - - MAKEMPSAFE(have_mplock); - trapsignal(lp, i, ucode); - -#ifdef DEBUG - if (type <= MAX_TRAP_MSG) { - uprintf("fatal process exception: %s", - trap_msg[type]); - if ((type == T_PAGEFLT) || (type == T_PROTFLT)) - uprintf(", fault VA = 0x%lx", (u_long)eva); - uprintf("\n"); - } -#endif - -out: - userret(lp, frame, sticks); - userexit(lp); -out2: ; - if (have_mplock) - rel_mplock(); - KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid); -#ifdef INVARIANTS - KASSERT(crit_count == td->td_critcount, - ("trap: critical section count mismatch! %d/%d", - crit_count, td->td_pri)); - KASSERT(curstop == td->td_toks_stop, - ("trap: extra tokens held after trap! %zd/%zd", - curstop - &td->td_toks_base, - td->td_toks_stop - &td->td_toks_base)); -#endif -} - -void -kern_trap(struct trapframe *frame) -{ - struct globaldata *gd = mycpu; - struct thread *td = gd->gd_curthread; - struct lwp *lp; - struct proc *p; - int i = 0, ucode = 0, type, code; - int have_mplock = 0; -#ifdef INVARIANTS - int crit_count = td->td_critcount; - lwkt_tokref_t curstop = td->td_toks_stop; -#endif - vm_offset_t eva; - - lp = td->td_lwp; - p = td->td_proc; - - if (frame->tf_trapno == T_PAGEFLT) - eva = frame->tf_err; - else - eva = 0; - -#ifdef DDB - if (db_active) { - ++gd->gd_trap_nesting_level; - MAKEMPSAFE(have_mplock); - trap_fatal(frame, FALSE, eva); - --gd->gd_trap_nesting_level; - goto out2; - } -#endif - type = frame->tf_trapno; - code = frame->tf_err; - -#if 0 -kernel_trap: -#endif - /* kernel trap */ - - switch (type) { - case T_PAGEFLT: /* page fault */ - MAKEMPSAFE(have_mplock); - trap_pfault(frame, FALSE, eva); - goto out2; - - case T_DNA: -#if NNPX > 0 - /* - * The kernel may be using npx for copying or other - * purposes. - */ - panic("kernel NPX should not happen"); - if (npxdna(frame)) - goto out2; -#endif - break; - - case T_PROTFLT: /* general protection fault */ - case T_SEGNPFLT: /* segment not present fault */ - /* - * Invalid segment selectors and out of bounds - * %eip's and %esp's can be set up in user mode. - * This causes a fault in kernel mode when the - * kernel tries to return to user mode. We want - * to get this fault so that we can fix the - * problem here and not have to check all the - * selectors and pointers when the user changes - * them. - */ - if (mycpu->gd_intr_nesting_level == 0) { - if (td->td_pcb->pcb_onfault) { - frame->tf_eip = - (register_t)td->td_pcb->pcb_onfault; - goto out2; - } - } - break; - - case T_TSSFLT: - /* - * PSL_NT can be set in user mode and isn't cleared - * automatically when the kernel is entered. This - * causes a TSS fault when the kernel attempts to - * `iret' because the TSS link is uninitialized. We - * want to get this fault so that we can fix the - * problem here and not every time the kernel is - * entered. - */ - if (frame->tf_eflags & PSL_NT) { - frame->tf_eflags &= ~PSL_NT; - goto out2; - } - break; - - case T_TRCTRAP: /* trace trap */ -#if 0 - if (frame->tf_eip == (int)IDTVEC(syscall)) { - /* - * We've just entered system mode via the - * syscall lcall. Continue single stepping - * silently until the syscall handler has - * saved the flags. - */ - goto out2; - } - if (frame->tf_eip == (int)IDTVEC(syscall) + 1) { - /* - * The syscall handler has now saved the - * flags. Stop single stepping it. - */ - frame->tf_eflags &= ~PSL_T; - goto out2; - } -#endif -#if 0 - /* - * Ignore debug register trace traps due to - * accesses in the user's address space, which - * can happen under several conditions such as - * if a user sets a watchpoint on a buffer and - * then passes that buffer to a system call. - * We still want to get TRCTRAPS for addresses - * in kernel space because that is useful when - * debugging the kernel. - */ - if (user_dbreg_trap()) { - /* - * Reset breakpoint bits because the - * processor doesn't - */ - load_dr6(rdr6() & 0xfffffff0); - goto out2; - } -#endif - /* - * FALLTHROUGH (TRCTRAP kernel mode, kernel address) - */ - case T_BPTFLT: - /* - * If DDB is enabled, let it handle the debugger trap. - * Otherwise, debugger traps "can't happen". - */ -#ifdef DDB - MAKEMPSAFE(have_mplock); - if (kdb_trap (type, 0, frame)) - goto out2; -#endif - break; - case T_DIVIDE: - MAKEMPSAFE(have_mplock); - trap_fatal(frame, FALSE, eva); - goto out2; - case T_NMI: - MAKEMPSAFE(have_mplock); - trap_fatal(frame, FALSE, eva); - goto out2; - case T_SYSCALL80: - /* - * Ignore this trap generated from a spurious SIGTRAP. - * - * single stepping in / syscalls leads to spurious / SIGTRAP - * so ignore - * - * Haiku (c) 2007 Simon 'corecode' Schubert - */ - goto out2; - } - - /* - * Translate fault for emulators (e.g. Linux) - */ - if (*p->p_sysent->sv_transtrap) - i = (*p->p_sysent->sv_transtrap)(i, type); - - MAKEMPSAFE(have_mplock); - trapsignal(lp, i, ucode); - -#ifdef DEBUG - if (type <= MAX_TRAP_MSG) { - uprintf("fatal process exception: %s", - trap_msg[type]); - if ((type == T_PAGEFLT) || (type == T_PROTFLT)) - uprintf(", fault VA = 0x%lx", (u_long)eva); - uprintf("\n"); - } -#endif - -out2: - ; - if (have_mplock) - rel_mplock(); -#ifdef INVARIANTS - KASSERT(crit_count == td->td_critcount, - ("trap: critical section count mismatch! %d/%d", - crit_count, td->td_pri)); - KASSERT(curstop == td->td_toks_stop, - ("trap: extra tokens held after trap! %zd/%zd", - curstop - &td->td_toks_base, - td->td_toks_stop - &td->td_toks_base)); -#endif -} - -int -trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) -{ - vm_offset_t va; - struct vmspace *vm = NULL; - vm_map_t map = 0; - int rv = 0; - int fault_flags; - vm_prot_t ftype; - thread_t td = curthread; - struct lwp *lp = td->td_lwp; - - va = trunc_page(eva); - if (usermode == FALSE) { - /* - * This is a fault on kernel virtual memory. - */ - map = &kernel_map; - } else { - /* - * This is a fault on non-kernel virtual memory. - * vm is initialized above to NULL. If curproc is NULL - * or curproc->p_vmspace is NULL the fault is fatal. - */ - if (lp != NULL) - vm = lp->lwp_vmspace; - - if (vm == NULL) - goto nogo; - - map = &vm->vm_map; - } - - if (frame->tf_xflags & PGEX_W) - ftype = VM_PROT_READ | VM_PROT_WRITE; - else - ftype = VM_PROT_READ; - - if (map != &kernel_map) { - /* - * Keep swapout from messing with us during this - * critical time. - */ - PHOLD(lp->lwp_proc); - - /* - * Issue fault - */ - fault_flags = 0; - if (usermode) - fault_flags |= VM_FAULT_BURST; - if (ftype & VM_PROT_WRITE) - fault_flags |= VM_FAULT_DIRTY; - else - fault_flags |= VM_FAULT_NORMAL; - rv = vm_fault(map, va, ftype, fault_flags); - PRELE(lp->lwp_proc); - } else { - /* - * Don't have to worry about process locking or stacks in the kernel. - */ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); - } - if (rv == KERN_SUCCESS) - return (0); -nogo: - if (!usermode) { - if (td->td_gd->gd_intr_nesting_level == 0 && - td->td_pcb->pcb_onfault) { - frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; - return (0); - } - trap_fatal(frame, usermode, eva); - return (-1); - } - return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); -} - -static void -trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva) -{ - int code, type, ss, esp; - - code = frame->tf_xflags; - type = frame->tf_trapno; - - if (type <= MAX_TRAP_MSG) { - kprintf("\n\nFatal trap %d: %s while in %s mode\n", - type, trap_msg[type], - (usermode ? "user" : "kernel")); - } - /* two separate prints in case of a trap on an unmapped page */ - kprintf("cpuid = %d\n", mycpu->gd_cpuid); - if (type == T_PAGEFLT) { - kprintf("fault virtual address = %p\n", (void *)eva); - kprintf("fault code = %s %s, %s\n", - usermode ? "user" : "supervisor", - code & PGEX_W ? "write" : "read", - code & PGEX_P ? "protection violation" : "page not present"); - } - kprintf("instruction pointer = 0x%x:0x%x\n", - frame->tf_cs & 0xffff, frame->tf_eip); - if (usermode) { - ss = frame->tf_ss & 0xffff; - esp = frame->tf_esp; - } else { - ss = GSEL(GDATA_SEL, SEL_KPL); - esp = (int)&frame->tf_esp; - } - kprintf("stack pointer = 0x%x:0x%x\n", ss, esp); - kprintf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); - kprintf("processor eflags = "); - if (frame->tf_eflags & PSL_T) - kprintf("trace trap, "); - if (frame->tf_eflags & PSL_I) - kprintf("interrupt enabled, "); - if (frame->tf_eflags & PSL_NT) - kprintf("nested task, "); - if (frame->tf_eflags & PSL_RF) - kprintf("resume, "); -#if 0 - if (frame->tf_eflags & PSL_VM) - kprintf("vm86, "); -#endif - kprintf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); - kprintf("current process = "); - if (curproc) { - kprintf("%lu (%s)\n", - (u_long)curproc->p_pid, curproc->p_comm ? - curproc->p_comm : ""); - } else { - kprintf("Idle\n"); - } - kprintf("current thread = pri %d ", curthread->td_pri); - if (curthread->td_critcount) - kprintf("(CRIT)"); - kprintf("\n"); -/** - * XXX FIXME: - * we probably SHOULD have stopped the other CPUs before now! - * another CPU COULD have been touching cpl at this moment... - */ - kprintf(" <- SMP: XXX"); - kprintf("\n"); - -#ifdef KDB - if (kdb_trap(&psl)) - return; -#endif -#ifdef DDB - if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) - return; -#endif - kprintf("trap number = %d\n", type); - if (type <= MAX_TRAP_MSG) - panic("%s", trap_msg[type]); - else - panic("unknown/reserved trap"); -} - -/* - * Double fault handler. Called when a fault occurs while writing - * a frame for a trap/exception onto the stack. This usually occurs - * when the stack overflows (such is the case with infinite recursion, - * for example). - * - * XXX Note that the current PTD gets replaced by IdlePTD when the - * task switch occurs. This means that the stack that was active at - * the time of the double fault is not available at unless - * the machine was idle when the double fault occurred. The downside - * of this is that "trace " in ddb won't work. - */ -void -dblfault_handler(void) -{ - struct mdglobaldata *gd = mdcpu; - - kprintf("\nFatal double fault:\n"); - kprintf("eip = 0x%x\n", gd->gd_common_tss.tss_eip); - kprintf("esp = 0x%x\n", gd->gd_common_tss.tss_esp); - kprintf("ebp = 0x%x\n", gd->gd_common_tss.tss_ebp); - /* two separate prints in case of a trap on an unmapped page */ - kprintf("cpuid = %d\n", mycpu->gd_cpuid); - panic("double fault"); -} - -/* - * syscall2 - MP aware system call request C handler - * - * A system call is essentially treated as a trap except that the - * MP lock is not held on entry or return. We are responsible for - * obtaining the MP lock if necessary and for handling ASTs - * (e.g. a task switch) prior to return. - * - * MPSAFE - */ -void -syscall2(struct trapframe *frame) -{ - struct thread *td = curthread; - struct proc *p = td->td_proc; - struct lwp *lp = td->td_lwp; - caddr_t params; - struct sysent *callp; - register_t orig_tf_eflags; - int sticks; - int error; - int narg; -#ifdef INVARIANTS - int crit_count = td->td_critcount; -#endif - int have_mplock = 0; - u_int code; - union sysunion args; - - KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid, - frame->tf_eax); - - userenter(td, p); /* lazy raise our priority */ - - /* - * Misc - */ - sticks = (int)td->td_sticks; - orig_tf_eflags = frame->tf_eflags; - - /* - * Virtual kernel intercept - if a VM context managed by a virtual - * kernel issues a system call the virtual kernel handles it, not us. - * Restore the virtual kernel context and return from its system - * call. The current frame is copied out to the virtual kernel. - */ - if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { - vkernel_trap(lp, frame); - error = EJUSTRETURN; - goto out; - } - - /* - * Get the system call parameters and account for time - */ - lp->lwp_md.md_regs = frame; - params = (caddr_t)frame->tf_esp + sizeof(int); - code = frame->tf_eax; - - if (p->p_sysent->sv_prepsyscall) { - (*p->p_sysent->sv_prepsyscall)( - frame, (int *)(&args.nosys.sysmsg + 1), - &code, ¶ms); - } else { - /* - * Need to check if this is a 32 bit or 64 bit syscall. - * fuword is MP aware. - */ - if (code == SYS_syscall) { - /* - * Code is first argument, followed by actual args. - */ - code = fuword(params); - params += sizeof(int); - } else if (code == SYS___syscall) { - /* - * Like syscall, but code is a quad, so as to maintain - * quad alignment for the rest of the arguments. - */ - code = fuword(params); - params += sizeof(quad_t); - } - } - - code &= p->p_sysent->sv_mask; - if (code >= p->p_sysent->sv_size) - callp = &p->p_sysent->sv_table[0]; - else - callp = &p->p_sysent->sv_table[code]; - - narg = callp->sy_narg & SYF_ARGMASK; - - /* - * copyin is MP aware, but the tracing code is not - */ - if (narg && params) { - error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), - narg * sizeof(register_t)); - if (error) { -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) { - MAKEMPSAFE(have_mplock); - - ktrsyscall(lp, code, narg, - (void *)(&args.nosys.sysmsg + 1)); - } -#endif - goto bad; - } - } - -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) { - MAKEMPSAFE(have_mplock); - ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); - } -#endif - - /* - * For traditional syscall code edx is left untouched when 32 bit - * results are returned. Since edx is loaded from fds[1] when the - * system call returns we pre-set it here. - */ - args.sysmsg_fds[0] = 0; - args.sysmsg_fds[1] = frame->tf_edx; - - /* - * The syscall might manipulate the trap frame. If it does it - * will probably return EJUSTRETURN. - */ - args.sysmsg_frame = frame; - - STOPEVENT(p, S_SCE, narg); /* MP aware */ - - /* - * NOTE: All system calls run MPSAFE now. The system call itself - * is responsible for getting the MP lock. - */ - error = (*callp->sy_call)(&args); - -#if 0 - kprintf("system call %d returned %d\n", code, error); -#endif - -out: - /* - * MP SAFE (we may or may not have the MP lock at this point) - */ - switch (error) { - case 0: - /* - * Reinitialize proc pointer `p' as it may be different - * if this is a child returning from fork syscall. - */ - p = curproc; - lp = curthread->td_lwp; - frame->tf_eax = args.sysmsg_fds[0]; - frame->tf_edx = args.sysmsg_fds[1]; - frame->tf_eflags &= ~PSL_C; - break; - case ERESTART: - /* - * Reconstruct pc, assuming lcall $X,y is 7 bytes, - * int 0x80 is 2 bytes. We saved this in tf_err. - */ - frame->tf_eip -= frame->tf_err; - break; - case EJUSTRETURN: - break; - case EASYNC: - panic("Unexpected EASYNC return value (for now)"); - default: -bad: - if (p->p_sysent->sv_errsize) { - if (error >= p->p_sysent->sv_errsize) - error = -1; /* XXX */ - else - error = p->p_sysent->sv_errtbl[error]; - } - frame->tf_eax = error; - frame->tf_eflags |= PSL_C; - break; - } - - /* - * Traced syscall. trapsignal() is not MP aware. - */ - if ((orig_tf_eflags & PSL_T) /*&& !(orig_tf_eflags & PSL_VM)*/) { - MAKEMPSAFE(have_mplock); - frame->tf_eflags &= ~PSL_T; - trapsignal(lp, SIGTRAP, TRAP_TRACE); - } - - /* - * Handle reschedule and other end-of-syscall issues - */ - userret(lp, frame, sticks); - -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSRET)) { - MAKEMPSAFE(have_mplock); - ktrsysret(lp, code, error, args.sysmsg_result); - } -#endif - - /* - * This works because errno is findable through the - * register set. If we ever support an emulation where this - * is not the case, this code will need to be revisited. - */ - STOPEVENT(p, S_SCX, code); - - userexit(lp); - /* - * Release the MP lock if we had to get it - */ - if (have_mplock) - rel_mplock(); - KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error); -#ifdef INVARIANTS - KASSERT(crit_count == td->td_critcount, - ("syscall: critical section count mismatch! %d/%d", - crit_count, td->td_pri)); - KASSERT(&td->td_toks_base == td->td_toks_stop, - ("syscall: extra tokens held after trap! %zd", - td->td_toks_stop - &td->td_toks_base)); -#endif -} - -/* - * NOTE: mplock not held at any point - */ -void -fork_return(struct lwp *lp, struct trapframe *frame) -{ - frame->tf_eax = 0; /* Child returns zero */ - frame->tf_eflags &= ~PSL_C; /* success */ - frame->tf_edx = 1; - - generic_lwp_return(lp, frame); - KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); -} - -/* - * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. - * - * This code will return back into the fork trampoline code which then - * runs doreti. - * - * NOTE: The mplock is not held at any point. - */ -void -generic_lwp_return(struct lwp *lp, struct trapframe *frame) -{ - struct proc *p = lp->lwp_proc; - - /* - * Newly forked processes are given a kernel priority. We have to - * adjust the priority to a normal user priority and fake entry - * into the kernel (call userenter()) to install a passive release - * function just in case userret() decides to stop the process. This - * can occur when ^Z races a fork. If we do not install the passive - * release function the current process designation will not be - * released when the thread goes to sleep. - */ - lwkt_setpri_self(TDPRI_USER_NORM); - userenter(lp->lwp_thread, p); - userret(lp, frame, 0); -#ifdef KTRACE - if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) - ktrsysret(lp, SYS_fork, 0, 0); -#endif - lp->lwp_flags |= LWP_PASSIVE_ACQ; - userexit(lp); - lp->lwp_flags &= ~LWP_PASSIVE_ACQ; -} - -/* - * doreti has turned into this. The frame is directly on the stack. We - * pull everything else we need (fpu and tls context) from the current - * thread. - * - * Note on fpu interactions: In a virtual kernel, the fpu context for - * an emulated user mode process is not shared with the virtual kernel's - * fpu context, so we only have to 'stack' fpu contexts within the virtual - * kernel itself, and not even then since the signal() contexts that we care - * about save and restore the FPU state (I think anyhow). - * - * vmspace_ctl() returns an error only if it had problems instaling the - * context we supplied or problems copying data to/from our VM space. - */ -void -go_user(struct intrframe *frame) -{ - struct trapframe *tf = (void *)&frame->if_gs; - int r; - - /* - * Interrupts may be disabled on entry, make sure all signals - * can be received before beginning our loop. - */ - sigsetmask(0); - - /* - * Switch to the current simulated user process, then call - * user_trap() when we break out of it (usually due to a signal). - */ - for (;;) { - /* - * Tell the real kernel whether it is ok to use the FP - * unit or not. - * - * The critical section is required to prevent an interrupt - * from causing a preemptive task switch and changing - * the FP state. - */ - crit_enter(); - if (mdcpu->gd_npxthread == curthread) { - tf->tf_xflags &= ~PGEX_FPFAULT; - } else { - tf->tf_xflags |= PGEX_FPFAULT; - } - - /* - * Run emulated user process context. This call interlocks - * with new mailbox signals. - * - * Set PGEX_U unconditionally, indicating a user frame (the - * bit is normally set only by T_PAGEFLT). - */ - r = vmspace_ctl(&curproc->p_vmspace->vm_pmap, VMSPACE_CTL_RUN, - tf, &curthread->td_savevext); - crit_exit(); - frame->if_xflags |= PGEX_U; -#if 0 - kprintf("GO USER %d trap %d EVA %08x EIP %08x ESP %08x XFLAGS %02x/%02x\n", - r, tf->tf_trapno, tf->tf_err, tf->tf_eip, tf->tf_esp, - tf->tf_xflags, frame->if_xflags); -#endif - if (r < 0) { - if (errno != EINTR) - panic("vmspace_ctl failed error %d", errno); - } else { - if (tf->tf_trapno) { - user_trap(tf); - } - } - if (mycpu->gd_reqflags & RQF_AST_MASK) { - tf->tf_trapno = T_ASTFLT; - user_trap(tf); - } - tf->tf_trapno = 0; - } -} - -/* - * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA - * fault (which is then passed back to the virtual kernel) if an attempt is - * made to use the FP unit. - * - * XXX this is a fairly big hack. - */ -void -set_vkernel_fp(struct trapframe *frame) -{ - struct thread *td = curthread; - - if (frame->tf_xflags & PGEX_FPFAULT) { - td->td_pcb->pcb_flags |= FP_VIRTFP; - if (mdcpu->gd_npxthread == td) - npxexit(); - } else { - td->td_pcb->pcb_flags &= ~FP_VIRTFP; - } -} - -/* - * Called from vkernel_trap() to fixup the vkernel's syscall - * frame for vmspace_ctl() return. - */ -void -cpu_vkernel_trap(struct trapframe *frame, int error) -{ - frame->tf_eax = error; - if (error) - frame->tf_eflags |= PSL_C; - else - frame->tf_eflags &= ~PSL_C; -} diff --git a/sys/platform/vkernel/i386/userldt.c b/sys/platform/vkernel/i386/userldt.c deleted file mode 100644 index d4f7f9df3f..0000000000 --- a/sys/platform/vkernel/i386/userldt.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.2 2007/01/07 08:37:35 dillon Exp $ - */ - -#include -#include -#include -#include -#include - -void -set_user_ldt (struct pcb *pcb) -{ - panic("set_user_ldt"); -} - -struct pcb_ldt * -user_ldt_alloc (struct pcb *pcb, int len) -{ - panic("user_ldt_alloc"); -} - -void -user_ldt_free (struct pcb *pcb) -{ - if (pcb->pcb_ldt) - panic("user_ldt_free"); -} - diff --git a/sys/platform/vkernel/i386/vm_machdep.c b/sys/platform/vkernel/i386/vm_machdep.c deleted file mode 100644 index 31776cdd58..0000000000 --- a/sys/platform/vkernel/i386/vm_machdep.c +++ /dev/null @@ -1,386 +0,0 @@ -/*- - * Copyright (c) 1982, 1986 The Regents of the University of California. - * Copyright (c) 1989, 1990 William Jolitz - * Copyright (c) 1994 John Dyson - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department, and William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 - * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - */ - -#include "use_npx.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* npxthread */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include - -#include -#include - -char machine[] = MACHINE; -SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, - machine, 0, "Machine class"); - -char cpu_vendor[] = "DragonFly"; /* XXX */ -u_int cpu_vendor_id = 0; /* XXX */ -u_int cpu_id = 0x80000000; /* XXX */ - -/* - * Finish a fork operation, with lwp lp2 nearly set up. - * Copy and update the pcb, set up the stack so that the child - * ready to run and return to user mode. - */ -void -cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) -{ - struct pcb *pcb2; - - if ((flags & RFPROC) == 0) { - if ((flags & RFMEM) == 0) { - /* unshare user LDT */ - struct pcb *pcb1 = lp1->lwp_thread->td_pcb; - struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt; - if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) { - pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len); - user_ldt_free(pcb1); - pcb1->pcb_ldt = pcb_ldt; - set_user_ldt(pcb1); - } - } - return; - } - -#if NNPX > 0 - /* Ensure that lp1's pcb is up to date. */ - if (mdcpu->gd_npxthread == lp1->lwp_thread) - npxsave(lp1->lwp_thread->td_savefpu); -#endif - - /* - * Copy lp1's PCB. This really only applies to the - * debug registers and FP state, but its faster to just copy the - * whole thing. Because we only save the PCB at switchout time, - * the register state may not be current. - */ - pcb2 = lp2->lwp_thread->td_pcb; - *pcb2 = *lp1->lwp_thread->td_pcb; - - /* - * Create a new fresh stack for the new process. - * Copy the trap frame for the return to user mode as if from a - * syscall. This copies the user mode register values. The - * 16 byte offset saves space for vm86, and must match - * common_tss.esp0 (kernel stack pointer on entry from user mode) - * - * pcb_esp must allocate an additional call-return pointer below - * the trap frame which will be restored by cpu_restore from - * PCB_EIP, and the thread's td_sp pointer must allocate an - * additonal two worsd below the pcb_esp call-return pointer to - * hold the LWKT restore function pointer and eflags. - * - * The LWKT restore function pointer must be set to cpu_restore, - * which is our standard heavy weight process switch-in function. - * YYY eventually we should shortcut fork_return and fork_trampoline - * to use the LWKT restore function directly so we can get rid of - * all the extra crap we are setting up. - */ - lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1; - bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs)); - - /* - * Set registers for trampoline to user mode. Leave space for the - * return address on stack. These are the kernel mode register values. - */ - pcb2->pcb_unused01 = 0; - pcb2->pcb_edi = 0; - pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ - pcb2->pcb_ebp = 0; - pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *); - pcb2->pcb_ebx = (int)lp2; /* fork_trampoline argument */ - pcb2->pcb_eip = (int)fork_trampoline; - lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *)); - *(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER; - lp2->lwp_thread->td_sp -= sizeof(void *); - *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; - - /* - * pcb2->pcb_ldt: duplicated below, if necessary. - * pcb2->pcb_savefpu: cloned above. - * pcb2->pcb_flags: cloned above (always 0 here?). - * pcb2->pcb_onfault: cloned above (always NULL here?). - */ - - /* - * XXX don't copy the i/o pages. this should probably be fixed. - */ - pcb2->pcb_ext = NULL; - - /* Copy the LDT, if necessary. */ - if (pcb2->pcb_ldt != NULL) { - if (flags & RFMEM) { - pcb2->pcb_ldt->ldt_refcnt++; - } else { - pcb2->pcb_ldt = user_ldt_alloc(pcb2, - pcb2->pcb_ldt->ldt_len); - } - } - bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls, - sizeof(lp2->lwp_thread->td_tls)); - /* - * Now, cpu_switch() can schedule the new process. - * pcb_esp is loaded pointing to the cpu_switch() stack frame - * containing the return address when exiting cpu_switch. - * This will normally be to fork_trampoline(), which will have - * %ebx loaded with the new proc's pointer. fork_trampoline() - * will set up a stack to call fork_return(p, frame); to complete - * the return to user-mode. - */ -} - -/* - * Prepare new lwp to return to the address specified in params. - */ -int -cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params) -{ - struct trapframe *regs = lp->lwp_md.md_regs; - void *bad_return = NULL; - int error; - - regs->tf_eip = (int)params->func; - regs->tf_esp = (int)params->stack; - /* Set up argument for function call */ - regs->tf_esp -= sizeof(params->arg); - error = copyout(¶ms->arg, (void *)regs->tf_esp, - sizeof(params->arg)); - if (error) - return (error); - /* - * Set up fake return address. As the lwp function may never return, - * we simply copy out a NULL pointer and force the lwp to receive - * a SIGSEGV if it returns anyways. - */ - regs->tf_esp -= sizeof(void *); - error = copyout(&bad_return, (void *)regs->tf_esp, - sizeof(bad_return)); - if (error) - return (error); - - cpu_set_fork_handler(lp, - (void (*)(void *, struct trapframe *))generic_lwp_return, lp); - return (0); -} - -/* - * Intercept the return address from a freshly forked process that has NOT - * been scheduled yet. - * - * This is needed to make kernel threads stay in kernel mode. - */ -void -cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *), - void *arg) -{ - /* - * Note that the trap frame follows the args, so the function - * is really called like this: func(arg, frame); - */ - lp->lwp_thread->td_pcb->pcb_esi = (int) func; /* function */ - lp->lwp_thread->td_pcb->pcb_ebx = (int) arg; /* first arg */ -} - -void -cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg) -{ - td->td_pcb->pcb_esi = (int)func; - td->td_pcb->pcb_ebx = (int) arg; - td->td_switch = cpu_lwkt_switch; - td->td_sp -= sizeof(void *); - *(void **)td->td_sp = rfunc; /* exit function on return */ - td->td_sp -= sizeof(void *); - *(void **)td->td_sp = cpu_kthread_restore; -} - -void -cpu_lwp_exit(void) -{ - struct thread *td = curthread; - struct pcb *pcb; - struct pcb_ext *ext; - - /* - * If we were using a private TSS do a forced-switch to ourselves - * to switch back to the common TSS before freeing it. - */ - pcb = td->td_pcb; - if ((ext = pcb->pcb_ext) != NULL) { - crit_enter(); - pcb->pcb_ext = NULL; - lwkt_switch_return(td->td_switch(td)); - crit_exit(); - kmem_free(&kernel_map, (vm_offset_t)ext, ctob(IOPAGES + 1)); - } - user_ldt_free(pcb); - if (pcb->pcb_flags & PCB_DBREGS) { - /* - * disable all hardware breakpoints - */ - reset_dbregs(); - pcb->pcb_flags &= ~PCB_DBREGS; - } - td->td_gd->gd_cnt.v_swtch++; - - crit_enter_quick(td); - if (td->td_flags & TDF_TSLEEPQ) - tsleep_remove(td); - lwkt_deschedule_self(td); - lwkt_remove_tdallq(td); - cpu_thread_exit(); -} - -/* - * Terminate the current thread. The caller must have already acquired - * the thread's rwlock and placed it on a reap list or otherwise notified - * a reaper of its existance. We set a special assembly switch function which - * releases td_rwlock after it has cleaned up the MMU state and switched - * out the stack. - * - * Must be caller from a critical section and with the thread descheduled. - */ -void -cpu_thread_exit(void) -{ -#if NNPX > 0 - npxexit(); -#endif - curthread->td_switch = cpu_exit_switch; - curthread->td_flags |= TDF_EXITING; - lwkt_switch(); - panic("cpu_exit"); -} - -#ifdef notyet -static void -setredzone(u_short *pte, caddr_t vaddr) -{ -/* eventually do this by setting up an expand-down stack segment - for ss0: selector, allowing stack access down to top of u. - this means though that protection violations need to be handled - thru a double fault exception that must do an integral task - switch to a known good context, within which a dump can be - taken. a sensible scheme might be to save the initial context - used by sched (that has physical memory mapped 1:1 at bottom) - and take the dump while still in mapped mode */ -} -#endif - -/* - * Convert kernel VA to physical address - */ -vm_paddr_t -kvtop(void *addr) -{ - vm_paddr_t pa; - - pa = pmap_kextract((vm_offset_t)addr); - if (pa == 0) - panic("kvtop: zero page frame"); - return (pa); -} - -SYSCTL_DECL(_vm_stats_misc); - -/* - * Used by /dev/kmem to determine if we can safely read or write - * the requested KVA range. Some portions of kernel memory are - * not governed by our virtual page table. - */ -extern int32_t _end; -extern void _start(void); - -int -kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot) -{ - vm_offset_t addr; - - if (saddr >= trunc_page((vm_offset_t)&_start) && eaddr <= round_page((vm_offset_t)&_end)) - return 0; - if (saddr < KvaStart) - return EFAULT; - if (eaddr >= KvaEnd) - return EFAULT; - for (addr = saddr; addr < eaddr; addr += PAGE_SIZE) { - if (pmap_extract(&kernel_pmap, addr) == 0) - return EFAULT; - } - if (!kernacc((caddr_t)saddr, eaddr - saddr, prot)) - return EFAULT; - return 0; -} - diff --git a/sys/platform/vkernel/include/clock.h b/sys/platform/vkernel/include/clock.h deleted file mode 100644 index c145eefabf..0000000000 --- a/sys/platform/vkernel/include/clock.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Kernel interface to machine-dependent clock driver. - * Garrett Wollman, September 1994. - * This file is in the public domain. - * - * $FreeBSD: src/sys/i386/include/clock.h,v 1.38.2.1 2002/11/02 04:41:50 iwasaki Exp $ - */ - -#ifndef _MACHINE_CLOCK_H_ -#define _MACHINE_CLOCK_H_ - -#ifdef _KERNEL - -#ifndef _SYS_TYPES_H_ -#include -#endif - -/* - * i386 to clock driver interface. - * XXX large parts of the driver and its interface are misplaced. - */ -extern int adjkerntz; -extern int disable_rtc_set; -extern u_int timer_freq; -extern int timer0_max_count; -extern int tsc_present; -extern int tsc_invariant; -extern int tsc_mpsync; -extern int64_t tsc_frequency; -extern int tsc_is_broken; -extern int wall_cmos_clock; - -/* - * Driver to clock driver interface. - */ - -int rtcin (int val); -int acquire_timer2 (int mode); -int release_timer2 (void); -int sysbeep (int pitch, int period); -void timer_restore (void); - -#endif /* _KERNEL */ - -#endif /* !_MACHINE_CLOCK_H_ */ diff --git a/sys/platform/vkernel/include/cothread.h b/sys/platform/vkernel/include/cothread.h deleted file mode 100644 index 7fd72a2dc7..0000000000 --- a/sys/platform/vkernel/include/cothread.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2008 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/cothread.h,v 1.2 2008/03/27 04:28:07 dillon Exp $ - */ - -#ifndef _MACHINE_COTHREAD_H_ -#define _MACHINE_COTHREAD_H_ - -#include - -struct cothread { - pthread_t pthr; - pthread_t pintr; - void *arg; - void (*thr_func)(struct cothread *); - void (*thr_intr)(struct cothread *); - void *intr_id; - pthread_mutex_t mutex; - pthread_cond_t cond; -}; - -typedef struct cothread *cothread_t; - -cothread_t cothread_create(void (*thr_func)(cothread_t cotd), - void (*thr_intr)(cothread_t cotd), - void *arg, const char *name); -void cothread_delete(cothread_t *cotdp); -void cothread_intr(cothread_t cotd); -void cothread_signal(cothread_t cotd); -void cothread_wait(cothread_t cotd); -void cothread_lock(cothread_t cotd, int is_cotd); -void cothread_unlock(cothread_t cotd, int is_cotd); - -#endif diff --git a/sys/platform/vkernel/include/cpu.h b/sys/platform/vkernel/include/cpu.h deleted file mode 100644 index cd0ea74687..0000000000 --- a/sys/platform/vkernel/include/cpu.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/cpu.h,v 1.3 2008/05/19 10:29:58 corecode Exp $ - */ - -#ifndef _MACHINE_CPU_H_ -#define _MACHINE_CPU_H_ - -#include - -#define CLKF_USERMODE(framep) ((framep)->if_xflags & PGEX_U) - -/* globals used in vkernel CPU to kernel CPU locking */ -#if defined(_KERNEL) - -extern int lwp_cpu_lock; -extern int next_cpu; -extern int real_ncpus; -void setrealcpu(void); -void identcpu(void); - -#endif - -#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) - -#define LCL_NONE 0 -#define LCL_PER_CPU 1 -#define LCL_SINGLE_CPU 2 - -#endif - -#endif - diff --git a/sys/platform/vkernel/include/cpufunc.h b/sys/platform/vkernel/include/cpufunc.h deleted file mode 100644 index 5ed7b0045b..0000000000 --- a/sys/platform/vkernel/include/cpufunc.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/cpufunc.h,v 1.2 2007/02/18 14:28:18 corecode Exp $ - */ -#ifndef _MACHINE_CPUFUNC_H_ -#define _MACHINE_CPUFUNC_H_ - -#ifdef _KERNEL - -/* - * First declare our overriding functions. We have to do this to prevent - * cpu/cpufunc.h to define inline assembler versions. However, we need - * cpu/cpufunc.h to define other functions like ``ffs'', which will otherwise - * be defined by libkern (via sys/systm.h). This is why the order needs to be: - * - * 1. Declare our overrides - * 2. include cpu/cpufunc.h - * 3. include the remaining needed headers for our overrides - */ - -#define _CPU_ENABLE_INTR_DEFINED -#define _CPU_DISABLE_INTR_DEFINED -#define _CPU_INVLPG_DEFINED -#define _CPU_INVLTLB_DEFINED - -void cpu_disable_intr(void); -void cpu_enable_intr(void); -void cpu_invlpg(void *addr); -void cpu_invltlb(void); - -#endif - -#include - -#ifdef _KERNEL - -#include -#include -#include -#include - -#include -#include - -#endif /* _KERNEL */ - -#endif /* !_MACHINE_CPUFUNC_H_ */ - diff --git a/sys/platform/vkernel/include/globaldata.h b/sys/platform/vkernel/include/globaldata.h deleted file mode 100644 index 9cd8289137..0000000000 --- a/sys/platform/vkernel/include/globaldata.h +++ /dev/null @@ -1,161 +0,0 @@ -/*- - * Copyright (c) Peter Wemm - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Only machine-dependant code should ever include this file. MI - * code and header files do NOT include this file. e.g. sys/globaldata.h - * should not include this file. - * - * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - */ - -#ifndef _MACHINE_GLOBALDATA_H_ -#define _MACHINE_GLOBALDATA_H_ - -#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) - -#ifndef _SYS_GLOBALDATA_H_ -#include /* struct globaldata */ -#endif -#ifndef _SYS_THREAD_H_ -#include /* struct thread */ -#endif -#ifndef _SYS_VKERNEL_H_ -#include /* vpte_t */ -#endif -#include /* struct segment_descriptor */ -#include /* struct i386tss */ -#include - -/* - * Note on interrupt control. Pending interrupts not yet dispatched are - * marked in gd_fpending, gd_ipending, or gd_spending. Once dispatched - * the interrupt's pending bit is cleared and the interrupt is masked. - * Upon completion the interrupt is unmasked. - * - * For edge triggered interrupts interrupts may be enabled again at this - * point and if they occur before the interrupt service routine is complete - * the service routine will loop. - * - * The current thread's cpl is stored in the thread structure. - * - * Note: the embedded globaldata and/or the mdglobaldata structure - * may exceed the size of a page. - */ -struct mdglobaldata { - struct globaldata mi; - struct segment_descriptor gd_common_tssd; - struct segment_descriptor *gd_tss_gdt; - struct thread *gd_npxthread; - struct i386tss gd_common_tss; - union savefpu gd_savefpu; /* fast bcopy/zero temp fpu save area */ - int gd_fpu_lock; /* fast bcopy/zero cpu lock */ - int gd_fpending; /* fast interrupt pending */ - int gd_ipending; /* normal interrupt pending */ - int gd_spending; /* software interrupt pending */ - int gd_sdelayed; /* delayed software ints */ - int gd_currentldt; - int unused001; - int unused002; - u_int unused003; - u_int unused004; - u_int gd_ss_eflags; - vpte_t *gd_CMAP1; /* pointer to pte for CADDR1 */ - vpte_t *gd_CMAP2; - vpte_t *gd_CMAP3; - vpte_t *gd_PMAP1; - - caddr_t gd_CADDR1; - caddr_t gd_CADDR2; - caddr_t gd_CADDR3; - vpte_t *gd_PADDR1; - - /* - * Page table mappings, see get_ptbase() - */ - vpte_t *gd_PT1map; /* points into privatedata */ - vpte_t *gd_PT1pdir; /* KVA of page directory */ - vpte_t *gd_PT1pde; /* pointer to pde */ - vpte_t *gd_PT2map; - vpte_t *gd_PT2pdir; - vpte_t *gd_PT2pde; - int gd_PTflip; - - vpte_t *gd_PT3map; /* used from preemptive interrupt */ - vpte_t *gd_PT3pdir; - vpte_t *gd_PT3pde; -}; - -#define MDGLOBALDATA_BASEALLOC_SIZE \ - ((sizeof(struct mdglobaldata) + PAGE_MASK) & ~PAGE_MASK) -#define MDGLOBALDATA_BASEALLOC_PAGES \ - (MDGLOBALDATA_BASEALLOC_SIZE / PAGE_SIZE) -#define MDGLOBALDATA_PAD \ - (MDGLOBALDATA_BASEALLOC_SIZE - sizeof(struct mdglobaldata)) - -/* - * This is the upper (0xff800000) address space layout that is per-cpu. - * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for - * each AP. genassym helps export this to the assembler code. - * - * WARNING! This structure must be segment-aligned and portions within the - * structure must also be segment-aligned. The structure typically - * takes 3 segments per cpu (12MB). - */ -#define PRIVATESPACE_SEGPAD \ - (SEG_SIZE - \ - ((sizeof(struct mdglobaldata) + MDGLOBALDATA_PAD + PAGE_SIZE * 4 + \ - UPAGES * PAGE_SIZE) % SEG_SIZE)) \ - -struct privatespace { - /* main data page */ - struct mdglobaldata mdglobaldata; - char __filler0[MDGLOBALDATA_PAD]; - - /* mapping pages - CPAGE1,CPAGE2,CPAGE3,PPAGE1 */ - char CPAGE1[PAGE_SIZE]; - char CPAGE2[PAGE_SIZE]; - char CPAGE3[PAGE_SIZE]; - vpte_t PPAGE1[PAGE_SIZE / sizeof(vpte_t)]; - - /* idle stack (UPAGES pages) */ - char idlestack[UPAGES * PAGE_SIZE]; - - /* we must PAD to SEG_SIZE */ - char __filler1[PRIVATESPACE_SEGPAD]; - vpte_t PT1MAP[SEG_SIZE / sizeof(vpte_t)]; - vpte_t PT2MAP[SEG_SIZE / sizeof(vpte_t)]; - vpte_t PT3MAP[SEG_SIZE / sizeof(vpte_t)]; -}; -#define mdcpu ((struct mdglobaldata *)_get_mycpu()) - -#endif - -#ifdef _KERNEL - -extern struct privatespace *CPU_prvspace; - -#endif - -#endif diff --git a/sys/platform/vkernel/include/lock.h b/sys/platform/vkernel/include/lock.h deleted file mode 100644 index 20131d7e5c..0000000000 --- a/sys/platform/vkernel/include/lock.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2003-2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $ - * $DragonFly: src/sys/platform/vkernel/include/lock.h,v 1.2 2008/06/19 21:32:55 aggelos Exp $ - */ - -#ifndef _MACHINE_LOCK_H_ -#define _MACHINE_LOCK_H_ - -#ifndef _CPU_PSL_H_ -#include -#endif - -#ifndef LOCORE - -#endif /* LOCORE */ -#endif /* !_MACHINE_LOCK_H_ */ diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/include/md_var.h deleted file mode 100644 index 7eb7bbb4aa..0000000000 --- a/sys/platform/vkernel/include/md_var.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -#ifndef _SYS_TYPES_H_ -#include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif -#ifndef _NET_ETHERNET_H_ -#include -#endif - -#define VKNETIF_MAX 16 -#define VKDISK_MAX 16 -#define SERNOLEN 30 - -struct vknetif_info { - int tap_fd; - int tap_unit; - in_addr_t netif_addr; - in_addr_t netif_mask; - u_char *enaddr; -}; - -struct vkdisk_info { - int fd; - int unit; - enum vkdisk_type { VKD_EMPTY, VKD_DISK, VKD_CD } type; - char fname[MAXPATHLEN]; - char *serno; -}; - -extern char sigcode[]; -extern int szsigcode; -extern vpte_t *KernelPTA; /* NOTE: Offset for direct VA translation */ -extern vpte_t *KernelPTD; -extern vm_offset_t crashdumpmap; -extern int cpu_fxsr; - -extern char cpu_vendor[]; /* XXX belongs in i386 */ -extern u_int cpu_vendor_id; /* XXX belongs in i386 */ -extern u_int cpu_id; /* XXX belongs in i386 */ - -extern struct vkdisk_info DiskInfo[VKDISK_MAX]; -extern int DiskNum; -extern int MemImageFd; -extern struct vknetif_info NetifInfo[VKNETIF_MAX]; -extern int NetifNum; -extern int _ucodesel, _udatasel; - -extern int via_feature_xcrypt; -extern int via_feature_rng; - -struct mdglobaldata; -struct __mcontext; - -vpte_t *pmap_kpte(vm_offset_t va); -void cpu_gdinit (struct mdglobaldata *gd, int cpu); - -void cpu_heavy_restore(void); /* cannot be called from C */ -void cpu_lwkt_restore(void); /* cannot be called from C */ -void cpu_idle_restore(void); /* cannot be called from C */ -void cpu_kthread_restore(void); /* cannot be called from C */ -thread_t cpu_exit_switch (struct thread *next); -void cpu_setregs (void); -void cpu_idle (void); -void cpu_mask_all_signals (void); -void cpu_unmask_all_signals (void); -void go_user (struct intrframe *frame); - -void init_exceptions(void); -void init_kqueue(void); -void init_fpu(int supports_sse); -void kern_trap(struct trapframe *); -void user_trap(struct trapframe *); -void syscall2 (struct trapframe *); -void vcons_set_mode(int); -int npxdna(struct trapframe *); -void npxpush(struct __mcontext *mctx); -void npxpop(struct __mcontext *mctx); - -void signalintr(int intr); - -struct kqueue_info; -struct kqueue_info *kqueue_add(int, void (*)(void *, struct intrframe *), void *); -void kqueue_del(struct kqueue_info *); -struct kqueue_info *kqueue_add_timer(void (*func)(void *, struct intrframe *), void *data); -void kqueue_reload_timer(struct kqueue_info *info, int ms); - - -#endif diff --git a/sys/platform/vkernel/include/param.h b/sys/platform/vkernel/include/param.h deleted file mode 100644 index 141d730db7..0000000000 --- a/sys/platform/vkernel/include/param.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _MACHINE_PARAM_H_ - -#ifndef _NO_NAMESPACE_POLLUTION -#define _MACHINE_PARAM_H_ -#endif - -#ifndef _MACHINE_PLATFORM -#define _MACHINE_PLATFORM vkernel -#endif - -#ifndef _NO_NAMESPACE_POLLUTION - -#ifndef MACHINE_PLATFORM -#define MACHINE_PLATFORM "vkernel" -#endif - -#endif - -/* - * Set the default HZ to the likely resolution of the kqueue timer - * the vkernel uses, otherwise our ticks will be seriously off and - * while date/time will be correct, sleep intervals will not. - */ -#ifdef _KERNEL -#ifndef HZ -#define HZ 20 -#endif -#endif - -#include - -#endif - diff --git a/sys/platform/vkernel/include/pcb.h b/sys/platform/vkernel/include/pcb.h deleted file mode 100644 index 48e64a56f4..0000000000 --- a/sys/platform/vkernel/include/pcb.h +++ /dev/null @@ -1,82 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/include/pcb.h,v 1.32.2.1 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/platform/vkernel/include/pcb.h,v 1.3 2007/01/09 23:34:05 dillon Exp $ - */ - -#ifndef _MACHINE_PCB_H_ -#define _MACHINE_PCB_H_ - -/* - * Intel 386 process control block - */ -#include - -struct pcb { - int pcb_unused01; - int pcb_edi; - int pcb_esi; - int pcb_ebp; - int pcb_esp; - int pcb_ebx; - int pcb_eip; - - int pcb_dr0; - int pcb_dr1; - int pcb_dr2; - int pcb_dr3; - int pcb_dr6; - int pcb_dr7; - - struct pcb_ldt *pcb_ldt; /* per process (user) LDT */ - union savefpu pcb_save; - u_char pcb_flags; -#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ -#define PCB_DBREGS 0x02 /* process using debug registers */ -#define FP_VIRTFP 0x04 /* virtual kernel wants exception */ - caddr_t pcb_onfault; /* copyin/out fault recovery */ - int pcb_unused; - struct pcb_ext *pcb_ext; /* optional pcb extension */ - u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ -}; - -#ifdef _KERNEL - -void savectx (struct pcb *); -#endif - -#endif /* _MACHINE_PCB_H_ */ diff --git a/sys/platform/vkernel/include/pcb_ext.h b/sys/platform/vkernel/include/pcb_ext.h deleted file mode 100644 index ea8a3c25d9..0000000000 --- a/sys/platform/vkernel/include/pcb_ext.h +++ /dev/null @@ -1,68 +0,0 @@ -/*- - * Copyright (c) 1997 Jonathan Lemon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - */ - -#ifndef _MACHINE_PCB_EXT_H_ -#define _MACHINE_PCB_EXT_H_ - -#ifndef _SYS_TYPES_H_ -#include -#endif - -/* - * Extension to the 386 process control block - */ -#include -#include -#include - -struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ - struct i386tss ext_tss; /* per-process i386tss */ - caddr_t ext_iomap; /* i/o permission bitmap */ - struct vm86_kernel ext_vm86; /* vm86 area */ -}; - -struct pcb_ldt { - caddr_t ldt_base; - int ldt_len; - int ldt_refcnt; - u_long ldt_active; - struct segment_descriptor ldt_sd; -}; - -#ifdef _KERNEL - -struct pcb; - -void set_user_ldt (struct pcb *); -struct pcb_ldt *user_ldt_alloc (struct pcb *, int); -void user_ldt_free (struct pcb *); - -#endif - -#endif /* _MACHINE_PCB_EXT_H_ */ diff --git a/sys/platform/vkernel/include/pmap.h b/sys/platform/vkernel/include/pmap.h deleted file mode 100644 index 6ab7b905d1..0000000000 --- a/sys/platform/vkernel/include/pmap.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Derived from hp300 version by Mike Hibler, this version by William - * Jolitz uses a recursive map [a pde points to the page directory] to - * map the page tables using the pagetables themselves. This is done to - * reduce the impact on kernel virtual memory for lots of sparse address - * space, and to reduce the cost of memory to each process. - * - * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 - * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/include/pmap.h,v 1.65.2.3 2001/10/03 07:15:37 peter Exp $ - */ - -#ifndef _MACHINE_PMAP_H_ -#define _MACHINE_PMAP_H_ - -#include - -/* - * Size of Kernel address space. This is the number of page table pages - * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. - * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). - */ -#ifndef KVA_PAGES -#define KVA_PAGES 256 -#endif - -/* - * Pte related macros - */ -#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< -#endif -#ifndef _SYS_QUEUE_H_ -#include -#endif -#ifndef _SYS_SPINLOCK_H_ -#include -#endif -#ifndef _SYS_THREAD_H_ -#include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif -#ifndef _MACHINE_TYPES_H_ -#include -#endif -#ifndef _MACHINE_PARAM_H_ -#include -#endif - -#ifdef _KERNEL - -vm_paddr_t pmap_kextract(vm_offset_t); - -/* - * XXX - */ -#define vtophys(va) pmap_kextract(((vm_offset_t)(va))) -#define vtophys_pte(va) ((pt_entry_t)pmap_kextract(((vm_offset_t)(va)))) - -#endif - -/* - * Pmap stuff - */ -struct pv_entry; -struct vm_page; -struct vm_object; - -struct md_page { - int pv_list_count; - TAILQ_HEAD(,pv_entry) pv_list; -}; - -struct md_object { -}; - -/* - * Each machine dependent implementation is expected to - * keep certain statistics. They may do this anyway they - * so choose, but are expected to return the statistics - * in the following structure. - */ -struct pmap_statistics { - long resident_count; /* # of pages mapped (total) */ - long wired_count; /* # of pages wired */ -}; -typedef struct pmap_statistics *pmap_statistics_t; - -struct pmap { - vpte_t *pm_pdir; /* KVA of page directory */ - vpte_t pm_pdirpte; /* pte mapping phys page */ - struct vm_object *pm_pteobj; /* Container for pte's */ - cpumask_t pm_cpucachemask;/* Invalidate cpu mappings */ - TAILQ_ENTRY(pmap) pm_pmnode; /* list of pmaps */ - TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - TAILQ_HEAD(,pv_entry) pm_pvlist_free; /* free mappings */ - int pm_count; /* reference count */ - cpulock_t pm_active_lock; /* interlock */ - cpumask_t pm_active; /* active on cpus */ - int pm_pdindex; /* page dir page in obj */ - struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_page *pm_ptphint; /* pmap ptp hint */ - int pm_generation; /* detect pvlist deletions */ - struct spinlock pm_spin; - struct lwkt_token pm_token; -}; - -#define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count - -typedef struct pmap *pmap_t; - -#ifdef _KERNEL -extern struct pmap kernel_pmap; -#endif - -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry_t, the list is pv_table. - */ -typedef struct pv_entry { - pmap_t pv_pmap; /* pmap where mapping lies */ - vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; - struct vm_page *pv_ptem; /* VM page for pte */ -} *pv_entry_t; - -#ifdef _KERNEL - -extern caddr_t CADDR1; -extern pt_entry_t *CMAP1; -extern char *ptvmmap; /* poor name! */ -extern vm_offset_t clean_sva; -extern vm_offset_t clean_eva; - -#ifndef __VM_PAGE_T_DEFINED__ -#define __VM_PAGE_T_DEFINED__ -typedef struct vm_page *vm_page_t; -#endif -#ifndef __VM_MEMATTR_T_DEFINED__ -#define __VM_MEMATTR_T_DEFINED__ -typedef char vm_memattr_t; -#endif - -void pmap_bootstrap (void); -void *pmap_mapdev (vm_paddr_t, vm_size_t); -void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); -void pmap_unmapdev (vm_offset_t, vm_size_t); -void pmap_release(struct pmap *pmap); -struct vm_page *pmap_use_pt (pmap_t, vm_offset_t); -void pmap_set_opt (void); - -static __inline int -pmap_emulate_ad_bits(pmap_t pmap) { - return 0; -} - -#endif /* _KERNEL */ - -#endif /* !LOCORE */ - -#endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/platform/vkernel/include/pmap_inval.h b/sys/platform/vkernel/include/pmap_inval.h deleted file mode 100644 index 3f9e6dc7e5..0000000000 --- a/sys/platform/vkernel/include/pmap_inval.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/pmap_inval.h,v 1.3 2007/07/02 02:22:57 dillon Exp $ - */ - -#ifndef _MACHINE_PMAP_INVAL_H_ -#define _MACHINE_PMAP_INVAL_H_ - -#ifndef _SYS_THREAD_H_ -#include -#endif - -typedef struct pmap_inval_info { - int pir_flags; - struct lwkt_cpusync pir_cpusync; -} pmap_inval_info; - -typedef pmap_inval_info *pmap_inval_info_t; - -#define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ -#define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ -#define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ - -#ifdef _KERNEL - -#ifndef _MACHINE_PMAP_H_ -#include -#endif - -void pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -void pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -void pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -void pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -vpte_t pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -vpte_t pmap_clean_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -vpte_t pmap_setro_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); -vpte_t pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va); - -#endif - -#endif diff --git a/sys/platform/vkernel/include/proc.h b/sys/platform/vkernel/include/proc.h deleted file mode 100644 index 88d2ee626e..0000000000 --- a/sys/platform/vkernel/include/proc.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/proc.h,v 1.1 2006/11/07 18:50:07 dillon Exp $ - */ - -#ifndef _MACHINE_PROC_H_ -#define _MACHINE_PROC_H_ - -/* - * When a trap or exception occurs the trap code stores the frame pointer - * in md_regs so emulation and other code can modify it for the return. - */ -struct trapframe; - -struct mdproc { - struct trapframe *md_regs; /* registers on current frame */ -}; - -#endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/platform/vkernel/include/ptrace.h b/sys/platform/vkernel/include/ptrace.h deleted file mode 100644 index a9068ff34a..0000000000 --- a/sys/platform/vkernel/include/ptrace.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ptrace.h 8.1 (Berkeley) 6/11/93 - * $FreeBSD: src/sys/i386/include/ptrace.h,v 1.9 1999/12/29 04:33:06 peter Exp $ - * $DragonFly: src/sys/platform/vkernel/include/ptrace.h,v 1.1 2006/11/08 16:40:00 dillon Exp $ - */ - -#ifndef _MACHINE_PTRACE_H_ -#define _MACHINE_PTRACE_H_ - -/* - * Machine dependent trace commands. - */ -#define PT_GETREGS (PT_FIRSTMACH + 1) -#define PT_SETREGS (PT_FIRSTMACH + 2) -#define PT_GETFPREGS (PT_FIRSTMACH + 3) -#define PT_SETFPREGS (PT_FIRSTMACH + 4) -#define PT_GETDBREGS (PT_FIRSTMACH + 5) -#define PT_SETDBREGS (PT_FIRSTMACH + 6) - -#endif - diff --git a/sys/platform/vkernel/include/smp.h b/sys/platform/vkernel/include/smp.h deleted file mode 100644 index 9db37906cc..0000000000 --- a/sys/platform/vkernel/include/smp.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - * - * $FreeBSD: src/sys/i386/include/smp.h,v 1.50.2.5 2001/02/13 22:32:45 tegge Exp $ - */ - -#ifndef _MACHINE_SMP_H_ -#define _MACHINE_SMP_H_ - -#ifdef _KERNEL - -#ifndef LOCORE - -/* global data in apic_vector.s */ -extern volatile cpumask_t stopped_cpus; -extern int optcpus; /* from main() */ -extern int vkernel_b_arg; /* arg from main() */ -extern int vkernel_B_arg; /* arg from main() */ - -void mp_start (void); -void mp_announce (void); -int stop_cpus (cpumask_t); -void ap_init (void); -int restart_cpus (cpumask_t); -void cpu_send_ipiq (int); -int cpu_send_ipiq_passive (int); - -/* global data in init_smp.c */ -extern cpumask_t smp_active_mask; - -/* Detect CPU topology bits */ -void detect_cpu_topology(void); - -/* Interface functions for IDs calculation */ -int get_chip_ID(int cpuid); -int get_core_number_within_chip(int cpuid); -int get_logical_CPU_number_within_core(int cpuid); - -/* Assume that APICID = CPUID for virtual processors */ -#define get_cpuid_from_apicid(cpuid) cpuid -#define get_apicid_from_cpuid(cpuid) cpuid - -#endif /* !LOCORE */ - -#endif /* _KERNEL */ -#endif /* _MACHINE_SMP_H_ */ diff --git a/sys/platform/vkernel/include/thread.h b/sys/platform/vkernel/include/thread.h deleted file mode 100644 index aae75a5de3..0000000000 --- a/sys/platform/vkernel/include/thread.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _MACHINE_THREAD_H_ -#define _MACHINE_THREAD_H_ - -#include -#include - -struct md_thread { - unsigned int mtd_unused; /* used to be mtd_cpl */ - union savefpu *mtd_savefpu; /* pointer to current fpu context */ - struct vextframe mtd_savevext; -}; - -#ifdef _KERNEL - -#define td_savefpu td_mach.mtd_savefpu -#define td_tls td_mach.mtd_savevext.vx_tls -#define td_savevext td_mach.mtd_savevext - -/* - * mycpu() retrieves the base of the current cpu's globaldata structure. - * Note that it is *NOT* volatile, meaning that the value may be cached by - * GCC. We have to force a dummy memory reference so gcc does not cache - * the gd pointer across a procedure call (which might block and cause us - * to wakeup on a different cpu). - * - * Also note that in DragonFly a thread can be preempted, but only by an - * interrupt thread and the original thread will resume after the - * interrupt thread finishes or blocks. A thread cannot move to another - * cpu preemptively or at all, in fact, while you are in the kernel, even - * if you block. - */ - -struct globaldata; - -extern int __mycpu__dummy; - -static __inline -struct globaldata * -_get_mycpu(void) -{ - struct globaldata *gd; - - __asm ("movl %%fs:globaldata,%0" : "=r" (gd) : "m"(__mycpu__dummy)); - return(gd); -} - -#define mycpu _get_mycpu() -#define mycpuid (_get_mycpu()->gd_cpuid) - -/* - * note: curthread is never NULL, but curproc can be. Also note that - * that only processes really use the PCB. Threads fill in some fields - * but mostly store contextual data on the stack and do not use (much of) - * the PCB. - */ -#define curthread mycpu->gd_curthread -#define curproc curthread->td_proc - -#endif /* _KERNEL */ - -#endif /* !_MACHINE_THREAD_H_ */ diff --git a/sys/platform/vkernel/include/types.h b/sys/platform/vkernel/include/types.h deleted file mode 100644 index 81877083fb..0000000000 --- a/sys/platform/vkernel/include/types.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/types.h,v 1.1 2006/11/07 18:50:07 dillon Exp $ - */ -#ifndef _MACHINE_TYPES_H_ -#define _MACHINE_TYPES_H_ - -#include - -#ifdef _KERNEL -typedef __uint32_t intrmask_t; -#endif - -#endif /* !_MACHINE_TYPES_H_ */ - diff --git a/sys/platform/vkernel/include/vmm.h b/sys/platform/vkernel/include/vmm.h deleted file mode 100644 index ef2a9b0669..0000000000 --- a/sys/platform/vkernel/include/vmm.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2003-2013 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Mihai Carabas - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _MACHINE_VMM_H_ -#define _MACHINE_VMM_H_ - -#include - -static __inline -int vmm_vminit(struct vmm_guest_options* opts) { - return 0; -} - -static __inline -int vmm_vmdestroy(void) { - return 0; -} - -static __inline -int vmm_vmrun(void) { - return 0; -} - -static __inline -int vmm_vm_set_tls_area(void) { - return 0; -} - -static __inline -void vmm_lwp_return(struct lwp *lp, struct trapframe *frame) { -} - -static __inline -void vmm_vm_set_guest_cr3(register_t guest_cr3) { -} - -static __inline -int vmm_vm_get_gpa(struct proc *p, register_t *gpa, register_t uaddr) { - *gpa = 0; - return 0; -} - -#endif diff --git a/sys/platform/vkernel/include/vmparam.h b/sys/platform/vkernel/include/vmparam.h deleted file mode 100644 index 33348f6698..0000000000 --- a/sys/platform/vkernel/include/vmparam.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/include/vmparam.h,v 1.5 2007/01/14 00:01:07 dillon Exp $ - */ - -#ifndef _MACHINE_VMPARAM_H_ -#define _MACHINE_VMPARAM_H_ - -/* - * Indicate that read access also means execution access (XXX newer PCs - * have a separate bit). - */ -#define VM_PROT_READ_IS_EXEC - -/* - * Virtual memory related constants, all in bytes - */ -#define MAXTSIZ (128UL*1024*1024) /* max text size */ -#ifndef DFLDSIZ -#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ -#endif -#ifndef MAXDSIZ -#define MAXDSIZ (512UL*1024*1024) /* max data size */ -#endif -#ifndef DFLSSIZ -#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ -#endif -#ifndef MAXSSIZ -#define MAXSSIZ (64UL*1024*1024) /* max stack size */ -#endif -#ifndef SGROWSIZ -#define SGROWSIZ (128UL*1024) /* amount to grow stack */ -#endif - -/* - * After this period of time allow a process to become swappable. This - * parameter is mostly obsolete now. - */ -#define MAXSLP 20 - -/* - * For virtual kernels running as userland processes the user and kernel - * address spaces exist in different VM spaces and can overlap. - */ -#define KERNEL_KVA_SIZE 0x40000000 - -#define VM_MIN_USER_ADDRESS 0x00000000 -#define VM_MAX_USER_ADDRESS 0x9FC00000 /* XXX match to real kernel */ - -#define USRSTACK VM_MAX_USER_ADDRESS - -#define KERNBASE 0xC0000000 /* XXX totally wrong */ - -/* - * Initial memory map preload - */ -#ifndef VM_INITIAL_PAGEIN -#define VM_INITIAL_PAGEIN 16 -#endif - -#endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/platform/vkernel/platform/busdma_machdep.c b/sys/platform/vkernel/platform/busdma_machdep.c deleted file mode 100644 index 8212bfc91f..0000000000 --- a/sys/platform/vkernel/platform/busdma_machdep.c +++ /dev/null @@ -1,1277 +0,0 @@ -/* - * Copyright (c) 1997, 1998 Justin T. Gibbs. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.94 2008/08/15 20:51:31 kmacy Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -/* XXX needed for to access pmap to convert per-proc virtual to physical */ -#include -#include - -#include - -#define MAX_BPAGES 1024 - -struct bounce_zone; -struct bus_dmamap; - -struct bus_dma_tag { - bus_dma_tag_t parent; - bus_size_t alignment; - bus_size_t boundary; - bus_addr_t lowaddr; - bus_addr_t highaddr; - bus_dma_filter_t *filter; - void *filterarg; - bus_size_t maxsize; - u_int nsegments; - bus_size_t maxsegsz; - int flags; - int ref_count; - int map_count; - bus_dma_segment_t *segments; - struct bounce_zone *bounce_zone; -}; - -/* - * bus_dma_tag private flags - */ -#define BUS_DMA_BOUNCE_ALIGN BUS_DMA_BUS2 -#define BUS_DMA_BOUNCE_LOWADDR BUS_DMA_BUS3 -#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 - -#define BUS_DMA_COULD_BOUNCE (BUS_DMA_BOUNCE_LOWADDR | BUS_DMA_BOUNCE_ALIGN) - -#define BUS_DMAMEM_KMALLOC(dmat) \ - ((dmat)->maxsize <= PAGE_SIZE && \ - (dmat)->alignment <= PAGE_SIZE && \ - (dmat)->lowaddr >= ptoa(Maxmem)) - -struct bounce_page { - vm_offset_t vaddr; /* kva of bounce buffer */ - bus_addr_t busaddr; /* Physical address */ - vm_offset_t datavaddr; /* kva of client data */ - bus_size_t datacount; /* client data count */ - STAILQ_ENTRY(bounce_page) links; -}; - -struct bounce_zone { - STAILQ_ENTRY(bounce_zone) links; - STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; - STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; - struct spinlock spin; - int total_bpages; - int free_bpages; - int reserved_bpages; - int active_bpages; - int total_bounced; - int total_deferred; - int reserve_failed; - bus_size_t alignment; - bus_addr_t lowaddr; - char zoneid[8]; - char lowaddrid[20]; - struct sysctl_ctx_list sysctl_ctx; - struct sysctl_oid *sysctl_tree; -}; - -#define BZ_LOCK(bz) spin_lock(&(bz)->spin) -#define BZ_UNLOCK(bz) spin_unlock(&(bz)->spin) - -static struct lwkt_token bounce_zone_tok = - LWKT_TOKEN_INITIALIZER(bounce_zone_token); -static int busdma_zonecount; -static STAILQ_HEAD(, bounce_zone) bounce_zone_list = - STAILQ_HEAD_INITIALIZER(bounce_zone_list); - -int busdma_swi_pending; -static int total_bounce_pages; -static int max_bounce_pages = MAX_BPAGES; -static int bounce_alignment = 1; /* XXX temporary */ - -TUNABLE_INT("hw.busdma.max_bpages", &max_bounce_pages); -TUNABLE_INT("hw.busdma.bounce_alignment", &bounce_alignment); - -struct bus_dmamap { - struct bp_list bpages; - int pagesneeded; - int pagesreserved; - bus_dma_tag_t dmat; - void *buf; /* unmapped buffer pointer */ - bus_size_t buflen; /* unmapped buffer length */ - bus_dmamap_callback_t *callback; - void *callback_arg; - STAILQ_ENTRY(bus_dmamap) links; -}; - -static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist = - STAILQ_HEAD_INITIALIZER(bounce_map_callbacklist); - -static struct bus_dmamap nobounce_dmamap; - -static int alloc_bounce_zone(bus_dma_tag_t); -static int alloc_bounce_pages(bus_dma_tag_t, u_int, int); -static int reserve_bounce_pages(bus_dma_tag_t, bus_dmamap_t, int); -static void return_bounce_pages(bus_dma_tag_t, bus_dmamap_t); -static bus_addr_t add_bounce_page(bus_dma_tag_t, bus_dmamap_t, - vm_offset_t, bus_size_t); -static void free_bounce_page(bus_dma_tag_t, struct bounce_page *); - -static bus_dmamap_t get_map_waiting(bus_dma_tag_t); -static void add_map_callback(bus_dmamap_t); - -SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); -SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bounce_pages, - 0, "Total bounce pages"); -SYSCTL_INT(_hw_busdma, OID_AUTO, max_bpages, CTLFLAG_RD, &max_bounce_pages, - 0, "Max bounce pages per bounce zone"); -SYSCTL_INT(_hw_busdma, OID_AUTO, bounce_alignment, CTLFLAG_RD, - &bounce_alignment, 0, "Obey alignment constraint"); - -static __inline int -run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) -{ - int retval; - - retval = 0; - do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || - (bounce_alignment && (paddr & (dmat->alignment - 1)) != 0)) - && (dmat->filter == NULL || - dmat->filter(dmat->filterarg, paddr) != 0)) - retval = 1; - - dmat = dmat->parent; - } while (retval == 0 && dmat != NULL); - return (retval); -} - -/* - * Allocate a device specific dma_tag. - */ -int -bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, - bus_size_t boundary, bus_addr_t lowaddr, - bus_addr_t highaddr, bus_dma_filter_t *filter, - void *filterarg, bus_size_t maxsize, int nsegments, - bus_size_t maxsegsz, int flags, bus_dma_tag_t *dmat) -{ - bus_dma_tag_t newtag; - int error = 0; - - /* - * Sanity checks - */ - - if (alignment == 0) - alignment = 1; - if (alignment & (alignment - 1)) - panic("alignment must be power of 2"); - - if (boundary != 0) { - if (boundary & (boundary - 1)) - panic("boundary must be power of 2"); - if (boundary < maxsegsz) { - kprintf("boundary < maxsegsz:\n"); - print_backtrace(-1); - maxsegsz = boundary; - } - } - - /* Return a NULL tag on failure */ - *dmat = NULL; - - newtag = kmalloc(sizeof(*newtag), M_DEVBUF, M_INTWAIT); - - newtag->parent = parent; - newtag->alignment = alignment; - newtag->boundary = boundary; - newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); - newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); - newtag->filter = filter; - newtag->filterarg = filterarg; - newtag->maxsize = maxsize; - newtag->nsegments = nsegments; - newtag->maxsegsz = maxsegsz; - newtag->flags = flags; - newtag->ref_count = 1; /* Count ourself */ - newtag->map_count = 0; - newtag->segments = NULL; - newtag->bounce_zone = NULL; - - /* Take into account any restrictions imposed by our parent tag */ - if (parent != NULL) { - newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); - newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); - - if (newtag->boundary == 0) { - newtag->boundary = parent->boundary; - } else if (parent->boundary != 0) { - newtag->boundary = MIN(parent->boundary, - newtag->boundary); - } - -#ifdef notyet - newtag->alignment = MAX(parent->alignment, newtag->alignment); -#endif - - if (newtag->filter == NULL) { - /* - * Short circuit looking at our parent directly - * since we have encapsulated all of its information - */ - newtag->filter = parent->filter; - newtag->filterarg = parent->filterarg; - newtag->parent = parent->parent; - } - if (newtag->parent != NULL) - parent->ref_count++; - } - - if (newtag->lowaddr < ptoa(Maxmem)) - newtag->flags |= BUS_DMA_BOUNCE_LOWADDR; - if (bounce_alignment && newtag->alignment > 1 && - !(newtag->flags & BUS_DMA_ALIGNED)) - newtag->flags |= BUS_DMA_BOUNCE_ALIGN; - - if ((newtag->flags & BUS_DMA_COULD_BOUNCE) && - (flags & BUS_DMA_ALLOCNOW) != 0) { - struct bounce_zone *bz; - - /* Must bounce */ - - error = alloc_bounce_zone(newtag); - if (error) - goto back; - bz = newtag->bounce_zone; - - if (ptoa(bz->total_bpages) < maxsize) { - int pages; - - if (flags & BUS_DMA_ONEBPAGE) { - pages = 1; - } else { - pages = atop(round_page(maxsize)) - - bz->total_bpages; - pages = MAX(pages, 1); - } - - /* Add pages to our bounce pool */ - if (alloc_bounce_pages(newtag, pages, flags) < pages) - error = ENOMEM; - - /* Performed initial allocation */ - newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; - } - } -back: - if (error) - kfree(newtag, M_DEVBUF); - else - *dmat = newtag; - return error; -} - -int -bus_dma_tag_destroy(bus_dma_tag_t dmat) -{ - if (dmat != NULL) { - if (dmat->map_count != 0) - return (EBUSY); - - while (dmat != NULL) { - bus_dma_tag_t parent; - - parent = dmat->parent; - dmat->ref_count--; - if (dmat->ref_count == 0) { - if (dmat->segments != NULL) - kfree(dmat->segments, M_DEVBUF); - kfree(dmat, M_DEVBUF); - /* - * Last reference count, so - * release our reference - * count on our parent. - */ - dmat = parent; - } else - dmat = NULL; - } - } - return (0); -} - -/* - * Allocate a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) -{ - int error; - - error = 0; - - if (dmat->segments == NULL) { - KKASSERT(dmat->nsegments && dmat->nsegments < 16384); - dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * - dmat->nsegments, M_DEVBUF, M_INTWAIT); - } - - if (dmat->flags & BUS_DMA_COULD_BOUNCE) { - struct bounce_zone *bz; - int maxpages; - - /* Must bounce */ - - if (dmat->bounce_zone == NULL) { - error = alloc_bounce_zone(dmat); - if (error) - return error; - } - bz = dmat->bounce_zone; - - *mapp = kmalloc(sizeof(**mapp), M_DEVBUF, M_INTWAIT | M_ZERO); - - /* Initialize the new map */ - STAILQ_INIT(&((*mapp)->bpages)); - - /* - * Attempt to add pages to our pool on a per-instance - * basis up to a sane limit. - */ - if (dmat->flags & BUS_DMA_BOUNCE_ALIGN) { - maxpages = max_bounce_pages; - } else { - maxpages = MIN(max_bounce_pages, - Maxmem - atop(dmat->lowaddr)); - } - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 - || (dmat->map_count > 0 - && bz->total_bpages < maxpages)) { - int pages; - - if (flags & BUS_DMA_ONEBPAGE) { - pages = 1; - } else { - pages = atop(round_page(dmat->maxsize)); - pages = MIN(maxpages - bz->total_bpages, pages); - pages = MAX(pages, 1); - } - if (alloc_bounce_pages(dmat, pages, flags) < pages) - error = ENOMEM; - - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { - if (!error) - dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; - } else { - error = 0; - } - } - } else { - *mapp = NULL; - } - if (!error) - dmat->map_count++; - return error; -} - -/* - * Destroy a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - if (map != NULL) { - if (STAILQ_FIRST(&map->bpages) != NULL) - return (EBUSY); - kfree(map, M_DEVBUF); - } - dmat->map_count--; - return (0); -} - -static __inline bus_size_t -check_kmalloc(bus_dma_tag_t dmat, const void *vaddr0, int verify) -{ - bus_size_t maxsize = 0; - uintptr_t vaddr = (uintptr_t)vaddr0; - - if ((vaddr ^ (vaddr + dmat->maxsize - 1)) & ~PAGE_MASK) { - if (verify || bootverbose) - kprintf("boundary check failed\n"); - if (verify) - print_backtrace(-1); /* XXX panic */ - maxsize = dmat->maxsize; - } - if (vaddr & (dmat->alignment - 1)) { - if (verify || bootverbose) - kprintf("alignment check failed\n"); - if (verify) - print_backtrace(-1); /* XXX panic */ - if (dmat->maxsize < dmat->alignment) - maxsize = dmat->alignment; - else - maxsize = dmat->maxsize; - } - return maxsize; -} - -/* - * Allocate a piece of memory that can be efficiently mapped into - * bus device space based on the constraints lited in the dma tag. - * - * mapp is degenerate. By definition this allocation should not require - * bounce buffers so do not allocate a dma map. - */ -int -bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, - bus_dmamap_t *mapp) -{ - int mflags; - - /* If we succeed, no mapping/bouncing will be required */ - *mapp = NULL; - - if (dmat->segments == NULL) { - KKASSERT(dmat->nsegments < 16384); - dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * - dmat->nsegments, M_DEVBUF, M_INTWAIT); - } - - if (flags & BUS_DMA_NOWAIT) - mflags = M_NOWAIT; - else - mflags = M_WAITOK; - if (flags & BUS_DMA_ZERO) - mflags |= M_ZERO; - - if (BUS_DMAMEM_KMALLOC(dmat)) { - bus_size_t maxsize; - - *vaddr = kmalloc(dmat->maxsize, M_DEVBUF, mflags); - - /* - * XXX - * Check whether the allocation - * - crossed a page boundary - * - was not aligned - * Retry with power-of-2 alignment in the above cases. - */ - maxsize = check_kmalloc(dmat, *vaddr, 0); - if (maxsize) { - size_t size; - - kfree(*vaddr, M_DEVBUF); - /* XXX check for overflow? */ - for (size = 1; size <= maxsize; size <<= 1) - ; - *vaddr = kmalloc(size, M_DEVBUF, mflags); - check_kmalloc(dmat, *vaddr, 1); - } - } else { - /* - * XXX Use Contigmalloc until it is merged into this facility - * and handles multi-seg allocations. Nobody is doing - * multi-seg allocations yet though. - */ - *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags, - 0ul, dmat->lowaddr, dmat->alignment, dmat->boundary); - } - if (*vaddr == NULL) - return (ENOMEM); - return (0); -} - -/* - * Free a piece of memory and it's allociated dmamap, that was allocated - * via bus_dmamem_alloc. Make the same choice for free/contigfree. - */ -void -bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) -{ - /* - * dmamem does not need to be bounced, so the map should be - * NULL - */ - if (map != NULL) - panic("bus_dmamem_free: Invalid map freed"); - if (BUS_DMAMEM_KMALLOC(dmat)) - kfree(vaddr, M_DEVBUF); - else - contigfree(vaddr, dmat->maxsize, M_DEVBUF); -} - -static __inline vm_paddr_t -_bus_dma_extract(pmap_t pmap, vm_offset_t vaddr) -{ - if (pmap) - return pmap_extract(pmap, vaddr); - else - return pmap_kextract(vaddr); -} - -/* - * Utility function to load a linear buffer. lastaddrp holds state - * between invocations (for multiple-buffer loads). segp contains - * the segment following the starting one on entrace, and the ending - * segment on exit. first indicates if this is the first invocation - * of this function. - */ -static int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, - bus_dmamap_t map, - void *buf, bus_size_t buflen, - bus_dma_segment_t *segments, - int nsegments, - pmap_t pmap, - int flags, - vm_paddr_t *lastpaddrp, - int *segp, - int first) -{ - vm_offset_t vaddr; - vm_paddr_t paddr, nextpaddr; - bus_dma_segment_t *sg; - bus_addr_t bmask; - int seg, error = 0; - - if (map == NULL) - map = &nobounce_dmamap; - -#ifdef INVARIANTS - if (dmat->flags & BUS_DMA_ALIGNED) - KKASSERT(((uintptr_t)buf & (dmat->alignment - 1)) == 0); -#endif - - /* - * If we are being called during a callback, pagesneeded will - * be non-zero, so we can avoid doing the work twice. - */ - if ((dmat->flags & BUS_DMA_COULD_BOUNCE) && - map != &nobounce_dmamap && map->pagesneeded == 0) { - vm_offset_t vendaddr; - - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - vaddr = (vm_offset_t)buf; - vendaddr = (vm_offset_t)buf + buflen; - - while (vaddr < vendaddr) { - paddr = _bus_dma_extract(pmap, vaddr); - if (run_filter(dmat, paddr) != 0) - map->pagesneeded++; - vaddr += (PAGE_SIZE - (vaddr & PAGE_MASK)); - } - } - - /* Reserve Necessary Bounce Pages */ - if (map->pagesneeded != 0) { - struct bounce_zone *bz; - - bz = dmat->bounce_zone; - BZ_LOCK(bz); - if (flags & BUS_DMA_NOWAIT) { - if (reserve_bounce_pages(dmat, map, 0) != 0) { - BZ_UNLOCK(bz); - error = ENOMEM; - goto free_bounce; - } - } else { - if (reserve_bounce_pages(dmat, map, 1) != 0) { - /* Queue us for resources */ - map->dmat = dmat; - map->buf = buf; - map->buflen = buflen; - - STAILQ_INSERT_TAIL( - &dmat->bounce_zone->bounce_map_waitinglist, - map, links); - BZ_UNLOCK(bz); - - return (EINPROGRESS); - } - } - BZ_UNLOCK(bz); - } - - KKASSERT(*segp >= 1 && *segp <= nsegments); - seg = *segp; - sg = &segments[seg - 1]; - - vaddr = (vm_offset_t)buf; - nextpaddr = *lastpaddrp; - bmask = ~(dmat->boundary - 1); /* note: will be 0 if boundary is 0 */ - - /* force at least one segment */ - do { - bus_size_t size; - - /* - * Per-page main loop - */ - paddr = _bus_dma_extract(pmap, vaddr); - size = PAGE_SIZE - (paddr & PAGE_MASK); - if (size > buflen) - size = buflen; - if (map->pagesneeded != 0 && run_filter(dmat, paddr)) { - /* - * note: this paddr has the same in-page offset - * as vaddr and thus the paddr above, so the - * size does not have to be recalculated - */ - paddr = add_bounce_page(dmat, map, vaddr, size); - } - - /* - * Fill in the bus_dma_segment - */ - if (first) { - sg->ds_addr = paddr; - sg->ds_len = size; - first = 0; - } else if (paddr == nextpaddr) { - sg->ds_len += size; - } else { - sg++; - seg++; - if (seg > nsegments) - break; - sg->ds_addr = paddr; - sg->ds_len = size; - } - nextpaddr = paddr + size; - - /* - * Handle maxsegsz and boundary issues with a nested loop - */ - for (;;) { - bus_size_t tmpsize; - - /* - * Limit to the boundary and maximum segment size - */ - if (((nextpaddr - 1) ^ sg->ds_addr) & bmask) { - tmpsize = dmat->boundary - - (sg->ds_addr & ~bmask); - if (tmpsize > dmat->maxsegsz) - tmpsize = dmat->maxsegsz; - KKASSERT(tmpsize < sg->ds_len); - } else if (sg->ds_len > dmat->maxsegsz) { - tmpsize = dmat->maxsegsz; - } else { - break; - } - - /* - * Futz, split the data into a new segment. - */ - if (seg >= nsegments) - goto fail; - sg[1].ds_len = sg[0].ds_len - tmpsize; - sg[1].ds_addr = sg[0].ds_addr + tmpsize; - sg[0].ds_len = tmpsize; - sg++; - seg++; - } - - /* - * Adjust for loop - */ - buflen -= size; - vaddr += size; - } while (buflen > 0); -fail: - if (buflen != 0) - error = EFBIG; - - *segp = seg; - *lastpaddrp = nextpaddr; - -free_bounce: - if (error && (dmat->flags & BUS_DMA_COULD_BOUNCE) && - map != &nobounce_dmamap) { - _bus_dmamap_unload(dmat, map); - return_bounce_pages(dmat, map); - } - return error; -} - -/* - * Map the buffer buf into bus space using the dmamap map. - */ -int -bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, - bus_size_t buflen, bus_dmamap_callback_t *callback, - void *callback_arg, int flags) -{ - vm_paddr_t lastaddr = 0; - int error, nsegs = 1; - - if (map != NULL) { - /* - * XXX - * Follow old semantics. Once all of the callers are fixed, - * we should get rid of these internal flag "adjustment". - */ - flags &= ~BUS_DMA_NOWAIT; - flags |= BUS_DMA_WAITOK; - - map->callback = callback; - map->callback_arg = callback_arg; - } - - error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, - dmat->segments, dmat->nsegments, - NULL, flags, &lastaddr, &nsegs, 1); - if (error == EINPROGRESS) - return error; - - callback(callback_arg, dmat->segments, nsegs, error); - return 0; -} - -/* - * Like _bus_dmamap_load(), but for mbufs. - */ -int -bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) -{ - int nsegs, error; - - /* - * XXX - * Follow old semantics. Once all of the callers are fixed, - * we should get rid of these internal flag "adjustment". - */ - flags &= ~BUS_DMA_WAITOK; - flags |= BUS_DMA_NOWAIT; - - error = bus_dmamap_load_mbuf_segment(dmat, map, m0, - dmat->segments, dmat->nsegments, &nsegs, flags); - if (error) { - /* force "no valid mappings" in callback */ - callback(callback_arg, dmat->segments, 0, 0, error); - } else { - callback(callback_arg, dmat->segments, nsegs, - m0->m_pkthdr.len, error); - } - return error; -} - -int -bus_dmamap_load_mbuf_segment(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, - bus_dma_segment_t *segs, int maxsegs, - int *nsegs, int flags) -{ - int error; - - M_ASSERTPKTHDR(m0); - - KASSERT(maxsegs >= 1, ("invalid maxsegs %d", maxsegs)); - KASSERT(maxsegs <= dmat->nsegments, - ("%d too many segments, dmat only supports %d segments", - maxsegs, dmat->nsegments)); - KASSERT(flags & BUS_DMA_NOWAIT, - ("only BUS_DMA_NOWAIT is supported")); - - if (m0->m_pkthdr.len <= dmat->maxsize) { - int first = 1; - vm_paddr_t lastaddr = 0; - struct mbuf *m; - - *nsegs = 1; - error = 0; - for (m = m0; m != NULL && error == 0; m = m->m_next) { - if (m->m_len == 0) - continue; - - error = _bus_dmamap_load_buffer(dmat, map, - m->m_data, m->m_len, - segs, maxsegs, - NULL, flags, &lastaddr, - nsegs, first); - if (error == ENOMEM && !first) { - /* - * Out of bounce pages due to too many - * fragments in the mbuf chain; return - * EFBIG instead. - */ - error = EFBIG; - } - first = 0; - } -#ifdef INVARIANTS - if (!error) - KKASSERT(*nsegs <= maxsegs && *nsegs >= 1); -#endif - } else { - *nsegs = 0; - error = EINVAL; - } - KKASSERT(error != EINPROGRESS); - return error; -} - -/* - * Like _bus_dmamap_load(), but for uios. - */ -int -bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, - struct uio *uio, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) -{ - vm_paddr_t lastaddr; - int nsegs, error, first, i; - bus_size_t resid; - struct iovec *iov; - pmap_t pmap; - - /* - * XXX - * Follow old semantics. Once all of the callers are fixed, - * we should get rid of these internal flag "adjustment". - */ - flags &= ~BUS_DMA_WAITOK; - flags |= BUS_DMA_NOWAIT; - - resid = (bus_size_t)uio->uio_resid; - iov = uio->uio_iov; - - if (uio->uio_segflg == UIO_USERSPACE) { - struct thread *td; - - td = uio->uio_td; - KASSERT(td != NULL && td->td_proc != NULL, - ("bus_dmamap_load_uio: USERSPACE but no proc")); - pmap = vmspace_pmap(td->td_proc->p_vmspace); - } else { - pmap = NULL; - } - - error = 0; - nsegs = 1; - first = 1; - lastaddr = 0; - for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { - /* - * Now at the first iovec to load. Load each iovec - * until we have exhausted the residual count. - */ - bus_size_t minlen = - resid < iov[i].iov_len ? resid : iov[i].iov_len; - caddr_t addr = (caddr_t) iov[i].iov_base; - - error = _bus_dmamap_load_buffer(dmat, map, addr, minlen, - dmat->segments, dmat->nsegments, - pmap, flags, &lastaddr, &nsegs, first); - first = 0; - - resid -= minlen; - } - - if (error) { - /* force "no valid mappings" in callback */ - callback(callback_arg, dmat->segments, 0, 0, error); - } else { - callback(callback_arg, dmat->segments, nsegs, - (bus_size_t)uio->uio_resid, error); - } - return error; -} - -/* - * Release the mapping held by map. - */ -void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bounce_page *bpage; - - while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - STAILQ_REMOVE_HEAD(&map->bpages, links); - free_bounce_page(dmat, bpage); - } -} - -void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) -{ - struct bounce_page *bpage; - - if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - /* - * Handle data bouncing. We might also - * want to add support for invalidating - * the caches on broken hardware - */ - switch (op) { - case BUS_DMASYNC_PREWRITE: - while (bpage != NULL) { - bcopy((void *)bpage->datavaddr, - (void *)bpage->vaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - break; - - case BUS_DMASYNC_POSTREAD: - while (bpage != NULL) { - bcopy((void *)bpage->vaddr, - (void *)bpage->datavaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - break; - - case BUS_DMASYNC_PREREAD: - case BUS_DMASYNC_POSTWRITE: - /* No-ops */ - break; - } - } -} - -static int -alloc_bounce_zone(bus_dma_tag_t dmat) -{ - struct bounce_zone *bz, *new_bz; - - KASSERT(dmat->bounce_zone == NULL, - ("bounce zone was already assigned")); - - new_bz = kmalloc(sizeof(*new_bz), M_DEVBUF, M_INTWAIT | M_ZERO); - - lwkt_gettoken(&bounce_zone_tok); - - /* Check to see if we already have a suitable zone */ - STAILQ_FOREACH(bz, &bounce_zone_list, links) { - if (dmat->alignment <= bz->alignment && - dmat->lowaddr >= bz->lowaddr) { - lwkt_reltoken(&bounce_zone_tok); - - dmat->bounce_zone = bz; - kfree(new_bz, M_DEVBUF); - return 0; - } - } - bz = new_bz; - - spin_init(&bz->spin, "allocbouncezone"); - STAILQ_INIT(&bz->bounce_page_list); - STAILQ_INIT(&bz->bounce_map_waitinglist); - bz->free_bpages = 0; - bz->reserved_bpages = 0; - bz->active_bpages = 0; - bz->lowaddr = dmat->lowaddr; - bz->alignment = round_page(dmat->alignment); - ksnprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); - busdma_zonecount++; - ksnprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); - STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); - - lwkt_reltoken(&bounce_zone_tok); - - dmat->bounce_zone = bz; - - sysctl_ctx_init(&bz->sysctl_ctx); - bz->sysctl_tree = SYSCTL_ADD_NODE(&bz->sysctl_ctx, - SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, - CTLFLAG_RD, 0, ""); - if (bz->sysctl_tree == NULL) { - sysctl_ctx_free(&bz->sysctl_ctx); - return 0; /* XXX error code? */ - } - - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, - "Total bounce pages"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, - "Free bounce pages"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, - "Reserved bounce pages"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, - "Active bounce pages"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, - "Total bounce requests"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, - "Total bounce requests that were deferred"); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "reserve_failed", CTLFLAG_RD, &bz->reserve_failed, 0, - "Total bounce page reservations that were failed"); - SYSCTL_ADD_STRING(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_INT(&bz->sysctl_ctx, - SYSCTL_CHILDREN(bz->sysctl_tree), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); - - return 0; -} - -static int -alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages, int flags) -{ - struct bounce_zone *bz = dmat->bounce_zone; - int count = 0, mflags; - - if (flags & BUS_DMA_NOWAIT) - mflags = M_NOWAIT; - else - mflags = M_WAITOK; - - while (numpages > 0) { - struct bounce_page *bpage; - - bpage = kmalloc(sizeof(*bpage), M_DEVBUF, M_INTWAIT | M_ZERO); - - bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, - mflags, 0ul, - bz->lowaddr, - bz->alignment, 0); - if (bpage->vaddr == 0) { - kfree(bpage, M_DEVBUF); - break; - } - bpage->busaddr = pmap_kextract(bpage->vaddr); - - BZ_LOCK(bz); - STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); - total_bounce_pages++; - bz->total_bpages++; - bz->free_bpages++; - BZ_UNLOCK(bz); - - count++; - numpages--; - } - return count; -} - -/* Assume caller holds bounce zone spinlock */ -static int -reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) -{ - struct bounce_zone *bz = dmat->bounce_zone; - int pages; - - pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); - if (!commit && map->pagesneeded > (map->pagesreserved + pages)) { - bz->reserve_failed++; - return (map->pagesneeded - (map->pagesreserved + pages)); - } - - bz->free_bpages -= pages; - - bz->reserved_bpages += pages; - KKASSERT(bz->reserved_bpages <= bz->total_bpages); - - map->pagesreserved += pages; - pages = map->pagesneeded - map->pagesreserved; - - return pages; -} - -static void -return_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bounce_zone *bz = dmat->bounce_zone; - int reserved = map->pagesreserved; - bus_dmamap_t wait_map; - - map->pagesreserved = 0; - map->pagesneeded = 0; - - if (reserved == 0) - return; - - BZ_LOCK(bz); - - bz->free_bpages += reserved; - KKASSERT(bz->free_bpages <= bz->total_bpages); - - KKASSERT(bz->reserved_bpages >= reserved); - bz->reserved_bpages -= reserved; - - wait_map = get_map_waiting(dmat); - - BZ_UNLOCK(bz); - - if (wait_map != NULL) - add_map_callback(map); -} - -static bus_addr_t -add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, - bus_size_t size) -{ - struct bounce_zone *bz = dmat->bounce_zone; - struct bounce_page *bpage; - - KASSERT(map->pagesneeded > 0, ("map doesn't need any pages")); - map->pagesneeded--; - - KASSERT(map->pagesreserved > 0, ("map doesn't reserve any pages")); - map->pagesreserved--; - - BZ_LOCK(bz); - - bpage = STAILQ_FIRST(&bz->bounce_page_list); - KASSERT(bpage != NULL, ("free page list is empty")); - STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); - - KKASSERT(bz->reserved_bpages > 0); - bz->reserved_bpages--; - - bz->active_bpages++; - KKASSERT(bz->active_bpages <= bz->total_bpages); - - BZ_UNLOCK(bz); - - bpage->datavaddr = vaddr; - bpage->datacount = size; - STAILQ_INSERT_TAIL(&map->bpages, bpage, links); - return bpage->busaddr; -} - -static void -free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) -{ - struct bounce_zone *bz = dmat->bounce_zone; - bus_dmamap_t map; - - bpage->datavaddr = 0; - bpage->datacount = 0; - - BZ_LOCK(bz); - - STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); - - bz->free_bpages++; - KKASSERT(bz->free_bpages <= bz->total_bpages); - - KKASSERT(bz->active_bpages > 0); - bz->active_bpages--; - - map = get_map_waiting(dmat); - - BZ_UNLOCK(bz); - - if (map != NULL) - add_map_callback(map); -} - -/* Assume caller holds bounce zone spinlock */ -static bus_dmamap_t -get_map_waiting(bus_dma_tag_t dmat) -{ - struct bounce_zone *bz = dmat->bounce_zone; - bus_dmamap_t map; - - map = STAILQ_FIRST(&bz->bounce_map_waitinglist); - if (map != NULL) { - if (reserve_bounce_pages(map->dmat, map, 1) == 0) { - STAILQ_REMOVE_HEAD(&bz->bounce_map_waitinglist, links); - bz->total_deferred++; - } else { - map = NULL; - } - } - return map; -} - -static void -add_map_callback(bus_dmamap_t map) -{ -#ifdef notyet - /* XXX callbacklist is not MPSAFE */ - crit_enter(); - get_mplock(); - STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); - busdma_swi_pending = 1; - setsoftvm(); - rel_mplock(); - crit_exit(); -#else - panic("%s uncoded", __func__); -#endif -} - -#ifdef notyet -void -busdma_swi(void) -{ - bus_dmamap_t map; - - crit_enter(); - while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { - STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); - crit_exit(); - bus_dmamap_load(map->dmat, map, map->buf, map->buflen, - map->callback, map->callback_arg, /*flags*/0); - crit_enter(); - } - crit_exit(); -} -#endif diff --git a/sys/platform/vkernel/platform/console.c b/sys/platform/vkernel/platform/console.c deleted file mode 100644 index 8fab22b05f..0000000000 --- a/sys/platform/vkernel/platform/console.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * (MPSAFE) - * - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int console_stolen_by_kernel; -static struct kqueue_info *kqueue_console_info; - -/************************************************************************ - * CONSOLE DEVICE * - ************************************************************************ - * - */ - -static int vcons_tty_param(struct tty *tp, struct termios *tio); -static void vcons_tty_start(struct tty *tp); -static void vcons_intr(void *tpx, struct intrframe *frame __unused); - -static d_open_t vcons_open; -static d_close_t vcons_close; -static d_ioctl_t vcons_ioctl; - -static struct dev_ops vcons_ops = { - { "vcons", 0, D_TTY }, - .d_open = vcons_open, - .d_close = vcons_close, - .d_read = ttyread, - .d_write = ttywrite, - .d_ioctl = vcons_ioctl, - .d_kqfilter = ttykqfilter, -}; - -static int -vcons_open(struct dev_open_args *ap) -{ - cdev_t dev = ap->a_head.a_dev; - struct tty *tp; - int error; - - lwkt_gettoken(&tty_token); - tp = dev->si_tty = ttymalloc(dev->si_tty); - -#define ISSET(t, f) ((t) & (f)) - - if ((tp->t_state & TS_ISOPEN) == 0) { - tp->t_oproc = vcons_tty_start; - tp->t_param = vcons_tty_param; - tp->t_stop = nottystop; - tp->t_dev = dev; - - tp->t_state |= TS_CARR_ON | TS_CONNECTED; - ttychars(tp); - tp->t_iflag = TTYDEF_IFLAG; - tp->t_oflag = TTYDEF_OFLAG; - tp->t_cflag = TTYDEF_CFLAG; - tp->t_lflag = TTYDEF_LFLAG; - tp->t_ispeed = TTYDEF_SPEED; - tp->t_ospeed = TTYDEF_SPEED; - ttsetwater(tp); - } - if (minor(dev) == 0) { - error = (*linesw[tp->t_line].l_open)(dev, tp); - ioctl(0, TIOCGWINSZ, &tp->t_winsize); - - if (kqueue_console_info == NULL) - kqueue_console_info = kqueue_add(0, vcons_intr, tp); - } else { - /* dummy up other minors so the installer will run */ - error = 0; - } - lwkt_reltoken(&tty_token); - return(error); -} - -static int -vcons_close(struct dev_close_args *ap) -{ - cdev_t dev = ap->a_head.a_dev; - struct tty *tp; - - lwkt_gettoken(&tty_token); - tp = dev->si_tty; - (*linesw[tp->t_line].l_close)(tp, ap->a_fflag); - ttyclose(tp); - lwkt_reltoken(&tty_token); - return(0); -} - -static int -vcons_ioctl(struct dev_ioctl_args *ap) -{ - cdev_t dev = ap->a_head.a_dev; - struct tty *tp; - int error; - - lwkt_gettoken(&tty_token); - tp = dev->si_tty; - error = (*linesw[tp->t_line].l_ioctl)(tp, ap->a_cmd, ap->a_data, - ap->a_fflag, ap->a_cred); - if (error != ENOIOCTL) { - lwkt_reltoken(&tty_token); - return (error); - } - error = ttioctl(tp, ap->a_cmd, ap->a_data, ap->a_fflag); - if (error != ENOIOCTL) { - lwkt_reltoken(&tty_token); - return (error); - } - lwkt_reltoken(&tty_token); - return (ENOTTY); -} - -static int -vcons_tty_param(struct tty *tp, struct termios *tio) -{ - lwkt_gettoken(&tty_token); - tp->t_ispeed = tio->c_ispeed; - tp->t_ospeed = tio->c_ospeed; - tp->t_cflag = tio->c_cflag; - lwkt_reltoken(&tty_token); - return(0); -} - -static void -vcons_tty_start(struct tty *tp) -{ - int n; - char buf[64]; - - lwkt_gettoken(&tty_token); - if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { - ttwwakeup(tp); - lwkt_reltoken(&tty_token); - return; - } - tp->t_state |= TS_BUSY; - while ((n = q_to_b(&tp->t_outq, buf, sizeof(buf))) > 0) { - /* - * Dummy up ttyv1, etc. - */ - if (minor(tp->t_dev) == 0) { - pwrite(1, buf, n, -1); - } - } - tp->t_state &= ~TS_BUSY; - ttwwakeup(tp); - lwkt_reltoken(&tty_token); -} - -static -void -vcons_intr(void *tpx, struct intrframe *frame __unused) -{ - struct tty *tp = tpx; - unsigned char buf[32]; - int i; - int n; - - lwkt_gettoken(&tty_token); - /* - * If we aren't open we only have synchronous traffic via the - * debugger and do not need to poll. - */ - if ((tp->t_state & TS_ISOPEN) == 0) { - lwkt_reltoken(&tty_token); - return; - } - - /* - * Only poll if we are open and haven't been stolen by the debugger. - */ - if (console_stolen_by_kernel == 0 && (tp->t_state & TS_ISOPEN)) { - do { - n = extpread(0, buf, sizeof(buf), O_FNONBLOCKING, -1LL); - for (i = 0; i < n; ++i) - (*linesw[tp->t_line].l_rint)(buf[i], tp); - } while (n > 0); - } - lwkt_reltoken(&tty_token); -} - -/************************************************************************ - * KERNEL CONSOLE INTERFACE * - ************************************************************************ - * - * Kernel direct-call interface console driver - */ -static cn_probe_t vconsprobe; -static cn_init_t vconsinit; -static cn_init_fini_t vconsinit_fini; -static cn_term_t vconsterm; -static cn_getc_t vconsgetc; -static cn_checkc_t vconscheckc; -static cn_putc_t vconsputc; - -CONS_DRIVER(vcons, vconsprobe, vconsinit, vconsinit_fini, vconsterm, vconsgetc, - vconscheckc, vconsputc, NULL); - -static struct termios init_tio; -static struct consdev *vconsole; - -static void -vconsprobe(struct consdev *cp) -{ - cp->cn_pri = CN_NORMAL; - cp->cn_probegood = 1; -} - -/* - * This is a little bulky handler to set proper terminal - * settings in the case of a signal which might lead to - * termination or suspension. - */ -static void -vconssignal(int sig) -{ - struct termios curtio; - struct sigaction sa, osa; - sigset_t ss, oss; - - tcgetattr(0, &curtio); - tcsetattr(0, TCSAFLUSH, &init_tio); - bzero(&sa, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = SIG_DFL; - sigaction(sig, &sa, &osa); - sigemptyset(&ss); - sigaddset(&ss, sig); - sigprocmask(SIG_UNBLOCK, &ss, &oss); - raise(sig); /* now hand down the sig */ - sigprocmask(SIG_SETMASK, &oss, NULL); - sigaction(sig, &osa, NULL); - tcsetattr(0, TCSAFLUSH, &curtio); -} - -static void -vconswinchsig(int __unused sig) -{ - signalintr(3); -} - -static void -vconswinch_intr(void *arg __unused, void *frame __unused) -{ - struct winsize newsize; - - if (vconsole != NULL && vconsole->cn_dev->si_tty != NULL) { - ioctl(0, TIOCGWINSZ, &newsize); - /* - * ttioctl(vconsole->cn_dev->si_tty, TIOCSWINSZ, &newsize, 0); - * I wished. Unfortunately this needs a curproc, so do it - * manually. - */ - if (bcmp(&newsize, &vconsole->cn_dev->si_tty->t_winsize, - sizeof(newsize)) != 0) { - vconsole->cn_dev->si_tty->t_winsize = newsize; - pgsignal(vconsole->cn_dev->si_tty->t_pgrp, SIGWINCH, 1); - } - } -} - -static void -vconscleanup(void) -{ - /* - * We might catch stray SIGIOs, so try hard. - */ - while (tcsetattr(0, TCSAFLUSH, &init_tio) != 0 && errno == EINTR) - /* NOTHING */; -} - -static void -vconsinit(struct consdev *cp) -{ - struct sigaction sa; - - vconsole = cp; - - tcgetattr(0, &init_tio); - bzero(&sa, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = vconssignal; - sigaction(SIGTSTP, &sa, NULL); - sigaction(SIGINT, &sa, NULL); - sigaction(SIGTERM, &sa, NULL); - atexit(vconscleanup); - vcons_set_mode(0); -} - -static void -vconsinit_fini(struct consdev *cp) -{ - struct sigaction sa; - cdev_t dev; - int i; - - /* - * We have to do this here rather then in early boot to be able - * to use the interrupt subsystem. - */ - register_int_virtual(3, vconswinch_intr, NULL, "swinch", NULL, - INTR_MPSAFE); - bzero(&sa, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = vconswinchsig; - sigaction(SIGWINCH, &sa, NULL); - - /* - * Implement ttyv0-ttyv7. At the moment ttyv1-7 are sink nulls. - */ - for (i = 0; i < 8; ++i) { - dev = make_dev(&vcons_ops, i, - UID_ROOT, GID_WHEEL, 0600, "ttyv%d", i); - if (i == 0) { - cp->cn_dev = dev; - } - } - EVENTHANDLER_REGISTER(shutdown_final, vconscleanup, NULL, SHUTDOWN_PRI_LAST); -} - -static void -vconsterm(struct consdev *vp) -{ - vconsole = NULL; - vconscleanup(); -} - -static int -vconsgetc(void *private) -{ - unsigned char c; - ssize_t n; - - console_stolen_by_kernel = 1; - for (;;) { - n = pread(0, &c, 1, -1); - if (n == 1) - break; - if (n < 0 && errno == EINTR) - continue; - panic("vconsgetc: EOF on console %d %d", n ,errno); - } - console_stolen_by_kernel = 0; - return((int)c); -} - -static int -vconscheckc(void *private) -{ - unsigned char c; - - if (extpread(0, &c, 1, O_FNONBLOCKING, -1LL) == 1) - return((int)c); - return(-1); -} - -static void -vconsputc(void *private, int c) -{ - char cc = c; - - pwrite(1, &cc, 1, -1); -} - -void -vcons_set_mode(int in_debugger) -{ - struct termios tio; - - if (tcgetattr(0, &tio) < 0) { - return; - } - cfmakeraw(&tio); - tio.c_oflag |= OPOST | ONLCR; - tio.c_lflag |= ISIG; - if (in_debugger) { - tio.c_cc[VINTR] = init_tio.c_cc[VINTR]; - tio.c_cc[VSUSP] = init_tio.c_cc[VSUSP]; - tio.c_cc[VSTATUS] = init_tio.c_cc[VSTATUS]; - } else { - tio.c_cc[VINTR] = _POSIX_VDISABLE; - tio.c_cc[VSUSP] = _POSIX_VDISABLE; - tio.c_cc[VSTATUS] = _POSIX_VDISABLE; - } - tcsetattr(0, TCSAFLUSH, &tio); -} diff --git a/sys/platform/vkernel/platform/copyio.c b/sys/platform/vkernel/platform/copyio.c deleted file mode 100644 index 586e060b2e..0000000000 --- a/sys/platform/vkernel/platform/copyio.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * A bcopy that works dring low level boot, before FP is working - */ -void -ovbcopy(const void *src, void *dst, size_t len) -{ - bcopy(src, dst, len); -} - -void -bcopyi(const void *src, void *dst, size_t len) -{ - bcopy(src, dst, len); -} - -int -copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *lencopied) -{ - size_t i; - - for (i = 0; i < len; ++i) { - if ((((char *)kdaddr)[i] = ((const char *)kfaddr)[i]) == 0) { - if (lencopied) - *lencopied = i + 1; - return(0); - } - } - return (ENAMETOOLONG); -} - -/* - * Copies a NUL-terminated string from user space to kernel space. - * The number of bytes copied, including the terminator, is returned in - * (*res). - * - * Returns 0 on success, EFAULT or ENAMETOOLONG on failure. - */ -int -copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *res) -{ - int error; - size_t n; - const char *uptr = udaddr; - char *kptr = kaddr; - - if (res) - *res = 0; - while (len) { - n = PAGE_SIZE - ((vm_offset_t)uptr & PAGE_MASK); - if (n > 32) - n = 32; - if (n > len) - n = len; - if ((error = copyin(uptr, kptr, n)) != 0) - return(error); - while (n) { - if (res) - ++*res; - if (*kptr == 0) - return(0); - ++kptr; - ++uptr; - --n; - --len; - } - - } - return(ENAMETOOLONG); -} - -/* - * Copy a binary buffer from user space to kernel space. - * - * NOTE: on a real system copyin/copyout are MP safe, but the current - * implementation on a vkernel is not so we get the mp lock. - * - * Returns 0 on success, EFAULT on failure. - */ -int -copyin(const void *udaddr, void *kaddr, size_t len) -{ - struct vmspace *vm = curproc->p_vmspace; - struct lwbuf *lwb; - struct lwbuf lwb_cache; - vm_page_t m; - int error; - size_t n; - - error = 0; - while (len) { - m = vm_fault_page(&vm->vm_map, trunc_page((vm_offset_t)udaddr), - VM_PROT_READ, - VM_FAULT_NORMAL, &error); - if (error) - break; - n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK); - if (n > len) - n = len; - lwb = lwbuf_alloc(m, &lwb_cache); - bcopy((char *)lwbuf_kva(lwb)+((vm_offset_t)udaddr & PAGE_MASK), - kaddr, n); - len -= n; - udaddr = (const char *)udaddr + n; - kaddr = (char *)kaddr + n; - lwbuf_free(lwb); - vm_page_unhold(m); - } - if (error) - error = EFAULT; - return (error); -} - -/* - * Copy a binary buffer from kernel space to user space. - * - * Returns 0 on success, EFAULT on failure. - */ -int -copyout(const void *kaddr, void *udaddr, size_t len) -{ - struct vmspace *vm = curproc->p_vmspace; - struct lwbuf *lwb; - struct lwbuf lwb_cache; - vm_page_t m; - int error; - size_t n; - - error = 0; - while (len) { - m = vm_fault_page(&vm->vm_map, trunc_page((vm_offset_t)udaddr), - VM_PROT_READ|VM_PROT_WRITE, - VM_FAULT_NORMAL, &error); - if (error) - break; - n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK); - if (n > len) - n = len; - lwb = lwbuf_alloc(m, &lwb_cache); - bcopy(kaddr, (char *)lwbuf_kva(lwb) + - ((vm_offset_t)udaddr & PAGE_MASK), n); - len -= n; - udaddr = (char *)udaddr + n; - kaddr = (const char *)kaddr + n; - vm_page_dirty(m); - lwbuf_free(lwb); - vm_page_unhold(m); - } - if (error) - error = EFAULT; - return (error); -} - -/* - * Fetch the byte at the specified user address. Returns -1 on failure. - */ -int -fubyte(const void *base) -{ - unsigned char c; - - if (copyin(base, &c, 1) == 0) - return((int)c); - return(-1); -} - -/* - * Store a byte at the specified user address. Returns -1 on failure. - */ -int -subyte (void *base, int byte) -{ - unsigned char c = byte; - - if (copyout(&c, base, 1) == 0) - return(0); - return(-1); -} - -/* - * Fetch a word (integer, 32 bits) from user space - */ -long -fuword(const void *base) -{ - long v; - - if (copyin(base, &v, sizeof(v)) == 0) - return(v); - return(-1); -} - -/* - * Store a word (integer, 32 bits) to user space - */ -int -suword(void *base, long word) -{ - if (copyout(&word, base, sizeof(word)) == 0) - return(0); - return(-1); -} - -int -suword32(void *base, int word) -{ - if (copyout(&word, base, sizeof(word)) == 0) - return(0); - return(-1); -} - -/* - * Fetch an short word (16 bits) from user space - */ -int -fusword(void *base) -{ - unsigned short sword; - - if (copyin(base, &sword, sizeof(sword)) == 0) - return((int)sword); - return(-1); -} - -/* - * Store a short word (16 bits) to user space - */ -int -susword (void *base, int word) -{ - unsigned short sword = word; - - if (copyout(&sword, base, sizeof(sword)) == 0) - return(0); - return(-1); -} diff --git a/sys/platform/vkernel/platform/cothread.c b/sys/platform/vkernel/platform/cothread.c deleted file mode 100644 index c00b9ce8dc..0000000000 --- a/sys/platform/vkernel/platform/cothread.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2008-2010 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/platform/cothread.c,v 1.3 2008/05/07 17:19:47 dillon Exp $ - */ -/* - * Provides the vkernel with an asynchronous I/O mechanism using pthreads - * which operates outside the cpu abstraction. Cothreads are intended to - * operate like DMA engines and may ONLY make libc and cothread_*() calls. - * The cothread may NOT call into the vkernel since abstractions like - * 'mycpu' do not exist for it. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static void cothread_thread(void *arg); - -/* - * Create a co-processor thread for a virtual kernel. This thread operates - * outside of the virtual kernel cpu abstraction and may only make direct - * cothread and libc calls. - */ -cothread_t -cothread_create(void (*thr_func)(cothread_t cotd), - void (*thr_intr)(cothread_t cotd), - void *arg, const char *name) -{ - cothread_t cotd; - - cotd = kmalloc(sizeof(*cotd), M_DEVBUF, M_WAITOK|M_ZERO); - cotd->thr_intr = thr_intr; - cotd->thr_func = thr_func; - cotd->arg = arg; - crit_enter(); - pthread_mutex_init(&cotd->mutex, NULL); - pthread_cond_init(&cotd->cond, NULL); - crit_exit(); - - cotd->pintr = pthread_self(); - - cotd->intr_id = register_int_virtual(1, (void *)thr_intr, cotd, name, - NULL, INTR_MPSAFE); - - /* - * The vkernel's cpu_disable_intr() masks signals. We don't want - * our coprocessor thread taking any unix signals :-) - */ - crit_enter(); - cpu_mask_all_signals(); - pthread_create(&cotd->pthr, NULL, (void *)cothread_thread, cotd); - cpu_unmask_all_signals(); - crit_exit(); - return(cotd); -} - -/* - * Wait for the target thread to terminate and then destroy the cothread - * structure. - */ -void -cothread_delete(cothread_t *cotdp) -{ - cothread_t cotd; - - if ((cotd = *cotdp) != NULL) { - unregister_int_virtual(cotd->intr_id); - crit_enter(); - pthread_join(cotd->pthr, NULL); - crit_exit(); - kfree(cotd, M_DEVBUF); - *cotdp = NULL; - } -} - -static void -cothread_thread(void *arg) -{ - cothread_t cotd = arg; - int dummy = 0; - - cpu_mask_all_signals(); /* XXX remove me? should already be masked */ - /* - * %fs (aka mycpu) is illegal in cothreads. Note that %fs is used - * by pthreads. - */ - tls_set_fs(&dummy, sizeof(dummy)); - cotd->thr_func(cotd); -} - -/* - * Called by the cothread to generate an interrupt back to the vkernel. - */ -void -cothread_intr(cothread_t cotd) -{ - pthread_kill(cotd->pintr, SIGIO); -} - -/* - * Called by the vkernel to wakeup a cothread. - * The cothread must be locked. - */ -void -cothread_signal(cothread_t cotd) -{ - pthread_cond_signal(&cotd->cond); -} - -/* - * Called by the cothread to wait for the vkernel to call cothread_signal(). - * The cothread must be locked. - */ -void -cothread_wait(cothread_t cotd) -{ - pthread_cond_wait(&cotd->cond, &cotd->mutex); -} - -/* - * Typically called by kernel thread or cothread - * - * These must be a matched pair. We will acquire a critical - * section in cothread_lock() and release it in cothread_unlock(). - * - * We do this to simplify cothread operation to prevent an - * interrupt (e.g. vkd_io_intr()) from preempting a vkd_strategy() - * call and creating a recursion in the pthread. - */ -void -cothread_lock(cothread_t cotd, int is_cotd) -{ - if (is_cotd) { - pthread_mutex_lock(&cotd->mutex); - } else { - crit_enter_id("cothread"); - pthread_mutex_lock(&cotd->mutex); - } -} - -void -cothread_unlock(cothread_t cotd, int is_cotd) -{ - if (is_cotd) { - pthread_mutex_unlock(&cotd->mutex); - } else { - pthread_mutex_unlock(&cotd->mutex); - crit_exit_id("cothread"); - } -} - diff --git a/sys/platform/vkernel/platform/globaldata.c b/sys/platform/vkernel/platform/globaldata.c deleted file mode 100644 index 9b956bf8e3..0000000000 --- a/sys/platform/vkernel/platform/globaldata.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/platform/globaldata.c,v 1.5 2008/04/28 07:05:08 dillon Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct globaldata * -globaldata_find(int cpu) -{ - KKASSERT(cpu >= 0 && cpu < ncpus); - return (&CPU_prvspace[cpu].mdglobaldata.mi); -} - -void -cpu_gdinit(struct mdglobaldata *gd, int cpu) -{ - if (cpu) - gd->mi.gd_curthread = &gd->mi.gd_idlethread; - - lwkt_init_thread(&gd->mi.gd_idlethread, - gd->mi.gd_prvspace->idlestack, - sizeof(gd->mi.gd_prvspace->idlestack), - 0, &gd->mi); - lwkt_set_comm(&gd->mi.gd_idlethread, "idle_%d", cpu); - gd->mi.gd_idlethread.td_switch = cpu_lwkt_switch; - gd->mi.gd_idlethread.td_sp -= sizeof(void *); - *(void **)gd->mi.gd_idlethread.td_sp = cpu_idle_restore; - - /* - * Single page mappings and where we have to store the related - * pte's - */ - gd->gd_CADDR1 = (caddr_t)gd->mi.gd_prvspace->CPAGE1; - gd->gd_CADDR2 = (caddr_t)gd->mi.gd_prvspace->CPAGE2; - gd->gd_CADDR3 = (caddr_t)gd->mi.gd_prvspace->CPAGE3; - gd->gd_PADDR1 = gd->mi.gd_prvspace->PPAGE1; - - gd->gd_CMAP1 = &KernelPTA[(vm_offset_t)gd->gd_CADDR1 / PAGE_SIZE]; - gd->gd_CMAP2 = &KernelPTA[(vm_offset_t)gd->gd_CADDR2 / PAGE_SIZE]; - gd->gd_CMAP3 = &KernelPTA[(vm_offset_t)gd->gd_CADDR3 / PAGE_SIZE]; - gd->gd_PMAP1 = &KernelPTA[(vm_offset_t)gd->gd_PADDR1 / PAGE_SIZE]; - - /* - * Whole page table mappings and where we have to store the related - * pde's. - */ - - gd->gd_PT1map = gd->mi.gd_prvspace->PT1MAP; - gd->gd_PT1pdir = NULL; - gd->gd_PT1pde = &KernelPTD[((vm_offset_t)gd->gd_PT1map - KvaStart) / - SEG_SIZE]; - - gd->gd_PT2map = gd->mi.gd_prvspace->PT2MAP; - gd->gd_PT2pdir = NULL; - gd->gd_PT2pde = &KernelPTD[((vm_offset_t)gd->gd_PT2map - KvaStart) / - SEG_SIZE]; - - gd->gd_PT3map = gd->mi.gd_prvspace->PT3MAP; - gd->gd_PT3pdir = NULL; - gd->gd_PT3pde = &KernelPTD[((vm_offset_t)gd->gd_PT3map - KvaStart) / - SEG_SIZE]; - - KKASSERT(((vm_offset_t)gd->gd_PT1map & SEG_MASK) == 0); - KKASSERT(((vm_offset_t)gd->gd_PT2map & SEG_MASK) == 0); - KKASSERT(((vm_offset_t)gd->gd_PT3map & SEG_MASK) == 0); -} - -int -is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr) -{ - if (saddr >= (vm_offset_t)&CPU_prvspace[0] && - eaddr <= (vm_offset_t)&CPU_prvspace[MAXCPU]) { - return (TRUE); - } - return (FALSE); -} - diff --git a/sys/platform/vkernel/platform/init.c b/sys/platform/vkernel/platform/init.c deleted file mode 100644 index eda6a7536f..0000000000 --- a/sys/platform/vkernel/platform/init.c +++ /dev/null @@ -1,1450 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -vm_paddr_t phys_avail[16]; -vm_paddr_t Maxmem; -vm_paddr_t Maxmem_bytes; -long physmem; -int MemImageFd = -1; -struct vkdisk_info DiskInfo[VKDISK_MAX]; -int DiskNum; -struct vknetif_info NetifInfo[VKNETIF_MAX]; -int NetifNum; -char *pid_file; -vm_offset_t KvaStart; -vm_offset_t KvaEnd; -vm_offset_t KvaSize; -vm_offset_t virtual_start; -vm_offset_t virtual_end; -vm_offset_t virtual2_start; -vm_offset_t virtual2_end; -vm_offset_t kernel_vm_end; -vm_offset_t crashdumpmap; -vm_offset_t clean_sva; -vm_offset_t clean_eva; -struct msgbuf *msgbufp; -caddr_t ptvmmap; -vpte_t *KernelPTD; -vpte_t *KernelPTA; /* Warning: Offset for direct VA translation */ -u_int cpu_feature; /* XXX */ -int tsc_present; -int tsc_invariant; -int tsc_mpsync; -int64_t tsc_frequency; -int optcpus; /* number of cpus - see mp_start() */ -int lwp_cpu_lock; /* if/how to lock virtual CPUs to real CPUs */ -int real_ncpus; /* number of real CPUs */ -int next_cpu; /* next real CPU to lock a virtual CPU to */ -int vkernel_b_arg; /* no of logical CPU bits - only SMP */ -int vkernel_B_arg; /* no of core bits - only SMP */ - -int via_feature_xcrypt = 0; /* XXX */ -int via_feature_rng = 0; /* XXX */ - -struct privatespace *CPU_prvspace; - -static struct trapframe proc0_tf; -static void *proc0paddr; - -static void init_sys_memory(char *imageFile); -static void init_kern_memory(void); -static void init_globaldata(void); -static void init_vkernel(void); -static void init_disk(char *diskExp[], int diskFileNum, enum vkdisk_type type); -static void init_netif(char *netifExp[], int netifFileNum); -static void writepid(void); -static void cleanpid(void); -static int unix_connect(const char *path); -static void usage_err(const char *ctl, ...); -static void usage_help(_Bool); -static void init_locks(void); - -static int save_ac; -static char **save_av; - -/* - * Kernel startup for virtual kernels - standard main() - */ -int -main(int ac, char **av) -{ - char *memImageFile = NULL; - char *netifFile[VKNETIF_MAX]; - char *diskFile[VKDISK_MAX]; - char *cdFile[VKDISK_MAX]; - char *suffix; - char *endp; - char *tmp; - char *tok; - int netifFileNum = 0; - int diskFileNum = 0; - int cdFileNum = 0; - int bootOnDisk = -1; /* set below to vcd (0) or vkd (1) */ - int c; - int i; - int j; - int n; - int isq; - int pos; - int eflag; - int dflag = 0; /* disable vmm */ - int real_vkernel_enable; - int supports_sse; - size_t vsize; - size_t kenv_size; - size_t kenv_size2; - - save_ac = ac; - save_av = av; - eflag = 0; - pos = 0; - kenv_size = 0; - - /* - * Process options - */ - kernel_mem_readonly = 1; - optcpus = 2; - vkernel_b_arg = 0; - vkernel_B_arg = 0; - lwp_cpu_lock = LCL_NONE; - - real_vkernel_enable = 0; - vsize = sizeof(real_vkernel_enable); - sysctlbyname("vm.vkernel_enable", &real_vkernel_enable, &vsize, NULL,0); - - if (real_vkernel_enable == 0) { - errx(1, "vm.vkernel_enable is 0, must be set " - "to 1 to execute a vkernel!"); - } - - real_ncpus = 1; - vsize = sizeof(real_ncpus); - sysctlbyname("hw.ncpu", &real_ncpus, &vsize, NULL, 0); - - if (ac < 2) - usage_help(false); - - while ((c = getopt(ac, av, "c:hsvl:m:n:r:e:i:p:I:Ud")) != -1) { - switch(c) { - case 'd': - printf("vmm: No need to disable. Hardware pagetable " - "is not available in vkernel32.\n"); - dflag = 1; - break; - case 'e': - /* - * name=value:name=value:name=value... - * name="value"... - * - * Allow values to be quoted but note that shells - * may remove the quotes, so using this feature - * to embed colons may require a backslash. - */ - n = strlen(optarg); - isq = 0; - - if (eflag == 0) { - kenv_size = n + 2; - kern_envp = malloc(kenv_size); - if (kern_envp == NULL) - errx(1, "Couldn't allocate %zd bytes for kern_envp", kenv_size); - } else { - kenv_size2 = kenv_size + n + 1; - pos = kenv_size - 1; - if ((tmp = realloc(kern_envp, kenv_size2)) == NULL) - errx(1, "Couldn't reallocate %zd bytes for kern_envp", kenv_size2); - kern_envp = tmp; - kenv_size = kenv_size2; - } - - for (i = 0, j = pos; i < n; ++i) { - if (optarg[i] == '"') - isq ^= 1; - else if (optarg[i] == '\'') - isq ^= 2; - else if (isq == 0 && optarg[i] == ':') - kern_envp[j++] = 0; - else - kern_envp[j++] = optarg[i]; - } - kern_envp[j++] = 0; - kern_envp[j++] = 0; - eflag++; - break; - case 's': - boothowto |= RB_SINGLE; - break; - case 'v': - bootverbose = 1; - break; - case 'i': - memImageFile = optarg; - break; - case 'I': - if (netifFileNum < VKNETIF_MAX) - netifFile[netifFileNum++] = strdup(optarg); - break; - case 'r': - if (bootOnDisk < 0) - bootOnDisk = 1; - if (diskFileNum + cdFileNum < VKDISK_MAX) - diskFile[diskFileNum++] = strdup(optarg); - break; - case 'c': - if (bootOnDisk < 0) - bootOnDisk = 0; - if (diskFileNum + cdFileNum < VKDISK_MAX) - cdFile[cdFileNum++] = strdup(optarg); - break; - case 'm': - Maxmem_bytes = strtoull(optarg, &suffix, 0); - if (suffix) { - switch(*suffix) { - case 'g': - case 'G': - Maxmem_bytes <<= 30; - break; - case 'm': - case 'M': - Maxmem_bytes <<= 20; - break; - case 'k': - case 'K': - Maxmem_bytes <<= 10; - break; - default: - Maxmem_bytes = 0; - usage_err("Bad maxmem option"); - /* NOT REACHED */ - break; - } - } - break; - case 'l': - next_cpu = -1; - if (strncmp("map", optarg, 3) == 0) { - lwp_cpu_lock = LCL_PER_CPU; - if (optarg[3] == ',') { - next_cpu = strtol(optarg+4, &endp, 0); - if (*endp != '\0') - usage_err("Bad target CPU number at '%s'", endp); - } else { - next_cpu = 0; - } - if (next_cpu < 0 || next_cpu > real_ncpus - 1) - usage_err("Bad target CPU, valid range is 0-%d", real_ncpus - 1); - } else if (strncmp("any", optarg, 3) == 0) { - lwp_cpu_lock = LCL_NONE; - } else { - lwp_cpu_lock = LCL_SINGLE_CPU; - next_cpu = strtol(optarg, &endp, 0); - if (*endp != '\0') - usage_err("Bad target CPU number at '%s'", endp); - if (next_cpu < 0 || next_cpu > real_ncpus - 1) - usage_err("Bad target CPU, valid range is 0-%d", real_ncpus - 1); - } - break; - case 'n': - /* - * This value is set up by mp_start(), don't just - * set ncpus here. - */ - tok = strtok(optarg, ":"); - optcpus = strtol(tok, NULL, 0); - if (optcpus < 1 || optcpus > MAXCPU) - usage_err("Bad ncpus, valid range is 1-%d", MAXCPU); - - /* :core_bits argument */ - tok = strtok(NULL, ":"); - if (tok != NULL) { - vkernel_b_arg = strtol(tok, NULL, 0); - - /* :logical_CPU_bits argument */ - tok = strtok(NULL, ":"); - if (tok != NULL) { - vkernel_B_arg = strtol(tok, NULL, 0); - } - - } - break; - case 'p': - pid_file = optarg; - break; - case 'U': - kernel_mem_readonly = 0; - break; - case 'h': - usage_help(true); - break; - default: - usage_help(false); - } - } - - writepid(); - cpu_disable_intr(); - init_sys_memory(memImageFile); - init_kern_memory(); - init_globaldata(); - init_vkernel(); - setrealcpu(); - init_kqueue(); - - vmm_guest = VMM_GUEST_VKERNEL; - - /* - * Check TSC - */ - vsize = sizeof(tsc_present); - sysctlbyname("hw.tsc_present", &tsc_present, &vsize, NULL, 0); - vsize = sizeof(tsc_invariant); - sysctlbyname("hw.tsc_invariant", &tsc_invariant, &vsize, NULL, 0); - vsize = sizeof(tsc_mpsync); - sysctlbyname("hw.tsc_mpsync", &tsc_mpsync, &vsize, NULL, 0); - vsize = sizeof(tsc_frequency); - sysctlbyname("hw.tsc_frequency", &tsc_frequency, &vsize, NULL, 0); - if (tsc_present) - cpu_feature |= CPUID_TSC; - - /* - * Check SSE - */ - vsize = sizeof(supports_sse); - supports_sse = 0; - sysctlbyname("hw.instruction_sse", &supports_sse, &vsize, NULL, 0); - init_fpu(supports_sse); - if (supports_sse) - cpu_feature |= CPUID_SSE | CPUID_FXSR; - - /* - * We boot from the first installed disk. - */ - if (bootOnDisk == 1) { - init_disk(diskFile, diskFileNum, VKD_DISK); - init_disk(cdFile, cdFileNum, VKD_CD); - } else { - init_disk(cdFile, cdFileNum, VKD_CD); - init_disk(diskFile, diskFileNum, VKD_DISK); - } - init_netif(netifFile, netifFileNum); - init_exceptions(); - mi_startup(); - /* NOT REACHED */ - exit(EX_SOFTWARE); -} - -/* - * Initialize system memory. This is the virtual kernel's 'RAM'. - */ -static -void -init_sys_memory(char *imageFile) -{ - struct stat st; - int i; - int fd; - - /* - * Figure out the system memory image size. If an image file was - * specified and -m was not specified, use the image file's size. - */ - if (imageFile && stat(imageFile, &st) == 0 && Maxmem_bytes == 0) - Maxmem_bytes = (vm_paddr_t)st.st_size; - if ((imageFile == NULL || stat(imageFile, &st) < 0) && - Maxmem_bytes == 0) { - errx(1, "Cannot create new memory file %s unless " - "system memory size is specified with -m", - imageFile); - /* NOT REACHED */ - } - - /* - * Maxmem must be known at this time - */ - if (Maxmem_bytes < 32 * 1024 * 1024 || (Maxmem_bytes & SEG_MASK)) { - errx(1, "Bad maxmem specification: 32MB minimum, " - "multiples of %dMB only", - SEG_SIZE / 1024 / 1024); - /* NOT REACHED */ - } - - /* - * Generate an image file name if necessary, then open/create the - * file exclusively locked. Do not allow multiple virtual kernels - * to use the same image file. - * - * Don't iterate through a million files if we do not have write - * access to the directory, stop if our open() failed on a - * non-existant file. Otherwise opens can fail for any number - * of reasons (lock failed, file not owned or writable by us, etc). - */ - if (imageFile == NULL) { - for (i = 0; i < 1000000; ++i) { - asprintf(&imageFile, "/var/vkernel/memimg.%06d", i); - fd = open(imageFile, - O_RDWR|O_CREAT|O_EXLOCK|O_NONBLOCK, 0644); - if (fd < 0 && stat(imageFile, &st) == 0) { - free(imageFile); - continue; - } - break; - } - } else { - fd = open(imageFile, O_RDWR|O_CREAT|O_EXLOCK|O_NONBLOCK, 0644); - } - printf("Using memory file: %s\n", imageFile); - if (fd < 0 || fstat(fd, &st) < 0) { - err(1, "Unable to open/create %s", imageFile); - /* NOT REACHED */ - } - - /* - * Truncate or extend the file as necessary. Clean out the contents - * of the file, we want it to be full of holes so we don't waste - * time reading in data from an old file that we no longer care - * about. - */ - ftruncate(fd, 0); - ftruncate(fd, Maxmem_bytes); - - MemImageFd = fd; - Maxmem = Maxmem_bytes >> PAGE_SHIFT; - physmem = Maxmem; -} - -/* - * Initialize pool tokens and other necessary locks - */ -static void -init_locks(void) -{ - - /* - * Get the initial mplock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. - */ - cpu_get_initial_mplock(); - - /* our token pool needs to work early */ - lwkt_token_pool_init(); - -} - -/* - * Initialize kernel memory. This reserves kernel virtual memory by using - * MAP_VPAGETABLE - */ - -static -void -init_kern_memory(void) -{ - void *base; - void *try; - char *zero; - char dummy; - char *topofstack = &dummy; - vpte_t pte; - int i; - - /* - * Memory map our kernel virtual memory space. Note that the - * kernel image itself is not made part of this memory for the - * moment. - * - * The memory map must be segment-aligned so we can properly - * offset KernelPTD. - * - * If the system kernel has a different MAXDSIZ, it might not - * be possible to map kernel memory in its prefered location. - * Try a number of different locations. - */ - try = (void *)0x40000000; - base = NULL; - while ((char *)try + KERNEL_KVA_SIZE < topofstack) { - base = mmap(try, KERNEL_KVA_SIZE, PROT_READ|PROT_WRITE, - MAP_FILE|MAP_SHARED|MAP_VPAGETABLE, - MemImageFd, 0); - if (base == try) - break; - if (base != MAP_FAILED) - munmap(base, KERNEL_KVA_SIZE); - try = (char *)try + 0x10000000; - } - if (base != try) { - err(1, "Unable to mmap() kernel virtual memory!"); - /* NOT REACHED */ - } - madvise(base, KERNEL_KVA_SIZE, MADV_NOSYNC); - KvaStart = (vm_offset_t)base; - KvaSize = KERNEL_KVA_SIZE; - KvaEnd = KvaStart + KvaSize; - - /* cannot use kprintf yet */ - printf("KVM mapped at %p-%p\n", (void *)KvaStart, (void *)KvaEnd); - - /* - * Create a top-level page table self-mapping itself. - * - * Initialize the page directory at physical page index 0 to point - * to an array of page table pages starting at physical page index 1 - */ - lseek(MemImageFd, 0L, 0); - for (i = 0; i < KERNEL_KVA_SIZE / SEG_SIZE; ++i) { - pte = ((i + 1) * PAGE_SIZE) | VPTE_V | VPTE_RW; - write(MemImageFd, &pte, sizeof(pte)); - } - - /* - * Initialize the PTEs in the page table pages required to map the - * page table itself. This includes mapping the page directory page - * at the base so we go one more loop then normal. - */ - lseek(MemImageFd, PAGE_SIZE, 0); - for (i = 0; i <= KERNEL_KVA_SIZE / SEG_SIZE * sizeof(vpte_t); ++i) { - pte = (i * PAGE_SIZE) | VPTE_V | VPTE_RW; - write(MemImageFd, &pte, sizeof(pte)); - } - - /* - * Initialize remaining PTEs to 0. We may be reusing a memory image - * file. This is approximately a megabyte. - */ - i = (KERNEL_KVA_SIZE / PAGE_SIZE - i) * sizeof(pte); - zero = malloc(PAGE_SIZE); - bzero(zero, PAGE_SIZE); - while (i) { - write(MemImageFd, zero, (i > PAGE_SIZE) ? PAGE_SIZE : i); - i = i - ((i > PAGE_SIZE) ? PAGE_SIZE : i); - } - free(zero); - - /* - * Enable the page table and calculate pointers to our self-map - * for easy kernel page table manipulation. - * - * KernelPTA must be offset so we can do direct VA translations - */ - mcontrol(base, KERNEL_KVA_SIZE, MADV_SETMAP, - 0 | VPTE_RW | VPTE_V); - KernelPTD = (vpte_t *)base; /* pg directory */ - KernelPTA = (vpte_t *)((char *)base + PAGE_SIZE); /* pg table pages */ - KernelPTA -= KvaStart >> PAGE_SHIFT; - - /* - * phys_avail[] represents unallocated physical memory. MI code - * will use phys_avail[] to create the vm_page array. - */ - phys_avail[0] = PAGE_SIZE + - KERNEL_KVA_SIZE / PAGE_SIZE * sizeof(vpte_t); - phys_avail[0] = (phys_avail[0] + PAGE_MASK) & ~(vm_paddr_t)PAGE_MASK; - phys_avail[1] = Maxmem_bytes; - - /* - * (virtual_start, virtual_end) represent unallocated kernel virtual - * memory. MI code will create kernel_map using these parameters. - */ - virtual_start = KvaStart + PAGE_SIZE + - KERNEL_KVA_SIZE / PAGE_SIZE * sizeof(vpte_t); - virtual_start = (virtual_start + PAGE_MASK) & ~(vm_offset_t)PAGE_MASK; - virtual_end = KvaStart + KERNEL_KVA_SIZE; - - /* - * kernel_vm_end could be set to virtual_end but we want some - * indication of how much of the kernel_map we've used, so - * set it low and let pmap_growkernel increase it even though we - * don't need to create any new page table pages. - */ - kernel_vm_end = virtual_start; - - /* - * Allocate space for process 0's UAREA. - */ - proc0paddr = (void *)virtual_start; - for (i = 0; i < UPAGES; ++i) { - pmap_kenter_quick(virtual_start, phys_avail[0]); - virtual_start += PAGE_SIZE; - phys_avail[0] += PAGE_SIZE; - } - - /* - * crashdumpmap - */ - crashdumpmap = virtual_start; - virtual_start += MAXDUMPPGS * PAGE_SIZE; - - /* - * msgbufp maps the system message buffer - */ - assert((MSGBUF_SIZE & PAGE_MASK) == 0); - msgbufp = (void *)virtual_start; - for (i = 0; i < (MSGBUF_SIZE >> PAGE_SHIFT); ++i) { - pmap_kenter_quick(virtual_start, phys_avail[0]); - virtual_start += PAGE_SIZE; - phys_avail[0] += PAGE_SIZE; - } - msgbufinit(msgbufp, MSGBUF_SIZE); - - /* - * used by kern_memio for /dev/mem access - */ - ptvmmap = (caddr_t)virtual_start; - virtual_start += PAGE_SIZE; - - /* - * Bootstrap the kernel_pmap - */ - pmap_bootstrap(); -} - -/* - * Map the per-cpu globaldata for cpu #0. Allocate the space using - * virtual_start and phys_avail[0] - */ -static -void -init_globaldata(void) -{ - int i; - vm_paddr_t pa; - vm_offset_t va; - - /* - * Reserve enough KVA to cover possible cpus. This is a considerable - * amount of KVA since the privatespace structure includes two - * whole page table mappings. - */ - virtual_start = (virtual_start + SEG_MASK) & ~(vm_offset_t)SEG_MASK; - CPU_prvspace = (void *)virtual_start; - virtual_start += sizeof(struct privatespace) * SMP_MAXCPU; - - /* - * Allocate enough physical memory to cover the mdglobaldata - * portion of the space and the idle stack and map the pages - * into KVA. For cpu #0 only. - */ - for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) { - pa = phys_avail[0]; - va = (vm_offset_t)&CPU_prvspace[0].mdglobaldata + i; - pmap_kenter_quick(va, pa); - phys_avail[0] += PAGE_SIZE; - } - for (i = 0; i < sizeof(CPU_prvspace[0].idlestack); i += PAGE_SIZE) { - pa = phys_avail[0]; - va = (vm_offset_t)&CPU_prvspace[0].idlestack + i; - pmap_kenter_quick(va, pa); - phys_avail[0] += PAGE_SIZE; - } - - /* - * Setup the %fs for cpu #0. The mycpu macro works after this - * point. Note that %gs is used by pthreads. - */ - tls_set_fs(&CPU_prvspace[0], sizeof(struct privatespace)); -} - -/* - * Initialize very low level systems including thread0, proc0, etc. - */ -static -void -init_vkernel(void) -{ - struct mdglobaldata *gd; - - gd = &CPU_prvspace[0].mdglobaldata; - bzero(gd, sizeof(*gd)); - - gd->mi.gd_curthread = &thread0; - thread0.td_gd = &gd->mi; - ncpus = 1; - ncpus2 = 1; /* rounded down power of 2 */ - ncpus_fit = 1; /* rounded up power of 2 */ - /* ncpus2_mask and ncpus_fit_mask are 0 */ - init_param1(); - gd->mi.gd_prvspace = &CPU_prvspace[0]; - mi_gdinit(&gd->mi, 0); - cpu_gdinit(gd, 0); - mi_proc0init(&gd->mi, proc0paddr); - lwp0.lwp_md.md_regs = &proc0_tf; - - init_locks(); - cninit(); - rand_initialize(); -#if 0 /* #ifdef DDB */ - kdb_init(); - if (boothowto & RB_KDB) - Debugger("Boot flags requested debugger"); -#endif - identcpu(); -#if 0 - initializecpu(); /* Initialize CPU registers */ -#endif - init_param2((phys_avail[1] - phys_avail[0]) / PAGE_SIZE); - -#if 0 - /* - * Map the message buffer - */ - for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) - pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); - msgbufinit(msgbufp, MSGBUF_SIZE); -#endif -#if 0 - thread0.td_pcb_cr3 ... MMU - lwp0.lwp_md.md_regs = &proc0_tf; -#endif -} - -/* - * Filesystem image paths for the virtual kernel are optional. - * If specified they each should point to a disk image, - * the first of which will become the root disk. - * - * The virtual kernel caches data from our 'disk' just like a normal kernel, - * so we do not really want the real kernel to cache the data too. Use - * O_DIRECT to remove the duplication. - */ -static -void -init_disk(char *diskExp[], int diskFileNum, enum vkdisk_type type) -{ - char *serno; - int i; - - if (diskFileNum == 0) - return; - - for(i=0; i < diskFileNum; i++){ - char *fname; - fname = diskExp[i]; - - if (fname == NULL) { - warnx("Invalid argument to '-r'"); - continue; - } - /* - * Check for a serial number for the virtual disk - * passed from the command line. - */ - serno = fname; - strsep(&serno, ":"); - - if (DiskNum < VKDISK_MAX) { - struct stat st; - struct vkdisk_info* info = NULL; - int fd; - size_t l = 0; - - if (type == VKD_DISK) - fd = open(fname, O_RDWR|O_DIRECT, 0644); - else - fd = open(fname, O_RDONLY|O_DIRECT, 0644); - if (fd < 0 || fstat(fd, &st) < 0) { - err(1, "Unable to open/create %s", fname); - /* NOT REACHED */ - } - if (S_ISREG(st.st_mode)) { - if (flock(fd, LOCK_EX|LOCK_NB) < 0) { - errx(1, "Disk image %s is already " - "in use\n", fname); - /* NOT REACHED */ - } - } - - info = &DiskInfo[DiskNum]; - l = strlen(fname); - - info->unit = i; - info->fd = fd; - info->type = type; - memcpy(info->fname, fname, l); - info->serno = NULL; - if (serno) { - if ((info->serno = malloc(SERNOLEN)) != NULL) - strlcpy(info->serno, serno, SERNOLEN); - else - warnx("Couldn't allocate memory for the operation"); - } - - if (DiskNum == 0) { - if (type == VKD_CD) { - rootdevnames[0] = "cd9660:vcd0"; - } else if (type == VKD_DISK) { - rootdevnames[0] = "ufs:vkd0s0a"; - rootdevnames[1] = "ufs:vkd0s1a"; - } - } - - DiskNum++; - } else { - warnx("vkd%d (%s) > VKDISK_MAX", DiskNum, fname); - continue; - } - } -} - -static -int -netif_set_tapflags(int tap_unit, int f, int s) -{ - struct ifreq ifr; - int flags; - - bzero(&ifr, sizeof(ifr)); - - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "tap%d", tap_unit); - if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) { - warn("tap%d: ioctl(SIOCGIFFLAGS) failed", tap_unit); - return -1; - } - - /* - * Adjust if_flags - * - * If the flags are already set/cleared, then we return - * immediately to avoid extra syscalls - */ - flags = (ifr.ifr_flags & 0xffff) | (ifr.ifr_flagshigh << 16); - if (f < 0) { - /* Turn off flags */ - f = -f; - if ((flags & f) == 0) - return 0; - flags &= ~f; - } else { - /* Turn on flags */ - if (flags & f) - return 0; - flags |= f; - } - - /* - * Fix up ifreq.ifr_name, since it may be trashed - * in previous ioctl(SIOCGIFFLAGS) - */ - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "tap%d", tap_unit); - - ifr.ifr_flags = flags & 0xffff; - ifr.ifr_flagshigh = flags >> 16; - if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) { - warn("tap%d: ioctl(SIOCSIFFLAGS) failed", tap_unit); - return -1; - } - return 0; -} - -static -int -netif_set_tapaddr(int tap_unit, in_addr_t addr, in_addr_t mask, int s) -{ - struct ifaliasreq ifra; - struct sockaddr_in *in; - - bzero(&ifra, sizeof(ifra)); - snprintf(ifra.ifra_name, sizeof(ifra.ifra_name), "tap%d", tap_unit); - - /* Setup address */ - in = (struct sockaddr_in *)&ifra.ifra_addr; - in->sin_family = AF_INET; - in->sin_len = sizeof(*in); - in->sin_addr.s_addr = addr; - - if (mask != 0) { - /* Setup netmask */ - in = (struct sockaddr_in *)&ifra.ifra_mask; - in->sin_len = sizeof(*in); - in->sin_addr.s_addr = mask; - } - - if (ioctl(s, SIOCAIFADDR, &ifra) < 0) { - warn("tap%d: ioctl(SIOCAIFADDR) failed", tap_unit); - return -1; - } - return 0; -} - -static -int -netif_add_tap2brg(int tap_unit, const char *ifbridge, int s) -{ - struct ifbreq ifbr; - struct ifdrv ifd; - - bzero(&ifbr, sizeof(ifbr)); - snprintf(ifbr.ifbr_ifsname, sizeof(ifbr.ifbr_ifsname), - "tap%d", tap_unit); - - bzero(&ifd, sizeof(ifd)); - strlcpy(ifd.ifd_name, ifbridge, sizeof(ifd.ifd_name)); - ifd.ifd_cmd = BRDGADD; - ifd.ifd_len = sizeof(ifbr); - ifd.ifd_data = &ifbr; - - if (ioctl(s, SIOCSDRVSPEC, &ifd) < 0) { - /* - * 'errno == EEXIST' means that the tap(4) is already - * a member of the bridge(4) - */ - if (errno != EEXIST) { - warn("ioctl(%s, SIOCSDRVSPEC) failed", ifbridge); - return -1; - } - } - return 0; -} - -#define TAPDEV_OFLAGS (O_RDWR | O_NONBLOCK) - -/* - * Locate the first unused tap(4) device file if auto mode is requested, - * or open the user supplied device file, and bring up the corresponding - * tap(4) interface. - * - * NOTE: Only tap(4) device file is supported currently - */ -static -int -netif_open_tap(const char *netif, int *tap_unit, int s) -{ - char tap_dev[MAXPATHLEN]; - int tap_fd, failed; - struct stat st; - char *dname; - - *tap_unit = -1; - - if (strcmp(netif, "auto") == 0) { - /* - * Find first unused tap(4) device file - */ - tap_fd = open("/dev/tap", TAPDEV_OFLAGS); - if (tap_fd < 0) { - warnc(errno, "Unable to find a free tap(4)"); - return -1; - } - } else { - /* - * User supplied tap(4) device file or unix socket. - */ - if (netif[0] == '/') /* Absolute path */ - strlcpy(tap_dev, netif, sizeof(tap_dev)); - else - snprintf(tap_dev, sizeof(tap_dev), "/dev/%s", netif); - - tap_fd = open(tap_dev, TAPDEV_OFLAGS); - - /* - * If we cannot open normally try to connect to it. - */ - if (tap_fd < 0) - tap_fd = unix_connect(tap_dev); - - if (tap_fd < 0) { - warn("Unable to open %s", tap_dev); - return -1; - } - } - - /* - * Check whether the device file is a tap(4) - */ - if (fstat(tap_fd, &st) < 0) { - failed = 1; - } else if (S_ISCHR(st.st_mode)) { - dname = fdevname(tap_fd); - if (dname) - dname = strstr(dname, "tap"); - if (dname) { - /* - * Bring up the corresponding tap(4) interface - */ - *tap_unit = strtol(dname + 3, NULL, 10); - printf("TAP UNIT %d\n", *tap_unit); - if (netif_set_tapflags(*tap_unit, IFF_UP, s) == 0) - failed = 0; - else - failed = 1; - } else { - failed = 1; - } - } else if (S_ISSOCK(st.st_mode)) { - /* - * Special socket connection (typically to vknet). We - * do not have to do anything. - */ - failed = 0; - } else { - failed = 1; - } - - if (failed) { - warnx("%s is not a tap(4) device or socket", tap_dev); - close(tap_fd); - tap_fd = -1; - *tap_unit = -1; - } - return tap_fd; -} - -static int -unix_connect(const char *path) -{ - struct sockaddr_un sunx; - int len; - int net_fd; - int sndbuf = 262144; - struct stat st; - - snprintf(sunx.sun_path, sizeof(sunx.sun_path), "%s", path); - len = offsetof(struct sockaddr_un, sun_path[strlen(sunx.sun_path)]); - ++len; /* include nul */ - sunx.sun_family = AF_UNIX; - sunx.sun_len = len; - - net_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (net_fd < 0) - return(-1); - if (connect(net_fd, (void *)&sunx, len) < 0) { - close(net_fd); - return(-1); - } - setsockopt(net_fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)); - if (fstat(net_fd, &st) == 0) - printf("Network socket buffer: %d bytes\n", st.st_blksize); - fcntl(net_fd, F_SETFL, O_NONBLOCK); - return(net_fd); -} - -#undef TAPDEV_MAJOR -#undef TAPDEV_MINOR -#undef TAPDEV_OFLAGS - -/* - * Following syntax is supported, - * 1) x.x.x.x tap(4)'s address is x.x.x.x - * - * 2) x.x.x.x/z tap(4)'s address is x.x.x.x - * tap(4)'s netmask len is z - * - * 3) x.x.x.x:y.y.y.y tap(4)'s address is x.x.x.x - * pseudo netif's address is y.y.y.y - * - * 4) x.x.x.x:y.y.y.y/z tap(4)'s address is x.x.x.x - * pseudo netif's address is y.y.y.y - * tap(4) and pseudo netif's netmask len are z - * - * 5) bridgeX tap(4) will be added to bridgeX - * - * 6) bridgeX:y.y.y.y tap(4) will be added to bridgeX - * pseudo netif's address is y.y.y.y - * - * 7) bridgeX:y.y.y.y/z tap(4) will be added to bridgeX - * pseudo netif's address is y.y.y.y - * pseudo netif's netmask len is z - */ -static -int -netif_init_tap(int tap_unit, in_addr_t *addr, in_addr_t *mask, int s) -{ - in_addr_t tap_addr, netmask, netif_addr; - int next_netif_addr; - char *tok, *masklen_str, *ifbridge; - - *addr = 0; - *mask = 0; - - tok = strtok(NULL, ":/"); - if (tok == NULL) { - /* - * Nothing special, simply use tap(4) as backend - */ - return 0; - } - - if (inet_pton(AF_INET, tok, &tap_addr) > 0) { - /* - * tap(4)'s address is supplied - */ - ifbridge = NULL; - - /* - * If there is next token, then it may be pseudo - * netif's address or netmask len for tap(4) - */ - next_netif_addr = 0; - } else { - /* - * Not tap(4)'s address, assume it as a bridge(4) - * iface name - */ - tap_addr = 0; - ifbridge = tok; - - /* - * If there is next token, then it must be pseudo - * netif's address - */ - next_netif_addr = 1; - } - - netmask = netif_addr = 0; - - tok = strtok(NULL, ":/"); - if (tok == NULL) - goto back; - - if (inet_pton(AF_INET, tok, &netif_addr) <= 0) { - if (next_netif_addr) { - warnx("Invalid pseudo netif address: %s", tok); - return -1; - } - netif_addr = 0; - - /* - * Current token is not address, then it must be netmask len - */ - masklen_str = tok; - } else { - /* - * Current token is pseudo netif address, if there is next token - * it must be netmask len - */ - masklen_str = strtok(NULL, "/"); - } - - /* Calculate netmask */ - if (masklen_str != NULL) { - u_long masklen; - - masklen = strtoul(masklen_str, NULL, 10); - if (masklen < 32 && masklen > 0) { - netmask = htonl(~((1LL << (32 - masklen)) - 1) - & 0xffffffff); - } else { - warnx("Invalid netmask len: %lu", masklen); - return -1; - } - } - - /* Make sure there is no more token left */ - if (strtok(NULL, ":/") != NULL) { - warnx("Invalid argument to '-I'"); - return -1; - } - -back: - if (tap_unit < 0) { - /* Do nothing */ - } else if (ifbridge == NULL) { - /* Set tap(4) address/netmask */ - if (netif_set_tapaddr(tap_unit, tap_addr, netmask, s) < 0) - return -1; - } else { - /* Tie tap(4) to bridge(4) */ - if (netif_add_tap2brg(tap_unit, ifbridge, s) < 0) - return -1; - } - - *addr = netif_addr; - *mask = netmask; - return 0; -} - -/* - * NetifInfo[] will be filled for pseudo netif initialization. - * NetifNum will be bumped to reflect the number of valid entries - * in NetifInfo[]. - */ -static -void -init_netif(char *netifExp[], int netifExpNum) -{ - int i, s; - char *tmp; - - if (netifExpNum == 0) - return; - - s = socket(AF_INET, SOCK_DGRAM, 0); /* for ioctl(SIOC) */ - if (s < 0) - return; - - for (i = 0; i < netifExpNum; ++i) { - struct vknetif_info *info; - in_addr_t netif_addr, netif_mask; - int tap_fd, tap_unit; - char *netif; - - /* Extract MAC address if there is one */ - tmp = netifExp[i]; - strsep(&tmp, "="); - - netif = strtok(netifExp[i], ":"); - if (netif == NULL) { - warnx("Invalid argument to '-I'"); - continue; - } - - /* - * Open tap(4) device file and bring up the - * corresponding interface - */ - tap_fd = netif_open_tap(netif, &tap_unit, s); - if (tap_fd < 0) - continue; - - /* - * Initialize tap(4) and get address/netmask - * for pseudo netif - * - * NB: Rest part of netifExp[i] is passed - * to netif_init_tap() implicitly. - */ - if (netif_init_tap(tap_unit, &netif_addr, &netif_mask, s) < 0) { - /* - * NB: Closing tap(4) device file will bring - * down the corresponding interface - */ - close(tap_fd); - continue; - } - - info = &NetifInfo[NetifNum]; - bzero(info, sizeof(*info)); - info->tap_fd = tap_fd; - info->tap_unit = tap_unit; - info->netif_addr = netif_addr; - info->netif_mask = netif_mask; - /* - * If tmp isn't NULL it means a MAC could have been - * specified so attempt to convert it. - * Setting enaddr to NULL will tell vke_attach() we - * need a pseudo-random MAC address. - */ - if (tmp != NULL) { - if ((info->enaddr = malloc(ETHER_ADDR_LEN)) == NULL) - warnx("Couldn't allocate memory for the operation"); - else { - if ((kether_aton(tmp, info->enaddr)) == NULL) { - free(info->enaddr); - info->enaddr = NULL; - } - } - } - - NetifNum++; - if (NetifNum >= VKNETIF_MAX) /* XXX will this happen? */ - break; - } - close(s); -} - -/* - * Create the pid file and leave it open and locked while the vkernel is - * running. This allows a script to use /usr/bin/lockf to probe whether - * a vkernel is still running (so as not to accidently kill an unrelated - * process from a stale pid file). - */ -static -void -writepid(void) -{ - char buf[32]; - int fd; - - if (pid_file != NULL) { - snprintf(buf, sizeof(buf), "%ld\n", (long)getpid()); - fd = open(pid_file, O_RDWR|O_CREAT|O_EXLOCK|O_NONBLOCK, 0666); - if (fd < 0) { - if (errno == EWOULDBLOCK) { - perror("Failed to lock pidfile, " - "vkernel already running"); - } else { - perror("Failed to create pidfile"); - } - exit(EX_SOFTWARE); - } - ftruncate(fd, 0); - write(fd, buf, strlen(buf)); - /* leave the file open to maintain the lock */ - } -} - -static -void -cleanpid( void ) -{ - if (pid_file != NULL) { - if (unlink(pid_file) < 0) - perror("Warning: couldn't remove pidfile"); - } -} - -static -void -usage_err(const char *ctl, ...) -{ - va_list va; - - va_start(va, ctl); - vfprintf(stderr, ctl, va); - va_end(va); - fprintf(stderr, "\n"); - exit(EX_USAGE); -} - -static -void -usage_help(_Bool help) -{ - fprintf(stderr, "Usage: %s [-hsUvd] [-c file] [-e name=value:name=value:...]\n" - "\t[-i file] [-I interface[:address1[:address2][/netmask]]] [-l cpulock]\n" - "\t[-m size] [-n numcpus[:lbits[:cbits]]]\n" - "\t[-p file] [-r file]\n", save_av[0]); - - if (help) - fprintf(stderr, "\nArguments:\n" - "\t-c\tSpecify a readonly CD-ROM image file to be used by the kernel.\n" - "\t-e\tSpecify an environment to be used by the kernel.\n" - "\t-h\tThis list of options.\n" - "\t-i\tSpecify a memory image file to be used by the virtual kernel.\n" - "\t-I\tCreate a virtual network device.\n" - "\t-l\tSpecify which, if any, real CPUs to lock virtual CPUs to.\n" - "\t-m\tSpecify the amount of memory to be used by the kernel in bytes.\n" - "\t-n\tSpecify the number of CPUs and the topology you wish to emulate:\n" - "\t \t- numcpus - number of cpus\n" - "\t \t- :lbits - specify the number of bits within APICID(=CPUID) needed for representing\n" - "\t \tthe logical ID. Controls the number of threads/core (0bits - 1 thread, 1bit - 2 threads).\n" - "\t \t- :cbits - specify the number of bits within APICID(=CPUID) needed for representing\n" - "\t \tthe core ID. Controls the number of core/package (0bits - 1 core, 1bit - 2 cores).\n" - "\t-p\tSpecify a file in which to store the process ID.\n" - "\t-r\tSpecify a R/W disk image file to be used by the kernel.\n" - "\t-s\tBoot into single-user mode.\n" - "\t-U\tEnable writing to kernel memory and module loading.\n" - "\t-v\tTurn on verbose booting.\n"); - - exit(EX_USAGE); -} - -void -cpu_reset(void) -{ - kprintf("cpu reset, rebooting vkernel\n"); - closefrom(3); - cleanpid(); - execv(save_av[0], save_av); -} - -void -cpu_halt(void) -{ - kprintf("cpu halt, exiting vkernel\n"); - cleanpid(); - exit(EX_OK); -} - -void -setrealcpu(void) -{ - switch(lwp_cpu_lock) { - case LCL_PER_CPU: - if (bootverbose) - kprintf("Locking CPU%d to real cpu %d\n", - mycpuid, next_cpu); - usched_set(getpid(), USCHED_SET_CPU, &next_cpu, sizeof(next_cpu)); - next_cpu++; - if (next_cpu >= real_ncpus) - next_cpu = 0; - break; - case LCL_SINGLE_CPU: - if (bootverbose) - kprintf("Locking CPU%d to real cpu %d\n", - mycpuid, next_cpu); - usched_set(getpid(), USCHED_SET_CPU, &next_cpu, sizeof(next_cpu)); - break; - default: - /* do not map virtual cpus to real cpus */ - break; - } -} - diff --git a/sys/platform/vkernel/platform/ipl_funcs.c b/sys/platform/vkernel/platform/ipl_funcs.c deleted file mode 100644 index 9599ff90f2..0000000000 --- a/sys/platform/vkernel/platform/ipl_funcs.c +++ /dev/null @@ -1,72 +0,0 @@ -/*- - * Copyright (c) 1997 Bruce Evans. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/isa/ipl_funcs.c,v 1.32.2.5 2002/12/17 18:04:02 sam Exp $ - * $DragonFly: src/sys/platform/vkernel/platform/ipl_funcs.c,v 1.2 2007/01/11 23:23:56 dillon Exp $ - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * Bits in the ipending bitmap variable must be set atomically because - * ipending may be manipulated by interrupts or other cpu's without holding - * any locks. - * - * Note: setbits uses a locked or, making simple cases MP safe. - */ -#define DO_SETBITS(name, var, bits) \ -void \ -name(void) \ -{ \ - struct mdglobaldata *gd = mdcpu; \ - atomic_set_int_nonlocked(var, bits); \ - atomic_set_int(&gd->mi.gd_reqflags, RQF_INTPEND); \ -} \ - -DO_SETBITS(setdelayed, &gd->gd_spending, loadandclear(&gd->gd_sdelayed)) - -DO_SETBITS(setsoftcamnet,&gd->gd_spending, SWI_CAMNET_PENDING) -DO_SETBITS(setsoftcambio,&gd->gd_spending, SWI_CAMBIO_PENDING) -/*DO_SETBITS(setsoftunused02, &gd->gd_spending, SWI_UNUSED02_PENDING)*/ -/*DO_SETBITS(setsoftunused01, &gd->gd_spending, SWI_UNUSED01_PENDING)*/ -DO_SETBITS(setsofttty, &gd->gd_spending, SWI_TTY_PENDING) -DO_SETBITS(setsoftvm, &gd->gd_spending, SWI_VM_PENDING) -DO_SETBITS(setsofttq, &gd->gd_spending, SWI_TQ_PENDING) -DO_SETBITS(setsoftcrypto,&gd->gd_spending, SWI_CRYPTO_PENDING) - -/*DO_SETBITS(schedsoftcamnet, &gd->gd_sdelayed, SWI_CAMNET_PENDING)*/ -/*DO_SETBITS(schedsoftcambio, &gd->gd_sdelayed, SWI_CAMBIO_PENDING)*/ -/*DO_SETBITS(schedsoftunused01, &gd->gd_sdelayed, SWI_UNUSED01_PENDING)*/ -DO_SETBITS(schedsofttty, &gd->gd_sdelayed, SWI_TTY_PENDING) -/*DO_SETBITS(schedsoftvm, &gd->gd_sdelayed, SWI_VM_PENDING)*/ -/*DO_SETBITS(schedsofttq, &gd->gd_sdelayed, SWI_TQ_PENDING)*/ -/* YYY schedsoft what? */ - diff --git a/sys/platform/vkernel/platform/kqueue.c b/sys/platform/vkernel/platform/kqueue.c deleted file mode 100644 index 1639b1ff14..0000000000 --- a/sys/platform/vkernel/platform/kqueue.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -struct kqueue_info { - void (*func)(void *, struct intrframe *); - void *data; - int fd; -}; - -static void kqueuesig(int signo); -static void kqueue_intr(void *arg __unused, void *frame __unused); - -static int KQueueFd = -1; -static void *VIntr1; - -/* - * Initialize kqueue based I/O - * - * Use SIGIO to get an immediate event when the kqueue has something waiting - * for us. Setup the SIGIO signal as a mailbox signal for efficiency. - * - * Currently only read events are supported. - */ -void -init_kqueue(void) -{ - struct sigaction sa; - - bzero(&sa, sizeof(sa)); - /*sa.sa_mailbox = &mdcpu->gd_mailbox;*/ - sa.sa_flags = 0; - sa.sa_handler = kqueuesig; - sigemptyset(&sa.sa_mask); - sigaction(SIGIO, &sa, NULL); - KQueueFd = kqueue(); - if (fcntl(KQueueFd, F_SETOWN, getpid()) < 0) - panic("Cannot configure kqueue for SIGIO, update your kernel"); - if (fcntl(KQueueFd, F_SETFL, O_ASYNC) < 0) - panic("Cannot configure kqueue for SIGIO, update your kernel"); -} - -/* - * Signal handler dispatches interrupt thread. Use interrupt #1 - */ -static void -kqueuesig(int signo) -{ - signalintr(1); -} - -/* - * Generic I/O event support - */ -struct kqueue_info * -kqueue_add(int fd, void (*func)(void *, struct intrframe *), void *data) -{ - struct timespec ts = { 0, 0 }; - struct kqueue_info *info; - struct kevent kev; - - if (VIntr1 == NULL) { - VIntr1 = register_int_virtual(1, kqueue_intr, NULL, "kqueue", - NULL, INTR_MPSAFE); - } - - info = kmalloc(sizeof(*info), M_DEVBUF, M_ZERO|M_INTWAIT); - info->func = func; - info->data = data; - info->fd = fd; - EV_SET(&kev, fd, EVFILT_READ, EV_ADD|EV_ENABLE|EV_CLEAR, 0, 0, info); - if (kevent(KQueueFd, &kev, 1, NULL, 0, &ts) < 0) - panic("kqueue: kevent() call could not add descriptor"); - return(info); -} - -/* - * Medium resolution timer support - */ -struct kqueue_info * -kqueue_add_timer(void (*func)(void *, struct intrframe *), void *data) -{ - struct kqueue_info *info; - - if (VIntr1 == NULL) { - VIntr1 = register_int_virtual(1, kqueue_intr, NULL, "kqueue", - NULL, INTR_MPSAFE); - } - - info = kmalloc(sizeof(*info), M_DEVBUF, M_ZERO|M_INTWAIT); - info->func = func; - info->data = data; - info->fd = (uintptr_t)info; - return(info); -} - -void -kqueue_reload_timer(struct kqueue_info *info, int ms) -{ - struct timespec ts = { 0, 0 }; - struct kevent kev; - - KKASSERT(ms > 0); - - EV_SET(&kev, info->fd, EVFILT_TIMER, - EV_ADD|EV_ENABLE|EV_ONESHOT|EV_CLEAR, 0, (uintptr_t)ms, info); - if (kevent(KQueueFd, &kev, 1, NULL, 0, &ts) < 0) - panic("kqueue_reload_timer: Failed"); -} - -/* - * Destroy a previously added kqueue event - */ -void -kqueue_del(struct kqueue_info *info) -{ - struct timespec ts = { 0, 0 }; - struct kevent kev; - - KKASSERT(info->fd >= 0); - EV_SET(&kev, info->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); - if (kevent(KQueueFd, &kev, 1, NULL, 0, &ts) < 0) - panic("kevent: failed to delete descriptor %d", info->fd); - info->fd = -1; - kfree(info, M_DEVBUF); -} - -/* - * Safely called via DragonFly's normal interrupt handling mechanism. - * - * Calleld with the MP lock held. Note that this is still an interrupt - * thread context. - */ -static -void -kqueue_intr(void *arg __unused, void *frame __unused) -{ - struct timespec ts; - struct kevent kevary[8]; - int n; - int i; - - ts.tv_sec = 0; - ts.tv_nsec = 0; - do { - n = kevent(KQueueFd, NULL, 0, kevary, 8, &ts); - for (i = 0; i < n; ++i) { - struct kevent *kev = &kevary[i]; - struct kqueue_info *info = (void *)kev->udata; - - info->func(info->data, frame); - } - } while (n == 8); -} - diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/machintr.c deleted file mode 100644 index bfcda1d787..0000000000 --- a/sys/platform/vkernel/platform/machintr.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Interrupt Subsystem ABI - */ - -static void dummy_intr_disable(int); -static void dummy_intr_enable(int); -static void dummy_intr_setup(int, int); -static void dummy_intr_teardown(int); -static int dummy_legacy_intr_cpuid(int); -static void dummy_finalize(void); -static void dummy_intrcleanup(void); -static void dummy_stabilize(void); - -struct machintr_abi MachIntrABI = { - MACHINTR_GENERIC, - .intr_disable = dummy_intr_disable, - .intr_enable = dummy_intr_enable, - .intr_setup = dummy_intr_setup, - .intr_teardown = dummy_intr_teardown, - .legacy_intr_cpuid = dummy_legacy_intr_cpuid, - - .finalize = dummy_finalize, - .cleanup = dummy_intrcleanup, - .stabilize = dummy_stabilize -}; - -static void -dummy_intr_disable(int intr) -{ -} - -static void -dummy_intr_enable(int intr) -{ -} - -static void -dummy_intr_setup(int intr, int flags) -{ -} - -static void -dummy_intr_teardown(int intr) -{ -} - -static void -dummy_finalize(void) -{ -} - -static void -dummy_intrcleanup(void) -{ -} - -static void -dummy_stabilize(void) -{ -} - -static int -dummy_legacy_intr_cpuid(int irq __unused) -{ - return 0; -} - -/* - * Process pending interrupts - */ -void -splz(void) -{ - struct mdglobaldata *gd = mdcpu; - thread_t td = gd->mi.gd_curthread; - int irq; - - while (gd->mi.gd_reqflags & (RQF_IPIQ|RQF_INTPEND)) { - crit_enter_quick(td); - if (gd->mi.gd_reqflags & RQF_IPIQ) { - atomic_clear_int(&gd->mi.gd_reqflags, RQF_IPIQ); - lwkt_process_ipiq(); - } - if (gd->mi.gd_reqflags & RQF_INTPEND) { - atomic_clear_int(&gd->mi.gd_reqflags, RQF_INTPEND); - while ((irq = ffs(gd->gd_spending)) != 0) { - --irq; - atomic_clear_int(&gd->gd_spending, 1 << irq); - irq += FIRST_SOFTINT; - sched_ithd_soft(irq); - } - while ((irq = ffs(gd->gd_fpending)) != 0) { - --irq; - atomic_clear_int(&gd->gd_fpending, 1 << irq); - sched_ithd_hard_virtual(irq); - } - } - crit_exit_noyield(td); - } -} - -/* - * Allows an unprotected signal handler or mailbox to signal an interrupt - * - * For sched_ithd_hard_virtaul() to properly preempt via lwkt_schedule() we - * cannot enter a critical section here. We use td_nest_count instead. - */ -void -signalintr(int intr) -{ - struct mdglobaldata *gd = mdcpu; - thread_t td = gd->mi.gd_curthread; - - if (td->td_critcount || td->td_nest_count) { - atomic_set_int_nonlocked(&gd->gd_fpending, 1 << intr); - atomic_set_int(&gd->mi.gd_reqflags, RQF_INTPEND); - } else { - ++td->td_nest_count; - atomic_clear_int(&gd->gd_fpending, 1 << intr); - sched_ithd_hard_virtual(intr); - --td->td_nest_count; - } -} - -void -cpu_disable_intr(void) -{ - sigblock(sigmask(SIGALRM)|sigmask(SIGIO)|sigmask(SIGUSR1)); -} - -void -cpu_enable_intr(void) -{ - sigsetmask(0); -} - -void -cpu_mask_all_signals(void) -{ - sigblock(sigmask(SIGALRM)|sigmask(SIGIO)|sigmask(SIGQUIT)| - sigmask(SIGUSR1)|sigmask(SIGTERM)|sigmask(SIGWINCH)| - sigmask(SIGUSR2)); -} - -void -cpu_unmask_all_signals(void) -{ - sigsetmask(0); -} - -void -cpu_invlpg(void *addr) -{ - madvise(addr, PAGE_SIZE, MADV_INVAL); -} - -void -cpu_invltlb(void) -{ - madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL); -} - diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c deleted file mode 100644 index 5dfa8cc857..0000000000 --- a/sys/platform/vkernel/platform/pmap.c +++ /dev/null @@ -1,3131 +0,0 @@ -/* - * (MPSAFE) - * - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * Copyright (c) 1994 John S. Dyson - * All rights reserved. - * Copyright (c) 1994 David Greenman - * All rights reserved. - * Copyright (c) 2004-2006 Matthew Dillon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - */ -/* - * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting - * the PTE in the page table, because a cpu synchronization might be required. - * The actual invalidation is delayed until the following call or flush. In - * the VKERNEL build this function is called prior to adjusting the PTE and - * invalidates the table synchronously (not delayed), and is not SMP safe - * as a consequence. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include - -struct pmap kernel_pmap; - -static struct vm_zone pvzone; -static struct vm_object pvzone_obj; -static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list); -static int pv_entry_count; -static int pv_entry_max; -static int pv_entry_high_water; -static int pmap_pagedaemon_waken; -static boolean_t pmap_initialized = FALSE; -static int protection_codes[8]; - -static void i386_protection_init(void); -static void pmap_remove_all(vm_page_t m); -static int pmap_release_free_page(struct pmap *pmap, vm_page_t p); - -#define MINPV 2048 -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - -#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) - -#define pte_prot(m, p) \ - (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) - -void -pmap_init(void) -{ - struct pv_entry *pvinit; - int i; - int npages; - - npages = VPTE_PAGETABLE_SIZE + - (VM_MAX_USER_ADDRESS / PAGE_SIZE) * sizeof(vpte_t); - npages = (npages + PAGE_MASK) / PAGE_SIZE; - kernel_pmap.pm_pteobj = vm_object_allocate(OBJT_DEFAULT, npages); - - for (i = 0; i < vm_page_array_size; i++) { - vm_page_t m; - - m = &vm_page_array[i]; - TAILQ_INIT(&m->md.pv_list); - m->md.pv_list_count = 0; - } - - i = vm_page_array_size; - if (i < MINPV) - i = MINPV; - pvinit = (struct pv_entry *)kmem_alloc(&kernel_map, i*sizeof(*pvinit)); - zbootinit(&pvzone, "PV ENTRY", sizeof(*pvinit), pvinit, i); - pmap_initialized = TRUE; -} - -void -pmap_init2(void) -{ - int shpgperproc = PMAP_SHPGPERPROC; - - TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); - pv_entry_max = shpgperproc * maxproc + vm_page_array_size; - TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); - pv_entry_high_water = 9 * (pv_entry_max / 10); - zinitna(&pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); -} - -/* - * Typically used to initialize a fictitious page by vm/device_pager.c - */ -void -pmap_page_init(struct vm_page *m) -{ - vm_page_init(m); - TAILQ_INIT(&m->md.pv_list); -} - -/* - * Bootstrap the kernel_pmap so it can be used with pmap_enter(). - * - * NOTE! pm_pdir for the kernel pmap is offset so VA's translate - * directly into PTD indexes (PTA is also offset for the same reason). - * This is necessary because, for now, KVA is not mapped at address 0. - * - * Page table pages are not managed like they are in normal pmaps, so - * no pteobj is needed. - */ -void -pmap_bootstrap(void) -{ - vm_pindex_t i = (vm_offset_t)KernelPTD >> PAGE_SHIFT; - - /* - * The kernel_pmap's pm_pteobj is used only for locking and not - * for mmu pages. - */ - kernel_pmap.pm_pdir = KernelPTD - (KvaStart >> SEG_SHIFT); - kernel_pmap.pm_pdirpte = KernelPTA[i]; - kernel_pmap.pm_count = 1; - kernel_pmap.pm_active = (cpumask_t)-1; - kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ - TAILQ_INIT(&kernel_pmap.pm_pvlist); - TAILQ_INIT(&kernel_pmap.pm_pvlist_free); - spin_init(&kernel_pmap.pm_spin, "pmapbootstrap"); - lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok"); - i386_protection_init(); -} - -/* - * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we - * just dummy it up so it works well enough for fork(). - * - * In DragonFly, process pmaps may only be used to manipulate user address - * space, never kernel address space. - */ -void -pmap_pinit0(struct pmap *pmap) -{ - pmap_pinit(pmap); -} - -/************************************************************************ - * Procedures to manage whole physical maps * - ************************************************************************ - * - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. - */ -void -pmap_pinit(struct pmap *pmap) -{ - vm_page_t ptdpg; - int npages; - - /* - * No need to allocate page table space yet but we do need a valid - * page directory table. - */ - if (pmap->pm_pdir == NULL) { - pmap->pm_pdir = - (vpte_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); - } - - /* - * allocate object for the pte array and page directory - */ - npages = VPTE_PAGETABLE_SIZE + - (VM_MAX_USER_ADDRESS / PAGE_SIZE) * sizeof(vpte_t); - npages = (npages + PAGE_MASK) / PAGE_SIZE; - - if (pmap->pm_pteobj == NULL) - pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, npages); - pmap->pm_pdindex = npages - 1; - - /* - * allocate the page directory page - */ - ptdpg = vm_page_grab(pmap->pm_pteobj, pmap->pm_pdindex, - VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO); - vm_page_wire(ptdpg); - - /* not usually mapped */ - vm_page_flag_clear(ptdpg, PG_MAPPED); - vm_page_wakeup(ptdpg); - - pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); - pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT]; - - pmap->pm_count = 1; - pmap->pm_active = 0; - pmap->pm_ptphint = NULL; - pmap->pm_cpucachemask = 0; - TAILQ_INIT(&pmap->pm_pvlist); - TAILQ_INIT(&pmap->pm_pvlist_free); - spin_init(&pmap->pm_spin, "pmapinit"); - lwkt_token_init(&pmap->pm_token, "pmap_tok"); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - pmap->pm_stats.resident_count = 1; -} - -/* - * Clean up a pmap structure so it can be physically freed - * - * No requirements. - */ -void -pmap_puninit(pmap_t pmap) -{ - if (pmap->pm_pdir) { - kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pdir, PAGE_SIZE); - pmap->pm_pdir = NULL; - } - if (pmap->pm_pteobj) { - vm_object_deallocate(pmap->pm_pteobj); - pmap->pm_pteobj = NULL; - } -} - - -/* - * Wire in kernel global address entries. To avoid a race condition - * between pmap initialization and pmap_growkernel, this procedure - * adds the pmap to the master list (which growkernel scans to update), - * then copies the template. - * - * In a virtual kernel there are no kernel global address entries. - * - * No requirements. - */ -void -pmap_pinit2(struct pmap *pmap) -{ - spin_lock(&pmap_spin); - TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); - spin_unlock(&pmap_spin); -} - -/* - * Release all resources held by the given physical map. - * - * Should only be called if the map contains no valid mappings. - * - * Caller must hold pmap->pm_token - */ -static int pmap_release_callback(struct vm_page *p, void *data); - -void -pmap_release(struct pmap *pmap) -{ - struct mdglobaldata *gd = mdcpu; - vm_object_t object = pmap->pm_pteobj; - struct rb_vm_page_scan_info info; - - KKASSERT(pmap != &kernel_pmap); - -#if defined(DIAGNOSTIC) - if (object->ref_count != 1) - panic("pmap_release: pteobj reference count != 1"); -#endif - /* - * Once we destroy the page table, the mapping becomes invalid. - * Don't waste time doing a madvise to invalidate the mapping, just - * set cpucachemask to 0. - */ - if (pmap->pm_pdir == gd->gd_PT1pdir) { - gd->gd_PT1pdir = NULL; - *gd->gd_PT1pde = 0; - /* madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); */ - } - if (pmap->pm_pdir == gd->gd_PT2pdir) { - gd->gd_PT2pdir = NULL; - *gd->gd_PT2pde = 0; - /* madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); */ - } - if (pmap->pm_pdir == gd->gd_PT3pdir) { - gd->gd_PT3pdir = NULL; - *gd->gd_PT3pde = 0; - /* madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); */ - } - - info.pmap = pmap; - info.object = object; - - spin_lock(&pmap_spin); - TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); - spin_unlock(&pmap_spin); - - vm_object_hold(object); - do { - info.error = 0; - info.mpte = NULL; - info.limit = object->generation; - - vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, - pmap_release_callback, &info); - if (info.error == 0 && info.mpte) { - if (!pmap_release_free_page(pmap, info.mpte)) - info.error = 1; - } - } while (info.error); - vm_object_drop(object); - - /* - * Leave the KVA reservation for pm_pdir cached for later reuse. - */ - pmap->pm_pdirpte = 0; - pmap->pm_cpucachemask = 0; -} - -/* - * Callback to release a page table page backing a directory - * entry. - */ -static int -pmap_release_callback(struct vm_page *p, void *data) -{ - struct rb_vm_page_scan_info *info = data; - - if (p->pindex == info->pmap->pm_pdindex) { - info->mpte = p; - return(0); - } - if (!pmap_release_free_page(info->pmap, p)) { - info->error = 1; - return(-1); - } - if (info->object->generation != info->limit) { - info->error = 1; - return(-1); - } - return(0); -} - -/* - * Add a reference to the specified pmap. - * - * No requirements. - */ -void -pmap_reference(pmap_t pmap) -{ - if (pmap) { - lwkt_gettoken(&vm_token); - ++pmap->pm_count; - lwkt_reltoken(&vm_token); - } -} - -/************************************************************************ - * VMSPACE MANAGEMENT * - ************************************************************************ - * - * The VMSPACE management we do in our virtual kernel must be reflected - * in the real kernel. This is accomplished by making vmspace system - * calls to the real kernel. - */ -void -cpu_vmspace_alloc(struct vmspace *vm) -{ - int r; - void *rp; - -#define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000) - - if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) - panic("vmspace_create() failed"); - - rp = vmspace_mmap(&vm->vm_pmap, (void *)0x00000000, 0x40000000, - PROT_READ|PROT_WRITE, - MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, - MemImageFd, 0); - if (rp == MAP_FAILED) - panic("vmspace_mmap: failed1"); - vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000, - MADV_NOSYNC, 0); - rp = vmspace_mmap(&vm->vm_pmap, (void *)0x40000000, 0x40000000, - PROT_READ|PROT_WRITE, - MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, - MemImageFd, 0x40000000); - if (rp == MAP_FAILED) - panic("vmspace_mmap: failed2"); - vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000, - MADV_NOSYNC, 0); - rp = vmspace_mmap(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT, - PROT_READ|PROT_WRITE, - MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, - MemImageFd, 0x80000000); - vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT, - MADV_NOSYNC, 0); - if (rp == MAP_FAILED) - panic("vmspace_mmap: failed3"); - - r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000, - MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); - if (r < 0) - panic("vmspace_mcontrol: failed1"); - r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000, - MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); - if (r < 0) - panic("vmspace_mcontrol: failed2"); - r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT, - MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); - if (r < 0) - panic("vmspace_mcontrol: failed3"); -} - -void -cpu_vmspace_free(struct vmspace *vm) -{ - if (vmspace_destroy(&vm->vm_pmap) < 0) - panic("vmspace_destroy() failed"); -} - -/************************************************************************ - * Procedures which operate directly on the kernel PMAP * - ************************************************************************/ - -/* - * This maps the requested page table and gives us access to it. - * - * This routine can be called from a potentially preempting interrupt - * thread or from a normal thread. - */ -static vpte_t * -get_ptbase(struct pmap *pmap, vm_offset_t va) -{ - struct mdglobaldata *gd = mdcpu; - - if (pmap == &kernel_pmap) { - KKASSERT(va >= KvaStart && va < KvaEnd); - return(KernelPTA + (va >> PAGE_SHIFT)); - } else if (pmap->pm_pdir == gd->gd_PT1pdir) { - if (CPUMASK_TESTMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask) == 0) { - *gd->gd_PT1pde = pmap->pm_pdirpte; - madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - return(gd->gd_PT1map + (va >> PAGE_SHIFT)); - } else if (pmap->pm_pdir == gd->gd_PT2pdir) { - if (CPUMASK_TESTMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask) == 0) { - *gd->gd_PT2pde = pmap->pm_pdirpte; - madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - return(gd->gd_PT2map + (va >> PAGE_SHIFT)); - } - - /* - * If we aren't running from a potentially preempting interrupt, - * load a new page table directory into the page table cache - */ - if (gd->mi.gd_intr_nesting_level == 0 && - (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0) { - /* - * Choose one or the other and map the page table - * in the KVA space reserved for it. - */ - if ((gd->gd_PTflip = 1 - gd->gd_PTflip) == 0) { - gd->gd_PT1pdir = pmap->pm_pdir; - *gd->gd_PT1pde = pmap->pm_pdirpte; - madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - return(gd->gd_PT1map + (va >> PAGE_SHIFT)); - } else { - gd->gd_PT2pdir = pmap->pm_pdir; - *gd->gd_PT2pde = pmap->pm_pdirpte; - madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - return(gd->gd_PT2map + (va >> PAGE_SHIFT)); - } - } - - /* - * If we are running from a preempting interrupt use a private - * map. The caller must be in a critical section. - */ - KKASSERT(IN_CRITICAL_SECT(curthread)); - if (pmap->pm_pdir == gd->gd_PT3pdir) { - if (CPUMASK_TESTMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask) == 0) { - *gd->gd_PT3pde = pmap->pm_pdirpte; - madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - } else { - gd->gd_PT3pdir = pmap->pm_pdir; - *gd->gd_PT3pde = pmap->pm_pdirpte; - madvise(gd->gd_PT3map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - return(gd->gd_PT3map + (va >> PAGE_SHIFT)); -} - -static vpte_t * -get_ptbase1(struct pmap *pmap, vm_offset_t va) -{ - struct mdglobaldata *gd = mdcpu; - - if (pmap == &kernel_pmap) { - KKASSERT(va >= KvaStart && va < KvaEnd); - return(KernelPTA + (va >> PAGE_SHIFT)); - } else if (pmap->pm_pdir == gd->gd_PT1pdir) { - if (CPUMASK_TESTMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask) == 0) { - *gd->gd_PT1pde = pmap->pm_pdirpte; - madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - return(gd->gd_PT1map + (va >> PAGE_SHIFT)); - } - KKASSERT(gd->mi.gd_intr_nesting_level == 0 && - (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0); - gd->gd_PT1pdir = pmap->pm_pdir; - *gd->gd_PT1pde = pmap->pm_pdirpte; - madvise(gd->gd_PT1map, SEG_SIZE, MADV_INVAL); - return(gd->gd_PT1map + (va >> PAGE_SHIFT)); -} - -static vpte_t * -get_ptbase2(struct pmap *pmap, vm_offset_t va) -{ - struct mdglobaldata *gd = mdcpu; - - if (pmap == &kernel_pmap) { - KKASSERT(va >= KvaStart && va < KvaEnd); - return(KernelPTA + (va >> PAGE_SHIFT)); - } else if (pmap->pm_pdir == gd->gd_PT2pdir) { - if (CPUMASK_TESTMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask) == 0) { - *gd->gd_PT2pde = pmap->pm_pdirpte; - madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); - ATOMIC_CPUMASK_ORMASK(pmap->pm_cpucachemask, - gd->mi.gd_cpumask); - } - return(gd->gd_PT2map + (va >> PAGE_SHIFT)); - } - KKASSERT(gd->mi.gd_intr_nesting_level == 0 && - (gd->mi.gd_curthread->td_flags & TDF_INTTHREAD) == 0); - gd->gd_PT2pdir = pmap->pm_pdir; - *gd->gd_PT2pde = pmap->pm_pdirpte; - madvise(gd->gd_PT2map, SEG_SIZE, MADV_INVAL); - return(gd->gd_PT2map + (va >> PAGE_SHIFT)); -} - -/* - * Return a pointer to the page table entry for the specified va in the - * specified pmap. NULL is returned if there is no valid page table page - * for the VA. - */ -static __inline vpte_t * -pmap_pte(struct pmap *pmap, vm_offset_t va) -{ - vpte_t *ptep; - - ptep = &pmap->pm_pdir[va >> SEG_SHIFT]; - if (*ptep & VPTE_PS) - return(ptep); - if (*ptep) - return (get_ptbase(pmap, va)); - return(NULL); -} - - -/* - * Enter a mapping into kernel_pmap. Mappings created in this fashion - * are not managed. Mappings must be immediately accessible on all cpus. - * - * Call pmap_inval_pte() to invalidate the virtual pte and clean out the - * real pmap and handle related races before storing the new vpte. - */ -void -pmap_kenter(vm_offset_t va, vm_paddr_t pa) -{ - vpte_t *ptep; - vpte_t npte; - - KKASSERT(va >= KvaStart && va < KvaEnd); - npte = (vpte_t)pa | VPTE_RW | VPTE_V; - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte(ptep, &kernel_pmap, va); - *ptep = npte; -} - -/* - * Synchronize a kvm mapping originally made for the private use on - * some other cpu so it can be used on all cpus. - * - * XXX add MADV_RESYNC to improve performance. - */ -void -pmap_kenter_sync(vm_offset_t va) -{ - madvise((void *)va, PAGE_SIZE, MADV_INVAL); -} - -/* - * Synchronize a kvm mapping originally made for the private use on - * some other cpu so it can be used on our cpu. Turns out to be the - * same madvise() call, because we have to sync the real pmaps anyway. - * - * XXX add MADV_RESYNC to improve performance. - */ -void -pmap_kenter_sync_quick(vm_offset_t va) -{ - madvise((void *)va, PAGE_SIZE, MADV_INVAL); -} - -#if 0 -/* - * Make a previously read-only kernel mapping R+W (not implemented by - * virtual kernels). - */ -void -pmap_kmodify_rw(vm_offset_t va) -{ - *pmap_kpte(va) |= VPTE_RW; - madvise((void *)va, PAGE_SIZE, MADV_INVAL); -} - -/* - * Make a kernel mapping non-cacheable (not applicable to virtual kernels) - */ -void -pmap_kmodify_nc(vm_offset_t va) -{ - *pmap_kpte(va) |= VPTE_N; - madvise((void *)va, PAGE_SIZE, MADV_INVAL); -} - -#endif - -/* - * Map a contiguous range of physical memory to a KVM - */ -vm_offset_t -pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) -{ - vm_offset_t sva, virt; - - sva = virt = *virtp; - while (start < end) { - pmap_kenter(virt, start); - virt += PAGE_SIZE; - start += PAGE_SIZE; - } - *virtp = virt; - return (sva); -} - -vpte_t * -pmap_kpte(vm_offset_t va) -{ - vpte_t *ptep; - - KKASSERT(va >= KvaStart && va < KvaEnd); - ptep = KernelPTA + (va >> PAGE_SHIFT); - return(ptep); -} - -/* - * Enter an unmanaged KVA mapping for the private use of the current - * cpu only. pmap_kenter_sync() may be called to make the mapping usable - * by other cpus. - * - * It is illegal for the mapping to be accessed by other cpus unleess - * pmap_kenter_sync*() is called. - */ -void -pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) -{ - vpte_t *ptep; - vpte_t npte; - - KKASSERT(va >= KvaStart && va < KvaEnd); - - npte = (vpte_t)pa | VPTE_RW | VPTE_V; - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte_quick(ptep, &kernel_pmap, va); - *ptep = npte; -} - -/* - * Make a temporary mapping for a physical address. This is only intended - * to be used for panic dumps. - * - * The caller is responsible for calling smp_invltlb(). - */ -void * -pmap_kenter_temporary(vm_paddr_t pa, long i) -{ - pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); - return ((void *)crashdumpmap); -} - -/* - * Remove an unmanaged mapping created with pmap_kenter*(). - */ -void -pmap_kremove(vm_offset_t va) -{ - vpte_t *ptep; - - KKASSERT(va >= KvaStart && va < KvaEnd); - - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte(ptep, &kernel_pmap, va); - *ptep = 0; -} - -/* - * Remove an unmanaged mapping created with pmap_kenter*() but synchronize - * only with this cpu. - * - * Unfortunately because we optimize new entries by testing VPTE_V later - * on, we actually still have to synchronize with all the cpus. XXX maybe - * store a junk value and test against 0 in the other places instead? - */ -void -pmap_kremove_quick(vm_offset_t va) -{ - vpte_t *ptep; - - KKASSERT(va >= KvaStart && va < KvaEnd); - - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */ - *ptep = 0; -} - -/* - * Extract the physical address from the kernel_pmap that is associated - * with the specified virtual address. - */ -vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - vpte_t *ptep; - vm_paddr_t pa; - - KKASSERT(va >= KvaStart && va < KvaEnd); - - ptep = KernelPTA + (va >> PAGE_SHIFT); - pa = (vm_paddr_t)(*ptep & VPTE_FRAME) | (va & PAGE_MASK); - return(pa); -} - -/* - * Map a set of unmanaged VM pages into KVM. - */ -void -pmap_qenter(vm_offset_t va, struct vm_page **m, int count) -{ - KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd); - while (count) { - vpte_t *ptep; - - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte(ptep, &kernel_pmap, va); - *ptep = (vpte_t)(*m)->phys_addr | VPTE_RW | VPTE_V; - --count; - ++m; - va += PAGE_SIZE; - } -} - -/* - * Undo the effects of pmap_qenter*(). - */ -void -pmap_qremove(vm_offset_t va, int count) -{ - KKASSERT(va >= KvaStart && va + count * PAGE_SIZE < KvaEnd); - while (count) { - vpte_t *ptep; - - ptep = KernelPTA + (va >> PAGE_SHIFT); - if (*ptep & VPTE_V) - pmap_inval_pte(ptep, &kernel_pmap, va); - *ptep = 0; - --count; - va += PAGE_SIZE; - } -} - -/************************************************************************ - * Misc support glue called by machine independant code * - ************************************************************************ - * - * These routines are called by machine independant code to operate on - * certain machine-dependant aspects of processes, threads, and pmaps. - */ - -/* - * Initialize MD portions of the thread structure. - */ -void -pmap_init_thread(thread_t td) -{ - /* enforce pcb placement */ - td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; - td->td_savefpu = &td->td_pcb->pcb_save; - td->td_sp = (char *)td->td_pcb - 16; -} - -/* - * This routine directly affects the fork perf for a process. - */ -void -pmap_init_proc(struct proc *p) -{ -} - -/* - * We pre-allocate all page table pages for kernel virtual memory so - * this routine will only be called if KVM has been exhausted. - * - * No requirements. - */ -void -pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) -{ - vm_offset_t addr; - - addr = (kend + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - - lwkt_gettoken(&vm_token); - if (addr > virtual_end - SEG_SIZE) - panic("KVM exhausted"); - kernel_vm_end = addr; - lwkt_reltoken(&vm_token); -} - -/* - * The modification bit is not tracked for any pages in this range. XXX - * such pages in this maps should always use pmap_k*() functions and not - * be managed anyhow. - * - * XXX User and kernel address spaces are independant for virtual kernels, - * this function only applies to the kernel pmap. - */ -static int -pmap_track_modified(pmap_t pmap, vm_offset_t va) -{ - if (pmap != &kernel_pmap) - return 1; - if ((va < clean_sva) || (va >= clean_eva)) - return 1; - else - return 0; -} - -/************************************************************************ - * Procedures supporting managed page table pages * - ************************************************************************ - * - * These procedures are used to track managed page table pages. These pages - * use the page table page's vm_page_t to track PTEs in the page. The - * page table pages themselves are arranged in a VM object, pmap->pm_pteobj. - * - * This allows the system to throw away page table pages for user processes - * at will and reinstantiate them on demand. - */ - -/* - * This routine works like vm_page_lookup() but also blocks as long as the - * page is busy. This routine does not busy the page it returns. - * - * Unless the caller is managing objects whos pages are in a known state, - * the call should be made with a critical section held so the page's object - * association remains valid on return. - */ -static vm_page_t -pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) -{ - vm_page_t m; - - ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); - m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp"); - - return(m); -} - -/* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. - * - * We must recheck that this is the last hold reference after busy-sleeping - * on the page. - */ -static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) -{ - vm_page_busy_wait(m, FALSE, "pmuwpt"); - KASSERT(m->queue == PQ_NONE, - ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m)); - - if (m->hold_count == 1) { - /* - * Unmap the page table page. - */ - KKASSERT(pmap->pm_pdir[m->pindex] != 0); - pmap_inval_pde(&pmap->pm_pdir[m->pindex], pmap, - (vm_offset_t)m->pindex << SEG_SHIFT); - KKASSERT(pmap->pm_stats.resident_count > 0); - --pmap->pm_stats.resident_count; - - if (pmap->pm_ptphint == m) - pmap->pm_ptphint = NULL; - - /* - * This was our last hold, the page had better be unwired - * after we decrement wire_count. - * - * FUTURE NOTE: shared page directory page could result in - * multiple wire counts. - */ - vm_page_unhold(m); - --m->wire_count; - KKASSERT(m->wire_count == 0); - atomic_add_int(&vmstats.v_wire_count, -1); - vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - vm_page_flash(m); - vm_page_free_zero(m); - return 1; - } - KKASSERT(m->hold_count > 1); - vm_page_unhold(m); - vm_page_wakeup(m); - - return 0; -} - -static __inline int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) -{ - KKASSERT(m->hold_count > 0); - if (m->hold_count > 1) { - vm_page_unhold(m); - return 0; - } else { - return _pmap_unwire_pte_hold(pmap, m); - } -} - -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ -static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) -{ - unsigned ptepindex; - - ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); - - if (mpte == NULL) { - /* - * page table pages in the kernel_pmap are not managed. - */ - if (pmap == &kernel_pmap) - return(0); - ptepindex = (va >> PDRSHIFT); - if (pmap->pm_ptphint && - (pmap->pm_ptphint->pindex == ptepindex)) { - mpte = pmap->pm_ptphint; - } else { - mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); - pmap->pm_ptphint = mpte; - vm_page_wakeup(mpte); - } - } - return pmap_unwire_pte_hold(pmap, mpte); -} - -/* - * Attempt to release and free the vm_page backing a page directory page - * in a pmap. Returns 1 on success, 0 on failure (if the procedure had - * to sleep). - */ -static int -pmap_release_free_page(struct pmap *pmap, vm_page_t p) -{ - vpte_t *pde = pmap->pm_pdir; - - /* - * This code optimizes the case of freeing non-busy - * page-table pages. Those pages are zero now, and - * might as well be placed directly into the zero queue. - */ - if (vm_page_busy_try(p, FALSE)) { - vm_page_sleep_busy(p, FALSE, "pmaprl"); - return 0; - } - KKASSERT(pmap->pm_stats.resident_count > 0); - --pmap->pm_stats.resident_count; - - if (p->hold_count) { - panic("pmap_release: freeing held page table page"); - } - /* - * Page directory pages need to have the kernel stuff cleared, so - * they can go into the zero queue also. - * - * In virtual kernels there is no 'kernel stuff'. For the moment - * I just make sure the whole thing has been zero'd even though - * it should already be completely zero'd. - * - * pmaps for vkernels do not self-map because they do not share - * their address space with the vkernel. Clearing of pde[] thus - * only applies to page table pages and not to the page directory - * page. - */ - if (p->pindex == pmap->pm_pdindex) { - bzero(pde, VPTE_PAGETABLE_SIZE); - pmap_kremove((vm_offset_t)pmap->pm_pdir); - } else { - KKASSERT(pde[p->pindex] != 0); - pmap_inval_pde(&pde[p->pindex], pmap, - (vm_offset_t)p->pindex << SEG_SHIFT); - } - - /* - * Clear the matching hint - */ - if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) - pmap->pm_ptphint = NULL; - - /* - * And throw the page away. The page is completely zero'd out so - * optimize the free call. - */ - p->wire_count--; - atomic_add_int(&vmstats.v_wire_count, -1); - vm_page_free_zero(p); - return 1; -} - -/* - * This routine is called if the page table page is not mapped in the page - * table directory. - * - * The routine is broken up into two parts for readability. - * - * It must return a held mpte and map the page directory page as required. - * Because vm_page_grab() can block, we must re-check pm_pdir[ptepindex] - */ -static vm_page_t -_pmap_allocpte(pmap_t pmap, unsigned ptepindex) -{ - vm_paddr_t ptepa; - vm_page_t m; - - /* - * Find or fabricate a new pagetable page. A busied page will be - * returned. This call may block. - */ - m = vm_page_grab(pmap->pm_pteobj, ptepindex, - VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); - vm_page_flag_set(m, PG_MAPPED); - - KASSERT(m->queue == PQ_NONE, - ("_pmap_allocpte: %p->queue != PQ_NONE", m)); - - /* - * Increment the hold count for the page we will be returning to - * the caller. - */ - m->hold_count++; - - /* - * It is possible that someone else got in and mapped by the page - * directory page while we were blocked, if so just unbusy and - * return the held page. - */ - if ((ptepa = pmap->pm_pdir[ptepindex]) != 0) { - KKASSERT((ptepa & VPTE_FRAME) == VM_PAGE_TO_PHYS(m)); - vm_page_wakeup(m); - return(m); - } - vm_page_wire(m); - - /* - * Map the pagetable page into the process address space, if - * it isn't already there. - */ - ++pmap->pm_stats.resident_count; - - ptepa = VM_PAGE_TO_PHYS(m); - pmap->pm_pdir[ptepindex] = (vpte_t)ptepa | VPTE_RW | VPTE_V | - VPTE_A | VPTE_M; - - /* - * We are likely about to access this page table page, so set the - * page table hint to reduce overhead. - */ - pmap->pm_ptphint = m; - - vm_page_wakeup(m); - - return (m); -} - -/* - * Determine the page table page required to access the VA in the pmap - * and allocate it if necessary. Return a held vm_page_t for the page. - * - * Only used with user pmaps. - */ -static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va) -{ - unsigned ptepindex; - vm_offset_t ptepa; - vm_page_t m; - - ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); - - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; - - /* - * Get the page directory entry - */ - ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; - - /* - * This supports switching from a 4MB page to a - * normal 4K page. - */ - if (ptepa & VPTE_PS) { - KKASSERT(pmap->pm_pdir[ptepindex] != 0); - pmap_inval_pde(&pmap->pm_pdir[ptepindex], pmap, - (vm_offset_t)ptepindex << SEG_SHIFT); - ptepa = 0; - } - - /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. - */ - if (ptepa) { - /* - * In order to get the page table page, try the - * hint first. - */ - if (pmap->pm_ptphint && - (pmap->pm_ptphint->pindex == ptepindex)) { - m = pmap->pm_ptphint; - } else { - m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); - pmap->pm_ptphint = m; - vm_page_wakeup(m); - } - m->hold_count++; - return m; - } - /* - * Here if the pte page isn't mapped, or if it has been deallocated. - */ - return _pmap_allocpte(pmap, ptepindex); -} - -/************************************************************************ - * Managed pages in pmaps * - ************************************************************************ - * - * All pages entered into user pmaps and some pages entered into the kernel - * pmap are managed, meaning that pmap_protect() and other related management - * functions work on these pages. - */ - -/* - * free the pv_entry back to the free list. This function may be - * called from an interrupt. - */ -static __inline void -free_pv_entry(pv_entry_t pv) -{ - pv_entry_count--; - zfree(&pvzone, pv); -} - -/* - * get a new pv_entry, allocating a block from the system - * when needed. This function may be called from an interrupt. - */ -static pv_entry_t -get_pv_entry(void) -{ - pv_entry_count++; - if (pv_entry_high_water && - (pv_entry_count > pv_entry_high_water) && - (pmap_pagedaemon_waken == 0)) { - pmap_pagedaemon_waken = 1; - wakeup (&vm_pages_needed); - } - return zalloc(&pvzone); -} - -/* - * This routine is very drastic, but can save the system - * in a pinch. - * - * No requirements. - */ -void -pmap_collect(void) -{ - int i; - vm_page_t m; - static int warningdone=0; - - if (pmap_pagedaemon_waken == 0) - return; - lwkt_gettoken(&vm_token); - pmap_pagedaemon_waken = 0; - - if (warningdone < 5) { - kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); - warningdone++; - } - - for (i = 0; i < vm_page_array_size; i++) { - m = &vm_page_array[i]; - if (m->wire_count || m->hold_count) - continue; - if (vm_page_busy_try(m, TRUE) == 0) { - if (m->wire_count == 0 && m->hold_count == 0) { - pmap_remove_all(m); - } - vm_page_wakeup(m); - } - } - lwkt_reltoken(&vm_token); -} - -/* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. - * - * caller must hold vm_token - */ -static int -pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) -{ - pv_entry_t pv; - int rtval; - - crit_enter(); - if (m->md.pv_list_count < pmap->pm_stats.resident_count) { - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pmap == pv->pv_pmap && va == pv->pv_va) - break; - } - } else { - TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { - if (va == pv->pv_va) - break; - } - } - - /* - * Note that pv_ptem is NULL if the page table page itself is not - * managed, even if the page being removed IS managed. - */ - rtval = 0; - - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count--; - atomic_add_int(&m->object->agg_pv_list_count, -1); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - ++pmap->pm_generation; - vm_object_hold(pmap->pm_pteobj); - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); - vm_object_drop(pmap->pm_pteobj); - free_pv_entry(pv); - - crit_exit(); - return rtval; -} - -/* - * Create a pv entry for page at pa for (pmap, va). If the page table page - * holding the VA is managed, mpte will be non-NULL. - */ -static void -pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) -{ - pv_entry_t pv; - - crit_enter(); - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_ptem = mpte; - - TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - ++pmap->pm_generation; - m->md.pv_list_count++; - atomic_add_int(&m->object->agg_pv_list_count, 1); - - crit_exit(); -} - -/* - * pmap_remove_pte: do the things to unmap a page in a process - */ -static int -pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va) -{ - vpte_t oldpte; - vm_page_t m; - - oldpte = pmap_inval_loadandclear(ptq, pmap, va); - if (oldpte & VPTE_WIRED) - --pmap->pm_stats.wired_count; - KKASSERT(pmap->pm_stats.wired_count >= 0); - -#if 0 - /* - * Machines that don't support invlpg, also don't support - * VPTE_G. XXX VPTE_G is disabled for SMP so don't worry about - * the SMP case. - */ - if (oldpte & VPTE_G) - madvise((void *)va, PAGE_SIZE, MADV_INVAL); -#endif - KKASSERT(pmap->pm_stats.resident_count > 0); - --pmap->pm_stats.resident_count; - if (oldpte & VPTE_MANAGED) { - m = PHYS_TO_VM_PAGE(oldpte); - if (oldpte & VPTE_M) { -#if defined(PMAP_DIAGNOSTIC) - if (pmap_nw_modified((pt_entry_t) oldpte)) { - kprintf( - "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", - va, oldpte); - } -#endif - if (pmap_track_modified(pmap, va)) - vm_page_dirty(m); - } - if (oldpte & VPTE_A) - vm_page_flag_set(m, PG_REFERENCED); - return pmap_remove_entry(pmap, m, va); - } else { - return pmap_unuse_pt(pmap, va, NULL); - } - - return 0; -} - -/* - * pmap_remove_page: - * - * Remove a single page from a process address space. - * - * This function may not be called from an interrupt if the pmap is - * not kernel_pmap. - */ -static void -pmap_remove_page(struct pmap *pmap, vm_offset_t va) -{ - vpte_t *ptq; - - /* - * if there is no pte for this address, just skip it!!! Otherwise - * get a local va for mappings for this pmap and remove the entry. - */ - if (*pmap_pde(pmap, va) != 0) { - ptq = get_ptbase(pmap, va); - if (*ptq) { - pmap_remove_pte(pmap, ptq, va); - } - } -} - -/* - * Remove the given range of addresses from the specified map. - * - * It is assumed that the start and end are properly rounded to the - * page size. - * - * This function may not be called from an interrupt if the pmap is - * not kernel_pmap. - * - * No requirements. - */ -void -pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) -{ - vpte_t *ptbase; - vm_offset_t pdnxt; - vm_offset_t ptpaddr; - vm_pindex_t sindex, eindex; - - if (pmap == NULL) - return; - - vm_object_hold(pmap->pm_pteobj); - lwkt_gettoken(&vm_token); - KKASSERT(pmap->pm_stats.resident_count >= 0); - if (pmap->pm_stats.resident_count == 0) { - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); - return; - } - - /* - * special handling of removing one page. a very - * common operation and easy to short circuit some - * code. - */ - if (((sva + PAGE_SIZE) == eva) && - ((pmap->pm_pdir[(sva >> PDRSHIFT)] & VPTE_PS) == 0)) { - pmap_remove_page(pmap, sva); - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); - return; - } - - /* - * Get a local virtual address for the mappings that are being - * worked with. - * - * XXX this is really messy because the kernel pmap is not relative - * to address 0 - */ - sindex = (sva >> PAGE_SHIFT); - eindex = (eva >> PAGE_SHIFT); - - for (; sindex < eindex; sindex = pdnxt) { - vpte_t pdirindex; - - /* - * Calculate index for next page table. - */ - pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); - if (pmap->pm_stats.resident_count == 0) - break; - - pdirindex = sindex / NPDEPG; - if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) { - KKASSERT(pmap->pm_pdir[pdirindex] != 0); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - pmap_inval_pde(&pmap->pm_pdir[pdirindex], pmap, - (vm_offset_t)pdirindex << SEG_SHIFT); - continue; - } - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - /* - * Limit our scan to either the end of the va represented - * by the current page table page, or to the end of the - * range being removed. - */ - if (pdnxt > eindex) - pdnxt = eindex; - - /* - * NOTE: pmap_remove_pte() can block. - */ - for (; sindex != pdnxt; sindex++) { - vm_offset_t va; - - ptbase = get_ptbase(pmap, sindex << PAGE_SHIFT); - if (*ptbase == 0) - continue; - va = i386_ptob(sindex); - if (pmap_remove_pte(pmap, ptbase, va)) - break; - } - } - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); -} - -/* - * Removes this physical page from all physical maps in which it resides. - * Reflects back modify bits to the pager. - * - * This routine may not be called from an interrupt. - * - * No requirements. - */ -static void -pmap_remove_all(vm_page_t m) -{ - vpte_t *pte, tpte; - pv_entry_t pv; - -#if defined(PMAP_DIAGNOSTIC) - /* - * XXX this makes pmap_page_protect(NONE) illegal for non-managed - * pages! - */ - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { - panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); - } -#endif - - lwkt_gettoken(&vm_token); - while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); - --pv->pv_pmap->pm_stats.resident_count; - - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - KKASSERT(pte != NULL); - - tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); - if (tpte & VPTE_WIRED) - --pv->pv_pmap->pm_stats.wired_count; - KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); - - if (tpte & VPTE_A) - vm_page_flag_set(m, PG_REFERENCED); - - /* - * Update the vm_page_t clean and reference bits. - */ - if (tpte & VPTE_M) { -#if defined(PMAP_DIAGNOSTIC) - if (pmap_nw_modified((pt_entry_t) tpte)) { - kprintf( - "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", - pv->pv_va, tpte); - } -#endif - if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) - vm_page_dirty(m); - } - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); - ++pv->pv_pmap->pm_generation; - m->md.pv_list_count--; - atomic_add_int(&m->object->agg_pv_list_count, -1); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - vm_object_hold(pv->pv_pmap->pm_pteobj); - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - vm_object_drop(pv->pv_pmap->pm_pteobj); - free_pv_entry(pv); - } - KKASSERT((m->flags & (PG_MAPPED | PG_WRITEABLE)) == 0); - lwkt_reltoken(&vm_token); -} - -/* - * Set the physical protection on the specified range of this map - * as requested. - * - * This function may not be called from an interrupt if the map is - * not the kernel_pmap. - * - * No requirements. - */ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) -{ - vpte_t *ptbase; - vpte_t *ptep; - vm_offset_t pdnxt, ptpaddr; - vm_pindex_t sindex, eindex; - vm_pindex_t sbase; - - if (pmap == NULL) - return; - - if ((prot & VM_PROT_READ) == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } - - if (prot & VM_PROT_WRITE) - return; - - lwkt_gettoken(&vm_token); - ptbase = get_ptbase(pmap, sva); - - sindex = (sva >> PAGE_SHIFT); - eindex = (eva >> PAGE_SHIFT); - sbase = sindex; - - for (; sindex < eindex; sindex = pdnxt) { - - unsigned pdirindex; - - pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); - - pdirindex = sindex / NPDEPG; - - /* - * Clear the modified and writable bits for a 4m page. - * Throw away the modified bit (?) - */ - if (((ptpaddr = pmap->pm_pdir[pdirindex]) & VPTE_PS) != 0) { - pmap_clean_pde(&pmap->pm_pdir[pdirindex], pmap, - (vm_offset_t)pdirindex << SEG_SHIFT); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - continue; - } - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - if (pdnxt > eindex) { - pdnxt = eindex; - } - - for (; sindex != pdnxt; sindex++) { - vpte_t pbits; - vm_page_t m; - - /* - * Clean managed pages and also check the accessed - * bit. Just remove write perms for unmanaged - * pages. Be careful of races, turning off write - * access will force a fault rather then setting - * the modified bit at an unexpected time. - */ - ptep = &ptbase[sindex - sbase]; - if (*ptep & VPTE_MANAGED) { - pbits = pmap_clean_pte(ptep, pmap, - i386_ptob(sindex)); - m = NULL; - if (pbits & VPTE_A) { - m = PHYS_TO_VM_PAGE(pbits); - vm_page_flag_set(m, PG_REFERENCED); - atomic_clear_long(ptep, VPTE_A); - } - if (pbits & VPTE_M) { - if (pmap_track_modified(pmap, i386_ptob(sindex))) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits); - vm_page_dirty(m); - } - } - } else { - pbits = pmap_setro_pte(ptep, pmap, - i386_ptob(sindex)); - } - } - } - lwkt_reltoken(&vm_token); -} - -/* - * Enter a managed page into a pmap. If the page is not wired related pmap - * data can be destroyed at any time for later demand-operation. - * - * Insert the vm_page (m) at virtual address (v) in (pmap), with the - * specified protection, and wire the mapping if requested. - * - * NOTE: This routine may not lazy-evaluate or lose information. The - * page must actually be inserted into the given map NOW. - * - * NOTE: When entering a page at a KVA address, the pmap must be the - * kernel_pmap. - * - * No requirements. - */ -void -pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - boolean_t wired, vm_map_entry_t entry __unused) -{ - vm_paddr_t pa; - vpte_t *pte; - vm_paddr_t opa; - vpte_t origpte, newpte; - vm_page_t mpte; - - if (pmap == NULL) - return; - - va &= VPTE_FRAME; - - vm_object_hold(pmap->pm_pteobj); - lwkt_gettoken(&vm_token); - - /* - * Get the page table page. The kernel_pmap's page table pages - * are preallocated and have no associated vm_page_t. - */ - if (pmap == &kernel_pmap) - mpte = NULL; - else - mpte = pmap_allocpte(pmap, va); - - pte = pmap_pte(pmap, va); - - /* - * Page Directory table entry not valid, we need a new PT page - * and pmap_allocpte() didn't give us one. Oops! - */ - if (pte == NULL) { - panic("pmap_enter: invalid page directory pmap=%p, va=0x%p", - pmap, (void *)va); - } - - /* - * Deal with races on the original mapping (though don't worry - * about VPTE_A races) by cleaning it. This will force a fault - * if an attempt is made to write to the page. - */ - pa = VM_PAGE_TO_PHYS(m) & VPTE_FRAME; - origpte = pmap_clean_pte(pte, pmap, va); - opa = origpte & VPTE_FRAME; - - if (origpte & VPTE_PS) - panic("pmap_enter: attempted pmap_enter on 4MB page"); - - /* - * Mapping has not changed, must be protection or wiring change. - */ - if (origpte && (opa == pa)) { - /* - * Wiring change, just update stats. We don't worry about - * wiring PT pages as they remain resident as long as there - * are valid mappings in them. Hence, if a user page is wired, - * the PT page will be also. - */ - if (wired && ((origpte & VPTE_WIRED) == 0)) - ++pmap->pm_stats.wired_count; - else if (!wired && (origpte & VPTE_WIRED)) - --pmap->pm_stats.wired_count; - KKASSERT(pmap->pm_stats.wired_count >= 0); - - /* - * Remove the extra pte reference. Note that we cannot - * optimize the RO->RW case because we have adjusted the - * wiring count above and may need to adjust the wiring - * bits below. - */ - if (mpte) - mpte->hold_count--; - - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. - */ - if (origpte & VPTE_MANAGED) { - if ((origpte & VPTE_M) && - pmap_track_modified(pmap, va)) { - vm_page_t om; - om = PHYS_TO_VM_PAGE(opa); - vm_page_dirty(om); - } - pa |= VPTE_MANAGED; - KKASSERT(m->flags & PG_MAPPED); - } - goto validate; - } - /* - * Mapping has changed, invalidate old range and fall through to - * handle validating new mapping. - */ - while (opa) { - int err; - err = pmap_remove_pte(pmap, pte, va); - if (err) - panic("pmap_enter: pte vanished, va: %p", (void *)va); - pte = pmap_pte(pmap, va); - origpte = pmap_clean_pte(pte, pmap, va); - opa = origpte & VPTE_FRAME; - if (opa) { - kprintf("pmap_enter: Warning, raced pmap %p va %p\n", - pmap, (void *)va); - } - } - - /* - * Enter on the PV list if part of our managed memory. Note that we - * raise IPL while manipulating pv_table since pmap_enter can be - * called at interrupt time. - */ - if (pmap_initialized && - (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { - pmap_insert_entry(pmap, va, mpte, m); - pa |= VPTE_MANAGED; - vm_page_flag_set(m, PG_MAPPED); - } - - /* - * Increment counters - */ - ++pmap->pm_stats.resident_count; - if (wired) - pmap->pm_stats.wired_count++; - -validate: - /* - * Now validate mapping with desired protection/wiring. - */ - newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | VPTE_V); - - if (wired) - newpte |= VPTE_WIRED; - if (pmap != &kernel_pmap) - newpte |= VPTE_U; - - /* - * If the mapping or permission bits are different from the - * (now cleaned) original pte, an update is needed. We've - * already downgraded or invalidated the page so all we have - * to do now is update the bits. - * - * XXX should we synchronize RO->RW changes to avoid another - * fault? - */ - if ((origpte & ~(VPTE_RW|VPTE_M|VPTE_A)) != newpte) { - *pte = newpte | VPTE_A; - if (newpte & VPTE_RW) - vm_page_flag_set(m, PG_WRITEABLE); - } - KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED); - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); -} - -/* - * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. - * - * Currently this routine may only be used on user pmaps, not kernel_pmap. - */ -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - vpte_t *pte; - vm_paddr_t pa; - vm_page_t mpte; - unsigned ptepindex; - vm_offset_t ptepa; - - KKASSERT(pmap != &kernel_pmap); - - KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS); - - /* - * Calculate pagetable page (mpte), allocating it if necessary. - * - * A held page table page (mpte), or NULL, is passed onto the - * section following. - */ - ptepindex = va >> PDRSHIFT; - - vm_object_hold(pmap->pm_pteobj); - lwkt_gettoken(&vm_token); - - do { - /* - * Get the page directory entry - */ - ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; - - /* - * If the page table page is mapped, we just increment - * the hold count, and activate it. - */ - if (ptepa) { - if (ptepa & VPTE_PS) - panic("pmap_enter_quick: unexpected mapping into 4MB page"); - if (pmap->pm_ptphint && - (pmap->pm_ptphint->pindex == ptepindex)) { - mpte = pmap->pm_ptphint; - } else { - mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); - pmap->pm_ptphint = mpte; - vm_page_wakeup(mpte); - } - if (mpte) - mpte->hold_count++; - } else { - mpte = _pmap_allocpte(pmap, ptepindex); - } - } while (mpte == NULL); - - /* - * Ok, now that the page table page has been validated, get the pte. - * If the pte is already mapped undo mpte's hold_count and - * just return. - */ - pte = pmap_pte(pmap, va); - if (*pte) { - pmap_unwire_pte_hold(pmap, mpte); - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); - return; - } - - /* - * Enter on the PV list if part of our managed memory. Note that we - * raise IPL while manipulating pv_table since pmap_enter can be - * called at interrupt time. - */ - if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { - pmap_insert_entry(pmap, va, mpte, m); - vm_page_flag_set(m, PG_MAPPED); - } - - /* - * Increment counters - */ - ++pmap->pm_stats.resident_count; - - pa = VM_PAGE_TO_PHYS(m); - - /* - * Now validate mapping with RO protection - */ - if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) - *pte = (vpte_t)pa | VPTE_V | VPTE_U; - else - *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED; - /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */ - /*pmap_inval_flush(&info); don't need for vkernel */ - lwkt_reltoken(&vm_token); - vm_object_drop(pmap->pm_pteobj); -} - -/* - * Extract the physical address for the translation at the specified - * virtual address in the pmap. - * - * The caller must hold vm_token if non-blocking operation is desired. - * No requirements. - */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) -{ - vm_paddr_t rtval; - vpte_t pte; - - lwkt_gettoken(&vm_token); - if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) { - if (pte & VPTE_PS) { - rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1); - rtval |= va & SEG_MASK; - } else { - pte = *get_ptbase(pmap, va); - rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK); - } - } else { - rtval = 0; - } - lwkt_reltoken(&vm_token); - return(rtval); -} - -/* - * Similar to extract but checks protections, SMP-friendly short-cut for - * vm_fault_page[_quick](). - */ -vm_page_t -pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused, - vm_prot_t prot __unused) -{ - return(NULL); -} - -#define MAX_INIT_PT (96) - -/* - * This routine preloads the ptes for a given object into the specified pmap. - * This eliminates the blast of soft faults on process startup and - * immediately after an mmap. - * - * No requirements. - */ -static int pmap_object_init_pt_callback(vm_page_t p, void *data); - -void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, - vm_object_t object, vm_pindex_t pindex, - vm_size_t size, int limit) -{ - struct rb_vm_page_scan_info info; - struct lwp *lp; - int psize; - - /* - * We can't preinit if read access isn't set or there is no pmap - * or object. - */ - if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) - return; - - /* - * We can't preinit if the pmap is not the current pmap - */ - lp = curthread->td_lwp; - if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) - return; - - psize = size >> PAGE_SHIFT; - - if ((object->type != OBJT_VNODE) || - ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && - (object->resident_page_count > MAX_INIT_PT))) { - return; - } - - if (psize + pindex > object->size) { - if (object->size < pindex) - return; - psize = object->size - pindex; - } - - if (psize == 0) - return; - - /* - * Use a red-black scan to traverse the requested range and load - * any valid pages found into the pmap. - * - * We cannot safely scan the object's memq unless we are in a - * critical section since interrupts can remove pages from objects. - */ - info.start_pindex = pindex; - info.end_pindex = pindex + psize - 1; - info.limit = limit; - info.mpte = NULL; - info.addr = addr; - info.pmap = pmap; - - vm_object_hold_shared(object); - vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, - pmap_object_init_pt_callback, &info); - vm_object_drop(object); -} - -/* - * The caller must hold vm_token. - */ -static -int -pmap_object_init_pt_callback(vm_page_t p, void *data) -{ - struct rb_vm_page_scan_info *info = data; - vm_pindex_t rel_index; - - /* - * don't allow an madvise to blow away our really - * free pages allocating pv entries. - */ - if ((info->limit & MAP_PREFAULT_MADVISE) && - vmstats.v_free_count < vmstats.v_free_reserved) { - return(-1); - } - - /* - * Ignore list markers and ignore pages we cannot instantly - * busy (while holding the object token). - */ - if (p->flags & PG_MARKER) - return 0; - if (vm_page_busy_try(p, TRUE)) - return 0; - if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (p->flags & PG_FICTITIOUS) == 0) { - if ((p->queue - p->pc) == PQ_CACHE) - vm_page_deactivate(p); - rel_index = p->pindex - info->start_pindex; - pmap_enter_quick(info->pmap, - info->addr + i386_ptob(rel_index), p); - } - vm_page_wakeup(p); - return(0); -} - -/* - * Return TRUE if the pmap is in shape to trivially - * pre-fault the specified address. - * - * Returns FALSE if it would be non-trivial or if a - * pte is already loaded into the slot. - * - * No requirements. - */ -int -pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) -{ - vpte_t *pte; - int ret; - - lwkt_gettoken(&vm_token); - if ((*pmap_pde(pmap, addr)) == 0) { - ret = 0; - } else { - pte = get_ptbase(pmap, addr); - ret = (*pte) ? 0 : 1; - } - lwkt_reltoken(&vm_token); - return (ret); -} - -/* - * Change the wiring attribute for a map/virtual-address pair. - * The mapping must already exist in the pmap. - * - * No other requirements. - */ -void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, - vm_map_entry_t entry __unused) -{ - vpte_t *pte; - - if (pmap == NULL) - return; - - lwkt_gettoken(&vm_token); - pte = get_ptbase(pmap, va); - - if (wired && (*pte & VPTE_WIRED) == 0) - ++pmap->pm_stats.wired_count; - else if (!wired && (*pte & VPTE_WIRED)) - --pmap->pm_stats.wired_count; - KKASSERT(pmap->pm_stats.wired_count >= 0); - - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. However, in an SMP environment we must use - * a locked bus cycle to update the pte (if we are not using - * the pmap_inval_*() API that is)... it's ok to do this for simple - * wiring changes. - */ - if (wired) - atomic_set_long(pte, VPTE_WIRED); - else - atomic_clear_long(pte, VPTE_WIRED); - lwkt_reltoken(&vm_token); -} - -/* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. - */ -void -pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, - vm_size_t len, vm_offset_t src_addr) -{ - vm_offset_t addr; - vm_offset_t end_addr = src_addr + len; - vm_offset_t pdnxt; - vpte_t *src_frame; - vpte_t *dst_frame; - vm_page_t m; - - /* - * XXX BUGGY. Amoung other things srcmpte is assumed to remain - * valid through blocking calls, and that's just not going to - * be the case. - * - * FIXME! - */ - return; - - if (dst_addr != src_addr) - return; - if (dst_pmap->pm_pdir == NULL) - return; - if (src_pmap->pm_pdir == NULL) - return; - - lwkt_gettoken(&vm_token); - - src_frame = get_ptbase1(src_pmap, src_addr); - dst_frame = get_ptbase2(dst_pmap, src_addr); - - /* - * critical section protection is required to maintain the page/object - * association, interrupts can free pages and remove them from - * their objects. - */ - for (addr = src_addr; addr < end_addr; addr = pdnxt) { - vpte_t *src_pte, *dst_pte; - vm_page_t dstmpte, srcmpte; - vm_offset_t srcptepaddr; - unsigned ptepindex; - - if (addr >= VM_MAX_USER_ADDRESS) - panic("pmap_copy: invalid to pmap_copy page tables"); - - /* - * Don't let optional prefaulting of pages make us go - * way below the low water mark of free pages or way - * above high water mark of used pv entries. - */ - if (vmstats.v_free_count < vmstats.v_free_reserved || - pv_entry_count > pv_entry_high_water) - break; - - pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); - ptepindex = addr >> PDRSHIFT; - - srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; - if (srcptepaddr == 0) - continue; - - if (srcptepaddr & VPTE_PS) { - if (dst_pmap->pm_pdir[ptepindex] == 0) { - dst_pmap->pm_pdir[ptepindex] = (vpte_t)srcptepaddr; - dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; - } - continue; - } - - srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); - if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || - (srcmpte->flags & PG_BUSY)) { - continue; - } - - if (pdnxt > end_addr) - pdnxt = end_addr; - - src_pte = src_frame + ((addr - src_addr) >> PAGE_SHIFT); - dst_pte = dst_frame + ((addr - src_addr) >> PAGE_SHIFT); - while (addr < pdnxt) { - vpte_t ptetemp; - - ptetemp = *src_pte; - /* - * we only virtual copy managed pages - */ - if ((ptetemp & VPTE_MANAGED) != 0) { - /* - * We have to check after allocpte for the - * pte still being around... allocpte can - * block. - * - * pmap_allocpte can block, unfortunately - * we have to reload the tables. - */ - dstmpte = pmap_allocpte(dst_pmap, addr); - src_frame = get_ptbase1(src_pmap, src_addr); - dst_frame = get_ptbase2(dst_pmap, src_addr); - - if ((*dst_pte == 0) && (ptetemp = *src_pte) && - (ptetemp & VPTE_MANAGED) != 0) { - /* - * Clear the modified and accessed - * (referenced) bits during the copy. - * - * We do not have to clear the write - * bit to force a fault-on-modify - * because the real kernel's target - * pmap is empty and will fault anyway. - */ - m = PHYS_TO_VM_PAGE(ptetemp); - *dst_pte = ptetemp & ~(VPTE_M | VPTE_A); - ++dst_pmap->pm_stats.resident_count; - pmap_insert_entry(dst_pmap, addr, - dstmpte, m); - KKASSERT(m->flags & PG_MAPPED); - } else { - pmap_unwire_pte_hold(dst_pmap, dstmpte); - } - if (dstmpte->hold_count >= srcmpte->hold_count) - break; - } - addr += PAGE_SIZE; - src_pte++; - dst_pte++; - } - } - lwkt_reltoken(&vm_token); -} - -/* - * pmap_zero_page: - * - * Zero the specified PA by mapping the page into KVM and clearing its - * contents. - * - * This function may be called from an interrupt and no locking is - * required. - */ -void -pmap_zero_page(vm_paddr_t phys) -{ - struct mdglobaldata *gd = mdcpu; - - crit_enter(); - if (*gd->gd_CMAP3) - panic("pmap_zero_page: CMAP3 busy"); - *gd->gd_CMAP3 = VPTE_V | VPTE_RW | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); - - bzero(gd->gd_CADDR3, PAGE_SIZE); - *gd->gd_CMAP3 = 0; - crit_exit(); -} - -/* - * pmap_page_assertzero: - * - * Assert that a page is empty, panic if it isn't. - */ -void -pmap_page_assertzero(vm_paddr_t phys) -{ - struct mdglobaldata *gd = mdcpu; - int i; - - crit_enter(); - if (*gd->gd_CMAP3) - panic("pmap_zero_page: CMAP3 busy"); - *gd->gd_CMAP3 = VPTE_V | VPTE_RW | - (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); - for (i = 0; i < PAGE_SIZE; i += 4) { - if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { - panic("pmap_page_assertzero() @ %p not zero!", - (void *)gd->gd_CADDR3); - } - } - *gd->gd_CMAP3 = 0; - crit_exit(); -} - -/* - * pmap_zero_page: - * - * Zero part of a physical page by mapping it into memory and clearing - * its contents with bzero. - * - * off and size may not cover an area beyond a single hardware page. - */ -void -pmap_zero_page_area(vm_paddr_t phys, int off, int size) -{ - struct mdglobaldata *gd = mdcpu; - - crit_enter(); - if (*gd->gd_CMAP3) - panic("pmap_zero_page: CMAP3 busy"); - *gd->gd_CMAP3 = VPTE_V | VPTE_RW | - (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); - - bzero((char *)gd->gd_CADDR3 + off, size); - *gd->gd_CMAP3 = 0; - crit_exit(); -} - -/* - * pmap_copy_page: - * - * Copy the physical page from the source PA to the target PA. - * This function may be called from an interrupt. No locking - * is required. - */ -void -pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) -{ - struct mdglobaldata *gd = mdcpu; - - crit_enter(); - if (*(int *) gd->gd_CMAP1) - panic("pmap_copy_page: CMAP1 busy"); - if (*(int *) gd->gd_CMAP2) - panic("pmap_copy_page: CMAP2 busy"); - - *(int *) gd->gd_CMAP1 = VPTE_V | (src & PG_FRAME) | VPTE_A; - *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_RW | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - - madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); - madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); - - bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); - - *(int *) gd->gd_CMAP1 = 0; - *(int *) gd->gd_CMAP2 = 0; - crit_exit(); -} - -/* - * pmap_copy_page_frag: - * - * Copy the physical page from the source PA to the target PA. - * This function may be called from an interrupt. No locking - * is required. - */ -void -pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) -{ - struct mdglobaldata *gd = mdcpu; - - crit_enter(); - if (*(int *) gd->gd_CMAP1) - panic("pmap_copy_page: CMAP1 busy"); - if (*(int *) gd->gd_CMAP2) - panic("pmap_copy_page: CMAP2 busy"); - - *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A; - *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_RW | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - - madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); - madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); - - bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), - (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), - bytes); - - *(int *) gd->gd_CMAP1 = 0; - *(int *) gd->gd_CMAP2 = 0; - crit_exit(); -} - -/* - * Returns true if the pmap's pv is one of the first - * 16 pvs linked to from this page. This count may - * be changed upwards or downwards in the future; it - * is only necessary that true be returned for a small - * subset of pmaps for proper page aging. - * - * No requirements. - */ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) -{ - pv_entry_t pv; - int loops = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - crit_enter(); - lwkt_gettoken(&vm_token); - - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pv->pv_pmap == pmap) { - lwkt_reltoken(&vm_token); - crit_exit(); - return TRUE; - } - loops++; - if (loops >= 16) - break; - } - lwkt_reltoken(&vm_token); - crit_exit(); - return (FALSE); -} - -/* - * Remove all pages from specified address space - * this aids process exit speeds. Also, this code - * is special cased for current process only, but - * can have the more generic (and slightly slower) - * mode enabled. This is much faster than pmap_remove - * in the case of running down an entire address space. - * - * No requirements. - */ -void -pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vpte_t *pte, tpte; - pv_entry_t pv, npv; - vm_page_t m; - int32_t save_generation; - - if (pmap->pm_pteobj) - vm_object_hold(pmap->pm_pteobj); - lwkt_gettoken(&vm_token); - for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { - if (pv->pv_va >= eva || pv->pv_va < sva) { - npv = TAILQ_NEXT(pv, pv_plist); - continue; - } - - KKASSERT(pmap == pv->pv_pmap); - - pte = pmap_pte(pmap, pv->pv_va); - - /* - * We cannot remove wired pages from a process' mapping - * at this time - */ - if (*pte & VPTE_WIRED) { - npv = TAILQ_NEXT(pv, pv_plist); - continue; - } - tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); - - m = PHYS_TO_VM_PAGE(tpte); - - KASSERT(m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad tpte %lx", tpte)); - - KKASSERT(pmap->pm_stats.resident_count > 0); - --pmap->pm_stats.resident_count; - - /* - * Update the vm_page_t clean and reference bits. - */ - if (tpte & VPTE_M) { - vm_page_dirty(m); - } - - npv = TAILQ_NEXT(pv, pv_plist); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - save_generation = ++pmap->pm_generation; - - m->md.pv_list_count--; - atomic_add_int(&m->object->agg_pv_list_count, -1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_FIRST(&m->md.pv_list) == NULL) - vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - - pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); - - /* - * Restart the scan if we blocked during the unuse or free - * calls and other removals were made. - */ - if (save_generation != pmap->pm_generation) { - kprintf("Warning: pmap_remove_pages race-A avoided\n"); - npv = TAILQ_FIRST(&pmap->pm_pvlist); - } - } - lwkt_reltoken(&vm_token); - if (pmap->pm_pteobj) - vm_object_drop(pmap->pm_pteobj); -} - -/* - * pmap_testbit tests bits in active mappings of a VM page. - * - * The caller must hold vm_token - */ -static boolean_t -pmap_testbit(vm_page_t m, int bit) -{ - pv_entry_t pv; - vpte_t *pte; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - if (TAILQ_FIRST(&m->md.pv_list) == NULL) - return FALSE; - - crit_enter(); - - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (bit & (VPTE_A|VPTE_M)) { - if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) - continue; - } - -#if defined(PMAP_DIAGNOSTIC) - if (!pv->pv_pmap) { - kprintf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); - continue; - } -#endif - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if (*pte & bit) { - crit_exit(); - return TRUE; - } - } - crit_exit(); - return (FALSE); -} - -/* - * This routine is used to clear bits in ptes. Certain bits require special - * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. - * - * This routine is only called with certain VPTE_* bit combinations. - * - * The caller must hold vm_token - */ -static __inline void -pmap_clearbit(vm_page_t m, int bit) -{ - pv_entry_t pv; - vpte_t *pte; - vpte_t pbits; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return; - - crit_enter(); - - /* - * Loop over all current mappings setting/clearing as appropos If - * setting RO do we need to clear the VAC? - */ - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - /* - * don't write protect pager mappings - */ - if (bit == VPTE_RW) { - if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) - continue; - } - -#if defined(PMAP_DIAGNOSTIC) - if (!pv->pv_pmap) { - kprintf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); - continue; - } -#endif - - /* - * Careful here. We can use a locked bus instruction to - * clear VPTE_A or VPTE_M safely but we need to synchronize - * with the target cpus when we mess with VPTE_EW. - * - * On virtual kernels we must force a new fault-on-write - * in the real kernel if we clear the Modify bit ourselves, - * otherwise the real kernel will not get a new fault and - * will never set our Modify bit again. - */ - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if (*pte & bit) { - if (bit == VPTE_RW) { - /* - * We must also clear VPTE_M when clearing - * VPTE_RW - */ - pbits = pmap_clean_pte(pte, pv->pv_pmap, - pv->pv_va); - if (pbits & VPTE_M) - vm_page_dirty(m); - } else if (bit == VPTE_M) { - /* - * We do not have to make the page read-only - * when clearing the Modify bit. The real - * kernel will make the real PTE read-only - * or otherwise detect the write and set - * our VPTE_M again simply by us invalidating - * the real kernel VA for the pmap (as we did - * above). This allows the real kernel to - * handle the write fault without forwarding - * the fault to us. - */ - atomic_clear_long(pte, VPTE_M); - } else if ((bit & (VPTE_RW|VPTE_M)) == (VPTE_RW|VPTE_M)) { - /* - * We've been asked to clear W & M, I guess - * the caller doesn't want us to update - * the dirty status of the VM page. - */ - pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va); - } else { - /* - * We've been asked to clear bits that do - * not interact with hardware. - */ - atomic_clear_long(pte, bit); - } - } - } - crit_exit(); -} - -/* - * Lower the permission for all mappings to a given page. - * - * No requirements. - */ -void -pmap_page_protect(vm_page_t m, vm_prot_t prot) -{ - if ((prot & VM_PROT_WRITE) == 0) { - lwkt_gettoken(&vm_token); - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - pmap_clearbit(m, VPTE_RW); - vm_page_flag_clear(m, PG_WRITEABLE); - } else { - pmap_remove_all(m); - } - lwkt_reltoken(&vm_token); - } -} - -vm_paddr_t -pmap_phys_address(vm_pindex_t ppn) -{ - return (i386_ptob(ppn)); -} - -/* - * Return a count of reference bits for a page, clearing those bits. - * It is not necessary for every reference bit to be cleared, but it - * is necessary that 0 only be returned when there are truly no - * reference bits set. - * - * XXX: The exact number of bits to check and clear is a matter that - * should be tested and standardized at some point in the future for - * optimal aging of shared pages. - * - * No requirements. - */ -int -pmap_ts_referenced(vm_page_t m) -{ - pv_entry_t pv, pvf, pvn; - vpte_t *pte; - int rtval = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return (rtval); - - crit_enter(); - lwkt_gettoken(&vm_token); - - if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - - pvf = pv; - - do { - pvn = TAILQ_NEXT(pv, pv_list); - - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - - if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) - continue; - - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - - if (pte && (*pte & VPTE_A)) { - atomic_clear_long(pte, VPTE_A); - rtval++; - if (rtval > 4) { - break; - } - } - } while ((pv = pvn) != NULL && pv != pvf); - } - lwkt_reltoken(&vm_token); - crit_exit(); - - return (rtval); -} - -/* - * Return whether or not the specified physical page was modified - * in any physical maps. - * - * No requirements. - */ -boolean_t -pmap_is_modified(vm_page_t m) -{ - boolean_t res; - - lwkt_gettoken(&vm_token); - res = pmap_testbit(m, VPTE_M); - lwkt_reltoken(&vm_token); - return (res); -} - -/* - * Clear the modify bits on the specified physical page. - * - * No requirements. - */ -void -pmap_clear_modify(vm_page_t m) -{ - lwkt_gettoken(&vm_token); - pmap_clearbit(m, VPTE_M); - lwkt_reltoken(&vm_token); -} - -/* - * Clear the reference bit on the specified physical page. - * - * No requirements. - */ -void -pmap_clear_reference(vm_page_t m) -{ - lwkt_gettoken(&vm_token); - pmap_clearbit(m, VPTE_A); - lwkt_reltoken(&vm_token); -} - -/* - * Miscellaneous support routines follow - */ - -static void -i386_protection_init(void) -{ - int *kp, prot; - - kp = protection_codes; - for (prot = 0; prot < 8; prot++) { - if (prot & VM_PROT_READ) - *kp |= 0; - if (prot & VM_PROT_WRITE) - *kp |= VPTE_RW; - if (prot & VM_PROT_EXECUTE) - *kp |= 0; - ++kp; - } -} - -#if 0 - -/* - * Map a set of physical memory pages into the kernel virtual - * address space. Return a pointer to where it is mapped. This - * routine is intended to be used for mapping device memory, - * NOT real memory. - * - * NOTE: we can't use pgeflag unless we invalidate the pages one at - * a time. - */ -void * -pmap_mapdev(vm_paddr_t pa, vm_size_t size) -{ - vm_offset_t va, tmpva, offset; - vpte_t *pte; - - offset = pa & PAGE_MASK; - size = roundup(offset + size, PAGE_SIZE); - - va = kmem_alloc_nofault(&kernel_map, size, PAGE_SIZE); - if (!va) - panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); - - pa = pa & VPTE_FRAME; - for (tmpva = va; size > 0;) { - pte = KernelPTA + (tmpva >> PAGE_SHIFT); - *pte = pa | VPTE_RW | VPTE_V; /* | pgeflag; */ - size -= PAGE_SIZE; - tmpva += PAGE_SIZE; - pa += PAGE_SIZE; - } - cpu_invltlb(); - smp_invltlb(); - - return ((void *)(va + offset)); -} - -void -pmap_unmapdev(vm_offset_t va, vm_size_t size) -{ - vm_offset_t base, offset; - - base = va & VPTE_FRAME; - offset = va & PAGE_MASK; - size = roundup(offset + size, PAGE_SIZE); - pmap_qremove(va, size >> PAGE_SHIFT); - kmem_free(&kernel_map, base, size); -} - -#endif - -/* - * Sets the memory attribute for the specified page. - */ -void -pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) -{ - /* This is a vkernel, do nothing */ -} - -/* - * Change the PAT attribute on an existing kernel memory map. Caller - * must ensure that the virtual memory in question is not accessed - * during the adjustment. - */ -void -pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) -{ - /* This is a vkernel, do nothing */ -} - -/* - * Perform the pmap work for mincore - * - * No requirements. - */ -int -pmap_mincore(pmap_t pmap, vm_offset_t addr) -{ - vpte_t *ptep, pte; - vm_page_t m; - int val = 0; - - lwkt_gettoken(&vm_token); - - ptep = pmap_pte(pmap, addr); - if (ptep == NULL) { - lwkt_reltoken(&vm_token); - return 0; - } - - if ((pte = *ptep) != 0) { - vm_paddr_t pa; - - val = MINCORE_INCORE; - if ((pte & VPTE_MANAGED) == 0) - goto done; - - pa = pte & VPTE_FRAME; - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ - if (pte & VPTE_M) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - /* - * Modified by someone - */ - else if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - /* - * Referenced by us - */ - if (pte & VPTE_A) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - - /* - * Referenced by someone - */ - else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - } -done: - lwkt_reltoken(&vm_token); - return val; -} - -/* - * Caller must hold vmspace->vm_map.token for oldvm and newvm - */ -void -pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) -{ - struct vmspace *oldvm; - struct lwp *lp; - - oldvm = p->p_vmspace; - crit_enter(); - if (oldvm != newvm) { - p->p_vmspace = newvm; - KKASSERT(p->p_nthreads == 1); - lp = RB_ROOT(&p->p_lwp_tree); - pmap_setlwpvm(lp, newvm); - if (adjrefs) { - vmspace_ref(newvm); - vmspace_rel(oldvm); - } - } - crit_exit(); -} - -void -pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) -{ - struct vmspace *oldvm; - struct pmap *pmap; - struct mdglobaldata *gd = mdcpu; - - crit_enter(); - oldvm = lp->lwp_vmspace; - - if (oldvm != newvm) { - lp->lwp_vmspace = newvm; - if (curthread->td_lwp == lp) { - pmap = vmspace_pmap(newvm); - ATOMIC_CPUMASK_ORMASK(pmap->pm_active, - gd->mi.gd_cpumask); -#if defined(SWTCH_OPTIM_STATS) - tlb_flush_count++; -#endif - pmap = vmspace_pmap(oldvm); - ATOMIC_CPUMASK_NANDMASK(pmap->pm_active, - gd->mi.gd_cpumask); - } - } - crit_exit(); -} - - -vm_offset_t -pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) -{ - - if ((obj == NULL) || (size < NBPDR) || - ((obj->type != OBJT_DEVICE) && (obj->type != OBJT_MGTDEVICE))) { - return addr; - } - - addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); - return addr; -} - -/* - * Used by kmalloc/kfree, page already exists at va - */ -vm_page_t -pmap_kvtom(vm_offset_t va) -{ - vpte_t *ptep; - - KKASSERT(va >= KvaStart && va < KvaEnd); - ptep = KernelPTA + (va >> PAGE_SHIFT); - return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); -} - -void -pmap_object_init(vm_object_t object) -{ - /* empty */ -} - -void -pmap_object_free(vm_object_t object) -{ - /* empty */ -} diff --git a/sys/platform/vkernel/platform/pmap_inval.c b/sys/platform/vkernel/platform/pmap_inval.c deleted file mode 100644 index 26b8df8b93..0000000000 --- a/sys/platform/vkernel/platform/pmap_inval.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.4 2007/07/02 02:22:58 dillon Exp $ - */ - -/* - * pmap invalidation support code. Certain hardware requirements must - * be dealt with when manipulating page table entries and page directory - * entries within a pmap. In particular, we cannot safely manipulate - * page tables which are in active use by another cpu (even if it is - * running in userland) for two reasons: First, TLB writebacks will - * race against our own modifications and tests. Second, even if we - * were to use bus-locked instruction we can still screw up the - * target cpu's instruction pipeline due to Intel cpu errata. - * - * For our virtual page tables, the real kernel will handle SMP interactions - * with pmaps that may be active on other cpus. Even so, we have to be - * careful about bit setting races particularly when we are trying to clean - * a page and test the modified bit to avoid races where the modified bit - * might get set after our poll but before we clear the field. - */ -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static __inline -void -pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes) -{ - if (pmap == &kernel_pmap) { - madvise((void *)va, bytes, MADV_INVAL); - } else { - vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0); - } -} - -/* - * Invalidate a pte in a pmap and synchronize with target cpus - * as required. Throw away the modified and access bits. Use - * pmap_clean_pte() to do the same thing but also get an interlocked - * modified/access status. - * - * Clearing the field first (basically clearing VPTE_V) prevents any - * new races from occuring while we invalidate the TLB (i.e. the pmap - * on the real cpu), then clear it again to clean out any race that - * might have occured before the invalidation completed. - */ -void -pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - *ptep = 0; - pmap_inval_cpu(pmap, va, PAGE_SIZE); - *ptep = 0; -} - -/* - * Same as pmap_inval_pte() but only synchronize with the current - * cpu. For the moment its the same as the non-quick version. - */ -void -pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - *ptep = 0; - pmap_inval_cpu(pmap, va, PAGE_SIZE); - *ptep = 0; -} - -/* - * Invalidating page directory entries requires some additional - * sophistication. The cachemask must be cleared so the kernel - * resynchronizes its temporary page table mappings cache. - */ -void -pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - *ptep = 0; - pmap_inval_cpu(pmap, va, SEG_SIZE); - *ptep = 0; - pmap->pm_cpucachemask = 0; -} - -void -pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - pmap_inval_pde(ptep, pmap, va); -} - -/* - * These carefully handle interactions with other cpus and return - * the original vpte. Clearing VPTE_RW prevents us from racing the - * setting of VPTE_M, allowing us to invalidate the tlb (the real cpu's - * pmap) and get good status for VPTE_M. - * - * When messing with page directory entries we have to clear the cpu - * mask to force a reload of the kernel's page table mapping cache. - * - * clean: clear VPTE_M and VPTE_RW - * setro: clear VPTE_RW - * load&clear: clear entire field - */ -vpte_t -pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - vpte_t pte; - - pte = *ptep; - if (pte & VPTE_V) { - atomic_clear_long(ptep, VPTE_RW); - pmap_inval_cpu(pmap, va, PAGE_SIZE); - pte = *ptep; - atomic_clear_long(ptep, VPTE_RW|VPTE_M); - } - return(pte); -} - -vpte_t -pmap_clean_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - vpte_t pte; - - pte = *ptep; - if (pte & VPTE_V) { - atomic_clear_long(ptep, VPTE_RW); - pmap_inval_cpu(pmap, va, SEG_SIZE); - pte = *ptep; - atomic_clear_long(ptep, VPTE_RW|VPTE_M); - pmap->pm_cpucachemask = 0; - } - return(pte); -} - -/* - * This is an odd case and I'm not sure whether it even occurs in normal - * operation. Turn off write access to the page, clean out the tlb - * (the real cpu's pmap), and deal with any VPTE_M race that may have - * occured. VPTE_M is not cleared. - */ -vpte_t -pmap_setro_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) -{ - vpte_t pte; - - pte = *ptep; - if (pte & VPTE_V) { - pte = *ptep; - atomic_clear_long(ptep, VPTE_RW); - pmap_inval_cpu(pmap, va, PAGE_SIZE); - pte |= *ptep & VPTE_M; - } - return(pte); -} - -/* - * This is a combination of pmap_inval_pte() and pmap_clean_pte(). - * Firts prevent races with the 'A' and 'M' bits, then clean out - * the tlb (the real cpu's pmap), then incorporate any races that - * may have occured in the mean time, and finally zero out the pte. - */ -vpte_t -pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap, - vm_offset_t va) -{ - vpte_t pte; - - pte = *ptep; - if (pte & VPTE_V) { - pte = *ptep; - atomic_clear_long(ptep, VPTE_RW); - pmap_inval_cpu(pmap, va, PAGE_SIZE); - pte |= *ptep & (VPTE_A | VPTE_M); - } - *ptep = 0; - return(pte); -} - diff --git a/sys/platform/vkernel/platform/shutdown.c b/sys/platform/vkernel/platform/shutdown.c deleted file mode 100644 index d1d2fabb88..0000000000 --- a/sys/platform/vkernel/platform/shutdown.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2007 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Install a signal handler for SIGTERM which shuts down the virtual kernel - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include -#include - -static void shutdownsig(int signo); -static void shutdown_intr(void *arg __unused, void *frame __unused); - -static -void -initshutdown(void *arg __unused) -{ - struct sigaction sa; - - bzero(&sa, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_flags |= SA_NODEFER; - sa.sa_handler = shutdownsig; - sigaction(SIGTERM, &sa, NULL); - - register_int_virtual(2, shutdown_intr, NULL, "shutdown", NULL, - INTR_MPSAFE); -} - -static -void -shutdownsig(int signo) -{ - signalintr(2); -} - -SYSINIT(initshutdown, SI_BOOT2_PROC0, SI_ORDER_ANY, - initshutdown, NULL); - -/* - * DragonFly-safe interrupt thread. We are the only handler on interrupt - * #2 so we can just steal the thread's context forever. - */ -static -void -shutdown_intr(void *arg __unused, void *frame __unused) -{ - kprintf("Caught SIGTERM from host system. Shutting down...\n"); - if (initproc != NULL) { - ksignal(initproc, SIGUSR2); - } else { - reboot(RB_POWEROFF); - } -} - diff --git a/sys/platform/vkernel/platform/sysarch.c b/sys/platform/vkernel/platform/sysarch.c deleted file mode 100644 index 0ddcaf9d54..0000000000 --- a/sys/platform/vkernel/platform/sysarch.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/platform/vkernel/platform/sysarch.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ - */ -#include -#include -#include -#include -#include -#include - -int -sys_sysarch(struct sysarch_args *uap) -{ - return (EOPNOTSUPP); -} - -int -cpu_set_iopl(void) -{ - return (EOPNOTSUPP); -} - -int -cpu_clr_iopl(void) -{ - return (EOPNOTSUPP); -} - diff --git a/sys/platform/vkernel/platform/systimer.c b/sys/platform/vkernel/platform/systimer.c deleted file mode 100644 index 963522355b..0000000000 --- a/sys/platform/vkernel/platform/systimer.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. - * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#define VKTIMER_FREQ 1000000 /* 1us granularity */ - -static void vktimer_intr(void *dummy, struct intrframe *frame); - -int disable_rtc_set; -SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, - CTLFLAG_RW, &disable_rtc_set, 0, ""); -SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, - &tsc_present, 0, "TSC Available"); -SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, - &tsc_invariant, 0, "Invariant TSC"); -SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, - &tsc_mpsync, 0, "TSC is synchronized across CPUs"); -SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, - &tsc_frequency, 0, "TSC Frequency"); - -int adjkerntz; -int wall_cmos_clock = 0; -SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, - CTLFLAG_RD, &wall_cmos_clock, 0, ""); - -static struct kqueue_info *kqueue_timer_info; - -static int cputimer_mib[16]; -static int cputimer_miblen; - -/* - * SYSTIMER IMPLEMENTATION - */ -static sysclock_t vkernel_timer_get_timecount(void); -static void vkernel_timer_construct(struct cputimer *timer, sysclock_t oclock); - -static struct cputimer vkernel_cputimer = { - SLIST_ENTRY_INITIALIZER, - "VKERNEL", - CPUTIMER_PRI_VKERNEL, - CPUTIMER_VKERNEL, - vkernel_timer_get_timecount, - cputimer_default_fromhz, - cputimer_default_fromus, - vkernel_timer_construct, - cputimer_default_destruct, - VKTIMER_FREQ, - 0, 0, 0 -}; - -static void vktimer_intr_reload(struct cputimer_intr *, sysclock_t); -static void vktimer_intr_initclock(struct cputimer_intr *, boolean_t); - -static struct cputimer_intr vkernel_cputimer_intr = { - .freq = VKTIMER_FREQ, - .reload = vktimer_intr_reload, - .enable = cputimer_intr_default_enable, - .config = cputimer_intr_default_config, - .restart = cputimer_intr_default_restart, - .pmfixup = cputimer_intr_default_pmfixup, - .initclock = vktimer_intr_initclock, - .next = SLIST_ENTRY_INITIALIZER, - .name = "vkernel", - .type = CPUTIMER_INTR_VKERNEL, - .prio = CPUTIMER_INTR_PRIO_VKERNEL, - .caps = CPUTIMER_INTR_CAP_NONE -}; - -/* - * Initialize the systimer subsystem, called from MI code in early boot. - */ -static void -cpu_initclocks(void *arg __unused) -{ - int len; - - kprintf("initclocks\n"); - len = sizeof(vkernel_cputimer.freq); - if (sysctlbyname("kern.cputimer.freq", &vkernel_cputimer.freq, &len, - NULL, 0) < 0) { - panic("cpu_initclocks: can't get kern.cputimer.freq!"); - } - len = NELEM(cputimer_mib); - if (sysctlnametomib("kern.cputimer.clock", cputimer_mib, &len) < 0) - panic("cpu_initclocks: can't get kern.cputimer.clock!"); - cputimer_miblen = len; - - cputimer_intr_register(&vkernel_cputimer_intr); - cputimer_intr_select(&vkernel_cputimer_intr, 0); - - cputimer_register(&vkernel_cputimer); - cputimer_select(&vkernel_cputimer, 0); -} -SYSINIT(clocksvk, SI_BOOT2_CLOCKREG, SI_ORDER_FIRST, cpu_initclocks, NULL); - -/* - * Constructor to initialize timer->base and get an initial count. - */ -static void -vkernel_timer_construct(struct cputimer *timer, sysclock_t oclock) -{ - timer->base = 0; - timer->base = oclock - vkernel_timer_get_timecount(); -} - -/* - * Get the current counter, with 2's complement rollover. - * - * NOTE! MPSAFE, possibly no critical section - */ -static sysclock_t -vkernel_timer_get_timecount(void) -{ - sysclock_t counter; - size_t len; - - len = sizeof(counter); - if (sysctl(cputimer_mib, cputimer_miblen, &counter, &len, - NULL, 0) < 0) { - panic("vkernel_timer_get_timecount: sysctl failed!"); - } - return(counter); -} - -/* - * Initialize the interrupt for our core systimer. Use the kqueue timer - * support functions. - */ -static void -vktimer_intr_initclock(struct cputimer_intr *cti __unused, - boolean_t selected __unused) -{ - KKASSERT(kqueue_timer_info == NULL); - kqueue_timer_info = kqueue_add_timer(vktimer_intr, NULL); -} - -/* - * Reload the interrupt for our core systimer. Because the caller's - * reload calculation can be negatively indexed, we need a minimal - * check to ensure that a reasonable reload value is selected. - */ -static void -vktimer_intr_reload(struct cputimer_intr *cti __unused, sysclock_t reload) -{ - if (kqueue_timer_info) { - if ((int)reload < 1) - reload = 1; - kqueue_reload_timer(kqueue_timer_info, (reload + 999) / 1000); - } -} - -/* - * clock interrupt. - * - * NOTE: frame is a struct intrframe pointer. - */ -static void -vktimer_intr(void *dummy, struct intrframe *frame) -{ - static sysclock_t sysclock_count; - struct globaldata *gd = mycpu; - struct globaldata *gscan; - int n; - - sysclock_count = sys_cputimer->count(); - for (n = 0; n < ncpus; ++n) { - gscan = globaldata_find(n); - if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL) - continue; - if (gscan != gd) { - lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, - &sysclock_count, 0); - } else { - systimer_intr(&sysclock_count, 0, frame); - } - } -} - -/* - * Initialize the time of day register, based on the time base which is, e.g. - * from a filesystem. - */ -void -inittodr(time_t base) -{ - struct timespec ts; - struct timeval tv; - - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec; - ts.tv_nsec = tv.tv_usec * 1000; - set_timeofday(&ts); -} - -/* - * Write system time back to the RTC - */ -void -resettodr(void) -{ -} - -/* - * We need to enter a critical section to prevent signals from recursing - * into pthreads. - */ -void -DELAY(int usec) -{ - crit_enter(); - usleep(usec); - crit_exit(); -} - -void -DRIVERSLEEP(int usec) -{ - if (mycpu->gd_intr_nesting_level) - DELAY(usec); - else if (1000000 / usec >= hz) - tsleep(DRIVERSLEEP, 0, "DELAY", 1000000 / usec / hz + 1); - else - DELAY(usec); -} - -- 2.41.0