From 4e7c41c549d873c60943035447ac04c4d30ad100 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 8 Jan 2007 03:33:43 +0000 Subject: [PATCH] Modify the trapframe sigcontext, ucontext, etc. Add %gs to the trapframe and xflags and an expanded floating point save area to sigcontext/ucontext so traps can be fully specified. Remove all the %gs hacks in the system code and signal trampoline and handle %gs faults natively, like we do %fs faults. Implement writebacks to the virtual page table to set VPTE_M and VPTE_A and add checks for VPTE_R and VPTE_W. Consolidate the TLS save area into a MD structure that can be accessed by MI code. Reformulate the vmspace_ctl() system call to allow an extended context to be passed (for TLS info and soon the FP and eventually the LDT). Adjust the GDB patches to recognize the new location of %gs. Properly detect non-exception returns to the virtual kernel when the virtual kernel is running an emulated user process and receives a signal. And misc other work on the virtual kernel. 
--- .../gdb/libgdb/patches/i386fbsd-nat.c.patch | 16 +- lib/libc/sys/mmap.2 | 7 +- sys/cpu/i386/include/frame.h | 8 +- sys/cpu/i386/include/npx.h | 14 +- sys/cpu/i386/include/reg.h | 43 ++--- sys/cpu/i386/include/segments.h | 10 +- sys/cpu/i386/include/signal.h | 12 +- sys/cpu/i386/include/ucontext.h | 7 +- sys/cpu/i386/include/vm86.h | 4 +- sys/emulation/linux/i386/linux_sysvec.c | 14 +- sys/kern/init_sysent.c | 4 +- sys/kern/syscalls.c | 4 +- sys/kern/syscalls.master | 7 +- sys/kern/tty_cons.c | 5 +- sys/platform/pc32/apic/apic_vector.s | 17 +- sys/platform/pc32/i386/db_interface.c | 3 +- sys/platform/pc32/i386/db_trace.c | 4 +- sys/platform/pc32/i386/exception.s | 36 +++-- sys/platform/pc32/i386/genassym.c | 5 +- sys/platform/pc32/i386/locore.s | 3 +- sys/platform/pc32/i386/machdep.c | 77 ++++++--- sys/platform/pc32/i386/support.s | 3 +- sys/platform/pc32/i386/swtch.s | 14 +- sys/platform/pc32/i386/sys_machdep.c | 8 +- sys/platform/pc32/i386/tls.c | 6 +- sys/platform/pc32/i386/trap.c | 152 ++---------------- sys/platform/pc32/i386/vm86.c | 4 +- sys/platform/pc32/i386/vm_machdep.c | 9 +- sys/platform/pc32/icu/icu_vector.s | 15 +- sys/platform/pc32/include/md_var.h | 4 +- sys/platform/pc32/include/pcb.h | 4 +- sys/platform/pc32/include/pcb_ext.h | 3 +- sys/platform/pc32/include/thread.h | 6 +- sys/platform/pc32/isa/ipl.s | 26 ++- sys/platform/pc32/isa/npx.c | 9 +- sys/platform/vkernel/i386/cpu_regs.c | 111 +++++++------ sys/platform/vkernel/i386/db_interface.c | 3 +- sys/platform/vkernel/i386/db_trace.c | 4 +- sys/platform/vkernel/i386/exception.c | 15 +- sys/platform/vkernel/i386/genassym.c | 5 +- sys/platform/vkernel/i386/locore.s | 3 +- sys/platform/vkernel/i386/npx.c | 9 +- sys/platform/vkernel/i386/swtch.s | 20 +-- sys/platform/vkernel/i386/tls.c | 11 +- sys/platform/vkernel/i386/trap.c | 83 ++++++---- sys/platform/vkernel/i386/vm_machdep.c | 9 +- sys/platform/vkernel/include/md_var.h | 3 +- sys/platform/vkernel/include/pcb.h | 4 +- 
sys/platform/vkernel/include/pcb_ext.h | 3 +- sys/platform/vkernel/include/thread.h | 18 ++- sys/platform/vkernel/platform/console.c | 6 +- sys/platform/vkernel/platform/init.c | 12 +- sys/platform/vkernel/platform/machintr.c | 13 +- sys/platform/vkernel/platform/pmap.c | 32 ++-- sys/platform/vkernel/platform/pmap_inval.c | 31 ++-- sys/sys/syscall-hide.h | 4 +- sys/sys/syscall.h | 4 +- sys/sys/syscall.mk | 4 +- sys/sys/sysproto.h | 9 +- sys/sys/systm.h | 5 +- sys/sys/sysunion.h | 4 +- sys/sys/vkernel.h | 11 +- sys/sys/vmspace.h | 8 +- sys/vm/vm_fault.c | 94 +++++++++-- sys/vm/vm_vmspace.c | 55 +++++-- 65 files changed, 637 insertions(+), 519 deletions(-) diff --git a/gnu/usr.bin/gdb/libgdb/patches/i386fbsd-nat.c.patch b/gnu/usr.bin/gdb/libgdb/patches/i386fbsd-nat.c.patch index 171256acdb..11c8a4af4f 100644 --- a/gnu/usr.bin/gdb/libgdb/patches/i386fbsd-nat.c.patch +++ b/gnu/usr.bin/gdb/libgdb/patches/i386fbsd-nat.c.patch @@ -1,13 +1,21 @@ -$DragonFly: src/gnu/usr.bin/gdb/libgdb/patches/Attic/i386fbsd-nat.c.patch,v 1.1 2006/03/07 15:48:11 corecode Exp $ +$DragonFly: src/gnu/usr.bin/gdb/libgdb/patches/Attic/i386fbsd-nat.c.patch,v 1.2 2007/01/08 03:33:32 dillon Exp $ Index: i386fbsd-nat.c =================================================================== -RCS file: /home/joerg/wd/repository/dragonflybsd/src/contrib/gdb-6.2.1/gdb/i386fbsd-nat.c,v +RCS file: /cvs/src/contrib/gdb-6.2.1/gdb/i386fbsd-nat.c,v retrieving revision 1.1 diff -u -r1.1 i386fbsd-nat.c --- i386fbsd-nat.c 24 Oct 2004 19:56:25 -0000 1.1 -+++ i386fbsd-nat.c 12 Jan 2005 11:20:57 -0000 -@@ -141,7 +141,4 @@ ++++ i386fbsd-nat.c 8 Jan 2007 01:11:49 -0000 +@@ -107,7 +107,6 @@ + regcache_raw_supply (regcache, I386_ESP_REGNUM, &pcb->pcb_esp); + regcache_raw_supply (regcache, I386_EBX_REGNUM, &pcb->pcb_ebx); + regcache_raw_supply (regcache, I386_EIP_REGNUM, &pcb->pcb_eip); +- regcache_raw_supply (regcache, I386_GS_REGNUM, &pcb->pcb_gs); + + return 1; + } +@@ -141,7 +140,4 @@ } } #endif diff --git 
a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2 index 6ac257d290..4e4bcc7367 100644 --- a/lib/libc/sys/mmap.2 +++ b/lib/libc/sys/mmap.2 @@ -31,7 +31,7 @@ .\" .\" @(#)mmap.2 8.4 (Berkeley) 5/11/95 .\" $FreeBSD: src/lib/libc/sys/mmap.2,v 1.22.2.12 2002/02/27 03:40:13 dd Exp $ -.\" $DragonFly: src/lib/libc/sys/mmap.2,v 1.6 2006/12/12 00:08:17 swildner Exp $ +.\" $DragonFly: src/lib/libc/sys/mmap.2,v 1.7 2007/01/08 03:33:34 dillon Exp $ .\" .Dd December 11, 2006 .Dt MMAP 2 @@ -241,7 +241,10 @@ with Virtual page tables work with anonymous memory but there is no way to populate the page table so for all intents and purposes .Dv MAP_VPAGETABLE -can only be used when mapping file descriptors. +can only be used when mapping file descriptors. Since the kernel will +update the VPTE_M bit in the virtual page table, the mapping must be R+W +even though actual access to the memory will be properly governed by +the virtual page table. .Pp Addressable backing store is limited by the range suppored in the virtual page table entries. 
The kernel may implement a page table abstraction capable diff --git a/sys/cpu/i386/include/frame.h b/sys/cpu/i386/include/frame.h index be4725b5b5..5ca7896af0 100644 --- a/sys/cpu/i386/include/frame.h +++ b/sys/cpu/i386/include/frame.h @@ -35,7 +35,7 @@ * * from: @(#)frame.h 5.2 (Berkeley) 1/18/91 * $FreeBSD: src/sys/i386/include/frame.h,v 1.20 1999/09/29 15:06:22 marcel Exp $ - * $DragonFly: src/sys/cpu/i386/include/frame.h,v 1.6 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/frame.h,v 1.7 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_FRAME_H_ @@ -50,6 +50,7 @@ */ struct trapframe { + int tf_gs; int tf_fs; int tf_es; int tf_ds; @@ -61,6 +62,7 @@ struct trapframe { int tf_edx; int tf_ecx; int tf_eax; + int tf_xflags; int tf_trapno; /* below portion defined in 386 hardware */ int tf_err; @@ -75,6 +77,7 @@ struct trapframe { /* Superset of trap frame, for traps from virtual-8086 mode */ struct trapframe_vm86 { + int tf_gs; int tf_fs; int tf_es; int tf_ds; @@ -86,6 +89,7 @@ struct trapframe_vm86 { int tf_edx; int tf_ecx; int tf_eax; + int tf_xflags; int tf_trapno; /* below portion defined in 386 hardware */ int tf_err; @@ -107,6 +111,7 @@ struct trapframe_vm86 { struct intrframe { int if_vec; int if_ppl; + int if_gs; int if_fs; int if_es; int if_ds; @@ -119,6 +124,7 @@ struct intrframe { int if_ecx; int if_eax; int :32; /* for compat with trap frame - trapno */ + int :32; /* for compat with trap frame - xflags */ int :32; /* for compat with trap frame - err */ /* below portion defined in 386 hardware */ int if_eip; diff --git a/sys/cpu/i386/include/npx.h b/sys/cpu/i386/include/npx.h index 962cb07de8..c7adee5bca 100644 --- a/sys/cpu/i386/include/npx.h +++ b/sys/cpu/i386/include/npx.h @@ -35,7 +35,7 @@ * * from: @(#)npx.h 5.3 (Berkeley) 1/18/91 * $FreeBSD: src/sys/i386/include/npx.h,v 1.18.2.1 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/cpu/i386/include/npx.h,v 1.10 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: 
src/sys/cpu/i386/include/npx.h,v 1.11 2007/01/08 03:33:37 dillon Exp $ */ /* @@ -110,13 +110,16 @@ struct xmmacc { u_char xmm_bytes[16]; }; +/* + * savexmm is a 512-byte structure + */ struct savexmm { - struct envxmm sv_env; + struct envxmm sv_env; /* 32 */ struct { - struct fpacc87 fp_acc; - u_char fp_pad[6]; /* padding */ + struct fpacc87 fp_acc; /* 10 */ + u_char fp_pad[6]; /* 6 (padding) */ } sv_fp[8]; - struct xmmacc sv_xmm[8]; + struct xmmacc sv_xmm[8]; /* 128 */ u_long sv_ex_sw; /* status word for last exception */ u_char sv_pad[220]; } __attribute__((aligned(16))); @@ -153,6 +156,7 @@ int npxdna (void); void npxexit (void); void npxinit (u_short control); void npxsave (union savefpu *addr); +void npxsync (void); #endif #endif /* !_CPU_NPX_H_ */ diff --git a/sys/cpu/i386/include/reg.h b/sys/cpu/i386/include/reg.h index 3bad279315..fa5ac11c4b 100644 --- a/sys/cpu/i386/include/reg.h +++ b/sys/cpu/i386/include/reg.h @@ -35,7 +35,7 @@ * * from: @(#)reg.h 5.5 (Berkeley) 1/18/91 * $FreeBSD: src/sys/i386/include/reg.h,v 1.22.2.2 2002/11/07 22:47:55 alfred Exp $ - * $DragonFly: src/sys/cpu/i386/include/reg.h,v 1.7 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/reg.h,v 1.8 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_REG_H_ @@ -52,23 +52,26 @@ * stopped accessing the registers in the trap frame via PT_{READ,WRITE}_U * and we can stop supporting the user area soon. 
*/ -#define tFS (0) -#define tES (1) -#define tDS (2) -#define tEDI (3) -#define tESI (4) -#define tEBP (5) -#define tISP (6) -#define tEBX (7) -#define tEDX (8) -#define tECX (9) -#define tEAX (10) -#define tERR (12) -#define tEIP (13) -#define tCS (14) -#define tEFLAGS (15) -#define tESP (16) -#define tSS (17) +#define tGS (0) +#define tFS (1) +#define tES (2) +#define tDS (3) +#define tEDI (4) +#define tESI (5) +#define tEBP (6) +#define tISP (7) +#define tEBX (8) +#define tEDX (9) +#define tECX (10) +#define tEAX (11) +#define tXFLAGS (12) +#define tTRAPNO (13) +#define tERR (14) +#define tEIP (15) +#define tCS (16) +#define tEFLAGS (17) +#define tESP (18) +#define tSS (19) /* * Indices for registers in `struct regs' only. @@ -77,12 +80,12 @@ * other registers in application interfaces that copy all the registers * to or from a `struct regs'. */ -#define tGS (18) /* * Register set accessible via /proc/$pid/regs and PT_{SET,GET}REGS. */ struct reg { + unsigned int r_gs; unsigned int r_fs; unsigned int r_es; unsigned int r_ds; @@ -94,6 +97,7 @@ struct reg { unsigned int r_edx; unsigned int r_ecx; unsigned int r_eax; + unsigned int r_xflags; unsigned int r_trapno; unsigned int r_err; unsigned int r_eip; @@ -101,7 +105,6 @@ struct reg { unsigned int r_eflags; unsigned int r_esp; unsigned int r_ss; - unsigned int r_gs; }; /* diff --git a/sys/cpu/i386/include/segments.h b/sys/cpu/i386/include/segments.h index 3426063f74..4e19db212e 100644 --- a/sys/cpu/i386/include/segments.h +++ b/sys/cpu/i386/include/segments.h @@ -36,7 +36,7 @@ * * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/segments.h,v 1.24 1999/12/29 04:33:07 peter Exp $ - * $DragonFly: src/sys/cpu/i386/include/segments.h,v 1.12 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/segments.h,v 1.13 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_SEGMENTS_H_ @@ -250,6 +250,14 @@ struct region_descriptor { #define LBSDICALLS_SEL 16 /* BSDI system call gate */ 
#define NLDT (LBSDICALLS_SEL + 1) +#ifndef LOCORE + +struct savetls { + struct segment_descriptor tls[NGTLS]; +}; + +#endif + #if defined(_KERNEL) && !defined(LOCORE) #ifndef _SYS_TYPES_H_ diff --git a/sys/cpu/i386/include/signal.h b/sys/cpu/i386/include/signal.h index 1d3be6203d..863a757e74 100644 --- a/sys/cpu/i386/include/signal.h +++ b/sys/cpu/i386/include/signal.h @@ -32,7 +32,7 @@ * * @(#)signal.h 8.1 (Berkeley) 6/11/93 * $FreeBSD: src/sys/i386/include/signal.h,v 1.12 1999/11/12 13:52:11 marcel Exp $ - * $DragonFly: src/sys/cpu/i386/include/signal.h,v 1.7 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/signal.h,v 1.8 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_SIGNAL_H_ @@ -85,6 +85,7 @@ struct sigcontext { int sc_edx; int sc_ecx; int sc_eax; + int sc_xflags; int sc_trapno; int sc_err; int sc_eip; @@ -92,13 +93,12 @@ struct sigcontext { int sc_efl; int sc_esp; int sc_ss; + /* - * XXX FPU state is 27 * 4 bytes h/w, 1 * 4 bytes s/w (probably not - * needed here), or that + 16 * 4 bytes for emulators (probably all - * needed here). The "spare" bytes are mostly not spare. + * Full FPU state is 512 bytes. Add another 16 bytes worth of spare. */ - int sc_fpregs[28]; /* machine state (FPU): */ - int sc_spare[17]; + int sc_fpregs[128]; /* machine state (FPU): */ + int sc_spare[16]; }; #define sc_sp sc_esp diff --git a/sys/cpu/i386/include/ucontext.h b/sys/cpu/i386/include/ucontext.h index dbf6c7c820..fee1bbb9ca 100644 --- a/sys/cpu/i386/include/ucontext.h +++ b/sys/cpu/i386/include/ucontext.h @@ -26,7 +26,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/include/ucontext.h,v 1.4 1999/10/11 20:33:09 luoqi Exp $ - * $DragonFly: src/sys/cpu/i386/include/ucontext.h,v 1.3 2006/11/07 06:43:23 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/ucontext.h,v 1.4 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_UCONTEXT_H_ @@ -51,6 +51,7 @@ typedef struct __mcontext { int mc_edx; int mc_ecx; int mc_eax; + int mc_xflags; int mc_trapno; int mc_err; int mc_eip; @@ -59,8 +60,8 @@ typedef struct __mcontext { int mc_esp; /* machine state */ int mc_ss; - int mc_fpregs[28]; /* env87 + fpacc87 + u_long */ - int __spare__[17]; + int mc_fpregs[128]; /* full fp state */ + int __spare__[16]; } mcontext_t; #endif /* !_CPU_UCONTEXT_H_ */ diff --git a/sys/cpu/i386/include/vm86.h b/sys/cpu/i386/include/vm86.h index 7a54c75c83..a15f18666b 100644 --- a/sys/cpu/i386/include/vm86.h +++ b/sys/cpu/i386/include/vm86.h @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/vm86.h,v 1.13 1999/09/02 20:59:50 luoqi Exp $ - * $DragonFly: src/sys/cpu/i386/include/vm86.h,v 1.8 2006/11/07 06:43:23 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/vm86.h,v 1.9 2007/01/08 03:33:37 dillon Exp $ */ #ifndef _CPU_VM86_H_ @@ -54,6 +54,7 @@ typedef union { /* layout must match definition of struct trapframe_vm86 in */ struct vm86frame { + int kernel_gs; int kernel_fs; int kernel_es; int kernel_ds; @@ -65,6 +66,7 @@ struct vm86frame { reg86_t edx; reg86_t ecx; reg86_t eax; + int vmf_xflags; int vmf_trapno; int vmf_err; reg86_t eip; diff --git a/sys/emulation/linux/i386/linux_sysvec.c b/sys/emulation/linux/i386/linux_sysvec.c index f17de34670..a0167b53b9 100644 --- a/sys/emulation/linux/i386/linux_sysvec.c +++ b/sys/emulation/linux/i386/linux_sysvec.c @@ -26,7 +26,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $ - * $DragonFly: src/sys/emulation/linux/i386/linux_sysvec.c,v 1.24 2006/12/23 00:27:02 swildner Exp $ + * $DragonFly: src/sys/emulation/linux/i386/linux_sysvec.c,v 1.25 2007/01/08 03:33:40 dillon Exp $ */ /* XXX we use functions that might not exist. */ @@ -327,7 +327,7 @@ linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; - frame.sf_sc.uc_mcontext.sc_gs = rgs(); + frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; @@ -372,7 +372,11 @@ linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; + /* allow %fs and %gs to be inherited by the signal handler */ + /* regs->tf_fs = _udatasel; + regs->tf_gs = _udatasel; + */ regs->tf_ss = _udatasel; } @@ -459,7 +463,7 @@ linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__bits[0]; - frame.sf_sc.sc_gs = rgs(); + frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; @@ -501,7 +505,11 @@ linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; + /* Allow %fs and %gs to be inherited by the signal handler */ + /* regs->tf_fs = _udatasel; + regs->tf_gs = _udatasel; + */ regs->tf_ss = _udatasel; } diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 8ad1047ae8..3acded516e 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -2,8 +2,8 @@ * System call switch table. 
* * DO NOT EDIT-- this file is automatically generated. - * $DragonFly: src/sys/kern/init_sysent.c,v 1.48 2007/01/06 01:46:40 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/kern/init_sysent.c,v 1.49 2007/01/08 03:33:41 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ #include "opt_compat.h" diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 07cdcb3be2..6cccb360d9 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -2,8 +2,8 @@ * System call names. * * DO NOT EDIT-- this file is automatically generated. - * $DragonFly: src/sys/kern/syscalls.c,v 1.47 2007/01/06 01:46:40 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/kern/syscalls.c,v 1.48 2007/01/08 03:33:41 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ char *syscallnames[] = { diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index c1245a1bb7..43d4348102 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ - $DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp $ + $DragonFly: src/sys/kern/syscalls.master,v 1.47 2007/01/08 03:33:41 dillon Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; $FreeBSD: src/sys/kern/syscalls.master,v 1.72.2.10 2002/07/12 08:22:46 alfred Exp $ @@ -662,8 +662,9 @@ 485 STD BSD { int mcontrol(void *addr, size_t len, int behav, off_t value); } 486 STD BSD { int vmspace_create(void *id, int type, void *data); } 487 STD BSD { int vmspace_destroy(void *id); } -488 STD BSD { int vmspace_ctl(void *id, int cmd, void *ctx, \ - int ctx_bytes, int timeout_us); } +488 STD BSD { int vmspace_ctl(void *id, int cmd, \ + struct trapframe *tframe, \ + struct vextframe *vframe); } 489 STD BSD { int vmspace_mmap(void *id, void 
*addr, size_t len, \ int prot, int flags, int fd, \ off_t offset); } diff --git a/sys/kern/tty_cons.c b/sys/kern/tty_cons.c index 47d227c95d..2fe5e20f64 100644 --- a/sys/kern/tty_cons.c +++ b/sys/kern/tty_cons.c @@ -37,7 +37,7 @@ * * from: @(#)cons.c 7.2 (Berkeley) 5/9/91 * $FreeBSD: src/sys/kern/tty_cons.c,v 1.81.2.4 2001/12/17 18:44:41 guido Exp $ - * $DragonFly: src/sys/kern/tty_cons.c,v 1.19 2006/12/23 23:47:54 swildner Exp $ + * $DragonFly: src/sys/kern/tty_cons.c,v 1.20 2007/01/08 03:33:41 dillon Exp $ */ #include "opt_ddb.h" @@ -180,8 +180,9 @@ cninit_finish(void) /* * Hook the open and close functions. XXX bad hack. */ - if (dev_is_good(cn_tab->cn_dev)) + if (dev_is_good(cn_tab->cn_dev)) { cn_fwd_ops = dev_ops_intercept(cn_tab->cn_dev, &cn_iops); + } cn_dev = cn_tab->cn_dev; cn_udev = dev2udev(cn_dev); console_pausing = 0; diff --git a/sys/platform/pc32/apic/apic_vector.s b/sys/platform/pc32/apic/apic_vector.s index d81278af06..57b87a0bf6 100644 --- a/sys/platform/pc32/apic/apic_vector.s +++ b/sys/platform/pc32/apic/apic_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/apic_vector.s,v 1.47.2.5 2001/09/01 22:33:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.34 2006/11/07 18:50:06 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/apic/apic_vector.s,v 1.35 2007/01/08 03:33:42 dillon Exp $ */ #include "use_npx.h" @@ -41,13 +41,16 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ + pushl $0 ; /* dummy xflags type */ \ pushal ; \ pushl %ds ; /* save data and extra segments ... 
*/ \ pushl %es ; \ pushl %fs ; \ + pushl %gs ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ + mov %ax,%gs ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ @@ -57,7 +60,8 @@ pushl 12(%esp) ; /* original caller eip */ \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - subl $12*4,%esp ; /* pushal + 3 seg regs (dummy) + CPL */ \ + pushl $0 ; /* dummy xflags type */ \ + subl $13*4,%esp ; /* pushal + 4 seg regs (dummy) + CPL */ \ /* * Warning: POP_FRAME can only be used if there is no chance of a @@ -65,14 +69,15 @@ * have to use doreti. */ #define POP_FRAME \ + popl %gs ; \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ - addl $2*4,%esp ; /* dummy trap & error codes */ \ + addl $3*4,%esp ; /* dummy xflags, trap & error codes */ \ #define POP_DUMMY \ - addl $17*4,%esp ; \ + addl $19*4,%esp ; \ #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -143,7 +148,7 @@ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ MASK_LEVEL_IRQ(irq_num) ; \ movl $0, lapic_eoi ; \ movl PCPU(curthread),%ebx ; \ @@ -367,7 +372,7 @@ Xcpustop: Xipiq: PUSH_FRAME movl $0, lapic_eoi /* End Of Interrupt to APIC */ - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(15*4(%esp)) movl PCPU(curthread),%ebx cmpl $TDPRI_CRIT,TD_PRI(%ebx) diff --git a/sys/platform/pc32/i386/db_interface.c b/sys/platform/pc32/i386/db_interface.c index 4133ae94f1..55e3b969c9 100644 --- a/sys/platform/pc32/i386/db_interface.c +++ b/sys/platform/pc32/i386/db_interface.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. 
* * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ - * $DragonFly: src/sys/platform/pc32/i386/db_interface.c,v 1.15 2007/01/02 04:21:15 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/db_interface.c,v 1.16 2007/01/08 03:33:42 dillon Exp $ */ /* @@ -187,6 +187,7 @@ kdb_trap(int type, int code, struct i386_saved_state *regs) regs->tf_edi = ddb_regs.tf_edi; regs->tf_es = ddb_regs.tf_es & 0xffff; regs->tf_fs = ddb_regs.tf_fs & 0xffff; + regs->tf_gs = ddb_regs.tf_gs & 0xffff; regs->tf_cs = ddb_regs.tf_cs & 0xffff; regs->tf_ds = ddb_regs.tf_ds & 0xffff; return (1); diff --git a/sys/platform/pc32/i386/db_trace.c b/sys/platform/pc32/i386/db_trace.c index f3d9404e10..3d0157750b 100644 --- a/sys/platform/pc32/i386/db_trace.c +++ b/sys/platform/pc32/i386/db_trace.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/db_trace.c,v 1.16 2006/11/07 20:48:15 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/db_trace.c,v 1.17 2007/01/08 03:33:42 dillon Exp $ */ #include @@ -66,9 +66,7 @@ struct db_variable db_regs[] = { { "ds", &ddb_regs.tf_ds, FCN_NULL }, { "es", &ddb_regs.tf_es, FCN_NULL }, { "fs", &ddb_regs.tf_fs, FCN_NULL }, -#if 0 { "gs", &ddb_regs.tf_gs, FCN_NULL }, -#endif { "ss", &ddb_regs.tf_ss, FCN_NULL }, { "eax", &ddb_regs.tf_eax, FCN_NULL }, { "ecx", &ddb_regs.tf_ecx, FCN_NULL }, diff --git a/sys/platform/pc32/i386/exception.s b/sys/platform/pc32/i386/exception.s index 8b838e4082..50497f4d93 100644 --- a/sys/platform/pc32/i386/exception.s +++ b/sys/platform/pc32/i386/exception.s @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/exception.s,v 1.65.2.3 2001/08/15 01:23:49 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.29 2006/11/07 18:50:07 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/exception.s,v 1.30 2007/01/08 03:33:42 dillon Exp $ */ #include "use_npx.h" @@ -110,8 +110,9 @@ Xrsvdary: * * The cpu does not push the general registers, we must do that, and we * must restore them prior to calling 'iret'. The cpu adjusts the %cs and - * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we - * must load them with appropriate values for supervisor mode operation. + * %ss segment registers, but does not mess with %ds, %es, %fs, or %gs. + * Thus we must load the ones we use (which is most of them) with appropriate + * values for supervisor mode operation. * * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means * that we must be careful in regards to accessing global variables. We @@ -703,19 +704,24 @@ IDTVEC(fpu) * call npx_intr to clear the error. It would be better to handle * npx interrupts as traps. Nested interrupts would probably have * to be converted to ASTs. 
+ * + * Convert everything to a full trapframe */ pushl $0 /* dummy error code */ pushl $0 /* dummy trap type */ + pushl $0 /* dummy xflags */ pushal pushl %ds - pushl %es /* now stack frame is a trap frame */ + pushl %es pushl %fs + pushl %gs mov $KDSEL,%ax mov %ax,%ds mov %ax,%es + mov %ax,%gs mov $KPSEL,%ax mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(15*4(%esp)) incl PCPU(cnt)+V_TRAP @@ -754,18 +760,21 @@ IDTVEC(xmm) .globl alltraps .type alltraps,@function alltraps: + pushl $0 /* xflags (inherits hardware err on pagefault) */ pushal pushl %ds pushl %es pushl %fs + pushl %gs .globl alltraps_with_regs_pushed alltraps_with_regs_pushed: mov $KDSEL,%ax mov %ax,%ds mov %ax,%es + mov %ax,%gs mov $KPSEL,%ax mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(15*4(%esp)) calltrap: FAKE_MCOUNT(btrap) /* init "from" _btrap -> calltrap */ incl PCPU(cnt)+V_TRAP @@ -798,20 +807,23 @@ calltrap: SUPERALIGN_TEXT IDTVEC(syscall) pushfl /* save eflags in tf_err for now */ - subl $4,%esp /* skip over tf_trapno */ + pushl $T_SYSCALL80 /* tf_trapno */ + pushl $0 /* tf_xflags */ pushal pushl %ds pushl %es pushl %fs + pushl %gs mov $KDSEL,%ax /* switch to kernel segments */ mov %ax,%ds mov %ax,%es + mov %ax,%gs mov $KPSEL,%ax mov %ax,%fs movl TF_ERR(%esp),%eax /* copy saved eflags to final spot */ movl %eax,TF_EFLAGS(%esp) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(15*4(%esp)) incl PCPU(cnt)+V_SYSCALL /* YYY per-cpu */ /* warning, trap frame dummy arg, no extra reg pushes */ call syscall2 @@ -834,18 +846,22 @@ IDTVEC(syscall) */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) - subl $8,%esp /* skip over tf_trapno and tf_err */ + pushl $0 /* tf_err */ + pushl $T_SYSCALL80 /* tf_trapno */ + pushl $0 /* tf_xflags */ pushal pushl %ds pushl %es pushl %fs + pushl %gs mov $KDSEL,%ax /* switch to kernel segments */ mov %ax,%ds mov %ax,%es + mov %ax,%gs mov $KPSEL,%ax mov %ax,%fs movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ - FAKE_MCOUNT(13*4(%esp)) + 
FAKE_MCOUNT(15*4(%esp)) incl PCPU(cnt)+V_SYSCALL /* warning, trap frame dummy arg, no extra reg pushes */ call syscall2 diff --git a/sys/platform/pc32/i386/genassym.c b/sys/platform/pc32/i386/genassym.c index 4f4d987494..4605895113 100644 --- a/sys/platform/pc32/i386/genassym.c +++ b/sys/platform/pc32/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.53 2006/11/07 17:51:23 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/genassym.c,v 1.54 2007/01/08 03:33:42 dillon Exp $ */ #include @@ -135,7 +135,6 @@ ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); ASSYM(PCB_USERLDT, offsetof(struct pcb, pcb_ldt)); -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); @@ -154,6 +153,7 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); +ASSYM(TF_XFLAGS, offsetof(struct trapframe, tf_xflags)); ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); @@ -161,7 +161,6 @@ ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); -ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); diff --git a/sys/platform/pc32/i386/locore.s b/sys/platform/pc32/i386/locore.s index 8c7207bb8f..c92ec6ffb0 100644 --- a/sys/platform/pc32/i386/locore.s +++ b/sys/platform/pc32/i386/locore.s @@ -35,7 +35,7 @@ * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/locore.s,v 
1.132.2.10 2003/02/03 20:54:49 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/locore.s,v 1.12 2006/12/27 17:20:28 tgen Exp $ + * $DragonFly: src/sys/platform/pc32/i386/locore.s,v 1.13 2007/01/08 03:33:42 dillon Exp $ * * originally from: locore.s, by William F. Jolitz * @@ -347,7 +347,6 @@ NON_GPROF_ENTRY(sigcode) pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 9f - movl UC_GS(%eax),%gs /* restore %gs */ 9: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c index 3a71571ce8..75ad240b44 100644 --- a/sys/platform/pc32/i386/machdep.c +++ b/sys/platform/pc32/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.111 2007/01/07 00:39:15 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.112 2007/01/08 03:33:42 dillon Exp $ */ #include "use_apm.h" @@ -427,8 +427,7 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = lp->lwp_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe)); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe)); /* Allocate and validate space for the signal handler context. 
*/ /* XXX lwp flags */ @@ -437,9 +436,9 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp + lp->lwp_sigstk.ss_size - sizeof(struct sigframe)); lp->lwp_sigstk.ss_flags |= SS_ONSTACK; - } - else + } else { sfp = (struct sigframe *)regs->tf_esp - 1; + } /* Translate the signal is appropriate */ if (p->p_sysent->sv_sigtbl) { @@ -516,33 +515,65 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) /* * Allow the signal handler to inherit %fs in addition to %gs as - * the userland program might be using both + * the userland program might be using both. + * + * However, if a T_PROTFLT occurred the segment registers could be + * totally broken. They must be reset in order to be able to + * return to userland. */ - /*regs->tf_fs = _udatasel;*/ + if (regs->tf_trapno == T_PROTFLT) { + regs->tf_fs = _udatasel; + regs->tf_gs = _udatasel; + } regs->tf_ss = _udatasel; } /* * Sanitize the trapframe for a virtual kernel passing control to a custom - * VM context. + * VM context. Remove any items that would otherwise create a privilege + * issue. * - * Allow userland to set or maintain PSL_RF, the resume flag. This flag - * basically controls whether the return PC should skip the first instruction - * (as in an explicit system call) or re-execute it (as in an exception). + * XXX at the moment we allow userland to set the resume flag. Is this a + * bad idea? */ int cpu_sanitize_frame(struct trapframe *frame) { frame->tf_cs = _ucodesel; frame->tf_ds = _udatasel; - frame->tf_es = _udatasel; + frame->tf_es = _udatasel; /* XXX allow userland this one too? 
*/ +#if 0 frame->tf_fs = _udatasel; + frame->tf_gs = _udatasel; +#endif frame->tf_ss = _udatasel; - frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE); frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; return(0); } +int +cpu_sanitize_tls(struct savetls *tls) +{ + struct segment_descriptor *desc; + int i; + + for (i = 0; i < NGTLS; ++i) { + desc = &tls->tls[i]; + if (desc->sd_dpl == 0 && desc->sd_type == 0) + continue; + if (desc->sd_def32 == 0) + return(ENXIO); + if (desc->sd_type != SDT_MEMRWA) + return(ENXIO); + if (desc->sd_dpl != SEL_UPL) + return(ENXIO); + if (desc->sd_xx != 0 || desc->sd_p != 1) + return(ENXIO); + } + return(0); +} + /* * sigreturn(ucontext_t *sigcntxp) * @@ -598,15 +629,18 @@ sys_sigreturn(struct sigreturn_args *uap) vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } - bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); + bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; - tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; + tf->tf_vm86_gs = tf->tf_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; +#if 0 tf->tf_fs = _udatasel; + tf->tf_gs = _udatasel; +#endif } else { /* * Don't allow users to change privileged or reserved flags. @@ -637,7 +671,7 @@ sys_sigreturn(struct sigreturn_args *uap) trapsignal(lp->lwp_proc, SIGBUS, T_PROTFLT); return(EINVAL); } - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe)); + bcopy(&ucp->uc_mcontext.mc_gs, regs, sizeof(struct trapframe)); } if (ucp->uc_mcontext.mc_onstack & 1) @@ -902,10 +936,6 @@ setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) struct trapframe *regs = lp->lwp_md.md_regs; struct pcb *pcb = lp->lwp_thread->td_pcb; - /* Reset pc->pcb_gs and %gs before possibly invalidating it. 
*/ - pcb->pcb_gs = _udatasel; - load_gs(_udatasel); - /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); @@ -917,6 +947,7 @@ setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; + regs->tf_gs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ @@ -1806,7 +1837,7 @@ init386(int first) bzero(gd, sizeof(*gd)); gd->mi.gd_curthread = &thread0; - thread0.td_gd = gd; + thread0.td_gd = &gd->mi; atdevbase = ISA_HOLE_START + KERNBASE; @@ -2118,6 +2149,7 @@ fill_regs(struct lwp *lp, struct reg *regs) struct trapframe *tp; tp = lp->lwp_md.md_regs; + regs->r_gs = tp->tf_gs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; @@ -2134,7 +2166,6 @@ fill_regs(struct lwp *lp, struct reg *regs) regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = lp->lwp_thread->td_pcb; - regs->r_gs = pcb->pcb_gs; return (0); } @@ -2148,6 +2179,7 @@ set_regs(struct lwp *lp, struct reg *regs) if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); + tp->tf_gs = regs->r_gs; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; @@ -2164,7 +2196,6 @@ set_regs(struct lwp *lp, struct reg *regs) tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = lp->lwp_thread->td_pcb; - pcb->pcb_gs = regs->r_gs; return (0); } diff --git a/sys/platform/pc32/i386/support.s b/sys/platform/pc32/i386/support.s index a807b1d0a2..7c92a0981e 100644 --- a/sys/platform/pc32/i386/support.s +++ b/sys/platform/pc32/i386/support.s @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/i386/i386/support.s,v 1.67.2.5 2001/08/15 01:23:50 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/support.s,v 1.18 2007/01/06 03:23:19 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/support.s,v 1.19 2007/01/08 03:33:42 dillon Exp $ */ #include "use_npx.h" @@ -733,6 +733,7 @@ ENTRY(lgdt) mov %ax,%ss movl $KPSEL,%eax mov %ax,%fs + mov %ax,%gs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax diff --git a/sys/platform/pc32/i386/swtch.s b/sys/platform/pc32/i386/swtch.s index b8ebafc222..d9a4773eb8 100644 --- a/sys/platform/pc32/i386/swtch.s +++ b/sys/platform/pc32/i386/swtch.s @@ -66,7 +66,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.42 2006/11/07 18:50:07 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/swtch.s,v 1.43 2007/01/08 03:33:42 dillon Exp $ */ #include "use_npx.h" @@ -125,7 +125,6 @@ ENTRY(cpu_heavy_switch) movl %ebp,PCB_EBP(%edx) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) - movl %gs,PCB_GS(%edx) movl %ecx,%ebx /* EBX = curthread */ movl TD_PROC(%ecx),%ecx @@ -379,16 +378,6 @@ ENTRY(cpu_heavy_restore) pushl %edx call set_user_TLS popl %edx - /* - * Restore the %gs segment register, which must be done after - * loading the user LDT. Since user processes can modify the - * register via procfs, this may result in a fault which is - * detected by checking the fault address against cpu_switch_load_gs - * in i386/i386/trap.c - */ - .globl cpu_switch_load_gs -cpu_switch_load_gs: - movl PCB_GS(%edx),%gs /* * Restore the DEBUG register state if necessary. 
@@ -439,7 +428,6 @@ ENTRY(savectx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) - movl %gs,PCB_GS(%ecx) #if NNPX > 0 /* diff --git a/sys/platform/pc32/i386/sys_machdep.c b/sys/platform/pc32/i386/sys_machdep.c index 6d64ffbeca..c69b46bd36 100644 --- a/sys/platform/pc32/i386/sys_machdep.c +++ b/sys/platform/pc32/i386/sys_machdep.c @@ -32,7 +32,7 @@ * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.29 2006/12/28 21:24:02 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.30 2007/01/08 03:33:42 dillon Exp $ * */ @@ -253,7 +253,7 @@ set_user_TLS(void) const int off = GTLS_START; #endif for (i = 0; i < NGTLS; ++i) - gdt[off + i].sd = td->td_tls[i]; + gdt[off + i].sd = td->td_tls.tls[i]; } #ifdef SMP @@ -467,8 +467,8 @@ ki386_set_ldt(struct lwp *lp, char *args, int *res) } /* - * Fill in the actual ldt entries. Since %fs might point to one of - * these entries a critical section is required to prevent an + * Fill in the actual ldt entries. Since %fs or %gs might point to + * one of these entries a critical section is required to prevent an * interrupt thread from preempting us, switch back, and faulting * on the load of %fs due to a half-formed descriptor. */ diff --git a/sys/platform/pc32/i386/tls.c b/sys/platform/pc32/i386/tls.c index d61d086e46..c9629f05a8 100644 --- a/sys/platform/pc32/i386/tls.c +++ b/sys/platform/pc32/i386/tls.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/pc32/i386/tls.c,v 1.7 2007/01/06 01:46:42 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/tls.c,v 1.8 2007/01/08 03:33:42 dillon Exp $ */ #include @@ -103,7 +103,7 @@ sys_set_tls_area(struct set_tls_area_args *uap) * an interrupt thread comes along and switches us out and then back * in. 
*/ - desc = &curthread->td_tls[i]; + desc = &curthread->td_tls.tls[i]; crit_enter(); if (info.size == 0) { bzero(desc, sizeof(*desc)); @@ -177,7 +177,7 @@ sys_get_tls_area(struct get_tls_area_args *uap) * unpack the descriptor, ENOENT is returned for any descriptor * which has not been loaded. uap->info may be NULL. */ - desc = &curthread->td_tls[i]; + desc = &curthread->td_tls.tls[i]; if (desc->sd_p) { if (uap->info && uap->infosize > 0) { bzero(&info, sizeof(info)); diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index 32ebb92811..68dd229eff 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.91 2007/01/07 08:37:34 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.92 2007/01/08 03:33:42 dillon Exp $ */ /* @@ -257,9 +257,14 @@ recheck: } /* - * Post any pending upcalls + * Post any pending upcalls. If running a virtual kernel be sure + * to restore the virtual kernel's vmspace before posting the upcall. */ if (p->p_flag & P_UPCALLPEND) { + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame->tf_trapno = 0; + vkernel_trap(p, frame); + } p->p_flag &= ~P_UPCALLPEND; get_mplock(); postupcall(lp); @@ -268,9 +273,14 @@ recheck: } /* - * Post any pending signals + * Post any pending signals. If running a virtual kernel be sure + * to restore the virtual kernel's vmspace before posting the signal. */ if ((sig = CURSIG(p)) != 0) { + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame->tf_trapno = 0; + vkernel_trap(p, frame); + } get_mplock(); postsig(sig); rel_mplock(); @@ -565,18 +575,6 @@ restart: goto out; ucode = T_PAGEFLT; - - /* - * The code is lost because tf_err is overwritten - * with the fault address. Store it in the upper - * 16 bits of tf_trapno for vkernel consumption. 
- * - * This is a horrible kludge but saves us from having - * to add a new field to the trapframe (making it - * incompatible with existing apps). - */ - if (p->p_vkernel) - frame.tf_trapno |= (code << 16); break; case T_DIVIDE: /* integer divide fault */ @@ -695,17 +693,6 @@ kernel_trap: goto out2; \ } \ } while (0) - /* - * Since we don't save %gs across an interrupt - * frame this check must occur outside the intr - * nesting level check. - */ - if (frame.tf_eip == (int)cpu_switch_load_gs) { - td->td_pcb->pcb_gs = 0; - MAKEMPSAFE(have_mplock); - ksignal(p, SIGBUS); - goto out2; - } if (mycpu->gd_intr_nesting_level == 0) { /* * Invalid %fs's and %gs's can be created using @@ -724,6 +711,8 @@ kernel_trap: doreti_popl_es_fault); MAYBE_DORETI_FAULT(doreti_popl_fs, doreti_popl_fs_fault); + MAYBE_DORETI_FAULT(doreti_popl_gs, + doreti_popl_gs_fault); if (td->td_pcb->pcb_onfault) { frame.tf_eip = (register_t)td->td_pcb->pcb_onfault; @@ -893,115 +882,6 @@ out2: ; #endif } -#ifdef notyet -/* - * This version doesn't allow a page fault to user space while - * in the kernel. The rest of the kernel needs to be made "safe" - * before this can be used. I think the only things remaining - * to be made safe is the process tracing/debugging code. - */ -static int -trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) -{ - vm_offset_t va; - struct vmspace *vm = NULL; - vm_map_t map = 0; - int rv = 0; - vm_prot_t ftype; - thread_t td = curthread; - struct proc *p = td->td_proc; /* may be NULL */ - - if (frame->tf_err & PGEX_W) - ftype = VM_PROT_WRITE; - else - ftype = VM_PROT_READ; - - va = trunc_page(eva); - if (va < KvaStart) { - vm_offset_t v; - vm_page_t mpte; - - if (p == NULL || - (!usermode && va < VM_MAX_USER_ADDRESS && - (td->td_gd->gd_intr_nesting_level != 0 || - td->td_pcb->pcb_onfault == NULL))) { - trap_fatal(frame, eva); - return (-1); - } - - /* - * This is a fault on non-kernel virtual memory. - * vm is initialized above to NULL. 
If curproc is NULL - * or curproc->p_vmspace is NULL the fault is fatal. - */ - vm = p->p_vmspace; - if (vm == NULL) - goto nogo; - - map = &vm->vm_map; - - /* - * Keep swapout from messing with us during this - * critical time. - */ - ++p->p_lock; - - /* - * Grow the stack if necessary - */ - /* grow_stack returns false only if va falls into - * a growable stack region and the stack growth - * fails. It returns true if va was not within - * a growable stack region, or if the stack - * growth succeeded. - */ - if (!grow_stack (p, va)) { - rv = KERN_FAILURE; - --p->p_lock; - goto nogo; - } - - /* Fault in the user page: */ - rv = vm_fault(map, va, ftype, - (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY - : VM_FAULT_NORMAL); - - --p->p_lock; - } else { - /* - * Don't allow user-mode faults in kernel address space. - */ - if (usermode) - goto nogo; - - /* - * Since we know that kernel virtual address addresses - * always have pte pages mapped, we just have to fault - * the page. - */ - rv = vm_fault(&kernel_map, va, ftype, VM_FAULT_NORMAL); - } - - if (rv == KERN_SUCCESS) - return (0); -nogo: - if (!usermode) { - if (mtd->td_gd->gd_intr_nesting_level == 0 && - td->td_pcb->pcb_onfault) { - frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; - return (0); - } - trap_fatal(frame, eva); - return (-1); - } - - /* kludge to pass faulting virtual address to sendsig */ - frame->tf_err = eva; - - return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); -} -#endif - int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { @@ -1102,6 +982,7 @@ nogo: } /* kludge to pass faulting virtual address to sendsig */ + frame->tf_xflags = frame->tf_err; frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); @@ -1338,7 +1219,6 @@ syscall2(struct trapframe frame) * call. The current frame is copied out to the virtual kernel. 
*/ if (p->p_vkernel && p->p_vkernel->vk_current) { - frame.tf_trapno = T_SYSCALL80; error = vkernel_trap(p, &frame); frame.tf_eax = error; if (error) diff --git a/sys/platform/pc32/i386/vm86.c b/sys/platform/pc32/i386/vm86.c index 59b0dd362a..d75eb5fb3a 100644 --- a/sys/platform/pc32/i386/vm86.c +++ b/sys/platform/pc32/i386/vm86.c @@ -25,7 +25,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.23 2006/12/23 00:27:03 swildner Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.24 2007/01/08 03:33:42 dillon Exp $ */ #include @@ -584,7 +584,7 @@ vm86_prepcall(struct vm86frame vmf) vmf.vmf_cs = 0; } vmf.vmf_sp = addr[1] - 2; /* keep aligned */ - vmf.kernel_fs = vmf.kernel_es = vmf.kernel_ds = 0; + vmf.kernel_fs = vmf.kernel_es = vmf.kernel_ds = vmf.kernel_gs = 0; vmf.vmf_ss = 0; vmf.vmf_eflags = PSL_VIF | PSL_VM | PSL_USER; vm86_initflags(&vmf); diff --git a/sys/platform/pc32/i386/vm_machdep.c b/sys/platform/pc32/i386/vm_machdep.c index 6fb3365d0e..13828eefec 100644 --- a/sys/platform/pc32/i386/vm_machdep.c +++ b/sys/platform/pc32/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.52 2007/01/07 08:37:34 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.53 2007/01/08 03:33:42 dillon Exp $ */ #include "use_npx.h" @@ -125,7 +125,7 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) * Copy lp1's PCB. This really only applies to the * debug registers and FP state, but its faster to just copy the * whole thing. Because we only save the PCB at switchout time, - * the register state (including pcb_gs) may not be current. + * the register state may not be current. 
*/ pcb2 = lp2->lwp_thread->td_pcb; *pcb2 = *lp1->lwp_thread->td_pcb; @@ -168,11 +168,6 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) lp2->lwp_thread->td_sp -= sizeof(void *); *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; - /* - * Segment registers. - */ - pcb2->pcb_gs = rgs(); - /* * pcb2->pcb_ldt: duplicated below, if necessary. * pcb2->pcb_savefpu: cloned above. diff --git a/sys/platform/pc32/icu/icu_vector.s b/sys/platform/pc32/icu/icu_vector.s index c24cbf8344..8621bdeccb 100644 --- a/sys/platform/pc32/icu/icu_vector.s +++ b/sys/platform/pc32/icu/icu_vector.s @@ -1,7 +1,7 @@ /* * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD: src/sys/i386/isa/icu_vector.s,v 1.14.2.2 2000/07/18 21:12:42 dfr Exp $ - * $DragonFly: src/sys/platform/pc32/icu/icu_vector.s,v 1.28 2006/11/07 18:50:07 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/icu/icu_vector.s,v 1.29 2007/01/08 03:33:42 dillon Exp $ */ /* * WARNING! SMP builds can use the ICU now so this code must be MP safe. @@ -63,13 +63,16 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ + pushl $0 ; /* dummy xflags */ \ pushal ; /* 8 registers */ \ pushl %ds ; \ pushl %es ; \ pushl %fs ; \ + pushl %gs ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ + mov %ax,%gs ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ @@ -79,7 +82,8 @@ pushl 12(%esp) ; /* original caller eip */ \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - subl $12*4,%esp ; /* pushal + 3 seg regs (dummy) + CPL */ \ + pushl $0 ; /* dummy xflags */ \ + subl $13*4,%esp ; /* pushal + 4 seg regs (dummy) + CPL */ \ /* * Warning: POP_FRAME can only be used if there is no chance of a @@ -87,6 +91,7 @@ * have to use doreti. 
*/ #define POP_FRAME \ + popl %gs ; \ popl %fs ; \ popl %es ; \ popl %ds ; \ @@ -94,7 +99,7 @@ addl $2*4,%esp ; /* dummy trap & error codes */ \ #define POP_DUMMY \ - addl $17*4,%esp ; \ + addl $19*4,%esp ; \ #define MASK_IRQ(icu, irq_num) \ ICU_IMASK_LOCK ; \ @@ -134,7 +139,7 @@ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ movl PCPU(curthread),%ebx ; \ @@ -186,7 +191,7 @@ IDTVEC(vec_name) ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ + FAKE_MCOUNT(15*4(%esp)) ; \ MASK_IRQ(icu, irq_num) ; \ incl PCPU(cnt) + V_INTR ; \ enable_icus ; \ diff --git a/sys/platform/pc32/include/md_var.h b/sys/platform/pc32/include/md_var.h index b1391bd82d..0417f33bc6 100644 --- a/sys/platform/pc32/include/md_var.h +++ b/sys/platform/pc32/include/md_var.h @@ -27,7 +27,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/md_var.h,v 1.35.2.4 2003/01/22 20:14:53 jhb Exp $ - * $DragonFly: src/sys/platform/pc32/include/md_var.h,v 1.23 2007/01/05 22:16:31 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/md_var.h,v 1.24 2007/01/08 03:33:42 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ @@ -89,6 +89,8 @@ void doreti_popl_es (void) __asm(__STRING(doreti_popl_es)); void doreti_popl_es_fault (void) __asm(__STRING(doreti_popl_es_fault)); void doreti_popl_fs (void) __asm(__STRING(doreti_popl_fs)); void doreti_popl_fs_fault (void) __asm(__STRING(doreti_popl_fs_fault)); +void doreti_popl_gs (void) __asm(__STRING(doreti_popl_gs)); +void doreti_popl_gs_fault (void) __asm(__STRING(doreti_popl_gs_fault)); void enable_sse (void); void fillw (int /*u_short*/ pat, void *base, size_t cnt); #if 0 diff --git a/sys/platform/pc32/include/pcb.h b/sys/platform/pc32/include/pcb.h index ba7a474453..3e401807da 100644 --- a/sys/platform/pc32/include/pcb.h +++ b/sys/platform/pc32/include/pcb.h @@ -35,7 +35,7 @@ * * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 * 
$FreeBSD: src/sys/i386/include/pcb.h,v 1.32.2.1 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/platform/pc32/include/pcb.h,v 1.10 2006/10/23 21:50:31 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/pcb.h,v 1.11 2007/01/08 03:33:42 dillon Exp $ */ #ifndef _MACHINE_PCB_H_ @@ -68,7 +68,7 @@ struct pcb { #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ caddr_t pcb_onfault; /* copyin/out fault recovery */ - int pcb_gs; + int pcb_unused; struct pcb_ext *pcb_ext; /* optional pcb extension */ u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ }; diff --git a/sys/platform/pc32/include/pcb_ext.h b/sys/platform/pc32/include/pcb_ext.h index 19874993f6..ad64d4e2a6 100644 --- a/sys/platform/pc32/include/pcb_ext.h +++ b/sys/platform/pc32/include/pcb_ext.h @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/pc32/include/pcb_ext.h,v 1.8 2006/10/23 21:50:31 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/pcb_ext.h,v 1.9 2007/01/08 03:33:42 dillon Exp $ */ #ifndef _MACHINE_PCB_EXT_H_ @@ -69,7 +69,6 @@ struct pcb; void set_user_ldt (struct pcb *); struct pcb_ldt *user_ldt_alloc (struct pcb *, int); void user_ldt_free (struct pcb *); -void set_user_TLS (void); #endif diff --git a/sys/platform/pc32/include/thread.h b/sys/platform/pc32/include/thread.h index ec7a2572b8..3cce5cd44c 100644 --- a/sys/platform/pc32/include/thread.h +++ b/sys/platform/pc32/include/thread.h @@ -33,7 +33,7 @@ * * Machine independant code should not directly include this file. 
* - * $DragonFly: src/sys/platform/pc32/include/thread.h,v 1.16 2006/10/23 21:50:31 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/include/thread.h,v 1.17 2007/01/08 03:33:42 dillon Exp $ */ #ifndef _MACHINE_THREAD_H_ @@ -48,13 +48,13 @@ union savefpu; struct md_thread { unsigned int mtd_unused; /* used to be mtd_cpl */ union savefpu *mtd_savefpu; - struct segment_descriptor mtd_tls[NGTLS]; + struct savetls mtd_savetls; }; #ifdef _KERNEL #define td_savefpu td_mach.mtd_savefpu -#define td_tls td_mach.mtd_tls +#define td_tls td_mach.mtd_savetls /* * mycpu() retrieves the base of the current cpu's globaldata structure. diff --git a/sys/platform/pc32/isa/ipl.s b/sys/platform/pc32/isa/ipl.s index 6fd1d0e6d7..5d985c8e2b 100644 --- a/sys/platform/pc32/isa/ipl.s +++ b/sys/platform/pc32/isa/ipl.s @@ -37,7 +37,7 @@ * @(#)ipl.s * * $FreeBSD: src/sys/i386/isa/ipl.s,v 1.32.2.3 2002/05/16 16:03:56 bde Exp $ - * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.26 2005/12/06 02:02:24 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/isa/ipl.s,v 1.27 2007/01/08 03:33:43 dillon Exp $ */ #include "use_npx.h" @@ -159,12 +159,24 @@ doreti_next: andl $~RQF_INTPEND,PCPU(reqflags) 5: MEXITCOUNT + + /* + * Restore the segment registers. Since segment register values + * can be set from user mode, this can result in a kernel mode + * exception. The trap code will revector to the *_fault code + * which then sets up a T_PROTFLT signal. If the signal is + * sent to userland, sendsig() will automatically clean up all + * the segment registers to avoid a loop. 
+ */ + .globl doreti_popl_gs .globl doreti_popl_fs .globl doreti_popl_es .globl doreti_popl_ds .globl doreti_iret .globl doreti_syscall_ret doreti_syscall_ret: +doreti_popl_gs: + popl %gs doreti_popl_fs: popl %fs doreti_popl_es: @@ -172,14 +184,14 @@ doreti_popl_es: doreti_popl_ds: popl %ds popal - addl $8,%esp + addl $3*4,%esp /* xflags, trap, err */ doreti_iret: iret ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: - subl $8,%esp + subl $3*4,%esp /* xflags, trap, err */ pushal pushl %ds .globl doreti_popl_ds_fault @@ -190,6 +202,9 @@ doreti_popl_es_fault: pushl %fs .globl doreti_popl_fs_fault doreti_popl_fs_fault: + pushl %gs + .globl doreti_popl_gs_fault +doreti_popl_gs_fault: movl $0,TF_ERR(%esp) /* XXX should be the error code */ movl $T_PROTFLT,TF_TRAPNO(%esp) jmp alltraps_with_regs_pushed @@ -477,10 +492,11 @@ splz_ipiq: pushl 12(%esp) ; /* original caller eip */ \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - subl $12*4,%esp ; /* pushal + 3 seg regs (dummy) + CPL */ \ + pushl $0 ; /* dummy xflags */ \ + subl $13*4,%esp ; /* pushal + 4 seg regs (dummy) + CPL */ \ #define POP_DUMMY \ - addl $17*4,%esp ; \ + addl $19*4,%esp ; \ dofastunpend: pushl %ebp /* frame for backtrace */ diff --git a/sys/platform/pc32/isa/npx.c b/sys/platform/pc32/isa/npx.c index c7f6949f92..b2482cd837 100644 --- a/sys/platform/pc32/isa/npx.c +++ b/sys/platform/pc32/isa/npx.c @@ -33,7 +33,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/pc32/isa/npx.c,v 1.38 2006/12/23 00:27:03 swildner Exp $ + * $DragonFly: src/sys/platform/pc32/isa/npx.c,v 1.39 2007/01/08 03:33:43 dillon Exp $ */ #include "opt_cpu.h" @@ -975,6 +975,13 @@ fpusave(union savefpu *addr) fnsave(addr); } +void +npxsync(void) +{ + if (curthread == mdcpu->gd_npxthread) + npxsave(curthread->td_savefpu); +} + #ifndef CPU_DISABLE_SSE /* * On AuthenticAMD processors, the fxrstor 
instruction does not restore diff --git a/sys/platform/vkernel/i386/cpu_regs.c b/sys/platform/vkernel/i386/cpu_regs.c index 3eda3b3f42..ba743983b3 100644 --- a/sys/platform/vkernel/i386/cpu_regs.c +++ b/sys/platform/vkernel/i386/cpu_regs.c @@ -37,7 +37,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.3 2007/01/07 00:44:30 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.4 2007/01/08 03:33:43 dillon Exp $ */ #include "use_ether.h" @@ -227,8 +227,7 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = lp->lwp_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe)); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe)); /* Allocate and validate space for the signal handler context. 
*/ /* XXX lwp flags */ @@ -312,11 +311,14 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; - regs->tf_cs = 0; - regs->tf_ds = 0; - regs->tf_es = 0; - regs->tf_fs = 0; - regs->tf_ss = 0; + regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + if (regs->tf_trapno == T_PROTFLT) { + regs->tf_fs = _udatasel; + regs->tf_gs = _udatasel; + } + regs->tf_ss = _udatasel; } /* @@ -330,16 +332,41 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) int cpu_sanitize_frame(struct trapframe *frame) { - frame->tf_cs = 0; - frame->tf_ds = 0; - frame->tf_es = 0; - frame->tf_fs = 0; - frame->tf_ss = 0; - frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_cs = _ucodesel; + frame->tf_ds = _udatasel; + frame->tf_es = _udatasel; +#if 0 + frame->tf_fs = _udatasel; + frame->tf_gs = _udatasel; +#endif + frame->tf_ss = _udatasel; + frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE); frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; return(0); } +int +cpu_sanitize_tls(struct savetls *tls) +{ + struct segment_descriptor *desc; + int i; + + for (i = 0; i < NGTLS; ++i) { + desc = &tls->tls[i]; + if (desc->sd_dpl == 0 && desc->sd_type == 0) + continue; + if (desc->sd_def32 == 0) + return(ENXIO); + if (desc->sd_type != SDT_MEMRWA) + return(ENXIO); + if (desc->sd_dpl != SEL_UPL) + return(ENXIO); + if (desc->sd_xx != 0 || desc->sd_p != 1) + return(ENXIO); + } + return(0); +} + /* * sigreturn(ucontext_t *sigcntxp) * @@ -396,15 +423,18 @@ sys_sigreturn(struct sigreturn_args *uap) vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } - bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); + bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; - 
tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; - tf->tf_ds = 0; - tf->tf_es = 0; - tf->tf_fs = 0; + tf->tf_vm86_gs = tf->tf_gs; + tf->tf_ds = _udatasel; + tf->tf_es = _udatasel; +#if 0 + tf->tf_fs = _udatasel; + tf->tf_gs = _udatasel; +#endif } else #endif { @@ -437,7 +467,7 @@ sys_sigreturn(struct sigreturn_args *uap) trapsignal(lp->lwp_proc, SIGBUS, T_PROTFLT); return(EINVAL); } - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe)); + bcopy(&ucp->uc_mcontext.mc_gs, regs, sizeof(struct trapframe)); } if (ucp->uc_mcontext.mc_onstack & 1) @@ -617,20 +647,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW, SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW, &cpu_idle_spincnt, 0, "Idle loop entry spins"); -static void -cpu_idle_default_hook(void) -{ - /* - * We must guarentee that hlt is exactly the instruction - * following the sti. - */ - kprintf("idle halt\n"); - __asm __volatile("hlt"); /* sti; hlt */ -} - -/* Other subsystems (e.g., ACPI) can hook this later. */ -void (*cpu_idle_hook)(void) = cpu_idle_default_hook; - void cpu_idle(void) { @@ -651,13 +667,17 @@ cpu_idle(void) */ if (cpu_idle_hlt && !lwkt_runnable() && (td->td_flags & TDF_IDLE_NOHLT) == 0) { - /* __asm __volatile("cli"); */ + sigblock(SIGALRM); splz(); - if (!lwkt_runnable()) - cpu_idle_hook(); + if (!lwkt_runnable()) { + sigpause(0); + } else { + sigblock(0); + } #ifdef SMP - else + else { __asm __volatile("pause"); + } #endif ++cpu_idle_hltcnt; } else { @@ -683,12 +703,6 @@ setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) struct trapframe *regs = lp->lwp_md.md_regs; struct pcb *pcb = lp->lwp_thread->td_pcb; - /* Reset pc->pcb_gs and %gs before possibly invalidating it. 
*/ - pcb->pcb_gs = 0; -#if 0 - load_gs(_udatasel); -#endif - /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); @@ -700,6 +714,7 @@ setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) regs->tf_ds = 0; regs->tf_es = 0; regs->tf_fs = 0; + regs->tf_gs = 0; regs->tf_cs = 0; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ @@ -840,10 +855,10 @@ ptrace_single_step(struct lwp *lp) int fill_regs(struct lwp *lp, struct reg *regs) { - struct pcb *pcb; struct trapframe *tp; tp = lp->lwp_md.md_regs; + regs->r_gs = tp->tf_gs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; @@ -859,21 +874,19 @@ fill_regs(struct lwp *lp, struct reg *regs) regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; - pcb = lp->lwp_thread->td_pcb; - regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct lwp *lp, struct reg *regs) { - struct pcb *pcb; struct trapframe *tp; tp = lp->lwp_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); + tp->tf_gs = regs->r_gs; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; @@ -889,8 +902,6 @@ set_regs(struct lwp *lp, struct reg *regs) tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; - pcb = lp->lwp_thread->td_pcb; - pcb->pcb_gs = regs->r_gs; return (0); } diff --git a/sys/platform/vkernel/i386/db_interface.c b/sys/platform/vkernel/i386/db_interface.c index 6f7634f94e..e22709c785 100644 --- a/sys/platform/vkernel/i386/db_interface.c +++ b/sys/platform/vkernel/i386/db_interface.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. 
* * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/db_interface.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_interface.c,v 1.2 2007/01/08 03:33:43 dillon Exp $ */ /* @@ -189,6 +189,7 @@ kdb_trap(int type, int code, struct i386_saved_state *regs) regs->tf_edi = ddb_regs.tf_edi; regs->tf_es = ddb_regs.tf_es & 0xffff; regs->tf_fs = ddb_regs.tf_fs & 0xffff; + regs->tf_gs = ddb_regs.tf_gs & 0xffff; regs->tf_cs = ddb_regs.tf_cs & 0xffff; regs->tf_ds = ddb_regs.tf_ds & 0xffff; return (1); diff --git a/sys/platform/vkernel/i386/db_trace.c b/sys/platform/vkernel/i386/db_trace.c index cf92ccf06c..a0beb91cba 100644 --- a/sys/platform/vkernel/i386/db_trace.c +++ b/sys/platform/vkernel/i386/db_trace.c @@ -24,7 +24,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/db_trace.c,v 1.2 2007/01/06 08:34:53 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_trace.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -66,9 +66,7 @@ struct db_variable db_regs[] = { { "ds", &ddb_regs.tf_ds, FCN_NULL }, { "es", &ddb_regs.tf_es, FCN_NULL }, { "fs", &ddb_regs.tf_fs, FCN_NULL }, -#if 0 { "gs", &ddb_regs.tf_gs, FCN_NULL }, -#endif { "ss", &ddb_regs.tf_ss, FCN_NULL }, { "eax", &ddb_regs.tf_eax, FCN_NULL }, { "ecx", &ddb_regs.tf_ecx, FCN_NULL }, diff --git a/sys/platform/vkernel/i386/exception.c b/sys/platform/vkernel/i386/exception.c index eddcff49b2..ecc65bc796 100644 --- a/sys/platform/vkernel/i386/exception.c +++ b/sys/platform/vkernel/i386/exception.c @@ -32,7 +32,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/i386/exception.c,v 1.1 2007/01/07 05:45:04 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/exception.c,v 1.2 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -40,9 +40,13 @@ #include #include #include +#include #include +int _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); +int _udatasel = LSEL(LUDATA_SEL, SEL_UPL); + static void exc_segfault(int signo, siginfo_t *info, void *ctx); void @@ -55,6 +59,7 @@ init_exceptions(void) sa.sa_flags |= SA_SIGINFO; sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGTRAP, &sa, NULL); } /* @@ -67,13 +72,13 @@ static void exc_segfault(int signo, siginfo_t *info, void *ctxp) { ucontext_t *ctx = ctxp; - int trapno; - printf("CAUGHT SEGFAULT EIP %08x ERR %08x TRAPNO %d\n", + printf("CAUGHT SEGFAULT EIP %08x ERR %08x TRAPNO %d err %d\n", ctx->uc_mcontext.mc_eip, ctx->uc_mcontext.mc_err, - ctx->uc_mcontext.mc_trapno); - kern_trap((struct trapframe *)&ctx->uc_mcontext.mc_fs); + ctx->uc_mcontext.mc_trapno & 0xFFFF, + ctx->uc_mcontext.mc_trapno >> 16); + kern_trap((struct trapframe *)&ctx->uc_mcontext.mc_gs); splz(); } diff --git a/sys/platform/vkernel/i386/genassym.c b/sys/platform/vkernel/i386/genassym.c index e5d4076758..47a0e4e0e6 100644 --- a/sys/platform/vkernel/i386/genassym.c +++ b/sys/platform/vkernel/i386/genassym.c @@ -35,7 +35,7 @@ * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 * $FreeBSD: src/sys/i386/i386/genassym.c,v 1.86.2.3 2002/03/03 05:42:49 nyan Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.54 2007/01/02 04:24:25 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/genassym.c,v 1.55 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -125,7 +125,6 @@ ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); /*ASSYM(PCB_USERLDT, offsetof(struct pcb, pcb_ldt));*/ -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); #if 1 ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); @@ -147,13 +146,13 @@ 
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); +ASSYM(TF_XFLAGS, offsetof(struct trapframe, tf_xflags)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); -ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); diff --git a/sys/platform/vkernel/i386/locore.s b/sys/platform/vkernel/i386/locore.s index 9dc1a2f870..4b3c30dad6 100644 --- a/sys/platform/vkernel/i386/locore.s +++ b/sys/platform/vkernel/i386/locore.s @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.5 2007/01/06 19:40:53 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.6 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -70,7 +70,6 @@ NON_GPROF_ENTRY(sigcode) testl $PSL_VM,UC_EFLAGS(%eax) jne 9f #endif - movl UC_GS(%eax),%gs /* restore %gs */ #if 0 9: #endif diff --git a/sys/platform/vkernel/i386/npx.c b/sys/platform/vkernel/i386/npx.c index 3a35bad869..c462d29265 100644 --- a/sys/platform/vkernel/i386/npx.c +++ b/sys/platform/vkernel/i386/npx.c @@ -36,7 +36,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.2 2007/01/05 22:18:18 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ #include "opt_debug_npx.h" @@ -533,6 +533,13 @@ fpusave(union savefpu *addr) fnsave(addr); } +void +npxsync(void) +{ + if (curthread == 
mdcpu->gd_npxthread) + npxsave(curthread->td_savefpu); +} + #ifndef CPU_DISABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore diff --git a/sys/platform/vkernel/i386/swtch.s b/sys/platform/vkernel/i386/swtch.s index 5332fedf12..1b485f3333 100644 --- a/sys/platform/vkernel/i386/swtch.s +++ b/sys/platform/vkernel/i386/swtch.s @@ -66,7 +66,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.3 2007/01/07 02:42:13 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.4 2007/01/08 03:33:43 dillon Exp $ */ #include "use_npx.h" @@ -124,9 +124,6 @@ ENTRY(cpu_heavy_switch) movl %ebp,PCB_EBP(%edx) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) -#if 0 - movl %gs,PCB_GS(%edx) -#endif movl %ecx,%ebx /* EBX = curthread */ movl TD_PROC(%ecx),%ecx @@ -389,18 +386,6 @@ ENTRY(cpu_heavy_restore) call set_user_TLS popl %edx #endif -#if 0 - /* - * Restore the %gs segment register, which must be done after - * loading the user LDT. Since user processes can modify the - * register via procfs, this may result in a fault which is - * detected by checking the fault address against cpu_switch_load_gs - * in i386/i386/trap.c - */ - .globl cpu_switch_load_gs -cpu_switch_load_gs: - movl PCB_GS(%edx),%gs -#endif /* * Restore the DEBUG register state if necessary. @@ -447,9 +432,6 @@ ENTRY(savectx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) -#if 0 - movl %gs,PCB_GS(%ecx) -#endif #if NNPX > 0 /* diff --git a/sys/platform/vkernel/i386/tls.c b/sys/platform/vkernel/i386/tls.c index fcb1d607e4..332a9c659b 100644 --- a/sys/platform/vkernel/i386/tls.c +++ b/sys/platform/vkernel/i386/tls.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.2 2007/01/06 01:46:43 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -102,7 +102,7 @@ sys_set_tls_area(struct set_tls_area_args *uap) * an interrupt thread comes along and switches us out and then back * in. */ - desc = &curthread->td_tls[i]; + desc = &curthread->td_tls.tls[i]; crit_enter(); if (info.size == 0) { bzero(desc, sizeof(*desc)); @@ -176,7 +176,7 @@ sys_get_tls_area(struct get_tls_area_args *uap) * unpack the descriptor, ENOENT is returned for any descriptor * which has not been loaded. uap->info may be NULL. */ - desc = &curthread->td_tls[i]; + desc = &curthread->td_tls.tls[i]; if (desc->sd_p) { if (uap->info && uap->infosize > 0) { bzero(&info, sizeof(info)); @@ -197,8 +197,11 @@ sys_get_tls_area(struct get_tls_area_args *uap) return(error); } +/* + * This function is a NOP because the TLS segments are proactively copied + * by vmspace_ctl() when we switch to the (emulated) user process. 
+ */ void set_user_TLS(void) { - panic("set_user_TLS"); } diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c index 416b0826cb..bacbb7e31a 100644 --- a/sys/platform/vkernel/i386/trap.c +++ b/sys/platform/vkernel/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.4 2007/01/07 08:37:35 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.5 2007/01/08 03:33:43 dillon Exp $ */ /* @@ -247,8 +247,12 @@ recheck: * Post any pending upcalls */ if (p->p_flag & P_UPCALLPEND) { - p->p_flag &= ~P_UPCALLPEND; get_mplock(); + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame->tf_trapno = 0; + vkernel_trap(p, frame); + } + p->p_flag &= ~P_UPCALLPEND; postupcall(lp); rel_mplock(); goto recheck; @@ -259,6 +263,10 @@ recheck: */ if ((sig = CURSIG(p)) != 0) { get_mplock(); + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame->tf_trapno = 0; + vkernel_trap(p, frame); + } postsig(sig); rel_mplock(); goto recheck; @@ -381,16 +389,14 @@ user_trap(struct trapframe *frame) * the original tf_err field will be passed to us shifted 16 * over in the tf_trapno field for T_PAGEFLT. */ - if ((int16_t)frame->tf_trapno == T_PAGEFLT) { + if (frame->tf_trapno == T_PAGEFLT) eva = frame->tf_err; - frame->tf_err = frame->tf_trapno >> 16; - frame->tf_trapno &= 0xFFFF; - /*cpu_enable_intr();*/ - } else { + else eva = 0; - } - kprintf("USER_TRAP AT %08x err %d trapno %d eva %08x\n", - frame->tf_eip, frame->tf_err, frame->tf_trapno, eva); +#if 0 + kprintf("USER_TRAP AT %08x xflags %d trapno %d eva %08x\n", + frame->tf_eip, frame->tf_xflags, frame->tf_trapno, eva); +#endif /* * Everything coming from user mode runs through user_trap, @@ -644,20 +650,10 @@ kern_trap(struct trapframe *frame) p = td->td_proc; - /* - * This is a bad kludge to avoid changing the various trapframe - * structures. 
Because we are enabled as a virtual kernel, - * the original tf_err field will be passed to us shifted 16 - * over in the tf_trapno field for T_PAGEFLT. - */ - if ((int16_t)frame->tf_trapno == T_PAGEFLT) { + if (frame->tf_trapno == T_PAGEFLT) eva = frame->tf_err; - frame->tf_err = frame->tf_trapno >> 16; - frame->tf_trapno &= 0xFFFF; - /*cpu_enable_intr();*/ - } else { + else eva = 0; - } #ifdef DDB if (db_active) { @@ -865,7 +861,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) map = &vm->vm_map; } - if (frame->tf_err & PGEX_W) + if (frame->tf_xflags & PGEX_W) ftype = VM_PROT_WRITE; else ftype = VM_PROT_READ; @@ -917,10 +913,6 @@ nogo: trap_fatal(frame, usermode, eva); return (-1); } - - /* kludge to pass faulting virtual address to sendsig */ - frame->tf_err = eva; - return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } @@ -929,7 +921,7 @@ trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva) { int code, type, ss, esp; - code = frame->tf_err; + code = frame->tf_xflags; type = frame->tf_trapno; if (type <= MAX_TRAP_MSG) @@ -1249,6 +1241,9 @@ syscall2(struct trapframe *frame) if ((callp->sy_narg & SYF_MPSAFE) == 0) MAKEMPSAFE(have_mplock); #endif +#if 0 + kprintf("system call %d\n", code); +#endif error = (*callp->sy_call)(&args); @@ -1377,15 +1372,37 @@ fork_return(struct lwp *lp, struct trapframe frame) #endif } +/* + * doreti has turned into this. The frame is directly on the stack. We + * pull everything else we need (fpu and tls context) from the current + * thread. + * + * Note on fpu interactions: In a virtual kernel, the fpu context for + * an emulated user mode process is not shared with the virtual kernel's + * fpu context, so we only have to 'stack' fpu contexts within the virtual + * kernel itself, and not even then since the signal() contexts that we care + * about save and restore the FPU state (I think anyhow). 
+ * vmspace_ctl() returns an error only if it had problems installing the + * context we supplied or problems copying data to/from our VM space. + */ void go_user(struct trapframe frame) { + int r; + for (;;) { - kprintf("GO USER"); - vmspace_ctl(curproc->p_vmspace, VMSPACE_CTL_RUN, - &frame, sizeof(frame), 0); - kprintf("RETURN USER"); - user_trap(&frame); + kprintf("GO USER VMSPC %p pid %-4d %s\n", + &curproc->p_vmspace->vm_pmap, + curproc->p_pid, curproc->p_comm); + r = vmspace_ctl(&curproc->p_vmspace->vm_pmap, VMSPACE_CTL_RUN, + &frame, &curthread->td_savevext); + if (r < 0) + panic("vmspace_ctl had problems with the context"); + if (frame.tf_trapno) + user_trap(&frame); + else + kprintf("Kernel AST\n"); } } diff --git a/sys/platform/vkernel/i386/vm_machdep.c b/sys/platform/vkernel/i386/vm_machdep.c index b76aa93a15..1e2a08abe0 100644 --- a/sys/platform/vkernel/i386/vm_machdep.c +++ b/sys/platform/vkernel/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/vm_machdep.c,v 1.2 2007/01/06 08:34:53 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/vm_machdep.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ #include "use_npx.h" @@ -125,7 +125,7 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) * Copy lp1's PCB. This really only applies to the * debug registers and FP state, but its faster to just copy the * whole thing. Because we only save the PCB at switchout time, - * the register state (including pcb_gs) may not be current. + * the register state may not be current. */ pcb2 = lp2->lwp_thread->td_pcb; *pcb2 = *lp1->lwp_thread->td_pcb; @@ -168,11 +168,6 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) lp2->lwp_thread->td_sp -= sizeof(void *); *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; - /* - * Segment registers. 
- */ - pcb2->pcb_gs = rgs(); - /* * pcb2->pcb_ldt: duplicated below, if necessary. * pcb2->pcb_savefpu: cloned above. diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/include/md_var.h index 9f6efa4ab4..4a62c65a42 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/include/md_var.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.8 2007/01/07 08:37:36 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.9 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ @@ -56,6 +56,7 @@ extern u_int cpu_id; /* XXX belongs in i386 */ extern int RootImageFd; extern int MemImageFd; +extern int _ucodesel, _udatasel; struct mdglobaldata; diff --git a/sys/platform/vkernel/include/pcb.h b/sys/platform/vkernel/include/pcb.h index ac1c39d236..d4939a2f37 100644 --- a/sys/platform/vkernel/include/pcb.h +++ b/sys/platform/vkernel/include/pcb.h @@ -35,7 +35,7 @@ * * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/include/pcb.h,v 1.32.2.1 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/platform/vkernel/include/pcb.h,v 1.1 2006/11/07 18:50:07 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/pcb.h,v 1.2 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _MACHINE_PCB_H_ @@ -68,7 +68,7 @@ struct pcb { #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ caddr_t pcb_onfault; /* copyin/out fault recovery */ - int pcb_gs; + int pcb_unused; struct pcb_ext *pcb_ext; /* optional pcb extension */ u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ }; diff --git a/sys/platform/vkernel/include/pcb_ext.h b/sys/platform/vkernel/include/pcb_ext.h index 59d0e61cbb..21cb924353 100644 --- a/sys/platform/vkernel/include/pcb_ext.h +++ b/sys/platform/vkernel/include/pcb_ext.h @@ -24,7 +24,7 @@ 
* SUCH DAMAGE. * * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ - * $DragonFly: src/sys/platform/vkernel/include/pcb_ext.h,v 1.1 2007/01/05 22:18:19 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/pcb_ext.h,v 1.2 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _MACHINE_PCB_EXT_H_ @@ -69,7 +69,6 @@ struct pcb; void set_user_ldt (struct pcb *); struct pcb_ldt *user_ldt_alloc (struct pcb *, int); void user_ldt_free (struct pcb *); -void set_user_TLS (void); #endif diff --git a/sys/platform/vkernel/include/thread.h b/sys/platform/vkernel/include/thread.h index 71f1076941..58fc114144 100644 --- a/sys/platform/vkernel/include/thread.h +++ b/sys/platform/vkernel/include/thread.h @@ -31,28 +31,30 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/thread.h,v 1.1 2006/11/07 18:50:07 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/thread.h,v 1.2 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _MACHINE_THREAD_H_ #define _MACHINE_THREAD_H_ -#ifndef _MACHINE_SEGMENTS_H_ -#include +#ifndef _MACHINE_VFRAME_H_ +#include +#endif +#ifndef _MACHINE_NPX_H_ +#include #endif - -union savefpu; struct md_thread { unsigned int mtd_unused; /* used to be mtd_cpl */ - union savefpu *mtd_savefpu; - struct segment_descriptor mtd_tls[NGTLS]; + union savefpu *mtd_savefpu; /* pointer to current fpu context */ + struct vextframe mtd_savevext; }; #ifdef _KERNEL #define td_savefpu td_mach.mtd_savefpu -#define td_tls td_mach.mtd_tls +#define td_tls td_mach.mtd_savevext.vx_tls +#define td_savevext td_mach.mtd_savevext /* * mycpu() retrieves the base of the current cpu's globaldata structure. 
diff --git a/sys/platform/vkernel/platform/console.c b/sys/platform/vkernel/platform/console.c index 7da2b98579..7a8018dc58 100644 --- a/sys/platform/vkernel/platform/console.c +++ b/sys/platform/vkernel/platform/console.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/console.c,v 1.2 2007/01/07 05:45:06 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/console.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -88,7 +88,7 @@ vcons_open(struct dev_open_args *ap) struct tty *tp; int error; - if (minor(dev) != 0) + if (minor(dev) != 255) return(ENXIO); tp = dev->si_tty = ttymalloc(dev->si_tty); @@ -100,7 +100,7 @@ vcons_open(struct dev_open_args *ap) if (tp->t_state & TS_ISOPEN) return (EBUSY); - tp->t_state |= TS_CARR_ON; + tp->t_state |= TS_CARR_ON | TS_CONNECTED; ttychars(tp); tp->t_iflag = TTYDEF_IFLAG; tp->t_oflag = TTYDEF_OFLAG; diff --git a/sys/platform/vkernel/platform/init.c b/sys/platform/vkernel/platform/init.c index d97d72d754..0ade2a8b26 100644 --- a/sys/platform/vkernel/platform/init.c +++ b/sys/platform/vkernel/platform/init.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.13 2007/01/07 05:52:53 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.14 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -147,16 +147,6 @@ main(int ac, char **av) } } - /* - * Enable virtual kernel support by creating a dummy VM space. - * This also causes bus and seg fault signals to generate - * an augmented tf_trapno. 
- */ - if (vmspace_create((void *)1, 0, NULL) < 0) { - err(1, "Virtual Kernel support disabled on this system"); - /* NOT REACHED */ - } - init_sys_memory(memImageFile); init_kern_memory(); init_globaldata(); diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/machintr.c index 58ff0d56c8..7d2a3f084d 100644 --- a/sys/platform/vkernel/platform/machintr.c +++ b/sys/platform/vkernel/platform/machintr.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.4 2007/01/07 00:44:32 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.5 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -40,8 +40,11 @@ #include #include #include +#include +#include #include #include +#include /* * Interrupt Subsystem ABI @@ -103,6 +106,14 @@ dummy_finalize(void) void splz(void) { + struct mdglobaldata *gd = mdcpu; + int irq; + + atomic_clear_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND); + while ((irq = ffs(gd->gd_spending)) != 0) { + irq = irq - 1 + FIRST_SOFTINT; + sched_ithd(irq); + } } void diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c index 5836602a67..e66ac41fc4 100644 --- a/sys/platform/vkernel/platform/pmap.c +++ b/sys/platform/vkernel/platform/pmap.c @@ -38,7 +38,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.5 2007/01/07 08:37:37 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.6 2007/01/08 03:33:43 dillon Exp $ */ #include @@ -345,38 +345,38 @@ cpu_vmspace_alloc(struct vmspace *vm) #define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000) - if (vmspace_create(vm, 0, NULL) < 0) + if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) panic("vmspace_create() failed"); - rp = vmspace_mmap(vm, (void *)0x00000000, 
0x40000000, + rp = vmspace_mmap(&vm->vm_pmap, (void *)0x00000000, 0x40000000, PROT_READ|PROT_WRITE, MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, MemImageFd, 0); if (rp == MAP_FAILED) panic("vmspace_mmap: failed1"); - rp = vmspace_mmap(vm, (void *)0x40000000, 0x40000000, + rp = vmspace_mmap(&vm->vm_pmap, (void *)0x40000000, 0x40000000, PROT_READ|PROT_WRITE, MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, MemImageFd, 0x40000000); if (rp == MAP_FAILED) panic("vmspace_mmap: failed2"); - rp = vmspace_mmap(vm, (void *)0x80000000, LAST_EXTENT, + rp = vmspace_mmap(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT, PROT_READ|PROT_WRITE, MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, MemImageFd, 0x80000000); if (rp == MAP_FAILED) panic("vmspace_mmap: failed3"); - r = vmspace_mcontrol(vm, (void *)0x00000000, 0x40000000, MADV_SETMAP, - vmspace_pmap(vm)->pm_pdirpte); + r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x00000000, 0x40000000, + MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); if (r < 0) panic("vmspace_mcontrol: failed1"); - r = vmspace_mcontrol(vm, (void *)0x40000000, 0x40000000, MADV_SETMAP, - vmspace_pmap(vm)->pm_pdirpte); + r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x40000000, 0x40000000, + MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); if (r < 0) panic("vmspace_mcontrol: failed2"); - r = vmspace_mcontrol(vm, (void *)0x80000000, LAST_EXTENT, MADV_SETMAP, - vmspace_pmap(vm)->pm_pdirpte); + r = vmspace_mcontrol(&vm->vm_pmap, (void *)0x80000000, LAST_EXTENT, + MADV_SETMAP, vmspace_pmap(vm)->pm_pdirpte); if (r < 0) panic("vmspace_mcontrol: failed3"); } @@ -384,7 +384,7 @@ cpu_vmspace_alloc(struct vmspace *vm) void cpu_vmspace_free(struct vmspace *vm) { - if (vmspace_destroy(vm) < 0) + if (vmspace_destroy(&vm->vm_pmap) < 0) panic("vmspace_destroy() failed"); } @@ -2337,7 +2337,7 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) if (*(int *) gd->gd_CMAP2) panic("pmap_copy_page: CMAP2 busy"); - *(int *) gd->gd_CMAP1 = VPTE_V | (src & PG_FRAME) | PG_A; + *(int *) gd->gd_CMAP1 = VPTE_V | 
VPTE_R | (src & PG_FRAME) | VPTE_A; *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); @@ -2445,8 +2445,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if (pmap->pm_active) - pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; /* @@ -2588,7 +2587,7 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem) atomic_set_int_nonlocked(pte, bit); #endif } else { - vm_offset_t pbits = *(vm_offset_t *)pte; + vpte_t pbits = *pte; if (pbits & bit) { if (bit == VPTE_W) { if (pbits & VPTE_M) { @@ -2950,3 +2949,4 @@ pmap_pvdump(vm_paddr_t pa) kprintf(" "); } #endif + diff --git a/sys/platform/vkernel/platform/pmap_inval.c b/sys/platform/vkernel/platform/pmap_inval.c index b4a5594acc..42cfd62dac 100644 --- a/sys/platform/vkernel/platform/pmap_inval.c +++ b/sys/platform/vkernel/platform/pmap_inval.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.2 2007/01/05 22:18:20 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.3 2007/01/08 03:33:43 dillon Exp $ */ /* @@ -53,6 +53,7 @@ #include #include +#include #include #include @@ -66,28 +67,33 @@ #include #include -static void -_cpu_invltlb(void *dummy) +/* + * Initialize for add or flush + */ +void +pmap_inval_init(pmap_inval_info_t info) { - /* XXX madvise over entire address space is really expensive */ - madvise((void *)KvaStart, KvaSize, MADV_INVAL); + info->pir_flags = 0; } -static void -_cpu_invl1pg(void *data) +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { - madvise(data, PAGE_SIZE, MADV_INVAL); + if (pmap == &kernel_pmap) { + madvise((void *)va, PAGE_SIZE, MADV_INVAL); + } else { + vmspace_mcontrol(pmap, (void *)va, PAGE_SIZE, MADV_INVAL, 0); + } } -/* - * Initialize for add or flush - */ void -pmap_inval_init(pmap_inval_info_t info) +pmap_inval_flush(pmap_inval_info_t info) { info->pir_flags = 0; } +#if 0 + /* * Add a (pmap, va) pair to the invalidation list and protect access * as appropriate. @@ -148,3 +154,4 @@ pmap_inval_flush(pmap_inval_info_t info) info->pir_flags = 0; } +#endif diff --git a/sys/sys/syscall-hide.h b/sys/sys/syscall-hide.h index 9d24cc99bb..475ac55810 100644 --- a/sys/sys/syscall-hide.h +++ b/sys/sys/syscall-hide.h @@ -2,8 +2,8 @@ * System call hiders. * * DO NOT EDIT-- this file is automatically generated. 
- * $DragonFly: src/sys/sys/syscall-hide.h,v 1.48 2007/01/06 01:46:44 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/sys/syscall-hide.h,v 1.49 2007/01/08 03:33:43 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ #ifdef COMPAT_43 diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index a029a7707e..139706b031 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -2,8 +2,8 @@ * System call numbers. * * DO NOT EDIT-- this file is automatically generated. - * $DragonFly: src/sys/sys/syscall.h,v 1.48 2007/01/06 01:46:44 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/sys/syscall.h,v 1.49 2007/01/08 03:33:43 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ #define SYS_syscall 0 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index af56619324..83719d14ff 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -1,7 +1,7 @@ # DragonFly system call names. # DO NOT EDIT-- this file is automatically generated. -# $DragonFly: src/sys/sys/syscall.mk,v 1.48 2007/01/06 01:46:44 dillon Exp $ -# created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp +# $DragonFly: src/sys/sys/syscall.mk,v 1.49 2007/01/08 03:33:43 dillon Exp $ +# created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp MIASM = \ syscall.o \ exit.o \ diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 5157d8e110..521aafe79d 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -2,8 +2,8 @@ * System call prototypes. * * DO NOT EDIT-- this file is automatically generated. 
- * $DragonFly: src/sys/sys/sysproto.h,v 1.48 2007/01/06 01:46:44 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/sys/sysproto.h,v 1.49 2007/01/08 03:33:43 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ #ifndef _SYS_SYSPROTO_H_ @@ -2015,9 +2015,8 @@ struct vmspace_ctl_args { #endif void * id; char id_[PAD_(void *)]; int cmd; char cmd_[PAD_(int)]; - void * ctx; char ctx_[PAD_(void *)]; - int ctx_bytes; char ctx_bytes_[PAD_(int)]; - int timeout_us; char timeout_us_[PAD_(int)]; + struct trapframe * tframe; char tframe_[PAD_(struct trapframe *)]; + struct vextframe * vframe; char vframe_[PAD_(struct vextframe *)]; }; struct vmspace_mmap_args { #ifdef _KERNEL diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 1c33fd51bc..59d4738047 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -37,7 +37,7 @@ * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD: src/sys/sys/systm.h,v 1.111.2.18 2002/12/17 18:04:02 sam Exp $ - * $DragonFly: src/sys/sys/systm.h,v 1.59 2007/01/07 08:37:37 dillon Exp $ + * $DragonFly: src/sys/sys/systm.h,v 1.60 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _SYS_SYSTM_H_ @@ -126,6 +126,7 @@ struct thread; struct trapframe; struct user; struct vmspace; +struct savetls; void Debugger (const char *msg); void backtrace(void); @@ -139,12 +140,14 @@ void *hashinit (int count, struct malloc_type *type, u_long *hashmask); void *phashinit (int count, struct malloc_type *type, u_long *nentries); int cpu_sanitize_frame (struct trapframe *); +int cpu_sanitize_tls (struct savetls *); void cpu_halt (void); void cpu_reset (void); void cpu_boot (int); void cpu_rootconf (void); void cpu_vmspace_alloc(struct vmspace *); void cpu_vmspace_free(struct vmspace *); +void set_user_TLS(void); vm_paddr_t kvtop(void *addr); int is_physical_memory (vm_offset_t addr); diff --git a/sys/sys/sysunion.h b/sys/sys/sysunion.h index 
71d05c36ad..b2cb7dc3bb 100644 --- a/sys/sys/sysunion.h +++ b/sys/sys/sysunion.h @@ -2,8 +2,8 @@ * Union of syscall args for messaging. * * DO NOT EDIT-- this file is automatically generated. - * $DragonFly: src/sys/sys/sysunion.h,v 1.45 2007/01/06 01:46:44 dillon Exp $ - * created from DragonFly: src/sys/kern/syscalls.master,v 1.45 2006/10/10 15:43:14 dillon Exp + * $DragonFly: src/sys/sys/sysunion.h,v 1.46 2007/01/08 03:33:43 dillon Exp $ + * created from DragonFly: src/sys/kern/syscalls.master,v 1.46 2007/01/06 01:46:40 dillon Exp */ union sysunion { diff --git a/sys/sys/vkernel.h b/sys/sys/vkernel.h index 368097d969..a948287421 100644 --- a/sys/sys/vkernel.h +++ b/sys/sys/vkernel.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/vkernel.h,v 1.6 2006/12/31 03:52:46 dillon Exp $ + * $DragonFly: src/sys/sys/vkernel.h,v 1.7 2007/01/08 03:33:43 dillon Exp $ */ #ifndef _SYS_VKERNEL_H_ @@ -57,6 +57,9 @@ #ifndef _MACHINE_FRAME_H_ #include #endif +#ifndef _MACHINE_VFRAME_H_ +#include +#endif struct vmspace_rb_tree; struct vmspace_entry; @@ -69,8 +72,10 @@ RB_PROTOTYPE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare); */ struct vkernel { struct vmspace *vk_save_vmspace; /* saved VM space */ - struct trapframe vk_save_frame; /* saved trap frame */ - struct trapframe *vk_user_frame; /* copyback to user process */ + struct trapframe vk_save_trapframe; /* swapped context */ + struct vextframe vk_save_vextframe; + struct trapframe *vk_user_trapframe; /* copyback to vkernel */ + struct vextframe *vk_user_vextframe; struct vkernel_common *vk_common; /* shared data */ struct vmspace_entry *vk_current; }; diff --git a/sys/sys/vmspace.h b/sys/sys/vmspace.h index 70aadd484f..c991e61612 100644 --- a/sys/sys/vmspace.h +++ b/sys/sys/vmspace.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/sys/vmspace.h,v 1.5 2007/01/07 05:54:01 dillon Exp $ + * $DragonFly: src/sys/sys/vmspace.h,v 1.6 2007/01/08 03:33:43 dillon Exp $ */ /* * VMSPACE - Virtualized Environment control from user mode. The VMSPACE @@ -46,6 +46,9 @@ #include #endif +struct trapframe; +struct vextframe; + #define VMSPACE_CTL_RUN 1 #define VMSPACE_PAGEFAULT 1 @@ -55,7 +58,8 @@ int vmspace_create(void *, int, void *); int vmspace_destroy(void *); -int vmspace_ctl(void *, int, void *, int, int); +int vmspace_ctl (void *, int, struct trapframe *, struct vextframe *); + void *vmspace_mmap(void *, void *, size_t, int, int, int, off_t); int vmspace_munmap(void *, void *, size_t); int vmspace_mcontrol(void *, void *, size_t, int, off_t); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e0a3af5dd6..a86d31d1c3 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -67,7 +67,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $ - * $DragonFly: src/sys/vm/vm_fault.c,v 1.35 2007/01/06 22:35:47 dillon Exp $ + * $DragonFly: src/sys/vm/vm_fault.c,v 1.36 2007/01/08 03:33:43 dillon Exp $ */ /* @@ -124,7 +124,7 @@ struct faultstate { }; static int vm_fault_object(struct faultstate *, vm_pindex_t, vm_prot_t); -static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *, vpte_t); +static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *, vpte_t, int); static int vm_fault_additional_pages (vm_page_t, int, int, vm_page_t *, int *); static int vm_fault_ratelimit(struct vmspace *); @@ -234,9 +234,10 @@ RetryFault: * On success fs.map is left read-locked and various other fields * are initialized but not otherwise referenced or locked. * - * NOTE! vm_map_lookup will upgrade the fault_type to VM_FAULT_WRITE - * if the map entry is a virtual page table and also writable, - * so we can set the 'A'accessed bit in the virtual page table entry. + * NOTE! 
vm_map_lookup will try to upgrade the fault_type to + * VM_FAULT_WRITE if the map entry is a virtual page table and also + * writable, so we can set the 'A' (accessed) bit in the virtual page + * table entry. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, @@ -334,7 +335,8 @@ RetryFault: */ if (fs.entry->maptype == VM_MAPTYPE_VPAGETABLE) { result = vm_fault_vpagetable(&fs, &first_pindex, - fs.entry->aux.master_pde); + fs.entry->aux.master_pde, + fault_type); if (result == KERN_TRY_AGAIN) goto RetryFault; if (result != KERN_SUCCESS) @@ -347,6 +349,12 @@ RetryFault: * data. If it succeeds everything remains locked and fs->object * will have an additinal PIP count if it is not equal to * fs->first_object + * + * vm_fault_object will set fs->prot for the pmap operation. It is + * allowed to set VM_PROT_WRITE if fault_type == VM_PROT_READ if the + * page can be safely written. However, it will force a read-only + * mapping for a read fault if the memory is managed by a virtual + * page table. */ result = vm_fault_object(&fs, first_pindex, fault_type); @@ -506,7 +514,8 @@ RetryFault: */ if (fs.entry->maptype == VM_MAPTYPE_VPAGETABLE) { result = vm_fault_vpagetable(&fs, &first_pindex, - fs.entry->aux.master_pde); + fs.entry->aux.master_pde, + fault_type); if (result == KERN_TRY_AGAIN) goto RetryFault; if (result != KERN_SUCCESS) { @@ -580,25 +589,43 @@ RetryFault: */ static int -vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex, vpte_t vpte) +vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex, + vpte_t vpte, int fault_type) { struct sf_buf *sf; int vshift = 32 - PAGE_SHIFT; /* page index bits remaining */ int result = KERN_SUCCESS; + vpte_t *ptep; for (;;) { + /* + * We cannot proceed if the vpte is not valid, not readable + * for a read fault, or not writable for a write fault. 
+ */ if ((vpte & VPTE_V) == 0) { unlock_and_deallocate(fs); return (KERN_FAILURE); } + if ((fault_type & VM_PROT_READ) && (vpte & VPTE_R) == 0) { + unlock_and_deallocate(fs); + return (KERN_FAILURE); + } + if ((fault_type & VM_PROT_WRITE) && (vpte & VPTE_W) == 0) { + unlock_and_deallocate(fs); + return (KERN_FAILURE); + } if ((vpte & VPTE_PS) || vshift == 0) break; KKASSERT(vshift >= VPTE_PAGE_BITS); /* - * Get the page table page + * Get the page table page. Nominally we only read the page + * table, but since we are actively setting VPTE_M and VPTE_A, + * tell vm_fault_object() that we are writing it. + * + * There is currently no real need to optimize this. */ - result = vm_fault_object(fs, vpte >> PAGE_SHIFT, VM_PROT_READ); + result = vm_fault_object(fs, vpte >> PAGE_SHIFT, VM_PROT_WRITE); if (result != KERN_SUCCESS) return (result); @@ -608,8 +635,32 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex, vpte_t vpte) */ vshift -= VPTE_PAGE_BITS; sf = sf_buf_alloc(fs->m, SFB_CPUPRIVATE); - vpte = *((vpte_t *)sf_buf_kva(sf) + - ((*pindex >> vshift) & VPTE_PAGE_MASK)); + ptep = ((vpte_t *)sf_buf_kva(sf) + + ((*pindex >> vshift) & VPTE_PAGE_MASK)); + vpte = *ptep; + + /* + * Page table write-back. If the vpte is valid for the + * requested operation, do a write-back to the page table. + * + * XXX VPTE_M is not set properly for page directory pages. + * It doesn't get set in the page directory if the page table + * is modified during a read access. 
+ */ + if ((fault_type & VM_PROT_WRITE) && (vpte & VPTE_V) && + (vpte & VPTE_W)) { + if ((vpte & (VPTE_M|VPTE_A)) == 0) { + atomic_set_int(ptep, VPTE_M|VPTE_A); + vm_page_dirty(fs->m); + } + } + if ((fault_type & VM_PROT_READ) && (vpte & VPTE_V) && + (vpte & VPTE_R)) { + if ((vpte & VPTE_A) == 0) { + atomic_set_int(ptep, VPTE_A); + vm_page_dirty(fs->m); + } + } sf_buf_free(sf); vm_page_flag_set(fs->m, PG_REFERENCED); vm_page_activate(fs->m); @@ -651,6 +702,25 @@ vm_fault_object(struct faultstate *fs, fs->object = fs->first_object; pindex = first_pindex; + /* + * If a read fault occurs we try to make the page writable if + * possible. There are three cases where we cannot make the + * page mapping writable: + * + * (1) The mapping is read-only or the VM object is read-only, + * fs->prot above will simply not have VM_PROT_WRITE SET. + * + * (2) If the mapping is a virtual page table we need to be able + * to detect writes so we can set VPTE_M. + * + * (3) If the VM page is read-only or copy-on-write, upgrading would + * just result in an unnecessary COW fault. + */ + if (fault_type == VM_PROT_READ && + fs->entry->maptype == VM_MAPTYPE_VPAGETABLE) { + fs->prot &= ~VM_PROT_WRITE; + } + for (;;) { /* * If the object is dead, we stop here diff --git a/sys/vm/vm_vmspace.c b/sys/vm/vm_vmspace.c index 8667906c39..b89f03a2ec 100644 --- a/sys/vm/vm_vmspace.c +++ b/sys/vm/vm_vmspace.c @@ -31,8 +31,9 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.7 2006/12/28 18:29:08 dillon Exp $ + * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.8 2007/01/08 03:33:43 dillon Exp $ */ +#include "opt_ddb.h" #include #include @@ -49,6 +50,7 @@ #include #include +#include #include @@ -132,7 +134,8 @@ sys_vmspace_destroy(struct vmspace_destroy_args *uap) } /* - * vmspace_ctl (void *id, int cmd, void *ctx, int ctx_bytes, int timeout_us) + * vmspace_ctl (void *id, int cmd, struct trapframe *tframe, + * struct vextframe *vframe); * * Transfer control to a VMSPACE. Control is returned after the specified * number of microseconds or if a page fault, signal, trap, or system call @@ -166,13 +169,23 @@ sys_vmspace_ctl(struct vmspace_ctl_args *uap) framesz = sizeof(struct trapframe); vk->vk_current = ve; vk->vk_save_vmspace = p->p_vmspace; - vk->vk_user_frame = uap->ctx; - bcopy(uap->sysmsg_frame, &vk->vk_save_frame, framesz); - error = copyin(uap->ctx, uap->sysmsg_frame, framesz); + vk->vk_user_trapframe = uap->tframe; + vk->vk_user_vextframe = uap->vframe; + bcopy(uap->sysmsg_frame, &vk->vk_save_trapframe, framesz); + bcopy(&curthread->td_tls, &vk->vk_save_vextframe.vx_tls, + sizeof(vk->vk_save_vextframe.vx_tls)); + error = copyin(uap->tframe, uap->sysmsg_frame, framesz); + if (error == 0) + error = copyin(&uap->vframe->vx_tls, &curthread->td_tls, sizeof(struct savetls)); if (error == 0) error = cpu_sanitize_frame(uap->sysmsg_frame); + if (error == 0) + error = cpu_sanitize_tls(&curthread->td_tls); if (error) { - bcopy(&vk->vk_save_frame, uap->sysmsg_frame, framesz); + bcopy(&vk->vk_save_trapframe, uap->sysmsg_frame, framesz); + bcopy(&vk->vk_save_vextframe.vx_tls, &curthread->td_tls, + sizeof(vk->vk_save_vextframe.vx_tls)); + set_user_TLS(); vk->vk_current = NULL; vk->vk_save_vmspace = NULL; --ve->refs; @@ -180,6 +193,7 @@ sys_vmspace_ctl(struct vmspace_ctl_args *uap) pmap_deactivate(p); p->p_vmspace = ve->vmspace; pmap_activate(p); + set_user_TLS(); error = EJUSTRETURN; } break; @@ -443,9 
+457,16 @@ vkernel_exit(struct proc *p) /* * Restore the original VM context if we are killed while running * a different one. + * + * This isn't supposed to happen. What is supposed to happen is + * that the process should enter vkernel_trap() before handling + * the signal. */ if ((ve = vk->vk_current) != NULL) { - kprintf("killed with active VC\n"); + kprintf("Killed with active VC, notify kernel list\n"); +#ifdef DDB + db_print_backtrace(); +#endif vk->vk_current = NULL; pmap_deactivate(p); p->p_vmspace = vk->vk_save_vmspace; @@ -483,10 +504,6 @@ vkernel_trap(struct proc *p, struct trapframe *frame) struct vkernel *vk; int error; - kprintf("trap for vkernel type %d wm=%d\n", - frame->tf_trapno & 0x7FFFFFFF, - ((frame->tf_trapno & 0x80000000) ? 1 : 0)); - /* * Which vmspace entry was running? */ @@ -506,12 +523,18 @@ vkernel_trap(struct proc *p, struct trapframe *frame) --ve->refs; /* - * Copy the trapframe to the virtual kernel's userspace, then - * restore virtual kernel's original syscall trap frame so we - * can 'return' from the system call that ran the custom VM space. + * Copy the emulated process frame to the virtual kernel process. + * The emulated process cannot change TLS descriptors so don't + * bother saving them, we already have a copy. + * + * Restore the virtual kernel's saved context so the virtual kernel + * process can resume. */ - error = copyout(frame, vk->vk_user_frame, sizeof(*frame)); - bcopy(&vk->vk_save_frame, frame, sizeof(*frame)); + error = copyout(frame, vk->vk_user_trapframe, sizeof(*frame)); + bcopy(&vk->vk_save_trapframe, frame, sizeof(*frame)); + bcopy(&vk->vk_save_vextframe.vx_tls, &curthread->td_tls, + sizeof(vk->vk_save_vextframe.vx_tls)); + set_user_TLS(); return(error); } -- 2.41.0