From 4a22e89306157337692c1934727ae876ade60c0a Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 20 Oct 2006 17:02:19 +0000 Subject: [PATCH] Add a ton of infrastructure for VKERNEL support. Add code for intercepting traps and system calls, for switching to and executing a foreign VM space, and for accessing trap frames. --- sys/i386/i386/machdep.c | 23 ++- sys/i386/i386/pmap.c | 54 ++++--- sys/i386/i386/trap.c | 56 ++++++- sys/i386/i386/vm_machdep.c | 3 +- sys/kern/kern_exec.c | 8 +- sys/kern/kern_exit.c | 8 +- sys/kern/kern_fork.c | 7 +- sys/kern/kern_shutdown.c | 8 +- sys/platform/pc32/i386/machdep.c | 23 ++- sys/platform/pc32/i386/pmap.c | 54 ++++--- sys/platform/pc32/i386/trap.c | 56 ++++++- sys/platform/pc32/i386/vm_machdep.c | 3 +- sys/sys/sysmsg.h | 5 +- sys/sys/systm.h | 5 +- sys/sys/vkernel.h | 35 +++- sys/sys/vmspace.h | 4 +- sys/vm/pmap.h | 3 +- sys/vm/vm_vmspace.c | 242 +++++++++++++++++++++++----- 18 files changed, 477 insertions(+), 120 deletions(-) diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index cc54428e95..4c734b0a60 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.98 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/i386/i386/Attic/machdep.c,v 1.99 2006/10/20 17:02:19 dillon Exp $ */ #include "use_apm.h" @@ -523,6 +523,27 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) regs->tf_ss = _udatasel; } +/* + * Sanitize the trapframe for a virtual kernel passing control to a custom + * VM context. + * + * Allow userland to set or maintain PSL_RF, the resume flag. This flag + * basically controls whether the return PC should skip the first instruction + * (as in an explicit system call) or re-execute it (as in an exception). + */ +int +cpu_sanitize_frame(struct trapframe *frame) +{ + frame->tf_cs = _ucodesel; + frame->tf_ds = _udatasel; + frame->tf_es = _udatasel; + frame->tf_fs = _udatasel; + frame->tf_ss = _udatasel; + frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; + return(0); +} + /* * sigreturn(ucontext_t *sigcntxp) * diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index fff68af224..18880a8817 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.58 2006/09/30 20:23:05 swildner Exp $ + * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.59 2006/10/20 17:02:19 dillon Exp $ */ /* @@ -2812,7 +2812,6 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) return (FALSE); } -#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. 
Also, this code @@ -2828,36 +2827,34 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pv_entry_t pv, npv; vm_page_t m; pmap_inval_info info; + int iscurrentpmap; -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } -#endif + if (curproc && pmap == vmspace_pmap(curproc->p_vmspace)) + iscurrentpmap = 1; + else + iscurrentpmap = 0; pmap_inval_init(&info); crit_enter(); - for(pv = TAILQ_FIRST(&pmap->pm_pvlist); - pv; - pv = npv) { + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - pte = (unsigned *)vtopte(pv->pv_va); -#else - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); -#endif - pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); + if (iscurrentpmap) + pte = (unsigned *)vtopte(pv->pv_va); + else + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (pmap->pm_active) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; -/* - * We cannot remove wired pages from a process' mapping at this time - */ + /* + * We cannot remove wired pages from a process' mapping + * at this time + */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; @@ -3282,6 +3279,23 @@ pmap_activate(struct proc *p) load_cr3(p->p_thread->td_pcb->pcb_cr3); } +void +pmap_deactivate(struct proc *p) +{ + pmap_t pmap; + + pmap = vmspace_pmap(p->p_vmspace); +#if defined(SMP) + atomic_clear_int(&pmap->pm_active, 1 << mycpu->gd_cpuid); +#else + pmap->pm_active &= ~1; +#endif + /* + * XXX - note we do not adjust %cr3. The caller is expected to + * activate a new pmap or do a thread-exit. + */ +} + vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 551fb7d5fa..ecb0b59a8b 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.81 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/i386/i386/Attic/trap.c,v 1.82 2006/10/20 17:02:19 dillon Exp $ */ /* @@ -69,6 +69,7 @@ #include #endif #include +#include #include #include @@ -565,6 +566,15 @@ restart: goto out; ucode = T_PAGEFLT; + + /* + * The code is lost because tf_err is overwritten + * with the fault address. Store it in the upper + * 16 bits of tf_trapno for vkernel consumption. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame.tf_trapno |= (code << 16); + } break; case T_DIVIDE: /* integer divide fault */ @@ -833,7 +843,19 @@ kernel_trap: goto out2; } - /* Translate fault for emulators (e.g. Linux) */ + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + vkernel_trap(p, &frame); + goto out; + } + + /* + * Translate fault for emulators (e.g. 
Linux) + */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); @@ -1301,12 +1323,33 @@ syscall2(struct trapframe frame) #endif userenter(td); /* lazy raise our priority */ + /* + * Misc + */ sticks = (int)td->td_sticks; + orig_tf_eflags = frame.tf_eflags; + /* + * Virtual kernel intercept - if a VM context managed by a virtual + * kernel issues a system call the virtual kernel handles it, not us. + * Restore the virtual kernel context and return from its system + * call. The current frame is copied out to the virtual kernel. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + error = vkernel_trap(p, &frame); + frame.tf_eax = error; + if (error) + frame.tf_eflags |= PSL_C; + error = EJUSTRETURN; + goto out; + } + + /* + * Get the system call parameters and account for time + */ lp->lwp_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; - orig_tf_eflags = frame.tf_eflags; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)( @@ -1375,6 +1418,12 @@ syscall2(struct trapframe frame) args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame.tf_edx; + /* + * The syscall might manipulate the trap frame. If it does it + * will probably return EJUSTRETURN. + */ + args.sysmsg_frame = &frame; + STOPEVENT(p, S_SCE, narg); /* MP aware */ #ifdef SMP @@ -1389,6 +1438,7 @@ syscall2(struct trapframe frame) error = (*callp->sy_call)(&args); +out: /* * MP SAFE (we may or may not have the MP lock at this point) */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index fb2b9e4dfe..3d6f268c8e 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.46 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/i386/i386/Attic/vm_machdep.c,v 1.47 2006/10/20 17:02:19 dillon Exp $ */ #include "use_npx.h" @@ -65,6 +65,7 @@ #include #include #include +#include #include /* npxthread */ #include /* SWI_ */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 8b4164ddb7..55d3020cfc 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/kern/kern_exec.c,v 1.107.2.15 2002/07/30 15:40:46 nectar Exp $ - * $DragonFly: src/sys/kern/kern_exec.c,v 1.46 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/kern/kern_exec.c,v 1.47 2006/10/20 17:02:16 dillon Exp $ */ #include @@ -331,10 +331,8 @@ interpret: * inherited by an exec. This also allows a virtual kernel * to fork/exec unrelated applications. 
*/ - if (p->p_vkernel) { - vkernel_drop(p->p_vkernel); - p->p_vkernel = NULL; - } + if (p->p_vkernel) + vkernel_exit(p); /* Stop profiling */ stopprofclock(p); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 52ad9afb33..1dea481987 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.63 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.64 2006/10/20 17:02:16 dillon Exp $ */ #include "opt_compat.h" @@ -217,10 +217,8 @@ exit1(int rv) upc_release(vm, &p->p_lwp); /* clean up data related to virtual kernel operation */ - if (p->p_vkernel) { - vkernel_drop(p->p_vkernel); - p->p_vkernel = NULL; - } + if (p->p_vkernel) + vkernel_exit(p); /* * Release user portion of address space. diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 537978abec..fb096b89b6 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -37,7 +37,7 @@ * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $ - * $DragonFly: src/sys/kern/kern_fork.c,v 1.58 2006/10/10 15:40:46 dillon Exp $ + * $DragonFly: src/sys/kern/kern_fork.c,v 1.59 2006/10/20 17:02:16 dillon Exp $ */ #include "opt_ktrace.h" @@ -423,8 +423,9 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) * Inherit the virtual kernel structure (allows a virtual kernel * to fork to simulate multiple cpus). */ - if ((p2->p_vkernel = p1->p_vkernel) != NULL) - vkernel_hold(p2->p_vkernel); + p2->p_vkernel = NULL; + if (p1->p_vkernel) + vkernel_inherit(p1, p2); /* * Once we are on a pglist we may receive signals. XXX we might diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 240df976cd..db7ee4fbaa 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -37,7 +37,7 @@ * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $ - * $DragonFly: src/sys/kern/kern_shutdown.c,v 1.38 2006/09/24 19:43:55 dillon Exp $ + * $DragonFly: src/sys/kern/kern_shutdown.c,v 1.39 2006/10/20 17:02:16 dillon Exp $ */ #include "opt_ddb.h" @@ -498,10 +498,8 @@ shutdown_cleanup_proc(struct proc *p) fdp->fd_njdir = NULL; } } - if (p->p_vkernel) { - vkernel_drop(p->p_vkernel); - p->p_vkernel = NULL; - } + if (p->p_vkernel) + vkernel_exit(p); if (p->p_textvp) { vrele(p->p_textvp); p->p_textvp = NULL; diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c index 8b81ade18a..488b2800a7 100644 --- a/sys/platform/pc32/i386/machdep.c +++ b/sys/platform/pc32/i386/machdep.c @@ -36,7 +36,7 @@ * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ - * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.98 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/platform/pc32/i386/machdep.c,v 1.99 2006/10/20 17:02:19 dillon Exp $ */ #include "use_apm.h" @@ -523,6 +523,27 @@ sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) regs->tf_ss = _udatasel; } +/* + * Sanitize the trapframe for a virtual kernel passing control to a custom + * VM context. + * + * Allow userland to set or maintain PSL_RF, the resume flag. This flag + * basically controls whether the return PC should skip the first instruction + * (as in an explicit system call) or re-execute it (as in an exception). 
+ */ +int +cpu_sanitize_frame(struct trapframe *frame) +{ + frame->tf_cs = _ucodesel; + frame->tf_ds = _udatasel; + frame->tf_es = _udatasel; + frame->tf_fs = _udatasel; + frame->tf_ss = _udatasel; + frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; + return(0); +} + /* * sigreturn(ucontext_t *sigcntxp) * diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 7eca8380a0..d8f43aa374 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -40,7 +40,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.58 2006/09/30 20:23:05 swildner Exp $ + * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.59 2006/10/20 17:02:19 dillon Exp $ */ /* @@ -2812,7 +2812,6 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) return (FALSE); } -#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code @@ -2828,36 +2827,34 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pv_entry_t pv, npv; vm_page_t m; pmap_inval_info info; + int iscurrentpmap; -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } -#endif + if (curproc && pmap == vmspace_pmap(curproc->p_vmspace)) + iscurrentpmap = 1; + else + iscurrentpmap = 0; pmap_inval_init(&info); crit_enter(); - for(pv = TAILQ_FIRST(&pmap->pm_pvlist); - pv; - pv = npv) { + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - pte = (unsigned *)vtopte(pv->pv_va); -#else - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); -#endif - pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); + if (iscurrentpmap) + pte = (unsigned *)vtopte(pv->pv_va); + else + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (pmap->pm_active) + pmap_inval_add(&info, pv->pv_pmap, pv->pv_va); tpte = *pte; -/* - * We cannot remove wired pages from a process' mapping at this time - */ + /* + * We cannot remove wired pages from a process' mapping + * at this time + */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; @@ -3282,6 +3279,23 @@ pmap_activate(struct proc *p) load_cr3(p->p_thread->td_pcb->pcb_cr3); } +void +pmap_deactivate(struct proc *p) +{ + pmap_t pmap; + + pmap = vmspace_pmap(p->p_vmspace); +#if defined(SMP) + atomic_clear_int(&pmap->pm_active, 1 << mycpu->gd_cpuid); +#else + pmap->pm_active &= ~1; +#endif + /* + * XXX - note we do not adjust %cr3. The caller is expected to + * activate a new pmap or do a thread-exit. 
+ */ +} + vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index 756388ffb6..f977d4be4e 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.81 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.82 2006/10/20 17:02:19 dillon Exp $ */ /* @@ -69,6 +69,7 @@ #include #endif #include +#include #include #include @@ -565,6 +566,15 @@ restart: goto out; ucode = T_PAGEFLT; + + /* + * The code is lost because tf_err is overwritten + * with the fault address. Store it in the upper + * 16 bits of tf_trapno for vkernel consumption. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame.tf_trapno |= (code << 16); + } break; case T_DIVIDE: /* integer divide fault */ @@ -833,7 +843,19 @@ kernel_trap: goto out2; } - /* Translate fault for emulators (e.g. Linux) */ + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + vkernel_trap(p, &frame); + goto out; + } + + /* + * Translate fault for emulators (e.g. Linux) + */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); @@ -1301,12 +1323,33 @@ syscall2(struct trapframe frame) #endif userenter(td); /* lazy raise our priority */ + /* + * Misc + */ sticks = (int)td->td_sticks; + orig_tf_eflags = frame.tf_eflags; + /* + * Virtual kernel intercept - if a VM context managed by a virtual + * kernel issues a system call the virtual kernel handles it, not us. + * Restore the virtual kernel context and return from its system + * call. The current frame is copied out to the virtual kernel. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + error = vkernel_trap(p, &frame); + frame.tf_eax = error; + if (error) + frame.tf_eflags |= PSL_C; + error = EJUSTRETURN; + goto out; + } + + /* + * Get the system call parameters and account for time + */ lp->lwp_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; - orig_tf_eflags = frame.tf_eflags; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)( @@ -1375,6 +1418,12 @@ syscall2(struct trapframe frame) args.sysmsg_fds[0] = 0; args.sysmsg_fds[1] = frame.tf_edx; + /* + * The syscall might manipulate the trap frame. If it does it + * will probably return EJUSTRETURN. 
+ */ + args.sysmsg_frame = &frame; + STOPEVENT(p, S_SCE, narg); /* MP aware */ #ifdef SMP @@ -1389,6 +1438,7 @@ syscall2(struct trapframe frame) error = (*callp->sy_call)(&args); +out: /* * MP SAFE (we may or may not have the MP lock at this point) */ diff --git a/sys/platform/pc32/i386/vm_machdep.c b/sys/platform/pc32/i386/vm_machdep.c index 1de2bad06c..76e570187b 100644 --- a/sys/platform/pc32/i386/vm_machdep.c +++ b/sys/platform/pc32/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.46 2006/09/19 11:47:35 corecode Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.47 2006/10/20 17:02:19 dillon Exp $ */ #include "use_npx.h" @@ -65,6 +65,7 @@ #include #include #include +#include #include /* npxthread */ #include /* SWI_ */ diff --git a/sys/sys/sysmsg.h b/sys/sys/sysmsg.h index e39429d55f..cc59360926 100644 --- a/sys/sys/sysmsg.h +++ b/sys/sys/sysmsg.h @@ -1,7 +1,7 @@ /* * SYS/SYSMSG.H * - * $DragonFly: src/sys/sys/sysmsg.h,v 1.10 2006/06/07 03:02:11 dillon Exp $ + * $DragonFly: src/sys/sys/sysmsg.h,v 1.11 2006/10/20 17:02:13 dillon Exp $ */ #ifndef _SYS_SYSMSG_H_ @@ -30,6 +30,8 @@ struct sysmsg { __int64_t result64; /* 64 bit result */ __off_t offset; /* off_t result */ } sm_result; + struct trapframe *sm_frame; /* trapframe - saved user context */ + void *sm_unused; }; struct lwp; @@ -45,6 +47,7 @@ union sysunion; #define sysmsg_offset sysmsg.sm_result.offset #define sysmsg_result32 sysmsg.sm_result.result32 #define sysmsg_result64 sysmsg.sm_result.result64 +#define sysmsg_frame sysmsg.sm_frame #endif #endif diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 80c96bc4c0..80c6a9c0c6 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -37,7 +37,7 @@ * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD: src/sys/sys/systm.h,v 1.111.2.18 2002/12/17 18:04:02 sam Exp $ - * $DragonFly: src/sys/sys/systm.h,v 1.42 2006/09/30 20:03:44 swildner Exp $ + * $DragonFly: src/sys/sys/systm.h,v 1.43 2006/10/20 17:02:13 dillon Exp $ */ #ifndef _SYS_SYSTM_H_ @@ -116,6 +116,7 @@ struct tty; struct uio; struct globaldata; struct thread; +struct trapframe; void Debugger (const char *msg); void backtrace(void); @@ -127,6 +128,8 @@ int ureadc (int, struct uio *); void *hashinit (int count, struct malloc_type *type, u_long *hashmask); void *phashinit (int count, struct malloc_type *type, u_long *nentries); +int cpu_sanitize_frame (struct trapframe *); + void cpu_boot (int); void cpu_rootconf (void); extern uint32_t crc32_tab[]; diff --git a/sys/sys/vkernel.h b/sys/sys/vkernel.h index 89c3013127..97a3193486 100644 --- a/sys/sys/vkernel.h +++ b/sys/sys/vkernel.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/vkernel.h,v 1.3 2006/09/13 21:05:22 dillon Exp $ + * $DragonFly: src/sys/sys/vkernel.h,v 1.4 2006/10/20 17:02:13 dillon Exp $ */ #ifndef _SYS_VKERNEL_H_ @@ -54,28 +54,47 @@ #ifndef _SYS_SPINLOCK_H_ #include #endif +#ifndef _MACHINE_FRAME_H_ +#include +#endif struct vmspace_rb_tree; struct vmspace_entry; RB_PROTOTYPE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare); +/* + * Process operating as virtual kernels manage multiple VM spaces. The + * original VM space and trap context is saved in the process's vkernel + * structure. 
+ */ struct vkernel { - RB_HEAD(vmspace_rb_tree, vmspace_entry) vk_root; - struct vmspace *vk_orig_vmspace; /* vkernel's vmspace */ - struct vmspace_entry *vk_vvmspace; /* selected vmspace */ - struct spinlock vk_spin; - int vk_refs; + struct vmspace *vk_save_vmspace; /* saved VM space */ + struct trapframe vk_save_frame; /* saved trap frame */ + struct trapframe *vk_user_frame; /* copyback to user process */ + struct vkernel_common *vk_common; /* shared data */ + struct vmspace_entry *vk_current; +}; + +struct vkernel_common { + RB_HEAD(vmspace_rb_tree, vmspace_entry) vc_root; + struct spinlock vc_spin; + int vc_refs; }; struct vmspace_entry { void *id; struct vmspace *vmspace; + int flags; + int refs; /* when vk_current */ RB_ENTRY(vmspace_entry) rb_entry; }; #ifdef _KERNEL -void vkernel_hold(struct vkernel *vk); -void vkernel_drop(struct vkernel *vk); + +void vkernel_inherit(struct proc *p1, struct proc *p2); +void vkernel_exit(struct proc *p); +int vkernel_trap(struct proc *p, struct trapframe *frame); + #endif #else diff --git a/sys/sys/vmspace.h b/sys/sys/vmspace.h index 0f5583df52..c93fff4418 100644 --- a/sys/sys/vmspace.h +++ b/sys/sys/vmspace.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/vmspace.h,v 1.3 2006/10/10 15:43:15 dillon Exp $ + * $DragonFly: src/sys/sys/vmspace.h,v 1.4 2006/10/20 17:02:13 dillon Exp $ */ /* * VMSPACE - Virtualized Environment control from user mode. The VMSPACE @@ -58,7 +58,7 @@ int vmspace_create(void *, int, void *); int vmspace_destroy(void *); int vmspace_ctl(void *, int, void *, int, int); -int vmspace_mmap(void *, void *, size_t, int, int, int, off_t); +void *vmspace_mmap(void *, void *, size_t, int, int, int, off_t); int vmspace_munmap(void *, void *, size_t); int vmspace_mcontrol(void *, void *, size_t, int, off_t); ssize_t vmspace_pread(void *, void *, size_t, int, off_t); diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 68eeba5b1d..dd9024a2a8 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/pmap.h,v 1.33.2.4 2002/03/06 22:44:24 silby Exp $ - * $DragonFly: src/sys/vm/pmap.h,v 1.18 2006/05/21 03:43:47 dillon Exp $ + * $DragonFly: src/sys/vm/pmap.h,v 1.19 2006/10/20 17:02:09 dillon Exp $ */ /* @@ -142,6 +142,7 @@ void pmap_init_proc (struct proc *p, struct thread *td); void pmap_init_thread (struct thread *td); struct thread *pmap_dispose_proc (struct proc *p); void pmap_activate (struct proc *p); +void pmap_deactivate (struct proc *p); vm_offset_t pmap_addr_hint (vm_object_t obj, vm_offset_t addr, vm_size_t size); void *pmap_kenter_temporary (vm_paddr_t pa, int i); void pmap_init2 (void); diff --git a/sys/vm/vm_vmspace.c b/sys/vm/vm_vmspace.c index 736fec7cc9..516c20f43e 100644 --- a/sys/vm/vm_vmspace.c +++ b/sys/vm/vm_vmspace.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.3 2006/10/10 15:43:16 dillon Exp $ + * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.4 2006/10/20 17:02:09 dillon Exp $ */ #include @@ -44,11 +44,16 @@ #include #include #include +#include +#include #include #include -static struct vmspace_entry *vkernel_find_vmspace(struct vkernel *vk, void *id); +static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_common *vc, + void *id); +static void vmspace_entry_delete(struct vmspace_entry *ve, + struct vkernel_common *vc); static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures"); @@ -66,8 +71,9 @@ static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures"); int sys_vmspace_create(struct vmspace_create_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; if (vkernel_enable == 0) return (EOPNOTSUPP); @@ -78,21 +84,25 @@ sys_vmspace_create(struct vmspace_create_args *uap) */ if ((vk = curproc->p_vkernel) == NULL) { vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); - vk->vk_refs = 1; - RB_INIT(&vk->vk_root); + vc = kmalloc(sizeof(*vc), M_VKERNEL, M_WAITOK|M_ZERO); + vc->vc_refs = 1; + spin_init(&vc->vc_spin); + RB_INIT(&vc->vc_root); + vk->vk_common = vc; curproc->p_vkernel = vk; } + vc = vk->vk_common; /* * Create a new VMSPACE */ - if (vkernel_find_vmspace(vk, uap->id)) + if (vkernel_find_vmspace(vc, uap->id)) return (EEXIST); ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO); ve->vmspace = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); ve->id = uap->id; pmap_pinit2(vmspace_pmap(ve->vmspace)); - RB_INSERT(vmspace_rb_tree, &vk->vk_root, ve); + RB_INSERT(vmspace_rb_tree, &vc->vc_root, ve); return (0); } @@ -104,17 +114,18 @@ sys_vmspace_create(struct vmspace_create_args *uap) int sys_vmspace_destroy(struct vmspace_destroy_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); - /* XXX check if active */ - RB_REMOVE(vmspace_rb_tree, &vk->vk_root, ve); - vmspace_free(ve->vmspace); - kfree(ve, M_VKERNEL); + if (ve->refs) + return (EBUSY); + vmspace_entry_delete(ve, vc); return(0); } @@ -128,14 +139,53 @@ sys_vmspace_destroy(struct vmspace_destroy_args *uap) int sys_vmspace_ctl(struct vmspace_ctl_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; + struct proc *p; + int framesz; + int error; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); - return(EINVAL); + + switch(uap->cmd) { + case VMSPACE_CTL_RUN: + /* + * Save the caller's register context, swap VM spaces, and + * install the passed register context. Return with + * EJUSTRETURN so the syscall code doesn't adjust the context. 
+ */ + p = curproc; + ++ve->refs; + framesz = sizeof(struct trapframe); + vk->vk_current = ve; + vk->vk_save_vmspace = p->p_vmspace; + vk->vk_user_frame = uap->ctx; + bcopy(uap->sysmsg_frame, &vk->vk_save_frame, framesz); + error = copyin(uap->ctx, uap->sysmsg_frame, framesz); + if (error == 0) + error = cpu_sanitize_frame(uap->sysmsg_frame); + if (error) { + bcopy(&vk->vk_save_frame, uap->sysmsg_frame, framesz); + vk->vk_current = NULL; + vk->vk_save_vmspace = NULL; + --ve->refs; + } else { + pmap_deactivate(p); + p->p_vmspace = ve->vmspace; + pmap_activate(p); + error = EJUSTRETURN; + } + break; + default: + error = EOPNOTSUPP; + break; + } + return(error); } /* @@ -148,13 +198,15 @@ sys_vmspace_ctl(struct vmspace_ctl_args *uap) int sys_vmspace_mmap(struct vmspace_mmap_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; int error; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); error = kern_mmap(ve->vmspace, uap->addr, uap->len, uap->prot, uap->flags, @@ -170,15 +222,17 @@ sys_vmspace_mmap(struct vmspace_mmap_args *uap) int sys_vmspace_munmap(struct vmspace_munmap_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; vm_offset_t addr; vm_size_t size, pageoff; vm_map_t map; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); /* @@ -220,12 +274,14 @@ sys_vmspace_munmap(struct vmspace_munmap_args *uap) int sys_vmspace_pread(struct vmspace_pread_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); return (EINVAL); } @@ -241,12 +297,14 @@ sys_vmspace_pread(struct vmspace_pread_args *uap) int sys_vmspace_pwrite(struct vmspace_pwrite_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); return (EINVAL); } @@ -259,13 +317,15 @@ sys_vmspace_pwrite(struct vmspace_pwrite_args *uap) int sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap) { - struct vkernel *vk; + struct vkernel_common *vc; struct vmspace_entry *ve; + struct vkernel *vk; vm_offset_t start, end; if ((vk = curproc->p_vkernel) == NULL) return (EINVAL); - if ((ve = vkernel_find_vmspace(vk, uap->id)) == NULL) + vc = vk->vk_common; + if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) return (ENOENT); /* @@ -312,23 +372,41 @@ static int rb_vmspace_delete(struct vmspace_entry *ve, void *data) { - struct vkernel *vk = data; + struct vkernel_common *vc = data; + + KKASSERT(ve->refs == 0); + vmspace_entry_delete(ve, vc); + return(0); +} + +/* + * Remove a vmspace_entry from the RB tree and destroy it. We have to clean + * up the pmap, the vm_map, then destroy the vmspace. 
+ */ +static +void +vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_common *vc) +{ + RB_REMOVE(vmspace_rb_tree, &vc->vc_root, ve); - RB_REMOVE(vmspace_rb_tree, &vk->vk_root, ve); + pmap_remove_pages(vmspace_pmap(ve->vmspace), + VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); + vm_map_remove(&ve->vmspace->vm_map, + VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); vmspace_free(ve->vmspace); kfree(ve, M_VKERNEL); - return(0); } + static struct vmspace_entry * -vkernel_find_vmspace(struct vkernel *vk, void *id) +vkernel_find_vmspace(struct vkernel_common *vc, void *id) { struct vmspace_entry *ve; struct vmspace_entry key; key.id = id; - ve = RB_FIND(vmspace_rb_tree, &vk->vk_root, &key); + ve = RB_FIND(vmspace_rb_tree, &vc->vc_root, &key); return (ve); } @@ -337,19 +415,105 @@ vkernel_find_vmspace(struct vkernel *vk, void *id) * a vkernel process. */ void -vkernel_hold(struct vkernel *vk) +vkernel_inherit(struct proc *p1, struct proc *p2) { - ++vk->vk_refs; + struct vkernel_common *vc; + struct vkernel *vk; + + vk = p1->p_vkernel; + vc = vk->vk_common; + KKASSERT(vc->vc_refs > 0); + atomic_add_int(&vc->vc_refs, 1); + vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO); + p2->p_vkernel = vk; + vk->vk_common = vc; } void -vkernel_drop(struct vkernel *vk) +vkernel_exit(struct proc *p) { - KKASSERT(vk->vk_refs > 0); - if (--vk->vk_refs == 0) { - RB_SCAN(vmspace_rb_tree, &vk->vk_root, NULL, - rb_vmspace_delete, vk); - kfree(vk, M_VKERNEL); + struct vkernel_common *vc; + struct vmspace_entry *ve; + struct vkernel *vk; + int freeme = 0; + + vk = p->p_vkernel; + p->p_vkernel = NULL; + vc = vk->vk_common; + vk->vk_common = NULL; + + /* + * Restore the original VM context if we are killed while running + * a different one. + */ + if ((ve = vk->vk_current) != NULL) { + printf("killed with active VC\n"); + vk->vk_current = NULL; + pmap_deactivate(p); + p->p_vmspace = vk->vk_save_vmspace; + pmap_activate(p); + vk->vk_save_vmspace = NULL; + KKASSERT(ve->refs > 0); + --ve->refs; } + + /* + * Dereference the common area + */ + KKASSERT(vc->vc_refs > 0); + spin_lock_wr(&vc->vc_spin); + if (--vc->vc_refs == 0) + freeme = 1; + spin_unlock_wr(&vc->vc_spin); + + if (freeme) { + RB_SCAN(vmspace_rb_tree, &vc->vc_root, NULL, + rb_vmspace_delete, vc); + kfree(vc, M_VKERNEL); + } + kfree(vk, M_VKERNEL); +} + +/* + * A VM space under virtual kernel control trapped out or made a system call + * or otherwise needs to return control to the virtual kernel context. + */ +int +vkernel_trap(struct proc *p, struct trapframe *frame) +{ + struct vmspace_entry *ve; + struct vkernel *vk; + int error; + + printf("trap for vkernel type %d wm=%d\n", + frame->tf_trapno & 0x7FFFFFFF, + ((frame->tf_trapno & 0x80000000) ? 1 : 0)); + + /* + * Which vmspace entry was running? + */ + vk = p->p_vkernel; + ve = vk->vk_current; + vk->vk_current = NULL; + KKASSERT(ve != NULL); + + /* + * Switch the process context back to the virtual kernel's VM space. + */ + pmap_deactivate(p); + p->p_vmspace = vk->vk_save_vmspace; + pmap_activate(p); + vk->vk_save_vmspace = NULL; + KKASSERT(ve->refs > 0); + --ve->refs; + + /* + * Copy the trapframe to the virtual kernel's userspace, then + * restore virtual kernel's original syscall trap frame so we + * can 'return' from the system call that ran the custom VM space. + */ + error = copyout(frame, vk->vk_user_frame, sizeof(*frame)); + bcopy(&vk->vk_save_frame, frame, sizeof(*frame)); + return(error); } -- 2.41.0