From 135d71994d02bc01e026e9a5c89f9fcbd96e031f Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 7 Jan 2007 08:37:37 +0000 Subject: [PATCH] Implement nearly all the remaining items required to allow the virtual kernel to actually execute code on behalf of a virtualized user process. The virtual kernel is now able to execute the init binary through to the point where it sets up a TLS segment. * Create a pseudo tf_trapno called T_SYSCALL80 to indicate system call traps. * Add MD shims when creating or destroying a struct vmspace, allowing the virtual kernel to create and destroy real-kernel vmspaces along with its own. Add appropriate calls to vmspace_mmap() and vmspace_mcontrol() to map memory inside the user process vmspace. The memory is mapped VPAGETABLE and the page table directory is set to point to the pmap page directory. * Clean up user_trap() and handle T_PAGEFLT properly. * Implement go_user(). It calls vmspace_ctl(... VMSPACE_CTL_RUN) and user_trap() in a loop, allowing the virtual kernel to 'run' a user mode context under its control. * Reduce VM_MAX_USER_ADDRESS to 0xb8000000 for now, until I figure out the best way to have the virtual kernel query the actual max user address from the real kernel. * Correct a pm_pdirpte assignment. We can't look up the PTE until after we have entered it into the kernel pmap. 
--- sys/cpu/i386/include/trap.h | 5 +- sys/platform/pc32/i386/trap.c | 3 +- sys/platform/pc32/i386/vm_machdep.c | 15 ++++- sys/platform/vkernel/i386/trap.c | 85 ++++++++++++++++---------- sys/platform/vkernel/i386/userldt.c | 5 +- sys/platform/vkernel/include/md_var.h | 5 +- sys/platform/vkernel/include/vmparam.h | 4 +- sys/platform/vkernel/platform/pmap.c | 67 +++++++++++++++++++- sys/sys/systm.h | 5 +- sys/vm/vm_map.c | 5 +- 10 files changed, 152 insertions(+), 47 deletions(-) diff --git a/sys/cpu/i386/include/trap.h b/sys/cpu/i386/include/trap.h index 4db8379255..0ebd313073 100644 --- a/sys/cpu/i386/include/trap.h +++ b/sys/cpu/i386/include/trap.h @@ -35,7 +35,7 @@ * * from: @(#)trap.h 5.4 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/include/trap.h,v 1.10.2.2 2001/08/15 01:23:52 peter Exp $ - * $DragonFly: src/sys/cpu/i386/include/trap.h,v 1.3 2006/11/07 06:43:22 dillon Exp $ + * $DragonFly: src/sys/cpu/i386/include/trap.h,v 1.4 2007/01/07 08:37:33 dillon Exp $ */ #ifndef _CPU_TRAP_H_ @@ -104,6 +104,7 @@ #define BUS_SEGM_FAULT T_RESERVED /* segment protection base */ /* Trap's coming from user mode */ -#define T_USER 0x100 +#define T_SYSCALL80 0x080 +#define T_USER 0x100 #endif /* !_CPU_TRAP_H_ */ diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index a8211f94c7..32ebb92811 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.90 2007/01/07 05:41:02 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.91 2007/01/07 08:37:34 dillon Exp $ */ /* @@ -1338,6 +1338,7 @@ syscall2(struct trapframe frame) * call. The current frame is copied out to the virtual kernel. 
*/ if (p->p_vkernel && p->p_vkernel->vk_current) { + frame.tf_trapno = T_SYSCALL80; error = vkernel_trap(p, &frame); frame.tf_eax = error; if (error) diff --git a/sys/platform/pc32/i386/vm_machdep.c b/sys/platform/pc32/i386/vm_machdep.c index 089348c18f..6fb3365d0e 100644 --- a/sys/platform/pc32/i386/vm_machdep.c +++ b/sys/platform/pc32/i386/vm_machdep.c @@ -39,7 +39,7 @@ * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.51 2007/01/05 22:16:30 dillon Exp $ + * $DragonFly: src/sys/platform/pc32/i386/vm_machdep.c,v 1.52 2007/01/07 08:37:34 dillon Exp $ */ #include "use_npx.h" @@ -532,3 +532,16 @@ is_physical_memory(vm_offset_t addr) return 1; } + +/* + * platform-specific vmspace initialization (nothing for i386) + */ +void +cpu_vmspace_alloc(struct vmspace *vm __unused) +{ +} + +void +cpu_vmspace_free(struct vmspace *vm __unused) +{ +} diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c index 53d8da0e97..416b0826cb 100644 --- a/sys/platform/vkernel/i386/trap.c +++ b/sys/platform/vkernel/i386/trap.c @@ -36,7 +36,7 @@ * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.3 2007/01/07 05:45:04 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.4 2007/01/07 08:37:35 dillon Exp $ */ /* @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -373,6 +374,33 @@ user_trap(struct trapframe *frame) vm_offset_t eva; p = td->td_proc; + + /* + * This is a bad kludge to avoid changing the various trapframe + * structures. Because we are enabled as a virtual kernel, + * the original tf_err field will be passed to us shifted 16 + * over in the tf_trapno field for T_PAGEFLT. 
+ */ + if ((int16_t)frame->tf_trapno == T_PAGEFLT) { + eva = frame->tf_err; + frame->tf_err = frame->tf_trapno >> 16; + frame->tf_trapno &= 0xFFFF; + /*cpu_enable_intr();*/ + } else { + eva = 0; + } + kprintf("USER_TRAP AT %08x err %d trapno %d eva %08x\n", + frame->tf_eip, frame->tf_err, frame->tf_trapno, eva); + + /* + * Everything coming from user mode runs through user_trap, + * including system calls. + */ + if (frame->tf_trapno == T_SYSCALL80) { + syscall2(frame); + return; + } + #ifdef DDB if (db_active) { eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0); @@ -384,23 +412,7 @@ user_trap(struct trapframe *frame) } #endif - eva = 0; ++gd->gd_trap_nesting_level; - if (frame->tf_trapno == T_PAGEFLT) { - /* - * For some Cyrix CPUs, %cr2 is clobbered by interrupts. - * This problem is worked around by using an interrupt - * gate for the pagefault handler. We are finally ready - * to read %cr2 and then must reenable interrupts. - * - * XXX this should be in the switch statement, but the - * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the - * flow of control too much for this to be obviously - * correct. - */ - eva = rcr2(); - cpu_enable_intr(); - } #ifdef SMP if (trap_mpsafe == 0) MAKEMPSAFE(have_mplock); @@ -631,19 +643,6 @@ kern_trap(struct trapframe *frame) vm_offset_t eva; p = td->td_proc; -#ifdef DDB - if (db_active) { - eva = (frame->tf_trapno == T_PAGEFLT ? 
frame->tf_err : 0); - ++gd->gd_trap_nesting_level; - MAKEMPSAFE(have_mplock); - trap_fatal(frame, FALSE, eva); - --gd->gd_trap_nesting_level; - goto out2; - } -#endif - - eva = 0; - ++gd->gd_trap_nesting_level; /* * This is a bad kludge to avoid changing the various trapframe @@ -656,7 +655,22 @@ kern_trap(struct trapframe *frame) frame->tf_err = frame->tf_trapno >> 16; frame->tf_trapno &= 0xFFFF; /*cpu_enable_intr();*/ + } else { + eva = 0; } + +#ifdef DDB + if (db_active) { + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + trap_fatal(frame, FALSE, eva); + --gd->gd_trap_nesting_level; + goto out2; + } +#endif + + ++gd->gd_trap_nesting_level; + #ifdef SMP if (trap_mpsafe == 0) MAKEMPSAFE(have_mplock); @@ -1364,7 +1378,14 @@ fork_return(struct lwp *lp, struct trapframe frame) } void -go_user(void) +go_user(struct trapframe frame) { - panic("GO USER"); + for (;;) { + kprintf("GO USER"); + vmspace_ctl(curproc->p_vmspace, VMSPACE_CTL_RUN, + &frame, sizeof(frame), 0); + kprintf("RETURN USER"); + user_trap(&frame); + } } + diff --git a/sys/platform/vkernel/i386/userldt.c b/sys/platform/vkernel/i386/userldt.c index c2f3ad6608..d4f7f9df3f 100644 --- a/sys/platform/vkernel/i386/userldt.c +++ b/sys/platform/vkernel/i386/userldt.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.2 2007/01/07 08:37:35 dillon Exp $ */ #include @@ -55,6 +55,7 @@ user_ldt_alloc (struct pcb *pcb, int len) void user_ldt_free (struct pcb *pcb) { - panic("user_ldt_free"); + if (pcb->pcb_ldt) + panic("user_ldt_free"); } diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/include/md_var.h index 47cdc4a019..9f6efa4ab4 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/include/md_var.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.7 2007/01/07 05:45:05 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.8 2007/01/07 08:37:36 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ @@ -55,6 +55,7 @@ extern char cpu_vendor[]; /* XXX belongs in i386 */ extern u_int cpu_id; /* XXX belongs in i386 */ extern int RootImageFd; +extern int MemImageFd; struct mdglobaldata; @@ -68,7 +69,7 @@ void cpu_kthread_restore(void); /* cannot be called from C */ void cpu_exit_switch (struct thread *next); void cpu_setregs (void); void cpu_idle (void); -void go_user (void); +void go_user (struct trapframe frame); void init_exceptions(void); void kern_trap(struct trapframe *); diff --git a/sys/platform/vkernel/include/vmparam.h b/sys/platform/vkernel/include/vmparam.h index 10a5919ac0..92e23a62cb 100644 --- a/sys/platform/vkernel/include/vmparam.h +++ b/sys/platform/vkernel/include/vmparam.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/include/vmparam.h,v 1.3 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/vmparam.h,v 1.4 2007/01/07 08:37:36 dillon Exp $ */ #ifndef _MACHINE_VMPARAM_H_ @@ -76,7 +76,7 @@ #define KERNEL_KVA_SIZE 0x40000000 #define VM_MIN_USER_ADDRESS 0x00000000 -#define VM_MAX_USER_ADDRESS 0xC0000000 /* XXX match to real kernel */ +#define VM_MAX_USER_ADDRESS 0xB8000000 /* XXX match to real kernel */ #define USRSTACK VM_MAX_USER_ADDRESS diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c index e1ffae78a0..5836602a67 100644 --- a/sys/platform/vkernel/platform/pmap.c +++ b/sys/platform/vkernel/platform/pmap.c @@ -38,7 +38,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.4 2007/01/06 19:40:55 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.5 2007/01/07 08:37:37 dillon Exp $ */ #include @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -209,6 +210,7 @@ pmap_pinit(struct pmap *pmap) ptdpg->valid = VM_PAGE_BITS_ALL; pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); + pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT]; if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); @@ -327,6 +329,65 @@ pmap_reference(pmap_t pmap) } } +/************************************************************************ + * VMSPACE MANAGEMENT * + ************************************************************************ + * + * The VMSPACE management we do in our virtual kernel must be reflected + * in the real kernel. This is accomplished by making vmspace system + * calls to the real kernel. 
+ */ +void +cpu_vmspace_alloc(struct vmspace *vm) +{ + int r; + void *rp; + +#define LAST_EXTENT (VM_MAX_USER_ADDRESS - 0x80000000) + + if (vmspace_create(vm, 0, NULL) < 0) + panic("vmspace_create() failed"); + + rp = vmspace_mmap(vm, (void *)0x00000000, 0x40000000, + PROT_READ|PROT_WRITE, + MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, + MemImageFd, 0); + if (rp == MAP_FAILED) + panic("vmspace_mmap: failed1"); + rp = vmspace_mmap(vm, (void *)0x40000000, 0x40000000, + PROT_READ|PROT_WRITE, + MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, + MemImageFd, 0x40000000); + if (rp == MAP_FAILED) + panic("vmspace_mmap: failed2"); + rp = vmspace_mmap(vm, (void *)0x80000000, LAST_EXTENT, + PROT_READ|PROT_WRITE, + MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, + MemImageFd, 0x80000000); + if (rp == MAP_FAILED) + panic("vmspace_mmap: failed3"); + + r = vmspace_mcontrol(vm, (void *)0x00000000, 0x40000000, MADV_SETMAP, + vmspace_pmap(vm)->pm_pdirpte); + if (r < 0) + panic("vmspace_mcontrol: failed1"); + r = vmspace_mcontrol(vm, (void *)0x40000000, 0x40000000, MADV_SETMAP, + vmspace_pmap(vm)->pm_pdirpte); + if (r < 0) + panic("vmspace_mcontrol: failed2"); + r = vmspace_mcontrol(vm, (void *)0x80000000, LAST_EXTENT, MADV_SETMAP, + vmspace_pmap(vm)->pm_pdirpte); + if (r < 0) + panic("vmspace_mcontrol: failed3"); +} + +void +cpu_vmspace_free(struct vmspace *vm) +{ + if (vmspace_destroy(vm) < 0) + panic("vmspace_destroy() failed"); +} + /************************************************************************ * Procedures which operate directly on the kernel PMAP * ************************************************************************/ @@ -1699,6 +1760,8 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) vpte_t *pte; vm_paddr_t pa; pmap_inval_info info; + unsigned ptepindex; + vm_offset_t ptepa; KKASSERT(pmap != &kernel_pmap); pmap_inval_init(&info); @@ -1708,8 +1771,6 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) /* * 
Instantiate the page table page if required */ - unsigned ptepindex; - vm_offset_t ptepa; /* * Calculate pagetable page index diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 82d4085d64..1c33fd51bc 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -37,7 +37,7 @@ * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD: src/sys/sys/systm.h,v 1.111.2.18 2002/12/17 18:04:02 sam Exp $ - * $DragonFly: src/sys/sys/systm.h,v 1.58 2007/01/05 22:16:32 dillon Exp $ + * $DragonFly: src/sys/sys/systm.h,v 1.59 2007/01/07 08:37:37 dillon Exp $ */ #ifndef _SYS_SYSTM_H_ @@ -125,6 +125,7 @@ struct globaldata; struct thread; struct trapframe; struct user; +struct vmspace; void Debugger (const char *msg); void backtrace(void); @@ -142,6 +143,8 @@ void cpu_halt (void); void cpu_reset (void); void cpu_boot (int); void cpu_rootconf (void); +void cpu_vmspace_alloc(struct vmspace *); +void cpu_vmspace_free(struct vmspace *); vm_paddr_t kvtop(void *addr); int is_physical_memory (vm_offset_t addr); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 704c540202..eb104ac62c 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -62,7 +62,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ - * $DragonFly: src/sys/vm/vm_map.c,v 1.54 2006/12/28 21:24:02 dillon Exp $ + * $DragonFly: src/sys/vm/vm_map.c,v 1.55 2007/01/07 08:37:37 dillon Exp $ */ /* @@ -203,6 +203,7 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max) vm->vm_refcnt = 1; vm->vm_shm = NULL; vm->vm_exitingcnt = 0; + cpu_vmspace_alloc(vm); return (vm); } @@ -222,6 +223,8 @@ vmspace_dofree(struct vmspace *vm) { int count; + cpu_vmspace_free(vm); + /* * Make sure any SysV shm is freed, it might not have in * exit1() -- 2.41.0