From 6f7b98e081c59b0af2e4c3558c24bd00cd422419 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 5 Jan 2007 22:18:20 +0000
Subject: [PATCH] Continue fleshing out the VKERNEL.

---
 sys/platform/vkernel/conf/files               |   20 +-
 sys/platform/vkernel/i386/autoconf.c          |   83 +-
 sys/platform/vkernel/i386/cpu_regs.c          | 1253 +++++++++++++
 sys/platform/vkernel/i386/db_interface.c      |  328 ++++
 sys/platform/vkernel/i386/db_trace.c          |  642 +++++++
 sys/platform/vkernel/i386/global.s            |    5 +-
 sys/platform/vkernel/i386/locore.s            |   19 +-
 sys/platform/vkernel/i386/npx.c               |    8 +-
 sys/platform/vkernel/i386/swtch.s             |   15 +-
 sys/platform/vkernel/i386/tls.c               |  204 +++
 sys/platform/vkernel/i386/trap.c              | 1551 +++++++++++++++++
 .../{include/md_var.h => i386/userldt.c}      |   39 +-
 sys/platform/vkernel/i386/vm_machdep.c        |  398 +++++
 sys/platform/vkernel/include/globaldata.h     |    4 +-
 sys/platform/vkernel/include/md_var.h         |   13 +-
 sys/platform/vkernel/include/pcb_ext.h        |   76 +
 .../vkernel/platform/busdma_machdep.c         |  900 ++++++++++
 sys/platform/vkernel/platform/console.c       |  241 +++
 sys/platform/vkernel/platform/copyio.c        |   22 +-
 sys/platform/vkernel/platform/init.c          |   35 +-
 sys/platform/vkernel/platform/ipl_funcs.c     |   78 +
 sys/platform/vkernel/platform/machintr.c      |   15 +-
 sys/platform/vkernel/platform/pmap.c          |   82 +-
 sys/platform/vkernel/platform/pmap_inval.c    |   17 +-
 .../{include/md_var.h => platform/sysarch.c}  |   41 +-
 .../platform/{machintr.c => systimer.c}       |   73 +-
 26 files changed, 6050 insertions(+), 112 deletions(-)
 create mode 100644 sys/platform/vkernel/i386/cpu_regs.c
 create mode 100644 sys/platform/vkernel/i386/db_interface.c
 create mode 100644 sys/platform/vkernel/i386/db_trace.c
 create mode 100644 sys/platform/vkernel/i386/tls.c
 create mode 100644 sys/platform/vkernel/i386/trap.c
 copy sys/platform/vkernel/{include/md_var.h => i386/userldt.c} (78%)
 create mode 100644 sys/platform/vkernel/i386/vm_machdep.c
 create mode 100644 sys/platform/vkernel/include/pcb_ext.h
 create mode 100644 sys/platform/vkernel/platform/busdma_machdep.c
 create mode 100644 sys/platform/vkernel/platform/console.c
 create mode 100644 sys/platform/vkernel/platform/ipl_funcs.c
 copy sys/platform/vkernel/{include/md_var.h => platform/sysarch.c} (78%)
 copy sys/platform/vkernel/platform/{machintr.c => systimer.c} (66%)

diff --git a/sys/platform/vkernel/conf/files b/sys/platform/vkernel/conf/files
index 1927feda8b..dda23b0c51 100644
--- a/sys/platform/vkernel/conf/files
+++ b/sys/platform/vkernel/conf/files
@@ -1,7 +1,7 @@
 # This file tells config what files go into building a kernel,
 # files marked standard are always included.
# -# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.6 2007/01/02 04:24:24 dillon Exp $ +# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.7 2007/01/05 22:18:17 dillon Exp $ # bf_enc.o optional ipsec ipsec_esp \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ @@ -36,6 +36,11 @@ machine/vkernel/i386/autoconf.c standard cpu/i386/misc/elf_machdep.c standard cpu/i386/misc/in_cksum2.s optional inet cpu/i386/misc/ktr.c optional ktr +cpu/i386/misc/db_disasm.c optional ddb +# +# DOS mbr +kern/subr_diskmbr.c standard + #vkernel/vkernel/pmap.c standard #vkernel/vkernel/pmap_inval.c standard #vkernel/vkernel/spinlock.s standard @@ -49,8 +54,21 @@ cpu/i386/misc/ktr.c optional ktr machine/vkernel/i386/global.s standard machine/vkernel/i386/swtch.s standard machine/vkernel/i386/npx.c mandatory npx +machine/vkernel/i386/db_interface.c standard +machine/vkernel/i386/db_trace.c standard +machine/vkernel/i386/vm_machdep.c standard +machine/vkernel/i386/cpu_regs.c standard +machine/vkernel/i386/userldt.c standard +machine/vkernel/i386/tls.c standard +machine/vkernel/i386/trap.c standard machine/vkernel/platform/init.c standard machine/vkernel/platform/globaldata.c standard machine/vkernel/platform/machintr.c standard machine/vkernel/platform/copyio.c standard machine/vkernel/platform/pmap.c standard +machine/vkernel/platform/pmap_inval.c standard +machine/vkernel/platform/busdma_machdep.c standard +machine/vkernel/platform/sysarch.c standard +machine/vkernel/platform/systimer.c standard +machine/vkernel/platform/console.c standard +machine/vkernel/platform/ipl_funcs.c standard diff --git a/sys/platform/vkernel/i386/autoconf.c b/sys/platform/vkernel/i386/autoconf.c index c2c4c9410c..4afec6151d 100644 --- a/sys/platform/vkernel/i386/autoconf.c +++ b/sys/platform/vkernel/i386/autoconf.c @@ -35,7 +35,7 @@ * * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.5 2006/12/23 00:27:03 swildner Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.6 2007/01/05 22:18:18 dillon Exp $ */ /* @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,11 @@ #include #include +#include +#include +#include +#include + #if 0 #include #include @@ -85,9 +91,10 @@ device_t isa_bus_device = 0; #endif -static void configure_first (void *); -static void configure (void *); -static void configure_final (void *); +static void cpu_startup (void *); +static void configure_first (void *); +static void configure (void *); +static void configure_final (void *); #if defined(FFS) && defined(FFS_ROOT) static void setroot (void); @@ -99,6 +106,7 @@ static void pxe_setup_nfsdiskless(void); #endif #endif +SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL); /* SI_ORDER_SECOND is hookable */ SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL); @@ -108,6 +116,73 @@ SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); cdev_t rootdev = NOCDEV; cdev_t dumpdev = NOCDEV; +/* + * + */ +static void +cpu_startup(void *dummy) +{ + vm_offset_t buffer_sva; + vm_offset_t buffer_eva; + vm_offset_t pager_sva; + vm_offset_t pager_eva; + vm_offset_t minaddr; + vm_offset_t maxaddr; + + kprintf("%s", version); + kprintf("real memory = %llu (%lluK bytes)\n", + 
ptoa(Maxmem), ptoa(Maxmem) / 1024); + + if (nbuf == 0) { + int factor = 4 * BKVASIZE / 1024; + int kbytes = physmem * (PAGE_SIZE / 1024); + + nbuf = 50; + if (kbytes > 4096) + nbuf += min((kbytes - 4096) / factor, 65536 / factor); + if (kbytes > 65536) + nbuf += (kbytes - 65536) * 2 / (factor * 5); + if (maxbcache && nbuf > maxbcache / BKVASIZE) + nbuf = maxbcache / BKVASIZE; + } + if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) { + nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2); + kprintf("Warning: nbufs capped at %d\n", nbuf); + } + + nswbuf = max(min(nbuf/4, 256), 16); +#ifdef NSWBUF_MIN + if (nswbuf < NSWBUF_MIN) + nswbuf = NSWBUF_MIN; +#endif +#ifdef DIRECTIO + ffs_rawread_setup(); +#endif + kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva, + (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); + kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva, + (nbuf*BKVASIZE)); + buffer_map.system_map = 1; + kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva, + (nswbuf*MAXPHYS) + pager_map_size); + pager_map.system_map = 1; + kmem_suballoc(&kernel_map, &exec_map, &minaddr, &maxaddr, + (16*(ARG_MAX+(PAGE_SIZE*3)))); +#if defined(USERCONFIG) + userconfig(); + cninit(); /* the preferred console may have changed */ +#endif + kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count), + ptoa(vmstats.v_free_count) / 1024); + bufinit(); + vm_pager_bufferinit(); +#ifdef SMP + mp_start(); + mp_announce(); +#endif + cpu_setregs(); +} + /* * Determine i/o configuration for a machine. */ diff --git a/sys/platform/vkernel/i386/cpu_regs.c b/sys/platform/vkernel/i386/cpu_regs.c new file mode 100644 index 0000000000..73bfa47f9a --- /dev/null +++ b/sys/platform/vkernel/i386/cpu_regs.c @@ -0,0 +1,1253 @@ +/*- + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (C) 1994, David Greenman + * Copyright (c) 1982, 1987, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 + * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include "use_ether.h" +#include "use_npx.h" +#include "use_isa.h" +#include "opt_atalk.h" +#include "opt_compat.h" +#include "opt_ddb.h" +#include "opt_directio.h" +#include "opt_inet.h" +#include "opt_ipx.h" +#include "opt_msgbuf.h" +#include "opt_swap.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include /* pcb.h included via sys/user.h */ +#include /* CPU_prvspace */ +#include +#ifdef PERFMON +#include +#endif +#include + +#include +#include +#include +#include +#include + +extern void dblfault_handler (void); + +#ifndef CPU_DISABLE_SSE +static void set_fpregs_xmm (struct save87 *, struct savexmm *); +static void fill_fpregs_xmm (struct savexmm *, struct save87 *); +#endif /* CPU_DISABLE_SSE */ +#ifdef DIRECTIO +extern void ffs_rawread_setup(void); +#endif /* DIRECTIO */ + +#ifdef SMP +int64_t tsc_offsets[MAXCPU]; +#else +int64_t tsc_offsets[1]; +#endif + +#if defined(SWTCH_OPTIM_STATS) +extern int swtch_optim_stats; +SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, + CTLFLAG_RD, &swtch_optim_stats, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, + CTLFLAG_RD, &tlb_flush_count, 0, ""); +#endif + +int physmem = 0; + +static int +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_physmem, "IU", ""); + +static int +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, + ctob(physmem - vmstats.v_wire_count), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_usermem, "IU", ""); + +#if 0 + +static int +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) +{ + int error; + + /* Unwind the buffer, so that it's linear (possibly starting with + * some initial nulls). 
+ */ + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, + msgbufp->msg_size-msgbufp->msg_bufr,req); + if(error) return(error); + if(msgbufp->msg_bufr>0) { + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, + msgbufp->msg_bufr,req); + } + return(error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, + 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); + +static int msgbuf_clear; + +static int +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) { + /* Clear the buffer and reset write pointer */ + bzero(msgbufp->msg_ptr,msgbufp->msg_size); + msgbufp->msg_bufr=msgbufp->msg_bufx=0; + msgbuf_clear=0; + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, + &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", + "Clear kernel message buffer"); + +#endif + +/* + * Send an interrupt to process. + * + * Stack is set up to allow sigcode stored + * at top to call routine, followed by kcall + * to sigreturn routine below. After sigreturn + * resets the signal mask, the stack, and the + * frame pointer, it returns to the user + * specified pc, psl. + */ +void +sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) +{ + struct lwp *lp = curthread->td_lwp; + struct proc *p = lp->lwp_proc; + struct trapframe *regs; + struct sigacts *psp = p->p_sigacts; + struct sigframe sf, *sfp; + int oonstack; + + regs = lp->lwp_md.md_regs; + oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; + + /* save user context */ + bzero(&sf, sizeof(struct sigframe)); + sf.sf_uc.uc_sigmask = *mask; + sf.sf_uc.uc_stack = lp->lwp_sigstk; + sf.sf_uc.uc_mcontext.mc_onstack = oonstack; + sf.sf_uc.uc_mcontext.mc_gs = rgs(); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe)); + + /* Allocate and validate space for the signal handler context. */ + /* XXX lwp flags */ + if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp + + lp->lwp_sigstk.ss_size - sizeof(struct sigframe)); + lp->lwp_sigstk.ss_flags |= SS_ONSTACK; + } + else + sfp = (struct sigframe *)regs->tf_esp - 1; + + /* Translate the signal is appropriate */ + if (p->p_sysent->sv_sigtbl) { + if (sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + } + + /* Build the argument list for the signal handler. */ + sf.sf_signum = sig; + sf.sf_ucontext = (register_t)&sfp->sf_uc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* Signal handler installed with SA_SIGINFO. */ + sf.sf_siginfo = (register_t)&sfp->sf_si; + sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; + + /* fill siginfo structure */ + sf.sf_si.si_signo = sig; + sf.sf_si.si_code = code; + sf.sf_si.si_addr = (void*)regs->tf_err; + } + else { + /* Old FreeBSD-style arguments. */ + sf.sf_siginfo = code; + sf.sf_addr = regs->tf_err; + sf.sf_ahu.sf_handler = catcher; + } + +#if 0 + /* + * If we're a vm86 process, we want to save the segment registers. + * We also change eflags to be our emulated eflags, not the actual + * eflags. 
+ */ + if (regs->tf_eflags & PSL_VM) { + struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; + struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; + + sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; + sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; + sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; + sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; + + if (vm86->vm86_has_vme == 0) + sf.sf_uc.uc_mcontext.mc_eflags = + (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | + (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); + + /* + * Clear PSL_NT to inhibit T_TSSFLT faults on return from + * syscalls made by the signal handler. This just avoids + * wasting time for our lazy fixup of such faults. PSL_NT + * does nothing in vm86 mode, but vm86 programs can set it + * almost legitimately in probes for old cpu types. + */ + tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); + } +#endif + + /* + * Copy the sigframe out to the user's stack. + */ + if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { + /* + * Something is wrong with the stack pointer. + * ...Kill the process. + */ + sigexit(p, SIGILL); + } + + regs->tf_esp = (int)sfp; + regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); + regs->tf_eflags &= ~PSL_T; + regs->tf_cs = 0; + regs->tf_ds = 0; + regs->tf_es = 0; + regs->tf_fs = 0; + regs->tf_ss = 0; +} + +/* + * Sanitize the trapframe for a virtual kernel passing control to a custom + * VM context. + * + * Allow userland to set or maintain PSL_RF, the resume flag. This flag + * basically controls whether the return PC should skip the first instruction + * (as in an explicit system call) or re-execute it (as in an exception). + */ +int +cpu_sanitize_frame(struct trapframe *frame) +{ + frame->tf_cs = 0; + frame->tf_ds = 0; + frame->tf_es = 0; + frame->tf_fs = 0; + frame->tf_ss = 0; + frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; + return(0); +} + +/* + * sigreturn(ucontext_t *sigcntxp) + * + * System call to cleanup state after a signal + * has been taken. Reset signal mask and + * stack state from context left by sendsig (above). + * Return to previous pc and psl as specified by + * context left by sendsig. Check carefully to + * make sure that the user has not modified the + * state to gain improper privileges. + */ +#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) +#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) + +int +sys_sigreturn(struct sigreturn_args *uap) +{ + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + ucontext_t *ucp; + int cs, eflags; + + ucp = uap->sigcntxp; + + if (!useracc((caddr_t)ucp, sizeof(ucontext_t), VM_PROT_READ)) + return (EFAULT); + + regs = lp->lwp_md.md_regs; + eflags = ucp->uc_mcontext.mc_eflags; + +#if 0 + if (eflags & PSL_VM) { + struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; + struct vm86_kernel *vm86; + + /* + * if pcb_ext == 0 or vm86_inited == 0, the user hasn't + * set up the vm86 area, and we can't enter vm86 mode. 
+ */ + if (lp->lwp_thread->td_pcb->pcb_ext == 0) + return (EINVAL); + vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; + if (vm86->vm86_inited == 0) + return (EINVAL); + + /* go back to user mode if both flags are set */ + if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) + trapsignal(lp->lwp_proc, SIGBUS, 0); + + if (vm86->vm86_has_vme) { + eflags = (tf->tf_eflags & ~VME_USERCHANGE) | + (eflags & VME_USERCHANGE) | PSL_VM; + } else { + vm86->vm86_eflags = eflags; /* save VIF, VIP */ + eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; + } + bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); + tf->tf_eflags = eflags; + tf->tf_vm86_ds = tf->tf_ds; + tf->tf_vm86_es = tf->tf_es; + tf->tf_vm86_fs = tf->tf_fs; + tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; + tf->tf_ds = 0; + tf->tf_es = 0; + tf->tf_fs = 0; + } else +#endif + { + /* + * Don't allow users to change privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_eflags for faults. Debuggers + * should sometimes set it there too. tf_eflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ + if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { + kprintf("sigreturn: eflags = 0x%x\n", eflags); + return(EINVAL); + } + + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + cs = ucp->uc_mcontext.mc_cs; + if (!CS_SECURE(cs)) { + kprintf("sigreturn: cs = 0x%x\n", cs); + trapsignal(lp->lwp_proc, SIGBUS, T_PROTFLT); + return(EINVAL); + } + bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe)); + } + + if (ucp->uc_mcontext.mc_onstack & 1) + lp->lwp_sigstk.ss_flags |= SS_ONSTACK; + else + lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK; + + lp->lwp_sigmask = ucp->uc_sigmask; + SIG_CANTMASK(lp->lwp_sigmask); + return(EJUSTRETURN); +} + +/* + * Stack frame on entry to function. %eax will contain the function vector, + * %ecx will contain the function data. flags, ecx, and eax will have + * already been pushed on the stack. + */ +struct upc_frame { + register_t eax; + register_t ecx; + register_t edx; + register_t flags; + register_t oldip; +}; + +void +sendupcall(struct vmupcall *vu, int morepending) +{ + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + struct upcall upcall; + struct upc_frame upc_frame; + int crit_count = 0; + + /* + * Get the upcall data structure + */ + if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) || + copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)) + ) { + vu->vu_pending = 0; + kprintf("bad upcall address\n"); + return; + } + + /* + * If the data structure is already marked pending or has a critical + * section count, mark the data structure as pending and return + * without doing an upcall. vu_pending is left set. + */ + if (upcall.upc_pending || crit_count >= vu->vu_pending) { + if (upcall.upc_pending < vu->vu_pending) { + upcall.upc_pending = vu->vu_pending; + copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, + sizeof(upcall.upc_pending)); + } + return; + } + + /* + * We can run this upcall now, clear vu_pending. 
+ * + * Bump our critical section count and set or clear the + * user pending flag depending on whether more upcalls are + * pending. The user will be responsible for calling + * upc_dispatch(-1) to process remaining upcalls. + */ + vu->vu_pending = 0; + upcall.upc_pending = morepending; + crit_count += TDPRI_CRIT; + copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, + sizeof(upcall.upc_pending)); + copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, + sizeof(int)); + + /* + * Construct a stack frame and issue the upcall + */ + regs = lp->lwp_md.md_regs; + upc_frame.eax = regs->tf_eax; + upc_frame.ecx = regs->tf_ecx; + upc_frame.edx = regs->tf_edx; + upc_frame.flags = regs->tf_eflags; + upc_frame.oldip = regs->tf_eip; + if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)), + sizeof(upc_frame)) != 0) { + kprintf("bad stack on upcall\n"); + } else { + regs->tf_eax = (register_t)vu->vu_func; + regs->tf_ecx = (register_t)vu->vu_data; + regs->tf_edx = (register_t)lp->lwp_upcall; + regs->tf_eip = (register_t)vu->vu_ctx; + regs->tf_esp -= sizeof(upc_frame); + } +} + +/* + * fetchupcall occurs in the context of a system call, which means that + * we have to return EJUSTRETURN in order to prevent eax and edx from + * being overwritten by the syscall return value. + * + * if vu is not NULL we return the new context in %edx, the new data in %ecx, + * and the function pointer in %eax. + */ +int +fetchupcall (struct vmupcall *vu, int morepending, void *rsp) +{ + struct upc_frame upc_frame; + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + int error; + struct upcall upcall; + int crit_count; + + regs = lp->lwp_md.md_regs; + + error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int)); + if (error == 0) { + if (vu) { + /* + * This jumps us to the next ready context. + */ + vu->vu_pending = 0; + error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall)); + crit_count = 0; + if (error == 0) + error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)); + crit_count += TDPRI_CRIT; + if (error == 0) + error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int)); + regs->tf_eax = (register_t)vu->vu_func; + regs->tf_ecx = (register_t)vu->vu_data; + regs->tf_edx = (register_t)lp->lwp_upcall; + regs->tf_eip = (register_t)vu->vu_ctx; + regs->tf_esp = (register_t)rsp; + } else { + /* + * This returns us to the originally interrupted code. + */ + error = copyin(rsp, &upc_frame, sizeof(upc_frame)); + regs->tf_eax = upc_frame.eax; + regs->tf_ecx = upc_frame.ecx; + regs->tf_edx = upc_frame.edx; + regs->tf_eflags = (regs->tf_eflags & ~PSL_USERCHANGE) | + (upc_frame.flags & PSL_USERCHANGE); + regs->tf_eip = upc_frame.oldip; + regs->tf_esp = (register_t)((char *)rsp + sizeof(upc_frame)); + } + } + if (error == 0) + error = EJUSTRETURN; + return(error); +} + +/* + * cpu_idle() represents the idle LWKT. You cannot return from this function + * (unless you want to blow things up!). Instead we look for runnable threads + * and loop or halt as appropriate. Giant is not held on entry to the thread. + * + * The main loop is entered with a critical section held, we must release + * the critical section before doing anything else. lwkt_switch() will + * check for pending interrupts due to entering and exiting its own + * critical section. + * + * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI + * to wake a HLTed cpu up. 
However, there are cases where the idlethread + * will be entered with the possibility that no IPI will occur and in such + * cases lwkt_switch() sets TDF_IDLE_NOHLT. + */ +static int cpu_idle_hlt = 1; +static int cpu_idle_hltcnt; +static int cpu_idle_spincnt; +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, + &cpu_idle_hlt, 0, "Idle loop HLT enable"); +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW, + &cpu_idle_hltcnt, 0, "Idle loop entry halts"); +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW, + &cpu_idle_spincnt, 0, "Idle loop entry spins"); + +static void +cpu_idle_default_hook(void) +{ + /* + * We must guarentee that hlt is exactly the instruction + * following the sti. + */ + __asm __volatile("hlt"); /* sti; hlt */ +} + +/* Other subsystems (e.g., ACPI) can hook this later. */ +void (*cpu_idle_hook)(void) = cpu_idle_default_hook; + +void +cpu_idle(void) +{ + struct thread *td = curthread; + + crit_exit(); + KKASSERT(td->td_pri < TDPRI_CRIT); + for (;;) { + /* + * See if there are any LWKTs ready to go. + */ + lwkt_switch(); + + /* + * If we are going to halt call splz unconditionally after + * CLIing to catch any interrupt races. Note that we are + * at SPL0 and interrupts are enabled. + */ + if (cpu_idle_hlt && !lwkt_runnable() && + (td->td_flags & TDF_IDLE_NOHLT) == 0) { + /* __asm __volatile("cli"); */ + splz(); + if (!lwkt_runnable()) + cpu_idle_hook(); +#ifdef SMP + else + __asm __volatile("pause"); +#endif + ++cpu_idle_hltcnt; + } else { + td->td_flags &= ~TDF_IDLE_NOHLT; + splz(); +#ifdef SMP + /*__asm __volatile("sti; pause");*/ + __asm __volatile("pause"); +#else + /*__asm __volatile("sti");*/ +#endif + ++cpu_idle_spincnt; + } + } +} + +/* + * Clear registers on exec + */ +void +setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) +{ + struct trapframe *regs = lp->lwp_md.md_regs; + struct pcb *pcb = lp->lwp_thread->td_pcb; + + /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ + pcb->pcb_gs = 0; +#if 0 + load_gs(_udatasel); +#endif + + /* was i386_user_cleanup() in NetBSD */ + user_ldt_free(pcb); + + bzero((char *)regs, sizeof(struct trapframe)); + regs->tf_eip = entry; + regs->tf_esp = stack; + regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); + regs->tf_ss = 0; + regs->tf_ds = 0; + regs->tf_es = 0; + regs->tf_fs = 0; + regs->tf_cs = 0; + + /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ + regs->tf_ebx = ps_strings; + + /* + * Reset the hardware debug registers if they were in use. + * They won't have any meaning for the newly exec'd process. + */ + if (pcb->pcb_flags & PCB_DBREGS) { + pcb->pcb_dr0 = 0; + pcb->pcb_dr1 = 0; + pcb->pcb_dr2 = 0; + pcb->pcb_dr3 = 0; + pcb->pcb_dr6 = 0; + pcb->pcb_dr7 = 0; + if (pcb == curthread->td_pcb) { + /* + * Clear the debug registers on the running + * CPU, otherwise they will end up affecting + * the next process we switch to. + */ + reset_dbregs(); + } + pcb->pcb_flags &= ~PCB_DBREGS; + } + + /* + * Initialize the math emulator (if any) for the current process. + * Actually, just clear the bit that says that the emulator has + * been initialized. Initialization is delayed until the process + * traps to the emulator (if it is done at all) mainly because + * emulators don't provide an entry point for initialization. + */ + lp->lwp_thread->td_pcb->pcb_flags &= ~FP_SOFTFP; + + /* + * note: do not set CR0_TS here. npxinit() must do it after clearing + * gd_npxthread. Otherwise a preemptive interrupt thread may panic + * in npxdna(). 
+ */ + crit_enter(); +#if 0 + load_cr0(rcr0() | CR0_MP); +#endif + +#if NNPX > 0 + /* Initialize the npx (if any) for the current process. */ + npxinit(__INITIAL_NPXCW__); +#endif + crit_exit(); + + /* + * note: linux emulator needs edx to be 0x0 on entry, which is + * handled in execve simply by setting the 64 bit syscall + * return value to 0. + */ +} + +void +cpu_setregs(void) +{ +#if 0 + unsigned int cr0; + + cr0 = rcr0(); + cr0 |= CR0_NE; /* Done by npxinit() */ + cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ +#ifdef I386_CPU + if (cpu_class != CPUCLASS_386) +#endif + cr0 |= CR0_WP | CR0_AM; + load_cr0(cr0); + load_gs(_udatasel); +#endif +} + +static int +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) + resettodr(); + return (error); +} + +SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, + &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); + +extern u_long bootdev; /* not a cdev_t - encoding is different */ +SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, + CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)"); + +/* + * Initialize 386 and configure to run kernel + */ + +/* + * Initialize segments & interrupt table + */ + +extern struct user *proc0paddr; + +#if 0 + +extern inthand_t + IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), + IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), + IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), + IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align), + IDTVEC(xmm), IDTVEC(syscall), + IDTVEC(rsvd0); +extern inthand_t + IDTVEC(int0x80_syscall); + +#endif + +#ifdef DEBUG_INTERRUPTS +extern inthand_t *Xrsvdary[256]; +#endif + +int +ptrace_set_pc(struct proc *p, unsigned long addr) +{ + p->p_md.md_regs->tf_eip = addr; + return (0); +} + +int +ptrace_single_step(struct lwp *lp) +{ + lp->lwp_md.md_regs->tf_eflags |= PSL_T; + return (0); +} + +int +fill_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; + regs->r_fs = tp->tf_fs; + regs->r_es = tp->tf_es; + regs->r_ds = tp->tf_ds; + regs->r_edi = tp->tf_edi; + regs->r_esi = tp->tf_esi; + regs->r_ebp = tp->tf_ebp; + regs->r_ebx = tp->tf_ebx; + regs->r_edx = tp->tf_edx; + regs->r_ecx = tp->tf_ecx; + regs->r_eax = tp->tf_eax; + regs->r_eip = tp->tf_eip; + regs->r_cs = tp->tf_cs; + regs->r_eflags = tp->tf_eflags; + regs->r_esp = tp->tf_esp; + regs->r_ss = tp->tf_ss; + pcb = lp->lwp_thread->td_pcb; + regs->r_gs = pcb->pcb_gs; + return (0); +} + +int +set_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; + if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || + !CS_SECURE(regs->r_cs)) + return (EINVAL); + tp->tf_fs = regs->r_fs; + tp->tf_es = regs->r_es; + tp->tf_ds = regs->r_ds; + tp->tf_edi = regs->r_edi; + tp->tf_esi = regs->r_esi; + tp->tf_ebp = regs->r_ebp; + tp->tf_ebx = regs->r_ebx; + tp->tf_edx = regs->r_edx; + tp->tf_ecx = regs->r_ecx; + tp->tf_eax = regs->r_eax; + tp->tf_eip = regs->r_eip; + tp->tf_cs = regs->r_cs; + tp->tf_eflags = regs->r_eflags; + tp->tf_esp = regs->r_esp; + tp->tf_ss = regs->r_ss; + pcb = lp->lwp_thread->td_pcb; + pcb->pcb_gs = regs->r_gs; + return (0); +} + +#ifndef CPU_DISABLE_SSE +static void +fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU 
control/status */ + penv_87->en_cw = penv_xmm->en_cw; + penv_87->en_sw = penv_xmm->en_sw; + penv_87->en_tw = penv_xmm->en_tw; + penv_87->en_fip = penv_xmm->en_fip; + penv_87->en_fcs = penv_xmm->en_fcs; + penv_87->en_opcode = penv_xmm->en_opcode; + penv_87->en_foo = penv_xmm->en_foo; + penv_87->en_fos = penv_xmm->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; + + sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; +} + +static void +set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_xmm->en_cw = penv_87->en_cw; + penv_xmm->en_sw = penv_87->en_sw; + penv_xmm->en_tw = penv_87->en_tw; + penv_xmm->en_fip = penv_87->en_fip; + penv_xmm->en_fcs = penv_87->en_fcs; + penv_xmm->en_opcode = penv_87->en_opcode; + penv_xmm->en_foo = penv_87->en_foo; + penv_xmm->en_fos = penv_87->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; + + sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; +} +#endif /* CPU_DISABLE_SSE */ + +int +fill_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm, + (struct save87 *)fpregs); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + return (0); +} + +int +set_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + set_fpregs_xmm((struct save87 *)fpregs, + &lp->lwp_thread->td_pcb->pcb_save.sv_xmm); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs); + return (0); +} + +int +fill_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { + dbregs->dr0 = rdr0(); + dbregs->dr1 = rdr1(); + dbregs->dr2 = rdr2(); + dbregs->dr3 = rdr3(); + dbregs->dr4 = rdr4(); + dbregs->dr5 = rdr5(); + dbregs->dr6 = rdr6(); + dbregs->dr7 = rdr7(); + } else { + struct pcb *pcb; + + pcb = lp->lwp_thread->td_pcb; + dbregs->dr0 = pcb->pcb_dr0; + dbregs->dr1 = pcb->pcb_dr1; + dbregs->dr2 = pcb->pcb_dr2; + dbregs->dr3 = pcb->pcb_dr3; + dbregs->dr4 = 0; + dbregs->dr5 = 0; + dbregs->dr6 = pcb->pcb_dr6; + dbregs->dr7 = pcb->pcb_dr7; + } + return (0); +} + +int +set_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { + load_dr0(dbregs->dr0); + load_dr1(dbregs->dr1); + load_dr2(dbregs->dr2); + load_dr3(dbregs->dr3); + load_dr4(dbregs->dr4); + load_dr5(dbregs->dr5); + load_dr6(dbregs->dr6); + load_dr7(dbregs->dr7); + } else { + struct pcb *pcb; + struct ucred *ucred; + int i; + uint32_t mask1, mask2; + + /* + * Don't let an illegal value for dr7 get set. Specifically, + * check for undefined settings. Setting these bit patterns + * result in undefined behaviour and can lead to an unexpected + * TRCTRAP. + */ + for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; + i++, mask1 <<= 2, mask2 <<= 2) + if ((dbregs->dr7 & mask1) == mask2) + return (EINVAL); + + pcb = lp->lwp_thread->td_pcb; + ucred = lp->lwp_proc->p_ucred; + + /* + * Don't let a process set a breakpoint that is not within the + * process's address space. If a process could do this, it + * could halt the system by setting a breakpoint in the kernel + * (if ddb was enabled). 
Thus, we need to check to make sure + * that no breakpoints are being enabled for addresses outside + * process's address space, unless, perhaps, we were called by + * uid 0. + * + * XXX - what about when the watched area of the user's + * address space is written into from within the kernel + * ... wouldn't that still cause a breakpoint to be generated + * from within kernel mode? + */ + + if (suser_cred(ucred, 0) != 0) { + if (dbregs->dr7 & 0x3) { + /* dr0 is enabled */ + if (dbregs->dr0 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<2)) { + /* dr1 is enabled */ + if (dbregs->dr1 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<4)) { + /* dr2 is enabled */ + if (dbregs->dr2 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<6)) { + /* dr3 is enabled */ + if (dbregs->dr3 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + } + + pcb->pcb_dr0 = dbregs->dr0; + pcb->pcb_dr1 = dbregs->dr1; + pcb->pcb_dr2 = dbregs->dr2; + pcb->pcb_dr3 = dbregs->dr3; + pcb->pcb_dr6 = dbregs->dr6; + pcb->pcb_dr7 = dbregs->dr7; + + pcb->pcb_flags |= PCB_DBREGS; + } + + return (0); +} + +#if 0 +/* + * Return > 0 if a hardware breakpoint has been hit, and the + * breakpoint was in user space. Return 0, otherwise. + */ +int +user_dbreg_trap(void) +{ + u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ + u_int32_t bp; /* breakpoint bits extracted from dr6 */ + int nbp; /* number of breakpoints that triggered */ + caddr_t addr[4]; /* breakpoint addresses */ + int i; + + dr7 = rdr7(); + if ((dr7 & 0x000000ff) == 0) { + /* + * all GE and LE bits in the dr7 register are zero, + * thus the trap couldn't have been caused by the + * hardware debug registers + */ + return 0; + } + + nbp = 0; + dr6 = rdr6(); + bp = dr6 & 0x0000000f; + + if (!bp) { + /* + * None of the breakpoint bits are set meaning this + * trap was not caused by any of the debug registers + */ + return 0; + } + + /* + * at least one of the breakpoints were hit, check to see + * which ones and if any of them are user space addresses + */ + + if (bp & 0x01) { + addr[nbp++] = (caddr_t)rdr0(); + } + if (bp & 0x02) { + addr[nbp++] = (caddr_t)rdr1(); + } + if (bp & 0x04) { + addr[nbp++] = (caddr_t)rdr2(); + } + if (bp & 0x08) { + addr[nbp++] = (caddr_t)rdr3(); + } + + for (i=0; i + +/* + * Determine the size of the transfer, and make sure it is + * within the boundaries of the partition. Adjust transfer + * if needed, and signal errors or early completion. + * + * On success a new bio layer is pushed with the translated + * block number, and returned. + */ +struct bio * +bounds_check_with_label(cdev_t dev, struct bio *bio, + struct disklabel *lp, int wlabel) +{ + struct bio *nbio; + struct buf *bp = bio->bio_buf; + struct partition *p = lp->d_partitions + dkpart(dev); + int labelsect = lp->d_partitions[0].p_offset; + int maxsz = p->p_size, + sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; + daddr_t blkno = (daddr_t)(bio->bio_offset >> DEV_BSHIFT); + + /* overwriting disk label ? */ + /* XXX should also protect bootstrap in first 8K */ + if (blkno + p->p_offset <= LABELSECTOR + labelsect && +#if LABELSECTOR != 0 + blkno + p->p_offset + sz > LABELSECTOR + labelsect && +#endif + bp->b_cmd != BUF_CMD_READ && wlabel == 0) { + bp->b_error = EROFS; + goto error; + } + +#if defined(DOSBBSECTOR) && defined(notyet) + /* overwriting master boot record? 
*/ + if (blkno + p->p_offset <= DOSBBSECTOR && + bp->b_cmd != BUF_CMD_READ && wlabel == 0) { + bp->b_error = EROFS; + goto error; + } +#endif + + /* + * Check for out of bounds, EOF, and EOF clipping. + */ + if (bio->bio_offset < 0) + goto bad; + if (blkno + sz > maxsz) { + /* + * Past EOF or B_BNOCLIP flag was set, the request is bad. + */ + if (blkno > maxsz || (bp->b_flags & B_BNOCLIP)) + goto bad; + + /* + * If exactly on EOF just complete the I/O with no bytes + * transfered. B_INVAL must be set to throw away the + * contents of the buffer. Otherwise clip b_bcount. + */ + if (blkno == maxsz) { + bp->b_resid = bp->b_bcount; + bp->b_flags |= B_INVAL; + goto done; + } + bp->b_bcount = (maxsz - blkno) << DEV_BSHIFT; + } + nbio = push_bio(bio); + nbio->bio_offset = bio->bio_offset + ((off_t)p->p_offset << DEV_BSHIFT); + return (nbio); + + /* + * The caller is responsible for calling biodone() on the passed bio + * when we return NULL. + */ +bad: + bp->b_error = EINVAL; +error: + bp->b_resid = bp->b_bcount; + bp->b_flags |= B_ERROR | B_INVAL; +done: + return (NULL); +} + diff --git a/sys/platform/vkernel/i386/db_interface.c b/sys/platform/vkernel/i386/db_interface.c new file mode 100644 index 0000000000..6f7634f94e --- /dev/null +++ b/sys/platform/vkernel/i386/db_interface.c @@ -0,0 +1,328 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + * + * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_interface.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +/* + * Interface to new debugger. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +static jmp_buf *db_nofault = 0; +extern jmp_buf db_jmpbuf; + +extern void gdb_handle_exception (db_regs_t *, int, int); + +int db_active; +db_regs_t ddb_regs; + +static jmp_buf db_global_jmpbuf; +static int db_global_jmpbuf_valid; + +#ifdef __GNUC__ +#define rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;}) +#endif + +/* + * kdb_trap - field a TRACE or BPT trap + */ +int +kdb_trap(int type, int code, struct i386_saved_state *regs) +{ + volatile int ddb_mode = !(boothowto & RB_GDB); + + /* + * XXX try to do nothing if the console is in graphics mode. + * Handle trace traps (and hardware breakpoints...) by ignoring + * them except for forgetting about them. Return 0 for other + * traps to say that we haven't done anything. The trap handler + * will usually panic. 
We should handle breakpoint traps for + * our breakpoints by disarming our breakpoints and fixing up + * %eip. + */ + if (cons_unavail && ddb_mode) { + if (type == T_TRCTRAP) { + regs->tf_eflags &= ~PSL_T; + return (1); + } + return (0); + } + + switch (type) { + case T_BPTFLT: /* breakpoint */ + case T_TRCTRAP: /* debug exception */ + break; + + default: + /* + * XXX this is almost useless now. In most cases, + * trap_fatal() has already printed a much more verbose + * message. However, it is dangerous to print things in + * trap_fatal() - kprintf() might be reentered and trap. + * The debugger should be given control first. + */ + if (ddb_mode) + db_printf("kernel: type %d trap, code=%x\n", type, code); + + if (db_nofault) { + jmp_buf *no_fault = db_nofault; + db_nofault = 0; + longjmp(*no_fault, 1); + } + } + + /* + * This handles unexpected traps in ddb commands, including calls to + * non-ddb functions. db_nofault only applies to memory accesses by + * internal ddb commands. + */ + if (db_global_jmpbuf_valid) + longjmp(db_global_jmpbuf, 1); + + /* + * XXX We really should switch to a local stack here. + */ + ddb_regs = *regs; + + /* + * If in kernel mode, esp and ss are not saved, so dummy them up. + */ + if (ISPL(regs->tf_cs) == 0) { + ddb_regs.tf_esp = (int)®s->tf_esp; + ddb_regs.tf_ss = rss(); + } + +#ifdef SMP + db_printf("\nCPU%d stopping CPUs: 0x%08x\n", + mycpu->gd_cpuid, mycpu->gd_other_cpus); + + /* We stop all CPUs except ourselves (obviously) */ + stop_cpus(mycpu->gd_other_cpus); + + db_printf(" stopped\n"); +#endif /* SMP */ + + setjmp(db_global_jmpbuf); + db_global_jmpbuf_valid = TRUE; + db_active++; + if (ddb_mode) { + cndbctl(TRUE); + db_trap(type, code); + cndbctl(FALSE); + } else + gdb_handle_exception(&ddb_regs, type, code); + db_active--; + db_global_jmpbuf_valid = FALSE; + +#ifdef SMP + db_printf("\nCPU%d restarting CPUs: 0x%08x\n", + mycpu->gd_cpuid, stopped_cpus); + + /* Restart all the CPUs we previously stopped */ + if (stopped_cpus != mycpu->gd_other_cpus) { + db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", + mycpu->gd_other_cpus, stopped_cpus); + panic("stop_cpus() failed"); + } + restart_cpus(stopped_cpus); + + db_printf(" restarted\n"); +#endif /* SMP */ + + regs->tf_eip = ddb_regs.tf_eip; + regs->tf_eflags = ddb_regs.tf_eflags; + regs->tf_eax = ddb_regs.tf_eax; + regs->tf_ecx = ddb_regs.tf_ecx; + regs->tf_edx = ddb_regs.tf_edx; + regs->tf_ebx = ddb_regs.tf_ebx; + + /* + * If in user mode, the saved ESP and SS were valid, restore them. + */ + if (ISPL(regs->tf_cs)) { + regs->tf_esp = ddb_regs.tf_esp; + regs->tf_ss = ddb_regs.tf_ss & 0xffff; + } + + regs->tf_ebp = ddb_regs.tf_ebp; + regs->tf_esi = ddb_regs.tf_esi; + regs->tf_edi = ddb_regs.tf_edi; + regs->tf_es = ddb_regs.tf_es & 0xffff; + regs->tf_fs = ddb_regs.tf_fs & 0xffff; + regs->tf_cs = ddb_regs.tf_cs & 0xffff; + regs->tf_ds = ddb_regs.tf_ds & 0xffff; + return (1); +} + +/* + * Read bytes from kernel address space for debugger. + */ +void +db_read_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *src; + + db_nofault = &db_jmpbuf; + + src = (char *)addr; + while (size-- > 0) + *data++ = *src++; + + db_nofault = 0; +} + +/* + * Write bytes to kernel address space for debugger. 
+ */ +void +db_write_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *dst; +#if 0 + vpte_t *ptep0 = NULL; + vpte_t oldmap0 = 0; + vm_offset_t addr1; + vpte_t *ptep1 = NULL; + vpte_t oldmap1 = 0; +#endif + + db_nofault = &db_jmpbuf; +#if 0 + if (addr > trunc_page((vm_offset_t)btext) - size && + addr < round_page((vm_offset_t)etext)) { + + ptep0 = pmap_kpte(addr); + oldmap0 = *ptep0; + *ptep0 |= VPTE_W; + + /* Map another page if the data crosses a page boundary. */ + if ((*ptep0 & PG_PS) == 0) { + addr1 = trunc_page(addr + size - 1); + if (trunc_page(addr) != addr1) { + ptep1 = pmap_kpte(addr1); + oldmap1 = *ptep1; + *ptep1 |= VPTE_W; + } + } else { + addr1 = trunc_4mpage(addr + size - 1); + if (trunc_4mpage(addr) != addr1) { + ptep1 = pmap_kpte(addr1); + oldmap1 = *ptep1; + *ptep1 |= VPTE_W; + } + } + + cpu_invltlb(); + } +#endif + + dst = (char *)addr; + + while (size-- > 0) + *dst++ = *data++; + + db_nofault = 0; + +#if 0 + if (ptep0) { + *ptep0 = oldmap0; + + if (ptep1) + *ptep1 = oldmap1; + + cpu_invltlb(); + } +#endif +} + +/* + * The debugger sometimes needs to know the actual KVM address represented + * by the instruction pointer, stack pointer, or base pointer. Normally + * the actual KVM address is simply the contents of the register. However, + * if the debugger is entered from the BIOS or VM86 we need to figure out + * the offset from the segment register. + */ +db_addr_t +PC_REGS(db_regs_t *regs) +{ + return(regs->tf_eip); +} + +db_addr_t +SP_REGS(db_regs_t *regs) +{ + return(regs->tf_esp); +} + +db_addr_t +BP_REGS(db_regs_t *regs) +{ + return(regs->tf_ebp); +} + +/* + * XXX + * Move this to machdep.c and allow it to be called if any debugger is + * installed. + */ +void +Debugger(const char *msg) +{ + static volatile u_char in_Debugger; + + /* + * XXX + * Do nothing if the console is in graphics mode. This is + * OK if the call is for the debugger hotkey but not if the call + * is a weak form of panicing. + */ + if (cons_unavail && !(boothowto & RB_GDB)) + return; + + if (!in_Debugger) { + in_Debugger = 1; + db_printf("Debugger(\"%s\")\n", msg); + breakpoint(); + in_Debugger = 0; + } +} diff --git a/sys/platform/vkernel/i386/db_trace.c b/sys/platform/vkernel/i386/db_trace.c new file mode 100644 index 0000000000..9951fb024b --- /dev/null +++ b/sys/platform/vkernel/i386/db_trace.c @@ -0,0 +1,642 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. 
+ * + * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_trace.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +db_varfcn_t db_dr0; +db_varfcn_t db_dr1; +db_varfcn_t db_dr2; +db_varfcn_t db_dr3; +db_varfcn_t db_dr4; +db_varfcn_t db_dr5; +db_varfcn_t db_dr6; +db_varfcn_t db_dr7; + +/* + * Machine register set. + */ +struct db_variable db_regs[] = { + { "cs", &ddb_regs.tf_cs, FCN_NULL }, + { "ds", &ddb_regs.tf_ds, FCN_NULL }, + { "es", &ddb_regs.tf_es, FCN_NULL }, + { "fs", &ddb_regs.tf_fs, FCN_NULL }, +#if 0 + { "gs", &ddb_regs.tf_gs, FCN_NULL }, +#endif + { "ss", &ddb_regs.tf_ss, FCN_NULL }, + { "eax", &ddb_regs.tf_eax, FCN_NULL }, + { "ecx", &ddb_regs.tf_ecx, FCN_NULL }, + { "edx", &ddb_regs.tf_edx, FCN_NULL }, + { "ebx", &ddb_regs.tf_ebx, FCN_NULL }, + { "esp", &ddb_regs.tf_esp, FCN_NULL }, + { "ebp", &ddb_regs.tf_ebp, FCN_NULL }, + { "esi", &ddb_regs.tf_esi, FCN_NULL }, + { "edi", &ddb_regs.tf_edi, FCN_NULL }, + { "eip", &ddb_regs.tf_eip, FCN_NULL }, + { "efl", &ddb_regs.tf_eflags, FCN_NULL }, + { "dr0", NULL, db_dr0 }, + { "dr1", NULL, db_dr1 }, + { "dr2", NULL, db_dr2 }, + { "dr3", NULL, db_dr3 }, + { "dr4", NULL, db_dr4 }, + { "dr5", NULL, db_dr5 }, + { "dr6", NULL, db_dr6 }, + { "dr7", NULL, db_dr7 }, +}; +struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]); + +/* + * Stack trace. + */ +#define INKERNEL(va) (((vm_offset_t)(va)) >= USRSTACK) + +struct i386_frame { + struct i386_frame *f_frame; + int f_retaddr; + int f_arg0; +}; + +#define NORMAL 0 +#define TRAP 1 +#define INTERRUPT 2 +#define SYSCALL 3 + +static void db_nextframe(struct i386_frame **, db_addr_t *); +static int db_numargs(struct i386_frame *); +static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t); + + +static char *watchtype_str(int type); +static int ki386_set_watch(int watchnum, unsigned int watchaddr, + int size, int access, struct dbreg * d); +static int ki386_clr_watch(int watchnum, struct dbreg * d); +int db_md_set_watchpoint(db_expr_t addr, db_expr_t size); +int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size); +void db_md_list_watchpoints(void); + + +/* + * Figure out how many arguments were passed into the frame at "fp". + */ +static int +db_numargs(struct i386_frame *fp) +{ + int *argp; + int inst; + int args; + + argp = (int *)db_get_value((int)&fp->f_retaddr, 4, FALSE); + /* + * XXX etext is wrong for LKMs. We should attempt to interpret + * the instruction at the return address in all cases. This + * may require better fault handling. + */ + if (argp < (int *)btext || argp >= (int *)etext) { + args = 5; + } else { + inst = db_get_value((int)argp, 4, FALSE); + if ((inst & 0xff) == 0x59) /* popl %ecx */ + args = 1; + else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */ + args = ((inst >> 16) & 0xff) / 4; + else + args = 5; + } + return(args); +} + +static void +db_print_stack_entry(const char *name, int narg, char **argnp, int *argp, + db_addr_t callpc) +{ + db_printf("%s(", name); + while (narg) { + if (argnp) + db_printf("%s=", *argnp++); + db_printf("%r", db_get_value((int)argp, 4, FALSE)); + argp++; + if (--narg != 0) + db_printf(","); + } + db_printf(") at "); + db_printsym(callpc, DB_STGY_PROC); + db_printf("\n"); +} + +/* + * Figure out the next frame up in the call stack. 
+ */ +static void +db_nextframe(struct i386_frame **fp, db_addr_t *ip) +{ + struct trapframe *tf; + int frame_type; + int eip, esp, ebp; + db_expr_t offset; + const char *sym, *name; + + eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE); + ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE); + + /* + * Figure out frame type. + */ + + frame_type = NORMAL; + + sym = db_search_symbol(eip, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + if (name != NULL) { + if (!strcmp(name, "calltrap")) { + frame_type = TRAP; + } else if (!strncmp(name, "Xresume", 7)) { + frame_type = INTERRUPT; + } else if (!strcmp(name, "_Xsyscall")) { + frame_type = SYSCALL; + } + } + + /* + * Normal frames need no special processing. + */ + if (frame_type == NORMAL) { + *ip = (db_addr_t) eip; + *fp = (struct i386_frame *) ebp; + return; + } + + db_print_stack_entry(name, 0, 0, 0, eip); + + /* + * Point to base of trapframe which is just above the + * current frame. + */ + tf = (struct trapframe *) ((int)*fp + 8); + + esp = (ISPL(tf->tf_cs) == SEL_UPL) ? tf->tf_esp : (int)&tf->tf_esp; + switch (frame_type) { + case TRAP: + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- trap %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", + tf->tf_trapno, eip, esp, ebp); + } + break; + case SYSCALL: + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- syscall %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", + tf->tf_eax, eip, esp, ebp); + } + break; + case INTERRUPT: + tf = (struct trapframe *)((int)*fp + 16); + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- interrupt, eip = %#r, esp = %#r, ebp = %#r ---\n", + eip, esp, ebp); + } + break; + default: + break; + } + + *ip = (db_addr_t) eip; + *fp = (struct i386_frame *) ebp; +} + +void +db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, + char *modif) +{ + struct i386_frame *frame; + int *argp; + db_addr_t callpc; + boolean_t first; + int i; + + if (count == -1) + count = 1024; + + if (!have_addr) { + frame = (struct i386_frame *)BP_REGS(&ddb_regs); + if (frame == NULL) + frame = (struct i386_frame *)(SP_REGS(&ddb_regs) - 4); + callpc = PC_REGS(&ddb_regs); + } else if (!INKERNEL(addr)) { +#if needswork + pid = (addr % 16) + ((addr >> 4) % 16) * 10 + + ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + + ((addr >> 16) % 16) * 10000; + /* + * The pcb for curproc is not valid at this point, + * so fall back to the default case. + */ + if ((curproc != NULL) && (pid == curproc->p_pid)) { + frame = (struct i386_frame *)BP_REGS(&ddb_regs); + if (frame == NULL) + frame = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + callpc = PC_REGS(&ddb_regs); + } else { + pid_t pid; + struct proc *p; + struct pcb *pcb; + + p = pfind(pid); + if (p == NULL) { + db_printf("pid %d not found\n", pid); + return; + } + if ((p->p_flag & P_SWAPPEDOUT)) { + db_printf("pid %d swapped out\n", pid); + return; + } + pcb = p->p_thread->td_pcb; + frame = (struct i386_frame *)pcb->pcb_ebp; + if (frame == NULL) + frame = (struct i386_frame *) + (pcb->pcb_esp - 4); + callpc = (db_addr_t)pcb->pcb_eip; + } +#else + /* XXX */ + db_printf("no kernel stack address\n"); + return; +#endif + } else { + /* + * Look for something that might be a frame pointer, just as + * a convenience. 
+ */ + frame = (struct i386_frame *)addr; + for (i = 0; i < 4096; i += 4) { + struct i386_frame *check; + + check = (struct i386_frame *)db_get_value((int)((char *)&frame->f_frame + i), 4, FALSE); + if ((char *)check - (char *)frame >= 0 && + (char *)check - (char *)frame < 4096 + ) { + break; + } + db_printf("%p does not look like a stack frame, skipping\n", (char *)&frame->f_frame + i); + } + if (i == 4096) { + db_printf("Unable to find anything that looks like a stack frame\n"); + return; + } + frame = (void *)((char *)frame + i); + db_printf("Trace beginning at frame %p\n", frame); + callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); + } + + first = TRUE; + while (count--) { + struct i386_frame *actframe; + int narg; + const char * name; + db_expr_t offset; + c_db_sym_t sym; +#define MAXNARG 16 + char *argnames[MAXNARG], **argnp = NULL; + + sym = db_search_symbol(callpc, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + + /* + * Attempt to determine a (possibly fake) frame that gives + * the caller's pc. It may differ from `frame' if the + * current function never sets up a standard frame or hasn't + * set one up yet or has just discarded one. The last two + * cases can be guessed fairly reliably for code generated + * by gcc. The first case is too much trouble to handle in + * general because the amount of junk on the stack depends + * on the pc (the special handling of "calltrap", etc. in + * db_nextframe() works because the `next' pc is special). + */ + actframe = frame; + if (first) { + if (!have_addr) { + int instr; + + instr = db_get_value(callpc, 4, FALSE); + if ((instr & 0x00ffffff) == 0x00e58955) { + /* pushl %ebp; movl %esp, %ebp */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } else if ((instr & 0x0000ffff) == 0x0000e589) { + /* movl %esp, %ebp */ + actframe = (struct i386_frame *) + SP_REGS(&ddb_regs); + if (ddb_regs.tf_ebp == 0) { + /* Fake caller's frame better. */ + frame = actframe; + } + } else if ((instr & 0x000000ff) == 0x000000c3) { + /* ret */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } else if (offset == 0) { + /* Probably a symbol in assembler code. */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } + } else if (!strcmp(name, "fork_trampoline")) { + /* + * Don't try to walk back on a stack for a + * process that hasn't actually been run yet. + */ + db_print_stack_entry(name, 0, 0, 0, callpc); + break; + } + first = FALSE; + } + + argp = &actframe->f_arg0; + narg = MAXNARG; + if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { + argnp = argnames; + } else { + narg = db_numargs(frame); + } + + db_print_stack_entry(name, narg, argnp, argp, callpc); + + if (actframe != frame) { + /* `frame' belongs to caller. 
*/ + callpc = (db_addr_t) + db_get_value((int)&actframe->f_retaddr, 4, FALSE); + continue; + } + + db_nextframe(&frame, &callpc); + + if (INKERNEL((int) callpc) && !INKERNEL((int) frame)) { + sym = db_search_symbol(callpc, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + db_print_stack_entry(name, 0, 0, 0, callpc); + break; + } + if (!INKERNEL((int) frame)) { + break; + } + } +} + +void +db_print_backtrace(void) +{ + register_t ebp; + + __asm __volatile("movl %%ebp, %0" : "=r" (ebp)); + db_stack_trace_cmd(ebp, 1, -1, NULL); +} + +#define DB_DRX_FUNC(reg) \ +int \ +db_ ## reg (struct db_variable *vp, db_expr_t *valuep, int op) \ +{ \ + if (op == DB_VAR_GET) \ + *valuep = r ## reg (); \ + else \ + load_ ## reg (*valuep); \ + \ + return(0); \ +} + +DB_DRX_FUNC(dr0) +DB_DRX_FUNC(dr1) +DB_DRX_FUNC(dr2) +DB_DRX_FUNC(dr3) +DB_DRX_FUNC(dr4) +DB_DRX_FUNC(dr5) +DB_DRX_FUNC(dr6) +DB_DRX_FUNC(dr7) + +static int +ki386_set_watch(int watchnum, unsigned int watchaddr, int size, int access, + struct dbreg *d) +{ + int i; + unsigned int mask; + + if (watchnum == -1) { + for (i = 0, mask = 0x3; i < 4; i++, mask <<= 2) + if ((d->dr7 & mask) == 0) + break; + if (i < 4) + watchnum = i; + else + return(-1); + } + + switch (access) { + case DBREG_DR7_EXEC: + size = 1; /* size must be 1 for an execution breakpoint */ + /* fall through */ + case DBREG_DR7_WRONLY: + case DBREG_DR7_RDWR: + break; + default: + return(-1); + } + + /* + * we can watch a 1, 2, or 4 byte sized location + */ + switch (size) { + case 1: + mask = 0x00; + break; + case 2: + mask = 0x01 << 2; + break; + case 4: + mask = 0x03 << 2; + break; + default: + return(-1); + } + + mask |= access; + + /* clear the bits we are about to affect */ + d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16))); + + /* set drN register to the address, N=watchnum */ + DBREG_DRX(d, watchnum) = watchaddr; + + /* enable the watchpoint */ + d->dr7 |= (0x2 << (watchnum * 2)) | (mask << (watchnum * 4 + 16)); + + return(watchnum); +} + + +int +ki386_clr_watch(int watchnum, struct dbreg *d) +{ + if (watchnum < 0 || watchnum >= 4) + return(-1); + + d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16))); + DBREG_DRX(d, watchnum) = 0; + + return(0); +} + + +int +db_md_set_watchpoint(db_expr_t addr, db_expr_t size) +{ + int avail, wsize; + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + avail = 0; + for(i=0; i < 4; i++) { + if ((d.dr7 & (3 << (i * 2))) == 0) + avail++; + } + + if (avail * 4 < size) + return(-1); + + for (i=0; i < 4 && (size != 0); i++) { + if ((d.dr7 & (3 << (i * 2))) == 0) { + if (size > 4) + wsize = 4; + else + wsize = size; + if (wsize == 3) + wsize++; + ki386_set_watch(i, addr, wsize, DBREG_DR7_WRONLY, &d); + addr += wsize; + size -= wsize; + } + } + + set_dbregs(NULL, &d); + + return(0); +} + +int +db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) +{ + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + for(i=0; i<4; i++) { + if (d.dr7 & (3 << (i * 2))) { + if ((DBREG_DRX((&d), i) >= addr) && + (DBREG_DRX((&d), i) < addr + size)) + ki386_clr_watch(i, &d); + } + } + + set_dbregs(NULL, &d); + + return(0); +} + +static char * +watchtype_str(int type) +{ + switch (type) { + case DBREG_DR7_EXEC: + return "execute"; + case DBREG_DR7_RDWR: + return "read/write"; + case DBREG_DR7_WRONLY: + return "write"; + default: + return "invalid"; + } +} + +void +db_md_list_watchpoints(void) +{ + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + db_printf("\nhardware watchpoints:\n"); + db_printf(" watch status 
type len address\n" + " ----- -------- ---------- --- ----------\n"); + for (i=0; i < 4; i++) { + if (d.dr7 & (0x03 << (i * 2))) { + unsigned type, len; + type = (d.dr7 >> (16 + (i * 4))) & 3; + len = (d.dr7 >> (16 + (i * 4) + 2)) & 3; + db_printf(" %-5d %-8s %10s %3d 0x%08x\n", + i, "enabled", watchtype_str(type), + len + 1, DBREG_DRX((&d), i)); + } else { + db_printf(" %-5d disabled\n", i); + } + } + + db_printf("\ndebug register values:\n"); + for (i=0; i < 8; i++) + db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d),i)); + db_printf("\n"); +} diff --git a/sys/platform/vkernel/i386/global.s b/sys/platform/vkernel/i386/global.s index 057e282470..36fd01dcd0 100644 --- a/sys/platform/vkernel/i386/global.s +++ b/sys/platform/vkernel/i386/global.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.1 2006/12/26 20:46:10 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include @@ -75,10 +75,9 @@ .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 .globl gd_spending, gd_ipending, gd_fpending - .globl gd_cnt, gd_private_tss + .globl gd_cnt .set gd_cpuid,globaldata + GD_CPUID - .set gd_private_tss,globaldata + GD_PRIVATE_TSS .set gd_other_cpus,globaldata + GD_OTHER_CPUS .set gd_ss_eflags,globaldata + GD_SS_EFLAGS .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL diff --git a/sys/platform/vkernel/i386/locore.s b/sys/platform/vkernel/i386/locore.s index b3e7c2438a..9c831c77c7 100644 --- a/sys/platform/vkernel/i386/locore.s +++ b/sys/platform/vkernel/i386/locore.s @@ -31,10 +31,12 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.3 2006/12/04 18:04:01 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.4 2007/01/05 22:18:18 dillon Exp $ */ +#include #include +#include #include "assym.s" .globl kernbase @@ -57,10 +59,14 @@ NON_GPROF_ENTRY(sigcode) call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_UC(%esp),%eax /* get ucontext_t */ pushl %eax +#if 0 testl $PSL_VM,UC_EFLAGS(%eax) jne 9f +#endif movl UC_GS(%eax),%gs /* restore %gs */ +#if 0 9: +#endif movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ @@ -69,6 +75,17 @@ NON_GPROF_ENTRY(sigcode) ALIGN_TEXT esigcode: +/* void reset_dbregs() */ +ENTRY(reset_dbregs) + movl $0,%eax + movl %eax,%dr7 /* disable all breapoints first */ + movl %eax,%dr0 + movl %eax,%dr1 + movl %eax,%dr2 + movl %eax,%dr3 + movl %eax,%dr6 + ret + .data .globl szsigcode szsigcode: diff --git a/sys/platform/vkernel/i386/npx.c b/sys/platform/vkernel/i386/npx.c index f8adc1c4e0..3a35bad869 100644 --- a/sys/platform/vkernel/i386/npx.c +++ b/sys/platform/vkernel/i386/npx.c @@ -36,7 +36,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.1 2007/01/02 04:24:25 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include "opt_debug_npx.h" @@ -365,12 +365,16 @@ npx_intr(void *dummy) * before we entered our critical section. If that occured, the * TS bit will be set and npxthread will be NULL. 
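+ *
+ * In the vkernel case we are running as a user process on the real
+ * kernel and cannot read %cr0 directly (the rcr0() check below is
+ * disabled for that reason), so for now this path simply panics;
+ * the FP state flag will eventually have to be delivered as part
+ * of the context supplied by the real kernel.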
*/ + panic("npx_intr: not coded"); + /* XXX FP STATE FLAG MUST BE PART OF CONTEXT SUPPLIED BY REAL KERNEL */ +#if 0 if (rcr0() & CR0_TS) { KASSERT(mdcpu->gd_npxthread == NULL, ("gd_npxthread was %p with TS set!", mdcpu->gd_npxthread)); npxdna(); crit_exit(); return; } +#endif if (mdcpu->gd_npxthread == NULL) { get_mplock(); kprintf("npxintr: npxthread = %p, curthread = %p\n", @@ -396,7 +400,7 @@ npx_intr(void *dummy) * Pass exception to process. */ frame = (struct intrframe *)&dummy; /* XXX */ - if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { + if ((ISPL(frame->if_cs) == SEL_UPL) /*||(frame->if_eflags&PSL_VM)*/) { /* * Interrupt is essentially a trap, so we can afford to call * the SIGFPE handler (if any) as soon as the interrupt diff --git a/sys/platform/vkernel/i386/swtch.s b/sys/platform/vkernel/i386/swtch.s index 52c50c9693..52287af743 100644 --- a/sys/platform/vkernel/i386/swtch.s +++ b/sys/platform/vkernel/i386/swtch.s @@ -66,7 +66,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.1 2007/01/02 04:24:25 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include "use_npx.h" @@ -212,12 +212,14 @@ ENTRY(cpu_exit_switch) /* * Get us out of the vmspace */ +#if 0 movl IdlePTD,%ecx movl %cr3,%eax cmpl %ecx,%eax je 1f movl %ecx,%cr3 1: +#endif movl PCPU(curthread),%ebx /* * Switch to the next thread. RET into the restore function, which @@ -282,6 +284,7 @@ ENTRY(cpu_heavy_restore) * YYY which naturally also means that the PM_ACTIVE bit had better * already have been set before we set it above, check? YYY */ +#if 0 movl %cr3,%esi movl PCB_CR3(%edx),%ecx cmpl %esi,%ecx @@ -292,6 +295,7 @@ ENTRY(cpu_heavy_restore) #endif movl %ecx,%cr3 4: +#endif /* * Clear TDF_RUNNING flag in old thread only after cleaning up * %cr3. The target thread is already protected by being TDF_RUNQ @@ -300,6 +304,7 @@ ENTRY(cpu_heavy_restore) andl $~TDF_RUNNING,TD_FLAGS(%ebx) orl $TDF_RUNNING,TD_FLAGS(%eax) +#if 0 /* * Deal with the PCB extension, restore the private tss */ @@ -344,8 +349,8 @@ ENTRY(cpu_heavy_restore) movl %eax, 4(%ebx) movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ ltr %si - 3: +#endif /* * Restore general registers. */ @@ -357,6 +362,7 @@ ENTRY(cpu_heavy_restore) movl PCB_EIP(%edx),%eax movl %eax,(%esp) +#if 0 /* * Restore the user LDT if we have one */ @@ -372,12 +378,16 @@ ENTRY(cpu_heavy_restore) call set_user_ldt popl %edx 2: +#endif +#if 0 /* * Restore the user TLS if we have one */ pushl %edx call set_user_TLS popl %edx +#endif +#if 0 /* * Restore the %gs segment register, which must be done after * loading the user LDT. Since user processes can modify the @@ -388,6 +398,7 @@ ENTRY(cpu_heavy_restore) .globl cpu_switch_load_gs cpu_switch_load_gs: movl PCB_GS(%edx),%gs +#endif /* * Restore the DEBUG register state if necessary. diff --git a/sys/platform/vkernel/i386/tls.c b/sys/platform/vkernel/i386/tls.c new file mode 100644 index 0000000000..ab21686db5 --- /dev/null +++ b/sys/platform/vkernel/i386/tls.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by David Xu and Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include /* pcb.h included via sys/user.h */ +#include /* CPU_prvspace */ +#include + +/* + * set a TLS descriptor and resync the GDT. A descriptor may be cleared + * by passing info=NULL and infosize=0. Note that hardware limitations may + * cause the size passed in tls_info to be approximated. + * + * Returns the value userland needs to load into %gs representing the + * TLS descriptor or -1 on error. + * + * (struct tls_info *info, int infosize, int which) + */ +int +sys_sys_set_tls_area(struct sys_set_tls_area_args *uap) +{ + struct tls_info info; + struct segment_descriptor *desc; + int error; + int i; + + /* + * Sanity checks + */ + i = uap->which; + if (i < 0 || i >= NGTLS) + return (ERANGE); + if (uap->infosize < 0) + return (EINVAL); + + /* + * Maintain forwards compatibility with future extensions. + */ + if (uap->infosize != sizeof(info)) { + bzero(&info, sizeof(info)); + error = copyin(uap->info, &info, + min(sizeof(info), uap->infosize)); + } else { + error = copyin(uap->info, &info, sizeof(info)); + } + if (error) + return (error); + if (info.size < -1) + return (EINVAL); + if (info.size > (1 << 20)) + info.size = (info.size + PAGE_MASK) & ~PAGE_MASK; + + /* + * Load the descriptor. A critical section is required in case + * an interrupt thread comes along and switches us out and then back + * in. + */ + desc = &curthread->td_tls[i]; + crit_enter(); + if (info.size == 0) { + bzero(desc, sizeof(*desc)); + } else { + desc->sd_lobase = (intptr_t)info.base; + desc->sd_hibase = (intptr_t)info.base >> 24; + desc->sd_def32 = 1; + desc->sd_type = SDT_MEMRWA; + desc->sd_dpl = SEL_UPL; + desc->sd_xx = 0; + desc->sd_p = 1; + if (info.size == -1) { + /* + * A descriptor size of -1 is a hack to map the + * whole address space. This type of mapping is + * required for direct-tls accesses of variable + * data, e.g. %gs:OFFSET where OFFSET is negative. 
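+ *
+ * With the all-ones limit and 4K granularity programmed below, the
+ * effective descriptor limit covers roughly the whole 4GB address
+ * space:
+ *
+ *	limit = (hilimit << 16) | lolimit = 0xfffff	(4K pages)
+ *	span  = (limit + 1) * PAGE_SIZE   = 4GB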
+ */ + desc->sd_lolimit = -1; + desc->sd_hilimit = -1; + desc->sd_gran = 1; + } else if (info.size >= (1 << 20)) { + /* + * A descriptor size greater then 1MB requires page + * granularity (the lo+hilimit field is only 20 bits) + */ + desc->sd_lolimit = info.size >> PAGE_SHIFT; + desc->sd_hilimit = info.size >> (PAGE_SHIFT + 16); + desc->sd_gran = 1; + } else { + /* + * Otherwise a byte-granular size is supported. + */ + desc->sd_lolimit = info.size; + desc->sd_hilimit = info.size >> 16; + desc->sd_gran = 0; + } + } + crit_exit(); + uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); + set_user_TLS(); + return(0); +} + +/* + * Return the specified TLS descriptor to userland. + * + * Returns the value userland needs to load into %gs representing the + * TLS descriptor or -1 on error. + * + * (struct tls_info *info, int infosize, int which) + */ +int +sys_sys_get_tls_area(struct sys_get_tls_area_args *uap) +{ + struct tls_info info; + struct segment_descriptor *desc; + int error; + int i; + + /* + * Sanity checks + */ + i = uap->which; + if (i < 0 || i >= NGTLS) + return (ERANGE); + if (uap->infosize < 0) + return (EINVAL); + + /* + * unpack the descriptor, ENOENT is returned for any descriptor + * which has not been loaded. uap->info may be NULL. + */ + desc = &curthread->td_tls[i]; + if (desc->sd_p) { + if (uap->info && uap->infosize > 0) { + bzero(&info, sizeof(info)); + info.base = (void *)(intptr_t) + ((desc->sd_hibase << 24) | desc->sd_lobase); + info.size = (desc->sd_hilimit << 16) | desc->sd_lolimit; + if (desc->sd_gran) + info.size <<= PAGE_SHIFT; + error = copyout(&info, uap->info, + min(sizeof(info), uap->infosize)); + } else { + error = 0; + } + uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); + } else { + error = ENOENT; + } + return(error); +} + +void +set_user_TLS(void) +{ + panic("set_user_TLS"); +} diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c new file mode 100644 index 0000000000..5473d512d8 --- /dev/null +++ b/sys/platform/vkernel/i386/trap.c @@ -0,0 +1,1551 @@ +/*- + * Copyright (C) 1994, David Greenman + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the University of Utah, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +/* + * 386 Trap and System call handling + */ + +#include "use_isa.h" +#include "use_npx.h" + +#include "opt_ddb.h" +#include "opt_ktrace.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTRACE +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#ifdef SMP + +#define MAKEMPSAFE(have_mplock) \ + if (have_mplock == 0) { \ + get_mplock(); \ + have_mplock = 1; \ + } + +#else + +#define MAKEMPSAFE(have_mplock) + +#endif + +int (*pmath_emulate) (struct trapframe *); + +extern void trap (struct trapframe frame); +extern int trapwrite (unsigned addr); +extern void syscall2 (struct trapframe frame); + +static int trap_pfault (struct trapframe *, int, vm_offset_t); +static void trap_fatal (struct trapframe *, vm_offset_t); +void dblfault_handler (void); + +#if 0 +extern inthand_t IDTVEC(syscall); +#endif + +#define MAX_TRAP_MSG 28 +static char *trap_msg[] = { + "", /* 0 unused */ + "privileged instruction fault", /* 1 T_PRIVINFLT */ + "", /* 2 unused */ + "breakpoint instruction fault", /* 3 T_BPTFLT */ + "", /* 4 unused */ + "", /* 5 unused */ + "arithmetic trap", /* 6 T_ARITHTRAP */ + "system forced exception", /* 7 T_ASTFLT */ + "", /* 8 unused */ + "general protection fault", /* 9 T_PROTFLT */ + "trace trap", /* 10 T_TRCTRAP */ + "", /* 11 unused */ + "page fault", /* 12 T_PAGEFLT */ + "", /* 13 unused */ + "alignment fault", /* 14 T_ALIGNFLT */ + "", /* 15 unused */ + "", /* 16 unused */ + "", /* 17 unused */ + "integer divide fault", /* 18 T_DIVIDE */ + "non-maskable interrupt trap", /* 19 T_NMI */ + "overflow trap", /* 20 T_OFLOW */ + "FPU bounds check fault", /* 21 T_BOUND */ + "FPU device not available", /* 22 T_DNA */ + "double fault", /* 23 T_DOUBLEFLT */ + "FPU operand fetch fault", /* 24 T_FPOPFLT */ + "invalid TSS fault", /* 25 T_TSSFLT */ + "segment not present fault", /* 26 T_SEGNPFLT */ + "stack fault", /* 27 T_STKFLT */ + "machine check trap", /* 28 T_MCHK */ +}; + +#ifdef DDB +static int ddb_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, + &ddb_on_nmi, 0, "Go to DDB on NMI"); +#endif +static int panic_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, + &panic_on_nmi, 0, "Panic on NMI"); +static int fast_release; +SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, + &fast_release, 0, "Passive Release was optimal"); +static int slow_release; +SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, + &slow_release, 0, "Passive Release was nonoptimal"); +#ifdef SMP +static int syscall_mpsafe = 0; 
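+/*
+ * syscall_mpsafe and trap_mpsafe (below) default to 0, keeping syscalls
+ * and traps under the Big Giant Lock.  Both are run-time sysctls and may
+ * also be preset from the loader; for example (illustrative):
+ *
+ *	sysctl kern.syscall_mpsafe=1	# SYF_MPSAFE syscalls run without BGL
+ *	sysctl kern.trap_mpsafe=1	# traps mostly run without BGL
+ */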
+SYSCTL_INT(_kern, OID_AUTO, syscall_mpsafe, CTLFLAG_RW, + &syscall_mpsafe, 0, "Allow MPSAFE marked syscalls to run without BGL"); +TUNABLE_INT("kern.syscall_mpsafe", &syscall_mpsafe); +static int trap_mpsafe = 0; +SYSCTL_INT(_kern, OID_AUTO, trap_mpsafe, CTLFLAG_RW, + &trap_mpsafe, 0, "Allow traps to mostly run without the BGL"); +TUNABLE_INT("kern.trap_mpsafe", &trap_mpsafe); +#endif + +MALLOC_DEFINE(M_SYSMSG, "sysmsg", "sysmsg structure"); +extern int max_sysmsg; + +/* + * Passive USER->KERNEL transition. This only occurs if we block in the + * kernel while still holding our userland priority. We have to fixup our + * priority in order to avoid potential deadlocks before we allow the system + * to switch us to another thread. + */ +static void +passive_release(struct thread *td) +{ + struct lwp *lp = td->td_lwp; + + td->td_release = NULL; + lwkt_setpri_self(TDPRI_KERN_USER); + lp->lwp_proc->p_usched->release_curproc(lp); +} + +/* + * userenter() passively intercepts the thread switch function to increase + * the thread priority from a user priority to a kernel priority, reducing + * syscall and trap overhead for the case where no switch occurs. + */ + +static __inline void +userenter(struct thread *curtd) +{ + curtd->td_release = passive_release; +} + +/* + * Handle signals, upcalls, profiling, and other AST's and/or tasks that + * must be completed before we can return to or try to return to userland. + * + * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 + * arithmatic on the delta calculation so the absolute tick values are + * truncated to an integer. + */ +static void +userret(struct lwp *lp, struct trapframe *frame, int sticks) +{ + struct proc *p = lp->lwp_proc; + int sig; + + /* + * Charge system time if profiling. Note: times are in microseconds. + * This may do a copyout and block, so do it first even though it + * means some system time will be charged as user time. + */ + if (p->p_flag & P_PROFIL) { + addupc_task(p, frame->tf_eip, + (u_int)((int)lp->lwp_thread->td_sticks - sticks)); + } + +recheck: + /* + * Block here if we are in a stopped state. + */ + if (p->p_flag & P_STOPPED) { + get_mplock(); + tstop(p); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending upcalls + */ + if (p->p_flag & P_UPCALLPEND) { + p->p_flag &= ~P_UPCALLPEND; + get_mplock(); + postupcall(lp); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending signals + */ + if ((sig = CURSIG(p)) != 0) { + get_mplock(); + postsig(sig); + rel_mplock(); + goto recheck; + } + + /* + * block here if we are swapped out, but still process signals + * (such as SIGKILL). proc0 (the swapin scheduler) is already + * aware of our situation, we do not have to wake it up. + */ + if (p->p_flag & P_SWAPPEDOUT) { + get_mplock(); + p->p_flag |= P_SWAPWAIT; + swapin_request(); + if (p->p_flag & P_SWAPWAIT) + tsleep(p, PCATCH, "SWOUT", 0); + p->p_flag &= ~P_SWAPWAIT; + rel_mplock(); + goto recheck; + } +} + +/* + * Cleanup from userenter and any passive release that might have occured. + * We must reclaim the current-process designation before we can return + * to usermode. We also handle both LWKT and USER reschedule requests. + */ +static __inline void +userexit(struct lwp *lp) +{ + struct thread *td = lp->lwp_thread; + globaldata_t gd = td->td_gd; + +#if 0 + /* + * If a user reschedule is requested force a new process to be + * chosen by releasing the current process. Our process will only + * be chosen again if it has a considerably better priority. 
+ */ + if (user_resched_wanted()) + lp->lwp_proc->p_usched->release_curproc(lp); +#endif + + /* + * Handle a LWKT reschedule request first. Since our passive release + * is still in place we do not have to do anything special. + */ + if (lwkt_resched_wanted()) + lwkt_switch(); + + /* + * Acquire the current process designation for this user scheduler + * on this cpu. This will also handle any user-reschedule requests. + */ + lp->lwp_proc->p_usched->acquire_curproc(lp); + /* We may have switched cpus on acquisition */ + gd = td->td_gd; + + /* + * Reduce our priority in preparation for a return to userland. If + * our passive release function was still in place, our priority was + * never raised and does not need to be reduced. + */ + if (td->td_release == NULL) + lwkt_setpri_self(TDPRI_USER_NORM); + td->td_release = NULL; + + /* + * After reducing our priority there might be other kernel-level + * LWKTs that now have a greater priority. Run them as necessary. + * We don't have to worry about losing cpu to userland because + * we still control the current-process designation and we no longer + * have a passive release function installed. + */ + if (lwkt_checkpri_self()) + lwkt_switch(); +} + +/* + * Exception, fault, and trap interface to the kernel. + * This common code is called from assembly language IDT gate entry + * routines that prepare a suitable stack frame, and restore this + * frame after the exception has been processed. + * + * This function is also called from doreti in an interlock to handle ASTs. + * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap + * + * NOTE! We have to retrieve the fault address prior to obtaining the + * MP lock because get_mplock() may switch out. YYY cr2 really ought + * to be retrieved by the assembly code, not here. + * + * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing + * if an attempt is made to switch from a fast interrupt or IPI. This is + * necessary to properly take fatal kernel traps on SMP machines if + * get_mplock() has to block. + */ + +void +trap(struct trapframe frame) +{ + struct globaldata *gd = mycpu; + struct thread *td = gd->gd_curthread; + struct lwp *lp = td->td_lwp; + struct proc *p; + int sticks = 0; + int i = 0, ucode = 0, type, code; +#ifdef SMP + int have_mplock = 0; +#endif +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif + vm_offset_t eva; + + p = td->td_proc; +#ifdef DDB + if (db_active) { + eva = (frame.tf_trapno == T_PAGEFLT ? rcr2() : 0); + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + trap_fatal(&frame, eva); + --gd->gd_trap_nesting_level; + goto out2; + } +#endif + + eva = 0; + ++gd->gd_trap_nesting_level; + if (frame.tf_trapno == T_PAGEFLT) { + /* + * For some Cyrix CPUs, %cr2 is clobbered by interrupts. + * This problem is worked around by using an interrupt + * gate for the pagefault handler. We are finally ready + * to read %cr2 and then must reenable interrupts. + * + * XXX this should be in the switch statement, but the + * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the + * flow of control too much for this to be obviously + * correct. + */ + eva = rcr2(); + cpu_enable_intr(); + } +#ifdef SMP + if (trap_mpsafe == 0) + MAKEMPSAFE(have_mplock); +#endif + + --gd->gd_trap_nesting_level; + + if (!(frame.tf_eflags & PSL_I)) { + /* + * Buggy application or kernel code has disabled interrupts + * and then trapped. 
Enabling interrupts now is wrong, but + * it is better than running with interrupts disabled until + * they are accidentally enabled later. + */ + type = frame.tf_trapno; + if (ISPL(frame.tf_cs)==SEL_UPL /*||(frame.tf_eflags&PSL_VM)*/) { + MAKEMPSAFE(have_mplock); + kprintf( + "pid %ld (%s): trap %d with interrupts disabled\n", + (long)curproc->p_pid, curproc->p_comm, type); + } else if (type != T_BPTFLT && type != T_TRCTRAP) { + /* + * XXX not quite right, since this may be for a + * multiple fault in user mode. + */ + MAKEMPSAFE(have_mplock); + kprintf("kernel trap %d with interrupts disabled\n", + type); + } + cpu_enable_intr(); + } + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) +restart: +#endif + type = frame.tf_trapno; + code = frame.tf_err; + +#if 0 + if (in_vm86call) { + ASSERT_MP_LOCK_HELD(curthread); + if (frame.tf_eflags & PSL_VM && + (type == T_PROTFLT || type == T_STKFLT)) { +#ifdef SMP + KKASSERT(td->td_mpcount > 0); +#endif + i = vm86_emulate((struct vm86frame *)&frame); +#ifdef SMP + KKASSERT(td->td_mpcount > 0); +#endif + if (i != 0) { + /* + * returns to original process + */ +#ifdef SMP + vm86_trap((struct vm86frame *)&frame, + have_mplock); +#else + vm86_trap((struct vm86frame *)&frame, 0); +#endif + KKASSERT(0); /* NOT REACHED */ + } + goto out2; + } + switch (type) { + /* + * these traps want either a process context, or + * assume a normal userspace trap. + */ + case T_PROTFLT: + case T_SEGNPFLT: + trap_fatal(&frame, eva); + goto out2; + case T_TRCTRAP: + type = T_BPTFLT; /* kernel breakpoint */ + /* FALL THROUGH */ + } + goto kernel_trap; /* normal kernel trap handling */ + } +#endif + + if ((ISPL(frame.tf_cs) == SEL_UPL) /*||(frame.tf_eflags & PSL_VM)*/) { + /* user trap */ + + userenter(td); + + sticks = (int)td->td_sticks; + lp->lwp_md.md_regs = &frame; + + switch (type) { + case T_PRIVINFLT: /* privileged instruction fault */ + ucode = type; + i = SIGILL; + break; + + case T_BPTFLT: /* bpt instruction fault */ + case T_TRCTRAP: /* trace trap */ + frame.tf_eflags &= ~PSL_T; + i = SIGTRAP; + break; + + case T_ARITHTRAP: /* arithmetic trap */ + ucode = code; + i = SIGFPE; + break; + + case T_ASTFLT: /* Allow process switch */ + mycpu->gd_cnt.v_soft++; + if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, + RQF_AST_OWEUPC); + addupc_task(p, p->p_prof.pr_addr, + p->p_prof.pr_ticks); + } + goto out; + + /* + * The following two traps can happen in + * vm86 mode, and, if so, we want to handle + * them specially. + */ + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ +#if 0 + if (frame.tf_eflags & PSL_VM) { + i = vm86_emulate((struct vm86frame *)&frame); + if (i == 0) + goto out; + break; + } +#endif + /* FALL THROUGH */ + + case T_SEGNPFLT: /* segment not present fault */ + case T_TSSFLT: /* invalid TSS fault */ + case T_DOUBLEFLT: /* double fault */ + default: + ucode = code + BUS_SEGM_FAULT ; + i = SIGBUS; + break; + + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + i = trap_pfault(&frame, TRUE, eva); + if (i == -1) + goto out; +#if defined(I586_CPU) && !defined(NO_F00F_HACK) + if (i == -2) + goto restart; +#endif + if (i == 0) + goto out; + + ucode = T_PAGEFLT; + + /* + * The code is lost because tf_err is overwritten + * with the fault address. Store it in the upper + * 16 bits of tf_trapno for vkernel consumption. 
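+ *
+ * The virtual kernel can then recover both values from the frame
+ * it is handed, roughly as:
+ *
+ *	fault_addr = tf_err;
+ *	fault_code = (tf_trapno >> 16) & 0xffff;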
+ */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame.tf_trapno |= (code << 16); + } + break; + + case T_DIVIDE: /* integer divide fault */ + ucode = FPE_INTDIV; + i = SIGFPE; + break; + +#if NISA > 0 + case T_NMI: + MAKEMPSAFE(have_mplock); + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi) + panic("NMI indicates hardware failure"); + break; +#endif /* NISA > 0 */ + + case T_OFLOW: /* integer overflow fault */ + ucode = FPE_INTOVF; + i = SIGFPE; + break; + + case T_BOUND: /* bounds check fault */ + ucode = FPE_FLTSUB; + i = SIGFPE; + break; + + case T_DNA: +#if NNPX > 0 + /* + * The kernel may have switched out the FP unit's + * state, causing the user process to take a fault + * when it tries to use the FP unit. Restore the + * state here + */ + if (npxdna()) + goto out; +#endif + if (!pmath_emulate) { + i = SIGFPE; + ucode = FPE_FPU_NP_TRAP; + break; + } + i = (*pmath_emulate)(&frame); + if (i == 0) { + if (!(frame.tf_eflags & PSL_T)) + goto out2; + frame.tf_eflags &= ~PSL_T; + i = SIGTRAP; + } + /* else ucode = emulator_only_knows() XXX */ + break; + + case T_FPOPFLT: /* FPU operand fetch fault */ + ucode = T_FPOPFLT; + i = SIGILL; + break; + + case T_XMMFLT: /* SIMD floating-point exception */ + ucode = 0; /* XXX */ + i = SIGFPE; + break; + } + } else { +#if 0 +kernel_trap: +#endif + /* kernel trap */ + + switch (type) { + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + trap_pfault(&frame, FALSE, eva); + goto out2; + + case T_DNA: +#if NNPX > 0 + /* + * The kernel may be using npx for copying or other + * purposes. + */ + if (npxdna()) + goto out2; +#endif + break; + + case T_PROTFLT: /* general protection fault */ + case T_SEGNPFLT: /* segment not present fault */ + /* + * Invalid segment selectors and out of bounds + * %eip's and %esp's can be set up in user mode. + * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. + */ +#define MAYBE_DORETI_FAULT(where, whereto) \ + do { \ + if (frame.tf_eip == (int)where) { \ + frame.tf_eip = (int)whereto; \ + goto out2; \ + } \ + } while (0) + +#if 0 + /* + * Since we don't save %gs across an interrupt + * frame this check must occur outside the intr + * nesting level check. + */ + if (frame.tf_eip == (int)cpu_switch_load_gs) { + td->td_pcb->pcb_gs = 0; + MAKEMPSAFE(have_mplock); + ksignal(p, SIGBUS); + goto out2; + } +#endif + if (mycpu->gd_intr_nesting_level == 0) { +#if 0 + /* + * Invalid %fs's and %gs's can be created using + * procfs or PT_SETREGS or by invalidating the + * underlying LDT entry. This causes a fault + * in kernel mode when the kernel attempts to + * switch contexts. Lose the bad context + * (XXX) so that we can continue, and generate + * a signal. 
+ */ + MAYBE_DORETI_FAULT(doreti_iret, + doreti_iret_fault); + MAYBE_DORETI_FAULT(doreti_popl_ds, + doreti_popl_ds_fault); + MAYBE_DORETI_FAULT(doreti_popl_es, + doreti_popl_es_fault); + MAYBE_DORETI_FAULT(doreti_popl_fs, + doreti_popl_fs_fault); +#endif + if (td->td_pcb->pcb_onfault) { + frame.tf_eip = + (register_t)td->td_pcb->pcb_onfault; + goto out2; + } + } + break; + + case T_TSSFLT: + /* + * PSL_NT can be set in user mode and isn't cleared + * automatically when the kernel is entered. This + * causes a TSS fault when the kernel attempts to + * `iret' because the TSS link is uninitialized. We + * want to get this fault so that we can fix the + * problem here and not every time the kernel is + * entered. + */ + if (frame.tf_eflags & PSL_NT) { + frame.tf_eflags &= ~PSL_NT; + goto out2; + } + break; + + case T_TRCTRAP: /* trace trap */ +#if 0 + if (frame.tf_eip == (int)IDTVEC(syscall)) { + /* + * We've just entered system mode via the + * syscall lcall. Continue single stepping + * silently until the syscall handler has + * saved the flags. + */ + goto out2; + } + if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { + /* + * The syscall handler has now saved the + * flags. Stop single stepping it. + */ + frame.tf_eflags &= ~PSL_T; + goto out2; + } +#endif +#if 0 + /* + * Ignore debug register trace traps due to + * accesses in the user's address space, which + * can happen under several conditions such as + * if a user sets a watchpoint on a buffer and + * then passes that buffer to a system call. + * We still want to get TRCTRAPS for addresses + * in kernel space because that is useful when + * debugging the kernel. + */ + if (user_dbreg_trap()) { + /* + * Reset breakpoint bits because the + * processor doesn't + */ + load_dr6(rdr6() & 0xfffffff0); + goto out2; + } +#endif + /* + * Fall through (TRCTRAP kernel mode, kernel address) + */ + case T_BPTFLT: + /* + * If DDB is enabled, let it handle the debugger trap. + * Otherwise, debugger traps "can't happen". + */ +#ifdef DDB + MAKEMPSAFE(have_mplock); + if (kdb_trap (type, 0, &frame)) + goto out2; +#endif + break; + +#if NISA > 0 + case T_NMI: + MAKEMPSAFE(have_mplock); +#ifdef POWERFAIL_NMI +#ifndef TIMER_FREQ +# define TIMER_FREQ 1193182 +#endif + handle_powerfail: + { + static unsigned lastalert = 0; + + if(time_second - lastalert > 10) + { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + /* YYY mp count */ + goto out2; + } +#else /* !POWERFAIL_NMI */ + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi == 0) + goto out2; + /* FALL THROUGH */ +#endif /* POWERFAIL_NMI */ +#endif /* NISA > 0 */ + } + + MAKEMPSAFE(have_mplock); + trap_fatal(&frame, eva); + goto out2; + } + + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + vkernel_trap(p, &frame); + goto out; + } + + /* + * Translate fault for emulators (e.g. 
Linux) + */ + if (*p->p_sysent->sv_transtrap) + i = (*p->p_sysent->sv_transtrap)(i, type); + + MAKEMPSAFE(have_mplock); + trapsignal(p, i, ucode); + +#ifdef DEBUG + if (type <= MAX_TRAP_MSG) { + uprintf("fatal process exception: %s", + trap_msg[type]); + if ((type == T_PAGEFLT) || (type == T_PROTFLT)) + uprintf(", fault VA = 0x%lx", (u_long)eva); + uprintf("\n"); + } +#endif + +out: +#ifdef SMP + if (ISPL(frame.tf_cs) == SEL_UPL) + KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame.tf_eip)); +#endif + userret(lp, &frame, sticks); + userexit(lp); +out2: ; +#ifdef SMP + if (have_mplock) + rel_mplock(); +#endif +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! %d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} + +#ifdef notyet +/* + * This version doesn't allow a page fault to user space while + * in the kernel. The rest of the kernel needs to be made "safe" + * before this can be used. I think the only things remaining + * to be made safe is the process tracing/debugging code. + */ +static int +trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map = 0; + int rv = 0; + vm_prot_t ftype; + thread_t td = curthread; + struct proc *p = td->td_proc; /* may be NULL */ + + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + + va = trunc_page(eva); + if (va < KvaStart) { + vm_offset_t v; + vm_page_t mpte; + + if (p == NULL || + (!usermode && va < VM_MAX_USER_ADDRESS && + (td->td_gd->gd_intr_nesting_level != 0 || + td->td_pcb->pcb_onfault == NULL))) { + trap_fatal(frame, eva); + return (-1); + } + + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + vm = p->p_vmspace; + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + + /* + * Keep swapout from messing with us during this + * critical time. + */ + ++p->p_lock; + + /* + * Grow the stack if necessary + */ + /* grow_stack returns false only if va falls into + * a growable stack region and the stack growth + * fails. It returns true if va was not within + * a growable stack region, or if the stack + * growth succeeded. + */ + if (!grow_stack (p, va)) { + rv = KERN_FAILURE; + --p->p_lock; + goto nogo; + } + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + --p->p_lock; + } else { + /* + * Don't allow user-mode faults in kernel address space. + */ + if (usermode) + goto nogo; + + /* + * Since we know that kernel virtual address addresses + * always have pte pages mapped, we just have to fault + * the page. + */ + rv = vm_fault(&kernel_map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (mtd->td_gd->gd_intr_nesting_level == 0 && + td->td_pcb->pcb_onfault) { + frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; + return (0); + } + trap_fatal(frame, eva); + return (-1); + } + + /* kludge to pass faulting virtual address to sendsig */ + frame->tf_err = eva; + + return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); +} +#endif + +int +trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map = 0; + int rv = 0; + vm_prot_t ftype; + thread_t td = curthread; + struct proc *p = td->td_proc; + + va = trunc_page(eva); + if (va >= KERNBASE) { + /* + * Don't allow user-mode faults in kernel address space. + * An exception: if the faulting address is the invalid + * instruction entry in the IDT, then the Intel Pentium + * F00F bug workaround was triggered, and we need to + * treat it is as an illegal instruction, and not a page + * fault. + */ + if (usermode) + goto nogo; + + map = &kernel_map; + } else { + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + if (p != NULL) + vm = p->p_vmspace; + + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + } + + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + + if (map != &kernel_map) { + /* + * Keep swapout from messing with us during this + * critical time. + */ + ++p->p_lock; + + /* + * Grow the stack if necessary + */ + /* grow_stack returns false only if va falls into + * a growable stack region and the stack growth + * fails. It returns true if va was not within + * a growable stack region, or if the stack + * growth succeeded. + */ + if (!grow_stack (p, va)) { + rv = KERN_FAILURE; + --p->p_lock; + goto nogo; + } + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + --p->p_lock; + } else { + /* + * Don't have to worry about process locking or stacks in the kernel. + */ + rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (td->td_gd->gd_intr_nesting_level == 0 && + td->td_pcb->pcb_onfault) { + frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; + return (0); + } + trap_fatal(frame, eva); + return (-1); + } + + /* kludge to pass faulting virtual address to sendsig */ + frame->tf_err = eva; + + return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); +} + +static void +trap_fatal(struct trapframe *frame, vm_offset_t eva) +{ + int code, type, ss, esp; + + code = frame->tf_err; + type = frame->tf_trapno; + + if (type <= MAX_TRAP_MSG) + kprintf("\n\nFatal trap %d: %s while in %s mode\n", + type, trap_msg[type], + /*frame->tf_eflags & PSL_VM ? "vm86" :*/ + ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); +#ifdef SMP + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic.id = %08x\n", lapic.id); +#endif + if (type == T_PAGEFLT) { + kprintf("fault virtual address = 0x%x\n", eva); + kprintf("fault code = %s %s, %s\n", + code & PGEX_U ? "user" : "supervisor", + code & PGEX_W ? "write" : "read", + code & PGEX_P ? 
"protection violation" : "page not present"); + } + kprintf("instruction pointer = 0x%x:0x%x\n", + frame->tf_cs & 0xffff, frame->tf_eip); + if ((ISPL(frame->tf_cs) == SEL_UPL) /*||(frame->tf_eflags&PSL_VM)*/) { + ss = frame->tf_ss & 0xffff; + esp = frame->tf_esp; + } else { + ss = GSEL(GDATA_SEL, SEL_KPL); + esp = (int)&frame->tf_esp; + } + kprintf("stack pointer = 0x%x:0x%x\n", ss, esp); + kprintf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); + kprintf("processor eflags = "); + if (frame->tf_eflags & PSL_T) + kprintf("trace trap, "); + if (frame->tf_eflags & PSL_I) + kprintf("interrupt enabled, "); + if (frame->tf_eflags & PSL_NT) + kprintf("nested task, "); + if (frame->tf_eflags & PSL_RF) + kprintf("resume, "); +#if 0 + if (frame->tf_eflags & PSL_VM) + kprintf("vm86, "); +#endif + kprintf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); + kprintf("current process = "); + if (curproc) { + kprintf("%lu (%s)\n", + (u_long)curproc->p_pid, curproc->p_comm ? + curproc->p_comm : ""); + } else { + kprintf("Idle\n"); + } + kprintf("current thread = pri %d ", curthread->td_pri); + if (curthread->td_pri >= TDPRI_CRIT) + kprintf("(CRIT)"); + kprintf("\n"); +#ifdef SMP +/** + * XXX FIXME: + * we probably SHOULD have stopped the other CPUs before now! + * another CPU COULD have been touching cpl at this moment... + */ + kprintf(" <- SMP: XXX"); +#endif + kprintf("\n"); + +#ifdef KDB + if (kdb_trap(&psl)) + return; +#endif +#ifdef DDB + if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) + return; +#endif + kprintf("trap number = %d\n", type); + if (type <= MAX_TRAP_MSG) + panic("%s", trap_msg[type]); + else + panic("unknown/reserved trap"); +} + +/* + * Double fault handler. Called when a fault occurs while writing + * a frame for a trap/exception onto the stack. This usually occurs + * when the stack overflows (such is the case with infinite recursion, + * for example). + * + * XXX Note that the current PTD gets replaced by IdlePTD when the + * task switch occurs. This means that the stack that was active at + * the time of the double fault is not available at unless + * the machine was idle when the double fault occurred. The downside + * of this is that "trace " in ddb won't work. + */ +void +dblfault_handler(void) +{ + struct mdglobaldata *gd = mdcpu; + + kprintf("\nFatal double fault:\n"); + kprintf("eip = 0x%x\n", gd->gd_common_tss.tss_eip); + kprintf("esp = 0x%x\n", gd->gd_common_tss.tss_esp); + kprintf("ebp = 0x%x\n", gd->gd_common_tss.tss_ebp); +#ifdef SMP + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic.id = %08x\n", lapic.id); +#endif + panic("double fault"); +} + +/* + * Compensate for 386 brain damage (missing URKR). + * This is a little simpler than the pagefault handler in trap() because + * it the page tables have already been faulted in and high addresses + * are thrown out early for other reasons. + */ +int +trapwrite(unsigned addr) +{ + struct proc *p; + vm_offset_t va; + struct vmspace *vm; + int rv; + + va = trunc_page((vm_offset_t)addr); + /* + * XXX - MAX is END. Changed > to >= for temp. fix. 
+ */ + if (va >= VM_MAX_USER_ADDRESS) + return (1); + + p = curproc; + vm = p->p_vmspace; + + ++p->p_lock; + + if (!grow_stack (p, va)) { + --p->p_lock; + return (1); + } + + /* + * fault the data page + */ + rv = vm_fault(&vm->vm_map, va, VM_PROT_WRITE, VM_FAULT_DIRTY); + + --p->p_lock; + + if (rv != KERN_SUCCESS) + return 1; + + return (0); +} + +/* + * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold MP lock. + * + * MPSAFE - note that large sections of this routine are run without + * the MP lock. + */ + +void +syscall2(struct trapframe frame) +{ + struct thread *td = curthread; + struct proc *p = td->td_proc; + struct lwp *lp = td->td_lwp; + caddr_t params; + struct sysent *callp; + register_t orig_tf_eflags; + int sticks; + int error; + int narg; +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif +#ifdef SMP + int have_mplock = 0; +#endif + u_int code; + union sysunion args; + +#ifdef DIAGNOSTIC + if (ISPL(frame.tf_cs) != SEL_UPL) { + get_mplock(); + panic("syscall"); + /* NOT REACHED */ + } +#endif + +#ifdef SMP + KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame.tf_eip)); + if (syscall_mpsafe == 0) + MAKEMPSAFE(have_mplock); +#endif + userenter(td); /* lazy raise our priority */ + + /* + * Misc + */ + sticks = (int)td->td_sticks; + orig_tf_eflags = frame.tf_eflags; + + /* + * Virtual kernel intercept - if a VM context managed by a virtual + * kernel issues a system call the virtual kernel handles it, not us. + * Restore the virtual kernel context and return from its system + * call. The current frame is copied out to the virtual kernel. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + error = vkernel_trap(p, &frame); + frame.tf_eax = error; + if (error) + frame.tf_eflags |= PSL_C; + error = EJUSTRETURN; + goto out; + } + + /* + * Get the system call parameters and account for time + */ + lp->lwp_md.md_regs = &frame; + params = (caddr_t)frame.tf_esp + sizeof(int); + code = frame.tf_eax; + + if (p->p_sysent->sv_prepsyscall) { + (*p->p_sysent->sv_prepsyscall)( + &frame, (int *)(&args.nosys.sysmsg + 1), + &code, ¶ms); + } else { + /* + * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. + */ + if (code == SYS_syscall) { + /* + * Code is first argument, followed by actual args. + */ + code = fuword(params); + params += sizeof(int); + } else if (code == SYS___syscall) { + /* + * Like syscall, but code is a quad, so as to maintain + * quad alignment for the rest of the arguments. 
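+ *
+ * In other words the user stack at syscall entry looks roughly
+ * like
+ *
+ *	[ return address ][ code (int or quad) ][ arg0 ][ arg1 ] ...
+ *
+ * and params is advanced past the code by sizeof(int) or
+ * sizeof(quad_t) accordingly before the arguments are copied in.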
+ */ + code = fuword(params); + params += sizeof(quad_t); + } + } + + code &= p->p_sysent->sv_mask; + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin is MP aware, but the tracing code is not + */ + if (narg && params) { + error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), + narg * sizeof(register_t)); + if (error) { +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + + ktrsyscall(p, code, narg, + (void *)(&args.nosys.sysmsg + 1)); + } +#endif + goto bad; + } + } + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + ktrsyscall(p, code, narg, (void *)(&args.nosys.sysmsg + 1)); + } +#endif + + /* + * For traditional syscall code edx is left untouched when 32 bit + * results are returned. Since edx is loaded from fds[1] when the + * system call returns we pre-set it here. + */ + args.sysmsg_fds[0] = 0; + args.sysmsg_fds[1] = frame.tf_edx; + + /* + * The syscall might manipulate the trap frame. If it does it + * will probably return EJUSTRETURN. + */ + args.sysmsg_frame = &frame; + + STOPEVENT(p, S_SCE, narg); /* MP aware */ + +#ifdef SMP + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe. We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + MAKEMPSAFE(have_mplock); +#endif + + error = (*callp->sy_call)(&args); + +out: + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ + switch (error) { + case 0: + /* + * Reinitialize proc pointer `p' as it may be different + * if this is a child returning from fork syscall. + */ + p = curproc; + lp = curthread->td_lwp; + frame.tf_eax = args.sysmsg_fds[0]; + frame.tf_edx = args.sysmsg_fds[1]; + frame.tf_eflags &= ~PSL_C; + break; + case ERESTART: + /* + * Reconstruct pc, assuming lcall $X,y is 7 bytes, + * int 0x80 is 2 bytes. We saved this in tf_err. + */ + frame.tf_eip -= frame.tf_err; + break; + case EJUSTRETURN: + break; + case EASYNC: + panic("Unexpected EASYNC return value (for now)"); + default: +bad: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + frame.tf_eax = error; + frame.tf_eflags |= PSL_C; + break; + } + + /* + * Traced syscall. trapsignal() is not MP aware. + */ + if ((orig_tf_eflags & PSL_T) /*&& !(orig_tf_eflags & PSL_VM)*/) { + MAKEMPSAFE(have_mplock); + frame.tf_eflags &= ~PSL_T; + trapsignal(p, SIGTRAP, 0); + } + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(lp, &frame, sticks); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) { + MAKEMPSAFE(have_mplock); + ktrsysret(p, code, error, args.sysmsg_result); + } +#endif + + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, code); + + userexit(lp); +#ifdef SMP + /* + * Release the MP lock if we had to get it + */ + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount syscall2/end from %p", (void *)frame.tf_eip)); + if (have_mplock) + rel_mplock(); +#endif +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! 
%d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} + +/* + * Simplified back end of syscall(), used when returning from fork() + * directly into user mode. MP lock is held on entry and should be + * released on return. This code will return back into the fork + * trampoline code which then runs doreti. + */ +void +fork_return(struct lwp *lp, struct trapframe frame) +{ + struct proc *p = lp->lwp_proc; + + frame.tf_eax = 0; /* Child returns zero */ + frame.tf_eflags &= ~PSL_C; /* success */ + frame.tf_edx = 1; + + /* + * Newly forked processes are given a kernel priority. We have to + * adjust the priority to a normal user priority and fake entry + * into the kernel (call userenter()) to install a passive release + * function just in case userret() decides to stop the process. This + * can occur when ^Z races a fork. If we do not install the passive + * release function the current process designation will not be + * released when the thread goes to sleep. + */ + lwkt_setpri_self(TDPRI_USER_NORM); + userenter(lp->lwp_thread); + userret(lp, &frame, 0); +#ifdef KTRACE + if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) + ktrsysret(p, SYS_fork, 0, 0); +#endif + p->p_flag |= P_PASSIVE_ACQ; + userexit(lp); + p->p_flag &= ~P_PASSIVE_ACQ; +#ifdef SMP + KKASSERT(lp->lwp_thread->td_mpcount == 1); + rel_mplock(); +#endif +} diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/i386/userldt.c similarity index 78% copy from sys/platform/vkernel/include/md_var.h copy to sys/platform/vkernel/i386/userldt.c index 628dac29f2..c2f3ad6608 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/i386/userldt.c @@ -31,29 +31,30 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ */ -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -#ifndef _SYS_TYPES_H_ #include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif - -extern char sigcode[]; -extern int szsigcode; -extern vpte_t *KernelPTA; -extern vpte_t *KernelPTD; -extern vm_offset_t crashdumpmap; +#include +#include +#include +#include -struct mdglobaldata; +void +set_user_ldt (struct pcb *pcb) +{ + panic("set_user_ldt"); +} -void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); +struct pcb_ldt * +user_ldt_alloc (struct pcb *pcb, int len) +{ + panic("user_ldt_alloc"); +} -#endif +void +user_ldt_free (struct pcb *pcb) +{ + panic("user_ldt_free"); +} diff --git a/sys/platform/vkernel/i386/vm_machdep.c b/sys/platform/vkernel/i386/vm_machdep.c new file mode 100644 index 0000000000..5ddfe725f6 --- /dev/null +++ b/sys/platform/vkernel/i386/vm_machdep.c @@ -0,0 +1,398 @@ +/*- + * Copyright (c) 1982, 1986 The Regents of the University of California. + * Copyright (c) 1989, 1990 William Jolitz + * Copyright (c) 1994 John Dyson + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 + * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/vm_machdep.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include "use_npx.h" +#include "use_isa.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* npxthread */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +char machine[] = MACHINE_CPU; +SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, + machine, 0, "Machine class"); + +/* + * Finish a fork operation, with lwp lp2 nearly set up. + * Copy and update the pcb, set up the stack so that the child + * ready to run and return to user mode. + */ +void +cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) +{ + struct pcb *pcb2; + + if ((flags & RFPROC) == 0) { + if ((flags & RFMEM) == 0) { + /* unshare user LDT */ + struct pcb *pcb1 = lp1->lwp_thread->td_pcb; + struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt; + if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) { + pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len); + user_ldt_free(pcb1); + pcb1->pcb_ldt = pcb_ldt; + set_user_ldt(pcb1); + } + } + return; + } + +#if NNPX > 0 + /* Ensure that lp1's pcb is up to date. */ + if (mdcpu->gd_npxthread == lp1->lwp_thread) + npxsave(lp1->lwp_thread->td_savefpu); +#endif + + /* + * Copy lp1's PCB. This really only applies to the + * debug registers and FP state, but its faster to just copy the + * whole thing. Because we only save the PCB at switchout time, + * the register state (including pcb_gs) may not be current. + */ + pcb2 = lp2->lwp_thread->td_pcb; + *pcb2 = *lp1->lwp_thread->td_pcb; + + /* + * Create a new fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a + * syscall. This copies the user mode register values. 
The + * 16 byte offset saves space for vm86, and must match + * common_tss.esp0 (kernel stack pointer on entry from user mode) + * + * pcb_esp must allocate an additional call-return pointer below + * the trap frame which will be restored by cpu_restore from + * PCB_EIP, and the thread's td_sp pointer must allocate an + * additonal two worsd below the pcb_esp call-return pointer to + * hold the LWKT restore function pointer and eflags. + * + * The LWKT restore function pointer must be set to cpu_restore, + * which is our standard heavy weight process switch-in function. + * YYY eventually we should shortcut fork_return and fork_trampoline + * to use the LWKT restore function directly so we can get rid of + * all the extra crap we are setting up. + */ + lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1; + bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs)); + + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir); + pcb2->pcb_edi = 0; + pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ + pcb2->pcb_ebp = 0; + pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *); + pcb2->pcb_ebx = (int)lp2; /* fork_trampoline argument */ + pcb2->pcb_eip = (int)fork_trampoline; + lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *)); + *(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER; + lp2->lwp_thread->td_sp -= sizeof(void *); + *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; + + /* + * Segment registers. + */ + pcb2->pcb_gs = rgs(); + + /* + * pcb2->pcb_ldt: duplicated below, if necessary. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above (always 0 here?). + * pcb2->pcb_onfault: cloned above (always NULL here?). + */ + + /* + * XXX don't copy the i/o pages. this should probably be fixed. + */ + pcb2->pcb_ext = 0; + + /* Copy the LDT, if necessary. */ + if (pcb2->pcb_ldt != 0) { + if (flags & RFMEM) { + pcb2->pcb_ldt->ldt_refcnt++; + } else { + pcb2->pcb_ldt = user_ldt_alloc(pcb2, + pcb2->pcb_ldt->ldt_len); + } + } + bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls, + sizeof(lp2->lwp_thread->td_tls)); + /* + * Now, cpu_switch() can schedule the new process. + * pcb_esp is loaded pointing to the cpu_switch() stack frame + * containing the return address when exiting cpu_switch. + * This will normally be to fork_trampoline(), which will have + * %ebx loaded with the new proc's pointer. fork_trampoline() + * will set up a stack to call fork_return(p, frame); to complete + * the return to user-mode. + */ +} + +/* + * Intercept the return address from a freshly forked process that has NOT + * been scheduled yet. + * + * This is needed to make kernel threads stay in kernel mode. 
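As an illustration only (not part of the patch, and every name below is a hypothetical stand-in), the effect of the pcb_esi/pcb_ebx convention can be modeled in plain user-space C: the fork trampoline ends up calling whatever function sits in pcb_esi with the argument in pcb_ebx followed by the trap frame, and cpu_set_fork_handler() merely rewrites those two slots so a kernel thread never falls through to fork_return() and user mode:

#include <stdio.h>

struct fake_frame {
    int tf_eax;
};

struct fake_pcb {
    void (*pcb_esi)(void *arg, struct fake_frame frame); /* function to call */
    void  *pcb_ebx;                                      /* its first argument */
};

static void
fork_return_model(void *arg, struct fake_frame frame)
{
    printf("lwp %p returns to user mode, eax=%d\n", arg, frame.tf_eax);
}

static void
kthread_main_model(void *arg, struct fake_frame frame)
{
    printf("kernel thread \"%s\" keeps running in kernel mode\n",
        (const char *)arg);
}

int
main(void)
{
    struct fake_frame frame = { 0 };    /* child's %eax = 0 */
    struct fake_pcb pcb = { fork_return_model, (void *)0x1234 };

    /* default set up by cpu_fork(): a forked user lwp */
    pcb.pcb_esi(pcb.pcb_ebx, frame);

    /* cpu_set_fork_handler() equivalent: swap function and argument */
    pcb.pcb_esi = kthread_main_model;
    pcb.pcb_ebx = "example_kthread";
    pcb.pcb_esi(pcb.pcb_ebx, frame);
    return 0;
}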
+ */ +void +cpu_set_fork_handler(struct lwp *lp, void (*func)(void *), void *arg) +{ + /* + * Note that the trap frame follows the args, so the function + * is really called like this: func(arg, frame); + */ + lp->lwp_thread->td_pcb->pcb_esi = (int) func; /* function */ + lp->lwp_thread->td_pcb->pcb_ebx = (int) arg; /* first arg */ +} + +void +cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg) +{ + td->td_pcb->pcb_esi = (int)func; + td->td_pcb->pcb_ebx = (int) arg; + td->td_switch = cpu_lwkt_switch; + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = rfunc; /* exit function on return */ + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = cpu_kthread_restore; +} + +void +cpu_proc_exit(void) +{ + struct thread *td = curthread; + struct pcb *pcb; + struct pcb_ext *ext; + +#if NNPX > 0 + npxexit(); +#endif /* NNPX */ + + /* + * If we were using a private TSS do a forced-switch to ourselves + * to switch back to the common TSS before freeing it. + */ + pcb = td->td_pcb; + if ((ext = pcb->pcb_ext) != NULL) { + crit_enter(); + pcb->pcb_ext = NULL; + td->td_switch(td); + crit_exit(); + kmem_free(&kernel_map, (vm_offset_t)ext, ctob(IOPAGES + 1)); + } + user_ldt_free(pcb); + if (pcb->pcb_flags & PCB_DBREGS) { + /* + * disable all hardware breakpoints + */ + reset_dbregs(); + pcb->pcb_flags &= ~PCB_DBREGS; + } + td->td_gd->gd_cnt.v_swtch++; + + crit_enter_quick(td); + lwkt_deschedule_self(td); + lwkt_remove_tdallq(td); + cpu_thread_exit(); +} + +/* + * Terminate the current thread. The caller must have already acquired + * the thread's rwlock and placed it on a reap list or otherwise notified + * a reaper of its existance. We set a special assembly switch function which + * releases td_rwlock after it has cleaned up the MMU state and switched + * out the stack. + * + * Must be caller from a critical section and with the thread descheduled. + */ +void +cpu_thread_exit(void) +{ + curthread->td_switch = cpu_exit_switch; + curthread->td_flags |= TDF_EXITING; + lwkt_switch(); + panic("cpu_exit"); +} + +/* + * Process Reaper. Called after the caller has acquired the thread's + * rwlock and removed it from the reap list. + */ +void +cpu_proc_wait(struct proc *p) +{ + struct thread *td; + + /* drop per-process resources */ + td = pmap_dispose_proc(p); + if (td) + lwkt_free_thread(td); +} + +/* + * Dump the machine specific header information at the start of a core dump. + */ +int +cpu_coredump(struct thread *td, struct vnode *vp, struct ucred *cred) +{ + struct proc *p = td->td_proc; + int error; + caddr_t tempuser; + + KKASSERT(p); + tempuser = kmalloc(ctob(UPAGES), M_TEMP, M_WAITOK); + if (!tempuser) + return EINVAL; + + bzero(tempuser, ctob(UPAGES)); + bcopy(p->p_addr, tempuser, sizeof(struct user)); + bcopy(p->p_md.md_regs, + tempuser + ((caddr_t) p->p_md.md_regs - (caddr_t) p->p_addr), + sizeof(struct trapframe)); + bcopy(p->p_thread->td_pcb, tempuser + ((char *)p->p_thread->td_pcb - (char *)p->p_addr), sizeof(struct pcb)); + + error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser, ctob(UPAGES), + (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, (int *)NULL); + + kfree(tempuser, M_TEMP); + + return error; +} + +#ifdef notyet +static void +setredzone(u_short *pte, caddr_t vaddr) +{ +/* eventually do this by setting up an expand-down stack segment + for ss0: selector, allowing stack access down to top of u. 
+ this means though that protection violations need to be handled + thru a double fault exception that must do an integral task + switch to a known good context, within which a dump can be + taken. a sensible scheme might be to save the initial context + used by sched (that has physical memory mapped 1:1 at bottom) + and take the dump while still in mapped mode */ +} +#endif + +/* + * Convert kernel VA to physical address + */ +vm_paddr_t +kvtop(void *addr) +{ + vm_paddr_t pa; + + pa = pmap_kextract((vm_offset_t)addr); + if (pa == 0) + panic("kvtop: zero page frame"); + return (pa); +} + +int +grow_stack(struct proc *p, u_int sp) +{ + int rv; + + rv = vm_map_growstack (p, sp); + if (rv != KERN_SUCCESS) + return (0); + + return (1); +} + +SYSCTL_DECL(_vm_stats_misc); + +static int cnt_prezero; + +SYSCTL_INT(_vm_stats_misc, OID_AUTO, + cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, ""); + +/* + * Tell whether this address is in some physical memory region. + * Currently used by the kernel coredump code in order to avoid + * dumping the ``ISA memory hole'' which could cause indefinite hangs, + * or other unpredictable behaviour. + */ + +int +is_physical_memory(vm_offset_t addr) +{ + return 1; +} + diff --git a/sys/platform/vkernel/include/globaldata.h b/sys/platform/vkernel/include/globaldata.h index 793c8fb850..350c21b79c 100644 --- a/sys/platform/vkernel/include/globaldata.h +++ b/sys/platform/vkernel/include/globaldata.h @@ -28,7 +28,7 @@ * should not include this file. * * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.3 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.4 2007/01/05 22:18:19 dillon Exp $ */ #ifndef _MACHINE_GLOBALDATA_H_ @@ -83,7 +83,7 @@ struct mdglobaldata { int gd_spending; /* software interrupt pending */ int gd_sdelayed; /* delayed software ints */ int gd_currentldt; - int gd_private_tss; + int unused000; u_int unused001; u_int gd_other_cpus; u_int gd_ss_eflags; diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/include/md_var.h index 628dac29f2..26de6e4685 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/include/md_var.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.3 2007/01/05 22:18:19 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ @@ -49,11 +49,20 @@ extern int szsigcode; extern vpte_t *KernelPTA; extern vpte_t *KernelPTD; extern vm_offset_t crashdumpmap; +extern int cpu_fxsr; struct mdglobaldata; +vpte_t *pmap_kpte(vm_offset_t va); void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); + +void cpu_heavy_restore(void); /* cannot be called from C */ +void cpu_lwkt_restore(void); /* cannot be called from C */ +void cpu_idle_restore(void); /* cannot be called from C */ +void cpu_kthread_restore(void); /* cannot be called from C */ +void cpu_exit_switch (struct thread *next); +void cpu_setregs (void); +void cpu_idle (void); #endif diff --git a/sys/platform/vkernel/include/pcb_ext.h b/sys/platform/vkernel/include/pcb_ext.h new file mode 100644 index 0000000000..59d0e61cbb --- /dev/null +++ b/sys/platform/vkernel/include/pcb_ext.h @@ -0,0 +1,76 @@ +/*- + * Copyright (c) 1997 Jonathan Lemon + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ + * $DragonFly: src/sys/platform/vkernel/include/pcb_ext.h,v 1.1 2007/01/05 22:18:19 dillon Exp $ + */ + +#ifndef _MACHINE_PCB_EXT_H_ +#define _MACHINE_PCB_EXT_H_ + +#ifndef _SYS_TYPES_H_ +#include +#endif + +/* + * Extension to the 386 process control block + */ +#ifndef _MACHINE_TSS_H_ +#include +#endif +#ifndef _MACHINE_VM86_H_ +#include +#endif +#ifndef _MACHINE_SEGMENTS_H_ +#include +#endif + +struct pcb_ext { + struct segment_descriptor ext_tssd; /* tss descriptor */ + struct i386tss ext_tss; /* per-process i386tss */ + caddr_t ext_iomap; /* i/o permission bitmap */ + struct vm86_kernel ext_vm86; /* vm86 area */ +}; + +struct pcb_ldt { + caddr_t ldt_base; + int ldt_len; + int ldt_refcnt; + u_long ldt_active; + struct segment_descriptor ldt_sd; +}; + +#ifdef _KERNEL + +struct pcb; + +void set_user_ldt (struct pcb *); +struct pcb_ldt *user_ldt_alloc (struct pcb *, int); +void user_ldt_free (struct pcb *); +void set_user_TLS (void); + +#endif + +#endif /* _MACHINE_PCB_EXT_H_ */ diff --git a/sys/platform/vkernel/platform/busdma_machdep.c b/sys/platform/vkernel/platform/busdma_machdep.c new file mode 100644 index 0000000000..f153c1f09e --- /dev/null +++ b/sys/platform/vkernel/platform/busdma_machdep.c @@ -0,0 +1,900 @@ +/* + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.16.2.2 2003/01/23 00:55:27 scottl Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/busdma_machdep.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* XXX needed for to access pmap to convert per-proc virtual to physical */ +#include +#include +#include + +#include + +#define MAX_BPAGES 128 + +struct bus_dma_tag { + bus_dma_tag_t parent; + bus_size_t alignment; + bus_size_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_dma_filter_t *filter; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + int ref_count; + int map_count; + bus_dma_segment_t *segments; +}; + +struct bounce_page { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + vm_offset_t datavaddr; /* kva of client data */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(bounce_page) links; +}; + +int busdma_swi_pending; + +static STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; +static int free_bpages; +static int reserved_bpages; +static int active_bpages; +static int total_bpages; +static bus_addr_t bounce_lowaddr = BUS_SPACE_MAXADDR; + +struct bus_dmamap { + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + void *buf; /* unmapped buffer pointer */ + bus_size_t buflen; /* unmapped buffer length */ + bus_dmamap_callback_t *callback; + void *callback_arg; + STAILQ_ENTRY(bus_dmamap) links; +}; + +static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; +static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; +static struct bus_dmamap nobounce_dmamap; + +static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); +static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map); +static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_offset_t vaddr, bus_size_t size); +static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); + +static __inline int +run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) +{ + int retval; + + retval = 0; + do { + if (paddr > dmat->lowaddr + && paddr <= dmat->highaddr + && (dmat->filter == NULL + || (*dmat->filter)(dmat->filterarg, paddr) != 0)) + retval = 1; + + dmat = dmat->parent; + } while (retval == 0 && dmat != NULL); + return (retval); +} + +#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 +/* + * Allocate a device specific dma_tag. 
+ */ +int +bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_size_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, + void *filterarg, bus_size_t maxsize, int nsegments, + bus_size_t maxsegsz, int flags, bus_dma_tag_t *dmat) +{ + bus_dma_tag_t newtag; + int error = 0; + + /* Return a NULL tag on failure */ + *dmat = NULL; + + newtag = kmalloc(sizeof(*newtag), M_DEVBUF, M_INTWAIT); + + newtag->parent = parent; + newtag->alignment = alignment; + newtag->boundary = boundary; + newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); + newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); + newtag->filter = filter; + newtag->filterarg = filterarg; + newtag->maxsize = maxsize; + newtag->nsegments = nsegments; + newtag->maxsegsz = maxsegsz; + newtag->flags = flags; + newtag->ref_count = 1; /* Count ourself */ + newtag->map_count = 0; + newtag->segments = NULL; + + /* Take into account any restrictions imposed by our parent tag */ + if (parent != NULL) { + newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); + newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); + /* + * XXX Not really correct??? Probably need to honor boundary + * all the way up the inheritence chain. + */ + newtag->boundary = MAX(parent->boundary, newtag->boundary); + if (newtag->filter == NULL) { + /* + * Short circuit looking at our parent directly + * since we have encapsulated all of its information + */ + newtag->filter = parent->filter; + newtag->filterarg = parent->filterarg; + newtag->parent = parent->parent; + } + if (newtag->parent != NULL) { + parent->ref_count++; + } + } + + if (newtag->lowaddr < ptoa(Maxmem) && + (flags & BUS_DMA_ALLOCNOW) != 0) { + /* Must bounce */ + + if (lowaddr > bounce_lowaddr) { + /* + * Go through the pool and kill any pages + * that don't reside below lowaddr. + */ + panic("bus_dma_tag_create: page reallocation " + "not implemented"); + } + if (ptoa(total_bpages) < maxsize) { + int pages; + + pages = atop(maxsize) - total_bpages; + + /* Add pages to our bounce pool */ + if (alloc_bounce_pages(newtag, pages) < pages) + error = ENOMEM; + } + /* Performed initial allocation */ + newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; + } + + if (error != 0) { + kfree(newtag, M_DEVBUF); + } else { + *dmat = newtag; + } + return (error); +} + +int +bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + if (dmat != NULL) { + + if (dmat->map_count != 0) + return (EBUSY); + + while (dmat != NULL) { + bus_dma_tag_t parent; + + parent = dmat->parent; + dmat->ref_count--; + if (dmat->ref_count == 0) { + if (dmat->segments != NULL) + kfree(dmat->segments, M_DEVBUF); + kfree(dmat, M_DEVBUF); + /* + * Last reference count, so + * release our reference + * count on our parent. + */ + dmat = parent; + } else + dmat = NULL; + } + } + return (0); +} + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. 
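As a usage sketch only (the softc, function names and sizes below are hypothetical, but the call pattern follows the signatures implemented above), a driver built on this layer typically creates one tag describing its device's addressing limits and then obtains maps from it:

struct example_softc {
    bus_dma_tag_t   sc_dmat;        /* tag describing device constraints */
    bus_dmamap_t    sc_dmamap;      /* one map for data transfers */
};

static void
example_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
    if (error == 0)
        *(bus_addr_t *)arg = segs[0].ds_addr;  /* remember the bus address */
}

static int
example_dma_setup(struct example_softc *sc)
{
    int error;

    error = bus_dma_tag_create(NULL,                    /* parent */
                               1,                       /* alignment */
                               0,                       /* boundary */
                               BUS_SPACE_MAXADDR_32BIT, /* lowaddr */
                               BUS_SPACE_MAXADDR,       /* highaddr */
                               NULL, NULL,              /* filter, filterarg */
                               65536,                   /* maxsize */
                               1,                       /* nsegments */
                               65536,                   /* maxsegsz */
                               0,                       /* flags */
                               &sc->sc_dmat);
    if (error)
        return (error);
    return (bus_dmamap_create(sc->sc_dmat, 0, &sc->sc_dmamap));
}

A later bus_dmamap_load(sc->sc_dmat, sc->sc_dmamap, buf, len, example_dma_callback, &busaddr, 0) would then hand the resulting segment list to the callback, bouncing through the page pool above only when the buffer violates the tag's lowaddr constraint.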
+ */ +int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + int error; + + error = 0; + + if (dmat->segments == NULL) { + KKASSERT(dmat->nsegments && dmat->nsegments < 16384); + dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * + dmat->nsegments, M_DEVBUF, M_INTWAIT); + } + + if (dmat->lowaddr < ptoa(Maxmem)) { + /* Must bounce */ + int maxpages; + + *mapp = kmalloc(sizeof(**mapp), M_DEVBUF, M_INTWAIT); + if (*mapp == NULL) { + return (ENOMEM); + } else { + /* Initialize the new map */ + bzero(*mapp, sizeof(**mapp)); + STAILQ_INIT(&((*mapp)->bpages)); + } + /* + * Attempt to add pages to our pool on a per-instance + * basis up to a sane limit. + */ + maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->lowaddr)); + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 + || (dmat->map_count > 0 + && total_bpages < maxpages)) { + int pages; + + if (dmat->lowaddr > bounce_lowaddr) { + /* + * Go through the pool and kill any pages + * that don't reside below lowaddr. + */ + panic("bus_dmamap_create: page reallocation " + "not implemented"); + } + pages = atop(dmat->maxsize); + pages = MIN(maxpages - total_bpages, pages); + error = alloc_bounce_pages(dmat, pages); + + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { + if (error == 0) + dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; + } else { + error = 0; + } + } + } else { + *mapp = NULL; + } + if (error == 0) + dmat->map_count++; + return (error); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + if (map != NULL) { + if (STAILQ_FIRST(&map->bpages) != NULL) + return (EBUSY); + kfree(map, M_DEVBUF); + } + dmat->map_count--; + return (0); +} + + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * + * mapp is degenerate. By definition this allocation should not require + * bounce buffers so do not allocate a dma map. + */ +int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + int mflags; + /* If we succeed, no mapping/bouncing will be required */ + *mapp = NULL; + + if (dmat->segments == NULL) { + KKASSERT(dmat->nsegments < 16384); + dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * + dmat->nsegments, M_DEVBUF, M_INTWAIT); + } + + if (flags & BUS_DMA_NOWAIT) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; + + if ((dmat->maxsize <= PAGE_SIZE) && + dmat->lowaddr >= ptoa(Maxmem)) { + *vaddr = kmalloc(dmat->maxsize, M_DEVBUF, mflags); + /* + * XXX Check whether the allocation crossed a page boundary + * and retry with power-of-2 alignment in that case. + */ + if ((((intptr_t)*vaddr) & PAGE_MASK) != + (((intptr_t)*vaddr + dmat->maxsize) & PAGE_MASK)) { + size_t size; + kfree(*vaddr, M_DEVBUF); + /* XXX check for overflow? */ + for (size = 1; size <= dmat->maxsize; size <<= 1) + ; + *vaddr = kmalloc(size, M_DEVBUF, mflags); + } + } else { + /* + * XXX Use Contigmalloc until it is merged into this facility + * and handles multi-seg allocations. Nobody is doing + * multi-seg allocations yet though. + */ + *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags, + 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul, + dmat->boundary); + } + if (*vaddr == NULL) + return (ENOMEM); + return (0); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. 
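Illustration only (a user-space model with hypothetical names): the predicate that bus_dmamem_alloc() above uses to choose plain kmalloc() over contigmalloc(), and that the free side must mirror, is simply "fits in one page and the device can reach all of physical memory":

#include <stdio.h>

#define PAGE_SIZE   4096

/*
 * Model of the test bus_dmamem_alloc()/bus_dmamem_free() agree on:
 * maxsize <= PAGE_SIZE and lowaddr covering all of RAM means plain
 * kmalloc()/kfree(); anything else goes through contigmalloc()/contigfree().
 */
static int
uses_plain_malloc(size_t maxsize, unsigned long lowaddr, unsigned long maxmem)
{
    return (maxsize <= PAGE_SIZE && lowaddr >= maxmem);
}

int
main(void)
{
    unsigned long maxmem = 1UL << 30;   /* pretend 1GB of physical memory */

    printf("512B, no address limit : %s\n",
        uses_plain_malloc(512, ~0UL, maxmem) ? "kmalloc" : "contigmalloc");
    printf("64KB, ISA (<16MB) limit: %s\n",
        uses_plain_malloc(65536, 1UL << 24, maxmem) ? "kmalloc" : "contigmalloc");
    return 0;
}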
Make the same choice for free/contigfree. + */ +void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + /* + * dmamem does not need to be bounced, so the map should be + * NULL + */ + if (map != NULL) + panic("bus_dmamem_free: Invalid map freed\n"); + if ((dmat->maxsize <= PAGE_SIZE) && + dmat->lowaddr >= ptoa(Maxmem)) + kfree(vaddr, M_DEVBUF); + else + contigfree(vaddr, dmat->maxsize, M_DEVBUF); +} + +#define BUS_DMAMAP_NSEGS ((BUS_SPACE_MAXSIZE / PAGE_SIZE) + 1) + +/* + * Map the buffer buf into bus space using the dmamap map. + */ +int +bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, bus_dmamap_callback_t *callback, + void *callback_arg, int flags) +{ + vm_offset_t vaddr; + vm_paddr_t paddr; + bus_dma_segment_t *sg; + int seg; + int error; + vm_paddr_t nextpaddr; + + if (map == NULL) + map = &nobounce_dmamap; + + error = 0; + /* + * If we are being called during a callback, pagesneeded will + * be non-zero, so we can avoid doing the work twice. + */ + if (dmat->lowaddr < ptoa(Maxmem) && + map->pagesneeded == 0) { + vm_offset_t vendaddr; + + /* + * Count the number of bounce pages + * needed in order to complete this transfer + */ + vaddr = trunc_page((vm_offset_t)buf); + vendaddr = (vm_offset_t)buf + buflen; + + while (vaddr < vendaddr) { + paddr = pmap_kextract(vaddr); + if (run_filter(dmat, paddr) != 0) { + + map->pagesneeded++; + } + vaddr += PAGE_SIZE; + } + } + + /* Reserve Necessary Bounce Pages */ + if (map->pagesneeded != 0) { + crit_enter(); + if (reserve_bounce_pages(dmat, map) != 0) { + + /* Queue us for resources */ + map->dmat = dmat; + map->buf = buf; + map->buflen = buflen; + map->callback = callback; + map->callback_arg = callback_arg; + + STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); + crit_exit(); + + return (EINPROGRESS); + } + crit_exit(); + } + + vaddr = (vm_offset_t)buf; + sg = dmat->segments; + seg = 1; + sg->ds_len = 0; + + nextpaddr = 0; + do { + bus_size_t size; + + paddr = pmap_kextract(vaddr); + size = PAGE_SIZE - (paddr & PAGE_MASK); + if (size > buflen) + size = buflen; + + if (map->pagesneeded != 0 && run_filter(dmat, paddr)) { + paddr = add_bounce_page(dmat, map, vaddr, size); + } + + if (sg->ds_len == 0) { + sg->ds_addr = paddr; + sg->ds_len = size; + } else if (paddr == nextpaddr) { + sg->ds_len += size; + } else { + /* Go to the next segment */ + sg++; + seg++; + if (seg > dmat->nsegments) + break; + sg->ds_addr = paddr; + sg->ds_len = size; + } + vaddr += size; + nextpaddr = paddr + size; + buflen -= size; + } while (buflen > 0); + + if (buflen != 0) { + kprintf("bus_dmamap_load: Too many segs! buf_len = 0x%lx\n", + (u_long)buflen); + error = EFBIG; + } + + (*callback)(callback_arg, dmat->segments, seg, error); + + return (0); +} + +/* + * Utility function to load a linear buffer. lastaddrp holds state + * between invocations (for multiple-buffer loads). segp contains + * the starting segment on entrace, and the ending segment on exit. + * first indicates if this is the first invocation of this function. 
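For reference, the bounce-page pre-count that bus_dmamap_load() above performs before committing to a transfer is nothing more than a page walk over the buffer. A self-contained user-space rendering (illustration only; needs_bounce() stands in for the kernel's pmap_kextract()/run_filter() pair):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE   4096
#define PAGE_MASK   (PAGE_SIZE - 1)
#define trunc_page(x)   ((x) & ~(uintptr_t)PAGE_MASK)

/* Stand-in for run_filter(): addresses above lowaddr must be bounced. */
static int
needs_bounce(uintptr_t paddr, uintptr_t lowaddr)
{
    return (paddr > lowaddr);
}

/* Same page walk bus_dmamap_load() uses to pre-count bounce pages. */
static int
count_bounce_pages(uintptr_t buf, size_t buflen, uintptr_t lowaddr)
{
    uintptr_t va = trunc_page(buf);
    uintptr_t end = buf + buflen;
    int pages = 0;

    while (va < end) {
        if (needs_bounce(va, lowaddr))  /* kernel uses pmap_kextract(va) here */
            pages++;
        va += PAGE_SIZE;
    }
    return (pages);
}

int
main(void)
{
    /* a misaligned 2-page buffer touches 3 pages, all above a 16MB limit */
    printf("%d bounce pages needed\n",
        count_bounce_pages(0x01000010, 2 * PAGE_SIZE, 0x00ffffff));
    return 0;
}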
+ */ +static int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, + void *buf, bus_size_t buflen, + struct thread *td, + int flags, + vm_offset_t *lastaddrp, + int *segp, + int first) +{ + bus_dma_segment_t *segs; + bus_size_t sgsize; + bus_addr_t curaddr, lastaddr, baddr, bmask; + vm_offset_t vaddr = (vm_offset_t)buf; + int seg; + pmap_t pmap; + + if (td->td_proc != NULL) + pmap = vmspace_pmap(td->td_proc->p_vmspace); + else + pmap = NULL; + + segs = dmat->segments; + lastaddr = *lastaddrp; + bmask = ~(dmat->boundary - 1); + + for (seg = *segp; buflen > 0 ; ) { + /* + * Get the physical address for this segment. + */ + if (pmap) + curaddr = pmap_extract(pmap, vaddr); + else + curaddr = pmap_kextract(vaddr); + + /* + * Compute the segment size, and adjust counts. + */ + sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); + if (buflen < sgsize) + sgsize = buflen; + + /* + * Make sure we don't cross any boundaries. + */ + if (dmat->boundary > 0) { + baddr = (curaddr + dmat->boundary) & bmask; + if (sgsize > (baddr - curaddr)) + sgsize = (baddr - curaddr); + } + + /* + * Insert chunk into a segment, coalescing with + * previous segment if possible. + */ + if (first) { + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + first = 0; + } else { + if (curaddr == lastaddr && + (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && + (dmat->boundary == 0 || + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + segs[seg].ds_len += sgsize; + else { + if (++seg >= dmat->nsegments) + break; + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } + } + + lastaddr = curaddr + sgsize; + vaddr += sgsize; + buflen -= sgsize; + } + + *segp = seg; + *lastaddrp = lastaddr; + + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +/* + * Like _bus_dmamap_load(), but for mbufs. + */ +int +bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m0, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + int nsegs, error; + + KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL, + ("bus_dmamap_load_mbuf: No support for bounce pages!")); + KASSERT(m0->m_flags & M_PKTHDR, + ("bus_dmamap_load_mbuf: no packet header")); + + nsegs = 0; + error = 0; + if (m0->m_pkthdr.len <= dmat->maxsize) { + int first = 1; + vm_offset_t lastaddr = 0; + struct mbuf *m; + + for (m = m0; m != NULL && error == 0; m = m->m_next) { + if ( m->m_len == 0 ) + continue; + error = _bus_dmamap_load_buffer(dmat, + m->m_data, m->m_len, + curthread, flags, &lastaddr, + &nsegs, first); + first = 0; + } + } else { + error = EINVAL; + } + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs+1, m0->m_pkthdr.len, error); + } + return (error); +} + +/* + * Like _bus_dmamap_load(), but for uios. 
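The segment coalescing done by _bus_dmamap_load_buffer() above, and therefore inherited by the mbuf and uio loaders built on it, reduces to one rule: merge a chunk into the previous segment when it is physically contiguous with it and the merged length still fits maxsegsz. A minimal user-space sketch (illustration only; the boundary check is omitted for brevity):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NSEGMAX     8
#define MAXSEGSZ    65536

struct seg { uintptr_t ds_addr; size_t ds_len; };

/* Add one physically contiguous chunk, coalescing with the previous segment. */
static int
add_chunk(struct seg *segs, int nseg, uintptr_t addr, size_t len)
{
    if (nseg > 0 &&
        addr == segs[nseg - 1].ds_addr + segs[nseg - 1].ds_len &&
        segs[nseg - 1].ds_len + len <= MAXSEGSZ) {
        segs[nseg - 1].ds_len += len;
        return (nseg);
    }
    if (nseg == NSEGMAX)
        return (-1);            /* the kernel would return EFBIG */
    segs[nseg].ds_addr = addr;
    segs[nseg].ds_len = len;
    return (nseg + 1);
}

int
main(void)
{
    struct seg segs[NSEGMAX];
    int n = 0, i;

    n = add_chunk(segs, n, 0x1000, 0x1000);  /* first page */
    n = add_chunk(segs, n, 0x2000, 0x1000);  /* contiguous: merged */
    n = add_chunk(segs, n, 0x8000, 0x0800);  /* gap: new segment */
    for (i = 0; i < n; i++)
        printf("seg %d: addr 0x%lx len 0x%zx\n",
            i, (unsigned long)segs[i].ds_addr, segs[i].ds_len);
    return 0;
}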
+ */ +int +bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, + struct uio *uio, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + vm_offset_t lastaddr; + int nsegs, error, first, i; + bus_size_t resid; + struct iovec *iov; + struct thread *td = NULL; + + KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL, + ("bus_dmamap_load_uio: No support for bounce pages!")); + + resid = uio->uio_resid; + iov = uio->uio_iov; + + if (uio->uio_segflg == UIO_USERSPACE) { + td = uio->uio_td; + KASSERT(td != NULL && td->td_proc != NULL, + ("bus_dmamap_load_uio: USERSPACE but no proc")); + } + + nsegs = 0; + error = 0; + first = 1; + for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { + /* + * Now at the first iovec to load. Load each iovec + * until we have exhausted the residual count. + */ + bus_size_t minlen = + resid < iov[i].iov_len ? resid : iov[i].iov_len; + caddr_t addr = (caddr_t) iov[i].iov_base; + + error = _bus_dmamap_load_buffer(dmat, + addr, minlen, + td, flags, &lastaddr, &nsegs, first); + first = 0; + + resid -= minlen; + } + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs+1, uio->uio_resid, error); + } + return (error); +} + +/* + * Release the mapping held by map. + */ +void +_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bounce_page *bpage; + + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + STAILQ_REMOVE_HEAD(&map->bpages, links); + free_bounce_page(dmat, bpage); + } +} + +void +_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + + if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + + /* + * Handle data bouncing. 
We might also + * want to add support for invalidating + * the caches on broken hardware + */ + switch (op) { + case BUS_DMASYNC_PREWRITE: + while (bpage != NULL) { + bcopy((void *)bpage->datavaddr, + (void *)bpage->vaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + break; + + case BUS_DMASYNC_POSTREAD: + while (bpage != NULL) { + bcopy((void *)bpage->vaddr, + (void *)bpage->datavaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + break; + case BUS_DMASYNC_PREREAD: + case BUS_DMASYNC_POSTWRITE: + /* No-ops */ + break; + } + } +} + +static int +alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) +{ + int count; + + count = 0; + if (total_bpages == 0) { + STAILQ_INIT(&bounce_page_list); + STAILQ_INIT(&bounce_map_waitinglist); + STAILQ_INIT(&bounce_map_callbacklist); + } + + while (numpages > 0) { + struct bounce_page *bpage; + + bpage = (struct bounce_page *)kmalloc(sizeof(*bpage), M_DEVBUF, + M_INTWAIT); + + if (bpage == NULL) + break; + bzero(bpage, sizeof(*bpage)); + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0ul, + dmat->lowaddr, + PAGE_SIZE, + 0); + if (bpage->vaddr == NULL) { + kfree(bpage, M_DEVBUF); + break; + } + bpage->busaddr = pmap_kextract(bpage->vaddr); + crit_enter(); + STAILQ_INSERT_TAIL(&bounce_page_list, bpage, links); + total_bpages++; + free_bpages++; + crit_exit(); + count++; + numpages--; + } + return (count); +} + +static int +reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + int pages; + + pages = MIN(free_bpages, map->pagesneeded - map->pagesreserved); + free_bpages -= pages; + reserved_bpages += pages; + map->pagesreserved += pages; + pages = map->pagesneeded - map->pagesreserved; + + return (pages); +} + +static bus_addr_t +add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, + bus_size_t size) +{ + struct bounce_page *bpage; + + if (map->pagesneeded == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesneeded--; + + if (map->pagesreserved == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesreserved--; + + crit_enter(); + bpage = STAILQ_FIRST(&bounce_page_list); + if (bpage == NULL) + panic("add_bounce_page: free page list is empty"); + + STAILQ_REMOVE_HEAD(&bounce_page_list, links); + reserved_bpages--; + active_bpages++; + crit_exit(); + + bpage->datavaddr = vaddr; + bpage->datacount = size; + STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); + return (bpage->busaddr); +} + +static void +free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) +{ + struct bus_dmamap *map; + + bpage->datavaddr = 0; + bpage->datacount = 0; + + crit_enter(); + STAILQ_INSERT_HEAD(&bounce_page_list, bpage, links); + free_bpages++; + active_bpages--; + if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { + if (reserve_bounce_pages(map->dmat, map) == 0) { + panic("free_bounce_pages: uncoded\n"); +#if 0 + STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, + map, links); + busdma_swi_pending = 1; + setsoftvm(); +#endif + } + } + crit_exit(); +} + +#if 0 + +void +busdma_swi(void) +{ + struct bus_dmamap *map; + + crit_enter(); + while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { + STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); + crit_exit(); + bus_dmamap_load(map->dmat, map, map->buf, map->buflen, + map->callback, map->callback_arg, /*flags*/0); + crit_enter(); + } + crit_exit(); +} + +#endif + diff --git a/sys/platform/vkernel/platform/console.c 
b/sys/platform/vkernel/platform/console.c new file mode 100644 index 0000000000..d30170f677 --- /dev/null +++ b/sys/platform/vkernel/platform/console.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/platform/vkernel/platform/console.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Global console locking functions + */ +void +cons_lock(void) +{ +} + +void +cons_unlock(void) +{ +} + +/************************************************************************ + * CONSOLE DEVICE * + ************************************************************************ + * + */ + +#define CDEV_MAJOR 183 + +static int vcons_tty_param(struct tty *tp, struct termios *tio); +static void vcons_tty_start(struct tty *tp); + +static d_open_t vcons_open; +static d_close_t vcons_close; +static d_ioctl_t vcons_ioctl; + +static struct dev_ops vcons_ops = { + { "vcons", CDEV_MAJOR, D_TTY }, + .d_open = vcons_open, + .d_close = vcons_close, + .d_read = ttyread, + .d_write = ttywrite, + .d_ioctl = vcons_ioctl, + .d_poll = ttypoll, +}; + +static int +vcons_open(struct dev_open_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + int error; + + if (minor(dev) != 0) + return(ENXIO); + + tp = dev->si_tty = ttymalloc(dev->si_tty); + tp->t_oproc = vcons_tty_start; + tp->t_param = vcons_tty_param; + tp->t_stop = nottystop; + tp->t_dev = dev; + + if (tp->t_state & TS_ISOPEN) + return (EBUSY); + + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = TTYDEF_SPEED; + tp->t_ospeed = TTYDEF_SPEED; + ttsetwater(tp); + + error = (*linesw[tp->t_line].l_open)(dev, tp); + return(error); +} + +static int +vcons_close(struct dev_close_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + + if (minor(dev) != 0) + return(ENXIO); + tp = dev->si_tty; + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line].l_close)(tp, ap->a_fflag); + ttyclose(tp); + } + return(0); +} + +static int +vcons_ioctl(struct dev_ioctl_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + int error; + + if (minor(dev) != 0) + return(ENXIO); + tp = dev->si_tty; + error = (*linesw[tp->t_line].l_ioctl)(tp, ap->a_cmd, ap->a_data, + ap->a_fflag, ap->a_cred); + if (error != ENOIOCTL) + return (error); + error = ttioctl(tp, ap->a_cmd, ap->a_data, ap->a_fflag); + if (error != ENOIOCTL) + return (error); + return (ENOTTY); +} + +static int +vcons_tty_param(struct tty *tp, struct termios *tio) +{ + tp->t_ispeed = tio->c_ispeed; + tp->t_ospeed = tio->c_ospeed; + tp->t_cflag = tio->c_cflag; + return(0); +} + +static void +vcons_tty_start(struct tty *tp) +{ + int n; + char buf[64]; + + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + ttwwakeup(tp); + return; + } + tp->t_state |= TS_BUSY; + while ((n = q_to_b(&tp->t_outq, buf, sizeof(buf))) > 0) + write(1, buf, n); + tp->t_state &= ~TS_BUSY; + ttwwakeup(tp); +} + +/************************************************************************ + * KERNEL CONSOLE INTERFACE * + ************************************************************************ + * + * Kernel direct-call interface console driver + */ +static cn_probe_t vconsprobe; +static cn_init_t vconsinit; +static cn_term_t vconsterm; +static cn_getc_t vconsgetc; +static cn_checkc_t vconscheckc; +static cn_putc_t vconsputc; + +CONS_DRIVER(vcons, vconsprobe, vconsinit, vconsterm, vconsgetc, + vconscheckc, vconsputc, NULL); + +static void +vconsprobe(struct consdev *cp) +{ + cp->cn_pri = CN_NORMAL; + cp->cn_dev = make_dev(&vcons_ops, 255, + UID_ROOT, GID_WHEEL, 0600, "vconsolectl"); +} + +static void 
+vconsinit(struct consdev *cp) +{ +} + +static void +vconsterm(struct consdev *vp) +{ +} + +static int +vconsgetc(cdev_t dev) +{ + unsigned char c; + + if (read(0, &c, 1) == 1) + return((int)c); + return(-1); +} + +static int +vconscheckc(cdev_t dev) +{ + unsigned char c; + + if (__pread(0, &c, 1, O_FNONBLOCKING, -1LL) == 1) + return((int)c); + return(-1); +} + +static void +vconsputc(cdev_t dev, int c) +{ + char cc = c; + + write(1, &cc, 1); +} + + diff --git a/sys/platform/vkernel/platform/copyio.c b/sys/platform/vkernel/platform/copyio.c index 1d2fe55e44..ec46e380ab 100644 --- a/sys/platform/vkernel/platform/copyio.c +++ b/sys/platform/vkernel/platform/copyio.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.3 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -50,6 +50,26 @@ ovbcopy(const void *src, void *dst, size_t len) bcopy(src, dst, len); } +void +bcopyi(const void *src, void *dst, size_t len) +{ + bcopy(src, dst, len); +} + +int +copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *lencopied) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if ((((char *)kdaddr)[i] = ((const char *)kfaddr)[i]) == 0) { + *lencopied = i + 1; + return(0); + } + } + return (ENAMETOOLONG); +} + /* * Copies a NUL-terminated string from user space to kernel space. * The number of bytes copied, including the terminator, is returned in diff --git a/sys/platform/vkernel/platform/init.c b/sys/platform/vkernel/platform/init.c index 43303b9a26..5f01c8610f 100644 --- a/sys/platform/vkernel/platform/init.c +++ b/sys/platform/vkernel/platform/init.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.5 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.6 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -71,8 +72,14 @@ vm_offset_t virtual_start; vm_offset_t virtual_end; vm_offset_t kernel_vm_end; vm_offset_t crashdumpmap; +vm_offset_t clean_sva; +vm_offset_t clean_eva; +struct msgbuf *msgbufp; +caddr_t ptvmmap; vpte_t *KernelPTD; vpte_t *KernelPTA; +u_int cpu_feature; /* XXX */ +u_int tsc_present; /* XXX */ struct privatespace *CPU_prvspace; @@ -321,6 +328,24 @@ init_kern_memory(void) crashdumpmap = virtual_start; virtual_start += MAXDUMPPGS * PAGE_SIZE; + /* + * msgbufp maps the system message buffer + */ + assert((MSGBUF_SIZE & PAGE_MASK) == 0); + msgbufp = (void *)virtual_start; + for (i = 0; i < (MSGBUF_SIZE >> PAGE_SHIFT); ++i) { + pmap_kenter_quick(virtual_start, phys_avail[0]); + virtual_start += PAGE_SIZE; + phys_avail[0] += PAGE_SIZE; + } + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* + * used by kern_memio for /dev/mem access + */ + ptvmmap = (caddr_t)virtual_start; + virtual_start += PAGE_SIZE; + /* * Bootstrap the kernel_pmap */ @@ -443,3 +468,11 @@ cpu_reset(void) kprintf("cpu reset\n"); exit(0); } + +void +cpu_halt(void) +{ + kprintf("cpu halt\n"); + for (;;) + __asm__ __volatile("hlt"); +} diff --git a/sys/platform/vkernel/platform/ipl_funcs.c b/sys/platform/vkernel/platform/ipl_funcs.c new file mode 100644 index 0000000000..cb7ed7d538 --- /dev/null +++ b/sys/platform/vkernel/platform/ipl_funcs.c @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 1997 Bruce Evans. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ipl_funcs.c,v 1.32.2.5 2002/12/17 18:04:02 sam Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/ipl_funcs.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Bits in the ipending bitmap variable must be set atomically because + * ipending may be manipulated by interrupts or other cpu's without holding + * any locks. + * + * Note: setbits uses a locked or, making simple cases MP safe. + */ +#define DO_SETBITS(name, var, bits) \ +void \ +name(void) \ +{ \ + struct mdglobaldata *gd = mdcpu; \ + atomic_set_int_nonlocked(var, bits); \ + atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND); \ +} \ + +DO_SETBITS(setdelayed, &gd->gd_spending, loadandclear(&gd->gd_sdelayed)) + +DO_SETBITS(setsoftcamnet,&gd->gd_spending, SWI_CAMNET_PENDING) +DO_SETBITS(setsoftcambio,&gd->gd_spending, SWI_CAMBIO_PENDING) +DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING) +DO_SETBITS(setsoftnet, &gd->gd_spending, SWI_NET_PENDING) +DO_SETBITS(setsofttty, &gd->gd_spending, SWI_TTY_PENDING) +DO_SETBITS(setsoftvm, &gd->gd_spending, SWI_VM_PENDING) +DO_SETBITS(setsofttq, &gd->gd_spending, SWI_TQ_PENDING) +DO_SETBITS(setsoftcrypto,&gd->gd_spending, SWI_CRYPTO_PENDING) + +DO_SETBITS(schedsoftcamnet, &gd->gd_sdelayed, SWI_CAMNET_PENDING) +DO_SETBITS(schedsoftcambio, &gd->gd_sdelayed, SWI_CAMBIO_PENDING) +DO_SETBITS(schedsoftnet, &gd->gd_sdelayed, SWI_NET_PENDING) +DO_SETBITS(schedsofttty, &gd->gd_sdelayed, SWI_TTY_PENDING) +DO_SETBITS(schedsoftvm, &gd->gd_sdelayed, SWI_VM_PENDING) +DO_SETBITS(schedsofttq, &gd->gd_sdelayed, SWI_TQ_PENDING) +/* YYY schedsoft what? */ + +unsigned +softclockpending(void) +{ + return (mdcpu->gd_spending & SWI_CLOCK_PENDING); +} + diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/machintr.c index 84fe6aff25..d8b877ed45 100644 --- a/sys/platform/vkernel/platform/machintr.c +++ b/sys/platform/vkernel/platform/machintr.c @@ -31,14 +31,20 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
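Written out by hand, one instantiation of the DO_SETBITS() macro from ipl_funcs.c above (the setsoftclock case) expands to the following; every setsoft*/schedsoft* helper is the same two atomic stores with a different bitmap and bit:

/* Hand expansion of DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING). */
void
setsoftclock(void)
{
    struct mdglobaldata *gd = mdcpu;

    atomic_set_int_nonlocked(&gd->gd_spending, SWI_CLOCK_PENDING);
    atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);
}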
* - * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.3 2007/01/05 22:18:20 dillon Exp $ */ #include +#include +#include #include #include #include +/* + * Interrupt Subsystem ABI + */ + static void dummy_intrdis(int); static void dummy_intren(int); static int dummy_vectorctl(int, int, int); @@ -89,3 +95,10 @@ dummy_finalize(void) { } +/* + * Process pending interrupts + */ +void +splz(void) +{ +} diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c index 39c0579a09..919be81c09 100644 --- a/sys/platform/vkernel/platform/pmap.c +++ b/sys/platform/vkernel/platform/pmap.c @@ -38,7 +38,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.1 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.2 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -409,7 +409,7 @@ pmap_pte(struct pmap *pmap, vm_offset_t va) { vpte_t *ptep; - ptep = pmap->pm_pdir[va >> PAGE_SHIFT]; + ptep = &pmap->pm_pdir[va >> PAGE_SHIFT]; if (*ptep & VPTE_PS) return(ptep); if (*ptep) @@ -450,6 +450,46 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) } } +void +pmap_kenter_sync(vm_offset_t va) +{ + pmap_inval_info info; + + pmap_inval_init(&info); + pmap_inval_add(&info, &kernel_pmap, va); + pmap_inval_flush(&info); +} + +void +pmap_kenter_sync_quick(vm_offset_t va) +{ + madvise((void *)va, PAGE_SIZE, MADV_INVAL); +} + +/* + * Map a contiguous range of physical memory to a KVM + */ +vm_offset_t +pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + while (start < end) { + pmap_kenter(virt, start); + virt += PAGE_SIZE; + start += PAGE_SIZE; + } + return (virt); +} + +vpte_t * +pmap_kpte(vm_offset_t va) +{ + vpte_t *ptep; + + KKASSERT(va >= KvaStart && va < KvaEnd); + ptep = KernelPTA + ((va - KvaStart) >> PAGE_SHIFT); + return(ptep); +} + /* * Enter a mapping into kernel_pmap without any SMP interactions. * @@ -1148,7 +1188,7 @@ pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va, * the SMP case. 
*/ if (oldpte & VPTE_G) - cpu_invlpg((void *)va); + madvise((void *)va, PAGE_SIZE, MADV_INVAL); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); @@ -1711,6 +1751,25 @@ retry: return mpte; } +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t rtval; + vpte_t pte; + + if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) { + if (pte & VPTE_PS) { + rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1); + rtval |= va & SEG_MASK; + } else { + pte = *(get_ptbase(pmap) + (va >> PAGE_SHIFT)); + rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK); + } + return(rtval); + } + return(0); +} + #define MAX_INIT_PT (96) /* @@ -2108,7 +2167,7 @@ pmap_zero_page(vm_paddr_t phys) panic("pmap_zero_page: CMAP3 busy"); *(int *)gd->gd_CMAP3 = VPTE_V | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); bzero(gd->gd_CADDR3, PAGE_SIZE); *(int *) gd->gd_CMAP3 = 0; @@ -2131,7 +2190,7 @@ pmap_page_assertzero(vm_paddr_t phys) panic("pmap_zero_page: CMAP3 busy"); *(int *)gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); for (i = 0; i < PAGE_SIZE; i += 4) { if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { panic("pmap_page_assertzero() @ %p not zero!\n", @@ -2159,7 +2218,7 @@ pmap_zero_page_area(vm_paddr_t phys, int off, int size) if (*(int *) gd->gd_CMAP3) panic("pmap_zero_page: CMAP3 busy"); *(int *) gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); bzero((char *)gd->gd_CADDR3 + off, size); *(int *) gd->gd_CMAP3 = 0; @@ -2187,8 +2246,8 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) *(int *) gd->gd_CMAP1 = VPTE_V | (src & PG_FRAME) | PG_A; *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR1); - cpu_invlpg(gd->gd_CADDR2); + madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); + madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); @@ -2218,8 +2277,8 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A; *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR1); - cpu_invlpg(gd->gd_CADDR2); + madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); + madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), @@ -2709,8 +2768,11 @@ pmap_activate(struct proc *p) #if defined(SWTCH_OPTIM_STATS) tlb_flush_count++; #endif + panic("pmap_activate"); /* XXX store vmspace id in context */ +#if 0 p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir); load_cr3(p->p_thread->td_pcb->pcb_cr3); +#endif } void diff --git a/sys/platform/vkernel/platform/pmap_inval.c b/sys/platform/vkernel/platform/pmap_inval.c index c59011ec03..b4a5594acc 100644 --- a/sys/platform/vkernel/platform/pmap_inval.c +++ b/sys/platform/vkernel/platform/pmap_inval.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.1 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.2 2007/01/05 22:18:20 dillon Exp $ */ /* @@ -52,6 +52,8 @@ #include #include +#include + #include #include #include @@ -64,22 +66,19 @@ #include #include -#ifdef SMP - static void _cpu_invltlb(void *dummy) { - cpu_invltlb(); + /* XXX madvise over entire address space is really expensive */ + madvise((void *)KvaStart, KvaSize, MADV_INVAL); } static void _cpu_invl1pg(void *data) { - cpu_invlpg(data); + madvise(data, PAGE_SIZE, MADV_INVAL); } -#endif - /* * Initialize for add or flush */ @@ -142,9 +141,9 @@ pmap_inval_flush(pmap_inval_info_t info) lwkt_cpusync_finish(&info->pir_cpusync); #else if (info->pir_flags & PIRF_INVLTLB) - cpu_invltlb(); + _cpu_invltlb(NULL); else if (info->pir_flags & PIRF_INVL1PG) - cpu_invlpg(info->pir_cpusync.cs_data); + _cpu_invl1pg(info->pir_cpusync.cs_data); #endif info->pir_flags = 0; } diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/platform/sysarch.c similarity index 78% copy from sys/platform/vkernel/include/md_var.h copy to sys/platform/vkernel/platform/sysarch.c index 628dac29f2..0ddcaf9d54 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/platform/sysarch.c @@ -31,29 +31,30 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/sysarch.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ */ - -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -#ifndef _SYS_TYPES_H_ #include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif - -extern char sigcode[]; -extern int szsigcode; -extern vpte_t *KernelPTA; -extern vpte_t *KernelPTD; -extern vm_offset_t crashdumpmap; +#include +#include +#include +#include +#include -struct mdglobaldata; +int +sys_sysarch(struct sysarch_args *uap) +{ + return (EOPNOTSUPP); +} -void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); +int +cpu_set_iopl(void) +{ + return (EOPNOTSUPP); +} -#endif +int +cpu_clr_iopl(void) +{ + return (EOPNOTSUPP); +} diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/systimer.c similarity index 66% copy from sys/platform/vkernel/platform/machintr.c copy to sys/platform/vkernel/platform/systimer.c index 84fe6aff25..3d649c812c 100644 --- a/sys/platform/vkernel/platform/machintr.c +++ b/sys/platform/vkernel/platform/systimer.c @@ -31,61 +31,66 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
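The pmap and pmap_inval changes above replace the hardware invlpg/invltlb primitives with host madvise() calls, since invalidations have to be requested from the host kernel that actually maps the vkernel's address space. A condensed sketch of that idiom (illustration only; MADV_INVAL is the DragonFly-specific advice used by the patch, and the helper names here are hypothetical stand-ins for _cpu_invl1pg()/_cpu_invltlb()):

#include <sys/mman.h>
#include <stddef.h>

#define VPAGE_SIZE  4096        /* stand-in for the kernel's PAGE_SIZE */

/* One-page invalidation, as _cpu_invl1pg() and pmap_kenter_sync_quick() do. */
static void
vkernel_invlpg(void *va)
{
    madvise(va, VPAGE_SIZE, MADV_INVAL);
}

/* Whole-"TLB" invalidation, as _cpu_invltlb() does over KvaStart..KvaSize. */
static void
vkernel_invltlb(void *kva_start, size_t kva_size)
{
    madvise(kva_start, kva_size, MADV_INVAL);
}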
* - * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/systimer.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ */ #include -#include -#include -#include +#include +#include +#include +#include +#include -static void dummy_intrdis(int); -static void dummy_intren(int); -static int dummy_vectorctl(int, int, int); -static int dummy_setvar(int, const void *); -static int dummy_getvar(int, void *); -static void dummy_finalize(void); +#include -struct machintr_abi MachIntrABI = { - MACHINTR_GENERIC, - dummy_intrdis, - dummy_intren, - dummy_vectorctl, - dummy_setvar, - dummy_getvar, - dummy_finalize -}; +int disable_rtc_set; +SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, + CTLFLAG_RW, &disable_rtc_set, 0, ""); -static void -dummy_intrdis(int intr) +int adjkerntz; +int wall_cmos_clock = 1; + +void +cpu_initclocks(void) { + panic("cpu_initclocks"); } -static void -dummy_intren(int intr) +void +cputimer_intr_config(struct cputimer *timer) { + panic("cputimer_intr_config"); } -static int -dummy_vectorctl(int op, int intr, int flags) +void +cputimer_intr_reload(sysclock_t reload) { - return (EOPNOTSUPP); + panic("cputimer_intr_reload"); } -static int -dummy_setvar(int varid, const void *buf) +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + */ +void +inittodr(time_t base) { - return (ENOENT); + panic("inittodr"); } -static int -dummy_getvar(int varid, void *buf) +/* + * Write system time back to the RTC + */ +void +resettodr(void) { - return (ENOENT); + panic("resettodr"); } -static void -dummy_finalize(void) +void +DELAY(int usec) { + usleep(usec); } + -- 2.41.0
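Finally, the console and systimer code above lean directly on host system calls: vconsputc() and vconsgetc() are write(1) and read(0), vconscheckc() polls with the DragonFly extended __pread(..., O_FNONBLOCKING, -1LL) call, and DELAY() maps to usleep(). A portable user-space approximation (illustration only; fcntl() with O_NONBLOCK stands in for the non-blocking pread used by the kernel):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void
cons_putc(int c)
{
    char cc = c;

    write(1, &cc, 1);               /* vconsputc(): emit on host stdout */
}

static int
cons_getc(void)
{
    unsigned char c;

    if (read(0, &c, 1) == 1)        /* vconsgetc(): blocking read of stdin */
        return (c);
    return (-1);
}

static int
cons_checkc(void)
{
    unsigned char c;
    int flags = fcntl(0, F_GETFL);
    int n;

    fcntl(0, F_SETFL, flags | O_NONBLOCK);  /* poll without blocking */
    n = read(0, &c, 1);
    fcntl(0, F_SETFL, flags);
    return (n == 1 ? c : -1);
}

int
main(void)
{
    cons_putc('>');
    if (cons_checkc() == -1)
        usleep(10000);              /* DELAY(10000) maps to usleep() */
    printf("\ngot: %d\n", cons_getc());
    return 0;
}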