Continue fleshing out the VKERNEL.
author		Matthew Dillon <dillon@dragonflybsd.org>
		Fri, 5 Jan 2007 22:18:20 +0000 (22:18 +0000)
committer	Matthew Dillon <dillon@dragonflybsd.org>
		Fri, 5 Jan 2007 22:18:20 +0000 (22:18 +0000)
26 files changed:
sys/platform/vkernel/conf/files
sys/platform/vkernel/i386/autoconf.c
sys/platform/vkernel/i386/cpu_regs.c [new file with mode: 0644]
sys/platform/vkernel/i386/db_interface.c [new file with mode: 0644]
sys/platform/vkernel/i386/db_trace.c [new file with mode: 0644]
sys/platform/vkernel/i386/global.s
sys/platform/vkernel/i386/locore.s
sys/platform/vkernel/i386/npx.c
sys/platform/vkernel/i386/swtch.s
sys/platform/vkernel/i386/tls.c [new file with mode: 0644]
sys/platform/vkernel/i386/trap.c [new file with mode: 0644]
sys/platform/vkernel/i386/userldt.c [copied from sys/platform/vkernel/include/md_var.h with 78% similarity]
sys/platform/vkernel/i386/vm_machdep.c [new file with mode: 0644]
sys/platform/vkernel/include/globaldata.h
sys/platform/vkernel/include/md_var.h
sys/platform/vkernel/include/pcb_ext.h [new file with mode: 0644]
sys/platform/vkernel/platform/busdma_machdep.c [new file with mode: 0644]
sys/platform/vkernel/platform/console.c [new file with mode: 0644]
sys/platform/vkernel/platform/copyio.c
sys/platform/vkernel/platform/init.c
sys/platform/vkernel/platform/ipl_funcs.c [new file with mode: 0644]
sys/platform/vkernel/platform/machintr.c
sys/platform/vkernel/platform/pmap.c
sys/platform/vkernel/platform/pmap_inval.c
sys/platform/vkernel/platform/sysarch.c [copied from sys/platform/vkernel/include/md_var.h with 78% similarity]
sys/platform/vkernel/platform/systimer.c [copied from sys/platform/vkernel/platform/machintr.c with 66% similarity]

diff --git a/sys/platform/vkernel/conf/files b/sys/platform/vkernel/conf/files
index 1927fed..dda23b0 100644 (file)
@@ -1,7 +1,7 @@
 # This file tells config what files go into building a kernel,
 # files marked standard are always included.
 #
-# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.6 2007/01/02 04:24:24 dillon Exp $
+# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.7 2007/01/05 22:18:17 dillon Exp $
 #
 bf_enc.o                       optional        ipsec ipsec_esp         \
        dependency      "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S"             \
@@ -36,6 +36,11 @@ machine/vkernel/i386/autoconf.c      standard
 cpu/i386/misc/elf_machdep.c            standard
 cpu/i386/misc/in_cksum2.s              optional        inet
 cpu/i386/misc/ktr.c                    optional        ktr
+cpu/i386/misc/db_disasm.c              optional        ddb
+#
+# DOS mbr
+kern/subr_diskmbr.c                    standard
+
 #vkernel/vkernel/pmap.c                standard
 #vkernel/vkernel/pmap_inval.c  standard
 #vkernel/vkernel/spinlock.s    standard
@@ -49,8 +54,21 @@ cpu/i386/misc/ktr.c                  optional        ktr
 machine/vkernel/i386/global.s          standard
 machine/vkernel/i386/swtch.s           standard
 machine/vkernel/i386/npx.c             mandatory       npx
+machine/vkernel/i386/db_interface.c    standard
+machine/vkernel/i386/db_trace.c                standard
+machine/vkernel/i386/vm_machdep.c      standard
+machine/vkernel/i386/cpu_regs.c                standard
+machine/vkernel/i386/userldt.c         standard
+machine/vkernel/i386/tls.c             standard
+machine/vkernel/i386/trap.c            standard
 machine/vkernel/platform/init.c                standard
 machine/vkernel/platform/globaldata.c  standard
 machine/vkernel/platform/machintr.c    standard
 machine/vkernel/platform/copyio.c      standard
 machine/vkernel/platform/pmap.c                standard
+machine/vkernel/platform/pmap_inval.c  standard
+machine/vkernel/platform/busdma_machdep.c standard
+machine/vkernel/platform/sysarch.c     standard
+machine/vkernel/platform/systimer.c    standard
+machine/vkernel/platform/console.c     standard
+machine/vkernel/platform/ipl_funcs.c   standard
diff --git a/sys/platform/vkernel/i386/autoconf.c b/sys/platform/vkernel/i386/autoconf.c
index c2c4c94..4afec61 100644 (file)
@@ -35,7 +35,7 @@
  *
  *     from: @(#)autoconf.c    7.1 (Berkeley) 5/9/91
  * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $
- * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.5 2006/12/23 00:27:03 swildner Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.6 2007/01/05 22:18:18 dillon Exp $
  */
 
 /*
@@ -60,6 +60,7 @@
 #include <sys/systm.h>
 #include <sys/bootmaj.h>
 #include <sys/bus.h>
+#include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/disklabel.h>
 #include <sys/diskslice.h>
 #include <sys/device.h>
 #include <sys/machintr.h>
 
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pager.h>
+
 #if 0
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 device_t isa_bus_device = 0;
 #endif
 
-static void    configure_first (void *);
-static void    configure (void *);
-static void    configure_final (void *);
+static void cpu_startup (void *);
+static void configure_first (void *);
+static void configure (void *);
+static void configure_final (void *);
 
 #if defined(FFS) && defined(FFS_ROOT)
 static void    setroot (void);
@@ -99,6 +106,7 @@ static void  pxe_setup_nfsdiskless(void);
 #endif
 #endif
 
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL);
 /* SI_ORDER_SECOND is hookable */
 SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL);
@@ -108,6 +116,73 @@ SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL);
 cdev_t rootdev = NOCDEV;
 cdev_t dumpdev = NOCDEV;
 
+/*
+ * cpu_startup() - machine-dependent boot-time initialization.  Announce
+ * the system, size the buffer cache, and carve the buffer, pager, and
+ * exec submaps out of the kernel map.
+ */
+static void
+cpu_startup(void *dummy)
+{
+       vm_offset_t buffer_sva;
+       vm_offset_t buffer_eva;
+       vm_offset_t pager_sva;
+       vm_offset_t pager_eva;
+       vm_offset_t minaddr;
+       vm_offset_t maxaddr;
+
+       kprintf("%s", version);
+       kprintf("real memory = %llu (%lluK bytes)\n",
+               ptoa(Maxmem), ptoa(Maxmem) / 1024);
+
+       if (nbuf == 0) {
+               int factor = 4 * BKVASIZE / 1024;
+               int kbytes = physmem * (PAGE_SIZE / 1024);
+
+               nbuf = 50;
+               if (kbytes > 4096)
+                       nbuf += min((kbytes - 4096) / factor, 65536 / factor);
+               if (kbytes > 65536)
+                       nbuf += (kbytes - 65536) * 2 / (factor * 5);
+               if (maxbcache && nbuf > maxbcache / BKVASIZE)
+                       nbuf = maxbcache / BKVASIZE;
+       }
+       if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) {
+               nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2);
+               kprintf("Warning: nbufs capped at %d\n", nbuf);
+       }
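+       /*
+        * Illustrative arithmetic (assuming PAGE_SIZE = 4K and
+        * BKVASIZE = 16K, so factor = 64): with 128MB of RAM,
+        * kbytes = 131072 and the sizing above yields
+        *   nbuf = 50 + min((131072-4096)/64, 65536/64)
+        *             + (131072-65536)*2/(64*5)
+        *        = 50 + 1024 + 409 = 1483
+        * before the KVA cap is applied.
+        */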
+
+       nswbuf = max(min(nbuf/4, 256), 16);
+#ifdef NSWBUF_MIN
+       if (nswbuf < NSWBUF_MIN)
+               nswbuf = NSWBUF_MIN;
+#endif
+#ifdef DIRECTIO
+        ffs_rawread_setup();
+#endif
+       kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva,
+                     (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
+       kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva,
+                     (nbuf*BKVASIZE));
+       buffer_map.system_map = 1;
+       kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva,
+                     (nswbuf*MAXPHYS) + pager_map_size);
+       pager_map.system_map = 1;
+       kmem_suballoc(&kernel_map, &exec_map, &minaddr, &maxaddr,
+                     (16*(ARG_MAX+(PAGE_SIZE*3))));
+#if defined(USERCONFIG)
+        userconfig();
+       cninit();               /* the preferred console may have changed */
+#endif
+       kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count),
+               ptoa(vmstats.v_free_count) / 1024);
+       bufinit();
+       vm_pager_bufferinit();
+#ifdef SMP
+       mp_start();
+       mp_announce();
+#endif
+       cpu_setregs();
+}
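+
+/*
+ * Resulting submap layout (sizes are the expressions used above):
+ *
+ *     kernel_map
+ *       clean_map     nbuf*BKVASIZE + nswbuf*MAXPHYS + pager_map_size
+ *         buffer_map  nbuf*BKVASIZE
+ *         pager_map   nswbuf*MAXPHYS + pager_map_size
+ *       exec_map      16*(ARG_MAX+(PAGE_SIZE*3))
+ */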
+
 /*
  * Determine i/o configuration for a machine.
  */
diff --git a/sys/platform/vkernel/i386/cpu_regs.c b/sys/platform/vkernel/i386/cpu_regs.c
new file mode 100644 (file)
index 0000000..73bfa47
--- /dev/null
@@ -0,0 +1,1253 @@
+/*-
+ * Copyright (c) 1992 Terrence R. Lambert.
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1982, 1987, 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
+ * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+#include "use_ether.h"
+#include "use_npx.h"
+#include "use_isa.h"
+#include "opt_atalk.h"
+#include "opt_compat.h"
+#include "opt_ddb.h"
+#include "opt_directio.h"
+#include "opt_inet.h"
+#include "opt_ipx.h"
+#include "opt_msgbuf.h"
+#include "opt_swap.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/signalvar.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/reboot.h>
+#include <sys/mbuf.h>
+#include <sys/msgbuf.h>
+#include <sys/sysent.h>
+#include <sys/sysctl.h>
+#include <sys/vmmeter.h>
+#include <sys/bus.h>
+#include <sys/upcall.h>
+#include <sys/usched.h>
+#include <sys/reg.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <sys/lock.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_extern.h>
+
+#include <sys/thread2.h>
+
+#include <sys/user.h>
+#include <sys/exec.h>
+#include <sys/cons.h>
+
+#include <ddb/ddb.h>
+
+#include <machine/cpu.h>
+#include <machine/clock.h>
+#include <machine/specialreg.h>
+#include <machine/md_var.h>
+#include <machine/pcb_ext.h>           /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>                /* CPU_prvspace */
+#include <machine/smp.h>
+#ifdef PERFMON
+#include <machine/perfmon.h>
+#endif
+#include <machine/cputypes.h>
+
+#include <bus/isa/rtc.h>
+#include <machine/vm86.h>
+#include <sys/random.h>
+#include <sys/ptrace.h>
+#include <machine/sigframe.h>
+
+extern void dblfault_handler (void);
+
+#ifndef CPU_DISABLE_SSE
+static void set_fpregs_xmm (struct save87 *, struct savexmm *);
+static void fill_fpregs_xmm (struct savexmm *, struct save87 *);
+#endif /* CPU_DISABLE_SSE */
+#ifdef DIRECTIO
+extern void ffs_rawread_setup(void);
+#endif /* DIRECTIO */
+
+#ifdef SMP
+int64_t tsc_offsets[MAXCPU];
+#else
+int64_t tsc_offsets[1];
+#endif
+
+#if defined(SWTCH_OPTIM_STATS)
+extern int swtch_optim_stats;
+SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
+       CTLFLAG_RD, &swtch_optim_stats, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
+       CTLFLAG_RD, &tlb_flush_count, 0, "");
+#endif
+
+int physmem = 0;
+
+static int
+sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
+{
+       int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
+       return (error);
+}
+
+SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
+       0, 0, sysctl_hw_physmem, "IU", "");
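+
+/*
+ * Userland reads this back in bytes, e.g. via "sysctl hw.physmem";
+ * ctob() converts the page count in physmem to a byte count.
+ */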
+
+static int
+sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
+{
+       int error = sysctl_handle_int(oidp, 0,
+               ctob(physmem - vmstats.v_wire_count), req);
+       return (error);
+}
+
+SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
+       0, 0, sysctl_hw_usermem, "IU", "");
+
+#if 0
+
+static int
+sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
+{
+       int error;
+
+       /* Unwind the buffer, so that it's linear (possibly starting with
+        * some initial nulls).
+        */
+       error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr,
+               msgbufp->msg_size-msgbufp->msg_bufr,req);
+       if(error) return(error);
+       if(msgbufp->msg_bufr>0) {
+               error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr,
+                       msgbufp->msg_bufr,req);
+       }
+       return(error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
+       0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer");
+
+static int msgbuf_clear;
+
+static int
+sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
+{
+       int error;
+       error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
+               req);
+       if (!error && req->newptr) {
+               /* Clear the buffer and reset write pointer */
+               bzero(msgbufp->msg_ptr,msgbufp->msg_size);
+               msgbufp->msg_bufr=msgbufp->msg_bufx=0;
+               msgbuf_clear=0;
+       }
+       return (error);
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
+       &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
+       "Clear kernel message buffer");
+
+#endif
+
+/*
+ * Send an interrupt to process.
+ *
+ * Stack is set up to allow sigcode stored
+ * at top to call routine, followed by kcall
+ * to sigreturn routine below.  After sigreturn
+ * resets the signal mask, the stack, and the
+ * frame pointer, it returns to the user
+ * specified pc, psl.
+ */
+void
+sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
+{
+       struct lwp *lp = curthread->td_lwp;
+       struct proc *p = lp->lwp_proc;
+       struct trapframe *regs;
+       struct sigacts *psp = p->p_sigacts;
+       struct sigframe sf, *sfp;
+       int oonstack;
+
+       regs = lp->lwp_md.md_regs;
+       oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;
+
+       /* save user context */
+       bzero(&sf, sizeof(struct sigframe));
+       sf.sf_uc.uc_sigmask = *mask;
+       sf.sf_uc.uc_stack = lp->lwp_sigstk;
+       sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
+       sf.sf_uc.uc_mcontext.mc_gs = rgs();
+       bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe));
+
+       /* Allocate and validate space for the signal handler context. */
+       /* XXX lwp flags */
+        if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
+           SIGISMEMBER(psp->ps_sigonstack, sig)) {
+               sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp +
+                   lp->lwp_sigstk.ss_size - sizeof(struct sigframe));
+               lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
+       }
+       else
+               sfp = (struct sigframe *)regs->tf_esp - 1;
+
+       /* Translate the signal if appropriate */
+       if (p->p_sysent->sv_sigtbl) {
+               if (sig <= p->p_sysent->sv_sigsize)
+                       sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
+       }
+
+       /* Build the argument list for the signal handler. */
+       sf.sf_signum = sig;
+       sf.sf_ucontext = (register_t)&sfp->sf_uc;
+       if (SIGISMEMBER(psp->ps_siginfo, sig)) {
+               /* Signal handler installed with SA_SIGINFO. */
+               sf.sf_siginfo = (register_t)&sfp->sf_si;
+               sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
+
+               /* fill siginfo structure */
+               sf.sf_si.si_signo = sig;
+               sf.sf_si.si_code = code;
+               sf.sf_si.si_addr = (void*)regs->tf_err;
+       }
+       else {
+               /* Old FreeBSD-style arguments. */
+               sf.sf_siginfo = code;
+               sf.sf_addr = regs->tf_err;
+               sf.sf_ahu.sf_handler = catcher;
+       }
+
+#if 0
+       /*
+        * If we're a vm86 process, we want to save the segment registers.
+        * We also change eflags to be our emulated eflags, not the actual
+        * eflags.
+        */
+       if (regs->tf_eflags & PSL_VM) {
+               struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
+               struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
+
+               sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
+               sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
+               sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
+               sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
+
+               if (vm86->vm86_has_vme == 0)
+                       sf.sf_uc.uc_mcontext.mc_eflags =
+                           (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
+                           (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
+
+               /*
+                * Clear PSL_NT to inhibit T_TSSFLT faults on return from
+                * syscalls made by the signal handler.  This just avoids
+                * wasting time for our lazy fixup of such faults.  PSL_NT
+                * does nothing in vm86 mode, but vm86 programs can set it
+                * almost legitimately in probes for old cpu types.
+                */
+               tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
+       }
+#endif
+
+       /*
+        * Copy the sigframe out to the user's stack.
+        */
+       if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
+               /*
+                * Something is wrong with the stack pointer.
+                * ...Kill the process.
+                */
+               sigexit(p, SIGILL);
+       }
+
+       regs->tf_esp = (int)sfp;
+       regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
+       regs->tf_eflags &= ~PSL_T;
+       regs->tf_cs = 0;
+       regs->tf_ds = 0;
+       regs->tf_es = 0;
+       regs->tf_fs = 0;
+       regs->tf_ss = 0;
+}
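+
+/*
+ * Schematic of the frame sendsig() pushes on the user stack (exact
+ * layout is struct sigframe in <machine/sigframe.h>):
+ *
+ *     sfp -> sf_signum        signal number (handler argument 1)
+ *            sf_siginfo       siginfo pointer, or old-style code (arg 2)
+ *            sf_ucontext     pointer to sf_uc below (arg 3)
+ *            sf_addr          fault address for old-style handlers
+ *            sf_si            siginfo, filled only for SA_SIGINFO
+ *            sf_uc            saved ucontext, consumed by sigreturn()
+ */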
+
+/*
+ * Sanitize the trapframe for a virtual kernel passing control to a custom
+ * VM context.
+ *
+ * Allow userland to set or maintain PSL_RF, the resume flag.  This flag
+ * basically controls whether the return PC should skip the first instruction
+ * (as in an explicit system call) or re-execute it (as in an exception).
+ */
+int
+cpu_sanitize_frame(struct trapframe *frame)
+{
+       frame->tf_cs = 0;
+       frame->tf_ds = 0;
+       frame->tf_es = 0;
+       frame->tf_fs = 0;
+       frame->tf_ss = 0;
+       frame->tf_eflags &= (PSL_USER | PSL_RF);
+       frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I;
+       return(0);
+}
+
+/*
+ * sigreturn(ucontext_t *sigcntxp)
+ *
+ * System call to cleanup state after a signal
+ * has been taken.  Reset signal mask and
+ * stack state from context left by sendsig (above).
+ * Return to previous pc and psl as specified by
+ * context left by sendsig. Check carefully to
+ * make sure that the user has not modified the
+ * state to gain improper privileges.
+ */
+#define        EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+#define        CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
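+
+/*
+ * Reading the checks: EFL_SECURE(ef, oef) accepts the new eflags only
+ * when every bit outside PSL_USERCHANGE matches the old value, and
+ * CS_SECURE() requires a user privilege level (SEL_UPL) selector.
+ */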
+
+int
+sys_sigreturn(struct sigreturn_args *uap)
+{
+       struct lwp *lp = curthread->td_lwp;
+       struct trapframe *regs;
+       ucontext_t *ucp;
+       int cs, eflags;
+
+       ucp = uap->sigcntxp;
+
+       if (!useracc((caddr_t)ucp, sizeof(ucontext_t), VM_PROT_READ))
+               return (EFAULT);
+
+       regs = lp->lwp_md.md_regs;
+       eflags = ucp->uc_mcontext.mc_eflags;
+
+#if 0
+       if (eflags & PSL_VM) {
+               struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
+               struct vm86_kernel *vm86;
+
+               /*
+                * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
+                * set up the vm86 area, and we can't enter vm86 mode.
+                */
+               if (lp->lwp_thread->td_pcb->pcb_ext == 0)
+                       return (EINVAL);
+               vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
+               if (vm86->vm86_inited == 0)
+                       return (EINVAL);
+
+               /* go back to user mode if both flags are set */
+               if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
+                       trapsignal(lp->lwp_proc, SIGBUS, 0);
+
+               if (vm86->vm86_has_vme) {
+                       eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
+                           (eflags & VME_USERCHANGE) | PSL_VM;
+               } else {
+                       vm86->vm86_eflags = eflags;     /* save VIF, VIP */
+                       eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
+                           (eflags & VM_USERCHANGE) | PSL_VM;
+               }
+               bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
+               tf->tf_eflags = eflags;
+               tf->tf_vm86_ds = tf->tf_ds;
+               tf->tf_vm86_es = tf->tf_es;
+               tf->tf_vm86_fs = tf->tf_fs;
+               tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
+               tf->tf_ds = 0;
+               tf->tf_es = 0;
+               tf->tf_fs = 0;
+       } else 
+#endif
+       {
+               /*
+                * Don't allow users to change privileged or reserved flags.
+                */
+               /*
+                * XXX do allow users to change the privileged flag PSL_RF.
+                * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
+                * should sometimes set it there too.  tf_eflags is kept in
+                * the signal context during signal handling and there is no
+                * other place to remember it, so the PSL_RF bit may be
+                * corrupted by the signal handler without us knowing.
+                * Corruption of the PSL_RF bit at worst causes one more or
+                * one less debugger trap, so allowing it is fairly harmless.
+                */
+               if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
+                       kprintf("sigreturn: eflags = 0x%x\n", eflags);
+                       return(EINVAL);
+               }
+
+               /*
+                * Don't allow users to load a valid privileged %cs.  Let the
+                * hardware check for invalid selectors, excess privilege in
+                * other selectors, invalid %eip's and invalid %esp's.
+                */
+               cs = ucp->uc_mcontext.mc_cs;
+               if (!CS_SECURE(cs)) {
+                       kprintf("sigreturn: cs = 0x%x\n", cs);
+                       trapsignal(lp->lwp_proc, SIGBUS, T_PROTFLT);
+                       return(EINVAL);
+               }
+               bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe));
+       }
+
+       if (ucp->uc_mcontext.mc_onstack & 1)
+               lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
+       else
+               lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
+
+       lp->lwp_sigmask = ucp->uc_sigmask;
+       SIG_CANTMASK(lp->lwp_sigmask);
+       return(EJUSTRETURN);
+}
+
+/*
+ * Stack frame on entry to function.  %eax will contain the function vector,
+ * %ecx will contain the function data.  flags, ecx, and eax will have 
+ * already been pushed on the stack.
+ */
+struct upc_frame {
+       register_t      eax;
+       register_t      ecx;
+       register_t      edx;
+       register_t      flags;
+       register_t      oldip;
+};
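+
+/*
+ * Hypothetical userland side (illustrative sketch, not part of this
+ * commit): the routine at vu_ctx is entered with the function vector
+ * in %eax and its data in %ecx, runs the vector, and then drains any
+ * remaining upcalls, e.g.:
+ *
+ *     void
+ *     upc_entry(void (*func)(void *), void *data)  // %eax, %ecx via
+ *     {                                            // an assumed asm stub
+ *             func(data);
+ *             upc_dispatch(-1);   // process remaining upcalls (see below)
+ *     }
+ */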
+
+void
+sendupcall(struct vmupcall *vu, int morepending)
+{
+       struct lwp *lp = curthread->td_lwp;
+       struct trapframe *regs;
+       struct upcall upcall;
+       struct upc_frame upc_frame;
+       int     crit_count = 0;
+
+       /*
+        * Get the upcall data structure
+        */
+       if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) ||
+           copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int))
+       ) {
+               vu->vu_pending = 0;
+               kprintf("bad upcall address\n");
+               return;
+       }
+
+       /*
+        * If the data structure is already marked pending or has a critical
+        * section count, mark the data structure as pending and return 
+        * without doing an upcall.  vu_pending is left set.
+        */
+       if (upcall.upc_pending || crit_count >= vu->vu_pending) {
+               if (upcall.upc_pending < vu->vu_pending) {
+                       upcall.upc_pending = vu->vu_pending;
+                       copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
+                               sizeof(upcall.upc_pending));
+               }
+               return;
+       }
+
+       /*
+        * We can run this upcall now, clear vu_pending.
+        *
+        * Bump our critical section count and set or clear the
+        * user pending flag depending on whether more upcalls are
+        * pending.  The user will be responsible for calling 
+        * upc_dispatch(-1) to process remaining upcalls.
+        */
+       vu->vu_pending = 0;
+       upcall.upc_pending = morepending;
+       crit_count += TDPRI_CRIT;
+       copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, 
+               sizeof(upcall.upc_pending));
+       copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
+               sizeof(int));
+
+       /*
+        * Construct a stack frame and issue the upcall
+        */
+       regs = lp->lwp_md.md_regs;
+       upc_frame.eax = regs->tf_eax;
+       upc_frame.ecx = regs->tf_ecx;
+       upc_frame.edx = regs->tf_edx;
+       upc_frame.flags = regs->tf_eflags;
+       upc_frame.oldip = regs->tf_eip;
+       if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)),
+           sizeof(upc_frame)) != 0) {
+               kprintf("bad stack on upcall\n");
+       } else {
+               regs->tf_eax = (register_t)vu->vu_func;
+               regs->tf_ecx = (register_t)vu->vu_data;
+               regs->tf_edx = (register_t)lp->lwp_upcall;
+               regs->tf_eip = (register_t)vu->vu_ctx;
+               regs->tf_esp -= sizeof(upc_frame);
+       }
+}
+
+/*
+ * fetchupcall occurs in the context of a system call, which means that
+ * we have to return EJUSTRETURN in order to prevent eax and edx from
+ * being overwritten by the syscall return value.
+ *
+ * if vu is not NULL we return the new context in %edx, the new data in %ecx,
+ * and the function pointer in %eax.  
+ */
+int
+fetchupcall (struct vmupcall *vu, int morepending, void *rsp)
+{
+       struct upc_frame upc_frame;
+       struct lwp *lp = curthread->td_lwp;
+       struct trapframe *regs;
+       int error;
+       struct upcall upcall;
+       int crit_count;
+
+       regs = lp->lwp_md.md_regs;
+
+       error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int));
+       if (error == 0) {
+           if (vu) {
+               /*
+                * This jumps us to the next ready context.
+                */
+               vu->vu_pending = 0;
+               error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall));
+               crit_count = 0;
+               if (error == 0)
+                       error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
+               crit_count += TDPRI_CRIT;
+               if (error == 0)
+                       error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
+               regs->tf_eax = (register_t)vu->vu_func;
+               regs->tf_ecx = (register_t)vu->vu_data;
+               regs->tf_edx = (register_t)lp->lwp_upcall;
+               regs->tf_eip = (register_t)vu->vu_ctx;
+               regs->tf_esp = (register_t)rsp;
+           } else {
+               /*
+                * This returns us to the originally interrupted code.
+                */
+               error = copyin(rsp, &upc_frame, sizeof(upc_frame));
+               regs->tf_eax = upc_frame.eax;
+               regs->tf_ecx = upc_frame.ecx;
+               regs->tf_edx = upc_frame.edx;
+               regs->tf_eflags = (regs->tf_eflags & ~PSL_USERCHANGE) |
+                               (upc_frame.flags & PSL_USERCHANGE);
+               regs->tf_eip = upc_frame.oldip;
+               regs->tf_esp = (register_t)((char *)rsp + sizeof(upc_frame));
+           }
+       }
+       if (error == 0)
+               error = EJUSTRETURN;
+       return(error);
+}
+
+/*
+ * cpu_idle() represents the idle LWKT.  You cannot return from this function
+ * (unless you want to blow things up!).  Instead we look for runnable threads
+ * and loop or halt as appropriate.  Giant is not held on entry to the thread.
+ *
+ * The main loop is entered with a critical section held, we must release
+ * the critical section before doing anything else.  lwkt_switch() will
+ * check for pending interrupts due to entering and exiting its own 
+ * critical section.
+ *
+ * Note on cpu_idle_hlt:  On an SMP system we rely on a scheduler IPI
+ * to wake a HLTed cpu up.  However, there are cases where the idlethread
+ * will be entered with the possibility that no IPI will occur and in such
+ * cases lwkt_switch() sets TDF_IDLE_NOHLT.
+ */
+static int     cpu_idle_hlt = 1;
+static int     cpu_idle_hltcnt;
+static int     cpu_idle_spincnt;
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
+    &cpu_idle_hlt, 0, "Idle loop HLT enable");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
+    &cpu_idle_hltcnt, 0, "Idle loop entry halts");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
+    &cpu_idle_spincnt, 0, "Idle loop entry spins");
+
+static void
+cpu_idle_default_hook(void)
+{
+       /*
+        * We must guarantee that hlt is exactly the instruction
+        * following the sti.
+        */
+       __asm __volatile("hlt");        /* sti; hlt */
+}
+
+/* Other subsystems (e.g., ACPI) can hook this later. */
+void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
+
+void
+cpu_idle(void)
+{
+       struct thread *td = curthread;
+
+       crit_exit();
+       KKASSERT(td->td_pri < TDPRI_CRIT);
+       for (;;) {
+               /*
+                * See if there are any LWKTs ready to go.
+                */
+               lwkt_switch();
+
+               /*
+                * If we are going to halt call splz unconditionally after
+                * CLIing to catch any interrupt races.  Note that we are
+                * at SPL0 and interrupts are enabled.
+                */
+               if (cpu_idle_hlt && !lwkt_runnable() &&
+                   (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+                       /* __asm __volatile("cli"); */
+                       splz();
+                       if (!lwkt_runnable())
+                           cpu_idle_hook();
+#ifdef SMP
+                       else
+                           __asm __volatile("pause");
+#endif
+                       ++cpu_idle_hltcnt;
+               } else {
+                       td->td_flags &= ~TDF_IDLE_NOHLT;
+                       splz();
+#ifdef SMP
+                       /*__asm __volatile("sti; pause");*/
+                       __asm __volatile("pause");
+#else
+                       /*__asm __volatile("sti");*/
+#endif
+                       ++cpu_idle_spincnt;
+               }
+       }
+}
+
+/*
+ * Clear registers on exec
+ */
+void
+setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings)
+{
+       struct trapframe *regs = lp->lwp_md.md_regs;
+       struct pcb *pcb = lp->lwp_thread->td_pcb;
+
+       /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
+       pcb->pcb_gs = 0;
+#if 0
+       load_gs(_udatasel);
+#endif
+
+       /* was i386_user_cleanup() in NetBSD */
+       user_ldt_free(pcb);
+  
+       bzero((char *)regs, sizeof(struct trapframe));
+       regs->tf_eip = entry;
+       regs->tf_esp = stack;
+       regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
+       regs->tf_ss = 0;
+       regs->tf_ds = 0;
+       regs->tf_es = 0;
+       regs->tf_fs = 0;
+       regs->tf_cs = 0;
+
+       /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
+       regs->tf_ebx = ps_strings;
+
+        /*
+         * Reset the hardware debug registers if they were in use.
+         * They won't have any meaning for the newly exec'd process.  
+         */
+        if (pcb->pcb_flags & PCB_DBREGS) {
+                pcb->pcb_dr0 = 0;
+                pcb->pcb_dr1 = 0;
+                pcb->pcb_dr2 = 0;
+                pcb->pcb_dr3 = 0;
+                pcb->pcb_dr6 = 0;
+                pcb->pcb_dr7 = 0;
+                if (pcb == curthread->td_pcb) {
+                       /*
+                        * Clear the debug registers on the running
+                        * CPU, otherwise they will end up affecting
+                        * the next process we switch to.
+                        */
+                       reset_dbregs();
+                }
+                pcb->pcb_flags &= ~PCB_DBREGS;
+        }
+
+       /*
+        * Initialize the math emulator (if any) for the current process.
+        * Actually, just clear the bit that says that the emulator has
+        * been initialized.  Initialization is delayed until the process
+        * traps to the emulator (if it is done at all) mainly because
+        * emulators don't provide an entry point for initialization.
+        */
+       lp->lwp_thread->td_pcb->pcb_flags &= ~FP_SOFTFP;
+
+       /*
+        * note: do not set CR0_TS here.  npxinit() must do it after clearing
+        * gd_npxthread.  Otherwise a preemptive interrupt thread may panic
+        * in npxdna().
+        */
+       crit_enter();
+#if 0
+       load_cr0(rcr0() | CR0_MP);
+#endif
+
+#if NNPX > 0
+       /* Initialize the npx (if any) for the current process. */
+       npxinit(__INITIAL_NPXCW__);
+#endif
+       crit_exit();
+
+       /*
+        * note: linux emulator needs edx to be 0x0 on entry, which is
+        * handled in execve simply by setting the 64 bit syscall
+        * return value to 0.
+        */
+}
+
+void
+cpu_setregs(void)
+{
+#if 0
+       unsigned int cr0;
+
+       cr0 = rcr0();
+       cr0 |= CR0_NE;                  /* Done by npxinit() */
+       cr0 |= CR0_MP | CR0_TS;         /* Done at every execve() too. */
+#ifdef I386_CPU
+       if (cpu_class != CPUCLASS_386)
+#endif
+               cr0 |= CR0_WP | CR0_AM;
+       load_cr0(cr0);
+       load_gs(_udatasel);
+#endif
+}
+
+static int
+sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
+{
+       int error;
+       error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
+               req);
+       if (!error && req->newptr)
+               resettodr();
+       return (error);
+}
+
+SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
+       &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
+
+extern u_long bootdev;         /* not a cdev_t - encoding is different */
+SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
+       CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
+
+/*
+ * Initialize 386 and configure to run kernel
+ */
+
+/*
+ * Initialize segments & interrupt table
+ */
+
+extern  struct user *proc0paddr;
+
+#if 0
+
+extern inthand_t
+       IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
+       IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
+       IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
+       IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align),
+       IDTVEC(xmm), IDTVEC(syscall),
+       IDTVEC(rsvd0);
+extern inthand_t
+       IDTVEC(int0x80_syscall);
+
+#endif
+
+#ifdef DEBUG_INTERRUPTS
+extern inthand_t *Xrsvdary[256];
+#endif
+
+int
+ptrace_set_pc(struct proc *p, unsigned long addr)
+{
+       p->p_md.md_regs->tf_eip = addr;
+       return (0);
+}
+
+int
+ptrace_single_step(struct lwp *lp)
+{
+       lp->lwp_md.md_regs->tf_eflags |= PSL_T;
+       return (0);
+}
+
+int
+fill_regs(struct lwp *lp, struct reg *regs)
+{
+       struct pcb *pcb;
+       struct trapframe *tp;
+
+       tp = lp->lwp_md.md_regs;
+       regs->r_fs = tp->tf_fs;
+       regs->r_es = tp->tf_es;
+       regs->r_ds = tp->tf_ds;
+       regs->r_edi = tp->tf_edi;
+       regs->r_esi = tp->tf_esi;
+       regs->r_ebp = tp->tf_ebp;
+       regs->r_ebx = tp->tf_ebx;
+       regs->r_edx = tp->tf_edx;
+       regs->r_ecx = tp->tf_ecx;
+       regs->r_eax = tp->tf_eax;
+       regs->r_eip = tp->tf_eip;
+       regs->r_cs = tp->tf_cs;
+       regs->r_eflags = tp->tf_eflags;
+       regs->r_esp = tp->tf_esp;
+       regs->r_ss = tp->tf_ss;
+       pcb = lp->lwp_thread->td_pcb;
+       regs->r_gs = pcb->pcb_gs;
+       return (0);
+}
+
+int
+set_regs(struct lwp *lp, struct reg *regs)
+{
+       struct pcb *pcb;
+       struct trapframe *tp;
+
+       tp = lp->lwp_md.md_regs;
+       if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
+           !CS_SECURE(regs->r_cs))
+               return (EINVAL);
+       tp->tf_fs = regs->r_fs;
+       tp->tf_es = regs->r_es;
+       tp->tf_ds = regs->r_ds;
+       tp->tf_edi = regs->r_edi;
+       tp->tf_esi = regs->r_esi;
+       tp->tf_ebp = regs->r_ebp;
+       tp->tf_ebx = regs->r_ebx;
+       tp->tf_edx = regs->r_edx;
+       tp->tf_ecx = regs->r_ecx;
+       tp->tf_eax = regs->r_eax;
+       tp->tf_eip = regs->r_eip;
+       tp->tf_cs = regs->r_cs;
+       tp->tf_eflags = regs->r_eflags;
+       tp->tf_esp = regs->r_esp;
+       tp->tf_ss = regs->r_ss;
+       pcb = lp->lwp_thread->td_pcb;
+       pcb->pcb_gs = regs->r_gs;
+       return (0);
+}
+
+#ifndef CPU_DISABLE_SSE
+static void
+fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
+{
+       struct env87 *penv_87 = &sv_87->sv_env;
+       struct envxmm *penv_xmm = &sv_xmm->sv_env;
+       int i;
+
+       /* FPU control/status */
+       penv_87->en_cw = penv_xmm->en_cw;
+       penv_87->en_sw = penv_xmm->en_sw;
+       penv_87->en_tw = penv_xmm->en_tw;
+       penv_87->en_fip = penv_xmm->en_fip;
+       penv_87->en_fcs = penv_xmm->en_fcs;
+       penv_87->en_opcode = penv_xmm->en_opcode;
+       penv_87->en_foo = penv_xmm->en_foo;
+       penv_87->en_fos = penv_xmm->en_fos;
+
+       /* FPU registers */
+       for (i = 0; i < 8; ++i)
+               sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
+
+       sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
+}
+
+static void
+set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
+{
+       struct env87 *penv_87 = &sv_87->sv_env;
+       struct envxmm *penv_xmm = &sv_xmm->sv_env;
+       int i;
+
+       /* FPU control/status */
+       penv_xmm->en_cw = penv_87->en_cw;
+       penv_xmm->en_sw = penv_87->en_sw;
+       penv_xmm->en_tw = penv_87->en_tw;
+       penv_xmm->en_fip = penv_87->en_fip;
+       penv_xmm->en_fcs = penv_87->en_fcs;
+       penv_xmm->en_opcode = penv_87->en_opcode;
+       penv_xmm->en_foo = penv_87->en_foo;
+       penv_xmm->en_fos = penv_87->en_fos;
+
+       /* FPU registers */
+       for (i = 0; i < 8; ++i)
+               sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
+
+       sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
+}
+#endif /* CPU_DISABLE_SSE */
+
+int
+fill_fpregs(struct lwp *lp, struct fpreg *fpregs)
+{
+#ifndef CPU_DISABLE_SSE
+       if (cpu_fxsr) {
+               fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm,
+                               (struct save87 *)fpregs);
+               return (0);
+       }
+#endif /* CPU_DISABLE_SSE */
+       bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
+       return (0);
+}
+
+int
+set_fpregs(struct lwp *lp, struct fpreg *fpregs)
+{
+#ifndef CPU_DISABLE_SSE
+       if (cpu_fxsr) {
+               set_fpregs_xmm((struct save87 *)fpregs,
+                              &lp->lwp_thread->td_pcb->pcb_save.sv_xmm);
+               return (0);
+       }
+#endif /* CPU_DISABLE_SSE */
+       bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs);
+       return (0);
+}
+
+int
+fill_dbregs(struct lwp *lp, struct dbreg *dbregs)
+{
+        if (lp == NULL) {
+                dbregs->dr0 = rdr0();
+                dbregs->dr1 = rdr1();
+                dbregs->dr2 = rdr2();
+                dbregs->dr3 = rdr3();
+                dbregs->dr4 = rdr4();
+                dbregs->dr5 = rdr5();
+                dbregs->dr6 = rdr6();
+                dbregs->dr7 = rdr7();
+        } else {
+               struct pcb *pcb;
+
+                pcb = lp->lwp_thread->td_pcb;
+                dbregs->dr0 = pcb->pcb_dr0;
+                dbregs->dr1 = pcb->pcb_dr1;
+                dbregs->dr2 = pcb->pcb_dr2;
+                dbregs->dr3 = pcb->pcb_dr3;
+                dbregs->dr4 = 0;
+                dbregs->dr5 = 0;
+                dbregs->dr6 = pcb->pcb_dr6;
+                dbregs->dr7 = pcb->pcb_dr7;
+        }
+       return (0);
+}
+
+int
+set_dbregs(struct lwp *lp, struct dbreg *dbregs)
+{
+       if (lp == NULL) {
+               load_dr0(dbregs->dr0);
+               load_dr1(dbregs->dr1);
+               load_dr2(dbregs->dr2);
+               load_dr3(dbregs->dr3);
+               load_dr4(dbregs->dr4);
+               load_dr5(dbregs->dr5);
+               load_dr6(dbregs->dr6);
+               load_dr7(dbregs->dr7);
+       } else {
+               struct pcb *pcb;
+               struct ucred *ucred;
+               int i;
+               uint32_t mask1, mask2;
+
+               /*
+                * Don't let an illegal value for dr7 get set.  Specifically,
+                * check for undefined settings.  Setting these bit patterns
+                * result in undefined behaviour and can lead to an unexpected
+                * results in undefined behaviour and can lead to an unexpected
+                */
+               for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
+                    i++, mask1 <<= 2, mask2 <<= 2)
+                       if ((dbregs->dr7 & mask1) == mask2)
+                               return (EINVAL);
+               
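+               /*
+                * Example (illustrative): dr7 = 0x00020001 enables dr0
+                * (L0) with R/W0 = 10b, one of the undefined encodings
+                * the mask loop above screens for, so EINVAL is returned.
+                */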
+               pcb = lp->lwp_thread->td_pcb;
+               ucred = lp->lwp_proc->p_ucred;
+
+               /*
+                * Don't let a process set a breakpoint that is not within the
+                * process's address space.  If a process could do this, it
+                * could halt the system by setting a breakpoint in the kernel
+                * (if ddb was enabled).  Thus, we need to check to make sure
+                * that no breakpoints are being enabled for addresses outside
+                * process's address space, unless, perhaps, we were called by
+                * uid 0.
+                *
+                * XXX - what about when the watched area of the user's
+                * address space is written into from within the kernel
+                * ... wouldn't that still cause a breakpoint to be generated
+                * from within kernel mode?
+                */
+
+               if (suser_cred(ucred, 0) != 0) {
+                       if (dbregs->dr7 & 0x3) {
+                               /* dr0 is enabled */
+                               if (dbregs->dr0 >= VM_MAX_USER_ADDRESS)
+                                       return (EINVAL);
+                       }
+
+                       if (dbregs->dr7 & (0x3<<2)) {
+                               /* dr1 is enabled */
+                               if (dbregs->dr1 >= VM_MAX_USER_ADDRESS)
+                                       return (EINVAL);
+                       }
+
+                       if (dbregs->dr7 & (0x3<<4)) {
+                               /* dr2 is enabled */
+                               if (dbregs->dr2 >= VM_MAX_USER_ADDRESS)
+                                       return (EINVAL);
+                       }
+
+                       if (dbregs->dr7 & (0x3<<6)) {
+                               /* dr3 is enabled */
+                               if (dbregs->dr3 >= VM_MAX_USER_ADDRESS)
+                                       return (EINVAL);
+                       }
+               }
+
+               pcb->pcb_dr0 = dbregs->dr0;
+               pcb->pcb_dr1 = dbregs->dr1;
+               pcb->pcb_dr2 = dbregs->dr2;
+               pcb->pcb_dr3 = dbregs->dr3;
+               pcb->pcb_dr6 = dbregs->dr6;
+               pcb->pcb_dr7 = dbregs->dr7;
+
+               pcb->pcb_flags |= PCB_DBREGS;
+       }
+
+       return (0);
+}
+
+#if 0
+/*
+ * Return > 0 if a hardware breakpoint has been hit, and the
+ * breakpoint was in user space.  Return 0, otherwise.
+ */
+int
+user_dbreg_trap(void)
+{
+        u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
+        u_int32_t bp;       /* breakpoint bits extracted from dr6 */
+        int nbp;            /* number of breakpoints that triggered */
+        caddr_t addr[4];    /* breakpoint addresses */
+        int i;
+        
+        dr7 = rdr7();
+        if ((dr7 & 0x000000ff) == 0) {
+                /*
+                 * all GE and LE bits in the dr7 register are zero,
+                 * thus the trap couldn't have been caused by the
+                 * hardware debug registers
+                 */
+                return 0;
+        }
+
+        nbp = 0;
+        dr6 = rdr6();
+        bp = dr6 & 0x0000000f;
+
+        if (!bp) {
+                /*
+                 * None of the breakpoint bits are set meaning this
+                 * trap was not caused by any of the debug registers
+                 */
+                return 0;
+        }
+
+        /*
+         * at least one of the breakpoints was hit, check to see
+         * which ones and if any of them are user space addresses
+         */
+
+        if (bp & 0x01) {
+                addr[nbp++] = (caddr_t)rdr0();
+        }
+        if (bp & 0x02) {
+                addr[nbp++] = (caddr_t)rdr1();
+        }
+        if (bp & 0x04) {
+                addr[nbp++] = (caddr_t)rdr2();
+        }
+        if (bp & 0x08) {
+                addr[nbp++] = (caddr_t)rdr3();
+        }
+
+        for (i=0; i<nbp; i++) {
+                if (addr[i] <
+                    (caddr_t)VM_MAX_USER_ADDRESS) {
+                        /*
+                         * addr[i] is in user space
+                         */
+                        return nbp;
+                }
+        }
+
+        /*
+         * None of the breakpoints are in user space.
+         */
+        return 0;
+}
+
+#endif
+
+
+#ifndef DDB
+void
+Debugger(const char *msg)
+{
+       kprintf("Debugger(\"%s\") called.\n", msg);
+}
+#endif /* no DDB */
+
+#include <sys/disklabel.h>
+
+/*
+ * Determine the size of the transfer, and make sure it is
+ * within the boundaries of the partition. Adjust transfer
+ * if needed, and signal errors or early completion.
+ *
+ * On success a new bio layer is pushed with the translated
+ * block number, and returned.
+ */
+struct bio *
+bounds_check_with_label(cdev_t dev, struct bio *bio,
+                       struct disklabel *lp, int wlabel)
+{
+       struct bio *nbio;
+       struct buf *bp = bio->bio_buf;
+        struct partition *p = lp->d_partitions + dkpart(dev);
+        int labelsect = lp->d_partitions[0].p_offset;
+        int maxsz = p->p_size,
+                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
+       daddr_t blkno = (daddr_t)(bio->bio_offset >> DEV_BSHIFT);
+
+        /* overwriting disk label ? */
+        /* XXX should also protect bootstrap in first 8K */
+        if (blkno + p->p_offset <= LABELSECTOR + labelsect &&
+#if LABELSECTOR != 0
+            blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
+#endif
+            bp->b_cmd != BUF_CMD_READ && wlabel == 0) {
+                bp->b_error = EROFS;
+                goto error;
+        }
+
+#if     defined(DOSBBSECTOR) && defined(notyet)
+        /* overwriting master boot record? */
+        if (blkno + p->p_offset <= DOSBBSECTOR &&
+            bp->b_cmd != BUF_CMD_READ && wlabel == 0) {
+                bp->b_error = EROFS;
+                goto error;
+        }
+#endif
+
+       /*
+        * Check for out of bounds, EOF, and EOF clipping.
+        */
+       if (bio->bio_offset < 0)
+               goto bad;
+       if (blkno + sz > maxsz) {
+               /*
+                * Past EOF or B_BNOCLIP flag was set, the request is bad.
+                */
+               if (blkno > maxsz || (bp->b_flags & B_BNOCLIP))
+                       goto bad;
+
+               /*
+                * If exactly on EOF just complete the I/O with no bytes
+                * transferred.  B_INVAL must be set to throw away the
+                * contents of the buffer.  Otherwise clip b_bcount.
+                */
+                if (blkno == maxsz) {
+                        bp->b_resid = bp->b_bcount;
+                       bp->b_flags |= B_INVAL;
+                       goto done;
+                }
+                bp->b_bcount = (maxsz - blkno) << DEV_BSHIFT;
+        }
+       nbio = push_bio(bio);
+        nbio->bio_offset = bio->bio_offset + ((off_t)p->p_offset << DEV_BSHIFT);
+       return (nbio);
+
+       /*
+        * The caller is responsible for calling biodone() on the passed bio
+        * when we return NULL.
+        */
+bad:
+       bp->b_error = EINVAL;
+error:
+       bp->b_resid = bp->b_bcount;
+        bp->b_flags |= B_ERROR | B_INVAL;
+done:
+       return (NULL);
+}
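+
+/*
+ * Hypothetical caller sketch (assumed driver code, not in this commit),
+ * honoring the biodone() requirement noted above:
+ *
+ *     nbio = bounds_check_with_label(dev, bio, lp, wlabel);
+ *     if (nbio == NULL) {
+ *             biodone(bio);           (error or EOF completion)
+ *             return;
+ *     }
+ *     ... issue the transfer at the translated nbio->bio_offset ...
+ */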
+
diff --git a/sys/platform/vkernel/i386/db_interface.c b/sys/platform/vkernel/i386/db_interface.c
new file mode 100644 (file)
index 0000000..6f7634f
--- /dev/null
@@ -0,0 +1,328 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/db_interface.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+/*
+ * Interface to new debugger.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/reboot.h>
+#include <sys/cons.h>
+#include <sys/vkernel.h>
+
+#include <machine/cpu.h>
+#include <machine/smp.h>
+#include <machine/globaldata.h>
+#include <machine/md_var.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <ddb/ddb.h>
+
+#include <setjmp.h>
+
+static jmp_buf *db_nofault = 0;
+extern jmp_buf db_jmpbuf;
+
+extern void    gdb_handle_exception (db_regs_t *, int, int);
+
+int    db_active;
+db_regs_t ddb_regs;
+
+static jmp_buf db_global_jmpbuf;
+static int     db_global_jmpbuf_valid;
+
+#ifdef __GNUC__
+#define        rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;})
+#endif
+
+/*
+ *  kdb_trap - field a TRACE or BPT trap
+ */
+int
+kdb_trap(int type, int code, struct i386_saved_state *regs)
+{
+       volatile int ddb_mode = !(boothowto & RB_GDB);
+
+       /*
+        * XXX try to do nothing if the console is in graphics mode.
+        * Handle trace traps (and hardware breakpoints...) by ignoring
+        * them except for forgetting about them.  Return 0 for other
+        * traps to say that we haven't done anything.  The trap handler
+        * will usually panic.  We should handle breakpoint traps for
+        * our breakpoints by disarming our breakpoints and fixing up
+        * %eip.
+        */
+       if (cons_unavail && ddb_mode) {
+           if (type == T_TRCTRAP) {
+               regs->tf_eflags &= ~PSL_T;
+               return (1);
+           }
+           return (0);
+       }
+
+       switch (type) {
+           case T_BPTFLT:      /* breakpoint */
+           case T_TRCTRAP:     /* debug exception */
+               break;
+
+           default:
+               /*
+                * XXX this is almost useless now.  In most cases,
+                * trap_fatal() has already printed a much more verbose
+                * message.  However, it is dangerous to print things in
+                * trap_fatal() - kprintf() might be reentered and trap.
+                * The debugger should be given control first.
+                */
+               if (ddb_mode)
+                   db_printf("kernel: type %d trap, code=%x\n", type, code);
+
+               if (db_nofault) {
+                   jmp_buf *no_fault = db_nofault;
+                   db_nofault = 0;
+                   longjmp(*no_fault, 1);
+               }
+       }
+
+       /*
+        * This handles unexpected traps in ddb commands, including calls to
+        * non-ddb functions.  db_nofault only applies to memory accesses by
+        * internal ddb commands.
+        */
+       if (db_global_jmpbuf_valid)
+           longjmp(db_global_jmpbuf, 1);
+
+       /*
+        * XXX We really should switch to a local stack here.
+        */
+       ddb_regs = *regs;
+
+       /*
+        * If in kernel mode, esp and ss are not saved, so dummy them up.
+        */
+       if (ISPL(regs->tf_cs) == 0) {
+           ddb_regs.tf_esp = (int)&regs->tf_esp;
+           ddb_regs.tf_ss = rss();
+       }
+
+#ifdef SMP
+       db_printf("\nCPU%d stopping CPUs: 0x%08x\n", 
+           mycpu->gd_cpuid, mycpu->gd_other_cpus);
+
+       /* We stop all CPUs except ourselves (obviously) */
+       stop_cpus(mycpu->gd_other_cpus);
+
+       db_printf(" stopped\n");
+#endif /* SMP */
+
+       setjmp(db_global_jmpbuf);
+       db_global_jmpbuf_valid = TRUE;
+       db_active++;
+       if (ddb_mode) {
+           cndbctl(TRUE);
+           db_trap(type, code);
+           cndbctl(FALSE);
+       } else
+           gdb_handle_exception(&ddb_regs, type, code);
+       db_active--;
+       db_global_jmpbuf_valid = FALSE;
+
+#ifdef SMP
+       db_printf("\nCPU%d restarting CPUs: 0x%08x\n",
+           mycpu->gd_cpuid, stopped_cpus);
+
+       /* Restart all the CPUs we previously stopped */
+       if (stopped_cpus != mycpu->gd_other_cpus) {
+               db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n",
+                         mycpu->gd_other_cpus, stopped_cpus);
+               panic("stop_cpus() failed");
+       }
+       restart_cpus(stopped_cpus);
+
+       db_printf(" restarted\n");
+#endif /* SMP */
+
+       regs->tf_eip    = ddb_regs.tf_eip;
+       regs->tf_eflags = ddb_regs.tf_eflags;
+       regs->tf_eax    = ddb_regs.tf_eax;
+       regs->tf_ecx    = ddb_regs.tf_ecx;
+       regs->tf_edx    = ddb_regs.tf_edx;
+       regs->tf_ebx    = ddb_regs.tf_ebx;
+
+       /*
+        * If in user mode, the saved ESP and SS were valid, restore them.
+        */
+       if (ISPL(regs->tf_cs)) {
+           regs->tf_esp = ddb_regs.tf_esp;
+           regs->tf_ss  = ddb_regs.tf_ss & 0xffff;
+       }
+
+       regs->tf_ebp    = ddb_regs.tf_ebp;
+       regs->tf_esi    = ddb_regs.tf_esi;
+       regs->tf_edi    = ddb_regs.tf_edi;
+       regs->tf_es     = ddb_regs.tf_es & 0xffff;
+       regs->tf_fs     = ddb_regs.tf_fs & 0xffff;
+       regs->tf_cs     = ddb_regs.tf_cs & 0xffff;
+       regs->tf_ds     = ddb_regs.tf_ds & 0xffff;
+       return (1);
+}
+
+/*
+ * Read bytes from kernel address space for debugger.
+ */
+void
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+       char    *src;
+
+       db_nofault = &db_jmpbuf;
+
+       src = (char *)addr;
+       while (size-- > 0)
+           *data++ = *src++;
+
+       db_nofault = 0;
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+void
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+       char    *dst;
+#if 0
+       vpte_t  *ptep0 = NULL;
+       vpte_t  oldmap0 = 0;
+       vm_offset_t     addr1;
+       vpte_t  *ptep1 = NULL;
+       vpte_t  oldmap1 = 0;
+#endif
+
+       db_nofault = &db_jmpbuf;
+#if 0
+       if (addr > trunc_page((vm_offset_t)btext) - size &&
+           addr < round_page((vm_offset_t)etext)) {
+
+           ptep0 = pmap_kpte(addr);
+           oldmap0 = *ptep0;
+           *ptep0 |= VPTE_W;
+
+           /* Map another page if the data crosses a page boundary. */
+           if ((*ptep0 & PG_PS) == 0) {
+               addr1 = trunc_page(addr + size - 1);
+               if (trunc_page(addr) != addr1) {
+                   ptep1 = pmap_kpte(addr1);
+                   oldmap1 = *ptep1;
+                   *ptep1 |= VPTE_W;
+               }
+           } else {
+               addr1 = trunc_4mpage(addr + size - 1);
+               if (trunc_4mpage(addr) != addr1) {
+                   ptep1 = pmap_kpte(addr1);
+                   oldmap1 = *ptep1;
+                   *ptep1 |= VPTE_W;
+               }
+           }
+
+           cpu_invltlb();
+       }
+#endif
+
+       dst = (char *)addr;
+
+       while (size-- > 0)
+           *dst++ = *data++;
+
+       db_nofault = 0;
+
+#if 0
+       if (ptep0) {
+           *ptep0 = oldmap0;
+
+           if (ptep1)
+               *ptep1 = oldmap1;
+
+           cpu_invltlb();
+       }
+#endif
+}
+
+/*
+ * The debugger sometimes needs to know the actual KVM address represented
+ * by the instruction pointer, stack pointer, or base pointer.  Normally
+ * the actual KVM address is simply the contents of the register.  However,
+ * if the debugger is entered from the BIOS or VM86 we need to figure out
+ * the offset from the segment register.
+ */
+db_addr_t
+PC_REGS(db_regs_t *regs)
+{
+    return(regs->tf_eip);
+}
+
+db_addr_t
+SP_REGS(db_regs_t *regs)
+{
+    return(regs->tf_esp);
+}
+
+db_addr_t
+BP_REGS(db_regs_t *regs)
+{
+    return(regs->tf_ebp);
+}
+
+/*
+ * XXX
+ * Move this to machdep.c and allow it to be called if any debugger is
+ * installed.
+ */
+void
+Debugger(const char *msg)
+{
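+       /*
+        * Guard against recursive entry in case breakpoint() itself
+        * traps back into Debugger().
+        */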
+       static volatile u_char in_Debugger;
+
+       /*
+        * XXX
+        * Do nothing if the console is in graphics mode.  This is
+        * OK if the call is for the debugger hotkey but not if the call
+        * is a weak form of panicking.
+        */
+       if (cons_unavail && !(boothowto & RB_GDB))
+           return;
+
+       if (!in_Debugger) {
+           in_Debugger = 1;
+           db_printf("Debugger(\"%s\")\n", msg);
+           breakpoint();
+           in_Debugger = 0;
+       }
+}
diff --git a/sys/platform/vkernel/i386/db_trace.c b/sys/platform/vkernel/i386/db_trace.c
new file mode 100644 (file)
index 0000000..9951fb0
--- /dev/null
@@ -0,0 +1,642 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/db_trace.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/linker_set.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/reg.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <ddb/ddb.h>
+
+#include <sys/user.h>
+
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+#include <ddb/db_variables.h>
+
+db_varfcn_t db_dr0;
+db_varfcn_t db_dr1;
+db_varfcn_t db_dr2;
+db_varfcn_t db_dr3;
+db_varfcn_t db_dr4;
+db_varfcn_t db_dr5;
+db_varfcn_t db_dr6;
+db_varfcn_t db_dr7;
+
+/*
+ * Machine register set.
+ */
+struct db_variable db_regs[] = {
+       { "cs",         &ddb_regs.tf_cs,     FCN_NULL },
+       { "ds",         &ddb_regs.tf_ds,     FCN_NULL },
+       { "es",         &ddb_regs.tf_es,     FCN_NULL },
+       { "fs",         &ddb_regs.tf_fs,     FCN_NULL },
+#if 0
+       { "gs",         &ddb_regs.tf_gs,     FCN_NULL },
+#endif
+       { "ss",         &ddb_regs.tf_ss,     FCN_NULL },
+       { "eax",        &ddb_regs.tf_eax,    FCN_NULL },
+       { "ecx",        &ddb_regs.tf_ecx,    FCN_NULL },
+       { "edx",        &ddb_regs.tf_edx,    FCN_NULL },
+       { "ebx",        &ddb_regs.tf_ebx,    FCN_NULL },
+       { "esp",        &ddb_regs.tf_esp,    FCN_NULL },
+       { "ebp",        &ddb_regs.tf_ebp,    FCN_NULL },
+       { "esi",        &ddb_regs.tf_esi,    FCN_NULL },
+       { "edi",        &ddb_regs.tf_edi,    FCN_NULL },
+       { "eip",        &ddb_regs.tf_eip,    FCN_NULL },
+       { "efl",        &ddb_regs.tf_eflags, FCN_NULL },
+       { "dr0",        NULL,                db_dr0 },
+       { "dr1",        NULL,                db_dr1 },
+       { "dr2",        NULL,                db_dr2 },
+       { "dr3",        NULL,                db_dr3 },
+       { "dr4",        NULL,                db_dr4 },
+       { "dr5",        NULL,                db_dr5 },
+       { "dr6",        NULL,                db_dr6 },
+       { "dr7",        NULL,                db_dr7 },
+};
+struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
+
+/*
+ * Stack trace.
+ */
+#define        INKERNEL(va)    (((vm_offset_t)(va)) >= USRSTACK)
+
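+/*
+ * Stack layout produced by the standard gcc prologue
+ * (pushl %ebp; movl %esp,%ebp): saved frame pointer, return address,
+ * then the first argument.
+ */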
+struct i386_frame {
+       struct i386_frame       *f_frame;
+       int                     f_retaddr;
+       int                     f_arg0;
+};
+
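+/*
+ * Frame classifications used by db_nextframe() to step across the
+ * special trap/interrupt/syscall entry frames.
+ */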
+#define NORMAL         0
+#define        TRAP            1
+#define        INTERRUPT       2
+#define        SYSCALL         3
+
+static void    db_nextframe(struct i386_frame **, db_addr_t *);
+static int     db_numargs(struct i386_frame *);
+static void    db_print_stack_entry(const char *, int, char **, int *, db_addr_t);
+
+
+static char    *watchtype_str(int type);
+static int     ki386_set_watch(int watchnum, unsigned int watchaddr, 
+                               int size, int access, struct dbreg * d);
+static int     ki386_clr_watch(int watchnum, struct dbreg * d);
+int            db_md_set_watchpoint(db_expr_t addr, db_expr_t size);
+int            db_md_clr_watchpoint(db_expr_t addr, db_expr_t size);
+void           db_md_list_watchpoints(void);
+
+
+/*
+ * Figure out how many arguments were passed into the frame at "fp".
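+ * With caller-cleanup (cdecl) calls, the instruction at the return
+ * address ("popl %ecx" or "addl $N,%esp") reveals how many argument
+ * bytes the caller pushed; default to 5 when it cannot be decoded.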
+ */
+static int
+db_numargs(struct i386_frame *fp)
+{
+       int     *argp;
+       int     inst;
+       int     args;
+
+       argp = (int *)db_get_value((int)&fp->f_retaddr, 4, FALSE);
+       /*
+        * XXX etext is wrong for LKMs.  We should attempt to interpret
+        * the instruction at the return address in all cases.  This
+        * may require better fault handling.
+        */
+       if (argp < (int *)btext || argp >= (int *)etext) {
+               args = 5;
+       } else {
+               inst = db_get_value((int)argp, 4, FALSE);
+               if ((inst & 0xff) == 0x59)      /* popl %ecx */
+                       args = 1;
+               else if ((inst & 0xffff) == 0xc483)     /* addl $Ibs, %esp */
+                       args = ((inst >> 16) & 0xff) / 4;
+               else
+                       args = 5;
+       }
+       return(args);
+}
+
+static void
+db_print_stack_entry(const char *name, int narg, char **argnp, int *argp,
+                    db_addr_t callpc)
+{
+       db_printf("%s(", name);
+       while (narg) {
+               if (argnp)
+                       db_printf("%s=", *argnp++);
+               db_printf("%r", db_get_value((int)argp, 4, FALSE));
+               argp++;
+               if (--narg != 0)
+                       db_printf(",");
+       }
+       db_printf(") at ");
+       db_printsym(callpc, DB_STGY_PROC);
+       db_printf("\n");
+}
+
+/*
+ * Figure out the next frame up in the call stack.
+ */
+static void
+db_nextframe(struct i386_frame **fp, db_addr_t *ip)
+{
+       struct trapframe *tf;
+       int frame_type;
+       int eip, esp, ebp;
+       db_expr_t offset;
+       const char *sym, *name;
+
+       eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE);
+       ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE);
+
+       /*
+        * Figure out frame type.
+        */
+
+       frame_type = NORMAL;
+
+       sym = db_search_symbol(eip, DB_STGY_ANY, &offset);
+       db_symbol_values(sym, &name, NULL);
+       if (name != NULL) {
+               if (!strcmp(name, "calltrap")) {
+                       frame_type = TRAP;
+               } else if (!strncmp(name, "Xresume", 7)) {
+                       frame_type = INTERRUPT;
+               } else if (!strcmp(name, "_Xsyscall")) {
+                       frame_type = SYSCALL;
+               }
+       }
+
+       /*
+        * Normal frames need no special processing.
+        */
+       if (frame_type == NORMAL) {
+               *ip = (db_addr_t) eip;
+               *fp = (struct i386_frame *) ebp;
+               return;
+       }
+
+       db_print_stack_entry(name, 0, 0, 0, eip);
+
+       /*
+        * Point to base of trapframe which is just above the
+        * current frame.
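+        * The 8-byte offset skips the saved %ebp and the return address.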
+        */
+       tf = (struct trapframe *) ((int)*fp + 8);
+
+       esp = (ISPL(tf->tf_cs) == SEL_UPL) ?  tf->tf_esp : (int)&tf->tf_esp;
+       switch (frame_type) {
+       case TRAP:
+               if (INKERNEL((int) tf)) {
+                       eip = tf->tf_eip;
+                       ebp = tf->tf_ebp;
+                       db_printf(
+                   "--- trap %#r, eip = %#r, esp = %#r, ebp = %#r ---\n",
+                           tf->tf_trapno, eip, esp, ebp);
+               }
+               break;
+       case SYSCALL:
+               if (INKERNEL((int) tf)) {
+                       eip = tf->tf_eip;
+                       ebp = tf->tf_ebp;
+                       db_printf(
+                   "--- syscall %#r, eip = %#r, esp = %#r, ebp = %#r ---\n",
+                           tf->tf_eax, eip, esp, ebp);
+               }
+               break;
+       case INTERRUPT:
+               tf = (struct trapframe *)((int)*fp + 16);
+               if (INKERNEL((int) tf)) {
+                       eip = tf->tf_eip;
+                       ebp = tf->tf_ebp;
+                       db_printf(
+                   "--- interrupt, eip = %#r, esp = %#r, ebp = %#r ---\n",
+                           eip, esp, ebp);
+               }
+               break;
+       default:
+               break;
+       }
+
+       *ip = (db_addr_t) eip;
+       *fp = (struct i386_frame *) ebp;
+}
+
+void
+db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count,
+                  char *modif)
+{
+       struct i386_frame *frame;
+       int *argp;
+       db_addr_t callpc;
+       boolean_t first;
+       int i;
+
+       if (count == -1)
+               count = 1024;
+
+       if (!have_addr) {
+               frame = (struct i386_frame *)BP_REGS(&ddb_regs);
+               if (frame == NULL)
+                       frame = (struct i386_frame *)(SP_REGS(&ddb_regs) - 4);
+               callpc = PC_REGS(&ddb_regs);
+       } else if (!INKERNEL(addr)) {
+#if needswork
+               pid = (addr % 16) + ((addr >> 4) % 16) * 10 +
+                   ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 +
+                   ((addr >> 16) % 16) * 10000;
+               /*
+                * The pcb for curproc is not valid at this point,
+                * so fall back to the default case.
+                */
+               if ((curproc != NULL) && (pid == curproc->p_pid)) {
+                       frame = (struct i386_frame *)BP_REGS(&ddb_regs);
+                       if (frame == NULL)
+                               frame = (struct i386_frame *)
+                                   (SP_REGS(&ddb_regs) - 4);
+                       callpc = PC_REGS(&ddb_regs);
+               } else {
+                       pid_t pid;
+                       struct proc *p;
+                       struct pcb *pcb;
+
+                       p = pfind(pid);
+                       if (p == NULL) {
+                               db_printf("pid %d not found\n", pid);
+                               return;
+                       }
+                       if ((p->p_flag & P_SWAPPEDOUT)) {
+                               db_printf("pid %d swapped out\n", pid);
+                               return;
+                       }
+                       pcb = p->p_thread->td_pcb;
+                       frame = (struct i386_frame *)pcb->pcb_ebp;
+                       if (frame == NULL)
+                               frame = (struct i386_frame *)
+                                   (pcb->pcb_esp - 4);
+                       callpc = (db_addr_t)pcb->pcb_eip;
+               }
+#else
+               /* XXX */
+               db_printf("no kernel stack address\n");
+               return;
+#endif
+       } else {
+               /*
+                * Look for something that might be a frame pointer, just as
+                * a convenience.
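+                * A candidate is accepted when the word read there points
+                * a short distance (under 4KB) further up the stack, as a
+                * saved frame pointer would.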
+                */
+               frame = (struct i386_frame *)addr;
+               for (i = 0; i < 4096; i += 4) {
+                       struct i386_frame *check;
+
+                       check = (struct i386_frame *)db_get_value((int)((char *)&frame->f_frame + i), 4, FALSE);
+                       if ((char *)check - (char *)frame >= 0 &&
+                           (char *)check - (char *)frame < 4096
+                       ) {
+                               break;
+                       }
+                       db_printf("%p does not look like a stack frame, skipping\n", (char *)&frame->f_frame + i);
+               }
+               if (i == 4096) {
+                       db_printf("Unable to find anything that looks like a stack frame\n");
+                       return;
+               }
+               frame = (void *)((char *)frame + i);
+               db_printf("Trace beginning at frame %p\n", frame);
+               callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE);
+       }
+
+       first = TRUE;
+       while (count--) {
+               struct i386_frame *actframe;
+               int             narg;
+               const char *    name;
+               db_expr_t       offset;
+               c_db_sym_t      sym;
+#define MAXNARG        16
+               char    *argnames[MAXNARG], **argnp = NULL;
+
+               sym = db_search_symbol(callpc, DB_STGY_ANY, &offset);
+               db_symbol_values(sym, &name, NULL);
+
+               /*
+                * Attempt to determine a (possibly fake) frame that gives
+                * the caller's pc.  It may differ from `frame' if the
+                * current function never sets up a standard frame or hasn't
+                * set one up yet or has just discarded one.  The last two
+                * cases can be guessed fairly reliably for code generated
+                * by gcc.  The first case is too much trouble to handle in
+                * general because the amount of junk on the stack depends
+                * on the pc (the special handling of "calltrap", etc. in
+                * db_nextframe() works because the `next' pc is special).
+                */
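+               /*
+                * When no frame has been pushed yet (or a ret is pending),
+                * the return address sits at the top of the stack, so a
+                * fake frame based at esp - 4 lines f_retaddr up with it.
+                */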
+               actframe = frame;
+               if (first) {
+                       if (!have_addr) {
+                               int instr;
+
+                               instr = db_get_value(callpc, 4, FALSE);
+                               if ((instr & 0x00ffffff) == 0x00e58955) {
+                                       /* pushl %ebp; movl %esp, %ebp */
+                                       actframe = (struct i386_frame *)
+                                           (SP_REGS(&ddb_regs) - 4);
+                               } else if ((instr & 0x0000ffff) == 0x0000e589) {
+                                       /* movl %esp, %ebp */
+                                       actframe = (struct i386_frame *)
+                                           SP_REGS(&ddb_regs);
+                                       if (ddb_regs.tf_ebp == 0) {
+                                               /* Fake caller's frame better. */
+                                               frame = actframe;
+                                       }
+                               } else if ((instr & 0x000000ff) == 0x000000c3) {
+                                       /* ret */
+                                       actframe = (struct i386_frame *)
+                                           (SP_REGS(&ddb_regs) - 4);
+                               } else if (offset == 0) {
+                                       /* Probably a symbol in assembler code. */
+                                       actframe = (struct i386_frame *)
+                                           (SP_REGS(&ddb_regs) - 4);
+                               }
+                       } else if (!strcmp(name, "fork_trampoline")) {
+                               /*
+                                * Don't try to walk back on a stack for a
+                                * process that hasn't actually been run yet.
+                                */
+                               db_print_stack_entry(name, 0, 0, 0, callpc);
+                               break;
+                       }
+                       first = FALSE;
+               }
+
+               argp = &actframe->f_arg0;
+               narg = MAXNARG;
+               if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) {
+                       argnp = argnames;
+               } else {
+                       narg = db_numargs(frame);
+               }
+
+               db_print_stack_entry(name, narg, argnp, argp, callpc);
+
+               if (actframe != frame) {
+                       /* `frame' belongs to caller. */
+                       callpc = (db_addr_t)
+                           db_get_value((int)&actframe->f_retaddr, 4, FALSE);
+                       continue;
+               }
+
+               db_nextframe(&frame, &callpc);
+
+               if (INKERNEL((int) callpc) && !INKERNEL((int) frame)) {
+                       sym = db_search_symbol(callpc, DB_STGY_ANY, &offset);
+                       db_symbol_values(sym, &name, NULL);
+                       db_print_stack_entry(name, 0, 0, 0, callpc);
+                       break;
+               }
+               if (!INKERNEL((int) frame)) {
+                       break;
+               }
+       }
+}
+
+void
+db_print_backtrace(void)
+{
+       register_t  ebp;
+
+       __asm __volatile("movl %%ebp, %0" : "=r" (ebp));
+       db_stack_trace_cmd(ebp, 1, -1, NULL);
+}
+
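+/*
+ * Generate a ddb accessor for each debug register: DB_VAR_GET reads
+ * the register via r<reg>(), any other op writes it via load_<reg>().
+ */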
+#define DB_DRX_FUNC(reg)                                               \
+int                                                                    \
+db_ ## reg (struct db_variable *vp, db_expr_t *valuep, int op)         \
+{                                                                      \
+       if (op == DB_VAR_GET)                                           \
+               *valuep = r ## reg ();                                  \
+       else                                                            \
+               load_ ## reg (*valuep);                                 \
+                                                                       \
+       return(0);                                                      \
+} 
+
+DB_DRX_FUNC(dr0)
+DB_DRX_FUNC(dr1)
+DB_DRX_FUNC(dr2)
+DB_DRX_FUNC(dr3)
+DB_DRX_FUNC(dr4)
+DB_DRX_FUNC(dr5)
+DB_DRX_FUNC(dr6)
+DB_DRX_FUNC(dr7)
+
+static int
+ki386_set_watch(int watchnum, unsigned int watchaddr, int size, int access,
+              struct dbreg *d)
+{
+       int i;
+       unsigned int mask;
+       
+       if (watchnum == -1) {
+               for (i = 0, mask = 0x3; i < 4; i++, mask <<= 2)
+                       if ((d->dr7 & mask) == 0)
+                               break;
+               if (i < 4)
+                       watchnum = i;
+               else
+                       return(-1);
+       }
+       
+       switch (access) {
+       case DBREG_DR7_EXEC:
+               size = 1; /* size must be 1 for an execution breakpoint */
+               /* fall through */
+       case DBREG_DR7_WRONLY:
+       case DBREG_DR7_RDWR:
+               break;
+       default:
+               return(-1);
+       }
+
+       /*
+        * we can watch a 1-, 2- or 4-byte location
+        */
+       switch (size) {
+       case 1:
+               mask = 0x00;
+               break;
+       case 2:
+               mask = 0x01 << 2;
+               break;
+       case 4:
+               mask = 0x03 << 2;
+               break;
+       default:
+               return(-1);
+       }
+
+       mask |= access;
+
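+       /*
+        * DR7 layout: bits 2N and 2N+1 enable watchpoint N (0x2 selects
+        * the global-enable bit), while bits 16+4N through 19+4N hold
+        * its access type and length.
+        */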
+       /* clear the bits we are about to affect */
+       d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16)));
+
+       /* set drN register to the address, N=watchnum */
+       DBREG_DRX(d, watchnum) = watchaddr;
+
+       /* enable the watchpoint */
+       d->dr7 |= (0x2 << (watchnum * 2)) | (mask << (watchnum * 4 + 16));
+
+       return(watchnum);
+}
+
+
+int
+ki386_clr_watch(int watchnum, struct dbreg *d)
+{
+       if (watchnum < 0 || watchnum >= 4)
+               return(-1);
+       
+       d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16)));
+       DBREG_DRX(d, watchnum) = 0;
+       
+       return(0);
+}
+
+
+int
+db_md_set_watchpoint(db_expr_t addr, db_expr_t size)
+{
+       int avail, wsize;
+       int i;
+       struct dbreg d;
+       
+       fill_dbregs(NULL, &d);
+       
+       avail = 0;
+       for(i=0; i < 4; i++) {
+               if ((d.dr7 & (3 << (i * 2))) == 0)
+                       avail++;
+       }
+       
+       if (avail * 4 < size)
+               return(-1);
+       
+       for (i=0; i < 4 && (size != 0); i++) {
+               if ((d.dr7 & (3 << (i * 2))) == 0) {
+                       if (size > 4)
+                               wsize = 4;
+                       else
+                               wsize = size;
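+                       /* hardware watches are 1, 2 or 4 bytes; round 3 up */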
+                       if (wsize == 3)
+                               wsize++;
+                       ki386_set_watch(i, addr, wsize, DBREG_DR7_WRONLY, &d);
+                       addr += wsize;
+                       size -= wsize;
+               }
+       }
+
+       set_dbregs(NULL, &d);
+
+       return(0);
+}
+
+int
+db_md_clr_watchpoint(db_expr_t addr, db_expr_t size)
+{
+       int i;
+       struct dbreg d;
+
+       fill_dbregs(NULL, &d);
+
+       for(i=0; i<4; i++) {
+               if (d.dr7 & (3 << (i * 2))) {
+                       if ((DBREG_DRX((&d), i) >= addr) && 
+                           (DBREG_DRX((&d), i) < addr + size))
+                               ki386_clr_watch(i, &d);
+               }
+       }
+
+       set_dbregs(NULL, &d);
+
+       return(0);
+}
+
+static char *
+watchtype_str(int type)
+{
+       switch (type) {
+       case DBREG_DR7_EXEC:
+               return "execute";
+       case DBREG_DR7_RDWR:
+               return "read/write";
+       case DBREG_DR7_WRONLY:
+               return "write";
+       default:
+               return "invalid";
+       }
+}
+
+void
+db_md_list_watchpoints(void)
+{
+       int i;
+       struct dbreg d;
+
+       fill_dbregs(NULL, &d);
+
+       db_printf("\nhardware watchpoints:\n");
+       db_printf("  watch    status        type  len     address\n"
+                 "  -----  --------  ----------  ---  ----------\n");
+       for (i=0; i < 4; i++) {
+               if (d.dr7 & (0x03 << (i * 2))) {
+                       unsigned type, len;
+                       type = (d.dr7 >> (16 + (i * 4))) & 3;
+                       len =  (d.dr7 >> (16 + (i * 4) + 2)) & 3;
+                       db_printf("  %-5d  %-8s  %10s  %3d  0x%08x\n",
+                                 i, "enabled", watchtype_str(type), 
+                                 len + 1, DBREG_DRX((&d), i));
+               } else {
+                       db_printf("  %-5d  disabled\n", i);
+               }
+       }
+
+       db_printf("\ndebug register values:\n");
+       for (i=0; i < 8; i++)
+               db_printf("  dr%d 0x%08x\n", i, DBREG_DRX((&d),i));
+       db_printf("\n");
+}
index 057e282..36fd01d 100644 (file)
@@ -24,7 +24,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $
- * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.1 2006/12/26 20:46:10 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.2 2007/01/05 22:18:18 dillon Exp $
  */
 
 #include <machine/asmacros.h>
        .globl  gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1
        .globl  gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1
        .globl  gd_spending, gd_ipending, gd_fpending
-       .globl  gd_cnt, gd_private_tss
+       .globl  gd_cnt
 
        .set    gd_cpuid,globaldata + GD_CPUID
-       .set    gd_private_tss,globaldata + GD_PRIVATE_TSS
        .set    gd_other_cpus,globaldata + GD_OTHER_CPUS
        .set    gd_ss_eflags,globaldata + GD_SS_EFLAGS
        .set    gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL
index b3e7c24..9c831c7 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.3 2006/12/04 18:04:01 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.4 2007/01/05 22:18:18 dillon Exp $
  */
 
+#include <sys/syscall.h>
 #include <machine/asmacros.h>
+#include <machine/psl.h>
 #include "assym.s"
        
        .globl  kernbase
@@ -57,10 +59,14 @@ NON_GPROF_ENTRY(sigcode)
        call    *SIGF_HANDLER(%esp)             /* call signal handler */
        lea     SIGF_UC(%esp),%eax              /* get ucontext_t */
        pushl   %eax
+#if 0
        testl   $PSL_VM,UC_EFLAGS(%eax)
        jne     9f
+#endif
        movl    UC_GS(%eax),%gs                 /* restore %gs */
+#if 0
 9:
+#endif
        movl    $SYS_sigreturn,%eax
        pushl   %eax                            /* junk to fake return addr. */
        int     $0x80                           /* enter kernel with args */
@@ -69,6 +75,17 @@ NON_GPROF_ENTRY(sigcode)
        ALIGN_TEXT
 esigcode:
 
+/* void reset_dbregs() */
+ENTRY(reset_dbregs)
+        movl    $0,%eax
+        movl    %eax,%dr7     /* disable all breakpoints first */
+        movl    %eax,%dr0
+        movl    %eax,%dr1
+        movl    %eax,%dr2
+        movl    %eax,%dr3
+        movl    %eax,%dr6
+        ret
+
        .data
        .globl  szsigcode
 szsigcode:
index f8adc1c..3a35bad 100644 (file)
@@ -36,7 +36,7 @@
  * 
  * from: @(#)npx.c     7.2 (Berkeley) 5/12/91
  * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $
- * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.1 2007/01/02 04:24:25 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.2 2007/01/05 22:18:18 dillon Exp $
  */
 
 #include "opt_debug_npx.h"
@@ -365,12 +365,16 @@ npx_intr(void *dummy)
 * before we entered our critical section.  If that occurred, the
         * TS bit will be set and npxthread will be NULL.
         */
+       panic("npx_intr: not coded");
+       /* XXX FP STATE FLAG MUST BE PART OF CONTEXT SUPPLIED BY REAL KERNEL */
+#if 0
        if (rcr0() & CR0_TS) {
                KASSERT(mdcpu->gd_npxthread == NULL, ("gd_npxthread was %p with TS set!", mdcpu->gd_npxthread));
                npxdna();
                crit_exit();
                return;
        }
+#endif
        if (mdcpu->gd_npxthread == NULL) {
                get_mplock();
                kprintf("npxintr: npxthread = %p, curthread = %p\n",
@@ -396,7 +400,7 @@ npx_intr(void *dummy)
         * Pass exception to process.
         */
        frame = (struct intrframe *)&dummy;     /* XXX */
-       if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) {
+       if ((ISPL(frame->if_cs) == SEL_UPL) /*||(frame->if_eflags&PSL_VM)*/) {
                /*
                 * Interrupt is essentially a trap, so we can afford to call
                 * the SIGFPE handler (if any) as soon as the interrupt
index 52c50c9..52287af 100644 (file)
@@ -66,7 +66,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $
- * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.1 2007/01/02 04:24:25 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.2 2007/01/05 22:18:18 dillon Exp $
  */
 
 #include "use_npx.h"
@@ -212,12 +212,14 @@ ENTRY(cpu_exit_switch)
        /*
         * Get us out of the vmspace
         */
+#if 0
        movl    IdlePTD,%ecx
        movl    %cr3,%eax
        cmpl    %ecx,%eax
        je      1f
        movl    %ecx,%cr3
 1:
+#endif
        movl    PCPU(curthread),%ebx
        /*
         * Switch to the next thread.  RET into the restore function, which
@@ -282,6 +284,7 @@ ENTRY(cpu_heavy_restore)
         * YYY which naturally also means that the PM_ACTIVE bit had better
         * already have been set before we set it above, check? YYY
         */
+#if 0
        movl    %cr3,%esi
        movl    PCB_CR3(%edx),%ecx
        cmpl    %esi,%ecx
@@ -292,6 +295,7 @@ ENTRY(cpu_heavy_restore)
 #endif
        movl    %ecx,%cr3
 4:
+#endif
        /*
         * Clear TDF_RUNNING flag in old thread only after cleaning up
         * %cr3.  The target thread is already protected by being TDF_RUNQ
@@ -300,6 +304,7 @@ ENTRY(cpu_heavy_restore)
        andl    $~TDF_RUNNING,TD_FLAGS(%ebx)
        orl     $TDF_RUNNING,TD_FLAGS(%eax)
 
+#if 0
        /*
         * Deal with the PCB extension, restore the private tss
         */
@@ -344,8 +349,8 @@ ENTRY(cpu_heavy_restore)
        movl    %eax, 4(%ebx)
        movl    $GPROC0_SEL*8, %esi             /* GSEL(entry, SEL_KPL) */
        ltr     %si
-
 3:
+#endif
        /*
         * Restore general registers.
         */
@@ -357,6 +362,7 @@ ENTRY(cpu_heavy_restore)
        movl    PCB_EIP(%edx),%eax
        movl    %eax,(%esp)
 
+#if 0
        /*
         * Restore the user LDT if we have one
         */
@@ -372,12 +378,16 @@ ENTRY(cpu_heavy_restore)
        call    set_user_ldt
        popl    %edx
 2:
+#endif
+#if 0
        /*
         * Restore the user TLS if we have one
         */
        pushl   %edx
        call    set_user_TLS
        popl    %edx
+#endif
+#if 0
        /*
         * Restore the %gs segment register, which must be done after
         * loading the user LDT.  Since user processes can modify the
@@ -388,6 +398,7 @@ ENTRY(cpu_heavy_restore)
        .globl  cpu_switch_load_gs
 cpu_switch_load_gs:
        movl    PCB_GS(%edx),%gs
+#endif
 
        /*
         * Restore the DEBUG register state if necessary.
diff --git a/sys/platform/vkernel/i386/tls.c b/sys/platform/vkernel/i386/tls.c
new file mode 100644 (file)
index 0000000..ab21686
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by David Xu <davidxu@t2t2.com> and Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/sysctl.h>
+#include <sys/tls.h>
+#include <sys/reg.h>
+#include <sys/thread2.h>
+
+#include <machine/cpu.h>
+#include <machine/clock.h>
+#include <machine/specialreg.h>
+#include <machine/md_var.h>
+#include <machine/pcb_ext.h>           /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>                /* CPU_prvspace */
+#include <machine/smp.h>
+
+/*
+ * set a TLS descriptor and resync the GDT.  A descriptor may be cleared
+ * by passing info=NULL and infosize=0.  Note that hardware limitations may
+ * cause the size passed in tls_info to be approximated. 
+ *
+ * Returns the value userland needs to load into %gs representing the 
+ * TLS descriptor or -1 on error.
+ *
+ * (struct tls_info *info, int infosize, int which)
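+ *
+ * A sketch of the expected userland usage, assuming a libc wrapper with
+ * the argument order shown above (names illustrative only):
+ *
+ *     struct tls_info info = { my_tls_base, my_tls_size };
+ *     int gsel = sys_set_tls_area(&info, sizeof(info), 0);
+ *     (load the returned selector gsel into %gs)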
+ */
+int
+sys_sys_set_tls_area(struct sys_set_tls_area_args *uap)
+{
+       struct tls_info info;
+       struct segment_descriptor *desc;
+       int error;
+       int i;
+
+       /*
+        * Sanity checks
+        */
+       i = uap->which;
+       if (i < 0 || i >= NGTLS)
+               return (ERANGE);
+       if (uap->infosize < 0)
+               return (EINVAL);
+
+       /*
+        * Maintain forwards compatibility with future extensions.
+        */
+       if (uap->infosize != sizeof(info)) {
+               bzero(&info, sizeof(info));
+               error = copyin(uap->info, &info, 
+                               min(sizeof(info), uap->infosize));
+       } else {
+               error = copyin(uap->info, &info, sizeof(info));
+       }
+       if (error)
+               return (error);
+       if (info.size < -1)
+               return (EINVAL);
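+       /* sizes over 1MB force page granularity below; round up to a page */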
+       if (info.size > (1 << 20))
+               info.size = (info.size + PAGE_MASK) & ~PAGE_MASK;
+
+       /*
+        * Load the descriptor.  A critical section is required in case
+        * an interrupt thread comes along and switches us out and then back
+        * in.
+        */
+       desc = &curthread->td_tls[i];
+       crit_enter();
+       if (info.size == 0) {
+               bzero(desc, sizeof(*desc));
+       } else {
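+               /* i386 descriptors split the base 24/8 and the limit 16/4 */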
+               desc->sd_lobase = (intptr_t)info.base;
+               desc->sd_hibase = (intptr_t)info.base >> 24;
+               desc->sd_def32 = 1;
+               desc->sd_type = SDT_MEMRWA;
+               desc->sd_dpl = SEL_UPL;
+               desc->sd_xx = 0;
+               desc->sd_p = 1;
+               if (info.size == -1) {
+                       /*
+                        * A descriptor size of -1 is a hack to map the
+                        * whole address space.  This type of mapping is
+                        * required for direct-tls accesses of variable
+                        * data, e.g. %gs:OFFSET where OFFSET is negative.
+                        */
+                       desc->sd_lolimit = -1;
+                       desc->sd_hilimit = -1;
+                       desc->sd_gran = 1;
+               } else if (info.size >= (1 << 20)) {
+                       /*
+                        * A descriptor size greater than 1MB requires page
+                        * granularity (the lo+hilimit field is only 20 bits)
+                        */
+                       desc->sd_lolimit = info.size >> PAGE_SHIFT;
+                       desc->sd_hilimit = info.size >> (PAGE_SHIFT + 16);
+                       desc->sd_gran = 1;
+               } else {
+                       /*
+                        * Otherwise a byte-granular size is supported.
+                        */
+                       desc->sd_lolimit = info.size;
+                       desc->sd_hilimit = info.size >> 16;
+                       desc->sd_gran = 0;
+               }
+       }
+       crit_exit();
+       uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL);
+       set_user_TLS();
+       return(0);
+}
+       
+/*
+ * Return the specified TLS descriptor to userland.
+ *
+ * Returns the value userland needs to load into %gs representing the 
+ * TLS descriptor or -1 on error.
+ *
+ * (struct tls_info *info, int infosize, int which)
+ */
+int
+sys_sys_get_tls_area(struct sys_get_tls_area_args *uap)
+{
+       struct tls_info info;
+       struct segment_descriptor *desc;
+       int error;
+       int i;
+
+       /*
+        * Sanity checks
+        */
+       i = uap->which;
+       if (i < 0 || i >= NGTLS)
+               return (ERANGE);
+       if (uap->infosize < 0)
+               return (EINVAL);
+
+       /*
+        * Unpack the descriptor.  ENOENT is returned for any descriptor
+        * which has not been loaded.  uap->info may be NULL.
+        */
+       desc = &curthread->td_tls[i];
+       if (desc->sd_p) {
+               if (uap->info && uap->infosize > 0) {
+                       bzero(&info, sizeof(info));
+                       info.base = (void *)(intptr_t)
+                               ((desc->sd_hibase << 24) | desc->sd_lobase);
+                       info.size = (desc->sd_hilimit << 16) | desc->sd_lolimit;
+                       if (desc->sd_gran)
+                               info.size <<= PAGE_SHIFT;
+                       error = copyout(&info, uap->info,
+                                       min(sizeof(info), uap->infosize));
+               } else {
+                       error = 0;
+               }
+               uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL);
+       } else {
+               error = ENOENT;
+       }
+       return(error);
+}
+
+void
+set_user_TLS(void)
+{
+       panic("set_user_TLS");
+}
diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c
new file mode 100644 (file)
index 0000000..5473d51
--- /dev/null
@@ -0,0 +1,1551 @@
+/*-
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     from: @(#)trap.c        7.4 (Berkeley) 5/13/91
+ * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+/*
+ * 386 Trap and System call handling
+ */
+
+#include "use_isa.h"
+#include "use_npx.h"
+
+#include "opt_ddb.h"
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/pioctl.h>
+#include <sys/kernel.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#include <sys/malloc.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+#include <sys/upcall.h>
+#include <sys/vkernel.h>
+#include <sys/sysproto.h>
+#include <sys/sysunion.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/tss.h>
+#include <machine/globaldata.h>
+
+#include <machine/vm86.h>
+
+#include <ddb/ddb.h>
+#include <sys/msgport2.h>
+#include <sys/thread2.h>
+
+#ifdef SMP
+
+#define MAKEMPSAFE(have_mplock)                        \
+       if (have_mplock == 0) {                 \
+               get_mplock();                   \
+               have_mplock = 1;                \
+       }
+
+#else
+
+#define MAKEMPSAFE(have_mplock)
+
+#endif
+
+int (*pmath_emulate) (struct trapframe *);
+
+extern void trap (struct trapframe frame);
+extern int trapwrite (unsigned addr);
+extern void syscall2 (struct trapframe frame);
+
+static int trap_pfault (struct trapframe *, int, vm_offset_t);
+static void trap_fatal (struct trapframe *, vm_offset_t);
+void dblfault_handler (void);
+
+#if 0
+extern inthand_t IDTVEC(syscall);
+#endif
+
+#define MAX_TRAP_MSG           28
+static char *trap_msg[] = {
+       "",                                     /*  0 unused */
+       "privileged instruction fault",         /*  1 T_PRIVINFLT */
+       "",                                     /*  2 unused */
+       "breakpoint instruction fault",         /*  3 T_BPTFLT */
+       "",                                     /*  4 unused */
+       "",                                     /*  5 unused */
+       "arithmetic trap",                      /*  6 T_ARITHTRAP */
+       "system forced exception",              /*  7 T_ASTFLT */
+       "",                                     /*  8 unused */
+       "general protection fault",             /*  9 T_PROTFLT */
+       "trace trap",                           /* 10 T_TRCTRAP */
+       "",                                     /* 11 unused */
+       "page fault",                           /* 12 T_PAGEFLT */
+       "",                                     /* 13 unused */
+       "alignment fault",                      /* 14 T_ALIGNFLT */
+       "",                                     /* 15 unused */
+       "",                                     /* 16 unused */
+       "",                                     /* 17 unused */
+       "integer divide fault",                 /* 18 T_DIVIDE */
+       "non-maskable interrupt trap",          /* 19 T_NMI */
+       "overflow trap",                        /* 20 T_OFLOW */
+       "FPU bounds check fault",               /* 21 T_BOUND */
+       "FPU device not available",             /* 22 T_DNA */
+       "double fault",                         /* 23 T_DOUBLEFLT */
+       "FPU operand fetch fault",              /* 24 T_FPOPFLT */
+       "invalid TSS fault",                    /* 25 T_TSSFLT */
+       "segment not present fault",            /* 26 T_SEGNPFLT */
+       "stack fault",                          /* 27 T_STKFLT */
+       "machine check trap",                   /* 28 T_MCHK */
+};
+
+#ifdef DDB
+static int ddb_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
+       &ddb_on_nmi, 0, "Go to DDB on NMI");
+#endif
+static int panic_on_nmi = 1;
+SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
+       &panic_on_nmi, 0, "Panic on NMI");
+static int fast_release;
+SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW,
+       &fast_release, 0, "Passive Release was optimal");
+static int slow_release;
+SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW,
+       &slow_release, 0, "Passive Release was nonoptimal");
+#ifdef SMP
+static int syscall_mpsafe = 0;
+SYSCTL_INT(_kern, OID_AUTO, syscall_mpsafe, CTLFLAG_RW,
+       &syscall_mpsafe, 0, "Allow MPSAFE marked syscalls to run without BGL");
+TUNABLE_INT("kern.syscall_mpsafe", &syscall_mpsafe);
+static int trap_mpsafe = 0;
+SYSCTL_INT(_kern, OID_AUTO, trap_mpsafe, CTLFLAG_RW,
+       &trap_mpsafe, 0, "Allow traps to mostly run without the BGL");
+TUNABLE_INT("kern.trap_mpsafe", &trap_mpsafe);
+#endif
+
+MALLOC_DEFINE(M_SYSMSG, "sysmsg", "sysmsg structure");
+extern int max_sysmsg;
+
+/*
+ * Passive USER->KERNEL transition.  This only occurs if we block in the
+ * kernel while still holding our userland priority.  We have to fix up our
+ * priority in order to avoid potential deadlocks before we allow the system
+ * to switch us to another thread.
+ */
+static void
+passive_release(struct thread *td)
+{
+       struct lwp *lp = td->td_lwp;
+
+       td->td_release = NULL;
+       lwkt_setpri_self(TDPRI_KERN_USER);
+       lp->lwp_proc->p_usched->release_curproc(lp);
+}
+
+/*
+ * userenter() passively intercepts the thread switch function to increase
+ * the thread priority from a user priority to a kernel priority, reducing
+ * syscall and trap overhead for the case where no switch occurs.
+ */
+
+static __inline void
+userenter(struct thread *curtd)
+{
+       curtd->td_release = passive_release;
+}
+
+/*
+ * Handle signals, upcalls, profiling, and other AST's and/or tasks that
+ * must be completed before we can return to or try to return to userland.
+ *
+ * Note that td_sticks is a 64 bit quantity, but there's no point doing
+ * 64 bit arithmetic on the delta calculation, so the absolute tick values
+ * are truncated to an integer.
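+ *
+ * Each condition handled below loops back to "recheck", since servicing
+ * one (e.g. posting a signal) can raise another.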
+ */
+static void
+userret(struct lwp *lp, struct trapframe *frame, int sticks)
+{
+       struct proc *p = lp->lwp_proc;
+       int sig;
+
+       /*
+        * Charge system time if profiling.  Note: times are in microseconds.
+        * This may do a copyout and block, so do it first even though it
+        * means some system time will be charged as user time.
+        */
+       if (p->p_flag & P_PROFIL) {
+               addupc_task(p, frame->tf_eip, 
+                       (u_int)((int)lp->lwp_thread->td_sticks - sticks));
+       }
+
+recheck:
+       /*
+        * Block here if we are in a stopped state.
+        */
+       if (p->p_flag & P_STOPPED) {
+               get_mplock();
+               tstop(p);
+               rel_mplock();
+               goto recheck;
+       }
+
+       /*
+        * Post any pending upcalls
+        */
+       if (p->p_flag & P_UPCALLPEND) {
+               p->p_flag &= ~P_UPCALLPEND;
+               get_mplock();
+               postupcall(lp);
+               rel_mplock();
+               goto recheck;
+       }
+
+       /*
+        * Post any pending signals
+        */
+       if ((sig = CURSIG(p)) != 0) {
+               get_mplock();
+               postsig(sig);
+               rel_mplock();
+               goto recheck;
+       }
+
+       /*
+        * block here if we are swapped out, but still process signals
+        * (such as SIGKILL).  proc0 (the swapin scheduler) is already
+        * aware of our situation, we do not have to wake it up.
+        */
+       if (p->p_flag & P_SWAPPEDOUT) {
+               get_mplock();
+               p->p_flag |= P_SWAPWAIT;
+               swapin_request();
+               if (p->p_flag & P_SWAPWAIT)
+                       tsleep(p, PCATCH, "SWOUT", 0);
+               p->p_flag &= ~P_SWAPWAIT;
+               rel_mplock();
+               goto recheck;
+       }
+}
+
+/*
+ * Clean up from userenter and any passive release that might have occurred.
+ * We must reclaim the current-process designation before we can return
+ * to usermode.  We also handle both LWKT and USER reschedule requests.
+ */
+static __inline void
+userexit(struct lwp *lp)
+{
+       struct thread *td = lp->lwp_thread;
+       globaldata_t gd = td->td_gd;
+
+#if 0
+       /*
+        * If a user reschedule is requested force a new process to be
+        * chosen by releasing the current process.  Our process will only
+        * be chosen again if it has a considerably better priority.
+        */
+       if (user_resched_wanted())
+               lp->lwp_proc->p_usched->release_curproc(lp);
+#endif
+
+       /*
+        * Handle a LWKT reschedule request first.  Since our passive release
+        * is still in place we do not have to do anything special.
+        */
+       if (lwkt_resched_wanted())
+               lwkt_switch();
+
+       /*
+        * Acquire the current process designation for this user scheduler
+        * on this cpu.  This will also handle any user-reschedule requests.
+        */
+       lp->lwp_proc->p_usched->acquire_curproc(lp);
+       /* We may have switched cpus on acquisition */
+       gd = td->td_gd;
+
+       /*
+        * Reduce our priority in preparation for a return to userland.  If
+        * our passive release function was still in place, our priority was
+        * never raised and does not need to be reduced.
+        */
+       if (td->td_release == NULL)
+               lwkt_setpri_self(TDPRI_USER_NORM);
+       td->td_release = NULL;
+
+       /*
+        * After reducing our priority there might be other kernel-level
+        * LWKTs that now have a greater priority.  Run them as necessary.
+        * We don't have to worry about losing cpu to userland because
+        * we still control the current-process designation and we no longer
+        * have a passive release function installed.
+        */
+       if (lwkt_checkpri_self())
+               lwkt_switch();
+}
+
+/*
+ * Exception, fault, and trap interface to the kernel.
+ * This common code is called from assembly language IDT gate entry
+ * routines that prepare a suitable stack frame, and restore this
+ * frame after the exception has been processed.
+ *
+ * This function is also called from doreti in an interlock to handle ASTs.
+ * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
+ *
+ * NOTE!  We have to retrieve the fault address prior to obtaining the
+ * MP lock because get_mplock() may switch out.  YYY cr2 really ought
+ * to be retrieved by the assembly code, not here.
+ *
+ * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicking
+ * if an attempt is made to switch from a fast interrupt or IPI.  This is
+ * necessary to properly take fatal kernel traps on SMP machines if 
+ * get_mplock() has to block.
+ */
+
+void
+trap(struct trapframe frame)
+{
+       struct globaldata *gd = mycpu;
+       struct thread *td = gd->gd_curthread;
+       struct lwp *lp = td->td_lwp;
+       struct proc *p;
+       int sticks = 0;
+       int i = 0, ucode = 0, type, code;
+#ifdef SMP
+       int have_mplock = 0;
+#endif
+#ifdef INVARIANTS
+       int crit_count = td->td_pri & ~TDPRI_MASK;
+#endif
+       vm_offset_t eva;
+
+       p = td->td_proc;
+#ifdef DDB
+       if (db_active) {
+               eva = (frame.tf_trapno == T_PAGEFLT ? rcr2() : 0);
+               ++gd->gd_trap_nesting_level;
+               MAKEMPSAFE(have_mplock);
+               trap_fatal(&frame, eva);
+               --gd->gd_trap_nesting_level;
+               goto out2;
+       }
+#endif
+
+       eva = 0;
+       ++gd->gd_trap_nesting_level;
+       if (frame.tf_trapno == T_PAGEFLT) {
+               /*
+                * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
+                * This problem is worked around by using an interrupt
+                * gate for the pagefault handler.  We are finally ready
+                * to read %cr2 and then must reenable interrupts.
+                *
+                * XXX this should be in the switch statement, but the
+                * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
+                * flow of control too much for this to be obviously
+                * correct.
+                */
+               eva = rcr2();
+               cpu_enable_intr();
+       }
+#ifdef SMP
+       if (trap_mpsafe == 0)
+               MAKEMPSAFE(have_mplock);
+#endif
+
+       --gd->gd_trap_nesting_level;
+
+       if (!(frame.tf_eflags & PSL_I)) {
+               /*
+                * Buggy application or kernel code has disabled interrupts
+                * and then trapped.  Enabling interrupts now is wrong, but
+                * it is better than running with interrupts disabled until
+                * they are accidentally enabled later.
+                */
+               type = frame.tf_trapno;
+               if (ISPL(frame.tf_cs)==SEL_UPL /*||(frame.tf_eflags&PSL_VM)*/) {
+                       MAKEMPSAFE(have_mplock);
+                       kprintf(
+                           "pid %ld (%s): trap %d with interrupts disabled\n",
+                           (long)curproc->p_pid, curproc->p_comm, type);
+               } else if (type != T_BPTFLT && type != T_TRCTRAP) {
+                       /*
+                        * XXX not quite right, since this may be for a
+                        * multiple fault in user mode.
+                        */
+                       MAKEMPSAFE(have_mplock);
+                       kprintf("kernel trap %d with interrupts disabled\n",
+                           type);
+               }
+               cpu_enable_intr();
+       }
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+restart:
+#endif
+       type = frame.tf_trapno;
+       code = frame.tf_err;
+
+#if 0
+       if (in_vm86call) {
+               ASSERT_MP_LOCK_HELD(curthread);
+               if (frame.tf_eflags & PSL_VM &&
+                   (type == T_PROTFLT || type == T_STKFLT)) {
+#ifdef SMP
+                       KKASSERT(td->td_mpcount > 0);
+#endif
+                       i = vm86_emulate((struct vm86frame *)&frame);
+#ifdef SMP
+                       KKASSERT(td->td_mpcount > 0);
+#endif
+                       if (i != 0) {
+                               /*
+                                * returns to original process
+                                */
+#ifdef SMP
+                               vm86_trap((struct vm86frame *)&frame,
+                                         have_mplock);
+#else
+                               vm86_trap((struct vm86frame *)&frame, 0);
+#endif
+                               KKASSERT(0); /* NOT REACHED */
+                       }
+                       goto out2;
+               }
+               switch (type) {
+                       /*
+                        * these traps want either a process context, or
+                        * assume a normal userspace trap.
+                        */
+               case T_PROTFLT:
+               case T_SEGNPFLT:
+                       trap_fatal(&frame, eva);
+                       goto out2;
+               case T_TRCTRAP:
+                       type = T_BPTFLT;        /* kernel breakpoint */
+                       /* FALL THROUGH */
+               }
+               goto kernel_trap;       /* normal kernel trap handling */
+       }
+#endif
+
+        if ((ISPL(frame.tf_cs) == SEL_UPL) /*||(frame.tf_eflags & PSL_VM)*/) {
+               /* user trap */
+
+               userenter(td);
+
+               sticks = (int)td->td_sticks;
+               lp->lwp_md.md_regs = &frame;
+
+               switch (type) {
+               case T_PRIVINFLT:       /* privileged instruction fault */
+                       ucode = type;
+                       i = SIGILL;
+                       break;
+
+               case T_BPTFLT:          /* bpt instruction fault */
+               case T_TRCTRAP:         /* trace trap */
+                       frame.tf_eflags &= ~PSL_T;
+                       i = SIGTRAP;
+                       break;
+
+               case T_ARITHTRAP:       /* arithmetic trap */
+                       ucode = code;
+                       i = SIGFPE;
+                       break;
+
+               case T_ASTFLT:          /* Allow process switch */
+                       mycpu->gd_cnt.v_soft++;
+                       if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
+                               atomic_clear_int_nonlocked(&mycpu->gd_reqflags,
+                                           RQF_AST_OWEUPC);
+                               addupc_task(p, p->p_prof.pr_addr,
+                                           p->p_prof.pr_ticks);
+                       }
+                       goto out;
+
+                       /*
+                        * The following two traps can happen in
+                        * vm86 mode, and, if so, we want to handle
+                        * them specially.
+                        */
+               case T_PROTFLT:         /* general protection fault */
+               case T_STKFLT:          /* stack fault */
+#if 0
+                       if (frame.tf_eflags & PSL_VM) {
+                               i = vm86_emulate((struct vm86frame *)&frame);
+                               if (i == 0)
+                                       goto out;
+                               break;
+                       }
+#endif
+                       /* FALL THROUGH */
+
+               case T_SEGNPFLT:        /* segment not present fault */
+               case T_TSSFLT:          /* invalid TSS fault */
+               case T_DOUBLEFLT:       /* double fault */
+               default:
+                       ucode = code + BUS_SEGM_FAULT;
+                       i = SIGBUS;
+                       break;
+
+               case T_PAGEFLT:         /* page fault */
+                       MAKEMPSAFE(have_mplock);
+                       i = trap_pfault(&frame, TRUE, eva);
+                       if (i == -1)
+                               goto out;
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+                       if (i == -2)
+                               goto restart;
+#endif
+                       if (i == 0)
+                               goto out;
+
+                       ucode = T_PAGEFLT;
+
+                       /*
+                        * The code is lost because tf_err is overwritten
+                        * with the fault address.  Store it in the upper
+                        * 16 bits of tf_trapno for vkernel consumption.
+                        */
+                       if (p->p_vkernel && p->p_vkernel->vk_current) {
+                               frame.tf_trapno |= (code << 16);
+                       }
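+                       /*
+                        * A vkernel consumer could then, for example,
+                        * recover both values with:
+                        *
+                        *      code = (tf_trapno >> 16) & 0xffff;
+                        *      trapno = tf_trapno & 0xffff;
+                        */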
+                       break;
+
+               case T_DIVIDE:          /* integer divide fault */
+                       ucode = FPE_INTDIV;
+                       i = SIGFPE;
+                       break;
+
+#if NISA > 0
+               case T_NMI:
+                       MAKEMPSAFE(have_mplock);
+                       /* machine/parity/power fail/"kitchen sink" faults */
+                       if (isa_nmi(code) == 0) {
+#ifdef DDB
+                               /*
+                                * NMI can be hooked up to a pushbutton
+                                * for debugging.
+                                */
+                               if (ddb_on_nmi) {
+                                       kprintf ("NMI ... going to debugger\n");
+                                       kdb_trap (type, 0, &frame);
+                               }
+#endif /* DDB */
+                               goto out2;
+                       } else if (panic_on_nmi)
+                               panic("NMI indicates hardware failure");
+                       break;
+#endif /* NISA > 0 */
+
+               case T_OFLOW:           /* integer overflow fault */
+                       ucode = FPE_INTOVF;
+                       i = SIGFPE;
+                       break;
+
+               case T_BOUND:           /* bounds check fault */
+                       ucode = FPE_FLTSUB;
+                       i = SIGFPE;
+                       break;
+
+               case T_DNA:
+#if NNPX > 0
+                       /* 
+                        * The kernel may have switched out the FP unit's
+                        * state, causing the user process to take a fault
+                        * when it tries to use the FP unit.  Restore the
+                        * state here
+                        */
+                       if (npxdna())
+                               goto out;
+#endif
+                       if (!pmath_emulate) {
+                               i = SIGFPE;
+                               ucode = FPE_FPU_NP_TRAP;
+                               break;
+                       }
+                       i = (*pmath_emulate)(&frame);
+                       if (i == 0) {
+                               if (!(frame.tf_eflags & PSL_T))
+                                       goto out2;
+                               frame.tf_eflags &= ~PSL_T;
+                               i = SIGTRAP;
+                       }
+                       /* else ucode = emulator_only_knows() XXX */
+                       break;
+
+               case T_FPOPFLT:         /* FPU operand fetch fault */
+                       ucode = T_FPOPFLT;
+                       i = SIGILL;
+                       break;
+
+               case T_XMMFLT:          /* SIMD floating-point exception */
+                       ucode = 0; /* XXX */
+                       i = SIGFPE;
+                       break;
+               }
+       } else {
+#if 0
+kernel_trap:
+#endif
+               /* kernel trap */
+
+               switch (type) {
+               case T_PAGEFLT:                 /* page fault */
+                       MAKEMPSAFE(have_mplock);
+                       trap_pfault(&frame, FALSE, eva);
+                       goto out2;
+
+               case T_DNA:
+#if NNPX > 0
+                       /*
+                        * The kernel may be using npx for copying or other
+                        * purposes.
+                        */
+                       if (npxdna())
+                               goto out2;
+#endif
+                       break;
+
+               case T_PROTFLT:         /* general protection fault */
+               case T_SEGNPFLT:        /* segment not present fault */
+                       /*
+                        * Invalid segment selectors and out of bounds
+                        * %eip's and %esp's can be set up in user mode.
+                        * This causes a fault in kernel mode when the
+                        * kernel tries to return to user mode.  We want
+                        * to get this fault so that we can fix the
+                        * problem here and not have to check all the
+                        * selectors and pointers when the user changes
+                        * them.
+                        */
+#define        MAYBE_DORETI_FAULT(where, whereto)                              \
+       do {                                                            \
+               if (frame.tf_eip == (int)where) {                       \
+                       frame.tf_eip = (int)whereto;                    \
+                       goto out2;                                      \
+               }                                                       \
+       } while (0)
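+/*
+ * i.e. if the fault occurred at the known instruction address `where'
+ * in the return-to-user path, resume at the fixup label `whereto'
+ * instead of falling through to the fatal-trap handling below.
+ */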
+
+#if 0
+                       /*
+                        * Since we don't save %gs across an interrupt
+                        * frame this check must occur outside the intr
+                        * nesting level check.
+                        */
+                       if (frame.tf_eip == (int)cpu_switch_load_gs) {
+                               td->td_pcb->pcb_gs = 0;
+                               MAKEMPSAFE(have_mplock);
+                               ksignal(p, SIGBUS);
+                               goto out2;
+                       }
+#endif
+                       if (mycpu->gd_intr_nesting_level == 0) {
+#if 0
+                               /*
+                                * Invalid %fs's and %gs's can be created using
+                                * procfs or PT_SETREGS or by invalidating the
+                                * underlying LDT entry.  This causes a fault
+                                * in kernel mode when the kernel attempts to
+                                * switch contexts.  Lose the bad context
+                                * (XXX) so that we can continue, and generate
+                                * a signal.
+                                */
+                               MAYBE_DORETI_FAULT(doreti_iret,
+                                                  doreti_iret_fault);
+                               MAYBE_DORETI_FAULT(doreti_popl_ds,
+                                                  doreti_popl_ds_fault);
+                               MAYBE_DORETI_FAULT(doreti_popl_es,
+                                                  doreti_popl_es_fault);
+                               MAYBE_DORETI_FAULT(doreti_popl_fs,
+                                                  doreti_popl_fs_fault);
+#endif
+                               if (td->td_pcb->pcb_onfault) {
+                                       frame.tf_eip = 
+                                           (register_t)td->td_pcb->pcb_onfault;
+                                       goto out2;
+                               }
+                       }
+                       break;
+
+               case T_TSSFLT:
+                       /*
+                        * PSL_NT can be set in user mode and isn't cleared
+                        * automatically when the kernel is entered.  This
+                        * causes a TSS fault when the kernel attempts to
+                        * `iret' because the TSS link is uninitialized.  We
+                        * want to get this fault so that we can fix the
+                        * problem here and not every time the kernel is
+                        * entered.
+                        */
+                       if (frame.tf_eflags & PSL_NT) {
+                               frame.tf_eflags &= ~PSL_NT;
+                               goto out2;
+                       }
+                       break;
+
+               case T_TRCTRAP:  /* trace trap */
+#if 0
+                       if (frame.tf_eip == (int)IDTVEC(syscall)) {
+                               /*
+                                * We've just entered system mode via the
+                                * syscall lcall.  Continue single stepping
+                                * silently until the syscall handler has
+                                * saved the flags.
+                                */
+                               goto out2;
+                       }
+                       if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
+                               /*
+                                * The syscall handler has now saved the
+                                * flags.  Stop single stepping it.
+                                */
+                               frame.tf_eflags &= ~PSL_T;
+                               goto out2;
+                       }
+#endif
+#if 0
+                        /*
+                         * Ignore debug register trace traps due to
+                         * accesses in the user's address space, which
+                         * can happen under several conditions such as
+                         * if a user sets a watchpoint on a buffer and
+                         * then passes that buffer to a system call.
+                         * We still want to get TRCTRAPS for addresses
+                         * in kernel space because that is useful when
+                         * debugging the kernel.
+                         */
+                        if (user_dbreg_trap()) {
+                                /*
+                                 * Reset breakpoint bits because the
+                                 * processor doesn't
+                                 */
+                                load_dr6(rdr6() & 0xfffffff0);
+                                goto out2;
+                        }
+#endif
+                       /*
+                        * Fall through (TRCTRAP kernel mode, kernel address)
+                        */
+               case T_BPTFLT:
+                       /*
+                        * If DDB is enabled, let it handle the debugger trap.
+                        * Otherwise, debugger traps "can't happen".
+                        */
+#ifdef DDB
+                       MAKEMPSAFE(have_mplock);
+                       if (kdb_trap (type, 0, &frame))
+                               goto out2;
+#endif
+                       break;
+
+#if NISA > 0
+               case T_NMI:
+                       MAKEMPSAFE(have_mplock);
+#ifdef POWERFAIL_NMI
+#ifndef TIMER_FREQ
+#  define TIMER_FREQ 1193182
+#endif
+       handle_powerfail:
+               {
+                       static unsigned lastalert = 0;
+
+                       if (time_second - lastalert > 10) {
+                               log(LOG_WARNING, "NMI: power fail\n");
+                               sysbeep(TIMER_FREQ / 880, hz);
+                               lastalert = time_second;
+                       }
+                       /* YYY mp count */
+                       goto out2;
+               }
+#else /* !POWERFAIL_NMI */
+                       /* machine/parity/power fail/"kitchen sink" faults */
+                       if (isa_nmi(code) == 0) {
+#ifdef DDB
+                               /*
+                                * NMI can be hooked up to a pushbutton
+                                * for debugging.
+                                */
+                               if (ddb_on_nmi) {
+                                       kprintf ("NMI ... going to debugger\n");
+                                       kdb_trap (type, 0, &frame);
+                               }
+#endif /* DDB */
+                               goto out2;
+                       } else if (panic_on_nmi == 0)
+                               goto out2;
+                       /* FALL THROUGH */
+#endif /* POWERFAIL_NMI */
+#endif /* NISA > 0 */
+               }
+
+               MAKEMPSAFE(have_mplock);
+               trap_fatal(&frame, eva);
+               goto out2;
+       }
+
+       /*
+        * Virtual kernel intercept - if the fault is directly related to a
+        * VM context managed by a virtual kernel then let the virtual kernel
+        * handle it.
+        */
+       if (p->p_vkernel && p->p_vkernel->vk_current) {
+               vkernel_trap(p, &frame);
+               goto out;
+       }
+
+       /*
+        * Translate fault for emulators (e.g. Linux) 
+        */
+       if (*p->p_sysent->sv_transtrap)
+               i = (*p->p_sysent->sv_transtrap)(i, type);
+
+       MAKEMPSAFE(have_mplock);
+       trapsignal(p, i, ucode);
+
+#ifdef DEBUG
+       if (type <= MAX_TRAP_MSG) {
+               uprintf("fatal process exception: %s",
+                       trap_msg[type]);
+               if ((type == T_PAGEFLT) || (type == T_PROTFLT))
+                       uprintf(", fault VA = 0x%lx", (u_long)eva);
+               uprintf("\n");
+       }
+#endif
+
+out:
+#ifdef SMP
+        if (ISPL(frame.tf_cs) == SEL_UPL)
+               KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame.tf_eip));
+#endif
+       userret(lp, &frame, sticks);
+       userexit(lp);
+out2:  ;
+#ifdef SMP
+       if (have_mplock)
+               rel_mplock();
+#endif
+#ifdef INVARIANTS
+       KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+               ("syscall: critical section count mismatch! %d/%d",
+               crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+#endif
+}
+
+#ifdef notyet
+/*
+ * This version doesn't allow a page fault to user space while
+ * in the kernel. The rest of the kernel needs to be made "safe"
+ * before this can be used. I think the only thing remaining
+ * to be made safe is the process tracing/debugging code.
+ */
+static int
+trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
+{
+       vm_offset_t va;
+       struct vmspace *vm = NULL;
+       vm_map_t map = 0;
+       int rv = 0;
+       vm_prot_t ftype;
+       thread_t td = curthread;
+       struct proc *p = td->td_proc;   /* may be NULL */
+
+       if (frame->tf_err & PGEX_W)
+               ftype = VM_PROT_WRITE;
+       else
+               ftype = VM_PROT_READ;
+
+       va = trunc_page(eva);
+       if (va < KvaStart) {
+               vm_offset_t v;
+               vm_page_t mpte;
+
+               if (p == NULL ||
+                   (!usermode && va < VM_MAX_USER_ADDRESS &&
+                    (td->td_gd->gd_intr_nesting_level != 0 || 
+                     td->td_pcb->pcb_onfault == NULL))) {
+                       trap_fatal(frame, eva);
+                       return (-1);
+               }
+
+               /*
+                * This is a fault on non-kernel virtual memory.
+                * vm is initialized above to NULL. If curproc is NULL
+                * or curproc->p_vmspace is NULL the fault is fatal.
+                */
+               vm = p->p_vmspace;
+               if (vm == NULL)
+                       goto nogo;
+
+               map = &vm->vm_map;
+
+               /*
+                * Keep swapout from messing with us during this
+                *      critical time.
+                */
+               ++p->p_lock;
+
+               /*
+                * Grow the stack if necessary.  grow_stack() returns
+                * false only if va falls into a growable stack region
+                * and the stack growth fails.  It returns true if va
+                * was not within a growable stack region, or if the
+                * stack growth succeeded.
+                */
+               if (!grow_stack (p, va)) {
+                       rv = KERN_FAILURE;
+                       --p->p_lock;
+                       goto nogo;
+               }
+               
+               /* Fault in the user page: */
+               rv = vm_fault(map, va, ftype,
+                             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
+                                                     : VM_FAULT_NORMAL);
+
+               --p->p_lock;
+       } else {
+               /*
+                * Don't allow user-mode faults in kernel address space.
+                */
+               if (usermode)
+                       goto nogo;
+
+               /*
+                * Since we know that kernel virtual address addresses
+                * always have pte pages mapped, we just have to fault
+                * the page.
+                */
+               rv = vm_fault(&kernel_map, va, ftype, VM_FAULT_NORMAL);
+       }
+
+       if (rv == KERN_SUCCESS)
+               return (0);
+nogo:
+       if (!usermode) {
+               if (td->td_gd->gd_intr_nesting_level == 0 &&
+                   td->td_pcb->pcb_onfault) {
+                       frame->tf_eip = (register_t)td->td_pcb->pcb_onfault;
+                       return (0);
+               }
+               trap_fatal(frame, eva);
+               return (-1);
+       }
+
+       /* kludge to pass faulting virtual address to sendsig */
+       frame->tf_err = eva;
+
+       return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
+#endif
+
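+/*
+ * Handle a page fault.  Returns 0 if the fault was resolved, -1 if it
+ * was fatal (trap_fatal() has already been called), or a signal number
+ * (SIGBUS or SIGSEGV) for the caller to deliver to the process.
+ */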
+int
+trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
+{
+       vm_offset_t va;
+       struct vmspace *vm = NULL;
+       vm_map_t map = 0;
+       int rv = 0;
+       vm_prot_t ftype;
+       thread_t td = curthread;
+       struct proc *p = td->td_proc;
+
+       va = trunc_page(eva);
+       if (va >= KERNBASE) {
+               /*
+                * Don't allow user-mode faults in kernel address space.
+                * An exception:  if the faulting address is the invalid
+                * instruction entry in the IDT, then the Intel Pentium
+                * F00F bug workaround was triggered, and we need to
+                * treat it as an illegal instruction, and not a page
+                * fault.
+                */
+               if (usermode)
+                       goto nogo;
+
+               map = &kernel_map;
+       } else {
+               /*
+                * This is a fault on non-kernel virtual memory.
+                * vm is initialized above to NULL. If curproc is NULL
+                * or curproc->p_vmspace is NULL the fault is fatal.
+                */
+               if (p != NULL)
+                       vm = p->p_vmspace;
+
+               if (vm == NULL)
+                       goto nogo;
+
+               map = &vm->vm_map;
+       }
+
+       if (frame->tf_err & PGEX_W)
+               ftype = VM_PROT_WRITE;
+       else
+               ftype = VM_PROT_READ;
+
+       if (map != &kernel_map) {
+               /*
+                * Keep swapout from messing with us during this
+                *      critical time.
+                */
+               ++p->p_lock;
+
+               /*
+                * Grow the stack if necessary.  grow_stack() returns
+                * false only if va falls into a growable stack region
+                * and the stack growth fails.  It returns true if va
+                * was not within a growable stack region, or if the
+                * stack growth succeeded.
+                */
+               if (!grow_stack (p, va)) {
+                       rv = KERN_FAILURE;
+                       --p->p_lock;
+                       goto nogo;
+               }
+
+               /* Fault in the user page: */
+               rv = vm_fault(map, va, ftype,
+                             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
+                                                     : VM_FAULT_NORMAL);
+
+               --p->p_lock;
+       } else {
+               /*
+                * Don't have to worry about process locking or stacks
+                * in the kernel.
+                */
+               rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+       }
+
+       if (rv == KERN_SUCCESS)
+               return (0);
+nogo:
+       if (!usermode) {
+               if (td->td_gd->gd_intr_nesting_level == 0 &&
+                   td->td_pcb->pcb_onfault) {
+                       frame->tf_eip = (register_t)td->td_pcb->pcb_onfault;
+                       return (0);
+               }
+               trap_fatal(frame, eva);
+               return (-1);
+       }
+
+       /* kludge to pass faulting virtual address to sendsig */
+       frame->tf_err = eva;
+
+       return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
+
+static void
+trap_fatal(struct trapframe *frame, vm_offset_t eva)
+{
+       int code, type, ss, esp;
+
+       code = frame->tf_err;
+       type = frame->tf_trapno;
+
+       if (type <= MAX_TRAP_MSG)
+               kprintf("\n\nFatal trap %d: %s while in %s mode\n",
+                       type, trap_msg[type],
+                       /*frame->tf_eflags & PSL_VM ? "vm86" :*/
+                       ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
+#ifdef SMP
+       /* three separate prints in case of a trap on an unmapped page */
+       kprintf("mp_lock = %08x; ", mp_lock);
+       kprintf("cpuid = %d; ", mycpu->gd_cpuid);
+       kprintf("lapic.id = %08x\n", lapic.id);
+#endif
+       if (type == T_PAGEFLT) {
+               kprintf("fault virtual address  = 0x%x\n", eva);
+               kprintf("fault code             = %s %s, %s\n",
+                       code & PGEX_U ? "user" : "supervisor",
+                       code & PGEX_W ? "write" : "read",
+                       code & PGEX_P ? "protection violation" : "page not present");
+       }
+       kprintf("instruction pointer    = 0x%x:0x%x\n",
+              frame->tf_cs & 0xffff, frame->tf_eip);
+        if ((ISPL(frame->tf_cs) == SEL_UPL) /*||(frame->tf_eflags&PSL_VM)*/) {
+               ss = frame->tf_ss & 0xffff;
+               esp = frame->tf_esp;
+       } else {
+               ss = GSEL(GDATA_SEL, SEL_KPL);
+               esp = (int)&frame->tf_esp;
+       }
+       kprintf("stack pointer          = 0x%x:0x%x\n", ss, esp);
+       kprintf("frame pointer          = 0x%x:0x%x\n", ss, frame->tf_ebp);
+       kprintf("processor eflags       = ");
+       if (frame->tf_eflags & PSL_T)
+               kprintf("trace trap, ");
+       if (frame->tf_eflags & PSL_I)
+               kprintf("interrupt enabled, ");
+       if (frame->tf_eflags & PSL_NT)
+               kprintf("nested task, ");
+       if (frame->tf_eflags & PSL_RF)
+               kprintf("resume, ");
+#if 0
+       if (frame->tf_eflags & PSL_VM)
+               kprintf("vm86, ");
+#endif
+       kprintf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
+       kprintf("current process                = ");
+       if (curproc) {
+               kprintf("%lu (%s)\n",
+                   (u_long)curproc->p_pid, curproc->p_comm ?
+                   curproc->p_comm : "");
+       } else {
+               kprintf("Idle\n");
+       }
+       kprintf("current thread          = pri %d ", curthread->td_pri);
+       if (curthread->td_pri >= TDPRI_CRIT)
+               kprintf("(CRIT)");
+       kprintf("\n");
+#ifdef SMP
+/**
+ *  XXX FIXME:
+ *     we probably SHOULD have stopped the other CPUs before now!
+ *     another CPU COULD have been touching cpl at this moment...
+ */
+       kprintf(" <- SMP: XXX");
+#endif
+       kprintf("\n");
+
+#ifdef KDB
+       if (kdb_trap(&psl))
+               return;
+#endif
+#ifdef DDB
+       if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
+               return;
+#endif
+       kprintf("trap number            = %d\n", type);
+       if (type <= MAX_TRAP_MSG)
+               panic("%s", trap_msg[type]);
+       else
+               panic("unknown/reserved trap");
+}
+
+/*
+ * Double fault handler. Called when a fault occurs while writing
+ * a frame for a trap/exception onto the stack. This usually occurs
+ * when the stack overflows (such is the case with infinite recursion,
+ * for example).
+ *
+ * XXX Note that the current PTD gets replaced by IdlePTD when the
+ * task switch occurs. This means that the stack that was active at
+ * the time of the double fault is not available at <kstack> unless
+ * the machine was idle when the double fault occurred. The downside
+ * of this is that "trace <ebp>" in ddb won't work.
+ */
+void
+dblfault_handler(void)
+{
+       struct mdglobaldata *gd = mdcpu;
+
+       kprintf("\nFatal double fault:\n");
+       kprintf("eip = 0x%x\n", gd->gd_common_tss.tss_eip);
+       kprintf("esp = 0x%x\n", gd->gd_common_tss.tss_esp);
+       kprintf("ebp = 0x%x\n", gd->gd_common_tss.tss_ebp);
+#ifdef SMP
+       /* three separate prints in case of a trap on an unmapped page */
+       kprintf("mp_lock = %08x; ", mp_lock);
+       kprintf("cpuid = %d; ", mycpu->gd_cpuid);
+       kprintf("lapic.id = %08x\n", lapic.id);
+#endif
+       panic("double fault");
+}
+
+/*
+ * Compensate for 386 brain damage (missing URKR).
+ * This is a little simpler than the pagefault handler in trap() because
+ * the page tables have already been faulted in and high addresses
+ * are thrown out early for other reasons.  Returns 0 on success,
+ * 1 on failure.
+ */
+int
+trapwrite(unsigned addr)
+{
+       struct proc *p;
+       vm_offset_t va;
+       struct vmspace *vm;
+       int rv;
+
+       va = trunc_page((vm_offset_t)addr);
+       /*
+        * XXX - MAX is END.  Changed > to >= for temp. fix.
+        */
+       if (va >= VM_MAX_USER_ADDRESS)
+               return (1);
+
+       p = curproc;
+       vm = p->p_vmspace;
+
+       ++p->p_lock;
+
+       if (!grow_stack (p, va)) {
+               --p->p_lock;
+               return (1);
+       }
+
+       /*
+        * fault the data page
+        */
+       rv = vm_fault(&vm->vm_map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
+
+       --p->p_lock;
+
+       if (rv != KERN_SUCCESS)
+               return (1);
+
+       return (0);
+}
+
+/*
+ *     syscall2 -      MP aware system call request C handler
+ *
+ *     A system call is essentially treated as a trap except that the
+ *     MP lock is not held on entry or return.  We are responsible for
+ *     obtaining the MP lock if necessary and for handling ASTs
+ *     (e.g. a task switch) prior to return.
+ *
+ *     In general, only simple access and manipulation of curproc and
+ *     the current stack is allowed without having to hold MP lock.
+ *
+ *     MPSAFE - note that large sections of this routine are run without
+ *              the MP lock.
+ */
+
+void
+syscall2(struct trapframe frame)
+{
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       struct lwp *lp = td->td_lwp;
+       caddr_t params;
+       struct sysent *callp;
+       register_t orig_tf_eflags;
+       int sticks;
+       int error;
+       int narg;
+#ifdef INVARIANTS
+       int crit_count = td->td_pri & ~TDPRI_MASK;
+#endif
+#ifdef SMP
+       int have_mplock = 0;
+#endif
+       u_int code;
+       union sysunion args;
+
+#ifdef DIAGNOSTIC
+       if (ISPL(frame.tf_cs) != SEL_UPL) {
+               get_mplock();
+               panic("syscall");
+               /* NOT REACHED */
+       }
+#endif
+
+#ifdef SMP
+       KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame.tf_eip));
+       if (syscall_mpsafe == 0)
+               MAKEMPSAFE(have_mplock);
+#endif
+       userenter(td);          /* lazy raise our priority */
+
+       /*
+        * Misc
+        */
+       sticks = (int)td->td_sticks;
+       orig_tf_eflags = frame.tf_eflags;
+
+       /*
+        * Virtual kernel intercept - if a VM context managed by a virtual
+        * kernel issues a system call the virtual kernel handles it, not us.
+        * Restore the virtual kernel context and return from its system
+        * call.  The current frame is copied out to the virtual kernel.
+        */
+       if (p->p_vkernel && p->p_vkernel->vk_current) {
+               error = vkernel_trap(p, &frame);
+               frame.tf_eax = error;
+               if (error)
+                       frame.tf_eflags |= PSL_C;
+               error = EJUSTRETURN;
+               goto out;
+       }
+
+       /*
+        * Get the system call parameters and account for time
+        */
+       lp->lwp_md.md_regs = &frame;
+       params = (caddr_t)frame.tf_esp + sizeof(int);
+       code = frame.tf_eax;
+
+       if (p->p_sysent->sv_prepsyscall) {
+               (*p->p_sysent->sv_prepsyscall)(
+                       &frame, (int *)(&args.nosys.sysmsg + 1),
+                       &code, &params);
+       } else {
+               /*
+                * Need to check if this is a 32 bit or 64 bit syscall.
+                * fuword is MP aware.
+                */
+               if (code == SYS_syscall) {
+                       /*
+                        * Code is first argument, followed by actual args.
+                        */
+                       code = fuword(params);
+                       params += sizeof(int);
+               } else if (code == SYS___syscall) {
+                       /*
+                        * Like syscall, but code is a quad, so as to maintain
+                        * quad alignment for the rest of the arguments.
+                        */
+                       code = fuword(params);
+                       params += sizeof(quad_t);
+               }
+       }
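+       /*
+        * e.g. an indirect call such as syscall(SYS_getpid) enters with
+        * tf_eax == SYS_syscall; the real syscall number is then fetched
+        * as the first argument word at params above.
+        */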
+
+       code &= p->p_sysent->sv_mask;
+       if (code >= p->p_sysent->sv_size)
+               callp = &p->p_sysent->sv_table[0];
+       else
+               callp = &p->p_sysent->sv_table[code];
+
+       narg = callp->sy_narg & SYF_ARGMASK;
+
+       /*
+        * copyin is MP aware, but the tracing code is not
+        */
+       if (narg && params) {
+               error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1),
+                               narg * sizeof(register_t));
+               if (error) {
+#ifdef KTRACE
+                       if (KTRPOINT(td, KTR_SYSCALL)) {
+                               MAKEMPSAFE(have_mplock);
+                               
+                               ktrsyscall(p, code, narg,
+                                       (void *)(&args.nosys.sysmsg + 1));
+                       }
+#endif
+                       goto bad;
+               }
+       }
+
+#ifdef KTRACE
+       if (KTRPOINT(td, KTR_SYSCALL)) {
+               MAKEMPSAFE(have_mplock);
+               ktrsyscall(p, code, narg, (void *)(&args.nosys.sysmsg + 1));
+       }
+#endif
+
+       /*
+        * For traditional syscall code edx is left untouched when 32 bit
+        * results are returned.  Since edx is loaded from fds[1] when the 
+        * system call returns we pre-set it here.
+        */
+       args.sysmsg_fds[0] = 0;
+       args.sysmsg_fds[1] = frame.tf_edx;
+
+       /*
+        * The syscall might manipulate the trap frame. If it does it
+        * will probably return EJUSTRETURN.
+        */
+       args.sysmsg_frame = &frame;
+
+       STOPEVENT(p, S_SCE, narg);      /* MP aware */
+
+#ifdef SMP
+       /*
+        * Try to run the syscall without the MP lock if the syscall
+        * is MP safe.  We have to obtain the MP lock no matter what if 
+        * we are ktracing
+        */
+       if ((callp->sy_narg & SYF_MPSAFE) == 0)
+               MAKEMPSAFE(have_mplock);
+#endif
+
+       error = (*callp->sy_call)(&args);
+
+out:
+       /*
+        * MP SAFE (we may or may not have the MP lock at this point)
+        */
+       switch (error) {
+       case 0:
+               /*
+                * Reinitialize proc pointer `p' as it may be different
+                * if this is a child returning from fork syscall.
+                */
+               p = curproc;
+               lp = curthread->td_lwp;
+               frame.tf_eax = args.sysmsg_fds[0];
+               frame.tf_edx = args.sysmsg_fds[1];
+               frame.tf_eflags &= ~PSL_C;
+               break;
+       case ERESTART:
+               /*
+                * Reconstruct pc, assuming lcall $X,y is 7 bytes and
+                * int 0x80 is 2 bytes.  We saved the instruction size
+                * in tf_err, so backing eip up by it re-executes the
+                * syscall when the process resumes.
+                */
+               frame.tf_eip -= frame.tf_err;
+               break;
+       case EJUSTRETURN:
+               break;
+       case EASYNC:
+               panic("Unexpected EASYNC return value (for now)");
+       default:
+bad:
+               if (p->p_sysent->sv_errsize) {
+                       if (error >= p->p_sysent->sv_errsize)
+                               error = -1;     /* XXX */
+                       else
+                               error = p->p_sysent->sv_errtbl[error];
+               }
+               frame.tf_eax = error;
+               frame.tf_eflags |= PSL_C;
+               break;
+       }
+
+       /*
+        * Traced syscall.  trapsignal() is not MP aware.
+        */
+       if ((orig_tf_eflags & PSL_T) /*&& !(orig_tf_eflags & PSL_VM)*/) {
+               MAKEMPSAFE(have_mplock);
+               frame.tf_eflags &= ~PSL_T;
+               trapsignal(p, SIGTRAP, 0);
+       }
+
+       /*
+        * Handle reschedule and other end-of-syscall issues
+        */
+       userret(lp, &frame, sticks);
+
+#ifdef KTRACE
+       if (KTRPOINT(td, KTR_SYSRET)) {
+               MAKEMPSAFE(have_mplock);
+               ktrsysret(p, code, error, args.sysmsg_result);
+       }
+#endif
+
+       /*
+        * This works because errno is findable through the
+        * register set.  If we ever support an emulation where this
+        * is not the case, this code will need to be revisited.
+        */
+       STOPEVENT(p, S_SCX, code);
+
+       userexit(lp);
+#ifdef SMP
+       /*
+        * Release the MP lock if we had to get it
+        */
+       KASSERT(td->td_mpcount == have_mplock, 
+               ("badmpcount syscall2/end from %p", (void *)frame.tf_eip));
+       if (have_mplock)
+               rel_mplock();
+#endif
+#ifdef INVARIANTS
+       KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), 
+               ("syscall: critical section count mismatch! %d/%d",
+               crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+#endif
+}
+
+/*
+ * Simplified back end of syscall(), used when returning from fork()
+ * directly into user mode.  MP lock is held on entry and should be
+ * released on return.  This code will return back into the fork
+ * trampoline code which then runs doreti.
+ */
+void
+fork_return(struct lwp *lp, struct trapframe frame)
+{
+       struct proc *p = lp->lwp_proc;
+
+       frame.tf_eax = 0;               /* Child returns zero */
+       frame.tf_eflags &= ~PSL_C;      /* success */
+       frame.tf_edx = 1;
+
+       /*
+        * Newly forked processes are given a kernel priority.  We have to
+        * adjust the priority to a normal user priority and fake entry
+        * into the kernel (call userenter()) to install a passive release
+        * function just in case userret() decides to stop the process.  This
+        * can occur when ^Z races a fork.  If we do not install the passive
+        * release function the current process designation will not be
+        * released when the thread goes to sleep.
+        */
+       lwkt_setpri_self(TDPRI_USER_NORM);
+       userenter(lp->lwp_thread);
+       userret(lp, &frame, 0);
+#ifdef KTRACE
+       if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
+               ktrsysret(p, SYS_fork, 0, 0);
+#endif
+       p->p_flag |= P_PASSIVE_ACQ;
+       userexit(lp);
+       p->p_flag &= ~P_PASSIVE_ACQ;
+#ifdef SMP
+       KKASSERT(lp->lwp_thread->td_mpcount == 1);
+       rel_mplock();
+#endif
+}
similarity index 78%
copy from sys/platform/vkernel/include/md_var.h
copy to sys/platform/vkernel/i386/userldt.c
index 628dac2..c2f3ad6 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
  */
 
-#ifndef _MACHINE_MD_VAR_H_
-#define _MACHINE_MD_VAR_H_
-
-#ifndef _SYS_TYPES_H_
 #include <sys/types.h>
-#endif
-#ifndef _SYS_VKERNEL_H_
-#include <sys/vkernel.h>
-#endif
-
-extern char    sigcode[];
-extern int     szsigcode;
-extern vpte_t  *KernelPTA;
-extern vpte_t  *KernelPTD;
-extern vm_offset_t crashdumpmap;
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <machine/pcb.h>
+#include <machine/pcb_ext.h>
 
-struct mdglobaldata;
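+/*
+ * User LDT management is not yet implemented for the virtual kernel;
+ * these stubs satisfy kernel linkage and panic if ever reached.
+ */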
+void
+set_user_ldt (struct pcb *pcb)
+{
+       panic("set_user_ldt");
+}
 
-void cpu_gdinit (struct mdglobaldata *gd, int cpu);
-void cpu_idle_restore (void);
+struct pcb_ldt *
+user_ldt_alloc (struct pcb *pcb, int len)
+{
+       panic("user_ldt_alloc");
+}
 
-#endif
+void
+user_ldt_free (struct pcb *pcb)
+{
+       panic("user_ldt_free");
+}
 
diff --git a/sys/platform/vkernel/i386/vm_machdep.c b/sys/platform/vkernel/i386/vm_machdep.c
new file mode 100644 (file)
index 0000000..5ddfe72
--- /dev/null
@@ -0,0 +1,398 @@
+/*-
+ * Copyright (c) 1982, 1986 The Regents of the University of California.
+ * Copyright (c) 1989, 1990 William Jolitz
+ * Copyright (c) 1994 John Dyson
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     from: @(#)vm_machdep.c  7.3 (Berkeley) 5/13/91
+ *     Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
+ * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/i386/vm_machdep.c,v 1.1 2007/01/05 22:18:18 dillon Exp $
+ */
+
+#include "use_npx.h"
+#include "use_isa.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/interrupt.h>
+#include <sys/vnode.h>
+#include <sys/vmmeter.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/smp.h>
+#include <machine/pcb.h>
+#include <machine/pcb_ext.h>
+#include <machine/vm86.h>
+#include <machine/segments.h>
+#include <machine/globaldata.h>        /* npxthread */
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <sys/lock.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+
+#include <sys/user.h>
+#include <sys/thread2.h>
+
+#include <bus/isa/i386/isa.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+char machine[] = MACHINE_CPU;
+SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD,
+             machine, 0, "Machine class");
+
+/*
+ * Finish a fork operation, with lwp lp2 nearly set up.
+ * Copy and update the pcb, and set up the stack so that the child is
+ * ready to run and return to user mode.
+ */
+void
+cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
+{
+       struct pcb *pcb2;
+
+       if ((flags & RFPROC) == 0) {
+               if ((flags & RFMEM) == 0) {
+                       /* unshare user LDT */
+                       struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
+                       struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
+                       if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
+                               pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
+                               user_ldt_free(pcb1);
+                               pcb1->pcb_ldt = pcb_ldt;
+                               set_user_ldt(pcb1);
+                       }
+               }
+               return;
+       }
+
+#if NNPX > 0
+       /* Ensure that lp1's pcb is up to date. */
+       if (mdcpu->gd_npxthread == lp1->lwp_thread)
+               npxsave(lp1->lwp_thread->td_savefpu);
+#endif
+       
+       /*
+        * Copy lp1's PCB.  This really only applies to the
+        * debug registers and FP state, but it's faster to just copy the
+        * whole thing.  Because we only save the PCB at switchout time,
+        * the register state (including pcb_gs) may not be current.
+        */
+       pcb2 = lp2->lwp_thread->td_pcb;
+       *pcb2 = *lp1->lwp_thread->td_pcb;
+
+       /*
+        * Create a new fresh stack for the new process.
+        * Copy the trap frame for the return to user mode as if from a
+        * syscall.  This copies the user mode register values.  The
+        * 16 byte offset saves space for vm86, and must match 
+        * common_tss.esp0 (kernel stack pointer on entry from user mode)
+        *
+        * pcb_esp must allocate an additional call-return pointer below
+        * the trap frame which will be restored by cpu_restore from
+        * PCB_EIP, and the thread's td_sp pointer must allocate an
+        * additional two words below the pcb_esp call-return pointer to
+        * hold the LWKT restore function pointer and eflags.
+        *
+        * The LWKT restore function pointer must be set to cpu_restore,
+        * which is our standard heavy weight process switch-in function.
+        * YYY eventually we should shortcut fork_return and fork_trampoline
+        * to use the LWKT restore function directly so we can get rid of
+        * all the extra crap we are setting up.
+        */
+       lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1;
+       bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs,
+             sizeof(*lp2->lwp_md.md_regs));
+
+       /*
+        * Set registers for trampoline to user mode.  Leave space for the
+        * return address on stack.  These are the kernel mode register values.
+        */
+       pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir);
+       pcb2->pcb_edi = 0;
+       pcb2->pcb_esi = (int)fork_return;       /* fork_trampoline argument */
+       pcb2->pcb_ebp = 0;
+       pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *);
+       pcb2->pcb_ebx = (int)lp2;               /* fork_trampoline argument */
+       pcb2->pcb_eip = (int)fork_trampoline;
+       lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *));
+       *(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER;
+       lp2->lwp_thread->td_sp -= sizeof(void *);
+       *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
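+
+       /*
+        * The resulting kernel stack layout, from the top of the pcb
+        * downward, is roughly:
+        *
+        *      trapframe               (lp2->lwp_md.md_regs)
+        *      call-return slot        (pcb_esp, restored via pcb_eip
+        *                               by cpu_restore)
+        *      PSL_USER                (eflags for the LWKT restore)
+        *      cpu_heavy_restore       (LWKT restore vector, td_sp)
+        */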
+
+       /*
+        * Segment registers.
+        */
+       pcb2->pcb_gs = rgs();
+
+       /*
+        * pcb2->pcb_ldt:       duplicated below, if necessary.
+        * pcb2->pcb_savefpu:   cloned above.
+        * pcb2->pcb_flags:     cloned above (always 0 here?).
+        * pcb2->pcb_onfault:   cloned above (always NULL here?).
+        */
+
+       /*
+        * XXX don't copy the i/o pages.  this should probably be fixed.
+        */
+       pcb2->pcb_ext = 0;
+
+        /* Copy the LDT, if necessary. */
+        if (pcb2->pcb_ldt != 0) {
+               if (flags & RFMEM) {
+                       pcb2->pcb_ldt->ldt_refcnt++;
+               } else {
+                       pcb2->pcb_ldt = user_ldt_alloc(pcb2,
+                               pcb2->pcb_ldt->ldt_len);
+               }
+        }
+       bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
+             sizeof(lp2->lwp_thread->td_tls));
+       /*
+        * Now, cpu_switch() can schedule the new process.
+        * pcb_esp is loaded pointing to the cpu_switch() stack frame
+        * containing the return address when exiting cpu_switch.
+        * This will normally be to fork_trampoline(), which will have
+        * %ebx loaded with the new proc's pointer.  fork_trampoline()
+        * will set up a stack to call fork_return(p, frame); to complete
+        * the return to user-mode.
+        */
+}
+
+/*
+ * Intercept the return address from a freshly forked process that has NOT
+ * been scheduled yet.
+ *
+ * This is needed to make kernel threads stay in kernel mode.
+ */
+void
+cpu_set_fork_handler(struct lwp *lp, void (*func)(void *), void *arg)
+{
+       /*
+        * Note that the trap frame follows the args, so the function
+        * is really called like this:  func(arg, frame);
+        */
+       lp->lwp_thread->td_pcb->pcb_esi = (int) func;   /* function */
+       lp->lwp_thread->td_pcb->pcb_ebx = (int) arg;    /* first arg */
+}
+
+void
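+/*
+ * Set up a kernel thread so that, when first switched in, it runs
+ * func(arg) via cpu_kthread_restore and falls into rfunc when func
+ * returns.
+ */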
+cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
+{
+       td->td_pcb->pcb_esi = (int)func;
+       td->td_pcb->pcb_ebx = (int) arg;
+       td->td_switch = cpu_lwkt_switch;
+       td->td_sp -= sizeof(void *);
+       *(void **)td->td_sp = rfunc;    /* exit function on return */
+       td->td_sp -= sizeof(void *);
+       *(void **)td->td_sp = cpu_kthread_restore;
+}
+
+void
+cpu_proc_exit(void)
+{
+       struct thread *td = curthread;
+       struct pcb *pcb;
+       struct pcb_ext *ext;
+
+#if NNPX > 0
+       npxexit();
+#endif /* NNPX */
+
+       /*
+        * If we were using a private TSS do a forced-switch to ourselves
+        * to switch back to the common TSS before freeing it.
+        */
+       pcb = td->td_pcb;
+       if ((ext = pcb->pcb_ext) != NULL) {
+               crit_enter();
+               pcb->pcb_ext = NULL;
+               td->td_switch(td);
+               crit_exit();
+               kmem_free(&kernel_map, (vm_offset_t)ext, ctob(IOPAGES + 1));
+       }
+       user_ldt_free(pcb);
+        if (pcb->pcb_flags & PCB_DBREGS) {
+                /*
+                 * disable all hardware breakpoints
+                 */
+                reset_dbregs();
+                pcb->pcb_flags &= ~PCB_DBREGS;
+        }
+       td->td_gd->gd_cnt.v_swtch++;
+
+       crit_enter_quick(td);
+       lwkt_deschedule_self(td);
+       lwkt_remove_tdallq(td);
+       cpu_thread_exit();
+}
+
+/*
+ * Terminate the current thread.  The caller must have already acquired
+ * the thread's rwlock and placed it on a reap list or otherwise notified
+ * a reaper of its existence.  We set a special assembly switch function which
+ * releases td_rwlock after it has cleaned up the MMU state and switched
+ * out the stack.
+ *
+ * Must be called from a critical section and with the thread descheduled.
+ */
+void
+cpu_thread_exit(void)
+{
+       curthread->td_switch = cpu_exit_switch;
+       curthread->td_flags |= TDF_EXITING;
+       lwkt_switch();
+       panic("cpu_exit");
+}
+
+/*
+ * Process Reaper.  Called after the caller has acquired the thread's
+ * rwlock and removed it from the reap list.
+ */
+void
+cpu_proc_wait(struct proc *p)
+{
+       struct thread *td;
+
+       /* drop per-process resources */
+       td = pmap_dispose_proc(p);
+       if (td)
+               lwkt_free_thread(td);
+}
+
+/*
+ * Dump the machine specific header information at the start of a core dump.
+ */
+int
+cpu_coredump(struct thread *td, struct vnode *vp, struct ucred *cred)
+{
+       struct proc *p = td->td_proc;
+       int error;
+       caddr_t tempuser;
+
+       KKASSERT(p);
+       tempuser = kmalloc(ctob(UPAGES), M_TEMP, M_WAITOK);
+       if (!tempuser)
+               return EINVAL;
+       
+       bzero(tempuser, ctob(UPAGES));
+       bcopy(p->p_addr, tempuser, sizeof(struct user));
+       bcopy(p->p_md.md_regs,
+             tempuser + ((caddr_t) p->p_md.md_regs - (caddr_t) p->p_addr),
+             sizeof(struct trapframe));
+       bcopy(p->p_thread->td_pcb,
+             tempuser + ((char *)p->p_thread->td_pcb - (char *)p->p_addr),
+             sizeof(struct pcb));
+
+       error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser, ctob(UPAGES),
+                       (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, (int *)NULL);
+
+       kfree(tempuser, M_TEMP);
+       
+       return error;
+}
+
+#ifdef notyet
+static void
+setredzone(u_short *pte, caddr_t vaddr)
+{
+       /*
+        * Eventually do this by setting up an expand-down stack segment
+        * for the ss0: selector, allowing stack access down to the top
+        * of u.  This means, though, that protection violations need to
+        * be handled through a double fault exception that must do an
+        * integral task switch to a known good context, within which a
+        * dump can be taken.  A sensible scheme might be to save the
+        * initial context used by sched (which has physical memory
+        * mapped 1:1 at the bottom) and take the dump while still in
+        * mapped mode.
+        */
+}
+#endif
+
+/*
+ * Convert kernel VA to physical address
+ */
+vm_paddr_t
+kvtop(void *addr)
+{
+       vm_paddr_t pa;
+
+       pa = pmap_kextract((vm_offset_t)addr);
+       if (pa == 0)
+               panic("kvtop: zero page frame");
+       return (pa);
+}
+
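+/*
+ * Grow the stack to include sp.  Returns 1 if sp was not within a
+ * growable stack region or if the growth succeeded; returns 0 only
+ * if the growth attempt failed.
+ */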
+int
+grow_stack(struct proc *p, u_int sp)
+{
+       int rv;
+
+       rv = vm_map_growstack (p, sp);
+       if (rv != KERN_SUCCESS)
+               return (0);
+
+       return (1);
+}
+
+SYSCTL_DECL(_vm_stats_misc);
+
+static int cnt_prezero;
+
+SYSCTL_INT(_vm_stats_misc, OID_AUTO,
+       cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
+
+/*
+ * Tell whether this address is in some physical memory region.
+ * Currently used by the kernel coredump code in order to avoid
+ * dumping the ``ISA memory hole'' which could cause indefinite hangs,
+ * or other unpredictable behaviour.
+ */
+
+int
+is_physical_memory(vm_offset_t addr)
+{
+       return 1;
+}
+
index 793c8fb..350c21b 100644 (file)
@@ -28,7 +28,7 @@
  *     should not include this file.
  *
  * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $
- * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.3 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.4 2007/01/05 22:18:19 dillon Exp $
  */
 
 #ifndef _MACHINE_GLOBALDATA_H_
@@ -83,7 +83,7 @@ struct mdglobaldata {
        int             gd_spending;    /* software interrupt pending */
        int             gd_sdelayed;    /* delayed software ints */
        int             gd_currentldt;
-       int             gd_private_tss;
+       int             unused000;
        u_int           unused001;
        u_int           gd_other_cpus;
        u_int           gd_ss_eflags;
index 628dac2..26de6e4 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.3 2007/01/05 22:18:19 dillon Exp $
  */
 
 #ifndef _MACHINE_MD_VAR_H_
@@ -49,11 +49,20 @@ extern      int     szsigcode;
 extern vpte_t  *KernelPTA;
 extern vpte_t  *KernelPTD;
 extern vm_offset_t crashdumpmap;
+extern  int    cpu_fxsr;
 
 struct mdglobaldata;
 
+vpte_t *pmap_kpte(vm_offset_t va);
 void cpu_gdinit (struct mdglobaldata *gd, int cpu);
-void cpu_idle_restore (void);
+
+void cpu_heavy_restore(void);  /* cannot be called from C */
+void cpu_lwkt_restore(void);    /* cannot be called from C */
+void cpu_idle_restore(void);    /* cannot be called from C */
+void cpu_kthread_restore(void);        /* cannot be called from C */
+void cpu_exit_switch (struct thread *next);
+void cpu_setregs (void);
+void cpu_idle (void);
 
 #endif
 
diff --git a/sys/platform/vkernel/include/pcb_ext.h b/sys/platform/vkernel/include/pcb_ext.h
new file mode 100644 (file)
index 0000000..59d0e61
--- /dev/null
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 1997 Jonathan Lemon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $
+ * $DragonFly: src/sys/platform/vkernel/include/pcb_ext.h,v 1.1 2007/01/05 22:18:19 dillon Exp $
+ */
+
+#ifndef _MACHINE_PCB_EXT_H_
+#define _MACHINE_PCB_EXT_H_
+
+#ifndef _SYS_TYPES_H_
+#include <sys/types.h>
+#endif
+
+/*
+ * Extension to the 386 process control block
+ */
+#ifndef _MACHINE_TSS_H_
+#include <machine/tss.h>
+#endif
+#ifndef _MACHINE_VM86_H_
+#include <machine/vm86.h>
+#endif
+#ifndef _MACHINE_SEGMENTS_H_
+#include <machine/segments.h>
+#endif
+
+struct pcb_ext {
+       struct  segment_descriptor ext_tssd;    /* tss descriptor */
+       struct  i386tss ext_tss;        /* per-process i386tss */
+       caddr_t ext_iomap;              /* i/o permission bitmap */
+       struct  vm86_kernel ext_vm86;   /* vm86 area */
+};
+
+struct pcb_ldt {
+       caddr_t ldt_base;
+       int     ldt_len;
+       int     ldt_refcnt;
+       u_long  ldt_active;
+       struct  segment_descriptor ldt_sd;
+};
+
+#ifdef _KERNEL
+
+struct pcb;
+
+void set_user_ldt (struct pcb *);
+struct pcb_ldt *user_ldt_alloc (struct pcb *, int);
+void user_ldt_free (struct pcb *);
+void set_user_TLS (void);
+
+#endif
+
+#endif /* _MACHINE_PCB_EXT_H_ */
diff --git a/sys/platform/vkernel/platform/busdma_machdep.c b/sys/platform/vkernel/platform/busdma_machdep.c
new file mode 100644 (file)
index 0000000..f153c1f
--- /dev/null
@@ -0,0 +1,900 @@
+/*
+ * Copyright (c) 1997, 1998 Justin T. Gibbs.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.16.2.2 2003/01/23 00:55:27 scottl Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/busdma_machdep.c,v 1.1 2007/01/05 22:18:20 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/uio.h>
+#include <sys/thread2.h>
+#include <sys/bus_dma.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+/* XXX needed to access the pmap to convert per-process virtual to physical */
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <vm/vm_map.h>
+
+#include <machine/md_var.h>
+
+#define MAX_BPAGES 128
+
+struct bus_dma_tag {
+       bus_dma_tag_t     parent;
+       bus_size_t        alignment;
+       bus_size_t        boundary;
+       bus_addr_t        lowaddr;
+       bus_addr_t        highaddr;
+       bus_dma_filter_t *filter;
+       void             *filterarg;
+       bus_size_t        maxsize;
+       u_int             nsegments;
+       bus_size_t        maxsegsz;
+       int               flags;
+       int               ref_count;
+       int               map_count;
+       bus_dma_segment_t *segments;
+};
+
+struct bounce_page {
+       vm_offset_t     vaddr;          /* kva of bounce buffer */
+       bus_addr_t      busaddr;        /* Physical address */
+       vm_offset_t     datavaddr;      /* kva of client data */
+       bus_size_t      datacount;      /* client data count */
+       STAILQ_ENTRY(bounce_page) links;
+};
+
+int busdma_swi_pending;
+
+static STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
+static int free_bpages;
+static int reserved_bpages;
+static int active_bpages;
+static int total_bpages;
+static bus_addr_t bounce_lowaddr = BUS_SPACE_MAXADDR;
+
+struct bus_dmamap {
+       struct bp_list         bpages;
+       int                    pagesneeded;
+       int                    pagesreserved;
+       bus_dma_tag_t          dmat;
+       void                  *buf;             /* unmapped buffer pointer */
+       bus_size_t             buflen;          /* unmapped buffer length */
+       bus_dmamap_callback_t *callback;
+       void                  *callback_arg;
+       STAILQ_ENTRY(bus_dmamap) links;
+};
+
+static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
+static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
+static struct bus_dmamap nobounce_dmamap;
+
+static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
+static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map);
+static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
+                                  vm_offset_t vaddr, bus_size_t size);
+static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
+static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
+
+static __inline int
+run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
+{
+       int retval;
+
+       retval = 0;
+       do {
+               if (paddr > dmat->lowaddr
+                && paddr <= dmat->highaddr
+                && (dmat->filter == NULL
+                 || (*dmat->filter)(dmat->filterarg, paddr) != 0))
+                       retval = 1;
+
+               dmat = dmat->parent;            
+       } while (retval == 0 && dmat != NULL);
+       return (retval);
+}
+
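run_filter() walks the tag and each of its parents, returning non-zero as soon
as an address lands inside a bounce window: above lowaddr, at or below
highaddr, and not excused by an installed filter function.  A minimal sketch of
the common ISA-style case, with assumed values (illustrative, not from this
commit):

    /*
     * Hypothetical tag: 16MB ISA-style limit, no filter, no parent.
     * dmat->lowaddr == 0x00FFFFFF, dmat->highaddr == BUS_SPACE_MAXADDR.
     */
    run_filter(dmat, 0x00400000);   /* reachable -> 0, no bounce needed */
    run_filter(dmat, 0x02000000);   /* above lowaddr -> 1, must bounce */
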
+#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4
+/*
+ * Allocate a device-specific dma_tag.
+ */
+int
+bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
+                  bus_size_t boundary, bus_addr_t lowaddr,
+                  bus_addr_t highaddr, bus_dma_filter_t *filter,
+                  void *filterarg, bus_size_t maxsize, int nsegments,
+                  bus_size_t maxsegsz, int flags, bus_dma_tag_t *dmat)
+{
+       bus_dma_tag_t newtag;
+       int error = 0;
+
+       /* Return a NULL tag on failure */
+       *dmat = NULL;
+
+       newtag = kmalloc(sizeof(*newtag), M_DEVBUF, M_INTWAIT);
+
+       newtag->parent = parent;
+       newtag->alignment = alignment;
+       newtag->boundary = boundary;
+       newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
+       newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1);
+       newtag->filter = filter;
+       newtag->filterarg = filterarg;
+       newtag->maxsize = maxsize;
+       newtag->nsegments = nsegments;
+       newtag->maxsegsz = maxsegsz;
+       newtag->flags = flags;
+       newtag->ref_count = 1; /* Count ourself */
+       newtag->map_count = 0;
+       newtag->segments = NULL;
+       
+       /* Take into account any restrictions imposed by our parent tag */
+       if (parent != NULL) {
+               newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
+               newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
+               /*
+                * XXX Not really correct??? Probably need to honor boundary
+                *     all the way up the inheritance chain.
+                */
+               newtag->boundary = MAX(parent->boundary, newtag->boundary);
+               if (newtag->filter == NULL) {
+                       /*
+                        * Short circuit looking at our parent directly
+                        * since we have encapsulated all of its information
+                        */
+                       newtag->filter = parent->filter;
+                       newtag->filterarg = parent->filterarg;
+                       newtag->parent = parent->parent;
+               }
+               if (newtag->parent != NULL) {
+                       parent->ref_count++;
+               }
+       }
+       
+       if (newtag->lowaddr < ptoa(Maxmem) &&
+           (flags & BUS_DMA_ALLOCNOW) != 0) {
+               /* Must bounce */
+
+               if (lowaddr > bounce_lowaddr) {
+                       /*
+                        * Go through the pool and kill any pages
+                        * that don't reside below lowaddr.
+                        */
+                       panic("bus_dma_tag_create: page reallocation "
+                             "not implemented");
+               }
+               if (ptoa(total_bpages) < maxsize) {
+                       int pages;
+
+                       pages = atop(maxsize) - total_bpages;
+
+                       /* Add pages to our bounce pool */
+                       if (alloc_bounce_pages(newtag, pages) < pages)
+                               error = ENOMEM;
+               }
+               /* Performed initial allocation */
+               newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
+       }
+       
+       if (error != 0) {
+               kfree(newtag, M_DEVBUF);
+       } else {
+               *dmat = newtag;
+       }
+       return (error);
+}
+
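A typical consumer builds a tag roughly as in the following hypothetical
driver fragment; the limits and constants are illustrative, not from this
commit:

    bus_dma_tag_t tag;
    int error;

    /* 32-bit addressable, 4-byte aligned, no boundary, one 4K segment */
    error = bus_dma_tag_create(NULL,                     /* parent */
                               4, 0,                     /* alignment, boundary */
                               BUS_SPACE_MAXADDR_32BIT,  /* lowaddr */
                               BUS_SPACE_MAXADDR,        /* highaddr */
                               NULL, NULL,               /* filter, filterarg */
                               PAGE_SIZE,                /* maxsize */
                               1,                        /* nsegments */
                               PAGE_SIZE,                /* maxsegsz */
                               0,                        /* flags */
                               &tag);
    if (error)
            return (error);
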
+int
+bus_dma_tag_destroy(bus_dma_tag_t dmat)
+{
+       if (dmat != NULL) {
+
+               if (dmat->map_count != 0)
+                       return (EBUSY);
+
+               while (dmat != NULL) {
+                       bus_dma_tag_t parent;
+
+                       parent = dmat->parent;
+                       dmat->ref_count--;
+                       if (dmat->ref_count == 0) {
+                               if (dmat->segments != NULL)
+                                       kfree(dmat->segments, M_DEVBUF);
+                               kfree(dmat, M_DEVBUF);
+                               /*
+                                * Last reference count, so
+                                * release our reference
+                                * count on our parent.
+                                */
+                               dmat = parent;
+                       } else
+                               dmat = NULL;
+               }
+       }
+       return (0);
+}
+
+/*
+ * Allocate a handle for mapping from kva/uva/physical
+ * address space into bus device space.
+ */
+int
+bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
+{
+       int error;
+
+       error = 0;
+
+       if (dmat->segments == NULL) {
+               KKASSERT(dmat->nsegments && dmat->nsegments < 16384);
+               dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * 
+                                       dmat->nsegments, M_DEVBUF, M_INTWAIT);
+       }
+
+       if (dmat->lowaddr < ptoa(Maxmem)) {
+               /* Must bounce */
+               int maxpages;
+
+               *mapp = kmalloc(sizeof(**mapp), M_DEVBUF, M_INTWAIT);
+               if (*mapp == NULL) {
+                       return (ENOMEM);
+               } else {
+                       /* Initialize the new map */
+                       bzero(*mapp, sizeof(**mapp));
+                       STAILQ_INIT(&((*mapp)->bpages));
+               }
+               /*
+                * Attempt to add pages to our pool on a per-instance
+                * basis up to a sane limit.
+                */
+               maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->lowaddr));
+               if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0
+                || (dmat->map_count > 0
+                 && total_bpages < maxpages)) {
+                       int pages;
+
+                       if (dmat->lowaddr > bounce_lowaddr) {
+                               /*
+                                * Go through the pool and kill any pages
+                                * that don't reside below lowaddr.
+                                */
+                               panic("bus_dmamap_create: page reallocation "
+                                     "not implemented");
+                       }
+                       pages = atop(dmat->maxsize);
+                       pages = MIN(maxpages - total_bpages, pages);
+                       error = alloc_bounce_pages(dmat, pages);
+
+                       if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) {
+                               if (error == 0)
+                                       dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
+                       } else {
+                               error = 0;
+                       }
+               }
+       } else {
+               *mapp = NULL;
+       }
+       if (error == 0)
+               dmat->map_count++;
+       return (error);
+}
+
+/*
+ * Destroy a handle for mapping from kva/uva/physical
+ * address space into bus device space.
+ */
+int
+bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
+{
+       if (map != NULL) {
+               if (STAILQ_FIRST(&map->bpages) != NULL)
+                       return (EBUSY);
+               kfree(map, M_DEVBUF);
+       }
+       dmat->map_count--;
+       return (0);
+}
+
+
+/*
+ * Allocate a piece of memory that can be efficiently mapped into
+ * bus device space based on the constraints listed in the dma tag.
+ *
+ * mapp is degenerate.  By definition this allocation should not require
+ * bounce buffers so do not allocate a dma map.
+ */
+int
+bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
+                bus_dmamap_t *mapp)
+{
+       int mflags;
+       /* If we succeed, no mapping/bouncing will be required */
+       *mapp = NULL;
+
+       if (dmat->segments == NULL) {
+               KKASSERT(dmat->nsegments < 16384);
+               dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * 
+                                       dmat->nsegments, M_DEVBUF, M_INTWAIT);
+       }
+
+       if (flags & BUS_DMA_NOWAIT)
+               mflags = M_NOWAIT;
+       else
+               mflags = M_WAITOK;
+       if (flags & BUS_DMA_ZERO)
+               mflags |= M_ZERO;
+
+       if ((dmat->maxsize <= PAGE_SIZE) &&
+           dmat->lowaddr >= ptoa(Maxmem)) {
+               *vaddr = kmalloc(dmat->maxsize, M_DEVBUF, mflags);
+               /*
+                * XXX Check whether the allocation crossed a page boundary
+                * and retry with power-of-2 alignment in that case.
+                */
+               if ((((intptr_t)*vaddr) & PAGE_MASK) !=
+                   (((intptr_t)*vaddr + dmat->maxsize) & PAGE_MASK)) {
+                       size_t size;
+                       kfree(*vaddr, M_DEVBUF);
+                       /* XXX check for overflow? */
+                       for (size = 1; size <= dmat->maxsize; size <<= 1)
+                               ;
+                       *vaddr = kmalloc(size, M_DEVBUF, mflags);
+               }
+       } else {
+               /*
+                * XXX Use Contigmalloc until it is merged into this facility
+                *     and handles multi-seg allocations.  Nobody is doing
+                *     multi-seg allocations yet though.
+                */
+               *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags,
+                   0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul,
+                   dmat->boundary);
+       }
+       if (*vaddr == NULL)
+               return (ENOMEM);
+       return (0);
+}
+
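Usage pairs the allocation with bus_dmamem_free() below, passing the
degenerate NULL map through unchanged (illustrative fragment):

    void *vaddr;
    bus_dmamap_t map;

    if (bus_dmamem_alloc(tag, &vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO,
                         &map) == 0) {
            /* map came back NULL by definition; no bouncing for dmamem */
            /* ... use vaddr for device-shared structures ... */
            bus_dmamem_free(tag, vaddr, map);
    }
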
+/*
+ * Free a piece of memory and its associated dmamap that were allocated
+ * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
+ */
+void
+bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
+{
+       /*
+        * dmamem does not need to be bounced, so the map should be
+        * NULL
+        */
+       if (map != NULL)
+               panic("bus_dmamem_free: Invalid map freed\n");
+       if ((dmat->maxsize <= PAGE_SIZE) &&
+           dmat->lowaddr >= ptoa(Maxmem))
+               kfree(vaddr, M_DEVBUF);
+       else
+               contigfree(vaddr, dmat->maxsize, M_DEVBUF);
+}
+
+#define BUS_DMAMAP_NSEGS ((BUS_SPACE_MAXSIZE / PAGE_SIZE) + 1)
+
+/*
+ * Map the buffer buf into bus space using the dmamap map.
+ */
+int
+bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+               bus_size_t buflen, bus_dmamap_callback_t *callback,
+               void *callback_arg, int flags)
+{
+       vm_offset_t             vaddr;
+       vm_paddr_t              paddr;
+       bus_dma_segment_t      *sg;
+       int                     seg;
+       int                     error;
+       vm_paddr_t              nextpaddr;
+
+       if (map == NULL)
+               map = &nobounce_dmamap;
+
+       error = 0;
+       /*
+        * If we are being called during a callback, pagesneeded will
+        * be non-zero, so we can avoid doing the work twice.
+        */
+       if (dmat->lowaddr < ptoa(Maxmem) &&
+           map->pagesneeded == 0) {
+               vm_offset_t     vendaddr;
+
+               /*
+                * Count the number of bounce pages
+                * needed in order to complete this transfer
+                */
+               vaddr = trunc_page((vm_offset_t)buf);
+               vendaddr = (vm_offset_t)buf + buflen;
+
+               while (vaddr < vendaddr) {
+                       paddr = pmap_kextract(vaddr);
+                       if (run_filter(dmat, paddr) != 0) {
+
+                               map->pagesneeded++;
+                       }
+                       vaddr += PAGE_SIZE;
+               }
+       }
+
+       /* Reserve Necessary Bounce Pages */
+       if (map->pagesneeded != 0) {
+               crit_enter();
+               if (reserve_bounce_pages(dmat, map) != 0) {
+
+                       /* Queue us for resources */
+                       map->dmat = dmat;
+                       map->buf = buf;
+                       map->buflen = buflen;
+                       map->callback = callback;
+                       map->callback_arg = callback_arg;
+
+                       STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
+                       crit_exit();
+
+                       return (EINPROGRESS);
+               }
+               crit_exit();
+       }
+
+       vaddr = (vm_offset_t)buf;
+       sg = dmat->segments;
+       seg = 1;
+       sg->ds_len = 0;
+
+       nextpaddr = 0;
+       do {
+               bus_size_t      size;
+
+               paddr = pmap_kextract(vaddr);
+               size = PAGE_SIZE - (paddr & PAGE_MASK);
+               if (size > buflen)
+                       size = buflen;
+
+               if (map->pagesneeded != 0 && run_filter(dmat, paddr)) {
+                       paddr = add_bounce_page(dmat, map, vaddr, size);
+               }
+
+               if (sg->ds_len == 0) {
+                       sg->ds_addr = paddr;
+                       sg->ds_len = size;
+               } else if (paddr == nextpaddr) {
+                       sg->ds_len += size;
+               } else {
+                       /* Go to the next segment */
+                       sg++;
+                       seg++;
+                       if (seg > dmat->nsegments)
+                               break;
+                       sg->ds_addr = paddr;
+                       sg->ds_len = size;
+               }
+               vaddr += size;
+               nextpaddr = paddr + size;
+               buflen -= size;
+       } while (buflen > 0);
+
+       if (buflen != 0) {
+               kprintf("bus_dmamap_load: Too many segs! buf_len = 0x%lx\n",
+                      (u_long)buflen);
+               error = EFBIG;
+       }
+
+       (*callback)(callback_arg, dmat->segments, seg, error);
+
+       return (0);
+}
+
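The callback convention makes a simple single-segment load look roughly like
the hypothetical fragment below.  Note that deferred callbacks (the
EINPROGRESS path) are not actually wired up yet in this vkernel version;
free_bounce_page() panics before it can queue one:

    static void
    example_loadcb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
    {
            bus_addr_t *busaddrp = arg;

            if (error == 0)
                    *busaddrp = segs[0].ds_addr;    /* record first segment */
    }

            bus_addr_t busaddr;

            error = bus_dmamap_load(tag, map, buf, buflen,
                                    example_loadcb, &busaddr, 0);
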
+/*
+ * Utility function to load a linear buffer.  lastaddrp holds state
+ * between invocations (for multiple-buffer loads).  segp contains
+ * the starting segment on entrance, and the ending segment on exit.
+ * first indicates if this is the first invocation of this function.
+ */
+static int
+_bus_dmamap_load_buffer(bus_dma_tag_t dmat,
+                       void *buf, bus_size_t buflen,
+                       struct thread *td,
+                       int flags,
+                       vm_offset_t *lastaddrp,
+                       int *segp,
+                       int first)
+{
+       bus_dma_segment_t *segs;
+       bus_size_t sgsize;
+       bus_addr_t curaddr, lastaddr, baddr, bmask;
+       vm_offset_t vaddr = (vm_offset_t)buf;
+       int seg;
+       pmap_t pmap;
+
+       if (td->td_proc != NULL)
+               pmap = vmspace_pmap(td->td_proc->p_vmspace);
+       else
+               pmap = NULL;
+
+       segs = dmat->segments;
+       lastaddr = *lastaddrp;
+       bmask  = ~(dmat->boundary - 1);
+
+       for (seg = *segp; buflen > 0 ; ) {
+               /*
+                * Get the physical address for this segment.
+                */
+               if (pmap)
+                       curaddr = pmap_extract(pmap, vaddr);
+               else
+                       curaddr = pmap_kextract(vaddr);
+
+               /*
+                * Compute the segment size, and adjust counts.
+                */
+               sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
+               if (buflen < sgsize)
+                       sgsize = buflen;
+
+               /*
+                * Make sure we don't cross any boundaries.
+                */
+               if (dmat->boundary > 0) {
+                       baddr = (curaddr + dmat->boundary) & bmask;
+                       if (sgsize > (baddr - curaddr))
+                               sgsize = (baddr - curaddr);
+               }
+
+               /*
+                * Insert chunk into a segment, coalescing with
+                * previous segment if possible.
+                */
+               if (first) {
+                       segs[seg].ds_addr = curaddr;
+                       segs[seg].ds_len = sgsize;
+                       first = 0;
+               } else {
+                       if (curaddr == lastaddr &&
+                           (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
+                           (dmat->boundary == 0 ||
+                            (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
+                               segs[seg].ds_len += sgsize;
+                       else {
+                               if (++seg >= dmat->nsegments)
+                                       break;
+                               segs[seg].ds_addr = curaddr;
+                               segs[seg].ds_len = sgsize;
+                       }
+               }
+
+               lastaddr = curaddr + sgsize;
+               vaddr += sgsize;
+               buflen -= sgsize;
+       }
+
+       *segp = seg;
+       *lastaddrp = lastaddr;
+
+       /*
+        * Did we fit?
+        */
+       return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
+}
+
+/*
+ * Like _bus_dmamap_load(), but for mbufs.
+ */
+int
+bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
+                    struct mbuf *m0,
+                    bus_dmamap_callback2_t *callback, void *callback_arg,
+                    int flags)
+{
+       int nsegs, error;
+
+       KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL,
+               ("bus_dmamap_load_mbuf: No support for bounce pages!"));
+       KASSERT(m0->m_flags & M_PKTHDR,
+               ("bus_dmamap_load_mbuf: no packet header"));
+
+       nsegs = 0;
+       error = 0;
+       if (m0->m_pkthdr.len <= dmat->maxsize) {
+               int first = 1;
+               vm_offset_t lastaddr = 0;
+               struct mbuf *m;
+
+               for (m = m0; m != NULL && error == 0; m = m->m_next) {
+                       if ( m->m_len == 0 )
+                               continue;
+                       error = _bus_dmamap_load_buffer(dmat,
+                                       m->m_data, m->m_len,
+                                       curthread, flags, &lastaddr,
+                                       &nsegs, first);
+                       first = 0;
+               }
+       } else {
+               error = EINVAL;
+       }
+
+       if (error) {
+               /* force "no valid mappings" in callback */
+               (*callback)(callback_arg, dmat->segments, 0, 0, error);
+       } else {
+               (*callback)(callback_arg, dmat->segments,
+                           nsegs+1, m0->m_pkthdr.len, error);
+       }
+       return (error);
+}
+
+/*
+ * Like _bus_dmamap_load(), but for uios.
+ */
+int
+bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
+                   struct uio *uio,
+                   bus_dmamap_callback2_t *callback, void *callback_arg,
+                   int flags)
+{
+       vm_offset_t lastaddr = 0;
+       int nsegs, error, first, i;
+       bus_size_t resid;
+       struct iovec *iov;
+       struct thread *td = NULL;
+
+       KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL,
+               ("bus_dmamap_load_uio: No support for bounce pages!"));
+
+       resid = uio->uio_resid;
+       iov = uio->uio_iov;
+
+       if (uio->uio_segflg == UIO_USERSPACE) {
+               td = uio->uio_td;
+               KASSERT(td != NULL && td->td_proc != NULL,
+                       ("bus_dmamap_load_uio: USERSPACE but no proc"));
+       }
+
+       nsegs = 0;
+       error = 0;
+       first = 1;
+       for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) {
+               /*
+                * Now at the first iovec to load.  Load each iovec
+                * until we have exhausted the residual count.
+                */
+               bus_size_t minlen =
+                       resid < iov[i].iov_len ? resid : iov[i].iov_len;
+               caddr_t addr = (caddr_t) iov[i].iov_base;
+
+               error = _bus_dmamap_load_buffer(dmat,
+                               addr, minlen,
+                               td, flags, &lastaddr, &nsegs, first);
+               first = 0;
+
+               resid -= minlen;
+       }
+
+       if (error) {
+               /* force "no valid mappings" in callback */
+               (*callback)(callback_arg, dmat->segments, 0, 0, error);
+       } else {
+               (*callback)(callback_arg, dmat->segments,
+                           nsegs+1, uio->uio_resid, error);
+       }
+       return (error);
+}
+
+/*
+ * Release the mapping held by map.
+ */
+void
+_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
+{
+       struct bounce_page *bpage;
+
+       while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
+               STAILQ_REMOVE_HEAD(&map->bpages, links);
+               free_bounce_page(dmat, bpage);
+       }
+}
+
+void
+_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
+{
+       struct bounce_page *bpage;
+
+       if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
+               
+               /*
+                * Handle data bouncing.  We might also
+                * want to add support for invalidating
+                * the caches on broken hardware
+                */
+               switch (op) {
+               case BUS_DMASYNC_PREWRITE:
+                       while (bpage != NULL) {
+                               bcopy((void *)bpage->datavaddr,
+                                     (void *)bpage->vaddr,
+                                     bpage->datacount);
+                               bpage = STAILQ_NEXT(bpage, links);
+                       }
+                       break;
+
+               case BUS_DMASYNC_POSTREAD:
+                       while (bpage != NULL) {
+                               bcopy((void *)bpage->vaddr,
+                                     (void *)bpage->datavaddr,
+                                     bpage->datacount);
+                               bpage = STAILQ_NEXT(bpage, links);
+                       }
+                       break;
+               case BUS_DMASYNC_PREREAD:
+               case BUS_DMASYNC_POSTWRITE:
+                       /* No-ops */
+                       break;
+               }
+       }
+}
+
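For a device-to-memory transfer the expected discipline is therefore PREREAD
before the device is started and POSTREAD once it completes, which is the
point where bounced data is copied back; memory-to-device transfers use
PREWRITE before starting.  A sketch, assuming the usual
bus_dmamap_sync()/bus_dmamap_unload() wrappers over the underscored
implementations here:

    bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD);
    /* ... program the device, wait for the completion interrupt ... */
    bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD); /* bounce data copied back */
    bus_dmamap_unload(tag, map);
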
+static int
+alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
+{
+       int count;
+
+       count = 0;
+       if (total_bpages == 0) {
+               STAILQ_INIT(&bounce_page_list);
+               STAILQ_INIT(&bounce_map_waitinglist);
+               STAILQ_INIT(&bounce_map_callbacklist);
+       }
+       
+       while (numpages > 0) {
+               struct bounce_page *bpage;
+
+               bpage = (struct bounce_page *)kmalloc(sizeof(*bpage), M_DEVBUF,
+                                                    M_INTWAIT);
+
+               if (bpage == NULL)
+                       break;
+               bzero(bpage, sizeof(*bpage));
+               bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF,
+                                                        M_NOWAIT, 0ul,
+                                                        dmat->lowaddr,
+                                                        PAGE_SIZE,
+                                                        0);
+               if (bpage->vaddr == NULL) {
+                       kfree(bpage, M_DEVBUF);
+                       break;
+               }
+               bpage->busaddr = pmap_kextract(bpage->vaddr);
+               crit_enter();
+               STAILQ_INSERT_TAIL(&bounce_page_list, bpage, links);
+               total_bpages++;
+               free_bpages++;
+               crit_exit();
+               count++;
+               numpages--;
+       }
+       return (count);
+}
+
+static int
+reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map)
+{
+       int pages;
+
+       pages = MIN(free_bpages, map->pagesneeded - map->pagesreserved);
+       free_bpages -= pages;
+       reserved_bpages += pages;
+       map->pagesreserved += pages;
+       pages = map->pagesneeded - map->pagesreserved;
+
+       return (pages);
+}
+
+static bus_addr_t
+add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
+               bus_size_t size)
+{
+       struct bounce_page *bpage;
+
+       if (map->pagesneeded == 0)
+               panic("add_bounce_page: map doesn't need any pages");
+       map->pagesneeded--;
+
+       if (map->pagesreserved == 0)
+               panic("add_bounce_page: map doesn't need any pages");
+       map->pagesreserved--;
+
+       crit_enter();
+       bpage = STAILQ_FIRST(&bounce_page_list);
+       if (bpage == NULL)
+               panic("add_bounce_page: free page list is empty");
+
+       STAILQ_REMOVE_HEAD(&bounce_page_list, links);
+       reserved_bpages--;
+       active_bpages++;
+       crit_exit();
+
+       bpage->datavaddr = vaddr;
+       bpage->datacount = size;
+       STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
+       return (bpage->busaddr);
+}
+
+static void
+free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
+{
+       struct bus_dmamap *map;
+
+       bpage->datavaddr = 0;
+       bpage->datacount = 0;
+
+       crit_enter();
+       STAILQ_INSERT_HEAD(&bounce_page_list, bpage, links);
+       free_bpages++;
+       active_bpages--;
+       if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
+               if (reserve_bounce_pages(map->dmat, map) == 0) {
+                       panic("free_bounce_pages: uncoded\n");
+#if 0
+                       STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
+                       STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
+                                          map, links);
+                       busdma_swi_pending = 1;
+                       setsoftvm();
+#endif
+               }
+       }
+       crit_exit();
+}
+
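The four counters partition the pool at every step: alloc_bounce_pages() grows
free and total together, reserve_bounce_pages() moves pages free-to-reserved,
add_bounce_page() moves them reserved-to-active, and free_bounce_page()
returns them active-to-free.  Stated as an assertion (implied by the
accounting, not present in this commit):

    KKASSERT(free_bpages + reserved_bpages + active_bpages == total_bpages);
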
+#if 0
+
+void
+busdma_swi(void)
+{
+       struct bus_dmamap *map;
+
+       crit_enter();
+       while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
+               STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
+               crit_exit();
+               bus_dmamap_load(map->dmat, map, map->buf, map->buflen,
+                               map->callback, map->callback_arg, /*flags*/0);
+               crit_enter();
+       }
+       crit_exit();
+}
+
+#endif
+
diff --git a/sys/platform/vkernel/platform/console.c b/sys/platform/vkernel/platform/console.c
new file mode 100644 (file)
index 0000000..d30170f
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/platform/vkernel/platform/console.c,v 1.1 2007/01/05 22:18:20 dillon Exp $
+ */
+
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/tty.h>
+#include <sys/termios.h>
+#include <sys/fcntl.h>
+#include <unistd.h>
+
+/*
+ * Global console locking functions
+ */
+void
+cons_lock(void)
+{
+}
+
+void
+cons_unlock(void)
+{
+}
+
+/************************************************************************
+ *                         CONSOLE DEVICE                              *
+ ************************************************************************
+ *
+ */
+
+#define CDEV_MAJOR     183
+
+static int vcons_tty_param(struct tty *tp, struct termios *tio);
+static void vcons_tty_start(struct tty *tp);
+
+static d_open_t         vcons_open;
+static d_close_t        vcons_close;
+static d_ioctl_t        vcons_ioctl;
+
+static struct dev_ops vcons_ops = {
+       { "vcons", CDEV_MAJOR, D_TTY },
+       .d_open =       vcons_open,
+       .d_close =      vcons_close,
+       .d_read =       ttyread,
+       .d_write =      ttywrite,
+       .d_ioctl =      vcons_ioctl,
+       .d_poll =       ttypoll,
+};
+
+static int
+vcons_open(struct dev_open_args *ap)
+{
+       cdev_t dev = ap->a_head.a_dev;
+       struct tty *tp;
+       int error;
+
+       if (minor(dev) != 0)
+               return(ENXIO);
+
+       tp = dev->si_tty = ttymalloc(dev->si_tty);
+       tp->t_oproc = vcons_tty_start;
+       tp->t_param = vcons_tty_param;
+       tp->t_stop = nottystop;
+       tp->t_dev = dev;
+
+       if (tp->t_state & TS_ISOPEN)
+               return (EBUSY);
+
+       tp->t_state |= TS_CARR_ON;
+       ttychars(tp);
+       tp->t_iflag = TTYDEF_IFLAG;
+       tp->t_oflag = TTYDEF_OFLAG;
+       tp->t_cflag = TTYDEF_CFLAG;
+       tp->t_lflag = TTYDEF_LFLAG;
+       tp->t_ispeed = TTYDEF_SPEED;
+       tp->t_ospeed = TTYDEF_SPEED;
+       ttsetwater(tp);
+
+       error = (*linesw[tp->t_line].l_open)(dev, tp);
+       return(error);
+}
+
+static int
+vcons_close(struct dev_close_args *ap)
+{
+       cdev_t dev = ap->a_head.a_dev;
+       struct tty *tp;
+
+       if (minor(dev) != 0)
+               return(ENXIO);
+       tp = dev->si_tty;
+       if (tp->t_state & TS_ISOPEN) {
+               (*linesw[tp->t_line].l_close)(tp, ap->a_fflag);
+               ttyclose(tp);
+       }
+       return(0);
+}
+
+static int
+vcons_ioctl(struct dev_ioctl_args *ap)
+{
+       cdev_t dev = ap->a_head.a_dev;
+       struct tty *tp;
+       int error;
+
+       if (minor(dev) != 0)
+               return(ENXIO);
+       tp = dev->si_tty;
+       error = (*linesw[tp->t_line].l_ioctl)(tp, ap->a_cmd, ap->a_data,
+                                             ap->a_fflag, ap->a_cred);
+       if (error != ENOIOCTL)
+               return (error);
+       error = ttioctl(tp, ap->a_cmd, ap->a_data, ap->a_fflag);
+       if (error != ENOIOCTL)
+               return (error);
+       return (ENOTTY);
+}
+
+static int
+vcons_tty_param(struct tty *tp, struct termios *tio)
+{
+       tp->t_ispeed = tio->c_ispeed;
+       tp->t_ospeed = tio->c_ospeed;
+       tp->t_cflag = tio->c_cflag;
+       return(0);
+}
+
+static void
+vcons_tty_start(struct tty *tp)
+{
+       int n;
+       char buf[64];
+
+       if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
+               ttwwakeup(tp);
+               return;
+       }
+       tp->t_state |= TS_BUSY;
+       while ((n = q_to_b(&tp->t_outq, buf, sizeof(buf))) > 0)
+               write(1, buf, n);
+       tp->t_state &= ~TS_BUSY;
+       ttwwakeup(tp);
+}
+
+/************************************************************************
+ *                     KERNEL CONSOLE INTERFACE                        *
+ ************************************************************************
+ *
+ * Kernel direct-call interface console driver
+ */
+static cn_probe_t      vconsprobe;
+static cn_init_t       vconsinit;
+static cn_term_t       vconsterm;
+static cn_getc_t       vconsgetc;
+static cn_checkc_t     vconscheckc;
+static cn_putc_t       vconsputc;
+
+CONS_DRIVER(vcons, vconsprobe, vconsinit, vconsterm, vconsgetc, 
+               vconscheckc, vconsputc, NULL);
+
+static void
+vconsprobe(struct consdev *cp)
+{
+    cp->cn_pri = CN_NORMAL;
+    cp->cn_dev = make_dev(&vcons_ops, 255,
+                         UID_ROOT, GID_WHEEL, 0600, "vconsolectl");
+}
+
+static void
+vconsinit(struct consdev *cp)
+{
+}
+
+static void
+vconsterm(struct consdev *vp)
+{
+}
+
+static int
+vconsgetc(cdev_t dev)
+{
+       unsigned char c;
+
+       if (read(0, &c, 1) == 1)
+               return((int)c);
+       return(-1);
+}
+
+static int
+vconscheckc(cdev_t dev)
+{
+       unsigned char c;
+
+       if (__pread(0, &c, 1, O_FNONBLOCKING, -1LL) == 1)
+               return((int)c);
+       return(-1);
+}
+
+static void
+vconsputc(cdev_t dev, int c)
+{
+       char cc = c;
+
+       write(1, &cc, 1);
+}
+
+
index 1d2fe55..ec46e38 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.2 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.3 2007/01/05 22:18:20 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -50,6 +50,26 @@ ovbcopy(const void *src, void *dst, size_t len)
        bcopy(src, dst, len);
 }
 
+void
+bcopyi(const void *src, void *dst, size_t len)
+{
+       bcopy(src, dst, len);
+}
+
+int
+copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *lencopied)
+{
+       size_t i;
+
+       for (i = 0; i < len; ++i) {
+               if ((((char *)kdaddr)[i] = ((const char *)kfaddr)[i]) == 0) {
+                       *lencopied = i + 1;
+                       return(0);
+               }
+       }
+       return (ENAMETOOLONG);
+}
+
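A minimal kernel-to-kernel usage sketch; note that this implementation stores
through lencopied unconditionally on success, so callers here must pass a
valid pointer:

    static int
    example_copyname(const char *src, char *name, size_t namelen)
    {
            size_t done;
            int error;

            error = copystr(src, name, namelen, &done);
            /* on success, done counts the bytes copied including the NUL */
            return (error);
    }
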
 /*
  * Copies a NUL-terminated string from user space to kernel space.
  * The number of bytes copied, including the terminator, is returned in
index 43303b9..5f01c86 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.5 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.6 2007/01/05 22:18:20 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -44,6 +44,7 @@
 #include <sys/vkernel.h>
 #include <sys/tls.h>
 #include <sys/proc.h>
+#include <sys/msgbuf.h>
 #include <vm/vm_page.h>
 
 #include <machine/globaldata.h>
@@ -71,8 +72,14 @@ vm_offset_t virtual_start;
 vm_offset_t virtual_end;
 vm_offset_t kernel_vm_end;
 vm_offset_t crashdumpmap;
+vm_offset_t clean_sva;
+vm_offset_t clean_eva;
+struct msgbuf *msgbufp;
+caddr_t ptvmmap;
 vpte_t *KernelPTD;
 vpte_t *KernelPTA;
+u_int cpu_feature;     /* XXX */
+u_int tsc_present;     /* XXX */
 
 struct privatespace *CPU_prvspace;
 
@@ -321,6 +328,24 @@ init_kern_memory(void)
        crashdumpmap = virtual_start;
        virtual_start += MAXDUMPPGS * PAGE_SIZE;
 
+       /*
+        * msgbufp maps the system message buffer
+        */
+       assert((MSGBUF_SIZE & PAGE_MASK) == 0);
+       msgbufp = (void *)virtual_start;
+       for (i = 0; i < (MSGBUF_SIZE >> PAGE_SHIFT); ++i) {
+               pmap_kenter_quick(virtual_start, phys_avail[0]);
+               virtual_start += PAGE_SIZE;
+               phys_avail[0] += PAGE_SIZE;
+       }
+       msgbufinit(msgbufp, MSGBUF_SIZE);
+
+       /*
+        * used by kern_memio for /dev/mem access
+        */
+       ptvmmap = (caddr_t)virtual_start;
+       virtual_start += PAGE_SIZE;
+
        /*
         * Bootstrap the kernel_pmap
         */
@@ -443,3 +468,11 @@ cpu_reset(void)
        kprintf("cpu reset\n");
        exit(0);
 }
+
+void
+cpu_halt(void)
+{
+       kprintf("cpu halt\n");
+       for (;;)
+               __asm__ __volatile("hlt");
+}
diff --git a/sys/platform/vkernel/platform/ipl_funcs.c b/sys/platform/vkernel/platform/ipl_funcs.c
new file mode 100644 (file)
index 0000000..cb7ed7d
--- /dev/null
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 1997 Bruce Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/i386/isa/ipl_funcs.c,v 1.32.2.5 2002/12/17 18:04:02 sam Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/ipl_funcs.c,v 1.1 2007/01/05 22:18:20 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/interrupt.h>
+#include <machine/globaldata.h>
+
+/*
+ * Bits in the ipending bitmap variable must be set atomically because
+ * ipending may be manipulated by interrupts or other cpus without holding
+ * any locks.
+ *
+ * Note: the setbits functions below use atomic_set_int_nonlocked().
+ */
+#define DO_SETBITS(name, var, bits)                                    \
+void                                                                   \
+name(void)                                                             \
+{                                                                      \
+       struct mdglobaldata *gd = mdcpu;                                \
+       atomic_set_int_nonlocked(var, bits);                            \
+       atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);     \
+}                                                                      \
+
+DO_SETBITS(setdelayed,   &gd->gd_spending, loadandclear(&gd->gd_sdelayed))
+
+DO_SETBITS(setsoftcamnet,&gd->gd_spending, SWI_CAMNET_PENDING)
+DO_SETBITS(setsoftcambio,&gd->gd_spending, SWI_CAMBIO_PENDING)
+DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING)
+DO_SETBITS(setsoftnet,   &gd->gd_spending, SWI_NET_PENDING)
+DO_SETBITS(setsofttty,   &gd->gd_spending, SWI_TTY_PENDING)
+DO_SETBITS(setsoftvm,   &gd->gd_spending, SWI_VM_PENDING)
+DO_SETBITS(setsofttq,   &gd->gd_spending, SWI_TQ_PENDING)
+DO_SETBITS(setsoftcrypto,&gd->gd_spending, SWI_CRYPTO_PENDING)
+
+DO_SETBITS(schedsoftcamnet, &gd->gd_sdelayed, SWI_CAMNET_PENDING)
+DO_SETBITS(schedsoftcambio, &gd->gd_sdelayed, SWI_CAMBIO_PENDING)
+DO_SETBITS(schedsoftnet, &gd->gd_sdelayed, SWI_NET_PENDING)
+DO_SETBITS(schedsofttty, &gd->gd_sdelayed, SWI_TTY_PENDING)
+DO_SETBITS(schedsoftvm,         &gd->gd_sdelayed, SWI_VM_PENDING)
+DO_SETBITS(schedsofttq,         &gd->gd_sdelayed, SWI_TQ_PENDING)
+/* YYY schedsoft what? */
+
+unsigned
+softclockpending(void)
+{
+       return (mdcpu->gd_spending & SWI_CLOCK_PENDING);
+}
+
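For reference, DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING)
expands to roughly the following; the gd in the var argument refers to the
local declared inside the macro body:

    void
    setsoftclock(void)
    {
            struct mdglobaldata *gd = mdcpu;
            atomic_set_int_nonlocked(&gd->gd_spending, SWI_CLOCK_PENDING);
            atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);
    }
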
index 84fe6af..d8b877e 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.3 2007/01/05 22:18:20 dillon Exp $
  */
 
 #include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/machintr.h>
 #include <sys/errno.h>
 #include <stdio.h>
 
+/*
+ * Interrupt Subsystem ABI
+ */
+
 static void dummy_intrdis(int);
 static void dummy_intren(int);
 static int dummy_vectorctl(int, int, int);
@@ -89,3 +95,10 @@ dummy_finalize(void)
 {
 }
 
+/*
+ * Process pending interrupts
+ */
+void
+splz(void)
+{
+}
index 39c0579..919be81 100644 (file)
@@ -38,7 +38,7 @@
  * 
  * from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.1 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.2 2007/01/05 22:18:20 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -409,7 +409,7 @@ pmap_pte(struct pmap *pmap, vm_offset_t va)
 {
        vpte_t *ptep;
 
-       ptep = pmap->pm_pdir[va >> PAGE_SHIFT];
+       ptep = &pmap->pm_pdir[va >> PAGE_SHIFT];
        if (*ptep & VPTE_PS)
                return(ptep);
        if (*ptep)
@@ -450,6 +450,46 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
        }
 }
 
+void
+pmap_kenter_sync(vm_offset_t va)
+{
+       pmap_inval_info info;
+
+       pmap_inval_init(&info);
+       pmap_inval_add(&info, &kernel_pmap, va);
+       pmap_inval_flush(&info);
+}
+
+void
+pmap_kenter_sync_quick(vm_offset_t va)
+{
+       madvise((void *)va, PAGE_SIZE, MADV_INVAL);
+}
+
+/*
+ * Map a contiguous range of physical memory into kernel virtual memory.
+ */
+vm_offset_t
+pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
+{
+       while (start < end) {
+               pmap_kenter(virt, start);
+               virt += PAGE_SIZE;
+               start += PAGE_SIZE;
+       }
+       return (virt);
+}
+
+vpte_t *
+pmap_kpte(vm_offset_t va)
+{
+       vpte_t *ptep;
+
+       KKASSERT(va >= KvaStart && va < KvaEnd);
+       ptep = KernelPTA + ((va - KvaStart) >> PAGE_SHIFT);
+       return(ptep);
+}
+
 /*
  * Enter a mapping into kernel_pmap without any SMP interactions.
  * 
@@ -1148,7 +1188,7 @@ pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va,
         * the SMP case.
         */
        if (oldpte & VPTE_G)
-               cpu_invlpg((void *)va);
+               madvise((void *)va, PAGE_SIZE, MADV_INVAL);
        pmap->pm_stats.resident_count -= 1;
        if (oldpte & PG_MANAGED) {
                m = PHYS_TO_VM_PAGE(oldpte);
@@ -1711,6 +1751,25 @@ retry:
        return mpte;
 }
 
+vm_paddr_t
+pmap_extract(pmap_t pmap, vm_offset_t va)
+{
+       vm_paddr_t rtval;
+       vpte_t pte;
+
+       if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) {
+               if (pte & VPTE_PS) {
+                       rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1);
+                       rtval |= va & SEG_MASK;
+               } else {
+                       pte = *(get_ptbase(pmap) + (va >> PAGE_SHIFT));
+                       rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK);
+               }
+               return(rtval);
+       }
+       return(0);
+}
+
 #define MAX_INIT_PT (96)
 
 /*
@@ -2108,7 +2167,7 @@ pmap_zero_page(vm_paddr_t phys)
                panic("pmap_zero_page: CMAP3 busy");
        *(int *)gd->gd_CMAP3 =
                    VPTE_V | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
-       cpu_invlpg(gd->gd_CADDR3);
+       madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
 
        bzero(gd->gd_CADDR3, PAGE_SIZE);
        *(int *) gd->gd_CMAP3 = 0;
@@ -2131,7 +2190,7 @@ pmap_page_assertzero(vm_paddr_t phys)
                panic("pmap_zero_page: CMAP3 busy");
        *(int *)gd->gd_CMAP3 =
                    VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
-       cpu_invlpg(gd->gd_CADDR3);
+       madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
        for (i = 0; i < PAGE_SIZE; i += 4) {
            if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) {
                panic("pmap_page_assertzero() @ %p not zero!\n",
@@ -2159,7 +2218,7 @@ pmap_zero_page_area(vm_paddr_t phys, int off, int size)
        if (*(int *) gd->gd_CMAP3)
                panic("pmap_zero_page: CMAP3 busy");
        *(int *) gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M;
-       cpu_invlpg(gd->gd_CADDR3);
+       madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL);
 
        bzero((char *)gd->gd_CADDR3 + off, size);
        *(int *) gd->gd_CMAP3 = 0;
@@ -2187,8 +2246,8 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
        *(int *) gd->gd_CMAP1 = VPTE_V | (src & PG_FRAME) | PG_A;
        *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
 
-       cpu_invlpg(gd->gd_CADDR1);
-       cpu_invlpg(gd->gd_CADDR2);
+       madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
+       madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
 
        bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE);
 
@@ -2218,8 +2277,8 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
        *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A;
        *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M;
 
-       cpu_invlpg(gd->gd_CADDR1);
-       cpu_invlpg(gd->gd_CADDR2);
+       madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL);
+       madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL);
 
        bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK),
              (char *)gd->gd_CADDR2 + (dst & PAGE_MASK),
@@ -2709,8 +2768,11 @@ pmap_activate(struct proc *p)
 #if defined(SWTCH_OPTIM_STATS)
        tlb_flush_count++;
 #endif
+       panic("pmap_activate"); /* XXX store vmspace id in context */
+#if 0
        p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir);
        load_cr3(p->p_thread->td_pcb->pcb_cr3);
+#endif
 }
 
 void
index c59011e..b4a5594 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.1 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.2 2007/01/05 22:18:20 dillon Exp $
  */
 
 /*
@@ -52,6 +52,8 @@
 #include <sys/vmmeter.h>
 #include <sys/thread2.h>
 
+#include <sys/mman.h>
+
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <machine/pmap.h>
 #include <machine/pmap_inval.h>
 
-#ifdef SMP
-
 static void
 _cpu_invltlb(void *dummy)
 {
-    cpu_invltlb();
+    /* XXX madvise over entire address space is really expensive */
+    madvise((void *)KvaStart, KvaSize, MADV_INVAL);    
 }
 
 static void
 _cpu_invl1pg(void *data)
 {
-    cpu_invlpg(data);
+    madvise(data, PAGE_SIZE, MADV_INVAL);
 }
 
-#endif
-
 /*
  * Initialize for add or flush
  */
@@ -142,9 +141,9 @@ pmap_inval_flush(pmap_inval_info_t info)
        lwkt_cpusync_finish(&info->pir_cpusync);
 #else
     if (info->pir_flags & PIRF_INVLTLB)
-       cpu_invltlb();
+       _cpu_invltlb(NULL);
     else if (info->pir_flags & PIRF_INVL1PG)
-       cpu_invlpg(info->pir_cpusync.cs_data);
+       _cpu_invl1pg(info->pir_cpusync.cs_data);
 #endif
     info->pir_flags = 0;
 }
similarity index 78%
copy from sys/platform/vkernel/include/md_var.h
copy to sys/platform/vkernel/platform/sysarch.c
index 628dac2..0ddcaf9 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/sysarch.c,v 1.1 2007/01/05 22:18:20 dillon Exp $
  */
-
-#ifndef _MACHINE_MD_VAR_H_
-#define _MACHINE_MD_VAR_H_
-
-#ifndef _SYS_TYPES_H_
 #include <sys/types.h>
-#endif
-#ifndef _SYS_VKERNEL_H_
-#include <sys/vkernel.h>
-#endif
-
-extern char    sigcode[];
-extern int     szsigcode;
-extern vpte_t  *KernelPTA;
-extern vpte_t  *KernelPTD;
-extern vm_offset_t crashdumpmap;
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/memrange.h>
+#include <sys/errno.h>
 
-struct mdglobaldata;
+int
+sys_sysarch(struct sysarch_args *uap)
+{
+       return (EOPNOTSUPP);
+}
 
-void cpu_gdinit (struct mdglobaldata *gd, int cpu);
-void cpu_idle_restore (void);
+int
+cpu_set_iopl(void)
+{
+       return (EOPNOTSUPP);
+}
 
-#endif
+int
+cpu_clr_iopl(void)
+{
+       return (EOPNOTSUPP);
+}
 
similarity index 66%
copy from sys/platform/vkernel/platform/machintr.c
copy to sys/platform/vkernel/platform/systimer.c
index 84fe6af..3d649c8 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/systimer.c,v 1.1 2007/01/05 22:18:20 dillon Exp $
  */
 
 #include <sys/types.h>
-#include <sys/machintr.h>
-#include <sys/errno.h>
-#include <stdio.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/systimer.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
 
-static void dummy_intrdis(int);
-static void dummy_intren(int);
-static int dummy_vectorctl(int, int, int);
-static int dummy_setvar(int, const void *);
-static int dummy_getvar(int, void *);
-static void dummy_finalize(void);
+#include <unistd.h>
 
-struct machintr_abi MachIntrABI = {
-       MACHINTR_GENERIC,
-       dummy_intrdis,
-       dummy_intren,
-       dummy_vectorctl,
-       dummy_setvar,
-       dummy_getvar,
-       dummy_finalize
-};
+int disable_rtc_set;
+SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
+          CTLFLAG_RW, &disable_rtc_set, 0, "");
 
-static void
-dummy_intrdis(int intr)
+int adjkerntz;
+int wall_cmos_clock = 1;
+
+void
+cpu_initclocks(void)
 {
+       panic("cpu_initclocks");
 }
 
-static void
-dummy_intren(int intr)
+void
+cputimer_intr_config(struct cputimer *timer)
 {
+       panic("cputimer_intr_config");
 }
 
-static int
-dummy_vectorctl(int op, int intr, int flags)
+void
+cputimer_intr_reload(sysclock_t reload)
 {
-       return (EOPNOTSUPP);
+       panic("cputimer_intr_reload");
 }
 
-static int
-dummy_setvar(int varid, const void *buf)
+/*
+ * Initialize the time-of-day register from a time base obtained, e.g.,
+ * from a filesystem.
+ */
+void
+inittodr(time_t base)
 {
-       return (ENOENT);
+       panic("inittodr");
 }
 
-static int
-dummy_getvar(int varid, void *buf)
+/*
+ * Write system time back to the RTC
+ */
+void
+resettodr(void)
 {
-       return (ENOENT);
+       panic("resettodr");
 }
 
-static void
-dummy_finalize(void)
+void
+DELAY(int usec)
 {
+       usleep(usec);
 }
 
+