From 6f7b98e081c59b0af2e4c3558c24bd00cd422419 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 5 Jan 2007 22:18:20 +0000
Subject: [PATCH] Continue fleshing out the VKERNEL.

---
 sys/platform/vkernel/conf/files               |   20 +-
 sys/platform/vkernel/i386/autoconf.c          |   83 +-
 sys/platform/vkernel/i386/cpu_regs.c          | 1253 +++++++++++++
 sys/platform/vkernel/i386/db_interface.c      |  328 ++++
 sys/platform/vkernel/i386/db_trace.c          |  642 +++++++
 sys/platform/vkernel/i386/global.s            |    5 +-
 sys/platform/vkernel/i386/locore.s            |   19 +-
 sys/platform/vkernel/i386/npx.c               |    8 +-
 sys/platform/vkernel/i386/swtch.s             |   15 +-
 sys/platform/vkernel/i386/tls.c               |  204 +++
 sys/platform/vkernel/i386/trap.c              | 1551 +++++++++++++++++
 .../{include/md_var.h => i386/userldt.c}      |   39 +-
 sys/platform/vkernel/i386/vm_machdep.c        |  398 +++++
 sys/platform/vkernel/include/globaldata.h     |    4 +-
 sys/platform/vkernel/include/md_var.h         |   13 +-
 sys/platform/vkernel/include/pcb_ext.h        |   76 +
 .../vkernel/platform/busdma_machdep.c         |  900 ++++++++++
 sys/platform/vkernel/platform/console.c       |  241 +++
 sys/platform/vkernel/platform/copyio.c        |   22 +-
 sys/platform/vkernel/platform/init.c          |   35 +-
 sys/platform/vkernel/platform/ipl_funcs.c     |   78 +
 sys/platform/vkernel/platform/machintr.c      |   15 +-
 sys/platform/vkernel/platform/pmap.c          |   82 +-
 sys/platform/vkernel/platform/pmap_inval.c    |   17 +-
 .../{include/md_var.h => platform/sysarch.c}  |   41 +-
 .../platform/{machintr.c => systimer.c}       |   73 +-
 26 files changed, 6050 insertions(+), 112 deletions(-)
 create mode 100644 sys/platform/vkernel/i386/cpu_regs.c
 create mode 100644 sys/platform/vkernel/i386/db_interface.c
 create mode 100644 sys/platform/vkernel/i386/db_trace.c
 create mode 100644 sys/platform/vkernel/i386/tls.c
 create mode 100644 sys/platform/vkernel/i386/trap.c
 copy sys/platform/vkernel/{include/md_var.h => i386/userldt.c} (78%)
 create mode 100644 sys/platform/vkernel/i386/vm_machdep.c
 create mode 100644 sys/platform/vkernel/include/pcb_ext.h
 create mode 100644 sys/platform/vkernel/platform/busdma_machdep.c
 create mode 100644 sys/platform/vkernel/platform/console.c
 create mode 100644 sys/platform/vkernel/platform/ipl_funcs.c
 copy sys/platform/vkernel/{include/md_var.h => platform/sysarch.c} (78%)
 copy sys/platform/vkernel/platform/{machintr.c => systimer.c} (66%)

diff --git a/sys/platform/vkernel/conf/files b/sys/platform/vkernel/conf/files
index 1927feda8b..dda23b0c51 100644
--- a/sys/platform/vkernel/conf/files
+++ b/sys/platform/vkernel/conf/files
@@ -1,7 +1,7 @@
 # This file tells config what files go into building a kernel,
 # files marked standard are always included.
# -# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.6 2007/01/02 04:24:24 dillon Exp $ +# $DragonFly: src/sys/platform/vkernel/conf/files,v 1.7 2007/01/05 22:18:17 dillon Exp $ # bf_enc.o optional ipsec ipsec_esp \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ @@ -36,6 +36,11 @@ machine/vkernel/i386/autoconf.c standard cpu/i386/misc/elf_machdep.c standard cpu/i386/misc/in_cksum2.s optional inet cpu/i386/misc/ktr.c optional ktr +cpu/i386/misc/db_disasm.c optional ddb +# +# DOS mbr +kern/subr_diskmbr.c standard + #vkernel/vkernel/pmap.c standard #vkernel/vkernel/pmap_inval.c standard #vkernel/vkernel/spinlock.s standard @@ -49,8 +54,21 @@ cpu/i386/misc/ktr.c optional ktr machine/vkernel/i386/global.s standard machine/vkernel/i386/swtch.s standard machine/vkernel/i386/npx.c mandatory npx +machine/vkernel/i386/db_interface.c standard +machine/vkernel/i386/db_trace.c standard +machine/vkernel/i386/vm_machdep.c standard +machine/vkernel/i386/cpu_regs.c standard +machine/vkernel/i386/userldt.c standard +machine/vkernel/i386/tls.c standard +machine/vkernel/i386/trap.c standard machine/vkernel/platform/init.c standard machine/vkernel/platform/globaldata.c standard machine/vkernel/platform/machintr.c standard machine/vkernel/platform/copyio.c standard machine/vkernel/platform/pmap.c standard +machine/vkernel/platform/pmap_inval.c standard +machine/vkernel/platform/busdma_machdep.c standard +machine/vkernel/platform/sysarch.c standard +machine/vkernel/platform/systimer.c standard +machine/vkernel/platform/console.c standard +machine/vkernel/platform/ipl_funcs.c standard diff --git a/sys/platform/vkernel/i386/autoconf.c b/sys/platform/vkernel/i386/autoconf.c index c2c4c9410c..4afec6151d 100644 --- a/sys/platform/vkernel/i386/autoconf.c +++ b/sys/platform/vkernel/i386/autoconf.c @@ -35,7 +35,7 @@ * * from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91 * $FreeBSD: src/sys/i386/i386/autoconf.c,v 1.146.2.2 2001/06/07 06:05:58 dd Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.5 2006/12/23 00:27:03 swildner Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/autoconf.c,v 1.6 2007/01/05 22:18:18 dillon Exp $ */ /* @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,11 @@ #include #include +#include +#include +#include +#include + #if 0 #include #include @@ -85,9 +91,10 @@ device_t isa_bus_device = 0; #endif -static void configure_first (void *); -static void configure (void *); -static void configure_final (void *); +static void cpu_startup (void *); +static void configure_first (void *); +static void configure (void *); +static void configure_final (void *); #if defined(FFS) && defined(FFS_ROOT) static void setroot (void); @@ -99,6 +106,7 @@ static void pxe_setup_nfsdiskless(void); #endif #endif +SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); SYSINIT(configure1, SI_SUB_CONFIGURE, SI_ORDER_FIRST, configure_first, NULL); /* SI_ORDER_SECOND is hookable */ SYSINIT(configure2, SI_SUB_CONFIGURE, SI_ORDER_THIRD, configure, NULL); @@ -108,6 +116,73 @@ SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL); cdev_t rootdev = NOCDEV; cdev_t dumpdev = NOCDEV; +/* + * + */ +static void +cpu_startup(void *dummy) +{ + vm_offset_t buffer_sva; + vm_offset_t buffer_eva; + vm_offset_t pager_sva; + vm_offset_t pager_eva; + vm_offset_t minaddr; + vm_offset_t maxaddr; + + kprintf("%s", version); + kprintf("real memory = %llu (%lluK bytes)\n", + 
ptoa(Maxmem), ptoa(Maxmem) / 1024); + + if (nbuf == 0) { + int factor = 4 * BKVASIZE / 1024; + int kbytes = physmem * (PAGE_SIZE / 1024); + + nbuf = 50; + if (kbytes > 4096) + nbuf += min((kbytes - 4096) / factor, 65536 / factor); + if (kbytes > 65536) + nbuf += (kbytes - 65536) * 2 / (factor * 5); + if (maxbcache && nbuf > maxbcache / BKVASIZE) + nbuf = maxbcache / BKVASIZE; + } + if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) { + nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2); + kprintf("Warning: nbufs capped at %d\n", nbuf); + } + + nswbuf = max(min(nbuf/4, 256), 16); +#ifdef NSWBUF_MIN + if (nswbuf < NSWBUF_MIN) + nswbuf = NSWBUF_MIN; +#endif +#ifdef DIRECTIO + ffs_rawread_setup(); +#endif + kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva, + (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); + kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva, + (nbuf*BKVASIZE)); + buffer_map.system_map = 1; + kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva, + (nswbuf*MAXPHYS) + pager_map_size); + pager_map.system_map = 1; + kmem_suballoc(&kernel_map, &exec_map, &minaddr, &maxaddr, + (16*(ARG_MAX+(PAGE_SIZE*3)))); +#if defined(USERCONFIG) + userconfig(); + cninit(); /* the preferred console may have changed */ +#endif + kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count), + ptoa(vmstats.v_free_count) / 1024); + bufinit(); + vm_pager_bufferinit(); +#ifdef SMP + mp_start(); + mp_announce(); +#endif + cpu_setregs(); +} + /* * Determine i/o configuration for a machine. */ diff --git a/sys/platform/vkernel/i386/cpu_regs.c b/sys/platform/vkernel/i386/cpu_regs.c new file mode 100644 index 0000000000..73bfa47f9a --- /dev/null +++ b/sys/platform/vkernel/i386/cpu_regs.c @@ -0,0 +1,1253 @@ +/*- + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (C) 1994, David Greenman + * Copyright (c) 1982, 1987, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 + * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include "use_ether.h" +#include "use_npx.h" +#include "use_isa.h" +#include "opt_atalk.h" +#include "opt_compat.h" +#include "opt_ddb.h" +#include "opt_directio.h" +#include "opt_inet.h" +#include "opt_ipx.h" +#include "opt_msgbuf.h" +#include "opt_swap.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include /* pcb.h included via sys/user.h */ +#include /* CPU_prvspace */ +#include +#ifdef PERFMON +#include +#endif +#include + +#include +#include +#include +#include +#include + +extern void dblfault_handler (void); + +#ifndef CPU_DISABLE_SSE +static void set_fpregs_xmm (struct save87 *, struct savexmm *); +static void fill_fpregs_xmm (struct savexmm *, struct save87 *); +#endif /* CPU_DISABLE_SSE */ +#ifdef DIRECTIO +extern void ffs_rawread_setup(void); +#endif /* DIRECTIO */ + +#ifdef SMP +int64_t tsc_offsets[MAXCPU]; +#else +int64_t tsc_offsets[1]; +#endif + +#if defined(SWTCH_OPTIM_STATS) +extern int swtch_optim_stats; +SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, + CTLFLAG_RD, &swtch_optim_stats, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, + CTLFLAG_RD, &tlb_flush_count, 0, ""); +#endif + +int physmem = 0; + +static int +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_physmem, "IU", ""); + +static int +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) +{ + int error = sysctl_handle_int(oidp, 0, + ctob(physmem - vmstats.v_wire_count), req); + return (error); +} + +SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, + 0, 0, sysctl_hw_usermem, "IU", ""); + +#if 0 + +static int +sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) +{ + int error; + + /* Unwind the buffer, so that it's linear (possibly starting with + * some initial nulls). 
+ */ + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, + msgbufp->msg_size-msgbufp->msg_bufr,req); + if(error) return(error); + if(msgbufp->msg_bufr>0) { + error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, + msgbufp->msg_bufr,req); + } + return(error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, + 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); + +static int msgbuf_clear; + +static int +sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) { + /* Clear the buffer and reset write pointer */ + bzero(msgbufp->msg_ptr,msgbufp->msg_size); + msgbufp->msg_bufr=msgbufp->msg_bufx=0; + msgbuf_clear=0; + } + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, + &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", + "Clear kernel message buffer"); + +#endif + +/* + * Send an interrupt to process. + * + * Stack is set up to allow sigcode stored + * at top to call routine, followed by kcall + * to sigreturn routine below. After sigreturn + * resets the signal mask, the stack, and the + * frame pointer, it returns to the user + * specified pc, psl. + */ +void +sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) +{ + struct lwp *lp = curthread->td_lwp; + struct proc *p = lp->lwp_proc; + struct trapframe *regs; + struct sigacts *psp = p->p_sigacts; + struct sigframe sf, *sfp; + int oonstack; + + regs = lp->lwp_md.md_regs; + oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; + + /* save user context */ + bzero(&sf, sizeof(struct sigframe)); + sf.sf_uc.uc_sigmask = *mask; + sf.sf_uc.uc_stack = lp->lwp_sigstk; + sf.sf_uc.uc_mcontext.mc_onstack = oonstack; + sf.sf_uc.uc_mcontext.mc_gs = rgs(); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe)); + + /* Allocate and validate space for the signal handler context. */ + /* XXX lwp flags */ + if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp + + lp->lwp_sigstk.ss_size - sizeof(struct sigframe)); + lp->lwp_sigstk.ss_flags |= SS_ONSTACK; + } + else + sfp = (struct sigframe *)regs->tf_esp - 1; + + /* Translate the signal is appropriate */ + if (p->p_sysent->sv_sigtbl) { + if (sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + } + + /* Build the argument list for the signal handler. */ + sf.sf_signum = sig; + sf.sf_ucontext = (register_t)&sfp->sf_uc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* Signal handler installed with SA_SIGINFO. */ + sf.sf_siginfo = (register_t)&sfp->sf_si; + sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; + + /* fill siginfo structure */ + sf.sf_si.si_signo = sig; + sf.sf_si.si_code = code; + sf.sf_si.si_addr = (void*)regs->tf_err; + } + else { + /* Old FreeBSD-style arguments. */ + sf.sf_siginfo = code; + sf.sf_addr = regs->tf_err; + sf.sf_ahu.sf_handler = catcher; + } + +#if 0 + /* + * If we're a vm86 process, we want to save the segment registers. + * We also change eflags to be our emulated eflags, not the actual + * eflags. 
+ */ + if (regs->tf_eflags & PSL_VM) { + struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; + struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; + + sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; + sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; + sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; + sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; + + if (vm86->vm86_has_vme == 0) + sf.sf_uc.uc_mcontext.mc_eflags = + (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | + (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); + + /* + * Clear PSL_NT to inhibit T_TSSFLT faults on return from + * syscalls made by the signal handler. This just avoids + * wasting time for our lazy fixup of such faults. PSL_NT + * does nothing in vm86 mode, but vm86 programs can set it + * almost legitimately in probes for old cpu types. + */ + tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); + } +#endif + + /* + * Copy the sigframe out to the user's stack. + */ + if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { + /* + * Something is wrong with the stack pointer. + * ...Kill the process. + */ + sigexit(p, SIGILL); + } + + regs->tf_esp = (int)sfp; + regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); + regs->tf_eflags &= ~PSL_T; + regs->tf_cs = 0; + regs->tf_ds = 0; + regs->tf_es = 0; + regs->tf_fs = 0; + regs->tf_ss = 0; +} + +/* + * Sanitize the trapframe for a virtual kernel passing control to a custom + * VM context. + * + * Allow userland to set or maintain PSL_RF, the resume flag. This flag + * basically controls whether the return PC should skip the first instruction + * (as in an explicit system call) or re-execute it (as in an exception). + */ +int +cpu_sanitize_frame(struct trapframe *frame) +{ + frame->tf_cs = 0; + frame->tf_ds = 0; + frame->tf_es = 0; + frame->tf_fs = 0; + frame->tf_ss = 0; + frame->tf_eflags &= (PSL_USER | PSL_RF); + frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I; + return(0); +} + +/* + * sigreturn(ucontext_t *sigcntxp) + * + * System call to cleanup state after a signal + * has been taken. Reset signal mask and + * stack state from context left by sendsig (above). + * Return to previous pc and psl as specified by + * context left by sendsig. Check carefully to + * make sure that the user has not modified the + * state to gain improper privileges. + */ +#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) +#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) + +int +sys_sigreturn(struct sigreturn_args *uap) +{ + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + ucontext_t *ucp; + int cs, eflags; + + ucp = uap->sigcntxp; + + if (!useracc((caddr_t)ucp, sizeof(ucontext_t), VM_PROT_READ)) + return (EFAULT); + + regs = lp->lwp_md.md_regs; + eflags = ucp->uc_mcontext.mc_eflags; + +#if 0 + if (eflags & PSL_VM) { + struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; + struct vm86_kernel *vm86; + + /* + * if pcb_ext == 0 or vm86_inited == 0, the user hasn't + * set up the vm86 area, and we can't enter vm86 mode. 
+ */ + if (lp->lwp_thread->td_pcb->pcb_ext == 0) + return (EINVAL); + vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86; + if (vm86->vm86_inited == 0) + return (EINVAL); + + /* go back to user mode if both flags are set */ + if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) + trapsignal(lp->lwp_proc, SIGBUS, 0); + + if (vm86->vm86_has_vme) { + eflags = (tf->tf_eflags & ~VME_USERCHANGE) | + (eflags & VME_USERCHANGE) | PSL_VM; + } else { + vm86->vm86_eflags = eflags; /* save VIF, VIP */ + eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; + } + bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); + tf->tf_eflags = eflags; + tf->tf_vm86_ds = tf->tf_ds; + tf->tf_vm86_es = tf->tf_es; + tf->tf_vm86_fs = tf->tf_fs; + tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; + tf->tf_ds = 0; + tf->tf_es = 0; + tf->tf_fs = 0; + } else +#endif + { + /* + * Don't allow users to change privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_eflags for faults. Debuggers + * should sometimes set it there too. tf_eflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ + if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { + kprintf("sigreturn: eflags = 0x%x\n", eflags); + return(EINVAL); + } + + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + cs = ucp->uc_mcontext.mc_cs; + if (!CS_SECURE(cs)) { + kprintf("sigreturn: cs = 0x%x\n", cs); + trapsignal(lp->lwp_proc, SIGBUS, T_PROTFLT); + return(EINVAL); + } + bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe)); + } + + if (ucp->uc_mcontext.mc_onstack & 1) + lp->lwp_sigstk.ss_flags |= SS_ONSTACK; + else + lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK; + + lp->lwp_sigmask = ucp->uc_sigmask; + SIG_CANTMASK(lp->lwp_sigmask); + return(EJUSTRETURN); +} + +/* + * Stack frame on entry to function. %eax will contain the function vector, + * %ecx will contain the function data. flags, ecx, and eax will have + * already been pushed on the stack. + */ +struct upc_frame { + register_t eax; + register_t ecx; + register_t edx; + register_t flags; + register_t oldip; +}; + +void +sendupcall(struct vmupcall *vu, int morepending) +{ + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + struct upcall upcall; + struct upc_frame upc_frame; + int crit_count = 0; + + /* + * Get the upcall data structure + */ + if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) || + copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)) + ) { + vu->vu_pending = 0; + kprintf("bad upcall address\n"); + return; + } + + /* + * If the data structure is already marked pending or has a critical + * section count, mark the data structure as pending and return + * without doing an upcall. vu_pending is left set. + */ + if (upcall.upc_pending || crit_count >= vu->vu_pending) { + if (upcall.upc_pending < vu->vu_pending) { + upcall.upc_pending = vu->vu_pending; + copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, + sizeof(upcall.upc_pending)); + } + return; + } + + /* + * We can run this upcall now, clear vu_pending. 
+ * + * Bump our critical section count and set or clear the + * user pending flag depending on whether more upcalls are + * pending. The user will be responsible for calling + * upc_dispatch(-1) to process remaining upcalls. + */ + vu->vu_pending = 0; + upcall.upc_pending = morepending; + crit_count += TDPRI_CRIT; + copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending, + sizeof(upcall.upc_pending)); + copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, + sizeof(int)); + + /* + * Construct a stack frame and issue the upcall + */ + regs = lp->lwp_md.md_regs; + upc_frame.eax = regs->tf_eax; + upc_frame.ecx = regs->tf_ecx; + upc_frame.edx = regs->tf_edx; + upc_frame.flags = regs->tf_eflags; + upc_frame.oldip = regs->tf_eip; + if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)), + sizeof(upc_frame)) != 0) { + kprintf("bad stack on upcall\n"); + } else { + regs->tf_eax = (register_t)vu->vu_func; + regs->tf_ecx = (register_t)vu->vu_data; + regs->tf_edx = (register_t)lp->lwp_upcall; + regs->tf_eip = (register_t)vu->vu_ctx; + regs->tf_esp -= sizeof(upc_frame); + } +} + +/* + * fetchupcall occurs in the context of a system call, which means that + * we have to return EJUSTRETURN in order to prevent eax and edx from + * being overwritten by the syscall return value. + * + * if vu is not NULL we return the new context in %edx, the new data in %ecx, + * and the function pointer in %eax. + */ +int +fetchupcall (struct vmupcall *vu, int morepending, void *rsp) +{ + struct upc_frame upc_frame; + struct lwp *lp = curthread->td_lwp; + struct trapframe *regs; + int error; + struct upcall upcall; + int crit_count; + + regs = lp->lwp_md.md_regs; + + error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int)); + if (error == 0) { + if (vu) { + /* + * This jumps us to the next ready context. + */ + vu->vu_pending = 0; + error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall)); + crit_count = 0; + if (error == 0) + error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int)); + crit_count += TDPRI_CRIT; + if (error == 0) + error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int)); + regs->tf_eax = (register_t)vu->vu_func; + regs->tf_ecx = (register_t)vu->vu_data; + regs->tf_edx = (register_t)lp->lwp_upcall; + regs->tf_eip = (register_t)vu->vu_ctx; + regs->tf_esp = (register_t)rsp; + } else { + /* + * This returns us to the originally interrupted code. + */ + error = copyin(rsp, &upc_frame, sizeof(upc_frame)); + regs->tf_eax = upc_frame.eax; + regs->tf_ecx = upc_frame.ecx; + regs->tf_edx = upc_frame.edx; + regs->tf_eflags = (regs->tf_eflags & ~PSL_USERCHANGE) | + (upc_frame.flags & PSL_USERCHANGE); + regs->tf_eip = upc_frame.oldip; + regs->tf_esp = (register_t)((char *)rsp + sizeof(upc_frame)); + } + } + if (error == 0) + error = EJUSTRETURN; + return(error); +} + +/* + * cpu_idle() represents the idle LWKT. You cannot return from this function + * (unless you want to blow things up!). Instead we look for runnable threads + * and loop or halt as appropriate. Giant is not held on entry to the thread. + * + * The main loop is entered with a critical section held, we must release + * the critical section before doing anything else. lwkt_switch() will + * check for pending interrupts due to entering and exiting its own + * critical section. + * + * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI + * to wake a HLTed cpu up. 
However, there are cases where the idlethread + * will be entered with the possibility that no IPI will occur and in such + * cases lwkt_switch() sets TDF_IDLE_NOHLT. + */ +static int cpu_idle_hlt = 1; +static int cpu_idle_hltcnt; +static int cpu_idle_spincnt; +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, + &cpu_idle_hlt, 0, "Idle loop HLT enable"); +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW, + &cpu_idle_hltcnt, 0, "Idle loop entry halts"); +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW, + &cpu_idle_spincnt, 0, "Idle loop entry spins"); + +static void +cpu_idle_default_hook(void) +{ + /* + * We must guarentee that hlt is exactly the instruction + * following the sti. + */ + __asm __volatile("hlt"); /* sti; hlt */ +} + +/* Other subsystems (e.g., ACPI) can hook this later. */ +void (*cpu_idle_hook)(void) = cpu_idle_default_hook; + +void +cpu_idle(void) +{ + struct thread *td = curthread; + + crit_exit(); + KKASSERT(td->td_pri < TDPRI_CRIT); + for (;;) { + /* + * See if there are any LWKTs ready to go. + */ + lwkt_switch(); + + /* + * If we are going to halt call splz unconditionally after + * CLIing to catch any interrupt races. Note that we are + * at SPL0 and interrupts are enabled. + */ + if (cpu_idle_hlt && !lwkt_runnable() && + (td->td_flags & TDF_IDLE_NOHLT) == 0) { + /* __asm __volatile("cli"); */ + splz(); + if (!lwkt_runnable()) + cpu_idle_hook(); +#ifdef SMP + else + __asm __volatile("pause"); +#endif + ++cpu_idle_hltcnt; + } else { + td->td_flags &= ~TDF_IDLE_NOHLT; + splz(); +#ifdef SMP + /*__asm __volatile("sti; pause");*/ + __asm __volatile("pause"); +#else + /*__asm __volatile("sti");*/ +#endif + ++cpu_idle_spincnt; + } + } +} + +/* + * Clear registers on exec + */ +void +setregs(struct lwp *lp, u_long entry, u_long stack, u_long ps_strings) +{ + struct trapframe *regs = lp->lwp_md.md_regs; + struct pcb *pcb = lp->lwp_thread->td_pcb; + + /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ + pcb->pcb_gs = 0; +#if 0 + load_gs(_udatasel); +#endif + + /* was i386_user_cleanup() in NetBSD */ + user_ldt_free(pcb); + + bzero((char *)regs, sizeof(struct trapframe)); + regs->tf_eip = entry; + regs->tf_esp = stack; + regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); + regs->tf_ss = 0; + regs->tf_ds = 0; + regs->tf_es = 0; + regs->tf_fs = 0; + regs->tf_cs = 0; + + /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ + regs->tf_ebx = ps_strings; + + /* + * Reset the hardware debug registers if they were in use. + * They won't have any meaning for the newly exec'd process. + */ + if (pcb->pcb_flags & PCB_DBREGS) { + pcb->pcb_dr0 = 0; + pcb->pcb_dr1 = 0; + pcb->pcb_dr2 = 0; + pcb->pcb_dr3 = 0; + pcb->pcb_dr6 = 0; + pcb->pcb_dr7 = 0; + if (pcb == curthread->td_pcb) { + /* + * Clear the debug registers on the running + * CPU, otherwise they will end up affecting + * the next process we switch to. + */ + reset_dbregs(); + } + pcb->pcb_flags &= ~PCB_DBREGS; + } + + /* + * Initialize the math emulator (if any) for the current process. + * Actually, just clear the bit that says that the emulator has + * been initialized. Initialization is delayed until the process + * traps to the emulator (if it is done at all) mainly because + * emulators don't provide an entry point for initialization. + */ + lp->lwp_thread->td_pcb->pcb_flags &= ~FP_SOFTFP; + + /* + * note: do not set CR0_TS here. npxinit() must do it after clearing + * gd_npxthread. Otherwise a preemptive interrupt thread may panic + * in npxdna(). 
+ */ + crit_enter(); +#if 0 + load_cr0(rcr0() | CR0_MP); +#endif + +#if NNPX > 0 + /* Initialize the npx (if any) for the current process. */ + npxinit(__INITIAL_NPXCW__); +#endif + crit_exit(); + + /* + * note: linux emulator needs edx to be 0x0 on entry, which is + * handled in execve simply by setting the 64 bit syscall + * return value to 0. + */ +} + +void +cpu_setregs(void) +{ +#if 0 + unsigned int cr0; + + cr0 = rcr0(); + cr0 |= CR0_NE; /* Done by npxinit() */ + cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ +#ifdef I386_CPU + if (cpu_class != CPUCLASS_386) +#endif + cr0 |= CR0_WP | CR0_AM; + load_cr0(cr0); + load_gs(_udatasel); +#endif +} + +static int +sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) +{ + int error; + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, + req); + if (!error && req->newptr) + resettodr(); + return (error); +} + +SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, + &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); + +extern u_long bootdev; /* not a cdev_t - encoding is different */ +SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, + CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)"); + +/* + * Initialize 386 and configure to run kernel + */ + +/* + * Initialize segments & interrupt table + */ + +extern struct user *proc0paddr; + +#if 0 + +extern inthand_t + IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), + IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), + IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), + IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align), + IDTVEC(xmm), IDTVEC(syscall), + IDTVEC(rsvd0); +extern inthand_t + IDTVEC(int0x80_syscall); + +#endif + +#ifdef DEBUG_INTERRUPTS +extern inthand_t *Xrsvdary[256]; +#endif + +int +ptrace_set_pc(struct proc *p, unsigned long addr) +{ + p->p_md.md_regs->tf_eip = addr; + return (0); +} + +int +ptrace_single_step(struct lwp *lp) +{ + lp->lwp_md.md_regs->tf_eflags |= PSL_T; + return (0); +} + +int +fill_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; + regs->r_fs = tp->tf_fs; + regs->r_es = tp->tf_es; + regs->r_ds = tp->tf_ds; + regs->r_edi = tp->tf_edi; + regs->r_esi = tp->tf_esi; + regs->r_ebp = tp->tf_ebp; + regs->r_ebx = tp->tf_ebx; + regs->r_edx = tp->tf_edx; + regs->r_ecx = tp->tf_ecx; + regs->r_eax = tp->tf_eax; + regs->r_eip = tp->tf_eip; + regs->r_cs = tp->tf_cs; + regs->r_eflags = tp->tf_eflags; + regs->r_esp = tp->tf_esp; + regs->r_ss = tp->tf_ss; + pcb = lp->lwp_thread->td_pcb; + regs->r_gs = pcb->pcb_gs; + return (0); +} + +int +set_regs(struct lwp *lp, struct reg *regs) +{ + struct pcb *pcb; + struct trapframe *tp; + + tp = lp->lwp_md.md_regs; + if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || + !CS_SECURE(regs->r_cs)) + return (EINVAL); + tp->tf_fs = regs->r_fs; + tp->tf_es = regs->r_es; + tp->tf_ds = regs->r_ds; + tp->tf_edi = regs->r_edi; + tp->tf_esi = regs->r_esi; + tp->tf_ebp = regs->r_ebp; + tp->tf_ebx = regs->r_ebx; + tp->tf_edx = regs->r_edx; + tp->tf_ecx = regs->r_ecx; + tp->tf_eax = regs->r_eax; + tp->tf_eip = regs->r_eip; + tp->tf_cs = regs->r_cs; + tp->tf_eflags = regs->r_eflags; + tp->tf_esp = regs->r_esp; + tp->tf_ss = regs->r_ss; + pcb = lp->lwp_thread->td_pcb; + pcb->pcb_gs = regs->r_gs; + return (0); +} + +#ifndef CPU_DISABLE_SSE +static void +fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU 
control/status */ + penv_87->en_cw = penv_xmm->en_cw; + penv_87->en_sw = penv_xmm->en_sw; + penv_87->en_tw = penv_xmm->en_tw; + penv_87->en_fip = penv_xmm->en_fip; + penv_87->en_fcs = penv_xmm->en_fcs; + penv_87->en_opcode = penv_xmm->en_opcode; + penv_87->en_foo = penv_xmm->en_foo; + penv_87->en_fos = penv_xmm->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; + + sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; +} + +static void +set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) +{ + struct env87 *penv_87 = &sv_87->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_xmm->en_cw = penv_87->en_cw; + penv_xmm->en_sw = penv_87->en_sw; + penv_xmm->en_tw = penv_87->en_tw; + penv_xmm->en_fip = penv_87->en_fip; + penv_xmm->en_fcs = penv_87->en_fcs; + penv_xmm->en_opcode = penv_87->en_opcode; + penv_xmm->en_foo = penv_87->en_foo; + penv_xmm->en_fos = penv_87->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; + + sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; +} +#endif /* CPU_DISABLE_SSE */ + +int +fill_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm, + (struct save87 *)fpregs); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + return (0); +} + +int +set_fpregs(struct lwp *lp, struct fpreg *fpregs) +{ +#ifndef CPU_DISABLE_SSE + if (cpu_fxsr) { + set_fpregs_xmm((struct save87 *)fpregs, + &lp->lwp_thread->td_pcb->pcb_save.sv_xmm); + return (0); + } +#endif /* CPU_DISABLE_SSE */ + bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs); + return (0); +} + +int +fill_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { + dbregs->dr0 = rdr0(); + dbregs->dr1 = rdr1(); + dbregs->dr2 = rdr2(); + dbregs->dr3 = rdr3(); + dbregs->dr4 = rdr4(); + dbregs->dr5 = rdr5(); + dbregs->dr6 = rdr6(); + dbregs->dr7 = rdr7(); + } else { + struct pcb *pcb; + + pcb = lp->lwp_thread->td_pcb; + dbregs->dr0 = pcb->pcb_dr0; + dbregs->dr1 = pcb->pcb_dr1; + dbregs->dr2 = pcb->pcb_dr2; + dbregs->dr3 = pcb->pcb_dr3; + dbregs->dr4 = 0; + dbregs->dr5 = 0; + dbregs->dr6 = pcb->pcb_dr6; + dbregs->dr7 = pcb->pcb_dr7; + } + return (0); +} + +int +set_dbregs(struct lwp *lp, struct dbreg *dbregs) +{ + if (lp == NULL) { + load_dr0(dbregs->dr0); + load_dr1(dbregs->dr1); + load_dr2(dbregs->dr2); + load_dr3(dbregs->dr3); + load_dr4(dbregs->dr4); + load_dr5(dbregs->dr5); + load_dr6(dbregs->dr6); + load_dr7(dbregs->dr7); + } else { + struct pcb *pcb; + struct ucred *ucred; + int i; + uint32_t mask1, mask2; + + /* + * Don't let an illegal value for dr7 get set. Specifically, + * check for undefined settings. Setting these bit patterns + * result in undefined behaviour and can lead to an unexpected + * TRCTRAP. + */ + for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; + i++, mask1 <<= 2, mask2 <<= 2) + if ((dbregs->dr7 & mask1) == mask2) + return (EINVAL); + + pcb = lp->lwp_thread->td_pcb; + ucred = lp->lwp_proc->p_ucred; + + /* + * Don't let a process set a breakpoint that is not within the + * process's address space. If a process could do this, it + * could halt the system by setting a breakpoint in the kernel + * (if ddb was enabled). 
Thus, we need to check to make sure + * that no breakpoints are being enabled for addresses outside + * process's address space, unless, perhaps, we were called by + * uid 0. + * + * XXX - what about when the watched area of the user's + * address space is written into from within the kernel + * ... wouldn't that still cause a breakpoint to be generated + * from within kernel mode? + */ + + if (suser_cred(ucred, 0) != 0) { + if (dbregs->dr7 & 0x3) { + /* dr0 is enabled */ + if (dbregs->dr0 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<2)) { + /* dr1 is enabled */ + if (dbregs->dr1 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<4)) { + /* dr2 is enabled */ + if (dbregs->dr2 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + + if (dbregs->dr7 & (0x3<<6)) { + /* dr3 is enabled */ + if (dbregs->dr3 >= VM_MAX_USER_ADDRESS) + return (EINVAL); + } + } + + pcb->pcb_dr0 = dbregs->dr0; + pcb->pcb_dr1 = dbregs->dr1; + pcb->pcb_dr2 = dbregs->dr2; + pcb->pcb_dr3 = dbregs->dr3; + pcb->pcb_dr6 = dbregs->dr6; + pcb->pcb_dr7 = dbregs->dr7; + + pcb->pcb_flags |= PCB_DBREGS; + } + + return (0); +} + +#if 0 +/* + * Return > 0 if a hardware breakpoint has been hit, and the + * breakpoint was in user space. Return 0, otherwise. + */ +int +user_dbreg_trap(void) +{ + u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ + u_int32_t bp; /* breakpoint bits extracted from dr6 */ + int nbp; /* number of breakpoints that triggered */ + caddr_t addr[4]; /* breakpoint addresses */ + int i; + + dr7 = rdr7(); + if ((dr7 & 0x000000ff) == 0) { + /* + * all GE and LE bits in the dr7 register are zero, + * thus the trap couldn't have been caused by the + * hardware debug registers + */ + return 0; + } + + nbp = 0; + dr6 = rdr6(); + bp = dr6 & 0x0000000f; + + if (!bp) { + /* + * None of the breakpoint bits are set meaning this + * trap was not caused by any of the debug registers + */ + return 0; + } + + /* + * at least one of the breakpoints were hit, check to see + * which ones and if any of them are user space addresses + */ + + if (bp & 0x01) { + addr[nbp++] = (caddr_t)rdr0(); + } + if (bp & 0x02) { + addr[nbp++] = (caddr_t)rdr1(); + } + if (bp & 0x04) { + addr[nbp++] = (caddr_t)rdr2(); + } + if (bp & 0x08) { + addr[nbp++] = (caddr_t)rdr3(); + } + + for (i=0; i + +/* + * Determine the size of the transfer, and make sure it is + * within the boundaries of the partition. Adjust transfer + * if needed, and signal errors or early completion. + * + * On success a new bio layer is pushed with the translated + * block number, and returned. + */ +struct bio * +bounds_check_with_label(cdev_t dev, struct bio *bio, + struct disklabel *lp, int wlabel) +{ + struct bio *nbio; + struct buf *bp = bio->bio_buf; + struct partition *p = lp->d_partitions + dkpart(dev); + int labelsect = lp->d_partitions[0].p_offset; + int maxsz = p->p_size, + sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; + daddr_t blkno = (daddr_t)(bio->bio_offset >> DEV_BSHIFT); + + /* overwriting disk label ? */ + /* XXX should also protect bootstrap in first 8K */ + if (blkno + p->p_offset <= LABELSECTOR + labelsect && +#if LABELSECTOR != 0 + blkno + p->p_offset + sz > LABELSECTOR + labelsect && +#endif + bp->b_cmd != BUF_CMD_READ && wlabel == 0) { + bp->b_error = EROFS; + goto error; + } + +#if defined(DOSBBSECTOR) && defined(notyet) + /* overwriting master boot record? 
*/ + if (blkno + p->p_offset <= DOSBBSECTOR && + bp->b_cmd != BUF_CMD_READ && wlabel == 0) { + bp->b_error = EROFS; + goto error; + } +#endif + + /* + * Check for out of bounds, EOF, and EOF clipping. + */ + if (bio->bio_offset < 0) + goto bad; + if (blkno + sz > maxsz) { + /* + * Past EOF or B_BNOCLIP flag was set, the request is bad. + */ + if (blkno > maxsz || (bp->b_flags & B_BNOCLIP)) + goto bad; + + /* + * If exactly on EOF just complete the I/O with no bytes + * transfered. B_INVAL must be set to throw away the + * contents of the buffer. Otherwise clip b_bcount. + */ + if (blkno == maxsz) { + bp->b_resid = bp->b_bcount; + bp->b_flags |= B_INVAL; + goto done; + } + bp->b_bcount = (maxsz - blkno) << DEV_BSHIFT; + } + nbio = push_bio(bio); + nbio->bio_offset = bio->bio_offset + ((off_t)p->p_offset << DEV_BSHIFT); + return (nbio); + + /* + * The caller is responsible for calling biodone() on the passed bio + * when we return NULL. + */ +bad: + bp->b_error = EINVAL; +error: + bp->b_resid = bp->b_bcount; + bp->b_flags |= B_ERROR | B_INVAL; +done: + return (NULL); +} + diff --git a/sys/platform/vkernel/i386/db_interface.c b/sys/platform/vkernel/i386/db_interface.c new file mode 100644 index 0000000000..6f7634f94e --- /dev/null +++ b/sys/platform/vkernel/i386/db_interface.c @@ -0,0 +1,328 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + * + * $FreeBSD: src/sys/i386/i386/db_interface.c,v 1.48.2.1 2000/07/07 00:38:46 obrien Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_interface.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +/* + * Interface to new debugger. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +static jmp_buf *db_nofault = 0; +extern jmp_buf db_jmpbuf; + +extern void gdb_handle_exception (db_regs_t *, int, int); + +int db_active; +db_regs_t ddb_regs; + +static jmp_buf db_global_jmpbuf; +static int db_global_jmpbuf_valid; + +#ifdef __GNUC__ +#define rss() ({u_short ss; __asm __volatile("mov %%ss,%0" : "=r" (ss)); ss;}) +#endif + +/* + * kdb_trap - field a TRACE or BPT trap + */ +int +kdb_trap(int type, int code, struct i386_saved_state *regs) +{ + volatile int ddb_mode = !(boothowto & RB_GDB); + + /* + * XXX try to do nothing if the console is in graphics mode. + * Handle trace traps (and hardware breakpoints...) by ignoring + * them except for forgetting about them. Return 0 for other + * traps to say that we haven't done anything. The trap handler + * will usually panic. 
We should handle breakpoint traps for + * our breakpoints by disarming our breakpoints and fixing up + * %eip. + */ + if (cons_unavail && ddb_mode) { + if (type == T_TRCTRAP) { + regs->tf_eflags &= ~PSL_T; + return (1); + } + return (0); + } + + switch (type) { + case T_BPTFLT: /* breakpoint */ + case T_TRCTRAP: /* debug exception */ + break; + + default: + /* + * XXX this is almost useless now. In most cases, + * trap_fatal() has already printed a much more verbose + * message. However, it is dangerous to print things in + * trap_fatal() - kprintf() might be reentered and trap. + * The debugger should be given control first. + */ + if (ddb_mode) + db_printf("kernel: type %d trap, code=%x\n", type, code); + + if (db_nofault) { + jmp_buf *no_fault = db_nofault; + db_nofault = 0; + longjmp(*no_fault, 1); + } + } + + /* + * This handles unexpected traps in ddb commands, including calls to + * non-ddb functions. db_nofault only applies to memory accesses by + * internal ddb commands. + */ + if (db_global_jmpbuf_valid) + longjmp(db_global_jmpbuf, 1); + + /* + * XXX We really should switch to a local stack here. + */ + ddb_regs = *regs; + + /* + * If in kernel mode, esp and ss are not saved, so dummy them up. + */ + if (ISPL(regs->tf_cs) == 0) { + ddb_regs.tf_esp = (int)®s->tf_esp; + ddb_regs.tf_ss = rss(); + } + +#ifdef SMP + db_printf("\nCPU%d stopping CPUs: 0x%08x\n", + mycpu->gd_cpuid, mycpu->gd_other_cpus); + + /* We stop all CPUs except ourselves (obviously) */ + stop_cpus(mycpu->gd_other_cpus); + + db_printf(" stopped\n"); +#endif /* SMP */ + + setjmp(db_global_jmpbuf); + db_global_jmpbuf_valid = TRUE; + db_active++; + if (ddb_mode) { + cndbctl(TRUE); + db_trap(type, code); + cndbctl(FALSE); + } else + gdb_handle_exception(&ddb_regs, type, code); + db_active--; + db_global_jmpbuf_valid = FALSE; + +#ifdef SMP + db_printf("\nCPU%d restarting CPUs: 0x%08x\n", + mycpu->gd_cpuid, stopped_cpus); + + /* Restart all the CPUs we previously stopped */ + if (stopped_cpus != mycpu->gd_other_cpus) { + db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n", + mycpu->gd_other_cpus, stopped_cpus); + panic("stop_cpus() failed"); + } + restart_cpus(stopped_cpus); + + db_printf(" restarted\n"); +#endif /* SMP */ + + regs->tf_eip = ddb_regs.tf_eip; + regs->tf_eflags = ddb_regs.tf_eflags; + regs->tf_eax = ddb_regs.tf_eax; + regs->tf_ecx = ddb_regs.tf_ecx; + regs->tf_edx = ddb_regs.tf_edx; + regs->tf_ebx = ddb_regs.tf_ebx; + + /* + * If in user mode, the saved ESP and SS were valid, restore them. + */ + if (ISPL(regs->tf_cs)) { + regs->tf_esp = ddb_regs.tf_esp; + regs->tf_ss = ddb_regs.tf_ss & 0xffff; + } + + regs->tf_ebp = ddb_regs.tf_ebp; + regs->tf_esi = ddb_regs.tf_esi; + regs->tf_edi = ddb_regs.tf_edi; + regs->tf_es = ddb_regs.tf_es & 0xffff; + regs->tf_fs = ddb_regs.tf_fs & 0xffff; + regs->tf_cs = ddb_regs.tf_cs & 0xffff; + regs->tf_ds = ddb_regs.tf_ds & 0xffff; + return (1); +} + +/* + * Read bytes from kernel address space for debugger. + */ +void +db_read_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *src; + + db_nofault = &db_jmpbuf; + + src = (char *)addr; + while (size-- > 0) + *data++ = *src++; + + db_nofault = 0; +} + +/* + * Write bytes to kernel address space for debugger. 
+ */ +void +db_write_bytes(vm_offset_t addr, size_t size, char *data) +{ + char *dst; +#if 0 + vpte_t *ptep0 = NULL; + vpte_t oldmap0 = 0; + vm_offset_t addr1; + vpte_t *ptep1 = NULL; + vpte_t oldmap1 = 0; +#endif + + db_nofault = &db_jmpbuf; +#if 0 + if (addr > trunc_page((vm_offset_t)btext) - size && + addr < round_page((vm_offset_t)etext)) { + + ptep0 = pmap_kpte(addr); + oldmap0 = *ptep0; + *ptep0 |= VPTE_W; + + /* Map another page if the data crosses a page boundary. */ + if ((*ptep0 & PG_PS) == 0) { + addr1 = trunc_page(addr + size - 1); + if (trunc_page(addr) != addr1) { + ptep1 = pmap_kpte(addr1); + oldmap1 = *ptep1; + *ptep1 |= VPTE_W; + } + } else { + addr1 = trunc_4mpage(addr + size - 1); + if (trunc_4mpage(addr) != addr1) { + ptep1 = pmap_kpte(addr1); + oldmap1 = *ptep1; + *ptep1 |= VPTE_W; + } + } + + cpu_invltlb(); + } +#endif + + dst = (char *)addr; + + while (size-- > 0) + *dst++ = *data++; + + db_nofault = 0; + +#if 0 + if (ptep0) { + *ptep0 = oldmap0; + + if (ptep1) + *ptep1 = oldmap1; + + cpu_invltlb(); + } +#endif +} + +/* + * The debugger sometimes needs to know the actual KVM address represented + * by the instruction pointer, stack pointer, or base pointer. Normally + * the actual KVM address is simply the contents of the register. However, + * if the debugger is entered from the BIOS or VM86 we need to figure out + * the offset from the segment register. + */ +db_addr_t +PC_REGS(db_regs_t *regs) +{ + return(regs->tf_eip); +} + +db_addr_t +SP_REGS(db_regs_t *regs) +{ + return(regs->tf_esp); +} + +db_addr_t +BP_REGS(db_regs_t *regs) +{ + return(regs->tf_ebp); +} + +/* + * XXX + * Move this to machdep.c and allow it to be called if any debugger is + * installed. + */ +void +Debugger(const char *msg) +{ + static volatile u_char in_Debugger; + + /* + * XXX + * Do nothing if the console is in graphics mode. This is + * OK if the call is for the debugger hotkey but not if the call + * is a weak form of panicing. + */ + if (cons_unavail && !(boothowto & RB_GDB)) + return; + + if (!in_Debugger) { + in_Debugger = 1; + db_printf("Debugger(\"%s\")\n", msg); + breakpoint(); + in_Debugger = 0; + } +} diff --git a/sys/platform/vkernel/i386/db_trace.c b/sys/platform/vkernel/i386/db_trace.c new file mode 100644 index 0000000000..9951fb024b --- /dev/null +++ b/sys/platform/vkernel/i386/db_trace.c @@ -0,0 +1,642 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. 
+ * + * $FreeBSD: src/sys/i386/i386/db_trace.c,v 1.35.2.3 2002/02/21 22:31:25 silby Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/db_trace.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +db_varfcn_t db_dr0; +db_varfcn_t db_dr1; +db_varfcn_t db_dr2; +db_varfcn_t db_dr3; +db_varfcn_t db_dr4; +db_varfcn_t db_dr5; +db_varfcn_t db_dr6; +db_varfcn_t db_dr7; + +/* + * Machine register set. + */ +struct db_variable db_regs[] = { + { "cs", &ddb_regs.tf_cs, FCN_NULL }, + { "ds", &ddb_regs.tf_ds, FCN_NULL }, + { "es", &ddb_regs.tf_es, FCN_NULL }, + { "fs", &ddb_regs.tf_fs, FCN_NULL }, +#if 0 + { "gs", &ddb_regs.tf_gs, FCN_NULL }, +#endif + { "ss", &ddb_regs.tf_ss, FCN_NULL }, + { "eax", &ddb_regs.tf_eax, FCN_NULL }, + { "ecx", &ddb_regs.tf_ecx, FCN_NULL }, + { "edx", &ddb_regs.tf_edx, FCN_NULL }, + { "ebx", &ddb_regs.tf_ebx, FCN_NULL }, + { "esp", &ddb_regs.tf_esp, FCN_NULL }, + { "ebp", &ddb_regs.tf_ebp, FCN_NULL }, + { "esi", &ddb_regs.tf_esi, FCN_NULL }, + { "edi", &ddb_regs.tf_edi, FCN_NULL }, + { "eip", &ddb_regs.tf_eip, FCN_NULL }, + { "efl", &ddb_regs.tf_eflags, FCN_NULL }, + { "dr0", NULL, db_dr0 }, + { "dr1", NULL, db_dr1 }, + { "dr2", NULL, db_dr2 }, + { "dr3", NULL, db_dr3 }, + { "dr4", NULL, db_dr4 }, + { "dr5", NULL, db_dr5 }, + { "dr6", NULL, db_dr6 }, + { "dr7", NULL, db_dr7 }, +}; +struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]); + +/* + * Stack trace. + */ +#define INKERNEL(va) (((vm_offset_t)(va)) >= USRSTACK) + +struct i386_frame { + struct i386_frame *f_frame; + int f_retaddr; + int f_arg0; +}; + +#define NORMAL 0 +#define TRAP 1 +#define INTERRUPT 2 +#define SYSCALL 3 + +static void db_nextframe(struct i386_frame **, db_addr_t *); +static int db_numargs(struct i386_frame *); +static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t); + + +static char *watchtype_str(int type); +static int ki386_set_watch(int watchnum, unsigned int watchaddr, + int size, int access, struct dbreg * d); +static int ki386_clr_watch(int watchnum, struct dbreg * d); +int db_md_set_watchpoint(db_expr_t addr, db_expr_t size); +int db_md_clr_watchpoint(db_expr_t addr, db_expr_t size); +void db_md_list_watchpoints(void); + + +/* + * Figure out how many arguments were passed into the frame at "fp". + */ +static int +db_numargs(struct i386_frame *fp) +{ + int *argp; + int inst; + int args; + + argp = (int *)db_get_value((int)&fp->f_retaddr, 4, FALSE); + /* + * XXX etext is wrong for LKMs. We should attempt to interpret + * the instruction at the return address in all cases. This + * may require better fault handling. + */ + if (argp < (int *)btext || argp >= (int *)etext) { + args = 5; + } else { + inst = db_get_value((int)argp, 4, FALSE); + if ((inst & 0xff) == 0x59) /* popl %ecx */ + args = 1; + else if ((inst & 0xffff) == 0xc483) /* addl $Ibs, %esp */ + args = ((inst >> 16) & 0xff) / 4; + else + args = 5; + } + return(args); +} + +static void +db_print_stack_entry(const char *name, int narg, char **argnp, int *argp, + db_addr_t callpc) +{ + db_printf("%s(", name); + while (narg) { + if (argnp) + db_printf("%s=", *argnp++); + db_printf("%r", db_get_value((int)argp, 4, FALSE)); + argp++; + if (--narg != 0) + db_printf(","); + } + db_printf(") at "); + db_printsym(callpc, DB_STGY_PROC); + db_printf("\n"); +} + +/* + * Figure out the next frame up in the call stack. 
+ */ +static void +db_nextframe(struct i386_frame **fp, db_addr_t *ip) +{ + struct trapframe *tf; + int frame_type; + int eip, esp, ebp; + db_expr_t offset; + const char *sym, *name; + + eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE); + ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE); + + /* + * Figure out frame type. + */ + + frame_type = NORMAL; + + sym = db_search_symbol(eip, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + if (name != NULL) { + if (!strcmp(name, "calltrap")) { + frame_type = TRAP; + } else if (!strncmp(name, "Xresume", 7)) { + frame_type = INTERRUPT; + } else if (!strcmp(name, "_Xsyscall")) { + frame_type = SYSCALL; + } + } + + /* + * Normal frames need no special processing. + */ + if (frame_type == NORMAL) { + *ip = (db_addr_t) eip; + *fp = (struct i386_frame *) ebp; + return; + } + + db_print_stack_entry(name, 0, 0, 0, eip); + + /* + * Point to base of trapframe which is just above the + * current frame. + */ + tf = (struct trapframe *) ((int)*fp + 8); + + esp = (ISPL(tf->tf_cs) == SEL_UPL) ? tf->tf_esp : (int)&tf->tf_esp; + switch (frame_type) { + case TRAP: + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- trap %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", + tf->tf_trapno, eip, esp, ebp); + } + break; + case SYSCALL: + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- syscall %#r, eip = %#r, esp = %#r, ebp = %#r ---\n", + tf->tf_eax, eip, esp, ebp); + } + break; + case INTERRUPT: + tf = (struct trapframe *)((int)*fp + 16); + if (INKERNEL((int) tf)) { + eip = tf->tf_eip; + ebp = tf->tf_ebp; + db_printf( + "--- interrupt, eip = %#r, esp = %#r, ebp = %#r ---\n", + eip, esp, ebp); + } + break; + default: + break; + } + + *ip = (db_addr_t) eip; + *fp = (struct i386_frame *) ebp; +} + +void +db_stack_trace_cmd(db_expr_t addr, boolean_t have_addr, db_expr_t count, + char *modif) +{ + struct i386_frame *frame; + int *argp; + db_addr_t callpc; + boolean_t first; + int i; + + if (count == -1) + count = 1024; + + if (!have_addr) { + frame = (struct i386_frame *)BP_REGS(&ddb_regs); + if (frame == NULL) + frame = (struct i386_frame *)(SP_REGS(&ddb_regs) - 4); + callpc = PC_REGS(&ddb_regs); + } else if (!INKERNEL(addr)) { +#if needswork + pid = (addr % 16) + ((addr >> 4) % 16) * 10 + + ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + + ((addr >> 16) % 16) * 10000; + /* + * The pcb for curproc is not valid at this point, + * so fall back to the default case. + */ + if ((curproc != NULL) && (pid == curproc->p_pid)) { + frame = (struct i386_frame *)BP_REGS(&ddb_regs); + if (frame == NULL) + frame = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + callpc = PC_REGS(&ddb_regs); + } else { + pid_t pid; + struct proc *p; + struct pcb *pcb; + + p = pfind(pid); + if (p == NULL) { + db_printf("pid %d not found\n", pid); + return; + } + if ((p->p_flag & P_SWAPPEDOUT)) { + db_printf("pid %d swapped out\n", pid); + return; + } + pcb = p->p_thread->td_pcb; + frame = (struct i386_frame *)pcb->pcb_ebp; + if (frame == NULL) + frame = (struct i386_frame *) + (pcb->pcb_esp - 4); + callpc = (db_addr_t)pcb->pcb_eip; + } +#else + /* XXX */ + db_printf("no kernel stack address\n"); + return; +#endif + } else { + /* + * Look for something that might be a frame pointer, just as + * a convenience. 
+ */ + frame = (struct i386_frame *)addr; + for (i = 0; i < 4096; i += 4) { + struct i386_frame *check; + + check = (struct i386_frame *)db_get_value((int)((char *)&frame->f_frame + i), 4, FALSE); + if ((char *)check - (char *)frame >= 0 && + (char *)check - (char *)frame < 4096 + ) { + break; + } + db_printf("%p does not look like a stack frame, skipping\n", (char *)&frame->f_frame + i); + } + if (i == 4096) { + db_printf("Unable to find anything that looks like a stack frame\n"); + return; + } + frame = (void *)((char *)frame + i); + db_printf("Trace beginning at frame %p\n", frame); + callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); + } + + first = TRUE; + while (count--) { + struct i386_frame *actframe; + int narg; + const char * name; + db_expr_t offset; + c_db_sym_t sym; +#define MAXNARG 16 + char *argnames[MAXNARG], **argnp = NULL; + + sym = db_search_symbol(callpc, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + + /* + * Attempt to determine a (possibly fake) frame that gives + * the caller's pc. It may differ from `frame' if the + * current function never sets up a standard frame or hasn't + * set one up yet or has just discarded one. The last two + * cases can be guessed fairly reliably for code generated + * by gcc. The first case is too much trouble to handle in + * general because the amount of junk on the stack depends + * on the pc (the special handling of "calltrap", etc. in + * db_nextframe() works because the `next' pc is special). + */ + actframe = frame; + if (first) { + if (!have_addr) { + int instr; + + instr = db_get_value(callpc, 4, FALSE); + if ((instr & 0x00ffffff) == 0x00e58955) { + /* pushl %ebp; movl %esp, %ebp */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } else if ((instr & 0x0000ffff) == 0x0000e589) { + /* movl %esp, %ebp */ + actframe = (struct i386_frame *) + SP_REGS(&ddb_regs); + if (ddb_regs.tf_ebp == 0) { + /* Fake caller's frame better. */ + frame = actframe; + } + } else if ((instr & 0x000000ff) == 0x000000c3) { + /* ret */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } else if (offset == 0) { + /* Probably a symbol in assembler code. */ + actframe = (struct i386_frame *) + (SP_REGS(&ddb_regs) - 4); + } + } else if (!strcmp(name, "fork_trampoline")) { + /* + * Don't try to walk back on a stack for a + * process that hasn't actually been run yet. + */ + db_print_stack_entry(name, 0, 0, 0, callpc); + break; + } + first = FALSE; + } + + argp = &actframe->f_arg0; + narg = MAXNARG; + if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { + argnp = argnames; + } else { + narg = db_numargs(frame); + } + + db_print_stack_entry(name, narg, argnp, argp, callpc); + + if (actframe != frame) { + /* `frame' belongs to caller. 
*/ + callpc = (db_addr_t) + db_get_value((int)&actframe->f_retaddr, 4, FALSE); + continue; + } + + db_nextframe(&frame, &callpc); + + if (INKERNEL((int) callpc) && !INKERNEL((int) frame)) { + sym = db_search_symbol(callpc, DB_STGY_ANY, &offset); + db_symbol_values(sym, &name, NULL); + db_print_stack_entry(name, 0, 0, 0, callpc); + break; + } + if (!INKERNEL((int) frame)) { + break; + } + } +} + +void +db_print_backtrace(void) +{ + register_t ebp; + + __asm __volatile("movl %%ebp, %0" : "=r" (ebp)); + db_stack_trace_cmd(ebp, 1, -1, NULL); +} + +#define DB_DRX_FUNC(reg) \ +int \ +db_ ## reg (struct db_variable *vp, db_expr_t *valuep, int op) \ +{ \ + if (op == DB_VAR_GET) \ + *valuep = r ## reg (); \ + else \ + load_ ## reg (*valuep); \ + \ + return(0); \ +} + +DB_DRX_FUNC(dr0) +DB_DRX_FUNC(dr1) +DB_DRX_FUNC(dr2) +DB_DRX_FUNC(dr3) +DB_DRX_FUNC(dr4) +DB_DRX_FUNC(dr5) +DB_DRX_FUNC(dr6) +DB_DRX_FUNC(dr7) + +static int +ki386_set_watch(int watchnum, unsigned int watchaddr, int size, int access, + struct dbreg *d) +{ + int i; + unsigned int mask; + + if (watchnum == -1) { + for (i = 0, mask = 0x3; i < 4; i++, mask <<= 2) + if ((d->dr7 & mask) == 0) + break; + if (i < 4) + watchnum = i; + else + return(-1); + } + + switch (access) { + case DBREG_DR7_EXEC: + size = 1; /* size must be 1 for an execution breakpoint */ + /* fall through */ + case DBREG_DR7_WRONLY: + case DBREG_DR7_RDWR: + break; + default: + return(-1); + } + + /* + * we can watch a 1, 2, or 4 byte sized location + */ + switch (size) { + case 1: + mask = 0x00; + break; + case 2: + mask = 0x01 << 2; + break; + case 4: + mask = 0x03 << 2; + break; + default: + return(-1); + } + + mask |= access; + + /* clear the bits we are about to affect */ + d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16))); + + /* set drN register to the address, N=watchnum */ + DBREG_DRX(d, watchnum) = watchaddr; + + /* enable the watchpoint */ + d->dr7 |= (0x2 << (watchnum * 2)) | (mask << (watchnum * 4 + 16)); + + return(watchnum); +} + + +int +ki386_clr_watch(int watchnum, struct dbreg *d) +{ + if (watchnum < 0 || watchnum >= 4) + return(-1); + + d->dr7 &= ~((0x3 << (watchnum * 2)) | (0x0f << (watchnum * 4 + 16))); + DBREG_DRX(d, watchnum) = 0; + + return(0); +} + + +int +db_md_set_watchpoint(db_expr_t addr, db_expr_t size) +{ + int avail, wsize; + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + avail = 0; + for(i=0; i < 4; i++) { + if ((d.dr7 & (3 << (i * 2))) == 0) + avail++; + } + + if (avail * 4 < size) + return(-1); + + for (i=0; i < 4 && (size != 0); i++) { + if ((d.dr7 & (3 << (i * 2))) == 0) { + if (size > 4) + wsize = 4; + else + wsize = size; + if (wsize == 3) + wsize++; + ki386_set_watch(i, addr, wsize, DBREG_DR7_WRONLY, &d); + addr += wsize; + size -= wsize; + } + } + + set_dbregs(NULL, &d); + + return(0); +} + +int +db_md_clr_watchpoint(db_expr_t addr, db_expr_t size) +{ + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + for(i=0; i<4; i++) { + if (d.dr7 & (3 << (i * 2))) { + if ((DBREG_DRX((&d), i) >= addr) && + (DBREG_DRX((&d), i) < addr + size)) + ki386_clr_watch(i, &d); + } + } + + set_dbregs(NULL, &d); + + return(0); +} + +static char * +watchtype_str(int type) +{ + switch (type) { + case DBREG_DR7_EXEC: + return "execute"; + case DBREG_DR7_RDWR: + return "read/write"; + case DBREG_DR7_WRONLY: + return "write"; + default: + return "invalid"; + } +} + +void +db_md_list_watchpoints(void) +{ + int i; + struct dbreg d; + + fill_dbregs(NULL, &d); + + db_printf("\nhardware watchpoints:\n"); + db_printf(" watch status 
type len address\n" + " ----- -------- ---------- --- ----------\n"); + for (i=0; i < 4; i++) { + if (d.dr7 & (0x03 << (i * 2))) { + unsigned type, len; + type = (d.dr7 >> (16 + (i * 4))) & 3; + len = (d.dr7 >> (16 + (i * 4) + 2)) & 3; + db_printf(" %-5d %-8s %10s %3d 0x%08x\n", + i, "enabled", watchtype_str(type), + len + 1, DBREG_DRX((&d), i)); + } else { + db_printf(" %-5d disabled\n", i); + } + } + + db_printf("\ndebug register values:\n"); + for (i=0; i < 8; i++) + db_printf(" dr%d 0x%08x\n", i, DBREG_DRX((&d),i)); + db_printf("\n"); +} diff --git a/sys/platform/vkernel/i386/global.s b/sys/platform/vkernel/i386/global.s index 057e282470..36fd01dcd0 100644 --- a/sys/platform/vkernel/i386/global.s +++ b/sys/platform/vkernel/i386/global.s @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/globals.s,v 1.13.2.1 2000/05/16 06:58:06 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.1 2006/12/26 20:46:10 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/global.s,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include @@ -75,10 +75,9 @@ .globl gd_CMAP1, gd_CMAP2, gd_CMAP3, gd_PMAP1 .globl gd_CADDR1, gd_CADDR2, gd_CADDR3, gd_PADDR1 .globl gd_spending, gd_ipending, gd_fpending - .globl gd_cnt, gd_private_tss + .globl gd_cnt .set gd_cpuid,globaldata + GD_CPUID - .set gd_private_tss,globaldata + GD_PRIVATE_TSS .set gd_other_cpus,globaldata + GD_OTHER_CPUS .set gd_ss_eflags,globaldata + GD_SS_EFLAGS .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL diff --git a/sys/platform/vkernel/i386/locore.s b/sys/platform/vkernel/i386/locore.s index b3e7c2438a..9c831c77c7 100644 --- a/sys/platform/vkernel/i386/locore.s +++ b/sys/platform/vkernel/i386/locore.s @@ -31,10 +31,12 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.3 2006/12/04 18:04:01 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/locore.s,v 1.4 2007/01/05 22:18:18 dillon Exp $ */ +#include #include +#include #include "assym.s" .globl kernbase @@ -57,10 +59,14 @@ NON_GPROF_ENTRY(sigcode) call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_UC(%esp),%eax /* get ucontext_t */ pushl %eax +#if 0 testl $PSL_VM,UC_EFLAGS(%eax) jne 9f +#endif movl UC_GS(%eax),%gs /* restore %gs */ +#if 0 9: +#endif movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ @@ -69,6 +75,17 @@ NON_GPROF_ENTRY(sigcode) ALIGN_TEXT esigcode: +/* void reset_dbregs() */ +ENTRY(reset_dbregs) + movl $0,%eax + movl %eax,%dr7 /* disable all breapoints first */ + movl %eax,%dr0 + movl %eax,%dr1 + movl %eax,%dr2 + movl %eax,%dr3 + movl %eax,%dr6 + ret + .data .globl szsigcode szsigcode: diff --git a/sys/platform/vkernel/i386/npx.c b/sys/platform/vkernel/i386/npx.c index f8adc1c4e0..3a35bad869 100644 --- a/sys/platform/vkernel/i386/npx.c +++ b/sys/platform/vkernel/i386/npx.c @@ -36,7 +36,7 @@ * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80.2.3 2001/10/20 19:04:38 tegge Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.1 2007/01/02 04:24:25 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/npx.c,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include "opt_debug_npx.h" @@ -365,12 +365,16 @@ npx_intr(void *dummy) * before we entered our critical section. If that occured, the * TS bit will be set and npxthread will be NULL. 
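+ *
+ * In the vkernel case we are running as a user process on the real
+ * kernel and cannot read %cr0 directly (the rcr0() check below is
+ * disabled for that reason), so for now this path simply panics;
+ * the FP state flag will eventually have to be delivered as part
+ * of the context supplied by the real kernel.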
*/ + panic("npx_intr: not coded"); + /* XXX FP STATE FLAG MUST BE PART OF CONTEXT SUPPLIED BY REAL KERNEL */ +#if 0 if (rcr0() & CR0_TS) { KASSERT(mdcpu->gd_npxthread == NULL, ("gd_npxthread was %p with TS set!", mdcpu->gd_npxthread)); npxdna(); crit_exit(); return; } +#endif if (mdcpu->gd_npxthread == NULL) { get_mplock(); kprintf("npxintr: npxthread = %p, curthread = %p\n", @@ -396,7 +400,7 @@ npx_intr(void *dummy) * Pass exception to process. */ frame = (struct intrframe *)&dummy; /* XXX */ - if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { + if ((ISPL(frame->if_cs) == SEL_UPL) /*||(frame->if_eflags&PSL_VM)*/) { /* * Interrupt is essentially a trap, so we can afford to call * the SIGFPE handler (if any) as soon as the interrupt diff --git a/sys/platform/vkernel/i386/swtch.s b/sys/platform/vkernel/i386/swtch.s index 52c50c9693..52287af743 100644 --- a/sys/platform/vkernel/i386/swtch.s +++ b/sys/platform/vkernel/i386/swtch.s @@ -66,7 +66,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.89.2.10 2003/01/23 03:36:24 ps Exp $ - * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.1 2007/01/02 04:24:25 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/swtch.s,v 1.2 2007/01/05 22:18:18 dillon Exp $ */ #include "use_npx.h" @@ -212,12 +212,14 @@ ENTRY(cpu_exit_switch) /* * Get us out of the vmspace */ +#if 0 movl IdlePTD,%ecx movl %cr3,%eax cmpl %ecx,%eax je 1f movl %ecx,%cr3 1: +#endif movl PCPU(curthread),%ebx /* * Switch to the next thread. RET into the restore function, which @@ -282,6 +284,7 @@ ENTRY(cpu_heavy_restore) * YYY which naturally also means that the PM_ACTIVE bit had better * already have been set before we set it above, check? YYY */ +#if 0 movl %cr3,%esi movl PCB_CR3(%edx),%ecx cmpl %esi,%ecx @@ -292,6 +295,7 @@ ENTRY(cpu_heavy_restore) #endif movl %ecx,%cr3 4: +#endif /* * Clear TDF_RUNNING flag in old thread only after cleaning up * %cr3. The target thread is already protected by being TDF_RUNQ @@ -300,6 +304,7 @@ ENTRY(cpu_heavy_restore) andl $~TDF_RUNNING,TD_FLAGS(%ebx) orl $TDF_RUNNING,TD_FLAGS(%eax) +#if 0 /* * Deal with the PCB extension, restore the private tss */ @@ -344,8 +349,8 @@ ENTRY(cpu_heavy_restore) movl %eax, 4(%ebx) movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ ltr %si - 3: +#endif /* * Restore general registers. */ @@ -357,6 +362,7 @@ ENTRY(cpu_heavy_restore) movl PCB_EIP(%edx),%eax movl %eax,(%esp) +#if 0 /* * Restore the user LDT if we have one */ @@ -372,12 +378,16 @@ ENTRY(cpu_heavy_restore) call set_user_ldt popl %edx 2: +#endif +#if 0 /* * Restore the user TLS if we have one */ pushl %edx call set_user_TLS popl %edx +#endif +#if 0 /* * Restore the %gs segment register, which must be done after * loading the user LDT. Since user processes can modify the @@ -388,6 +398,7 @@ ENTRY(cpu_heavy_restore) .globl cpu_switch_load_gs cpu_switch_load_gs: movl PCB_GS(%edx),%gs +#endif /* * Restore the DEBUG register state if necessary. diff --git a/sys/platform/vkernel/i386/tls.c b/sys/platform/vkernel/i386/tls.c new file mode 100644 index 0000000000..ab21686db5 --- /dev/null +++ b/sys/platform/vkernel/i386/tls.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by David Xu and Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/platform/vkernel/i386/tls.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include /* pcb.h included via sys/user.h */ +#include /* CPU_prvspace */ +#include + +/* + * set a TLS descriptor and resync the GDT. A descriptor may be cleared + * by passing info=NULL and infosize=0. Note that hardware limitations may + * cause the size passed in tls_info to be approximated. + * + * Returns the value userland needs to load into %gs representing the + * TLS descriptor or -1 on error. + * + * (struct tls_info *info, int infosize, int which) + */ +int +sys_sys_set_tls_area(struct sys_set_tls_area_args *uap) +{ + struct tls_info info; + struct segment_descriptor *desc; + int error; + int i; + + /* + * Sanity checks + */ + i = uap->which; + if (i < 0 || i >= NGTLS) + return (ERANGE); + if (uap->infosize < 0) + return (EINVAL); + + /* + * Maintain forwards compatibility with future extensions. + */ + if (uap->infosize != sizeof(info)) { + bzero(&info, sizeof(info)); + error = copyin(uap->info, &info, + min(sizeof(info), uap->infosize)); + } else { + error = copyin(uap->info, &info, sizeof(info)); + } + if (error) + return (error); + if (info.size < -1) + return (EINVAL); + if (info.size > (1 << 20)) + info.size = (info.size + PAGE_MASK) & ~PAGE_MASK; + + /* + * Load the descriptor. A critical section is required in case + * an interrupt thread comes along and switches us out and then back + * in. + */ + desc = &curthread->td_tls[i]; + crit_enter(); + if (info.size == 0) { + bzero(desc, sizeof(*desc)); + } else { + desc->sd_lobase = (intptr_t)info.base; + desc->sd_hibase = (intptr_t)info.base >> 24; + desc->sd_def32 = 1; + desc->sd_type = SDT_MEMRWA; + desc->sd_dpl = SEL_UPL; + desc->sd_xx = 0; + desc->sd_p = 1; + if (info.size == -1) { + /* + * A descriptor size of -1 is a hack to map the + * whole address space. This type of mapping is + * required for direct-tls accesses of variable + * data, e.g. %gs:OFFSET where OFFSET is negative. 
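+ *
+ * With the all-ones limit and 4K granularity programmed below, the
+ * effective descriptor limit covers roughly the whole 4GB address
+ * space:
+ *
+ *	limit = (hilimit << 16) | lolimit = 0xfffff	(4K pages)
+ *	span  = (limit + 1) * PAGE_SIZE   = 4GB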
+ */ + desc->sd_lolimit = -1; + desc->sd_hilimit = -1; + desc->sd_gran = 1; + } else if (info.size >= (1 << 20)) { + /* + * A descriptor size greater then 1MB requires page + * granularity (the lo+hilimit field is only 20 bits) + */ + desc->sd_lolimit = info.size >> PAGE_SHIFT; + desc->sd_hilimit = info.size >> (PAGE_SHIFT + 16); + desc->sd_gran = 1; + } else { + /* + * Otherwise a byte-granular size is supported. + */ + desc->sd_lolimit = info.size; + desc->sd_hilimit = info.size >> 16; + desc->sd_gran = 0; + } + } + crit_exit(); + uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); + set_user_TLS(); + return(0); +} + +/* + * Return the specified TLS descriptor to userland. + * + * Returns the value userland needs to load into %gs representing the + * TLS descriptor or -1 on error. + * + * (struct tls_info *info, int infosize, int which) + */ +int +sys_sys_get_tls_area(struct sys_get_tls_area_args *uap) +{ + struct tls_info info; + struct segment_descriptor *desc; + int error; + int i; + + /* + * Sanity checks + */ + i = uap->which; + if (i < 0 || i >= NGTLS) + return (ERANGE); + if (uap->infosize < 0) + return (EINVAL); + + /* + * unpack the descriptor, ENOENT is returned for any descriptor + * which has not been loaded. uap->info may be NULL. + */ + desc = &curthread->td_tls[i]; + if (desc->sd_p) { + if (uap->info && uap->infosize > 0) { + bzero(&info, sizeof(info)); + info.base = (void *)(intptr_t) + ((desc->sd_hibase << 24) | desc->sd_lobase); + info.size = (desc->sd_hilimit << 16) | desc->sd_lolimit; + if (desc->sd_gran) + info.size <<= PAGE_SHIFT; + error = copyout(&info, uap->info, + min(sizeof(info), uap->infosize)); + } else { + error = 0; + } + uap->sysmsg_result = GSEL(GTLS_START + i, SEL_UPL); + } else { + error = ENOENT; + } + return(error); +} + +void +set_user_TLS(void) +{ + panic("set_user_TLS"); +} diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c new file mode 100644 index 0000000000..5473d512d8 --- /dev/null +++ b/sys/platform/vkernel/i386/trap.c @@ -0,0 +1,1551 @@ +/*- + * Copyright (C) 1994, David Greenman + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the University of Utah, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 + * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/trap.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +/* + * 386 Trap and System call handling + */ + +#include "use_isa.h" +#include "use_npx.h" + +#include "opt_ddb.h" +#include "opt_ktrace.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef KTRACE +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#ifdef SMP + +#define MAKEMPSAFE(have_mplock) \ + if (have_mplock == 0) { \ + get_mplock(); \ + have_mplock = 1; \ + } + +#else + +#define MAKEMPSAFE(have_mplock) + +#endif + +int (*pmath_emulate) (struct trapframe *); + +extern void trap (struct trapframe frame); +extern int trapwrite (unsigned addr); +extern void syscall2 (struct trapframe frame); + +static int trap_pfault (struct trapframe *, int, vm_offset_t); +static void trap_fatal (struct trapframe *, vm_offset_t); +void dblfault_handler (void); + +#if 0 +extern inthand_t IDTVEC(syscall); +#endif + +#define MAX_TRAP_MSG 28 +static char *trap_msg[] = { + "", /* 0 unused */ + "privileged instruction fault", /* 1 T_PRIVINFLT */ + "", /* 2 unused */ + "breakpoint instruction fault", /* 3 T_BPTFLT */ + "", /* 4 unused */ + "", /* 5 unused */ + "arithmetic trap", /* 6 T_ARITHTRAP */ + "system forced exception", /* 7 T_ASTFLT */ + "", /* 8 unused */ + "general protection fault", /* 9 T_PROTFLT */ + "trace trap", /* 10 T_TRCTRAP */ + "", /* 11 unused */ + "page fault", /* 12 T_PAGEFLT */ + "", /* 13 unused */ + "alignment fault", /* 14 T_ALIGNFLT */ + "", /* 15 unused */ + "", /* 16 unused */ + "", /* 17 unused */ + "integer divide fault", /* 18 T_DIVIDE */ + "non-maskable interrupt trap", /* 19 T_NMI */ + "overflow trap", /* 20 T_OFLOW */ + "FPU bounds check fault", /* 21 T_BOUND */ + "FPU device not available", /* 22 T_DNA */ + "double fault", /* 23 T_DOUBLEFLT */ + "FPU operand fetch fault", /* 24 T_FPOPFLT */ + "invalid TSS fault", /* 25 T_TSSFLT */ + "segment not present fault", /* 26 T_SEGNPFLT */ + "stack fault", /* 27 T_STKFLT */ + "machine check trap", /* 28 T_MCHK */ +}; + +#ifdef DDB +static int ddb_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, + &ddb_on_nmi, 0, "Go to DDB on NMI"); +#endif +static int panic_on_nmi = 1; +SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, + &panic_on_nmi, 0, "Panic on NMI"); +static int fast_release; +SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, + &fast_release, 0, "Passive Release was optimal"); +static int slow_release; +SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, + &slow_release, 0, "Passive Release was nonoptimal"); +#ifdef SMP +static int syscall_mpsafe = 0; 
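+/*
+ * syscall_mpsafe and trap_mpsafe (below) default to 0, keeping syscalls
+ * and traps under the Big Giant Lock.  Both are run-time sysctls and may
+ * also be preset from the loader; for example (illustrative):
+ *
+ *	sysctl kern.syscall_mpsafe=1	# SYF_MPSAFE syscalls run without BGL
+ *	sysctl kern.trap_mpsafe=1	# traps mostly run without BGL
+ */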
+SYSCTL_INT(_kern, OID_AUTO, syscall_mpsafe, CTLFLAG_RW, + &syscall_mpsafe, 0, "Allow MPSAFE marked syscalls to run without BGL"); +TUNABLE_INT("kern.syscall_mpsafe", &syscall_mpsafe); +static int trap_mpsafe = 0; +SYSCTL_INT(_kern, OID_AUTO, trap_mpsafe, CTLFLAG_RW, + &trap_mpsafe, 0, "Allow traps to mostly run without the BGL"); +TUNABLE_INT("kern.trap_mpsafe", &trap_mpsafe); +#endif + +MALLOC_DEFINE(M_SYSMSG, "sysmsg", "sysmsg structure"); +extern int max_sysmsg; + +/* + * Passive USER->KERNEL transition. This only occurs if we block in the + * kernel while still holding our userland priority. We have to fixup our + * priority in order to avoid potential deadlocks before we allow the system + * to switch us to another thread. + */ +static void +passive_release(struct thread *td) +{ + struct lwp *lp = td->td_lwp; + + td->td_release = NULL; + lwkt_setpri_self(TDPRI_KERN_USER); + lp->lwp_proc->p_usched->release_curproc(lp); +} + +/* + * userenter() passively intercepts the thread switch function to increase + * the thread priority from a user priority to a kernel priority, reducing + * syscall and trap overhead for the case where no switch occurs. + */ + +static __inline void +userenter(struct thread *curtd) +{ + curtd->td_release = passive_release; +} + +/* + * Handle signals, upcalls, profiling, and other AST's and/or tasks that + * must be completed before we can return to or try to return to userland. + * + * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 + * arithmatic on the delta calculation so the absolute tick values are + * truncated to an integer. + */ +static void +userret(struct lwp *lp, struct trapframe *frame, int sticks) +{ + struct proc *p = lp->lwp_proc; + int sig; + + /* + * Charge system time if profiling. Note: times are in microseconds. + * This may do a copyout and block, so do it first even though it + * means some system time will be charged as user time. + */ + if (p->p_flag & P_PROFIL) { + addupc_task(p, frame->tf_eip, + (u_int)((int)lp->lwp_thread->td_sticks - sticks)); + } + +recheck: + /* + * Block here if we are in a stopped state. + */ + if (p->p_flag & P_STOPPED) { + get_mplock(); + tstop(p); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending upcalls + */ + if (p->p_flag & P_UPCALLPEND) { + p->p_flag &= ~P_UPCALLPEND; + get_mplock(); + postupcall(lp); + rel_mplock(); + goto recheck; + } + + /* + * Post any pending signals + */ + if ((sig = CURSIG(p)) != 0) { + get_mplock(); + postsig(sig); + rel_mplock(); + goto recheck; + } + + /* + * block here if we are swapped out, but still process signals + * (such as SIGKILL). proc0 (the swapin scheduler) is already + * aware of our situation, we do not have to wake it up. + */ + if (p->p_flag & P_SWAPPEDOUT) { + get_mplock(); + p->p_flag |= P_SWAPWAIT; + swapin_request(); + if (p->p_flag & P_SWAPWAIT) + tsleep(p, PCATCH, "SWOUT", 0); + p->p_flag &= ~P_SWAPWAIT; + rel_mplock(); + goto recheck; + } +} + +/* + * Cleanup from userenter and any passive release that might have occured. + * We must reclaim the current-process designation before we can return + * to usermode. We also handle both LWKT and USER reschedule requests. + */ +static __inline void +userexit(struct lwp *lp) +{ + struct thread *td = lp->lwp_thread; + globaldata_t gd = td->td_gd; + +#if 0 + /* + * If a user reschedule is requested force a new process to be + * chosen by releasing the current process. Our process will only + * be chosen again if it has a considerably better priority. 
+ */ + if (user_resched_wanted()) + lp->lwp_proc->p_usched->release_curproc(lp); +#endif + + /* + * Handle a LWKT reschedule request first. Since our passive release + * is still in place we do not have to do anything special. + */ + if (lwkt_resched_wanted()) + lwkt_switch(); + + /* + * Acquire the current process designation for this user scheduler + * on this cpu. This will also handle any user-reschedule requests. + */ + lp->lwp_proc->p_usched->acquire_curproc(lp); + /* We may have switched cpus on acquisition */ + gd = td->td_gd; + + /* + * Reduce our priority in preparation for a return to userland. If + * our passive release function was still in place, our priority was + * never raised and does not need to be reduced. + */ + if (td->td_release == NULL) + lwkt_setpri_self(TDPRI_USER_NORM); + td->td_release = NULL; + + /* + * After reducing our priority there might be other kernel-level + * LWKTs that now have a greater priority. Run them as necessary. + * We don't have to worry about losing cpu to userland because + * we still control the current-process designation and we no longer + * have a passive release function installed. + */ + if (lwkt_checkpri_self()) + lwkt_switch(); +} + +/* + * Exception, fault, and trap interface to the kernel. + * This common code is called from assembly language IDT gate entry + * routines that prepare a suitable stack frame, and restore this + * frame after the exception has been processed. + * + * This function is also called from doreti in an interlock to handle ASTs. + * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap + * + * NOTE! We have to retrieve the fault address prior to obtaining the + * MP lock because get_mplock() may switch out. YYY cr2 really ought + * to be retrieved by the assembly code, not here. + * + * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing + * if an attempt is made to switch from a fast interrupt or IPI. This is + * necessary to properly take fatal kernel traps on SMP machines if + * get_mplock() has to block. + */ + +void +trap(struct trapframe frame) +{ + struct globaldata *gd = mycpu; + struct thread *td = gd->gd_curthread; + struct lwp *lp = td->td_lwp; + struct proc *p; + int sticks = 0; + int i = 0, ucode = 0, type, code; +#ifdef SMP + int have_mplock = 0; +#endif +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif + vm_offset_t eva; + + p = td->td_proc; +#ifdef DDB + if (db_active) { + eva = (frame.tf_trapno == T_PAGEFLT ? rcr2() : 0); + ++gd->gd_trap_nesting_level; + MAKEMPSAFE(have_mplock); + trap_fatal(&frame, eva); + --gd->gd_trap_nesting_level; + goto out2; + } +#endif + + eva = 0; + ++gd->gd_trap_nesting_level; + if (frame.tf_trapno == T_PAGEFLT) { + /* + * For some Cyrix CPUs, %cr2 is clobbered by interrupts. + * This problem is worked around by using an interrupt + * gate for the pagefault handler. We are finally ready + * to read %cr2 and then must reenable interrupts. + * + * XXX this should be in the switch statement, but the + * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the + * flow of control too much for this to be obviously + * correct. + */ + eva = rcr2(); + cpu_enable_intr(); + } +#ifdef SMP + if (trap_mpsafe == 0) + MAKEMPSAFE(have_mplock); +#endif + + --gd->gd_trap_nesting_level; + + if (!(frame.tf_eflags & PSL_I)) { + /* + * Buggy application or kernel code has disabled interrupts + * and then trapped. 
Enabling interrupts now is wrong, but + * it is better than running with interrupts disabled until + * they are accidentally enabled later. + */ + type = frame.tf_trapno; + if (ISPL(frame.tf_cs)==SEL_UPL /*||(frame.tf_eflags&PSL_VM)*/) { + MAKEMPSAFE(have_mplock); + kprintf( + "pid %ld (%s): trap %d with interrupts disabled\n", + (long)curproc->p_pid, curproc->p_comm, type); + } else if (type != T_BPTFLT && type != T_TRCTRAP) { + /* + * XXX not quite right, since this may be for a + * multiple fault in user mode. + */ + MAKEMPSAFE(have_mplock); + kprintf("kernel trap %d with interrupts disabled\n", + type); + } + cpu_enable_intr(); + } + +#if defined(I586_CPU) && !defined(NO_F00F_HACK) +restart: +#endif + type = frame.tf_trapno; + code = frame.tf_err; + +#if 0 + if (in_vm86call) { + ASSERT_MP_LOCK_HELD(curthread); + if (frame.tf_eflags & PSL_VM && + (type == T_PROTFLT || type == T_STKFLT)) { +#ifdef SMP + KKASSERT(td->td_mpcount > 0); +#endif + i = vm86_emulate((struct vm86frame *)&frame); +#ifdef SMP + KKASSERT(td->td_mpcount > 0); +#endif + if (i != 0) { + /* + * returns to original process + */ +#ifdef SMP + vm86_trap((struct vm86frame *)&frame, + have_mplock); +#else + vm86_trap((struct vm86frame *)&frame, 0); +#endif + KKASSERT(0); /* NOT REACHED */ + } + goto out2; + } + switch (type) { + /* + * these traps want either a process context, or + * assume a normal userspace trap. + */ + case T_PROTFLT: + case T_SEGNPFLT: + trap_fatal(&frame, eva); + goto out2; + case T_TRCTRAP: + type = T_BPTFLT; /* kernel breakpoint */ + /* FALL THROUGH */ + } + goto kernel_trap; /* normal kernel trap handling */ + } +#endif + + if ((ISPL(frame.tf_cs) == SEL_UPL) /*||(frame.tf_eflags & PSL_VM)*/) { + /* user trap */ + + userenter(td); + + sticks = (int)td->td_sticks; + lp->lwp_md.md_regs = &frame; + + switch (type) { + case T_PRIVINFLT: /* privileged instruction fault */ + ucode = type; + i = SIGILL; + break; + + case T_BPTFLT: /* bpt instruction fault */ + case T_TRCTRAP: /* trace trap */ + frame.tf_eflags &= ~PSL_T; + i = SIGTRAP; + break; + + case T_ARITHTRAP: /* arithmetic trap */ + ucode = code; + i = SIGFPE; + break; + + case T_ASTFLT: /* Allow process switch */ + mycpu->gd_cnt.v_soft++; + if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { + atomic_clear_int_nonlocked(&mycpu->gd_reqflags, + RQF_AST_OWEUPC); + addupc_task(p, p->p_prof.pr_addr, + p->p_prof.pr_ticks); + } + goto out; + + /* + * The following two traps can happen in + * vm86 mode, and, if so, we want to handle + * them specially. + */ + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ +#if 0 + if (frame.tf_eflags & PSL_VM) { + i = vm86_emulate((struct vm86frame *)&frame); + if (i == 0) + goto out; + break; + } +#endif + /* FALL THROUGH */ + + case T_SEGNPFLT: /* segment not present fault */ + case T_TSSFLT: /* invalid TSS fault */ + case T_DOUBLEFLT: /* double fault */ + default: + ucode = code + BUS_SEGM_FAULT ; + i = SIGBUS; + break; + + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + i = trap_pfault(&frame, TRUE, eva); + if (i == -1) + goto out; +#if defined(I586_CPU) && !defined(NO_F00F_HACK) + if (i == -2) + goto restart; +#endif + if (i == 0) + goto out; + + ucode = T_PAGEFLT; + + /* + * The code is lost because tf_err is overwritten + * with the fault address. Store it in the upper + * 16 bits of tf_trapno for vkernel consumption. 
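+ *
+ * The virtual kernel can then recover both values from the frame
+ * it is handed, roughly as:
+ *
+ *	fault_addr = tf_err;
+ *	fault_code = (tf_trapno >> 16) & 0xffff;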
+ */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + frame.tf_trapno |= (code << 16); + } + break; + + case T_DIVIDE: /* integer divide fault */ + ucode = FPE_INTDIV; + i = SIGFPE; + break; + +#if NISA > 0 + case T_NMI: + MAKEMPSAFE(have_mplock); + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi) + panic("NMI indicates hardware failure"); + break; +#endif /* NISA > 0 */ + + case T_OFLOW: /* integer overflow fault */ + ucode = FPE_INTOVF; + i = SIGFPE; + break; + + case T_BOUND: /* bounds check fault */ + ucode = FPE_FLTSUB; + i = SIGFPE; + break; + + case T_DNA: +#if NNPX > 0 + /* + * The kernel may have switched out the FP unit's + * state, causing the user process to take a fault + * when it tries to use the FP unit. Restore the + * state here + */ + if (npxdna()) + goto out; +#endif + if (!pmath_emulate) { + i = SIGFPE; + ucode = FPE_FPU_NP_TRAP; + break; + } + i = (*pmath_emulate)(&frame); + if (i == 0) { + if (!(frame.tf_eflags & PSL_T)) + goto out2; + frame.tf_eflags &= ~PSL_T; + i = SIGTRAP; + } + /* else ucode = emulator_only_knows() XXX */ + break; + + case T_FPOPFLT: /* FPU operand fetch fault */ + ucode = T_FPOPFLT; + i = SIGILL; + break; + + case T_XMMFLT: /* SIMD floating-point exception */ + ucode = 0; /* XXX */ + i = SIGFPE; + break; + } + } else { +#if 0 +kernel_trap: +#endif + /* kernel trap */ + + switch (type) { + case T_PAGEFLT: /* page fault */ + MAKEMPSAFE(have_mplock); + trap_pfault(&frame, FALSE, eva); + goto out2; + + case T_DNA: +#if NNPX > 0 + /* + * The kernel may be using npx for copying or other + * purposes. + */ + if (npxdna()) + goto out2; +#endif + break; + + case T_PROTFLT: /* general protection fault */ + case T_SEGNPFLT: /* segment not present fault */ + /* + * Invalid segment selectors and out of bounds + * %eip's and %esp's can be set up in user mode. + * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. + */ +#define MAYBE_DORETI_FAULT(where, whereto) \ + do { \ + if (frame.tf_eip == (int)where) { \ + frame.tf_eip = (int)whereto; \ + goto out2; \ + } \ + } while (0) + +#if 0 + /* + * Since we don't save %gs across an interrupt + * frame this check must occur outside the intr + * nesting level check. + */ + if (frame.tf_eip == (int)cpu_switch_load_gs) { + td->td_pcb->pcb_gs = 0; + MAKEMPSAFE(have_mplock); + ksignal(p, SIGBUS); + goto out2; + } +#endif + if (mycpu->gd_intr_nesting_level == 0) { +#if 0 + /* + * Invalid %fs's and %gs's can be created using + * procfs or PT_SETREGS or by invalidating the + * underlying LDT entry. This causes a fault + * in kernel mode when the kernel attempts to + * switch contexts. Lose the bad context + * (XXX) so that we can continue, and generate + * a signal. 
+ */ + MAYBE_DORETI_FAULT(doreti_iret, + doreti_iret_fault); + MAYBE_DORETI_FAULT(doreti_popl_ds, + doreti_popl_ds_fault); + MAYBE_DORETI_FAULT(doreti_popl_es, + doreti_popl_es_fault); + MAYBE_DORETI_FAULT(doreti_popl_fs, + doreti_popl_fs_fault); +#endif + if (td->td_pcb->pcb_onfault) { + frame.tf_eip = + (register_t)td->td_pcb->pcb_onfault; + goto out2; + } + } + break; + + case T_TSSFLT: + /* + * PSL_NT can be set in user mode and isn't cleared + * automatically when the kernel is entered. This + * causes a TSS fault when the kernel attempts to + * `iret' because the TSS link is uninitialized. We + * want to get this fault so that we can fix the + * problem here and not every time the kernel is + * entered. + */ + if (frame.tf_eflags & PSL_NT) { + frame.tf_eflags &= ~PSL_NT; + goto out2; + } + break; + + case T_TRCTRAP: /* trace trap */ +#if 0 + if (frame.tf_eip == (int)IDTVEC(syscall)) { + /* + * We've just entered system mode via the + * syscall lcall. Continue single stepping + * silently until the syscall handler has + * saved the flags. + */ + goto out2; + } + if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { + /* + * The syscall handler has now saved the + * flags. Stop single stepping it. + */ + frame.tf_eflags &= ~PSL_T; + goto out2; + } +#endif +#if 0 + /* + * Ignore debug register trace traps due to + * accesses in the user's address space, which + * can happen under several conditions such as + * if a user sets a watchpoint on a buffer and + * then passes that buffer to a system call. + * We still want to get TRCTRAPS for addresses + * in kernel space because that is useful when + * debugging the kernel. + */ + if (user_dbreg_trap()) { + /* + * Reset breakpoint bits because the + * processor doesn't + */ + load_dr6(rdr6() & 0xfffffff0); + goto out2; + } +#endif + /* + * Fall through (TRCTRAP kernel mode, kernel address) + */ + case T_BPTFLT: + /* + * If DDB is enabled, let it handle the debugger trap. + * Otherwise, debugger traps "can't happen". + */ +#ifdef DDB + MAKEMPSAFE(have_mplock); + if (kdb_trap (type, 0, &frame)) + goto out2; +#endif + break; + +#if NISA > 0 + case T_NMI: + MAKEMPSAFE(have_mplock); +#ifdef POWERFAIL_NMI +#ifndef TIMER_FREQ +# define TIMER_FREQ 1193182 +#endif + handle_powerfail: + { + static unsigned lastalert = 0; + + if(time_second - lastalert > 10) + { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + /* YYY mp count */ + goto out2; + } +#else /* !POWERFAIL_NMI */ + /* machine/parity/power fail/"kitchen sink" faults */ + if (isa_nmi(code) == 0) { +#ifdef DDB + /* + * NMI can be hooked up to a pushbutton + * for debugging. + */ + if (ddb_on_nmi) { + kprintf ("NMI ... going to debugger\n"); + kdb_trap (type, 0, &frame); + } +#endif /* DDB */ + goto out2; + } else if (panic_on_nmi == 0) + goto out2; + /* FALL THROUGH */ +#endif /* POWERFAIL_NMI */ +#endif /* NISA > 0 */ + } + + MAKEMPSAFE(have_mplock); + trap_fatal(&frame, eva); + goto out2; + } + + /* + * Virtual kernel intercept - if the fault is directly related to a + * VM context managed by a virtual kernel then let the virtual kernel + * handle it. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + vkernel_trap(p, &frame); + goto out; + } + + /* + * Translate fault for emulators (e.g. 
Linux) + */ + if (*p->p_sysent->sv_transtrap) + i = (*p->p_sysent->sv_transtrap)(i, type); + + MAKEMPSAFE(have_mplock); + trapsignal(p, i, ucode); + +#ifdef DEBUG + if (type <= MAX_TRAP_MSG) { + uprintf("fatal process exception: %s", + trap_msg[type]); + if ((type == T_PAGEFLT) || (type == T_PROTFLT)) + uprintf(", fault VA = 0x%lx", (u_long)eva); + uprintf("\n"); + } +#endif + +out: +#ifdef SMP + if (ISPL(frame.tf_cs) == SEL_UPL) + KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame.tf_eip)); +#endif + userret(lp, &frame, sticks); + userexit(lp); +out2: ; +#ifdef SMP + if (have_mplock) + rel_mplock(); +#endif +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! %d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} + +#ifdef notyet +/* + * This version doesn't allow a page fault to user space while + * in the kernel. The rest of the kernel needs to be made "safe" + * before this can be used. I think the only things remaining + * to be made safe is the process tracing/debugging code. + */ +static int +trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map = 0; + int rv = 0; + vm_prot_t ftype; + thread_t td = curthread; + struct proc *p = td->td_proc; /* may be NULL */ + + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + + va = trunc_page(eva); + if (va < KvaStart) { + vm_offset_t v; + vm_page_t mpte; + + if (p == NULL || + (!usermode && va < VM_MAX_USER_ADDRESS && + (td->td_gd->gd_intr_nesting_level != 0 || + td->td_pcb->pcb_onfault == NULL))) { + trap_fatal(frame, eva); + return (-1); + } + + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + vm = p->p_vmspace; + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + + /* + * Keep swapout from messing with us during this + * critical time. + */ + ++p->p_lock; + + /* + * Grow the stack if necessary + */ + /* grow_stack returns false only if va falls into + * a growable stack region and the stack growth + * fails. It returns true if va was not within + * a growable stack region, or if the stack + * growth succeeded. + */ + if (!grow_stack (p, va)) { + rv = KERN_FAILURE; + --p->p_lock; + goto nogo; + } + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + --p->p_lock; + } else { + /* + * Don't allow user-mode faults in kernel address space. + */ + if (usermode) + goto nogo; + + /* + * Since we know that kernel virtual address addresses + * always have pte pages mapped, we just have to fault + * the page. + */ + rv = vm_fault(&kernel_map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (mtd->td_gd->gd_intr_nesting_level == 0 && + td->td_pcb->pcb_onfault) { + frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; + return (0); + } + trap_fatal(frame, eva); + return (-1); + } + + /* kludge to pass faulting virtual address to sendsig */ + frame->tf_err = eva; + + return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); +} +#endif + +int +trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) +{ + vm_offset_t va; + struct vmspace *vm = NULL; + vm_map_t map = 0; + int rv = 0; + vm_prot_t ftype; + thread_t td = curthread; + struct proc *p = td->td_proc; + + va = trunc_page(eva); + if (va >= KERNBASE) { + /* + * Don't allow user-mode faults in kernel address space. + * An exception: if the faulting address is the invalid + * instruction entry in the IDT, then the Intel Pentium + * F00F bug workaround was triggered, and we need to + * treat it is as an illegal instruction, and not a page + * fault. + */ + if (usermode) + goto nogo; + + map = &kernel_map; + } else { + /* + * This is a fault on non-kernel virtual memory. + * vm is initialized above to NULL. If curproc is NULL + * or curproc->p_vmspace is NULL the fault is fatal. + */ + if (p != NULL) + vm = p->p_vmspace; + + if (vm == NULL) + goto nogo; + + map = &vm->vm_map; + } + + if (frame->tf_err & PGEX_W) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + + if (map != &kernel_map) { + /* + * Keep swapout from messing with us during this + * critical time. + */ + ++p->p_lock; + + /* + * Grow the stack if necessary + */ + /* grow_stack returns false only if va falls into + * a growable stack region and the stack growth + * fails. It returns true if va was not within + * a growable stack region, or if the stack + * growth succeeded. + */ + if (!grow_stack (p, va)) { + rv = KERN_FAILURE; + --p->p_lock; + goto nogo; + } + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY + : VM_FAULT_NORMAL); + + --p->p_lock; + } else { + /* + * Don't have to worry about process locking or stacks in the kernel. + */ + rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); +nogo: + if (!usermode) { + if (td->td_gd->gd_intr_nesting_level == 0 && + td->td_pcb->pcb_onfault) { + frame->tf_eip = (register_t)td->td_pcb->pcb_onfault; + return (0); + } + trap_fatal(frame, eva); + return (-1); + } + + /* kludge to pass faulting virtual address to sendsig */ + frame->tf_err = eva; + + return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); +} + +static void +trap_fatal(struct trapframe *frame, vm_offset_t eva) +{ + int code, type, ss, esp; + + code = frame->tf_err; + type = frame->tf_trapno; + + if (type <= MAX_TRAP_MSG) + kprintf("\n\nFatal trap %d: %s while in %s mode\n", + type, trap_msg[type], + /*frame->tf_eflags & PSL_VM ? "vm86" :*/ + ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); +#ifdef SMP + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic.id = %08x\n", lapic.id); +#endif + if (type == T_PAGEFLT) { + kprintf("fault virtual address = 0x%x\n", eva); + kprintf("fault code = %s %s, %s\n", + code & PGEX_U ? "user" : "supervisor", + code & PGEX_W ? "write" : "read", + code & PGEX_P ? 
"protection violation" : "page not present"); + } + kprintf("instruction pointer = 0x%x:0x%x\n", + frame->tf_cs & 0xffff, frame->tf_eip); + if ((ISPL(frame->tf_cs) == SEL_UPL) /*||(frame->tf_eflags&PSL_VM)*/) { + ss = frame->tf_ss & 0xffff; + esp = frame->tf_esp; + } else { + ss = GSEL(GDATA_SEL, SEL_KPL); + esp = (int)&frame->tf_esp; + } + kprintf("stack pointer = 0x%x:0x%x\n", ss, esp); + kprintf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); + kprintf("processor eflags = "); + if (frame->tf_eflags & PSL_T) + kprintf("trace trap, "); + if (frame->tf_eflags & PSL_I) + kprintf("interrupt enabled, "); + if (frame->tf_eflags & PSL_NT) + kprintf("nested task, "); + if (frame->tf_eflags & PSL_RF) + kprintf("resume, "); +#if 0 + if (frame->tf_eflags & PSL_VM) + kprintf("vm86, "); +#endif + kprintf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); + kprintf("current process = "); + if (curproc) { + kprintf("%lu (%s)\n", + (u_long)curproc->p_pid, curproc->p_comm ? + curproc->p_comm : ""); + } else { + kprintf("Idle\n"); + } + kprintf("current thread = pri %d ", curthread->td_pri); + if (curthread->td_pri >= TDPRI_CRIT) + kprintf("(CRIT)"); + kprintf("\n"); +#ifdef SMP +/** + * XXX FIXME: + * we probably SHOULD have stopped the other CPUs before now! + * another CPU COULD have been touching cpl at this moment... + */ + kprintf(" <- SMP: XXX"); +#endif + kprintf("\n"); + +#ifdef KDB + if (kdb_trap(&psl)) + return; +#endif +#ifdef DDB + if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) + return; +#endif + kprintf("trap number = %d\n", type); + if (type <= MAX_TRAP_MSG) + panic("%s", trap_msg[type]); + else + panic("unknown/reserved trap"); +} + +/* + * Double fault handler. Called when a fault occurs while writing + * a frame for a trap/exception onto the stack. This usually occurs + * when the stack overflows (such is the case with infinite recursion, + * for example). + * + * XXX Note that the current PTD gets replaced by IdlePTD when the + * task switch occurs. This means that the stack that was active at + * the time of the double fault is not available at unless + * the machine was idle when the double fault occurred. The downside + * of this is that "trace " in ddb won't work. + */ +void +dblfault_handler(void) +{ + struct mdglobaldata *gd = mdcpu; + + kprintf("\nFatal double fault:\n"); + kprintf("eip = 0x%x\n", gd->gd_common_tss.tss_eip); + kprintf("esp = 0x%x\n", gd->gd_common_tss.tss_esp); + kprintf("ebp = 0x%x\n", gd->gd_common_tss.tss_ebp); +#ifdef SMP + /* three separate prints in case of a trap on an unmapped page */ + kprintf("mp_lock = %08x; ", mp_lock); + kprintf("cpuid = %d; ", mycpu->gd_cpuid); + kprintf("lapic.id = %08x\n", lapic.id); +#endif + panic("double fault"); +} + +/* + * Compensate for 386 brain damage (missing URKR). + * This is a little simpler than the pagefault handler in trap() because + * it the page tables have already been faulted in and high addresses + * are thrown out early for other reasons. + */ +int +trapwrite(unsigned addr) +{ + struct proc *p; + vm_offset_t va; + struct vmspace *vm; + int rv; + + va = trunc_page((vm_offset_t)addr); + /* + * XXX - MAX is END. Changed > to >= for temp. fix. 
+ */ + if (va >= VM_MAX_USER_ADDRESS) + return (1); + + p = curproc; + vm = p->p_vmspace; + + ++p->p_lock; + + if (!grow_stack (p, va)) { + --p->p_lock; + return (1); + } + + /* + * fault the data page + */ + rv = vm_fault(&vm->vm_map, va, VM_PROT_WRITE, VM_FAULT_DIRTY); + + --p->p_lock; + + if (rv != KERN_SUCCESS) + return 1; + + return (0); +} + +/* + * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold MP lock. + * + * MPSAFE - note that large sections of this routine are run without + * the MP lock. + */ + +void +syscall2(struct trapframe frame) +{ + struct thread *td = curthread; + struct proc *p = td->td_proc; + struct lwp *lp = td->td_lwp; + caddr_t params; + struct sysent *callp; + register_t orig_tf_eflags; + int sticks; + int error; + int narg; +#ifdef INVARIANTS + int crit_count = td->td_pri & ~TDPRI_MASK; +#endif +#ifdef SMP + int have_mplock = 0; +#endif + u_int code; + union sysunion args; + +#ifdef DIAGNOSTIC + if (ISPL(frame.tf_cs) != SEL_UPL) { + get_mplock(); + panic("syscall"); + /* NOT REACHED */ + } +#endif + +#ifdef SMP + KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame.tf_eip)); + if (syscall_mpsafe == 0) + MAKEMPSAFE(have_mplock); +#endif + userenter(td); /* lazy raise our priority */ + + /* + * Misc + */ + sticks = (int)td->td_sticks; + orig_tf_eflags = frame.tf_eflags; + + /* + * Virtual kernel intercept - if a VM context managed by a virtual + * kernel issues a system call the virtual kernel handles it, not us. + * Restore the virtual kernel context and return from its system + * call. The current frame is copied out to the virtual kernel. + */ + if (p->p_vkernel && p->p_vkernel->vk_current) { + error = vkernel_trap(p, &frame); + frame.tf_eax = error; + if (error) + frame.tf_eflags |= PSL_C; + error = EJUSTRETURN; + goto out; + } + + /* + * Get the system call parameters and account for time + */ + lp->lwp_md.md_regs = &frame; + params = (caddr_t)frame.tf_esp + sizeof(int); + code = frame.tf_eax; + + if (p->p_sysent->sv_prepsyscall) { + (*p->p_sysent->sv_prepsyscall)( + &frame, (int *)(&args.nosys.sysmsg + 1), + &code, ¶ms); + } else { + /* + * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. + */ + if (code == SYS_syscall) { + /* + * Code is first argument, followed by actual args. + */ + code = fuword(params); + params += sizeof(int); + } else if (code == SYS___syscall) { + /* + * Like syscall, but code is a quad, so as to maintain + * quad alignment for the rest of the arguments. 
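+ *
+ * In other words the user stack at syscall entry looks roughly
+ * like
+ *
+ *	[ return address ][ code (int or quad) ][ arg0 ][ arg1 ] ...
+ *
+ * and params is advanced past the code by sizeof(int) or
+ * sizeof(quad_t) accordingly before the arguments are copied in.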
+ */ + code = fuword(params); + params += sizeof(quad_t); + } + } + + code &= p->p_sysent->sv_mask; + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin is MP aware, but the tracing code is not + */ + if (narg && params) { + error = copyin(params, (caddr_t)(&args.nosys.sysmsg + 1), + narg * sizeof(register_t)); + if (error) { +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + + ktrsyscall(p, code, narg, + (void *)(&args.nosys.sysmsg + 1)); + } +#endif + goto bad; + } + } + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) { + MAKEMPSAFE(have_mplock); + ktrsyscall(p, code, narg, (void *)(&args.nosys.sysmsg + 1)); + } +#endif + + /* + * For traditional syscall code edx is left untouched when 32 bit + * results are returned. Since edx is loaded from fds[1] when the + * system call returns we pre-set it here. + */ + args.sysmsg_fds[0] = 0; + args.sysmsg_fds[1] = frame.tf_edx; + + /* + * The syscall might manipulate the trap frame. If it does it + * will probably return EJUSTRETURN. + */ + args.sysmsg_frame = &frame; + + STOPEVENT(p, S_SCE, narg); /* MP aware */ + +#ifdef SMP + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe. We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + MAKEMPSAFE(have_mplock); +#endif + + error = (*callp->sy_call)(&args); + +out: + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ + switch (error) { + case 0: + /* + * Reinitialize proc pointer `p' as it may be different + * if this is a child returning from fork syscall. + */ + p = curproc; + lp = curthread->td_lwp; + frame.tf_eax = args.sysmsg_fds[0]; + frame.tf_edx = args.sysmsg_fds[1]; + frame.tf_eflags &= ~PSL_C; + break; + case ERESTART: + /* + * Reconstruct pc, assuming lcall $X,y is 7 bytes, + * int 0x80 is 2 bytes. We saved this in tf_err. + */ + frame.tf_eip -= frame.tf_err; + break; + case EJUSTRETURN: + break; + case EASYNC: + panic("Unexpected EASYNC return value (for now)"); + default: +bad: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + frame.tf_eax = error; + frame.tf_eflags |= PSL_C; + break; + } + + /* + * Traced syscall. trapsignal() is not MP aware. + */ + if ((orig_tf_eflags & PSL_T) /*&& !(orig_tf_eflags & PSL_VM)*/) { + MAKEMPSAFE(have_mplock); + frame.tf_eflags &= ~PSL_T; + trapsignal(p, SIGTRAP, 0); + } + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(lp, &frame, sticks); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) { + MAKEMPSAFE(have_mplock); + ktrsysret(p, code, error, args.sysmsg_result); + } +#endif + + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, code); + + userexit(lp); +#ifdef SMP + /* + * Release the MP lock if we had to get it + */ + KASSERT(td->td_mpcount == have_mplock, + ("badmpcount syscall2/end from %p", (void *)frame.tf_eip)); + if (have_mplock) + rel_mplock(); +#endif +#ifdef INVARIANTS + KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK), + ("syscall: critical section count mismatch! 
%d/%d", + crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT)); +#endif +} + +/* + * Simplified back end of syscall(), used when returning from fork() + * directly into user mode. MP lock is held on entry and should be + * released on return. This code will return back into the fork + * trampoline code which then runs doreti. + */ +void +fork_return(struct lwp *lp, struct trapframe frame) +{ + struct proc *p = lp->lwp_proc; + + frame.tf_eax = 0; /* Child returns zero */ + frame.tf_eflags &= ~PSL_C; /* success */ + frame.tf_edx = 1; + + /* + * Newly forked processes are given a kernel priority. We have to + * adjust the priority to a normal user priority and fake entry + * into the kernel (call userenter()) to install a passive release + * function just in case userret() decides to stop the process. This + * can occur when ^Z races a fork. If we do not install the passive + * release function the current process designation will not be + * released when the thread goes to sleep. + */ + lwkt_setpri_self(TDPRI_USER_NORM); + userenter(lp->lwp_thread); + userret(lp, &frame, 0); +#ifdef KTRACE + if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) + ktrsysret(p, SYS_fork, 0, 0); +#endif + p->p_flag |= P_PASSIVE_ACQ; + userexit(lp); + p->p_flag &= ~P_PASSIVE_ACQ; +#ifdef SMP + KKASSERT(lp->lwp_thread->td_mpcount == 1); + rel_mplock(); +#endif +} diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/i386/userldt.c similarity index 78% copy from sys/platform/vkernel/include/md_var.h copy to sys/platform/vkernel/i386/userldt.c index 628dac29f2..c2f3ad6608 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/i386/userldt.c @@ -31,29 +31,30 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/userldt.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ */ -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -#ifndef _SYS_TYPES_H_ #include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif - -extern char sigcode[]; -extern int szsigcode; -extern vpte_t *KernelPTA; -extern vpte_t *KernelPTD; -extern vm_offset_t crashdumpmap; +#include +#include +#include +#include -struct mdglobaldata; +void +set_user_ldt (struct pcb *pcb) +{ + panic("set_user_ldt"); +} -void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); +struct pcb_ldt * +user_ldt_alloc (struct pcb *pcb, int len) +{ + panic("user_ldt_alloc"); +} -#endif +void +user_ldt_free (struct pcb *pcb) +{ + panic("user_ldt_free"); +} diff --git a/sys/platform/vkernel/i386/vm_machdep.c b/sys/platform/vkernel/i386/vm_machdep.c new file mode 100644 index 0000000000..5ddfe725f6 --- /dev/null +++ b/sys/platform/vkernel/i386/vm_machdep.c @@ -0,0 +1,398 @@ +/*- + * Copyright (c) 1982, 1986 The Regents of the University of California. + * Copyright (c) 1989, 1990 William Jolitz + * Copyright (c) 1994 John Dyson + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 + * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/i386/vm_machdep.c,v 1.1 2007/01/05 22:18:18 dillon Exp $ + */ + +#include "use_npx.h" +#include "use_isa.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* npxthread */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +char machine[] = MACHINE_CPU; +SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, + machine, 0, "Machine class"); + +/* + * Finish a fork operation, with lwp lp2 nearly set up. + * Copy and update the pcb, set up the stack so that the child + * ready to run and return to user mode. + */ +void +cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags) +{ + struct pcb *pcb2; + + if ((flags & RFPROC) == 0) { + if ((flags & RFMEM) == 0) { + /* unshare user LDT */ + struct pcb *pcb1 = lp1->lwp_thread->td_pcb; + struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt; + if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) { + pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len); + user_ldt_free(pcb1); + pcb1->pcb_ldt = pcb_ldt; + set_user_ldt(pcb1); + } + } + return; + } + +#if NNPX > 0 + /* Ensure that lp1's pcb is up to date. */ + if (mdcpu->gd_npxthread == lp1->lwp_thread) + npxsave(lp1->lwp_thread->td_savefpu); +#endif + + /* + * Copy lp1's PCB. This really only applies to the + * debug registers and FP state, but its faster to just copy the + * whole thing. Because we only save the PCB at switchout time, + * the register state (including pcb_gs) may not be current. + */ + pcb2 = lp2->lwp_thread->td_pcb; + *pcb2 = *lp1->lwp_thread->td_pcb; + + /* + * Create a new fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a + * syscall. This copies the user mode register values. 
The + * 16 byte offset saves space for vm86, and must match + * common_tss.esp0 (kernel stack pointer on entry from user mode) + * + * pcb_esp must allocate an additional call-return pointer below + * the trap frame which will be restored by cpu_restore from + * PCB_EIP, and the thread's td_sp pointer must allocate an + * additonal two worsd below the pcb_esp call-return pointer to + * hold the LWKT restore function pointer and eflags. + * + * The LWKT restore function pointer must be set to cpu_restore, + * which is our standard heavy weight process switch-in function. + * YYY eventually we should shortcut fork_return and fork_trampoline + * to use the LWKT restore function directly so we can get rid of + * all the extra crap we are setting up. + */ + lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1; + bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs)); + + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir); + pcb2->pcb_edi = 0; + pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ + pcb2->pcb_ebp = 0; + pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *); + pcb2->pcb_ebx = (int)lp2; /* fork_trampoline argument */ + pcb2->pcb_eip = (int)fork_trampoline; + lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *)); + *(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER; + lp2->lwp_thread->td_sp -= sizeof(void *); + *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore; + + /* + * Segment registers. + */ + pcb2->pcb_gs = rgs(); + + /* + * pcb2->pcb_ldt: duplicated below, if necessary. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above (always 0 here?). + * pcb2->pcb_onfault: cloned above (always NULL here?). + */ + + /* + * XXX don't copy the i/o pages. this should probably be fixed. + */ + pcb2->pcb_ext = 0; + + /* Copy the LDT, if necessary. */ + if (pcb2->pcb_ldt != 0) { + if (flags & RFMEM) { + pcb2->pcb_ldt->ldt_refcnt++; + } else { + pcb2->pcb_ldt = user_ldt_alloc(pcb2, + pcb2->pcb_ldt->ldt_len); + } + } + bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls, + sizeof(lp2->lwp_thread->td_tls)); + /* + * Now, cpu_switch() can schedule the new process. + * pcb_esp is loaded pointing to the cpu_switch() stack frame + * containing the return address when exiting cpu_switch. + * This will normally be to fork_trampoline(), which will have + * %ebx loaded with the new proc's pointer. fork_trampoline() + * will set up a stack to call fork_return(p, frame); to complete + * the return to user-mode. + */ +} + +/* + * Intercept the return address from a freshly forked process that has NOT + * been scheduled yet. + * + * This is needed to make kernel threads stay in kernel mode. 
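As an illustration only (not part of the patch, and every name below is a hypothetical stand-in), the effect of the pcb_esi/pcb_ebx convention can be modeled in plain user-space C: the fork trampoline ends up calling whatever function sits in pcb_esi with the argument in pcb_ebx followed by the trap frame, and cpu_set_fork_handler() merely rewrites those two slots so a kernel thread never falls through to fork_return() and user mode:

#include <stdio.h>

struct fake_frame {
    int tf_eax;
};

struct fake_pcb {
    void (*pcb_esi)(void *arg, struct fake_frame frame); /* function to call */
    void  *pcb_ebx;                                      /* its first argument */
};

static void
fork_return_model(void *arg, struct fake_frame frame)
{
    printf("lwp %p returns to user mode, eax=%d\n", arg, frame.tf_eax);
}

static void
kthread_main_model(void *arg, struct fake_frame frame)
{
    printf("kernel thread \"%s\" keeps running in kernel mode\n",
        (const char *)arg);
}

int
main(void)
{
    struct fake_frame frame = { 0 };    /* child's %eax = 0 */
    struct fake_pcb pcb = { fork_return_model, (void *)0x1234 };

    /* default set up by cpu_fork(): a forked user lwp */
    pcb.pcb_esi(pcb.pcb_ebx, frame);

    /* cpu_set_fork_handler() equivalent: swap function and argument */
    pcb.pcb_esi = kthread_main_model;
    pcb.pcb_ebx = "example_kthread";
    pcb.pcb_esi(pcb.pcb_ebx, frame);
    return 0;
}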
+ */ +void +cpu_set_fork_handler(struct lwp *lp, void (*func)(void *), void *arg) +{ + /* + * Note that the trap frame follows the args, so the function + * is really called like this: func(arg, frame); + */ + lp->lwp_thread->td_pcb->pcb_esi = (int) func; /* function */ + lp->lwp_thread->td_pcb->pcb_ebx = (int) arg; /* first arg */ +} + +void +cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg) +{ + td->td_pcb->pcb_esi = (int)func; + td->td_pcb->pcb_ebx = (int) arg; + td->td_switch = cpu_lwkt_switch; + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = rfunc; /* exit function on return */ + td->td_sp -= sizeof(void *); + *(void **)td->td_sp = cpu_kthread_restore; +} + +void +cpu_proc_exit(void) +{ + struct thread *td = curthread; + struct pcb *pcb; + struct pcb_ext *ext; + +#if NNPX > 0 + npxexit(); +#endif /* NNPX */ + + /* + * If we were using a private TSS do a forced-switch to ourselves + * to switch back to the common TSS before freeing it. + */ + pcb = td->td_pcb; + if ((ext = pcb->pcb_ext) != NULL) { + crit_enter(); + pcb->pcb_ext = NULL; + td->td_switch(td); + crit_exit(); + kmem_free(&kernel_map, (vm_offset_t)ext, ctob(IOPAGES + 1)); + } + user_ldt_free(pcb); + if (pcb->pcb_flags & PCB_DBREGS) { + /* + * disable all hardware breakpoints + */ + reset_dbregs(); + pcb->pcb_flags &= ~PCB_DBREGS; + } + td->td_gd->gd_cnt.v_swtch++; + + crit_enter_quick(td); + lwkt_deschedule_self(td); + lwkt_remove_tdallq(td); + cpu_thread_exit(); +} + +/* + * Terminate the current thread. The caller must have already acquired + * the thread's rwlock and placed it on a reap list or otherwise notified + * a reaper of its existance. We set a special assembly switch function which + * releases td_rwlock after it has cleaned up the MMU state and switched + * out the stack. + * + * Must be caller from a critical section and with the thread descheduled. + */ +void +cpu_thread_exit(void) +{ + curthread->td_switch = cpu_exit_switch; + curthread->td_flags |= TDF_EXITING; + lwkt_switch(); + panic("cpu_exit"); +} + +/* + * Process Reaper. Called after the caller has acquired the thread's + * rwlock and removed it from the reap list. + */ +void +cpu_proc_wait(struct proc *p) +{ + struct thread *td; + + /* drop per-process resources */ + td = pmap_dispose_proc(p); + if (td) + lwkt_free_thread(td); +} + +/* + * Dump the machine specific header information at the start of a core dump. + */ +int +cpu_coredump(struct thread *td, struct vnode *vp, struct ucred *cred) +{ + struct proc *p = td->td_proc; + int error; + caddr_t tempuser; + + KKASSERT(p); + tempuser = kmalloc(ctob(UPAGES), M_TEMP, M_WAITOK); + if (!tempuser) + return EINVAL; + + bzero(tempuser, ctob(UPAGES)); + bcopy(p->p_addr, tempuser, sizeof(struct user)); + bcopy(p->p_md.md_regs, + tempuser + ((caddr_t) p->p_md.md_regs - (caddr_t) p->p_addr), + sizeof(struct trapframe)); + bcopy(p->p_thread->td_pcb, tempuser + ((char *)p->p_thread->td_pcb - (char *)p->p_addr), sizeof(struct pcb)); + + error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser, ctob(UPAGES), + (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, (int *)NULL); + + kfree(tempuser, M_TEMP); + + return error; +} + +#ifdef notyet +static void +setredzone(u_short *pte, caddr_t vaddr) +{ +/* eventually do this by setting up an expand-down stack segment + for ss0: selector, allowing stack access down to top of u. 
+ this means though that protection violations need to be handled + thru a double fault exception that must do an integral task + switch to a known good context, within which a dump can be + taken. a sensible scheme might be to save the initial context + used by sched (that has physical memory mapped 1:1 at bottom) + and take the dump while still in mapped mode */ +} +#endif + +/* + * Convert kernel VA to physical address + */ +vm_paddr_t +kvtop(void *addr) +{ + vm_paddr_t pa; + + pa = pmap_kextract((vm_offset_t)addr); + if (pa == 0) + panic("kvtop: zero page frame"); + return (pa); +} + +int +grow_stack(struct proc *p, u_int sp) +{ + int rv; + + rv = vm_map_growstack (p, sp); + if (rv != KERN_SUCCESS) + return (0); + + return (1); +} + +SYSCTL_DECL(_vm_stats_misc); + +static int cnt_prezero; + +SYSCTL_INT(_vm_stats_misc, OID_AUTO, + cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, ""); + +/* + * Tell whether this address is in some physical memory region. + * Currently used by the kernel coredump code in order to avoid + * dumping the ``ISA memory hole'' which could cause indefinite hangs, + * or other unpredictable behaviour. + */ + +int +is_physical_memory(vm_offset_t addr) +{ + return 1; +} + diff --git a/sys/platform/vkernel/include/globaldata.h b/sys/platform/vkernel/include/globaldata.h index 793c8fb850..350c21b79c 100644 --- a/sys/platform/vkernel/include/globaldata.h +++ b/sys/platform/vkernel/include/globaldata.h @@ -28,7 +28,7 @@ * should not include this file. * * $FreeBSD: src/sys/i386/include/globaldata.h,v 1.11.2.1 2000/05/16 06:58:10 dillon Exp $ - * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.3 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/globaldata.h,v 1.4 2007/01/05 22:18:19 dillon Exp $ */ #ifndef _MACHINE_GLOBALDATA_H_ @@ -83,7 +83,7 @@ struct mdglobaldata { int gd_spending; /* software interrupt pending */ int gd_sdelayed; /* delayed software ints */ int gd_currentldt; - int gd_private_tss; + int unused000; u_int unused001; u_int gd_other_cpus; u_int gd_ss_eflags; diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/include/md_var.h index 628dac29f2..26de6e4685 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/include/md_var.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.3 2007/01/05 22:18:19 dillon Exp $ */ #ifndef _MACHINE_MD_VAR_H_ @@ -49,11 +49,20 @@ extern int szsigcode; extern vpte_t *KernelPTA; extern vpte_t *KernelPTD; extern vm_offset_t crashdumpmap; +extern int cpu_fxsr; struct mdglobaldata; +vpte_t *pmap_kpte(vm_offset_t va); void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); + +void cpu_heavy_restore(void); /* cannot be called from C */ +void cpu_lwkt_restore(void); /* cannot be called from C */ +void cpu_idle_restore(void); /* cannot be called from C */ +void cpu_kthread_restore(void); /* cannot be called from C */ +void cpu_exit_switch (struct thread *next); +void cpu_setregs (void); +void cpu_idle (void); #endif diff --git a/sys/platform/vkernel/include/pcb_ext.h b/sys/platform/vkernel/include/pcb_ext.h new file mode 100644 index 0000000000..59d0e61cbb --- /dev/null +++ b/sys/platform/vkernel/include/pcb_ext.h @@ -0,0 +1,76 @@ +/*- + * Copyright (c) 1997 Jonathan Lemon + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/include/pcb_ext.h,v 1.4 1999/12/29 04:33:04 peter Exp $ + * $DragonFly: src/sys/platform/vkernel/include/pcb_ext.h,v 1.1 2007/01/05 22:18:19 dillon Exp $ + */ + +#ifndef _MACHINE_PCB_EXT_H_ +#define _MACHINE_PCB_EXT_H_ + +#ifndef _SYS_TYPES_H_ +#include +#endif + +/* + * Extension to the 386 process control block + */ +#ifndef _MACHINE_TSS_H_ +#include +#endif +#ifndef _MACHINE_VM86_H_ +#include +#endif +#ifndef _MACHINE_SEGMENTS_H_ +#include +#endif + +struct pcb_ext { + struct segment_descriptor ext_tssd; /* tss descriptor */ + struct i386tss ext_tss; /* per-process i386tss */ + caddr_t ext_iomap; /* i/o permission bitmap */ + struct vm86_kernel ext_vm86; /* vm86 area */ +}; + +struct pcb_ldt { + caddr_t ldt_base; + int ldt_len; + int ldt_refcnt; + u_long ldt_active; + struct segment_descriptor ldt_sd; +}; + +#ifdef _KERNEL + +struct pcb; + +void set_user_ldt (struct pcb *); +struct pcb_ldt *user_ldt_alloc (struct pcb *, int); +void user_ldt_free (struct pcb *); +void set_user_TLS (void); + +#endif + +#endif /* _MACHINE_PCB_EXT_H_ */ diff --git a/sys/platform/vkernel/platform/busdma_machdep.c b/sys/platform/vkernel/platform/busdma_machdep.c new file mode 100644 index 0000000000..f153c1f09e --- /dev/null +++ b/sys/platform/vkernel/platform/busdma_machdep.c @@ -0,0 +1,900 @@ +/* + * Copyright (c) 1997, 1998 Justin T. Gibbs. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/i386/busdma_machdep.c,v 1.16.2.2 2003/01/23 00:55:27 scottl Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/busdma_machdep.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* XXX needed for to access pmap to convert per-proc virtual to physical */ +#include +#include +#include + +#include + +#define MAX_BPAGES 128 + +struct bus_dma_tag { + bus_dma_tag_t parent; + bus_size_t alignment; + bus_size_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_dma_filter_t *filter; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + int ref_count; + int map_count; + bus_dma_segment_t *segments; +}; + +struct bounce_page { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + vm_offset_t datavaddr; /* kva of client data */ + bus_size_t datacount; /* client data count */ + STAILQ_ENTRY(bounce_page) links; +}; + +int busdma_swi_pending; + +static STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; +static int free_bpages; +static int reserved_bpages; +static int active_bpages; +static int total_bpages; +static bus_addr_t bounce_lowaddr = BUS_SPACE_MAXADDR; + +struct bus_dmamap { + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + void *buf; /* unmapped buffer pointer */ + bus_size_t buflen; /* unmapped buffer length */ + bus_dmamap_callback_t *callback; + void *callback_arg; + STAILQ_ENTRY(bus_dmamap) links; +}; + +static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; +static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; +static struct bus_dmamap nobounce_dmamap; + +static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); +static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map); +static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_offset_t vaddr, bus_size_t size); +static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); + +static __inline int +run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) +{ + int retval; + + retval = 0; + do { + if (paddr > dmat->lowaddr + && paddr <= dmat->highaddr + && (dmat->filter == NULL + || (*dmat->filter)(dmat->filterarg, paddr) != 0)) + retval = 1; + + dmat = dmat->parent; + } while (retval == 0 && dmat != NULL); + return (retval); +} + +#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 +/* + * Allocate a device specific dma_tag. 
+ */ +int +bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, + bus_size_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, + void *filterarg, bus_size_t maxsize, int nsegments, + bus_size_t maxsegsz, int flags, bus_dma_tag_t *dmat) +{ + bus_dma_tag_t newtag; + int error = 0; + + /* Return a NULL tag on failure */ + *dmat = NULL; + + newtag = kmalloc(sizeof(*newtag), M_DEVBUF, M_INTWAIT); + + newtag->parent = parent; + newtag->alignment = alignment; + newtag->boundary = boundary; + newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); + newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); + newtag->filter = filter; + newtag->filterarg = filterarg; + newtag->maxsize = maxsize; + newtag->nsegments = nsegments; + newtag->maxsegsz = maxsegsz; + newtag->flags = flags; + newtag->ref_count = 1; /* Count ourself */ + newtag->map_count = 0; + newtag->segments = NULL; + + /* Take into account any restrictions imposed by our parent tag */ + if (parent != NULL) { + newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); + newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); + /* + * XXX Not really correct??? Probably need to honor boundary + * all the way up the inheritence chain. + */ + newtag->boundary = MAX(parent->boundary, newtag->boundary); + if (newtag->filter == NULL) { + /* + * Short circuit looking at our parent directly + * since we have encapsulated all of its information + */ + newtag->filter = parent->filter; + newtag->filterarg = parent->filterarg; + newtag->parent = parent->parent; + } + if (newtag->parent != NULL) { + parent->ref_count++; + } + } + + if (newtag->lowaddr < ptoa(Maxmem) && + (flags & BUS_DMA_ALLOCNOW) != 0) { + /* Must bounce */ + + if (lowaddr > bounce_lowaddr) { + /* + * Go through the pool and kill any pages + * that don't reside below lowaddr. + */ + panic("bus_dma_tag_create: page reallocation " + "not implemented"); + } + if (ptoa(total_bpages) < maxsize) { + int pages; + + pages = atop(maxsize) - total_bpages; + + /* Add pages to our bounce pool */ + if (alloc_bounce_pages(newtag, pages) < pages) + error = ENOMEM; + } + /* Performed initial allocation */ + newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; + } + + if (error != 0) { + kfree(newtag, M_DEVBUF); + } else { + *dmat = newtag; + } + return (error); +} + +int +bus_dma_tag_destroy(bus_dma_tag_t dmat) +{ + if (dmat != NULL) { + + if (dmat->map_count != 0) + return (EBUSY); + + while (dmat != NULL) { + bus_dma_tag_t parent; + + parent = dmat->parent; + dmat->ref_count--; + if (dmat->ref_count == 0) { + if (dmat->segments != NULL) + kfree(dmat->segments, M_DEVBUF); + kfree(dmat, M_DEVBUF); + /* + * Last reference count, so + * release our reference + * count on our parent. + */ + dmat = parent; + } else + dmat = NULL; + } + } + return (0); +} + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. 
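As a usage sketch only (the softc, function names and sizes below are hypothetical, but the call pattern follows the signatures implemented above), a driver built on this layer typically creates one tag describing its device's addressing limits and then obtains maps from it:

struct example_softc {
    bus_dma_tag_t   sc_dmat;        /* tag describing device constraints */
    bus_dmamap_t    sc_dmamap;      /* one map for data transfers */
};

static void
example_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
    if (error == 0)
        *(bus_addr_t *)arg = segs[0].ds_addr;  /* remember the bus address */
}

static int
example_dma_setup(struct example_softc *sc)
{
    int error;

    error = bus_dma_tag_create(NULL,                    /* parent */
                               1,                       /* alignment */
                               0,                       /* boundary */
                               BUS_SPACE_MAXADDR_32BIT, /* lowaddr */
                               BUS_SPACE_MAXADDR,       /* highaddr */
                               NULL, NULL,              /* filter, filterarg */
                               65536,                   /* maxsize */
                               1,                       /* nsegments */
                               65536,                   /* maxsegsz */
                               0,                       /* flags */
                               &sc->sc_dmat);
    if (error)
        return (error);
    return (bus_dmamap_create(sc->sc_dmat, 0, &sc->sc_dmamap));
}

A later bus_dmamap_load(sc->sc_dmat, sc->sc_dmamap, buf, len, example_dma_callback, &busaddr, 0) would then hand the resulting segment list to the callback, bouncing through the page pool above only when the buffer violates the tag's lowaddr constraint.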
+ */ +int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + int error; + + error = 0; + + if (dmat->segments == NULL) { + KKASSERT(dmat->nsegments && dmat->nsegments < 16384); + dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * + dmat->nsegments, M_DEVBUF, M_INTWAIT); + } + + if (dmat->lowaddr < ptoa(Maxmem)) { + /* Must bounce */ + int maxpages; + + *mapp = kmalloc(sizeof(**mapp), M_DEVBUF, M_INTWAIT); + if (*mapp == NULL) { + return (ENOMEM); + } else { + /* Initialize the new map */ + bzero(*mapp, sizeof(**mapp)); + STAILQ_INIT(&((*mapp)->bpages)); + } + /* + * Attempt to add pages to our pool on a per-instance + * basis up to a sane limit. + */ + maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->lowaddr)); + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 + || (dmat->map_count > 0 + && total_bpages < maxpages)) { + int pages; + + if (dmat->lowaddr > bounce_lowaddr) { + /* + * Go through the pool and kill any pages + * that don't reside below lowaddr. + */ + panic("bus_dmamap_create: page reallocation " + "not implemented"); + } + pages = atop(dmat->maxsize); + pages = MIN(maxpages - total_bpages, pages); + error = alloc_bounce_pages(dmat, pages); + + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { + if (error == 0) + dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; + } else { + error = 0; + } + } + } else { + *mapp = NULL; + } + if (error == 0) + dmat->map_count++; + return (error); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + if (map != NULL) { + if (STAILQ_FIRST(&map->bpages) != NULL) + return (EBUSY); + kfree(map, M_DEVBUF); + } + dmat->map_count--; + return (0); +} + + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * + * mapp is degenerate. By definition this allocation should not require + * bounce buffers so do not allocate a dma map. + */ +int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + int mflags; + /* If we succeed, no mapping/bouncing will be required */ + *mapp = NULL; + + if (dmat->segments == NULL) { + KKASSERT(dmat->nsegments < 16384); + dmat->segments = kmalloc(sizeof(bus_dma_segment_t) * + dmat->nsegments, M_DEVBUF, M_INTWAIT); + } + + if (flags & BUS_DMA_NOWAIT) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; + + if ((dmat->maxsize <= PAGE_SIZE) && + dmat->lowaddr >= ptoa(Maxmem)) { + *vaddr = kmalloc(dmat->maxsize, M_DEVBUF, mflags); + /* + * XXX Check whether the allocation crossed a page boundary + * and retry with power-of-2 alignment in that case. + */ + if ((((intptr_t)*vaddr) & PAGE_MASK) != + (((intptr_t)*vaddr + dmat->maxsize) & PAGE_MASK)) { + size_t size; + kfree(*vaddr, M_DEVBUF); + /* XXX check for overflow? */ + for (size = 1; size <= dmat->maxsize; size <<= 1) + ; + *vaddr = kmalloc(size, M_DEVBUF, mflags); + } + } else { + /* + * XXX Use Contigmalloc until it is merged into this facility + * and handles multi-seg allocations. Nobody is doing + * multi-seg allocations yet though. + */ + *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags, + 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul, + dmat->boundary); + } + if (*vaddr == NULL) + return (ENOMEM); + return (0); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. 
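Illustration only (a user-space model with hypothetical names): the predicate that bus_dmamem_alloc() above uses to choose plain kmalloc() over contigmalloc(), and that the free side must mirror, is simply "fits in one page and the device can reach all of physical memory":

#include <stdio.h>

#define PAGE_SIZE   4096

/*
 * Model of the test bus_dmamem_alloc()/bus_dmamem_free() agree on:
 * maxsize <= PAGE_SIZE and lowaddr covering all of RAM means plain
 * kmalloc()/kfree(); anything else goes through contigmalloc()/contigfree().
 */
static int
uses_plain_malloc(size_t maxsize, unsigned long lowaddr, unsigned long maxmem)
{
    return (maxsize <= PAGE_SIZE && lowaddr >= maxmem);
}

int
main(void)
{
    unsigned long maxmem = 1UL << 30;   /* pretend 1GB of physical memory */

    printf("512B, no address limit : %s\n",
        uses_plain_malloc(512, ~0UL, maxmem) ? "kmalloc" : "contigmalloc");
    printf("64KB, ISA (<16MB) limit: %s\n",
        uses_plain_malloc(65536, 1UL << 24, maxmem) ? "kmalloc" : "contigmalloc");
    return 0;
}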
Make the same choice for free/contigfree. + */ +void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + /* + * dmamem does not need to be bounced, so the map should be + * NULL + */ + if (map != NULL) + panic("bus_dmamem_free: Invalid map freed\n"); + if ((dmat->maxsize <= PAGE_SIZE) && + dmat->lowaddr >= ptoa(Maxmem)) + kfree(vaddr, M_DEVBUF); + else + contigfree(vaddr, dmat->maxsize, M_DEVBUF); +} + +#define BUS_DMAMAP_NSEGS ((BUS_SPACE_MAXSIZE / PAGE_SIZE) + 1) + +/* + * Map the buffer buf into bus space using the dmamap map. + */ +int +bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, bus_dmamap_callback_t *callback, + void *callback_arg, int flags) +{ + vm_offset_t vaddr; + vm_paddr_t paddr; + bus_dma_segment_t *sg; + int seg; + int error; + vm_paddr_t nextpaddr; + + if (map == NULL) + map = &nobounce_dmamap; + + error = 0; + /* + * If we are being called during a callback, pagesneeded will + * be non-zero, so we can avoid doing the work twice. + */ + if (dmat->lowaddr < ptoa(Maxmem) && + map->pagesneeded == 0) { + vm_offset_t vendaddr; + + /* + * Count the number of bounce pages + * needed in order to complete this transfer + */ + vaddr = trunc_page((vm_offset_t)buf); + vendaddr = (vm_offset_t)buf + buflen; + + while (vaddr < vendaddr) { + paddr = pmap_kextract(vaddr); + if (run_filter(dmat, paddr) != 0) { + + map->pagesneeded++; + } + vaddr += PAGE_SIZE; + } + } + + /* Reserve Necessary Bounce Pages */ + if (map->pagesneeded != 0) { + crit_enter(); + if (reserve_bounce_pages(dmat, map) != 0) { + + /* Queue us for resources */ + map->dmat = dmat; + map->buf = buf; + map->buflen = buflen; + map->callback = callback; + map->callback_arg = callback_arg; + + STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); + crit_exit(); + + return (EINPROGRESS); + } + crit_exit(); + } + + vaddr = (vm_offset_t)buf; + sg = dmat->segments; + seg = 1; + sg->ds_len = 0; + + nextpaddr = 0; + do { + bus_size_t size; + + paddr = pmap_kextract(vaddr); + size = PAGE_SIZE - (paddr & PAGE_MASK); + if (size > buflen) + size = buflen; + + if (map->pagesneeded != 0 && run_filter(dmat, paddr)) { + paddr = add_bounce_page(dmat, map, vaddr, size); + } + + if (sg->ds_len == 0) { + sg->ds_addr = paddr; + sg->ds_len = size; + } else if (paddr == nextpaddr) { + sg->ds_len += size; + } else { + /* Go to the next segment */ + sg++; + seg++; + if (seg > dmat->nsegments) + break; + sg->ds_addr = paddr; + sg->ds_len = size; + } + vaddr += size; + nextpaddr = paddr + size; + buflen -= size; + } while (buflen > 0); + + if (buflen != 0) { + kprintf("bus_dmamap_load: Too many segs! buf_len = 0x%lx\n", + (u_long)buflen); + error = EFBIG; + } + + (*callback)(callback_arg, dmat->segments, seg, error); + + return (0); +} + +/* + * Utility function to load a linear buffer. lastaddrp holds state + * between invocations (for multiple-buffer loads). segp contains + * the starting segment on entrace, and the ending segment on exit. + * first indicates if this is the first invocation of this function. 
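For reference, the bounce-page pre-count that bus_dmamap_load() above performs before committing to a transfer is nothing more than a page walk over the buffer. A self-contained user-space rendering (illustration only; needs_bounce() stands in for the kernel's pmap_kextract()/run_filter() pair):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE   4096
#define PAGE_MASK   (PAGE_SIZE - 1)
#define trunc_page(x)   ((x) & ~(uintptr_t)PAGE_MASK)

/* Stand-in for run_filter(): addresses above lowaddr must be bounced. */
static int
needs_bounce(uintptr_t paddr, uintptr_t lowaddr)
{
    return (paddr > lowaddr);
}

/* Same page walk bus_dmamap_load() uses to pre-count bounce pages. */
static int
count_bounce_pages(uintptr_t buf, size_t buflen, uintptr_t lowaddr)
{
    uintptr_t va = trunc_page(buf);
    uintptr_t end = buf + buflen;
    int pages = 0;

    while (va < end) {
        if (needs_bounce(va, lowaddr))  /* kernel uses pmap_kextract(va) here */
            pages++;
        va += PAGE_SIZE;
    }
    return (pages);
}

int
main(void)
{
    /* a misaligned 2-page buffer touches 3 pages, all above a 16MB limit */
    printf("%d bounce pages needed\n",
        count_bounce_pages(0x01000010, 2 * PAGE_SIZE, 0x00ffffff));
    return 0;
}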
+ */ +static int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, + void *buf, bus_size_t buflen, + struct thread *td, + int flags, + vm_offset_t *lastaddrp, + int *segp, + int first) +{ + bus_dma_segment_t *segs; + bus_size_t sgsize; + bus_addr_t curaddr, lastaddr, baddr, bmask; + vm_offset_t vaddr = (vm_offset_t)buf; + int seg; + pmap_t pmap; + + if (td->td_proc != NULL) + pmap = vmspace_pmap(td->td_proc->p_vmspace); + else + pmap = NULL; + + segs = dmat->segments; + lastaddr = *lastaddrp; + bmask = ~(dmat->boundary - 1); + + for (seg = *segp; buflen > 0 ; ) { + /* + * Get the physical address for this segment. + */ + if (pmap) + curaddr = pmap_extract(pmap, vaddr); + else + curaddr = pmap_kextract(vaddr); + + /* + * Compute the segment size, and adjust counts. + */ + sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); + if (buflen < sgsize) + sgsize = buflen; + + /* + * Make sure we don't cross any boundaries. + */ + if (dmat->boundary > 0) { + baddr = (curaddr + dmat->boundary) & bmask; + if (sgsize > (baddr - curaddr)) + sgsize = (baddr - curaddr); + } + + /* + * Insert chunk into a segment, coalescing with + * previous segment if possible. + */ + if (first) { + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + first = 0; + } else { + if (curaddr == lastaddr && + (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && + (dmat->boundary == 0 || + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + segs[seg].ds_len += sgsize; + else { + if (++seg >= dmat->nsegments) + break; + segs[seg].ds_addr = curaddr; + segs[seg].ds_len = sgsize; + } + } + + lastaddr = curaddr + sgsize; + vaddr += sgsize; + buflen -= sgsize; + } + + *segp = seg; + *lastaddrp = lastaddr; + + /* + * Did we fit? + */ + return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ +} + +/* + * Like _bus_dmamap_load(), but for mbufs. + */ +int +bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m0, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + int nsegs, error; + + KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL, + ("bus_dmamap_load_mbuf: No support for bounce pages!")); + KASSERT(m0->m_flags & M_PKTHDR, + ("bus_dmamap_load_mbuf: no packet header")); + + nsegs = 0; + error = 0; + if (m0->m_pkthdr.len <= dmat->maxsize) { + int first = 1; + vm_offset_t lastaddr = 0; + struct mbuf *m; + + for (m = m0; m != NULL && error == 0; m = m->m_next) { + if ( m->m_len == 0 ) + continue; + error = _bus_dmamap_load_buffer(dmat, + m->m_data, m->m_len, + curthread, flags, &lastaddr, + &nsegs, first); + first = 0; + } + } else { + error = EINVAL; + } + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs+1, m0->m_pkthdr.len, error); + } + return (error); +} + +/* + * Like _bus_dmamap_load(), but for uios. 
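The segment coalescing done by _bus_dmamap_load_buffer() above, and therefore inherited by the mbuf and uio loaders built on it, reduces to one rule: merge a chunk into the previous segment when it is physically contiguous with it and the merged length still fits maxsegsz. A minimal user-space sketch (illustration only; the boundary check is omitted for brevity):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NSEGMAX     8
#define MAXSEGSZ    65536

struct seg { uintptr_t ds_addr; size_t ds_len; };

/* Add one physically contiguous chunk, coalescing with the previous segment. */
static int
add_chunk(struct seg *segs, int nseg, uintptr_t addr, size_t len)
{
    if (nseg > 0 &&
        addr == segs[nseg - 1].ds_addr + segs[nseg - 1].ds_len &&
        segs[nseg - 1].ds_len + len <= MAXSEGSZ) {
        segs[nseg - 1].ds_len += len;
        return (nseg);
    }
    if (nseg == NSEGMAX)
        return (-1);            /* the kernel would return EFBIG */
    segs[nseg].ds_addr = addr;
    segs[nseg].ds_len = len;
    return (nseg + 1);
}

int
main(void)
{
    struct seg segs[NSEGMAX];
    int n = 0, i;

    n = add_chunk(segs, n, 0x1000, 0x1000);  /* first page */
    n = add_chunk(segs, n, 0x2000, 0x1000);  /* contiguous: merged */
    n = add_chunk(segs, n, 0x8000, 0x0800);  /* gap: new segment */
    for (i = 0; i < n; i++)
        printf("seg %d: addr 0x%lx len 0x%zx\n",
            i, (unsigned long)segs[i].ds_addr, segs[i].ds_len);
    return 0;
}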
+ */ +int +bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, + struct uio *uio, + bus_dmamap_callback2_t *callback, void *callback_arg, + int flags) +{ + vm_offset_t lastaddr; + int nsegs, error, first, i; + bus_size_t resid; + struct iovec *iov; + struct thread *td = NULL; + + KASSERT(dmat->lowaddr >= ptoa(Maxmem) || map != NULL, + ("bus_dmamap_load_uio: No support for bounce pages!")); + + resid = uio->uio_resid; + iov = uio->uio_iov; + + if (uio->uio_segflg == UIO_USERSPACE) { + td = uio->uio_td; + KASSERT(td != NULL && td->td_proc != NULL, + ("bus_dmamap_load_uio: USERSPACE but no proc")); + } + + nsegs = 0; + error = 0; + first = 1; + for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { + /* + * Now at the first iovec to load. Load each iovec + * until we have exhausted the residual count. + */ + bus_size_t minlen = + resid < iov[i].iov_len ? resid : iov[i].iov_len; + caddr_t addr = (caddr_t) iov[i].iov_base; + + error = _bus_dmamap_load_buffer(dmat, + addr, minlen, + td, flags, &lastaddr, &nsegs, first); + first = 0; + + resid -= minlen; + } + + if (error) { + /* force "no valid mappings" in callback */ + (*callback)(callback_arg, dmat->segments, 0, 0, error); + } else { + (*callback)(callback_arg, dmat->segments, + nsegs+1, uio->uio_resid, error); + } + return (error); +} + +/* + * Release the mapping held by map. + */ +void +_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bounce_page *bpage; + + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + STAILQ_REMOVE_HEAD(&map->bpages, links); + free_bounce_page(dmat, bpage); + } +} + +void +_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + + if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { + + /* + * Handle data bouncing. 
We might also + * want to add support for invalidating + * the caches on broken hardware + */ + switch (op) { + case BUS_DMASYNC_PREWRITE: + while (bpage != NULL) { + bcopy((void *)bpage->datavaddr, + (void *)bpage->vaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + break; + + case BUS_DMASYNC_POSTREAD: + while (bpage != NULL) { + bcopy((void *)bpage->vaddr, + (void *)bpage->datavaddr, + bpage->datacount); + bpage = STAILQ_NEXT(bpage, links); + } + break; + case BUS_DMASYNC_PREREAD: + case BUS_DMASYNC_POSTWRITE: + /* No-ops */ + break; + } + } +} + +static int +alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) +{ + int count; + + count = 0; + if (total_bpages == 0) { + STAILQ_INIT(&bounce_page_list); + STAILQ_INIT(&bounce_map_waitinglist); + STAILQ_INIT(&bounce_map_callbacklist); + } + + while (numpages > 0) { + struct bounce_page *bpage; + + bpage = (struct bounce_page *)kmalloc(sizeof(*bpage), M_DEVBUF, + M_INTWAIT); + + if (bpage == NULL) + break; + bzero(bpage, sizeof(*bpage)); + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0ul, + dmat->lowaddr, + PAGE_SIZE, + 0); + if (bpage->vaddr == NULL) { + kfree(bpage, M_DEVBUF); + break; + } + bpage->busaddr = pmap_kextract(bpage->vaddr); + crit_enter(); + STAILQ_INSERT_TAIL(&bounce_page_list, bpage, links); + total_bpages++; + free_bpages++; + crit_exit(); + count++; + numpages--; + } + return (count); +} + +static int +reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + int pages; + + pages = MIN(free_bpages, map->pagesneeded - map->pagesreserved); + free_bpages -= pages; + reserved_bpages += pages; + map->pagesreserved += pages; + pages = map->pagesneeded - map->pagesreserved; + + return (pages); +} + +static bus_addr_t +add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, + bus_size_t size) +{ + struct bounce_page *bpage; + + if (map->pagesneeded == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesneeded--; + + if (map->pagesreserved == 0) + panic("add_bounce_page: map doesn't need any pages"); + map->pagesreserved--; + + crit_enter(); + bpage = STAILQ_FIRST(&bounce_page_list); + if (bpage == NULL) + panic("add_bounce_page: free page list is empty"); + + STAILQ_REMOVE_HEAD(&bounce_page_list, links); + reserved_bpages--; + active_bpages++; + crit_exit(); + + bpage->datavaddr = vaddr; + bpage->datacount = size; + STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); + return (bpage->busaddr); +} + +static void +free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) +{ + struct bus_dmamap *map; + + bpage->datavaddr = 0; + bpage->datacount = 0; + + crit_enter(); + STAILQ_INSERT_HEAD(&bounce_page_list, bpage, links); + free_bpages++; + active_bpages--; + if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { + if (reserve_bounce_pages(map->dmat, map) == 0) { + panic("free_bounce_pages: uncoded\n"); +#if 0 + STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, + map, links); + busdma_swi_pending = 1; + setsoftvm(); +#endif + } + } + crit_exit(); +} + +#if 0 + +void +busdma_swi(void) +{ + struct bus_dmamap *map; + + crit_enter(); + while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { + STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); + crit_exit(); + bus_dmamap_load(map->dmat, map, map->buf, map->buflen, + map->callback, map->callback_arg, /*flags*/0); + crit_enter(); + } + crit_exit(); +} + +#endif + diff --git a/sys/platform/vkernel/platform/console.c 
b/sys/platform/vkernel/platform/console.c new file mode 100644 index 0000000000..d30170f677 --- /dev/null +++ b/sys/platform/vkernel/platform/console.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/platform/vkernel/platform/console.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Global console locking functions + */ +void +cons_lock(void) +{ +} + +void +cons_unlock(void) +{ +} + +/************************************************************************ + * CONSOLE DEVICE * + ************************************************************************ + * + */ + +#define CDEV_MAJOR 183 + +static int vcons_tty_param(struct tty *tp, struct termios *tio); +static void vcons_tty_start(struct tty *tp); + +static d_open_t vcons_open; +static d_close_t vcons_close; +static d_ioctl_t vcons_ioctl; + +static struct dev_ops vcons_ops = { + { "vcons", CDEV_MAJOR, D_TTY }, + .d_open = vcons_open, + .d_close = vcons_close, + .d_read = ttyread, + .d_write = ttywrite, + .d_ioctl = vcons_ioctl, + .d_poll = ttypoll, +}; + +static int +vcons_open(struct dev_open_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + int error; + + if (minor(dev) != 0) + return(ENXIO); + + tp = dev->si_tty = ttymalloc(dev->si_tty); + tp->t_oproc = vcons_tty_start; + tp->t_param = vcons_tty_param; + tp->t_stop = nottystop; + tp->t_dev = dev; + + if (tp->t_state & TS_ISOPEN) + return (EBUSY); + + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = TTYDEF_SPEED; + tp->t_ospeed = TTYDEF_SPEED; + ttsetwater(tp); + + error = (*linesw[tp->t_line].l_open)(dev, tp); + return(error); +} + +static int +vcons_close(struct dev_close_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + + if (minor(dev) != 0) + return(ENXIO); + tp = dev->si_tty; + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line].l_close)(tp, ap->a_fflag); + ttyclose(tp); + } + return(0); +} + +static int +vcons_ioctl(struct dev_ioctl_args *ap) +{ + cdev_t dev = ap->a_head.a_dev; + struct tty *tp; + int error; + + if (minor(dev) != 0) + return(ENXIO); + tp = dev->si_tty; + error = (*linesw[tp->t_line].l_ioctl)(tp, ap->a_cmd, ap->a_data, + ap->a_fflag, ap->a_cred); + if (error != ENOIOCTL) + return (error); + error = ttioctl(tp, ap->a_cmd, ap->a_data, ap->a_fflag); + if (error != ENOIOCTL) + return (error); + return (ENOTTY); +} + +static int +vcons_tty_param(struct tty *tp, struct termios *tio) +{ + tp->t_ispeed = tio->c_ispeed; + tp->t_ospeed = tio->c_ospeed; + tp->t_cflag = tio->c_cflag; + return(0); +} + +static void +vcons_tty_start(struct tty *tp) +{ + int n; + char buf[64]; + + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + ttwwakeup(tp); + return; + } + tp->t_state |= TS_BUSY; + while ((n = q_to_b(&tp->t_outq, buf, sizeof(buf))) > 0) + write(1, buf, n); + tp->t_state &= ~TS_BUSY; + ttwwakeup(tp); +} + +/************************************************************************ + * KERNEL CONSOLE INTERFACE * + ************************************************************************ + * + * Kernel direct-call interface console driver + */ +static cn_probe_t vconsprobe; +static cn_init_t vconsinit; +static cn_term_t vconsterm; +static cn_getc_t vconsgetc; +static cn_checkc_t vconscheckc; +static cn_putc_t vconsputc; + +CONS_DRIVER(vcons, vconsprobe, vconsinit, vconsterm, vconsgetc, + vconscheckc, vconsputc, NULL); + +static void +vconsprobe(struct consdev *cp) +{ + cp->cn_pri = CN_NORMAL; + cp->cn_dev = make_dev(&vcons_ops, 255, + UID_ROOT, GID_WHEEL, 0600, "vconsolectl"); +} + +static void 
+vconsinit(struct consdev *cp) +{ +} + +static void +vconsterm(struct consdev *vp) +{ +} + +static int +vconsgetc(cdev_t dev) +{ + unsigned char c; + + if (read(0, &c, 1) == 1) + return((int)c); + return(-1); +} + +static int +vconscheckc(cdev_t dev) +{ + unsigned char c; + + if (__pread(0, &c, 1, O_FNONBLOCKING, -1LL) == 1) + return((int)c); + return(-1); +} + +static void +vconsputc(cdev_t dev, int c) +{ + char cc = c; + + write(1, &cc, 1); +} + + diff --git a/sys/platform/vkernel/platform/copyio.c b/sys/platform/vkernel/platform/copyio.c index 1d2fe55e44..ec46e380ab 100644 --- a/sys/platform/vkernel/platform/copyio.c +++ b/sys/platform/vkernel/platform/copyio.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.3 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -50,6 +50,26 @@ ovbcopy(const void *src, void *dst, size_t len) bcopy(src, dst, len); } +void +bcopyi(const void *src, void *dst, size_t len) +{ + bcopy(src, dst, len); +} + +int +copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *lencopied) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if ((((char *)kdaddr)[i] = ((const char *)kfaddr)[i]) == 0) { + *lencopied = i + 1; + return(0); + } + } + return (ENAMETOOLONG); +} + /* * Copies a NUL-terminated string from user space to kernel space. * The number of bytes copied, including the terminator, is returned in diff --git a/sys/platform/vkernel/platform/init.c b/sys/platform/vkernel/platform/init.c index 43303b9a26..5f01c8610f 100644 --- a/sys/platform/vkernel/platform/init.c +++ b/sys/platform/vkernel/platform/init.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.5 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.6 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -71,8 +72,14 @@ vm_offset_t virtual_start; vm_offset_t virtual_end; vm_offset_t kernel_vm_end; vm_offset_t crashdumpmap; +vm_offset_t clean_sva; +vm_offset_t clean_eva; +struct msgbuf *msgbufp; +caddr_t ptvmmap; vpte_t *KernelPTD; vpte_t *KernelPTA; +u_int cpu_feature; /* XXX */ +u_int tsc_present; /* XXX */ struct privatespace *CPU_prvspace; @@ -321,6 +328,24 @@ init_kern_memory(void) crashdumpmap = virtual_start; virtual_start += MAXDUMPPGS * PAGE_SIZE; + /* + * msgbufp maps the system message buffer + */ + assert((MSGBUF_SIZE & PAGE_MASK) == 0); + msgbufp = (void *)virtual_start; + for (i = 0; i < (MSGBUF_SIZE >> PAGE_SHIFT); ++i) { + pmap_kenter_quick(virtual_start, phys_avail[0]); + virtual_start += PAGE_SIZE; + phys_avail[0] += PAGE_SIZE; + } + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* + * used by kern_memio for /dev/mem access + */ + ptvmmap = (caddr_t)virtual_start; + virtual_start += PAGE_SIZE; + /* * Bootstrap the kernel_pmap */ @@ -443,3 +468,11 @@ cpu_reset(void) kprintf("cpu reset\n"); exit(0); } + +void +cpu_halt(void) +{ + kprintf("cpu halt\n"); + for (;;) + __asm__ __volatile("hlt"); +} diff --git a/sys/platform/vkernel/platform/ipl_funcs.c b/sys/platform/vkernel/platform/ipl_funcs.c new file mode 100644 index 0000000000..cb7ed7d538 --- /dev/null +++ b/sys/platform/vkernel/platform/ipl_funcs.c @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 1997 Bruce Evans. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/i386/isa/ipl_funcs.c,v 1.32.2.5 2002/12/17 18:04:02 sam Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/ipl_funcs.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Bits in the ipending bitmap variable must be set atomically because + * ipending may be manipulated by interrupts or other cpu's without holding + * any locks. + * + * Note: setbits uses a locked or, making simple cases MP safe. + */ +#define DO_SETBITS(name, var, bits) \ +void \ +name(void) \ +{ \ + struct mdglobaldata *gd = mdcpu; \ + atomic_set_int_nonlocked(var, bits); \ + atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND); \ +} \ + +DO_SETBITS(setdelayed, &gd->gd_spending, loadandclear(&gd->gd_sdelayed)) + +DO_SETBITS(setsoftcamnet,&gd->gd_spending, SWI_CAMNET_PENDING) +DO_SETBITS(setsoftcambio,&gd->gd_spending, SWI_CAMBIO_PENDING) +DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING) +DO_SETBITS(setsoftnet, &gd->gd_spending, SWI_NET_PENDING) +DO_SETBITS(setsofttty, &gd->gd_spending, SWI_TTY_PENDING) +DO_SETBITS(setsoftvm, &gd->gd_spending, SWI_VM_PENDING) +DO_SETBITS(setsofttq, &gd->gd_spending, SWI_TQ_PENDING) +DO_SETBITS(setsoftcrypto,&gd->gd_spending, SWI_CRYPTO_PENDING) + +DO_SETBITS(schedsoftcamnet, &gd->gd_sdelayed, SWI_CAMNET_PENDING) +DO_SETBITS(schedsoftcambio, &gd->gd_sdelayed, SWI_CAMBIO_PENDING) +DO_SETBITS(schedsoftnet, &gd->gd_sdelayed, SWI_NET_PENDING) +DO_SETBITS(schedsofttty, &gd->gd_sdelayed, SWI_TTY_PENDING) +DO_SETBITS(schedsoftvm, &gd->gd_sdelayed, SWI_VM_PENDING) +DO_SETBITS(schedsofttq, &gd->gd_sdelayed, SWI_TQ_PENDING) +/* YYY schedsoft what? */ + +unsigned +softclockpending(void) +{ + return (mdcpu->gd_spending & SWI_CLOCK_PENDING); +} + diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/machintr.c index 84fe6aff25..d8b877ed45 100644 --- a/sys/platform/vkernel/platform/machintr.c +++ b/sys/platform/vkernel/platform/machintr.c @@ -31,14 +31,20 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
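Written out by hand, one instantiation of the DO_SETBITS() macro from ipl_funcs.c above (the setsoftclock case) expands to the following; every setsoft*/schedsoft* helper is the same two atomic stores with a different bitmap and bit:

/* Hand expansion of DO_SETBITS(setsoftclock, &gd->gd_spending, SWI_CLOCK_PENDING). */
void
setsoftclock(void)
{
    struct mdglobaldata *gd = mdcpu;

    atomic_set_int_nonlocked(&gd->gd_spending, SWI_CLOCK_PENDING);
    atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);
}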
* - * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.3 2007/01/05 22:18:20 dillon Exp $ */ #include +#include +#include #include #include #include +/* + * Interrupt Subsystem ABI + */ + static void dummy_intrdis(int); static void dummy_intren(int); static int dummy_vectorctl(int, int, int); @@ -89,3 +95,10 @@ dummy_finalize(void) { } +/* + * Process pending interrupts + */ +void +splz(void) +{ +} diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c index 39c0579a09..919be81c09 100644 --- a/sys/platform/vkernel/platform/pmap.c +++ b/sys/platform/vkernel/platform/pmap.c @@ -38,7 +38,7 @@ * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ - * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.1 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.2 2007/01/05 22:18:20 dillon Exp $ */ #include @@ -409,7 +409,7 @@ pmap_pte(struct pmap *pmap, vm_offset_t va) { vpte_t *ptep; - ptep = pmap->pm_pdir[va >> PAGE_SHIFT]; + ptep = &pmap->pm_pdir[va >> PAGE_SHIFT]; if (*ptep & VPTE_PS) return(ptep); if (*ptep) @@ -450,6 +450,46 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) } } +void +pmap_kenter_sync(vm_offset_t va) +{ + pmap_inval_info info; + + pmap_inval_init(&info); + pmap_inval_add(&info, &kernel_pmap, va); + pmap_inval_flush(&info); +} + +void +pmap_kenter_sync_quick(vm_offset_t va) +{ + madvise((void *)va, PAGE_SIZE, MADV_INVAL); +} + +/* + * Map a contiguous range of physical memory to a KVM + */ +vm_offset_t +pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + while (start < end) { + pmap_kenter(virt, start); + virt += PAGE_SIZE; + start += PAGE_SIZE; + } + return (virt); +} + +vpte_t * +pmap_kpte(vm_offset_t va) +{ + vpte_t *ptep; + + KKASSERT(va >= KvaStart && va < KvaEnd); + ptep = KernelPTA + ((va - KvaStart) >> PAGE_SHIFT); + return(ptep); +} + /* * Enter a mapping into kernel_pmap without any SMP interactions. * @@ -1148,7 +1188,7 @@ pmap_remove_pte(struct pmap *pmap, vpte_t *ptq, vm_offset_t va, * the SMP case. 
*/ if (oldpte & VPTE_G) - cpu_invlpg((void *)va); + madvise((void *)va, PAGE_SIZE, MADV_INVAL); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); @@ -1711,6 +1751,25 @@ retry: return mpte; } +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t rtval; + vpte_t pte; + + if (pmap && (pte = pmap->pm_pdir[va >> SEG_SHIFT]) != 0) { + if (pte & VPTE_PS) { + rtval = pte & ~((vpte_t)(1 << SEG_SHIFT) - 1); + rtval |= va & SEG_MASK; + } else { + pte = *(get_ptbase(pmap) + (va >> PAGE_SHIFT)); + rtval = (pte & VPTE_FRAME) | (va & PAGE_MASK); + } + return(rtval); + } + return(0); +} + #define MAX_INIT_PT (96) /* @@ -2108,7 +2167,7 @@ pmap_zero_page(vm_paddr_t phys) panic("pmap_zero_page: CMAP3 busy"); *(int *)gd->gd_CMAP3 = VPTE_V | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); bzero(gd->gd_CADDR3, PAGE_SIZE); *(int *) gd->gd_CMAP3 = 0; @@ -2131,7 +2190,7 @@ pmap_page_assertzero(vm_paddr_t phys) panic("pmap_zero_page: CMAP3 busy"); *(int *)gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); for (i = 0; i < PAGE_SIZE; i += 4) { if (*(int *)((char *)gd->gd_CADDR3 + i) != 0) { panic("pmap_page_assertzero() @ %p not zero!\n", @@ -2159,7 +2218,7 @@ pmap_zero_page_area(vm_paddr_t phys, int off, int size) if (*(int *) gd->gd_CMAP3) panic("pmap_zero_page: CMAP3 busy"); *(int *) gd->gd_CMAP3 = VPTE_V | VPTE_R | VPTE_W | (phys & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR3); + madvise(gd->gd_CADDR3, PAGE_SIZE, MADV_INVAL); bzero((char *)gd->gd_CADDR3 + off, size); *(int *) gd->gd_CMAP3 = 0; @@ -2187,8 +2246,8 @@ pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) *(int *) gd->gd_CMAP1 = VPTE_V | (src & PG_FRAME) | PG_A; *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR1); - cpu_invlpg(gd->gd_CADDR2); + madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); + madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); bcopy(gd->gd_CADDR1, gd->gd_CADDR2, PAGE_SIZE); @@ -2218,8 +2277,8 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) *(int *) gd->gd_CMAP1 = VPTE_V | (src & VPTE_FRAME) | VPTE_A; *(int *) gd->gd_CMAP2 = VPTE_V | VPTE_R | VPTE_W | (dst & VPTE_FRAME) | VPTE_A | VPTE_M; - cpu_invlpg(gd->gd_CADDR1); - cpu_invlpg(gd->gd_CADDR2); + madvise(gd->gd_CADDR1, PAGE_SIZE, MADV_INVAL); + madvise(gd->gd_CADDR2, PAGE_SIZE, MADV_INVAL); bcopy((char *)gd->gd_CADDR1 + (src & PAGE_MASK), (char *)gd->gd_CADDR2 + (dst & PAGE_MASK), @@ -2709,8 +2768,11 @@ pmap_activate(struct proc *p) #if defined(SWTCH_OPTIM_STATS) tlb_flush_count++; #endif + panic("pmap_activate"); /* XXX store vmspace id in context */ +#if 0 p->p_thread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pdir); load_cr3(p->p_thread->td_pcb->pcb_cr3); +#endif } void diff --git a/sys/platform/vkernel/platform/pmap_inval.c b/sys/platform/vkernel/platform/pmap_inval.c index c59011ec03..b4a5594acc 100644 --- a/sys/platform/vkernel/platform/pmap_inval.c +++ b/sys/platform/vkernel/platform/pmap_inval.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.1 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.2 2007/01/05 22:18:20 dillon Exp $ */ /* @@ -52,6 +52,8 @@ #include #include +#include + #include #include #include @@ -64,22 +66,19 @@ #include #include -#ifdef SMP - static void _cpu_invltlb(void *dummy) { - cpu_invltlb(); + /* XXX madvise over entire address space is really expensive */ + madvise((void *)KvaStart, KvaSize, MADV_INVAL); } static void _cpu_invl1pg(void *data) { - cpu_invlpg(data); + madvise(data, PAGE_SIZE, MADV_INVAL); } -#endif - /* * Initialize for add or flush */ @@ -142,9 +141,9 @@ pmap_inval_flush(pmap_inval_info_t info) lwkt_cpusync_finish(&info->pir_cpusync); #else if (info->pir_flags & PIRF_INVLTLB) - cpu_invltlb(); + _cpu_invltlb(NULL); else if (info->pir_flags & PIRF_INVL1PG) - cpu_invlpg(info->pir_cpusync.cs_data); + _cpu_invl1pg(info->pir_cpusync.cs_data); #endif info->pir_flags = 0; } diff --git a/sys/platform/vkernel/include/md_var.h b/sys/platform/vkernel/platform/sysarch.c similarity index 78% copy from sys/platform/vkernel/include/md_var.h copy to sys/platform/vkernel/platform/sysarch.c index 628dac29f2..0ddcaf9d54 100644 --- a/sys/platform/vkernel/include/md_var.h +++ b/sys/platform/vkernel/platform/sysarch.c @@ -31,29 +31,30 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/platform/vkernel/include/md_var.h,v 1.2 2007/01/02 04:24:26 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/sysarch.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ */ - -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -#ifndef _SYS_TYPES_H_ #include -#endif -#ifndef _SYS_VKERNEL_H_ -#include -#endif - -extern char sigcode[]; -extern int szsigcode; -extern vpte_t *KernelPTA; -extern vpte_t *KernelPTD; -extern vm_offset_t crashdumpmap; +#include +#include +#include +#include +#include -struct mdglobaldata; +int +sys_sysarch(struct sysarch_args *uap) +{ + return (EOPNOTSUPP); +} -void cpu_gdinit (struct mdglobaldata *gd, int cpu); -void cpu_idle_restore (void); +int +cpu_set_iopl(void) +{ + return (EOPNOTSUPP); +} -#endif +int +cpu_clr_iopl(void) +{ + return (EOPNOTSUPP); +} diff --git a/sys/platform/vkernel/platform/machintr.c b/sys/platform/vkernel/platform/systimer.c similarity index 66% copy from sys/platform/vkernel/platform/machintr.c copy to sys/platform/vkernel/platform/systimer.c index 84fe6aff25..3d649c812c 100644 --- a/sys/platform/vkernel/platform/machintr.c +++ b/sys/platform/vkernel/platform/systimer.c @@ -31,61 +31,66 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
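The pmap and pmap_inval changes above replace the hardware invlpg/invltlb primitives with host madvise() calls, since invalidations have to be requested from the host kernel that actually maps the vkernel's address space. A condensed sketch of that idiom (illustration only; MADV_INVAL is the DragonFly-specific advice used by the patch, and the helper names here are hypothetical stand-ins for _cpu_invl1pg()/_cpu_invltlb()):

#include <sys/mman.h>
#include <stddef.h>

#define VPAGE_SIZE  4096        /* stand-in for the kernel's PAGE_SIZE */

/* One-page invalidation, as _cpu_invl1pg() and pmap_kenter_sync_quick() do. */
static void
vkernel_invlpg(void *va)
{
    madvise(va, VPAGE_SIZE, MADV_INVAL);
}

/* Whole-"TLB" invalidation, as _cpu_invltlb() does over KvaStart..KvaSize. */
static void
vkernel_invltlb(void *kva_start, size_t kva_size)
{
    madvise(kva_start, kva_size, MADV_INVAL);
}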
* - * $DragonFly: src/sys/platform/vkernel/platform/machintr.c,v 1.2 2006/12/26 20:46:15 dillon Exp $ + * $DragonFly: src/sys/platform/vkernel/platform/systimer.c,v 1.1 2007/01/05 22:18:20 dillon Exp $ */ #include -#include -#include -#include +#include +#include +#include +#include +#include -static void dummy_intrdis(int); -static void dummy_intren(int); -static int dummy_vectorctl(int, int, int); -static int dummy_setvar(int, const void *); -static int dummy_getvar(int, void *); -static void dummy_finalize(void); +#include -struct machintr_abi MachIntrABI = { - MACHINTR_GENERIC, - dummy_intrdis, - dummy_intren, - dummy_vectorctl, - dummy_setvar, - dummy_getvar, - dummy_finalize -}; +int disable_rtc_set; +SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, + CTLFLAG_RW, &disable_rtc_set, 0, ""); -static void -dummy_intrdis(int intr) +int adjkerntz; +int wall_cmos_clock = 1; + +void +cpu_initclocks(void) { + panic("cpu_initclocks"); } -static void -dummy_intren(int intr) +void +cputimer_intr_config(struct cputimer *timer) { + panic("cputimer_intr_config"); } -static int -dummy_vectorctl(int op, int intr, int flags) +void +cputimer_intr_reload(sysclock_t reload) { - return (EOPNOTSUPP); + panic("cputimer_intr_reload"); } -static int -dummy_setvar(int varid, const void *buf) +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + */ +void +inittodr(time_t base) { - return (ENOENT); + panic("inittodr"); } -static int -dummy_getvar(int varid, void *buf) +/* + * Write system time back to the RTC + */ +void +resettodr(void) { - return (ENOENT); + panic("resettodr"); } -static void -dummy_finalize(void) +void +DELAY(int usec) { + usleep(usec); } + -- 2.41.0
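Finally, the console and systimer code above lean directly on host system calls: vconsputc() and vconsgetc() are write(1) and read(0), vconscheckc() polls with the DragonFly extended __pread(..., O_FNONBLOCKING, -1LL) call, and DELAY() maps to usleep(). A portable user-space approximation (illustration only; fcntl() with O_NONBLOCK stands in for the non-blocking pread used by the kernel):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void
cons_putc(int c)
{
    char cc = c;

    write(1, &cc, 1);               /* vconsputc(): emit on host stdout */
}

static int
cons_getc(void)
{
    unsigned char c;

    if (read(0, &c, 1) == 1)        /* vconsgetc(): blocking read of stdin */
        return (c);
    return (-1);
}

static int
cons_checkc(void)
{
    unsigned char c;
    int flags = fcntl(0, F_GETFL);
    int n;

    fcntl(0, F_SETFL, flags | O_NONBLOCK);  /* poll without blocking */
    n = read(0, &c, 1);
    fcntl(0, F_SETFL, flags);
    return (n == 1 ? c : -1);
}

int
main(void)
{
    cons_putc('>');
    if (cons_checkc() == -1)
        usleep(10000);              /* DELAY(10000) maps to usleep() */
    printf("\ngot: %d\n", cons_getc());
    return 0;
}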