2 * Copyright (c) 1997 Jonathan Lemon
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $
28 * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.26 2008/08/02 01:14:43 dillon Exp $
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/sysctl.h>
42 #include <vm/vm_map.h>
43 #include <vm/vm_page.h>
47 #include <sys/thread2.h>
48 #include <sys/mplock2.h>
50 #include <machine/md_var.h>
51 #include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
52 #include <machine/psl.h>
53 #include <machine/specialreg.h>
54 #include <machine/sysarch.h>
55 #include <machine/clock.h>
56 #include <bus/isa/isa.h>
57 #include <bus/isa/rtc.h>
58 #include <machine_base/isa/timerreg.h>
/*
 * External entry points, sizing constants and the members of the vm86
 * region layout.
 * NOTE(review): some lines of this span are not visible in this view
 * (the header of the layout struct and the #define that owns the two
 * continuation lines after IOMAP_SIZE); confirm against the full file.
 */
60 extern int i386_extend_pcb (struct lwp *);
62 extern struct pcb *vm86pcb;
64 extern int vm86_bioscall(struct vm86frame *);
65 extern void vm86_biosret(struct vm86frame *);
/* Number of page table entries covering 1M + 64K of address space. */
67 #define PGTABLE_SIZE ((1024 + 64) * 1024 / PAGE_SIZE)
/* Bytes in the interrupt bitmap: one bit per vector, 32 * 8 = 256. */
68 #define INTMAP_SIZE 32
/* Bytes in the I/O bitmap (vml_iomap below). */
69 #define IOMAP_SIZE ctob(IOPAGES)
71 (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
72 INTMAP_SIZE + IOMAP_SIZE + 1)
/*
 * Members accessed through the struct vm86_layout pointer that
 * vm86_initialize and the timer helpers obtain from vm86paddr.
 */
75 pt_entry_t vml_pgtbl[PGTABLE_SIZE];
77 struct pcb_ext vml_ext;
78 char vml_intmap[INTMAP_SIZE];
79 char vml_iomap[IOMAP_SIZE];
/* Byte after the I/O map; vm86_initialize sets it to 0xff. */
80 char vml_iomap_trailer;
83 void vm86_prepcall(struct vm86frame *);
/* Instruction prefix bytes tested by vm86_emulate. */
107 #define OPERAND_SIZE_PREFIX 0x66
108 #define ADDRESS_SIZE_PREFIX 0x67
/* Applied to the saved flags before a flags image is pushed: drops VM, RF and I. */
109 #define PUSH_MASK ~(PSL_VM | PSL_RF | PSL_I)
/*
 * Selects the bits a popped flags image may replace; VIP, VIF, VM, RF
 * and IOPL are always kept from the current flags.
 */
110 #define POP_MASK ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
112 static void vm86_setup_timer_fault(void);
113 static void vm86_clear_timer_fault(void);
/* Set to 1 by vm86_emulate; tested and cleared again by vm86_intcall. */
115 static int vm86_blew_up_timer;
/* Tunable exported as debug.timer_warn; initial value 1. */
117 static int timer_warn = 1;
118 SYSCTL_INT(_debug, OID_AUTO, timer_warn, CTLFLAG_RW, &timer_warn, 0,
119 "Warn if BIOS has played with the 8254 timer");
/*
 * Turn a segment:offset pair into an address: (sel * 16) + off,
 * returned as a caddr_t.
 */
121 static __inline caddr_t
122 MAKE_ADDR(u_short sel, u_short off)
124 return ((caddr_t)((sel << 4) + off));
/*
 * Takes a 32-bit vector value and two u_short output pointers.
 * NOTE(review): the body is not visible in this view; presumably it is
 * the inverse of MAKE_VEC (selector from the upper half, offset from
 * the lower half) - confirm against the full file.
 */
128 GET_VEC(u_int vec, u_short *sel, u_short *off)
/*
 * Pack a segment:offset pair into one 32-bit value: selector in the
 * upper 16 bits, offset in the lower 16 bits.
 */
134 static __inline u_int
135 MAKE_VEC(u_short sel, u_short off)
137 return ((sel << 16) | off);
/*
 * Store the 16-bit value x at the frame stack address ss:sp using
 * susword().
 * NOTE(review): any adjustment of vmf_sp is not visible in this view.
 */
141 PUSH(u_short x, struct vm86frame *vmf)
144 susword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
/*
 * Store the 32-bit value x at the frame stack address ss:sp using
 * suword().
 * NOTE(review): any adjustment of vmf_sp is not visible in this view.
 */
148 PUSHL(u_int x, struct vm86frame *vmf)
151 suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
/*
 * Fetch a 16-bit value from the frame stack address ss:sp using
 * fusword().
 * NOTE(review): the vmf_sp adjustment and the return statement are not
 * visible in this view.
 */
154 static __inline u_short
155 POP(struct vm86frame *vmf)
157 u_short x = fusword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
/*
 * Fetch a 32-bit value from the frame stack address ss:sp using
 * fuword().
 * NOTE(review): the vmf_sp adjustment and the return statement are not
 * visible in this view.
 */
163 static __inline u_int
164 POPL(struct vm86frame *vmf)
166 u_int x = fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
/*
 * Emulate the instruction found at cs:ip of the given vm86 frame.
 *
 * The visible code has two halves selected by vm86->vm86_has_vme: the
 * first keeps the virtual interrupt state (PSL_VIF, PSL_VIP) in
 * vmf_eflags, the second keeps it in vm86->vm86_eflags.
 *
 * NOTE(review): the case labels, return statements and several braces
 * are not visible in this view, so the comments below name operations
 * by what the visible statements do, not by opcode.
 */
176 vm86_emulate(struct vm86frame *vmf)
178 struct vm86_kernel *vm86;
186 * pcb_ext contains the address of the extension area, or zero if
187 * the extension is not present. (This check should not be needed,
188 * as we can't enter vm86 mode until we set up an extension area)
190 if (curthread->td_pcb->pcb_ext == 0)
192 vm86 = &curthread->td_pcb->pcb_ext->ext_vm86;
194 if (vmf->vmf_eflags & PSL_T)
198 * Instruction emulation
/* Read the opcode byte at cs:ip, stepping over one address-size prefix. */
200 addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
201 i_byte = fubyte(addr);
202 if (i_byte == ADDRESS_SIZE_PREFIX) {
203 i_byte = fubyte(++addr);
208 * I/O emulation (TIMER only, a big hack). Just reenable the
209 * IO bits involved, flag it, and retry the instruction.
/* Remember the timer access for vm86_intcall and stop faulting on it. */
216 vm86_blew_up_timer = 1;
222 vm86_clear_timer_fault();
/* First half: virtual interrupt state lives in vmf_eflags. */
227 if (vm86->vm86_has_vme) {
229 case OPERAND_SIZE_PREFIX:
230 i_byte = fubyte(++addr);
/* 32-bit flags push: the pushed image shows PSL_I when VIF is set. */
234 if (vmf->vmf_eflags & PSL_VIF)
235 PUSHL((vmf->vmf_eflags & PUSH_MASK)
236 | PSL_IOPL | PSL_I, vmf);
238 PUSHL((vmf->vmf_eflags & PUSH_MASK)
240 vmf->vmf_ip += inc_ip;
/* 32-bit flags pop: merge the popped bits, then mirror PSL_I into VIF. */
244 temp_flags = POPL(vmf) & POP_MASK;
245 vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
246 | temp_flags | PSL_VM | PSL_I;
247 vmf->vmf_ip += inc_ip;
248 if (temp_flags & PSL_I) {
249 vmf->vmf_eflags |= PSL_VIF;
250 if (vmf->vmf_eflags & PSL_VIP)
253 vmf->vmf_eflags &= ~PSL_VIF;
259 /* VME faults here if VIP is set, but does not set VIF. */
261 vmf->vmf_eflags |= PSL_VIF;
262 vmf->vmf_ip += inc_ip;
263 if ((vmf->vmf_eflags & PSL_VIP) == 0) {
264 uprintf("fatal sti\n");
269 /* VME if no redirection support */
273 /* VME if trying to set PSL_TF, or PSL_I when VIP is set */
/* 16-bit flags pop, same merge as the 32-bit form above. */
275 temp_flags = POP(vmf) & POP_MASK;
276 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
277 | temp_flags | PSL_VM | PSL_I;
278 vmf->vmf_ip += inc_ip;
279 if (temp_flags & PSL_I) {
280 vmf->vmf_eflags |= PSL_VIF;
281 if (vmf->vmf_eflags & PSL_VIP)
284 vmf->vmf_eflags &= ~PSL_VIF;
288 /* VME if trying to set PSL_TF, or PSL_I when VIP is set */
/* Pop ip, cs and then the flags image from the vm86 stack. */
290 vmf->vmf_ip = POP(vmf);
291 vmf->vmf_cs = POP(vmf);
292 temp_flags = POP(vmf) & POP_MASK;
293 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
294 | temp_flags | PSL_VM | PSL_I;
295 if (temp_flags & PSL_I) {
296 vmf->vmf_eflags |= PSL_VIF;
297 if (vmf->vmf_eflags & PSL_VIP)
300 vmf->vmf_eflags &= ~PSL_VIF;
/* Second half: virtual interrupt state lives in vm86->vm86_eflags. */
309 case OPERAND_SIZE_PREFIX:
310 i_byte = fubyte(++addr);
314 if (vm86->vm86_eflags & PSL_VIF)
315 PUSHL((vmf->vmf_flags & PUSH_MASK)
316 | PSL_IOPL | PSL_I, vmf);
318 PUSHL((vmf->vmf_flags & PUSH_MASK)
320 vmf->vmf_ip += inc_ip;
324 temp_flags = POPL(vmf) & POP_MASK;
325 vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
326 | temp_flags | PSL_VM | PSL_I;
327 vmf->vmf_ip += inc_ip;
328 if (temp_flags & PSL_I) {
329 vm86->vm86_eflags |= PSL_VIF;
330 if (vm86->vm86_eflags & PSL_VIP)
333 vm86->vm86_eflags &= ~PSL_VIF;
/* Clear the virtual interrupt flag and step past the instruction. */
340 vm86->vm86_eflags &= ~PSL_VIF;
341 vmf->vmf_ip += inc_ip;
345 /* if there is a pending interrupt, go to the emulator */
346 vm86->vm86_eflags |= PSL_VIF;
347 vmf->vmf_ip += inc_ip;
348 if (vm86->vm86_eflags & PSL_VIP)
/* 16-bit flags push: the pushed image shows PSL_I when VIF is set. */
353 if (vm86->vm86_eflags & PSL_VIF)
354 PUSH((vmf->vmf_flags & PUSH_MASK)
355 | PSL_IOPL | PSL_I, vmf);
357 PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
358 vmf->vmf_ip += inc_ip;
/*
 * Software interrupt: the byte after the opcode is the vector number.
 * Its bit in vm86_intmap is tested first; the visible fall-through
 * pushes flags, cs and the return ip, then loads cs:ip from the vector
 * table entry at address i_byte * 4 and clears PSL_T and VIF.
 */
362 i_byte = fubyte(addr + 1);
363 if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
365 if (vm86->vm86_eflags & PSL_VIF)
366 PUSH((vmf->vmf_flags & PUSH_MASK)
367 | PSL_IOPL | PSL_I, vmf);
369 PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
370 PUSH(vmf->vmf_cs, vmf);
371 PUSH(vmf->vmf_ip + inc_ip + 1, vmf); /* increment IP */
372 GET_VEC(fuword((caddr_t)(i_byte * 4)),
373 &vmf->vmf_cs, &vmf->vmf_ip);
374 vmf->vmf_flags &= ~PSL_T;
375 vm86->vm86_eflags &= ~PSL_VIF;
/* Pop ip, cs and then the flags image from the vm86 stack. */
379 vmf->vmf_ip = POP(vmf);
380 vmf->vmf_cs = POP(vmf);
381 temp_flags = POP(vmf) & POP_MASK;
382 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
383 | temp_flags | PSL_VM | PSL_I;
384 if (temp_flags & PSL_I) {
385 vm86->vm86_eflags |= PSL_VIF;
386 if (vm86->vm86_eflags & PSL_VIP)
389 vm86->vm86_eflags &= ~PSL_VIF;
/* 16-bit flags pop: merge the popped bits, then mirror PSL_I into VIF. */
394 temp_flags = POP(vmf) & POP_MASK;
395 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
396 | temp_flags | PSL_VM | PSL_I;
397 vmf->vmf_ip += inc_ip;
398 if (temp_flags & PSL_I) {
399 vm86->vm86_eflags |= PSL_VIF;
400 if (vm86->vm86_eflags & PSL_VIP)
403 vm86->vm86_eflags &= ~PSL_VIF;
/*
 * One-time setup of the vm86 region at vm86paddr: zero and fill in the
 * scratch PCB and the PCB extension, point the TSS I/O map offset and
 * the interrupt map at the layout arrays, build the TSS descriptor and
 * hand the unused tail of the region to msgbufinit().
 *
 * NOTE(review): the local declarations and the assignments of pcb and
 * ext are not visible in this view.
 */
411 vm86_initialize(void)
415 struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
418 struct soft_segment_descriptor ssd = {
419 0, /* segment base address (overwritten) */
420 0, /* length (overwritten) */
421 SDT_SYS386TSS, /* segment type */
422 0, /* priority level */
423 1, /* descriptor present */
425 0, /* default 16 size */
430 * this should be a compile time error, but cpp doesn't grok sizeof().
/* The layout must fit in ctob(3) bytes. */
432 if (sizeof(struct vm86_layout) > ctob(3))
433 panic("struct vm86_layout exceeds space allocated in locore.s");
436 * Below is the memory layout that we use for the vm86 region.
444 * +--------+ +--------+ <--------- vm86paddr
445 * | | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
447 * | | | PCB | size: ~240 bytes
448 * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
454 * +--------+ | bitmap |
461 * A rudimentary PCB must be installed, in order to get to the
462 * PCB extension area. We use the PCB area as a scratchpad for
463 * data storage, the layout of which is shown below.
465 * pcb_esi = new PTD entry 0
466 * pcb_ebp = pointer to frame on vm86 stack
467 * pcb_esp = stack frame pointer at time of switch
468 * pcb_ebx = va of vm86 page table
469 * pcb_eip = argument pointer to initial call
470 * pcb_spare[0] = saved TSS descriptor, word 0
471 * pcb_space[1] = saved TSS descriptor, word 1
/*
 * NOTE(review): the last scratchpad entry above is spelled pcb_space[1];
 * presumably pcb_spare[1] is meant - confirm against struct pcb.
 */
/* Aliases for the scratchpad slots listed above. */
473 #define new_ptd pcb_esi
474 #define vm86_frame pcb_ebp
475 #define pgtable_va pcb_ebx
/* Scratch PCB: PTD entry, frame pointer just below vm86paddr, page table va. */
480 bzero(pcb, sizeof(struct pcb));
481 pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
482 pcb->vm86_frame = (pt_entry_t)vm86paddr - sizeof(struct vm86frame);
483 pcb->pgtable_va = (vm_offset_t)vm86paddr;
/*
 * PCB extension: ring 0 stack at vm86paddr, and the I/O map offset
 * (distance from the TSS to vml_iomap) stored in the upper 16 bits of
 * tss_ioopt.
 */
486 bzero(ext, sizeof(struct pcb_ext));
487 ext->ext_tss.tss_esp0 = (vm_offset_t)vm86paddr;
488 ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
489 ext->ext_tss.tss_ioopt =
490 ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
491 ext->ext_iomap = vml->vml_iomap;
492 ext->ext_vm86.vm86_intmap = vml->vml_intmap;
/* Record VME use only when the CPU advertises it and CR4.VME is set. */
494 if (cpu_feature & CPUID_VME)
495 ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
/*
 * Walk the interrupt map and the I/O map as one run of u_int words
 * (loop body not visible in this view), then set the trailer byte.
 */
497 addr = (u_int *)ext->ext_vm86.vm86_intmap;
498 for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
500 vml->vml_iomap_trailer = 0xff;
/* Build the TSS descriptor: base at ext_tss, limit TSS_SIZE - 1. */
502 ssd.ssd_base = (u_int)&ext->ext_tss;
503 ssd.ssd_limit = TSS_SIZE - 1;
504 ssdtosd(&ssd, &ext->ext_tssd);
510 * use whatever is leftover of the vm86 page layout as a
511 * message buffer so we can capture early output.
513 msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
514 ctob(3) - sizeof(struct vm86_layout));
/*
 * Look up pagenum in the context page map and return the kernel
 * virtual address recorded for it.
 * NOTE(review): the not-found return is not visible in this view.
 */
519 vm86_getpage(struct vm86context *vmc, int pagenum)
/* Linear scan over the npages entries in use. */
523 for (i = 0; i < vmc->npages; i++)
524 if (vmc->pmap[i].pte_num == pagenum)
525 return (vmc->pmap[i].kva);
/*
 * Record a page in the context page map: entry i receives flags, kva
 * and pagenum. The function panics through the bad label; the visible
 * jump to it is taken when the map already holds VM86_PMAPSIZE entries.
 *
 * NOTE(review): the action taken when pagenum is already present, the
 * condition guarding the kmalloc() call, the source of flags and the
 * return value are not visible in this view.
 */
530 vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
/* Scan the existing entries for pagenum. */
534 for (i = 0; i < vmc->npages; i++)
535 if (vmc->pmap[i].pte_num == pagenum)
538 if (vmc->npages == VM86_PMAPSIZE)
539 goto bad; /* XXX grow map? */
/* Allocate one page of kernel memory for the mapping. */
542 kva = (vm_offset_t)kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
547 vmc->pmap[i].flags = flags;
548 vmc->pmap[i].kva = kva;
549 vmc->pmap[i].pte_num = pagenum;
552 panic("vm86_addpage: not enough room, or overlap");
/*
 * Rebuild the frame flags for the current thread: bits selected by
 * VME_USERCHANGE (VME in use) or VM_USERCHANGE (otherwise) are merged
 * with the remaining bits, and PSL_VM is always set. In the non-VME
 * branch the incoming flags are first saved in vm86->vm86_eflags.
 *
 * NOTE(review): the else line between the two branches is not visible
 * in this view.
 */
556 vm86_initflags(struct vm86frame *vmf)
558 int eflags = vmf->vmf_eflags;
559 struct vm86_kernel *vm86 = &curthread->td_pcb->pcb_ext->ext_vm86;
561 if (vm86->vm86_has_vme) {
562 eflags = (vmf->vmf_eflags & ~VME_USERCHANGE) |
563 (eflags & VME_USERCHANGE) | PSL_VM;
565 vm86->vm86_eflags = eflags; /* save VIF, VIP */
566 eflags = (vmf->vmf_eflags & ~VM_USERCHANGE) |
567 (eflags & VM_USERCHANGE) | PSL_VM;
569 vmf->vmf_eflags = eflags | PSL_VM;
573 * called from vm86_bioscall, while in vm86 address space, to finalize setup.
/*
 * Prepare a vm86 frame for a BIOS call. When the masked trap number is
 * at most 0xff it is treated as an interrupt number: a small
 * CLI / INTn / STI / HLT stub is patched with that number, copied to
 * address 0xA00 and ip is pointed at it. The stack pointer is set just
 * below 0x1000, the four kernel segment slots are zeroed and the
 * flags are set to VIF | VM | USER.
 *
 * NOTE(review): the declaration of intcall and the else branch are not
 * visible in this view.
 */
576 vm86_prepcall(struct vm86frame *vmf)
578 uintptr_t addr[] = { 0xA00, 0x1000 }; /* code, stack */
580 CLI, INTn, 0x00, STI, HLT
583 if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
584 /* interrupt call requested */
/* Byte 2 of the stub is the operand of INTn. */
585 intcall[2] = (u_char)(vmf->vmf_trapno & 0xff);
586 memcpy((void *)addr[0], (void *)intcall, sizeof(intcall));
587 vmf->vmf_ip = addr[0];
590 vmf->vmf_sp = addr[1] - 2; /* keep aligned */
591 vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = vmf->kernel_gs = 0;
593 vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
598 * vm86 trap handler; determines whether routine succeeded or not.
599 * Called while in vm86 space, returns to calling process.
601 * An MP lock ref is held on entry from trap() and must be released prior
602 * to returning to the VM86 call.
/*
 * Decide the result of a vm86 call from the trapping instruction. If
 * the byte at cs:ip is HLT the result stored in vmf_trapno is the
 * carry bit of the flags; the other visible assignment shifts the
 * trap number left by 16.
 *
 * NOTE(review): the else line, the handling of have_mplock and the
 * return to the caller are not visible in this view.
 */
605 vm86_trap(struct vm86frame *vmf, int have_mplock)
609 /* "should not happen" */
610 if ((vmf->vmf_eflags & PSL_VM) == 0)
611 panic("vm86_trap called, but not in vm86 mode");
613 addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
614 if (*(u_char *)addr == HLT)
615 vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
617 vmf->vmf_trapno = vmf->vmf_trapno << 16;
/*
 * Run BIOS interrupt intnum (0 to 0xff) through vm86_bioscall() with
 * the given frame. Timer port faults are armed before the call; when
 * the emulator reports that the BIOS touched the timer the flag is
 * cleared and a warning is printed.
 *
 * NOTE(review): the value returned for an out-of-range intnum, the
 * use of timer_warn, the rest of the warning and the final return are
 * not visible in this view.
 */
625 vm86_intcall(int intnum, struct vm86frame *vmf)
629 if (intnum < 0 || intnum > 0xff)
633 ASSERT_MP_LOCK_HELD();
635 vm86_setup_timer_fault();
636 vmf->vmf_trapno = intnum;
637 error = vm86_bioscall(vmf);
640 * Yes, this happens, especially with video BIOS calls. The BIOS
641 * will sometimes eat timer 2 for lunch, and we need timer 2.
643 if (vm86_blew_up_timer) {
644 vm86_blew_up_timer = 0;
647 kprintf("Warning: BIOS played with the 8254, "
656 * struct vm86context contains the page table to use when making
657 * vm86 calls. If intnum is a valid interrupt number (0-255), then
658 * the "interrupt trampoline" will be used, otherwise we use the
659 * caller's cs:ip routine.
/*
 * Make a vm86 call with the pages of vmc mapped in: each context page
 * is entered into the vm86 page table (saving the entry it replaces),
 * vm86_bioscall() is run, and the saved entries are put back.
 *
 * NOTE(review): the declaration of page and the final return are not
 * visible in this view.
 */
662 vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
664 pt_entry_t *pte = vm86paddr;
666 int i, entry, retval;
669 ASSERT_MP_LOCK_HELD();
/* Install the context pages as valid, writable, user pages. */
671 for (i = 0; i < vmc->npages; i++) {
672 page = vtophys(vmc->pmap[i].kva & PG_FRAME);
673 entry = vmc->pmap[i].pte_num;
674 vmc->pmap[i].old_pte = pte[entry];
675 pte[entry] = page | PG_V | PG_RW | PG_U;
678 vmf->vmf_trapno = intnum;
679 retval = vm86_bioscall(vmf);
/* Restore the page table entries saved above. */
681 for (i = 0; i < vmc->npages; i++) {
682 entry = vmc->pmap[i].pte_num;
683 pte[entry] = vmc->pmap[i].old_pte;
/*
 * Translate a vm86 sel:off address into a kernel virtual address by
 * finding its page in the context page map and adding the offset
 * within the page.
 * NOTE(review): the not-found return is not visible in this view.
 */
690 vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
695 addr = (vm_offset_t)MAKE_ADDR(sel, off);
696 page = addr >> PAGE_SHIFT;
697 for (i = 0; i < vmc->npages; i++)
698 if (page == vmc->pmap[i].pte_num)
699 return (vmc->pmap[i].kva + (addr & PAGE_MASK));
/*
 * Reverse of vm86_getaddr: find the context page whose kernel mapping
 * contains kva and report it as a selector and offset. Panics when no
 * page matches.
 *
 * NOTE(review): the rest of the parameter list and the return taken
 * on a match are not visible in this view.
 */
704 vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
709 for (i = 0; i < vmc->npages; i++)
710 if (kva >= vmc->pmap[i].kva &&
711 kva < vmc->pmap[i].kva + PAGE_SIZE) {
712 *off = kva - vmc->pmap[i].kva;
/* Page number shifted left by 8; presumably page address / 16 with 4K pages - confirm. */
713 *sel = vmc->pmap[i].pte_num << 8;
717 panic("vm86_getptr: address not found");
/*
 * vm86 sysarch entry: copy in the argument header, make sure the lwp
 * has a PCB extension, then act on the requested sub-function. The
 * visible sub-functions initialise the per-thread vm86 state, switch
 * CR4.VME on or off, report CR4.VME, and run a BIOS interrupt on
 * behalf of a caller that passes the PRIV_ROOT check.
 *
 * NOTE(review): the switch statement, its case labels and the return
 * statements are not visible in this view.
 */
721 vm86_sysarch(struct lwp *lp, char *args)
724 struct i386_vm86_args ua;
725 struct vm86_kernel *vm86;
727 if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
/* Create the PCB extension on first use. */
730 if (lp->lwp_thread->td_pcb->pcb_ext == 0)
731 if ((error = i386_extend_pcb(lp)) != 0)
733 vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
737 struct vm86_init_args sa;
739 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
741 if (cpu_feature & CPUID_VME)
742 vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
744 vm86->vm86_has_vme = 0;
745 vm86->vm86_inited = 1;
746 vm86->vm86_debug = sa.debug;
/* NOTE(review): the literal 32 equals INTMAP_SIZE; consider using the macro. */
747 bcopy(&sa.int_map, vm86->vm86_intmap, 32);
753 struct vm86_vme_args sa;
755 if ((cpu_feature & CPUID_VME) == 0)
/*
 * NOTE(review): assignment used as a condition without the extra
 * parentheses the other copyin checks in this function use.
 */
758 if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
761 load_cr4(rcr4() | CR4_VME);
763 load_cr4(rcr4() & ~CR4_VME);
769 struct vm86_vme_args sa;
771 sa.state = (rcr4() & CR4_VME ? 1 : 0);
772 error = copyout(&sa, ua.sub_args, sizeof(sa));
777 struct vm86_intcall_args sa;
/* BIOS calls are restricted to callers passing the PRIV_ROOT check. */
779 if ((error = priv_check_cred(lp->lwp_proc->p_ucred, PRIV_ROOT, 0)))
781 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
783 if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
785 error = copyout(&sa, ua.sub_args, sizeof(sa));
796 * Set up the VM86 I/O map to take faults on the timer
/*
 * Set the I/O map bits for the four timer ports (mode register and
 * counters 0 to 2). Each port uses byte (port >> 3) and bit (port & 7)
 * of vml_iomap.
 */
799 vm86_setup_timer_fault(void)
801 struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
803 vml->vml_iomap[TIMER_MODE >> 3] |= 1 << (TIMER_MODE & 7);
804 vml->vml_iomap[TIMER_CNTR0 >> 3] |= 1 << (TIMER_CNTR0 & 7);
805 vml->vml_iomap[TIMER_CNTR1 >> 3] |= 1 << (TIMER_CNTR1 & 7);
806 vml->vml_iomap[TIMER_CNTR2 >> 3] |= 1 << (TIMER_CNTR2 & 7);
810 * Set up the VM86 I/O map to not fault on the timer
/*
 * Clear the I/O map bits for the four timer ports (mode register and
 * counters 0 to 2); the exact inverse of vm86_setup_timer_fault.
 */
813 vm86_clear_timer_fault(void)
815 struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
817 vml->vml_iomap[TIMER_MODE >> 3] &= ~(1 << (TIMER_MODE & 7));
818 vml->vml_iomap[TIMER_CNTR0 >> 3] &= ~(1 << (TIMER_CNTR0 & 7));
819 vml->vml_iomap[TIMER_CNTR1 >> 3] &= ~(1 << (TIMER_CNTR1 & 7));
820 vml->vml_iomap[TIMER_CNTR2 >> 3] &= ~(1 << (TIMER_CNTR2 & 7));