Merge branch 'vendor/AWK'
[dragonfly.git] / sys / platform / pc32 / i386 / vm86.c
1 /*-
2  * Copyright (c) 1997 Jonathan Lemon
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $
28  * $DragonFly: src/sys/platform/pc32/i386/vm86.c,v 1.26 2008/08/02 01:14:43 dillon Exp $
29  */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/proc.h>
35 #include <sys/priv.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/sysctl.h>
39
40 #include <vm/vm.h>
41 #include <vm/pmap.h>
42 #include <vm/vm_map.h>
43 #include <vm/vm_page.h>
44
45 #include <sys/user.h>
46
47 #include <sys/thread2.h>
48 #include <sys/mplock2.h>
49
50 #include <machine/md_var.h>
51 #include <machine/pcb_ext.h>    /* pcb.h included via sys/user.h */
52 #include <machine/psl.h>
53 #include <machine/specialreg.h>
54 #include <machine/sysarch.h>
55 #include <machine/clock.h>
56 #include <bus/isa/isa.h>
57 #include <bus/isa/rtc.h>
58 #include <machine_base/isa/timerreg.h>
59
/*
 * Symbols defined outside this file.
 *
 * i386_extend_pcb() is called by vm86_sysarch() when the lwp has no
 * PCB extension yet.  vm86pa is OR'ed with page-table flag bits in
 * vm86_initialize(), and vm86pcb is set there to the PCB built inside
 * the vm86 region.
 */
extern int i386_extend_pcb      (struct lwp *);
extern int vm86pa;
extern struct pcb *vm86pcb;

/*
 * vm86_bioscall() is invoked by vm86_intcall() and vm86_datacall();
 * vm86_biosret() is invoked at the end of vm86_trap().
 */
extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);
66
/*
 * Sizing of the pieces of struct vm86_layout.
 *
 * PGTABLE_SIZE - number of page table entries needed to cover
 *                1M + 64K of address space.
 * INTMAP_SIZE  - bytes in the interrupt bitmap: 32 bytes = 256 bits,
 *                one bit per interrupt number (see the INTn case in
 *                vm86_emulate()).
 * IOMAP_SIZE   - bytes in the I/O permission bitmap.
 * TSS_SIZE     - value used (minus one) as the TSS descriptor limit in
 *                vm86_initialize(); the trailing "+ 1" presumably
 *                accounts for vml_iomap_trailer.
 */
#define PGTABLE_SIZE    ((1024 + 64) * 1024 / PAGE_SIZE)
#define INTMAP_SIZE     32
#define IOMAP_SIZE      ctob(IOPAGES)
#define TSS_SIZE \
        (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
         INTMAP_SIZE + IOMAP_SIZE + 1)
73
/*
 * Layout of the vm86 region that starts at vm86paddr (see the diagram
 * in vm86_initialize()).  The field order matters: vm86_initialize()
 * clears vml_intmap and vml_iomap with a single loop that relies on
 * them being adjacent.
 */
struct vm86_layout {
        pt_entry_t      vml_pgtbl[PGTABLE_SIZE];  /* vm86 page table */
        struct  pcb vml_pcb;                      /* scratch PCB */
        struct  pcb_ext vml_ext;                  /* PCB extension */
        char    vml_intmap[INTMAP_SIZE];          /* interrupt bitmap */
        char    vml_iomap[IOMAP_SIZE];            /* I/O permission bitmap */
        char    vml_iomap_trailer;                /* set to 0xff at init */
};
82
/* Finalizes a vm86 frame before a BIOS call; defined later in this file. */
void vm86_prepcall(struct vm86frame *);
84
/*
 * Typed address range [start, end].
 * NOTE(review): not referenced anywhere else in the visible part of
 * this file; the meaning of 'type' cannot be determined from here.
 */
struct system_map {
        int             type;
        vm_offset_t     start;
        vm_offset_t     end;
};
90
/*
 * First-byte opcodes (and prefixes) that vm86_emulate() and
 * vm86_prepcall() look at.
 */
#define HLT     0xf4
#define CLI     0xfa
#define STI     0xfb
#define PUSHF   0x9c
#define POPF    0x9d
#define INTn    0xcd
#define IRET    0xcf
#define INB     0xe4
#define INW     0xe5
#define INBDX   0xec
#define INWDX   0xed
#define OUTB    0xe6
#define OUTW    0xe7
#define OUTBDX  0xee
#define OUTWDX  0xef
#define CALLm   0xff
#define OPERAND_SIZE_PREFIX     0x66
#define ADDRESS_SIZE_PREFIX     0x67

/*
 * PUSH_MASK: bits of the frame's flags that survive into a pushed
 *            flags image (VM, RF and I are stripped; the emulator
 *            then ORs in IOPL and, if appropriate, I).
 * POP_MASK:  bits of a popped flags image that are accepted; VIP, VIF,
 *            VM, RF and IOPL are always taken from the current frame.
 *
 * The replacement lists are fully parenthesized so the masks expand
 * safely inside any surrounding expression.
 */
#define PUSH_MASK       (~(PSL_VM | PSL_RF | PSL_I))
#define POP_MASK        (~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL))
111
/* Helpers that set / clear the 8254 timer ports in the vm86 I/O bitmap. */
static void vm86_setup_timer_fault(void);
static void vm86_clear_timer_fault(void);

/*
 * Set by vm86_emulate() when an OUT instruction is seen; checked and
 * cleared by vm86_intcall(), which then restores the timer.
 */
static int vm86_blew_up_timer;

/* When non-zero, vm86_intcall() logs a warning after restoring the timer. */
static int timer_warn = 1;
SYSCTL_INT(_debug, OID_AUTO, timer_warn, CTLFLAG_RW, &timer_warn, 0,
    "Warn if BIOS has played with the 8254 timer");
120
121 static __inline caddr_t
122 MAKE_ADDR(u_short sel, u_short off)
123 {
124         return ((caddr_t)((sel << 4) + off));
125 }
126
127 static __inline void
128 GET_VEC(u_int vec, u_short *sel, u_short *off)
129 {
130         *sel = vec >> 16;
131         *off = vec & 0xffff;
132 }
133
134 static __inline u_int
135 MAKE_VEC(u_short sel, u_short off)
136 {
137         return ((sel << 16) | off);
138 }
139
140 static __inline void
141 PUSH(u_short x, struct vm86frame *vmf)
142 {
143         vmf->vmf_sp -= 2;
144         susword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
145 }
146
147 static __inline void
148 PUSHL(u_int x, struct vm86frame *vmf)
149 {
150         vmf->vmf_sp -= 4;
151         suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
152 }
153
154 static __inline u_short
155 POP(struct vm86frame *vmf)
156 {
157         u_short x = fusword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
158
159         vmf->vmf_sp += 2;
160         return (x);
161 }
162
163 static __inline u_int
164 POPL(struct vm86frame *vmf)
165 {
166         u_int x = fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
167
168         vmf->vmf_sp += 4;
169         return (x);
170 }
171
/*
 * Emulate the instruction at the frame's cs:ip on behalf of vm86 code.
 *
 * The opcode byte is fetched from cs:ip (skipping one address-size
 * prefix) and dispatched:
 *   - IN/OUT opcodes re-enable the timer ports in the I/O bitmap and
 *     return 0 without advancing ip, so the instruction is retried;
 *     OUT opcodes additionally set vm86_blew_up_timer.
 *   - With VME (vm86_has_vme), the virtual interrupt flag is kept in
 *     the frame's eflags (PSL_VIF/PSL_VIP).
 *   - Without VME, the virtual interrupt flag is kept in
 *     vm86->vm86_eflags and CLI/STI/PUSHF/POPF/INTn/IRET are emulated.
 * In both switch statements a 'break' falls through to the trailing
 * 'return (SIGBUS)'.
 *
 * Returns 0 (or SIGTRAP when PSL_T was set in the frame on entry) once
 * the instruction has been emulated, SIGBUS when it was not handled or
 * no PCB extension exists, and SIGKILL for the "fatal sti" case.
 *
 * MPSAFE
 */
int
vm86_emulate(struct vm86frame *vmf)
{
        struct vm86_kernel *vm86;
        caddr_t addr;
        u_char i_byte;
        u_int temp_flags;
        int inc_ip = 1;         /* bytes to advance ip past the insn */
        int retcode = 0;

        /*
         * pcb_ext contains the address of the extension area, or zero if
         * the extension is not present.  (This check should not be needed,
         * as we can't enter vm86 mode until we set up an extension area)
         */
        if (curthread->td_pcb->pcb_ext == 0)
                return (SIGBUS);
        vm86 = &curthread->td_pcb->pcb_ext->ext_vm86;

        /* Trace flag set: successful emulation reports SIGTRAP. */
        if (vmf->vmf_eflags & PSL_T)
                retcode = SIGTRAP;

        /*
         * Instruction emulation
         */
        addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
        i_byte = fubyte(addr);
        if (i_byte == ADDRESS_SIZE_PREFIX) {
                /* skip the prefix and account for it in the ip advance */
                i_byte = fubyte(++addr);
                inc_ip++;
        }

        /*
         * I/O emulation (TIMER only, a big hack).  Just reenable the
         * IO bits involved, flag it, and retry the instruction.
         */
        switch(i_byte) {
        case OUTB:
        case OUTW:
        case OUTBDX:
        case OUTWDX:
                vm86_blew_up_timer = 1;
                /* fall through */
        case INB:
        case INW:
        case INBDX:
        case INWDX:
                vm86_clear_timer_fault();
                /* retry insn */
                return(0);
        }

        if (vm86->vm86_has_vme) {
                switch (i_byte) {
                case OPERAND_SIZE_PREFIX:
                        /* 32-bit forms of PUSHF / POPF */
                        i_byte = fubyte(++addr);
                        inc_ip++;
                        switch (i_byte) {
                        case PUSHF:
                                /* pushed image reports I according to VIF */
                                if (vmf->vmf_eflags & PSL_VIF)
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL | PSL_I, vmf);
                                else
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL, vmf);
                                vmf->vmf_ip += inc_ip;
                                return (0);

                        case POPF:
                                temp_flags = POPL(vmf) & POP_MASK;
                                vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                                    | temp_flags | PSL_VM | PSL_I;
                                vmf->vmf_ip += inc_ip;
                                if (temp_flags & PSL_I) {
                                        vmf->vmf_eflags |= PSL_VIF;
                                        /* only leaves the inner switch */
                                        if (vmf->vmf_eflags & PSL_VIP)
                                                break;
                                } else {
                                        vmf->vmf_eflags &= ~PSL_VIF;
                                }
                                return (0);
                        }
                        break;

                /* VME faults here if VIP is set, but does not set VIF. */
                case STI:
                        vmf->vmf_eflags |= PSL_VIF;
                        vmf->vmf_ip += inc_ip;
                        if ((vmf->vmf_eflags & PSL_VIP) == 0) {
                                uprintf("fatal sti\n");
                                return (SIGKILL);
                        }
                        break;

                /* VME if no redirection support */
                case INTn:
                        break;

                /* VME if trying to set PSL_TF, or PSL_I when VIP is set */
                case POPF:
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);

                /* VME if trying to set PSL_TF, or PSL_I when VIP is set */
                case IRET:
                        /* ip is loaded from the stack, so no inc_ip here */
                        vmf->vmf_ip = POP(vmf);
                        vmf->vmf_cs = POP(vmf);
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);

                }
                return (SIGBUS);
        }

        /* No VME: VIF/VIP are tracked in vm86->vm86_eflags. */
        switch (i_byte) {
        case OPERAND_SIZE_PREFIX:
                /* 32-bit forms of PUSHF / POPF */
                i_byte = fubyte(++addr);
                inc_ip++;
                switch (i_byte) {
                case PUSHF:
                        if (vm86->vm86_eflags & PSL_VIF)
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL | PSL_I, vmf);
                        else
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL, vmf);
                        vmf->vmf_ip += inc_ip;
                        return (retcode);

                case POPF:
                        temp_flags = POPL(vmf) & POP_MASK;
                        vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vm86->vm86_eflags |= PSL_VIF;
                                if (vm86->vm86_eflags & PSL_VIP)
                                        break;
                        } else {
                                vm86->vm86_eflags &= ~PSL_VIF;
                        }
                        return (retcode);
                }
                return (SIGBUS);

        case CLI:
                vm86->vm86_eflags &= ~PSL_VIF;
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case STI:
                /* if there is a pending interrupt, go to the emulator */
                vm86->vm86_eflags |= PSL_VIF;
                vmf->vmf_ip += inc_ip;
                if (vm86->vm86_eflags & PSL_VIP)
                        break;
                return (retcode);

        case PUSHF:
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case INTn:
                /* the interrupt number is the byte following the opcode */
                i_byte = fubyte(addr + 1);
                /* bit set in the interrupt map: not handled here (SIGBUS) */
                if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
                        break;
                /* build the interrupt frame: flags, cs, return ip */
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                PUSH(vmf->vmf_cs, vmf);
                PUSH(vmf->vmf_ip + inc_ip + 1, vmf);    /* increment IP */
                /* new cs:ip comes from the 4-byte vector at i_byte * 4 */
                GET_VEC(fuword((caddr_t)(i_byte * 4)),
                     &vmf->vmf_cs, &vmf->vmf_ip);
                vmf->vmf_flags &= ~PSL_T;
                vm86->vm86_eflags &= ~PSL_VIF;
                return (retcode);

        case IRET:
                /* ip is loaded from the stack, so no inc_ip here */
                vmf->vmf_ip = POP(vmf);
                vmf->vmf_cs = POP(vmf);
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);

        case POPF:
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                vmf->vmf_ip += inc_ip;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);
        }
        return (SIGBUS);
}
409
/*
 * Build the static vm86 context inside the region at vm86paddr: a
 * scratch PCB, its PCB extension (TSS, interrupt map, I/O bitmap) and
 * the TSS descriptor.  The finished PCB is published through vm86pcb.
 * Panics if struct vm86_layout does not fit in ctob(3) bytes.
 */
void
vm86_initialize(void)
{
        int i;
        u_int *addr;
        struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
        struct pcb *pcb;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                0,                      /* length (overwritten) */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* priority level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 16 size */
                0                       /* granularity */
        };

        /*
         * this should be a compile time error, but cpp doesn't grok sizeof().
         */
        if (sizeof(struct vm86_layout) > ctob(3))
                panic("struct vm86_layout exceeds space allocated in locore.s");

        /*
         * Below is the memory layout that we use for the vm86 region.
         *
         * +--------+
         * |        |
         * |        |
         * | page 0 |
         * |        | +--------+
         * |        | | stack  |
         * +--------+ +--------+ <--------- vm86paddr
         * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
         * |        | +--------+
         * |        | |  PCB   | size: ~240 bytes
         * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
         * |        | +--------+
         * |        | |int map |
         * |        | +--------+
         * +--------+ |        |
         * | page 2 | |  I/O   |
         * +--------+ | bitmap |
         * | page 3 | |        |
         * |        | +--------+
         * +--------+
         */

        /*
         * A rudimentary PCB must be installed, in order to get to the
         * PCB extension area.  We use the PCB area as a scratchpad for
         * data storage, the layout of which is shown below.
         *
         * pcb_esi      = new PTD entry 0
         * pcb_ebp      = pointer to frame on vm86 stack
         * pcb_esp      =    stack frame pointer at time of switch
         * pcb_ebx      = va of vm86 page table
         * pcb_eip      =    argument pointer to initial call
         * pcb_spare[0] =    saved TSS descriptor, word 0
         * pcb_spare[1] =    saved TSS descriptor, word 1
         */
#define new_ptd         pcb_esi
#define vm86_frame      pcb_ebp
#define pgtable_va      pcb_ebx

        pcb = &vml->vml_pcb;
        ext = &vml->vml_ext;

        /* Scratch PCB: the frame sits immediately below vm86paddr. */
        bzero(pcb, sizeof(struct pcb));
        pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
        pcb->vm86_frame = (pt_entry_t)vm86paddr - sizeof(struct vm86frame);
        pcb->pgtable_va = (vm_offset_t)vm86paddr;
        pcb->pcb_ext = ext;

        /*
         * PCB extension: tss_ioopt holds, in its upper 16 bits, the
         * offset of the I/O bitmap from the start of the TSS.
         */
        bzero(ext, sizeof(struct pcb_ext)); 
        ext->ext_tss.tss_esp0 = (vm_offset_t)vm86paddr;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        ext->ext_tss.tss_ioopt = 
                ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
        ext->ext_iomap = vml->vml_iomap;
        ext->ext_vm86.vm86_intmap = vml->vml_intmap;

        /* VME is only used when the CPU has it and CR4.VME is enabled. */
        if (cpu_feature & CPUID_VME)
                ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

        /*
         * Zero the interrupt map and the I/O bitmap in one pass; this
         * relies on vml_iomap directly following vml_intmap in
         * struct vm86_layout.  The trailer byte is then set to 0xff.
         */
        addr = (u_int *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
                *addr++ = 0;
        vml->vml_iomap_trailer = 0xff;

        /* Build the TSS descriptor covering ext_tss and the bitmaps. */
        ssd.ssd_base = (u_int)&ext->ext_tss;
        ssd.ssd_limit = TSS_SIZE - 1; 
        ssdtosd(&ssd, &ext->ext_tssd);

        vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}
517
518 vm_offset_t
519 vm86_getpage(struct vm86context *vmc, int pagenum)
520 {
521         int i;
522
523         for (i = 0; i < vmc->npages; i++)
524                 if (vmc->pmap[i].pte_num == pagenum)
525                         return (vmc->pmap[i].kva);
526         return (0);
527 }
528
529 vm_offset_t
530 vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
531 {
532         int i, flags = 0;
533
534         for (i = 0; i < vmc->npages; i++)
535                 if (vmc->pmap[i].pte_num == pagenum)
536                         goto bad;
537
538         if (vmc->npages == VM86_PMAPSIZE)
539                 goto bad;                       /* XXX grow map? */
540
541         if (kva == 0) {
542                 kva = (vm_offset_t)kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
543                 flags = VMAP_MALLOC;
544         }
545
546         i = vmc->npages++;
547         vmc->pmap[i].flags = flags;
548         vmc->pmap[i].kva = kva;
549         vmc->pmap[i].pte_num = pagenum;
550         return (kva);
551 bad:
552         panic("vm86_addpage: not enough room, or overlap");
553 }
554
555 static void
556 vm86_initflags(struct vm86frame *vmf)
557 {
558         int eflags = vmf->vmf_eflags;
559         struct vm86_kernel *vm86 = &curthread->td_pcb->pcb_ext->ext_vm86;
560
561         if (vm86->vm86_has_vme) {
562                 eflags = (vmf->vmf_eflags & ~VME_USERCHANGE) |
563                     (eflags & VME_USERCHANGE) | PSL_VM;
564         } else {
565                 vm86->vm86_eflags = eflags;     /* save VIF, VIP */
566                 eflags = (vmf->vmf_eflags & ~VM_USERCHANGE) |             
567                     (eflags & VM_USERCHANGE) | PSL_VM;
568         }
569         vmf->vmf_eflags = eflags | PSL_VM;
570 }
571
572 /*
573  * called from vm86_bioscall, while in vm86 address space, to finalize setup.
574  */
575 void
576 vm86_prepcall(struct vm86frame *vmf)
577 {
578         uintptr_t addr[] = { 0xA00, 0x1000 };   /* code, stack */
579         u_char intcall[] = {
580                 CLI, INTn, 0x00, STI, HLT
581         };
582
583         if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
584                 /* interrupt call requested */
585                 intcall[2] = (u_char)(vmf->vmf_trapno & 0xff);
586                 memcpy((void *)addr[0], (void *)intcall, sizeof(intcall));
587                 vmf->vmf_ip = addr[0];
588                 vmf->vmf_cs = 0;
589         }
590         vmf->vmf_sp = addr[1] - 2;              /* keep aligned */
591         vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = vmf->kernel_gs = 0;
592         vmf->vmf_ss = 0;
593         vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
594         vm86_initflags(vmf);
595 }
596
597 /*
598  * vm86 trap handler; determines whether routine succeeded or not.
599  * Called while in vm86 space, returns to calling process.
600  *
601  * A MP lock ref is held on entry from trap() and must be released prior
602  * to returning to the VM86 call.
603  */
604 void
605 vm86_trap(struct vm86frame *vmf, int have_mplock)
606 {
607         caddr_t addr;
608
609         /* "should not happen" */
610         if ((vmf->vmf_eflags & PSL_VM) == 0)
611                 panic("vm86_trap called, but not in vm86 mode");
612
613         addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
614         if (*(u_char *)addr == HLT)
615                 vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
616         else
617                 vmf->vmf_trapno = vmf->vmf_trapno << 16;
618
619         if (have_mplock)
620                 rel_mplock();
621         vm86_biosret(vmf);
622 }
623
624 int
625 vm86_intcall(int intnum, struct vm86frame *vmf)
626 {
627         int error;
628
629         if (intnum < 0 || intnum > 0xff)
630                 return (EINVAL);
631
632         crit_enter();
633         ASSERT_MP_LOCK_HELD();
634
635         vm86_setup_timer_fault();
636         vmf->vmf_trapno = intnum;
637         error = vm86_bioscall(vmf);
638
639         /*
640          * Yes, this happens, especially with video BIOS calls.  The BIOS
641          * will sometimes eat timer 2 for lunch, and we need timer 2.
642          */
643         if (vm86_blew_up_timer) {
644                 vm86_blew_up_timer = 0;
645                 timer_restore();
646                 if (timer_warn) {
647                         kprintf("Warning: BIOS played with the 8254, "
648                                 "resetting it\n");
649                 }
650         }
651         crit_exit();
652         return(error);
653 }
654
655 /*
656  * struct vm86context contains the page table to use when making
657  * vm86 calls.  If intnum is a valid interrupt number (0-255), then
658  * the "interrupt trampoline" will be used, otherwise we use the
659  * caller's cs:ip routine.  
660  */
661 int
662 vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
663 {
664         pt_entry_t *pte = vm86paddr;
665         u_int page;
666         int i, entry, retval;
667
668         crit_enter();
669         ASSERT_MP_LOCK_HELD();
670
671         for (i = 0; i < vmc->npages; i++) {
672                 page = vtophys(vmc->pmap[i].kva & PG_FRAME);
673                 entry = vmc->pmap[i].pte_num; 
674                 vmc->pmap[i].old_pte = pte[entry];
675                 pte[entry] = page | PG_V | PG_RW | PG_U;
676         }
677
678         vmf->vmf_trapno = intnum;
679         retval = vm86_bioscall(vmf);
680
681         for (i = 0; i < vmc->npages; i++) {
682                 entry = vmc->pmap[i].pte_num;
683                 pte[entry] = vmc->pmap[i].old_pte;
684         }
685         crit_exit();
686         return (retval);
687 }
688
689 vm_offset_t
690 vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
691 {
692         int i, page;
693         vm_offset_t addr;
694
695         addr = (vm_offset_t)MAKE_ADDR(sel, off);
696         page = addr >> PAGE_SHIFT;
697         for (i = 0; i < vmc->npages; i++)
698                 if (page == vmc->pmap[i].pte_num)
699                         return (vmc->pmap[i].kva + (addr & PAGE_MASK));
700         return (0);
701 }
702
703 int
704 vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
705             u_short *off)
706 {
707         int i;
708
709         for (i = 0; i < vmc->npages; i++)
710                 if (kva >= vmc->pmap[i].kva &&
711                     kva < vmc->pmap[i].kva + PAGE_SIZE) {
712                         *off = kva - vmc->pmap[i].kva;
713                         *sel = vmc->pmap[i].pte_num << 8;
714                         return (1);
715                 }
716         return (0);
717         panic("vm86_getptr: address not found");
718 }
719         
720 int
721 vm86_sysarch(struct lwp *lp, char *args)
722 {
723         int error = 0;
724         struct i386_vm86_args ua;
725         struct vm86_kernel *vm86;
726
727         if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
728                 return (error);
729
730         if (lp->lwp_thread->td_pcb->pcb_ext == 0)
731                 if ((error = i386_extend_pcb(lp)) != 0)
732                         return (error);
733         vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
734
735         switch (ua.sub_op) {
736         case VM86_INIT: {
737                 struct vm86_init_args sa;
738
739                 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
740                         return (error);
741                 if (cpu_feature & CPUID_VME)
742                         vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
743                 else
744                         vm86->vm86_has_vme = 0;
745                 vm86->vm86_inited = 1;
746                 vm86->vm86_debug = sa.debug;
747                 bcopy(&sa.int_map, vm86->vm86_intmap, 32);
748                 }
749                 break;
750
751 #if 0
752         case VM86_SET_VME: {
753                 struct vm86_vme_args sa;
754         
755                 if ((cpu_feature & CPUID_VME) == 0)
756                         return (ENODEV);
757
758                 if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
759                         return (error);
760                 if (sa.state)
761                         load_cr4(rcr4() | CR4_VME);
762                 else
763                         load_cr4(rcr4() & ~CR4_VME);
764                 }
765                 break;
766 #endif
767
768         case VM86_GET_VME: {
769                 struct vm86_vme_args sa;
770
771                 sa.state = (rcr4() & CR4_VME ? 1 : 0);
772                 error = copyout(&sa, ua.sub_args, sizeof(sa));
773                 }
774                 break;
775
776         case VM86_INTCALL: {
777                 struct vm86_intcall_args sa;
778
779                 if ((error = priv_check_cred(lp->lwp_proc->p_ucred, PRIV_ROOT, 0)))
780                         return (error);
781                 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
782                         return (error);
783                 if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
784                         return (error);
785                 error = copyout(&sa, ua.sub_args, sizeof(sa));
786                 }
787                 break;
788
789         default:
790                 error = EINVAL;
791         }
792         return (error);
793 }
794
795 /*
796  * Setup the VM86 I/O map to take faults on the timer
797  */
798 static void
799 vm86_setup_timer_fault(void)
800 {
801         struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
802
803         vml->vml_iomap[TIMER_MODE >> 3] |= 1 << (TIMER_MODE & 7);
804         vml->vml_iomap[TIMER_CNTR0 >> 3] |= 1 << (TIMER_CNTR0 & 7);
805         vml->vml_iomap[TIMER_CNTR1 >> 3] |= 1 << (TIMER_CNTR1 & 7);
806         vml->vml_iomap[TIMER_CNTR2 >> 3] |= 1 << (TIMER_CNTR2 & 7);
807 }
808
809 /*
810  * Setup the VM86 I/O map to not fault on the timer
811  */
812 static void
813 vm86_clear_timer_fault(void)
814 {
815         struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
816
817         vml->vml_iomap[TIMER_MODE >> 3] &= ~(1 << (TIMER_MODE & 7));
818         vml->vml_iomap[TIMER_CNTR0 >> 3] &= ~(1 << (TIMER_CNTR0 & 7));
819         vml->vml_iomap[TIMER_CNTR1 >> 3] &= ~(1 << (TIMER_CNTR1 & 7));
820         vml->vml_iomap[TIMER_CNTR2 >> 3] &= ~(1 << (TIMER_CNTR2 & 7));
821 }
822