/*
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $
 * $DragonFly: src/sys/i386/i386/Attic/sys_machdep.c,v 1.17 2005/05/23 18:23:29 dillon Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/thread.h>

#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/pcb_ext.h>	/* pcb.h included by sys/user.h */
#include <machine/sysarch.h>
#include <machine/smp.h>
#include <machine/globaldata.h>	/* mdcpu */

#include <vm/vm_kern.h>		/* for kernel_map */
#include <sys/thread2.h>
#define MAX_LD 8192	/* an LDT holds at most 64KB: 8192 eight-byte descriptors */
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num)  ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
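
/*
 * For illustration: NEW_MAX_LD(23) rounds a request for 23 descriptors up
 * to 512, and SIZE_FROM_LARGEST_LD(23) is then 512 * 8 = 4096 bytes, i.e.
 * one page (each descriptor is 8 bytes, so LD_PER_PAGE is PAGE_SIZE / 8).
 */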
static int i386_get_ldt (struct proc *, char *, int *);
static int i386_set_ldt (struct proc *, char *, int *);
static int i386_get_ioperm (struct proc *, char *);
static int i386_set_ioperm (struct proc *, char *);
static int check_descs(union descriptor *, int);
int i386_extend_pcb (struct proc *);
/*
 * sysarch_args(int op, char *params)
 */
int
sysarch(struct sysarch_args *uap)
{
	struct proc *p = curproc;
	int error = 0;

	switch (uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(p, uap->parms, &uap->sysmsg_result);
		break;
	case I386_SET_LDT:
		error = i386_set_ldt(p, uap->parms, &uap->sysmsg_result);
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(p, uap->parms);
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(p, uap->parms);
		break;
	case I386_VM86:
		error = vm86_sysarch(p, uap->parms);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
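
/*
 * Illustrative only (not part of the original file): userland reaches the
 * operations above through sysarch(2), either directly or via wrappers
 * such as i386_set_ioperm(3) and i386_set_ldt(3).  For example,
 *
 *	struct i386_ioperm_args a;
 *	a.start = 0x3f8; a.length = 8; a.enable = 1;
 *	sysarch(I386_SET_IOPERM, &a);
 *
 * asks (root only) for direct access to I/O ports 0x3f8-0x3ff.
 */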
int
i386_extend_pcb(struct proc *p)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
	if (ext == 0)
		return (ENOMEM);
	p->p_thread->td_pcb->pcb_ext = ext;
	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = (unsigned)((char *)p->p_thread->td_pcb - 16);
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;
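
	/*
	 * Note: the loop above sets every bit in both maps.  For the TSS
	 * I/O permission bitmap a 1 bit means IN/OUT on that port faults,
	 * so all ports start out denied until i386_set_ioperm() clears
	 * individual bits.
	 */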
	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	/* switch to the new TSS after syscall completes */
static int
i386_set_ioperm(struct proc *p, char *args)
{
	int i, error;
	struct i386_ioperm_args ua;
	char *iomap;

	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
		return (error);

	if ((error = suser_cred(p->p_ucred, 0)) != 0)
		return (error);

	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, so as not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */
	if (p->p_thread->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(p)) != 0)
			return (error);
	iomap = (char *)p->p_thread->td_pcb->pcb_ext->ext_iomap;

	if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	for (i = ua.start; i < ua.start + ua.length; i++) {
		if (ua.enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (0);
}
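
/*
 * The permission map manipulated above (and read back by i386_get_ioperm()
 * below) is indexed by port number: port i is bit (i & 7) of byte (i >> 3).
 * A cleared bit grants the process direct access to that port; a set bit
 * makes IN/OUT on it trap to the kernel.
 */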
static int
i386_get_ioperm(struct proc *p, char *args)
{
	int i, state, error;
	struct i386_ioperm_args ua;
	char *iomap;

	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
		return (error);
	if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	if (p->p_thread->td_pcb->pcb_ext == 0) {
		ua.length = 0;
		goto done;
	}

	iomap = (char *)p->p_thread->td_pcb->pcb_ext->ext_iomap;

	i = ua.start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	ua.enable = !state;
	ua.length = 1;

	for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		ua.length++;
	}

done:
	error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
	return (error);
}
/*
 * Update the TLS entries for the process.  Used by assembly, do not staticize.
 *
 * Must be called from a critical section (else an interrupt thread preemption
 * may cause %gs to fault).  Normally called from the low level swtch.s code.
 */
void
set_user_TLS(void)
{
	struct thread *td = curthread;
	int i;
#ifdef SMP
	int off = GTLS_START + mycpu->gd_cpuid * NGDT;
#else
	const int off = GTLS_START;
#endif
	for (i = 0; i < NGTLS; ++i)
		gdt[off + i].sd = td->td_tls[i];
}
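
/*
 * Each cpu has its own NGDT-entry block within gdt[], so in the SMP case
 * only the current cpu's block is rewritten here; other cpus load a
 * thread's TLS descriptors when they switch that thread in themselves.
 */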
/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.  Used by assembly, do not staticize.
 *
 * Must be called from a critical section (else an interrupt thread preemption
 * may cause %gs to fault).  Normally called from the low level swtch.s code.
 */
void
set_user_ldt(struct pcb *pcb)
{
	struct pcb_ldt *pcb_ldt;

	if (pcb != curthread->td_pcb)
		return;

	pcb_ldt = pcb->pcb_ldt;
#ifdef SMP
	gdt[mycpu->gd_cpuid * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	mdcpu->gd_currentldt = GSEL(GUSERLDT_SEL, SEL_KPL);
}
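
/*
 * The lldt() is what makes the cpu re-read the descriptor just written
 * into the GDT; gd_currentldt records the selector currently loaded so
 * the low level switch code can tell whether a reload is needed at all.
 */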
struct pcb_ldt *
user_ldt_alloc(struct pcb *pcb, int len)
{
	struct pcb_ldt *pcb_ldt, *new_ldt;

	MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
		M_SUBPROC, M_WAITOK);
	if (new_ldt == NULL)
		return NULL;

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
		len * sizeof(union descriptor));
	if (new_ldt->ldt_base == NULL) {
		FREE(new_ldt, M_SUBPROC);
		return NULL;
	}
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pcb_ldt = pcb->pcb_ldt)) {
		if (len > pcb_ldt->ldt_len)
			len = pcb_ldt->ldt_len;
		bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
			len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
	}
	return (new_ldt);
}
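
/*
 * A freshly allocated LDT is therefore always populated: either with a
 * copy of the process's existing (possibly shorter) LDT, or with the
 * system default ldt[] template if the process had none.
 */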
void
user_ldt_free(struct pcb *pcb)
{
	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;

	if (pcb_ldt == NULL)
		return;

	if (pcb == curthread->td_pcb) {
		lldt(_default_ldt);
		mdcpu->gd_currentldt = _default_ldt;
	}
	pcb->pcb_ldt = NULL;

	if (--pcb_ldt->ldt_refcnt == 0) {
		kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
			pcb_ldt->ldt_len * sizeof(union descriptor));
		FREE(pcb_ldt, M_SUBPROC);
	}
}
static int
i386_get_ldt(struct proc *p, char *args, int *res)
{
	int error = 0;
	struct pcb *pcb = p->p_thread->td_pcb;
	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
	unsigned int nldt, num;
	union descriptor *lp;
	struct i386_ldt_args ua, *uap = &ua;

	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) != 0)
		return (error);

#ifdef DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (pcb_ldt) {
		nldt = (unsigned int)pcb_ldt->ldt_len;
		num = min(uap->num, nldt);
		lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
	} else {
		nldt = (unsigned int)(sizeof(ldt) / sizeof(ldt[0]));
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	/*
	 * note: uap->start, num, and nldt are unsigned.  nldt and num
	 * are limited in scope, but uap->start can be anything.
	 */
	if (uap->start > nldt || uap->start + num > nldt) {
		return (EINVAL);
	}

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (error == 0)
		*res = num;
	return (error);
}
static int
i386_set_ldt(struct proc *p, char *args, int *res)
{
	int error = 0;
	int largest_ld;
	struct pcb *pcb = p->p_thread->td_pcb;
	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
	union descriptor *descs;
	int descs_size;
	struct i386_ldt_args ua, *uap = &ua;

	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) != 0)
		return (error);

#ifdef DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	/* verify range of descriptors to modify */
	if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
	    (uap->num > MAX_LD))
		return (EINVAL);
	largest_ld = uap->start + uap->num - 1;
	if (largest_ld >= MAX_LD)
		return (EINVAL);

	/* allocate user ldt */
	if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
		struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
		if (new_ldt == NULL)
			return (ENOMEM);
		if (pcb_ldt) {
			pcb_ldt->ldt_sd = new_ldt->ldt_sd;
			kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
				pcb_ldt->ldt_len * sizeof(union descriptor));
			pcb_ldt->ldt_base = new_ldt->ldt_base;
			pcb_ldt->ldt_len = new_ldt->ldt_len;
			FREE(new_ldt, M_SUBPROC);
		} else {
			pcb->pcb_ldt = pcb_ldt = new_ldt;
		}
		/*
		 * Since the LDT may be shared, we must signal other cpus to
		 * reload it.  XXX we need to track which cpus might be
		 * using the shared ldt and only signal those.
		 */
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt, NULL, pcb);
	}
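
	/*
	 * Note: smp_rendezvous() runs set_user_ldt() on every cpu; each cpu
	 * actually reloads only if it is currently executing the thread that
	 * owns this pcb (see the curthread check in set_user_ldt()).
	 */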
	descs_size = uap->num * sizeof(union descriptor);
	descs = (union descriptor *)kmem_alloc(kernel_map, descs_size);
	if (descs == NULL)
		return (ENOMEM);
	error = copyin(&uap->descs[0], descs, descs_size);
	if (error) {
		kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
		return (error);
	}

	/* Check descriptors for access violations */
	error = check_descs(descs, uap->num);
	if (error) {
		kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
		return (error);
	}

	/*
	 * Fill in the actual ldt entries.  Since %fs might point to one of
	 * these entries a critical section is required to prevent an
	 * interrupt thread from preempting us, switching back, and faulting
	 * on the load of %fs due to a half-formed descriptor.
	 */
	crit_enter();
	bcopy(descs,
	    &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
	    uap->num * sizeof(union descriptor));
	*res = uap->start;
	crit_exit();

	kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
	return (0);
}
static int
check_descs(union descriptor *descs, int num)
{
	int i;

	/* Check descriptors for access violations */
	for (i = 0; i < num; i++) {
		union descriptor *dp;
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS:	/* system 286 TSS available */
		case SDT_SYSLDT:	/* system local descriptor table */
		case SDT_SYS286BSY:	/* system 286 TSS busy */
		case SDT_SYSTASKGT:	/* system task gate */
		case SDT_SYS286IGT:	/* system 286 interrupt gate */
		case SDT_SYS286TGT:	/* system 286 trap gate */
		case SDT_SYSNULL2:	/* undefined by Intel */
		case SDT_SYS386TSS:	/* system 386 TSS available */
		case SDT_SYSNULL3:	/* undefined by Intel */
		case SDT_SYS386BSY:	/* system 386 TSS busy */
		case SDT_SYSNULL4:	/* undefined by Intel */
		case SDT_SYS386IGT:	/* system 386 interrupt gate */
		case SDT_SYS386TGT:	/* system 386 trap gate */
		case SDT_SYS286CGT:	/* system 286 call gate */
		case SDT_SYS386CGT:	/* system 386 call gate */
			/* I can't think of any reason to allow a user proc
			 * to create a segment of these types.  They are
			 * for OS use only.
			 */
			return (EACCES);

		/* memory segment types */
		case SDT_MEMEC:		/* memory execute only conforming */
		case SDT_MEMEAC:	/* memory execute only accessed conforming */
		case SDT_MEMERC:	/* memory execute read conforming */
		case SDT_MEMERAC:	/* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:		/* memory read only */
		case SDT_MEMROA:	/* memory read only accessed */
		case SDT_MEMRW:		/* memory read write */
		case SDT_MEMRWA:	/* memory read write accessed */
		case SDT_MEMROD:	/* memory read only expand dwn limit */
		case SDT_MEMRODA:	/* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:	/* memory read write expand dwn limit */
		case SDT_MEMRWDA:	/* memory read write expand dwn lim accessed */
		case SDT_MEME:		/* memory execute only */
		case SDT_MEMEA:		/* memory execute only accessed */
		case SDT_MEMER:		/* memory execute read */
		case SDT_MEMERA:	/* memory execute read accessed */
			break;
		default:
			return (EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}
	return (0);
}
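
/*
 * In short: user code may only install ring-3 memory segments.  All gate,
 * TSS and LDT descriptor types are rejected outright, and conforming code
 * segments must additionally be marked present.
 */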