proc->thread stage 3.5: Add an IO_CORE flag so coda doesn't have to dig
[dragonfly.git] / sys / platform / pc32 / i386 / sys_machdep.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
34 * $FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.47.2.3 2002/10/07 17:20:00 jhb Exp $
41c20dac 35 * $DragonFly: src/sys/platform/pc32/i386/sys_machdep.c,v 1.4 2003/06/23 17:55:38 dillon Exp $
984263bc
MD
36 *
37 */
38
39#include "opt_user_ldt.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/sysproto.h>
44#include <sys/malloc.h>
45#include <sys/proc.h>
46
47#include <vm/vm.h>
48#include <sys/lock.h>
49#include <vm/pmap.h>
50#include <vm/vm_map.h>
51#include <vm/vm_extern.h>
52
53#include <sys/user.h>
54
55#include <machine/cpu.h>
56#include <machine/ipl.h>
57#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
58#include <machine/sysarch.h>
59#ifdef SMP
60#include <machine/smp.h>
61#endif
62
63#include <vm/vm_kern.h> /* for kernel_map */
64
65#define MAX_LD 8192
66#define LD_PER_PAGE 512
67#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
68#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
69
70
71
/*
 * Forward declarations for the sysarch() sub-operations defined in this
 * file.  The LDT handlers only exist when USER_LDT is configured;
 * i386_extend_pcb() is deliberately not static.
 */
#ifdef USER_LDT
static int	i386_get_ldt(struct proc *, char *);
static int	i386_set_ldt(struct proc *, char *);
#endif
static int	i386_get_ioperm(struct proc *, char *);
static int	i386_set_ioperm(struct proc *, char *);
int		i386_extend_pcb(struct proc *);
79
41c20dac
MD
80/*
81 * sysarch_args(int op, char *params)
82 */
984263bc
MD
83
84int
41c20dac 85sysarch(struct sysarch_args *uap)
984263bc 86{
41c20dac 87 struct proc *p = curproc;
984263bc
MD
88 int error = 0;
89
90 switch(uap->op) {
91#ifdef USER_LDT
92 case I386_GET_LDT:
93 error = i386_get_ldt(p, uap->parms);
94 break;
95
96 case I386_SET_LDT:
97 error = i386_set_ldt(p, uap->parms);
98 break;
99#endif
100 case I386_GET_IOPERM:
101 error = i386_get_ioperm(p, uap->parms);
102 break;
103 case I386_SET_IOPERM:
104 error = i386_set_ioperm(p, uap->parms);
105 break;
106 case I386_VM86:
107 error = vm86_sysarch(p, uap->parms);
108 break;
109 default:
110 error = EOPNOTSUPP;
111 break;
112 }
113 return (error);
114}
115
116int
117i386_extend_pcb(struct proc *p)
118{
119 int i, offset;
120 u_long *addr;
121 struct pcb_ext *ext;
122 struct soft_segment_descriptor ssd = {
123 0, /* segment base address (overwritten) */
124 ctob(IOPAGES + 1) - 1, /* length */
125 SDT_SYS386TSS, /* segment type */
126 0, /* priority level */
127 1, /* descriptor present */
128 0, 0,
129 0, /* default 32 size */
130 0 /* granularity */
131 };
132
133 ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
134 if (ext == 0)
135 return (ENOMEM);
b7c628e4 136 p->p_thread->td_pcb->pcb_ext = ext;
984263bc 137 bzero(ext, sizeof(struct pcb_ext));
b7c628e4 138 ext->ext_tss.tss_esp0 = (unsigned)((char *)p->p_thread->td_pcb - 16);
984263bc
MD
139 ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
140 /*
141 * The last byte of the i/o map must be followed by an 0xff byte.
142 * We arbitrarily allocate 16 bytes here, to keep the starting
143 * address on a doubleword boundary.
144 */
145 offset = PAGE_SIZE - 16;
146 ext->ext_tss.tss_ioopt =
147 (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
148 ext->ext_iomap = (caddr_t)ext + offset;
149 ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
150
151 addr = (u_long *)ext->ext_vm86.vm86_intmap;
152 for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
153 *addr++ = ~0;
154
155 ssd.ssd_base = (unsigned)&ext->ext_tss;
156 ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
157 ssdtosd(&ssd, &ext->ext_tssd);
158
159 /* switch to the new TSS after syscall completes */
160 need_resched();
161
162 return 0;
163}
164
165static int
166i386_set_ioperm(p, args)
167 struct proc *p;
168 char *args;
169{
170 int i, error;
171 struct i386_ioperm_args ua;
172 char *iomap;
173
174 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
175 return (error);
176
41c20dac 177 if ((error = suser_xxx(p->p_ucred, 0)) != 0)
984263bc
MD
178 return (error);
179 if (securelevel > 0)
180 return (EPERM);
181 /*
182 * XXX
183 * While this is restricted to root, we should probably figure out
184 * whether any other driver is using this i/o address, as so not to
185 * cause confusion. This probably requires a global 'usage registry'.
186 */
187
b7c628e4 188 if (p->p_thread->td_pcb->pcb_ext == 0)
984263bc
MD
189 if ((error = i386_extend_pcb(p)) != 0)
190 return (error);
b7c628e4 191 iomap = (char *)p->p_thread->td_pcb->pcb_ext->ext_iomap;
984263bc
MD
192
193 if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
194 return (EINVAL);
195
196 for (i = ua.start; i < ua.start + ua.length; i++) {
197 if (ua.enable)
198 iomap[i >> 3] &= ~(1 << (i & 7));
199 else
200 iomap[i >> 3] |= (1 << (i & 7));
201 }
202 return (error);
203}
204
205static int
206i386_get_ioperm(p, args)
207 struct proc *p;
208 char *args;
209{
210 int i, state, error;
211 struct i386_ioperm_args ua;
212 char *iomap;
213
214 if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
215 return (error);
216 if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
217 return (EINVAL);
218
b7c628e4 219 if (p->p_thread->td_pcb->pcb_ext == 0) {
984263bc
MD
220 ua.length = 0;
221 goto done;
222 }
223
b7c628e4 224 iomap = (char *)p->p_thread->td_pcb->pcb_ext->ext_iomap;
984263bc
MD
225
226 i = ua.start;
227 state = (iomap[i >> 3] >> (i & 7)) & 1;
228 ua.enable = !state;
229 ua.length = 1;
230
231 for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
232 if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
233 break;
234 ua.length++;
235 }
236
237done:
238 error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
239 return (error);
240}
241
242#ifdef USER_LDT
243/*
244 * Update the GDT entry pointing to the LDT to point to the LDT of the
245 * current process. Do not staticize.
246 */
247void
248set_user_ldt(struct pcb *pcb)
249{
250 struct pcb_ldt *pcb_ldt;
251
b7c628e4 252 if (pcb != curthread->td_pcb)
984263bc
MD
253 return;
254
255 pcb_ldt = pcb->pcb_ldt;
256#ifdef SMP
257 gdt[cpuid * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
258#else
259 gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
260#endif
261 lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
262 currentldt = GSEL(GUSERLDT_SEL, SEL_KPL);
263}
264
265struct pcb_ldt *
266user_ldt_alloc(struct pcb *pcb, int len)
267{
268 struct pcb_ldt *pcb_ldt, *new_ldt;
269
270 MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
271 M_SUBPROC, M_WAITOK);
272 if (new_ldt == NULL)
273 return NULL;
274
275 new_ldt->ldt_len = len = NEW_MAX_LD(len);
276 new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
277 len * sizeof(union descriptor));
278 if (new_ldt->ldt_base == NULL) {
279 FREE(new_ldt, M_SUBPROC);
280 return NULL;
281 }
282 new_ldt->ldt_refcnt = 1;
283 new_ldt->ldt_active = 0;
284
285 gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
286 gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
287 ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
288
289 if ((pcb_ldt = pcb->pcb_ldt)) {
290 if (len > pcb_ldt->ldt_len)
291 len = pcb_ldt->ldt_len;
292 bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
293 len * sizeof(union descriptor));
294 } else {
295 bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
296 }
297 return new_ldt;
298}
299
300void
301user_ldt_free(struct pcb *pcb)
302{
303 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
304
305 if (pcb_ldt == NULL)
306 return;
307
b7c628e4 308 if (pcb == curthread->td_pcb) {
984263bc
MD
309 lldt(_default_ldt);
310 currentldt = _default_ldt;
311 }
312
313 if (--pcb_ldt->ldt_refcnt == 0) {
314 kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
315 pcb_ldt->ldt_len * sizeof(union descriptor));
316 FREE(pcb_ldt, M_SUBPROC);
317 }
318 pcb->pcb_ldt = NULL;
319}
320
321static int
322i386_get_ldt(p, args)
323 struct proc *p;
324 char *args;
325{
326 int error = 0;
b7c628e4 327 struct pcb *pcb = p->p_thread->td_pcb;
984263bc
MD
328 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
329 int nldt, num;
330 union descriptor *lp;
331 int s;
332 struct i386_ldt_args ua, *uap = &ua;
333
334 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
335 return(error);
336
337#ifdef DEBUG
338 printf("i386_get_ldt: start=%d num=%d descs=%p\n",
339 uap->start, uap->num, (void *)uap->descs);
340#endif
341
342 /* verify range of LDTs exist */
343 if ((uap->start < 0) || (uap->num <= 0))
344 return(EINVAL);
345
346 s = splhigh();
347
348 if (pcb_ldt) {
349 nldt = pcb_ldt->ldt_len;
350 num = min(uap->num, nldt);
351 lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
352 } else {
353 nldt = sizeof(ldt)/sizeof(ldt[0]);
354 num = min(uap->num, nldt);
355 lp = &ldt[uap->start];
356 }
357 if (uap->start + num > nldt) {
358 splx(s);
359 return(EINVAL);
360 }
361
362 error = copyout(lp, uap->descs, num * sizeof(union descriptor));
363 if (!error)
364 p->p_retval[0] = num;
365
366 splx(s);
367 return(error);
368}
369
370static int
371i386_set_ldt(p, args)
372 struct proc *p;
373 char *args;
374{
375 int error = 0, i, n;
376 int largest_ld;
b7c628e4 377 struct pcb *pcb = p->p_thread->td_pcb;
984263bc
MD
378 struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
379 union descriptor *descs;
380 int descs_size, s;
381 struct i386_ldt_args ua, *uap = &ua;
382
383 if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
384 return(error);
385
386#ifdef DEBUG
387 printf("i386_set_ldt: start=%d num=%d descs=%p\n",
388 uap->start, uap->num, (void *)uap->descs);
389#endif
390
391 /* verify range of descriptors to modify */
392 if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
393 (uap->num > MAX_LD))
394 {
395 return(EINVAL);
396 }
397 largest_ld = uap->start + uap->num - 1;
398 if (largest_ld >= MAX_LD)
399 return(EINVAL);
400
401 /* allocate user ldt */
402 if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
403 struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
404 if (new_ldt == NULL)
405 return ENOMEM;
406 if (pcb_ldt) {
407 pcb_ldt->ldt_sd = new_ldt->ldt_sd;
408 kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
409 pcb_ldt->ldt_len * sizeof(union descriptor));
410 pcb_ldt->ldt_base = new_ldt->ldt_base;
411 pcb_ldt->ldt_len = new_ldt->ldt_len;
412 FREE(new_ldt, M_SUBPROC);
413 } else
414 pcb->pcb_ldt = pcb_ldt = new_ldt;
415#ifdef SMP
416 /* signal other cpus to reload ldt */
417 smp_rendezvous(NULL, (void (*)(void *))set_user_ldt, NULL, pcb);
418#else
419 set_user_ldt(pcb);
420#endif
421 }
422
423 descs_size = uap->num * sizeof(union descriptor);
424 descs = (union descriptor *)kmem_alloc(kernel_map, descs_size);
425 if (descs == NULL)
426 return (ENOMEM);
427 error = copyin(&uap->descs[0], descs, descs_size);
428 if (error) {
429 kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
430 return (error);
431 }
432 /* Check descriptors for access violations */
433 for (i = 0, n = uap->start; i < uap->num; i++, n++) {
434 union descriptor *dp;
435 dp = &descs[i];
436
437 switch (dp->sd.sd_type) {
438 case SDT_SYSNULL: /* system null */
439 dp->sd.sd_p = 0;
440 break;
441 case SDT_SYS286TSS: /* system 286 TSS available */
442 case SDT_SYSLDT: /* system local descriptor table */
443 case SDT_SYS286BSY: /* system 286 TSS busy */
444 case SDT_SYSTASKGT: /* system task gate */
445 case SDT_SYS286IGT: /* system 286 interrupt gate */
446 case SDT_SYS286TGT: /* system 286 trap gate */
447 case SDT_SYSNULL2: /* undefined by Intel */
448 case SDT_SYS386TSS: /* system 386 TSS available */
449 case SDT_SYSNULL3: /* undefined by Intel */
450 case SDT_SYS386BSY: /* system 386 TSS busy */
451 case SDT_SYSNULL4: /* undefined by Intel */
452 case SDT_SYS386IGT: /* system 386 interrupt gate */
453 case SDT_SYS386TGT: /* system 386 trap gate */
454 case SDT_SYS286CGT: /* system 286 call gate */
455 case SDT_SYS386CGT: /* system 386 call gate */
456 /* I can't think of any reason to allow a user proc
457 * to create a segment of these types. They are
458 * for OS use only.
459 */
460 kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
461 return EACCES;
462
463 /* memory segment types */
464 case SDT_MEMEC: /* memory execute only conforming */
465 case SDT_MEMEAC: /* memory execute only accessed conforming */
466 case SDT_MEMERC: /* memory execute read conforming */
467 case SDT_MEMERAC: /* memory execute read accessed conforming */
468 /* Must be "present" if executable and conforming. */
469 if (dp->sd.sd_p == 0) {
470 kmem_free(kernel_map, (vm_offset_t)descs,
471 descs_size);
472 return (EACCES);
473 }
474 break;
475 case SDT_MEMRO: /* memory read only */
476 case SDT_MEMROA: /* memory read only accessed */
477 case SDT_MEMRW: /* memory read write */
478 case SDT_MEMRWA: /* memory read write accessed */
479 case SDT_MEMROD: /* memory read only expand dwn limit */
480 case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
481 case SDT_MEMRWD: /* memory read write expand dwn limit */
482 case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
483 case SDT_MEME: /* memory execute only */
484 case SDT_MEMEA: /* memory execute only accessed */
485 case SDT_MEMER: /* memory execute read */
486 case SDT_MEMERA: /* memory execute read accessed */
487 break;
488 default:
489 kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
490 return(EINVAL);
491 /*NOTREACHED*/
492 }
493
494 /* Only user (ring-3) descriptors may be present. */
495 if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) {
496 kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
497 return (EACCES);
498 }
499 }
500
501 s = splhigh();
502
503 /* Fill in range */
504 bcopy(descs,
505 &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
506 uap->num * sizeof(union descriptor));
507 p->p_retval[0] = uap->start;
508
509 splx(s);
510 kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
511 return (0);
512}
513#endif /* USER_LDT */