Initial import from FreeBSD RELENG_4:
[dragonfly.git] / sys / kern / kern_memio.c
/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
 */

/*
 * Memory special file
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <i386/isa/intr_machdep.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

static d_open_t mmopen;
static d_close_t mmclose;
static d_read_t mmrw;
static d_ioctl_t mmioctl;
static d_mmap_t memmmap;
static d_poll_t mmpoll;

#define CDEV_MAJOR	2
static struct cdevsw mem_cdevsw = {
	/* open */	mmopen,
	/* close */	mmclose,
	/* read */	mmrw,
	/* write */	mmrw,
	/* ioctl */	mmioctl,
	/* poll */	mmpoll,
	/* mmap */	memmmap,
	/* strategy */	nostrategy,
	/* name */	"mem",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	D_MEM,
	/* bmaj */	-1
};

static struct random_softc random_softc[16];
static caddr_t zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));

struct mem_range_softc mem_range_softc;

static int
mmclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	switch (minor(dev)) {
	case 14:
		p->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

static int
mmopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	int error;

	switch (minor(dev)) {
	case 0:
	case 1:
		if ((flags & FWRITE) && securelevel > 0)
			return (EPERM);
		break;
	case 14:
		error = suser(p);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		p->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}
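
/*
 * Illustrative sketch (not part of the original file): opening /dev/io
 * (minor 14) raises PSL_IOPL so an unprivileged-mode program running as
 * root can touch I/O ports directly.  The port number 0x80 is an arbitrary
 * example; inb()/outb() are the i386 <machine/cpufunc.h> inlines.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <machine/cpufunc.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("/dev/io", O_RDWR);  // root, securelevel <= 0
 *
 *		if (fd < 0)
 *			return (1);
 *		outb(0x80, 0x42);	// IOPL is set while the fd is open
 *		close(fd);		// mmclose() clears PSL_IOPL again
 *		return (0);
 *	}
 */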

static int
mmrw(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	register int o;
	register u_int c, v;
	u_int poolsize;
	register struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

/* minor device 0 is physical memory */
		case 0:
			v = uio->uio_offset;
			v &= ~PAGE_MASK;
			pmap_kenter((vm_offset_t)ptvmmap, v);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_kremove((vm_offset_t)ptvmmap);
			continue;

/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently
			 * resident so that we don't create any zero-fill
			 * pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ?
			    VM_PROT_READ : VM_PROT_WRITE))
				return (EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

/* minor device 3 (/dev/random) is source of filth on read, rathole on write */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				if ((flags & IO_NDELAY) != 0)
					return (EWOULDBLOCK);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 4 (/dev/urandom) is source of muck on read, rathole on write */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&random_softc[0],
				    PZERO | PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 12 (/dev/zero) is source of nulls on read, rathole on write */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

		default:
			return (ENODEV);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}
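
/*
 * Illustrative sketch (not from the original source): exercising the mmrw()
 * minors from userland with plain read(2).  /dev/urandom (minor 4) never
 * blocks, while a non-blocking read of /dev/random (minor 3) returns
 * EWOULDBLOCK when the entropy pool is empty.
 *
 *	#include <sys/types.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[64];
 *		ssize_t n;
 *		int fd = open("/dev/urandom", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		n = read(fd, buf, sizeof(buf));	// copied out via uiomove()
 *		printf("got %zd random bytes\n", n);
 *		close(fd);
 *		return (0);
 *	}
 */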

/*******************************************************\
* allow user processes to MMAP some memory sections	*
* instead of going through read/write			*
\*******************************************************/
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev))
	{

/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}
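
/*
 * Illustrative sketch (not part of the original file): mapping a page of
 * physical memory through /dev/mem.  memmmap() turns the file offset into
 * a physical page number with i386_btop(), so the offset handed to mmap()
 * is a physical address; 0xa0000 (the legacy VGA window) is just an
 * example.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		volatile unsigned char *p;
 *		int fd = open("/dev/mem", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0xa0000);
 *		if (p == MAP_FAILED)
 *			return (1);
 *		(void)p[0];		// read one byte of physical memory
 *		munmap((void *)p, 4096);
 *		close(fd);
 *		return (0);
 *	}
 */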

static int
mmioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{

	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, p);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, p);
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return (ENOTTY);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return (ENXIO);

	switch (cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
			    malloc(nd * sizeof(struct mem_range_desc),
			    M_MEMDESC, M_WAITOK);
			error = mem_range_attr_get(md, &nd);
			if (!error)
				error = copyout(md, mo->mo_desc,
				    nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc),
		    M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* clamp description string */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;
	}
	return (error);
}
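
/*
 * Illustrative sketch (not from the original source): the two-step
 * MEMRANGE_GET protocol from userland, in the style of memcontrol(8).
 * A first call with mo_arg[0] == 0 only reports how many descriptors
 * exist; the second call fetches them.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/memrange.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct mem_range_op mo;
 *		struct mem_range_desc *md;
 *		int fd = open("/dev/mem", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		mo.mo_arg[0] = 0;	// first: ask for the descriptor count
 *		if (ioctl(fd, MEMRANGE_GET, &mo) < 0)
 *			return (1);
 *		md = malloc(mo.mo_arg[0] * sizeof(*md));
 *		mo.mo_desc = md;	// second: fetch the descriptors
 *		if (ioctl(fd, MEMRANGE_GET, &mo) < 0)
 *			return (1);
 *		close(fd);
 *		return (0);
 *	}
 */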

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
int
mem_range_attr_get(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	if (*arg == 0) {
		*arg = mem_range_softc.mr_ndesc;
	} else {
		bcopy(mem_range_softc.mr_desc, mrd,
		    (*arg) * sizeof(struct mem_range_desc));
	}
	return (0);
}
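
/*
 * Illustrative sketch (not part of the original file): the kernel-side
 * calling convention for mem_range_attr_get().  Passing *arg == 0 asks
 * only for the descriptor count; a nonzero *arg asks for that many
 * descriptors to be copied into mrd.
 *
 *	struct mem_range_desc *mrd;
 *	int nd = 0;
 *
 *	mem_range_attr_get(NULL, &nd);		// nd now holds the count
 *	mrd = malloc(nd * sizeof(*mrd), M_MEMDESC, M_WAITOK);
 *	mem_range_attr_get(mrd, &nd);		// fills in nd descriptors
 */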

int
mem_range_attr_set(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

#ifdef SMP
void
mem_range_AP_init(void)
{
	/* initAP() returns void, so call it rather than returning its value */
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif

static int
random_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;
	struct random_softc *sc;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business and the fcntl() stuff.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ
	    && cmd != FIONBIO && cmd != FIOASYNC)
		return (ENOTTY);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	intr = *(int16_t *)data;
	switch (cmd) {
	/* Really handled in upper layer */
	case FIOASYNC:
	case FIONBIO:
		break;
	case MEM_SETIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		/* validate intr before using it to index the tables */
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		interrupt_mask = 1 << intr;
		sc = &random_softc[intr];
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		sc->sc_intr = intr;
		disable_intr();
		sc->sc_handler = intr_handler[intr];
		intr_handler[intr] = add_interrupt_randomness;
		sc->sc_arg = intr_unit[intr];
		intr_unit[intr] = sc;
		enable_intr();
		break;
	case MEM_CLEARIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		interrupt_mask = 1 << intr;
		sc = &random_softc[intr];
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		disable_intr();
		intr_handler[intr] = sc->sc_handler;
		intr_unit[intr] = sc->sc_arg;
		enable_intr();
		break;
	case MEM_RETURNIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		*(u_int16_t *)data = interrupt_allowed;
		break;
	}
	return (0);
}
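
/*
 * Illustrative sketch (not from the original source): telling the random
 * device to harvest entropy from an interrupt line.  MEM_SETIRQ takes a
 * 16-bit IRQ number (the "historical botch" noted above); IRQ 14 is just
 * an example, and the ioctl requires root.  MEM_SETIRQ et al. are assumed
 * to come from <sys/random.h>, which this file includes.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/random.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int16_t intr = 14;
 *		int fd = open("/dev/random", O_RDWR);
 *
 *		if (fd < 0)
 *			return (1);
 *		if (ioctl(fd, MEM_SETIRQ, &intr) < 0)
 *			return (1);
 *		close(fd);
 *		return (0);
 *	}
 */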

static int
mmpoll(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, p);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, p);
	}
}
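
/*
 * Illustrative sketch (not part of the original file): waiting for entropy
 * with poll(2).  Only minor 3 (/dev/random) can block; mmpoll() routes it
 * to random_poll(), while everything else is always ready via seltrue().
 *
 *	#include <poll.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct pollfd pfd;
 *
 *		pfd.fd = open("/dev/random", O_RDONLY);
 *		pfd.events = POLLIN;
 *		if (pfd.fd < 0)
 *			return (1);
 *		(void)poll(&pfd, 1, INFTIM);	// sleeps until bytes arrive
 *		close(pfd.fd);
 *		return (0);
 *	}
 */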

int
iszerodev(dev)
	dev_t dev;
{
	return ((major(dev) == mem_cdevsw.d_maj)
	    && minor(dev) == 12);
}

static void
mem_drvinit(void *unused)
{

	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM, 0640, "mem");
	make_dev(&mem_cdevsw, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
	make_dev(&mem_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "null");
	make_dev(&mem_cdevsw, 3, UID_ROOT, GID_WHEEL, 0644, "random");
	make_dev(&mem_cdevsw, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
	make_dev(&mem_cdevsw, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
	make_dev(&mem_cdevsw, 14, UID_ROOT, GID_WHEEL, 0600, "io");
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)