/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
 * $DragonFly: src/sys/kern/kern_memio.c,v 1.2 2003/06/17 04:28:35 dillon Exp $
43 */
44
45/*
46 * Memory special file
47 */
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/buf.h>
52#include <sys/conf.h>
53#include <sys/fcntl.h>
54#include <sys/filio.h>
55#include <sys/ioccom.h>
56#include <sys/kernel.h>
57#include <sys/malloc.h>
58#include <sys/memrange.h>
59#include <sys/proc.h>
60#include <sys/random.h>
61#include <sys/signalvar.h>
62#include <sys/uio.h>
63#include <sys/vnode.h>
64
65#include <machine/frame.h>
66#include <machine/psl.h>
67#include <machine/specialreg.h>
68#include <i386/isa/intr_machdep.h>
69
70#include <vm/vm.h>
71#include <vm/pmap.h>
72#include <vm/vm_extern.h>
73
74
75static d_open_t mmopen;
76static d_close_t mmclose;
77static d_read_t mmrw;
78static d_ioctl_t mmioctl;
79static d_mmap_t memmmap;
80static d_poll_t mmpoll;
81
82#define CDEV_MAJOR 2
83static struct cdevsw mem_cdevsw = {
84 /* open */ mmopen,
85 /* close */ mmclose,
86 /* read */ mmrw,
87 /* write */ mmrw,
88 /* ioctl */ mmioctl,
89 /* poll */ mmpoll,
90 /* mmap */ memmmap,
91 /* strategy */ nostrategy,
92 /* name */ "mem",
93 /* maj */ CDEV_MAJOR,
94 /* dump */ nodump,
95 /* psize */ nopsize,
96 /* flags */ D_MEM,
97 /* bmaj */ -1
98};
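
/*
 * Minor device map (the nodes are created in mem_drvinit() below):
 *
 *	0	/dev/mem	physical memory
 *	1	/dev/kmem	kernel virtual memory
 *	2	/dev/null	EOF on read, rathole on write
 *	3	/dev/random	entropy pool (EOF/EWOULDBLOCK when drained)
 *	4	/dev/urandom	entropy pool, never blocks
 *	12	/dev/zero	zeroes on read, rathole on write
 *	14	/dev/io		grants I/O privilege (IOPL) while open
 */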

static struct random_softc random_softc[16];
static caddr_t	zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));

struct mem_range_softc mem_range_softc;


static int
mmclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	switch (minor(dev)) {
	case 14:
		p->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

static int
mmopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	int error;

	switch (minor(dev)) {
	case 0:
	case 1:
		if ((flags & FWRITE) && securelevel > 0)
			return (EPERM);
		break;
	case 14:
		error = suser(p);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		p->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}
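
/*
 * Usage sketch (userland, not part of this file): opening /dev/io sets
 * PSL_IOPL in the caller's saved eflags, so a root process at
 * securelevel <= 0 can use inb/outb directly for as long as the
 * descriptor stays open.
 *
 *	#include <fcntl.h>
 *	#include <machine/cpufunc.h>
 *
 *	int fd = open("/dev/io", O_RDWR);	// root only, securelevel <= 0
 *	if (fd >= 0) {
 *		u_char v = inb(0x80);		// example port read
 *		close(fd);			// mmclose() clears PSL_IOPL
 *	}
 */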

static int
mmrw(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	register int o;
	register u_int c, v;
	u_int poolsize;
	register struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

/* minor device 0 is physical memory */
		case 0:
			v = uio->uio_offset;
			v &= ~PAGE_MASK;
			pmap_kenter((vm_offset_t)ptvmmap, v);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_kremove((vm_offset_t)ptvmmap);
			continue;

/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently resident so
			 * that we don't create any zero-fill pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ?
			    VM_PROT_READ : VM_PROT_WRITE))
				return (EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

/* minor device 3 (/dev/random) is source of filth on read, rathole on write */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				if ((flags & IO_NDELAY) != 0)
					return (EWOULDBLOCK);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 4 (/dev/urandom) is source of muck on read, rathole on write */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&random_softc[0],
				    PZERO | PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 12 (/dev/zero) is source of nulls on read, rathole on write */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

		default:
			return (ENODEV);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}
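
/*
 * Usage sketch (userland, not part of this file): a read from minor 4
 * takes the case 4 path above, copying at most PAGE_SIZE bytes out of
 * the entropy pool per loop iteration via read_random_unlimited(), so
 * it completes without blocking.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char key[32];
 *	int fd = open("/dev/urandom", O_RDONLY);
 *	if (fd >= 0) {
 *		read(fd, key, sizeof(key));	// does not block
 *		close(fd);
 *	}
 */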

/*******************************************************\
* allow user processes to MMAP some memory sections	*
* instead of going through read/write			*
\*******************************************************/
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev))
	{

/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}
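
/*
 * Usage sketch (userland, not part of this file): memmmap() returns a
 * page frame number, so mapping /dev/mem at a given file offset yields
 * the physical page at that address.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *
 *	off_t phys = 0xa0000;			// e.g. the VGA window
 *	int fd = open("/dev/mem", O_RDWR);
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, phys);
 */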

static int
mmioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, p);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, p);
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return (ENOTTY);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return (ENXIO);

	switch (cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
			    malloc(nd * sizeof(struct mem_range_desc),
			    M_MEMDESC, M_WAITOK);
			error = mem_range_attr_get(md, &nd);
			if (!error)
				error = copyout(md, mo->mo_desc,
				    nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)
		    malloc(sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* clamp description string */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;
	}
	return (error);
}
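
/*
 * Usage sketch (userland, not part of this file; this is essentially
 * what memcontrol(8) does): program a write-combining range through
 * MEMRANGE_SET.  The field and flag names are from <sys/memrange.h>;
 * the base/length values are hypothetical.
 *
 *	#include <sys/memrange.h>
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	struct mem_range_desc mrd;
 *	struct mem_range_op mo;
 *
 *	memset(&mrd, 0, sizeof(mrd));
 *	mrd.mr_base = 0xf0000000;		// hypothetical frame buffer
 *	mrd.mr_len = 0x01000000;
 *	mrd.mr_flags = MDF_WRITECOMBINE;
 *	strlcpy(mrd.mr_owner, "fb", sizeof(mrd.mr_owner));
 *	mo.mo_desc = &mrd;
 *	mo.mo_arg[0] = MEMRANGE_SET_UPDATE;
 *	ioctl(open("/dev/mem", O_RDWR), MEMRANGE_SET, &mo);
 */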

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
int
mem_range_attr_get(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	if (*arg == 0) {
		*arg = mem_range_softc.mr_ndesc;
	} else {
		bcopy(mem_range_softc.mr_desc, mrd,
		    (*arg) * sizeof(struct mem_range_desc));
	}
	return (0);
}

int
mem_range_attr_set(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}
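
/*
 * Kernel-side sketch (not part of this file): another driver can fetch
 * the descriptor table by asking for the count first, using the
 * *arg == 0 convention of mem_range_attr_get() above.
 *
 *	int nd = 0;
 *	struct mem_range_desc *tab;
 *
 *	mem_range_attr_get(NULL, &nd);		// nd == 0 requests the count
 *	if (nd > 0) {
 *		tab = malloc(nd * sizeof(*tab), M_MEMDESC, M_WAITOK);
 *		mem_range_attr_get(tab, &nd);	// copies nd descriptors
 *	}
 */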

#ifdef SMP
void
mem_range_AP_init(void)
{
	/* don't "return" the result of a void call from a void function */
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif

static int
random_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;
	struct random_softc *sc;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business and the fcntl() stuff.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ
	    && cmd != FIONBIO && cmd != FIOASYNC)
		return (ENOTTY);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	intr = *(int16_t *)data;
	interrupt_mask = 1 << intr;
	sc = &random_softc[intr];
	switch (cmd) {
	/* Really handled in upper layer */
	case FIOASYNC:
	case FIONBIO:
		break;
	case MEM_SETIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		sc->sc_intr = intr;
		disable_intr();
		sc->sc_handler = intr_handler[intr];
		intr_handler[intr] = add_interrupt_randomness;
		sc->sc_arg = intr_unit[intr];
		intr_unit[intr] = sc;
		enable_intr();
		break;
	case MEM_CLEARIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		disable_intr();
		intr_handler[intr] = sc->sc_handler;
		intr_unit[intr] = sc->sc_arg;
		enable_intr();
		break;
	case MEM_RETURNIRQ:
		error = suser(p);
		if (error != 0)
			return (error);
		*(u_int16_t *)data = interrupt_allowed;
		break;
	}
	return (0);
}
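
/*
 * Usage sketch (userland, not part of this file): route IRQ 14's
 * interrupt timing into the entropy pool via MEM_SETIRQ, as
 * rndcontrol(8) does.  Which header exposes MEM_SETIRQ to userland is
 * an assumption here; the 16-bit argument matches the botch noted
 * above.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *
 *	int16_t irq = 14;
 *	int fd = open("/dev/random", O_RDWR);	// SETIRQ requires root
 *	ioctl(fd, MEM_SETIRQ, &irq);
 */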

int
mmpoll(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, p);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, p);
	}
}
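
/*
 * Usage sketch (userland, not part of this file): select(2) on
 * /dev/random goes through random_poll() above, so a reader can wait
 * for entropy to accumulate instead of spinning on zero-length reads.
 *
 *	#include <sys/select.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/dev/random", O_RDONLY);
 *	fd_set rd;
 *	FD_ZERO(&rd);
 *	FD_SET(fd, &rd);
 *	select(fd + 1, &rd, NULL, NULL, NULL);	// wakes when bits arrive
 */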

int
iszerodev(dev)
	dev_t dev;
{
	return ((major(dev) == mem_cdevsw.d_maj)
	    && minor(dev) == 12);
}

static void
mem_drvinit(void *unused)
{
	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM, 0640, "mem");
	make_dev(&mem_cdevsw, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
	make_dev(&mem_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "null");
	make_dev(&mem_cdevsw, 3, UID_ROOT, GID_WHEEL, 0644, "random");
	make_dev(&mem_cdevsw, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
	make_dev(&mem_cdevsw, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
	make_dev(&mem_cdevsw, 14, UID_ROOT, GID_WHEEL, 0600, "io");
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)