/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
 * $DragonFly: src/sys/kern/kern_memio.c,v 1.5 2003/07/04 05:57:25 dillon Exp $
 */

/*
 * Memory special file
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <i386/isa/intr_machdep.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>


static	d_open_t	mmopen;
static	d_close_t	mmclose;
static	d_read_t	mmrw;
static	d_ioctl_t	mmioctl;
static	d_mmap_t	memmmap;
static	d_poll_t	mmpoll;

#define CDEV_MAJOR 2
static struct cdevsw mem_cdevsw = {
	/* open */	mmopen,
	/* close */	mmclose,
	/* read */	mmrw,
	/* write */	mmrw,
	/* ioctl */	mmioctl,
	/* poll */	mmpoll,
	/* mmap */	memmmap,
	/* strategy */	nostrategy,
	/* name */	"mem",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	D_MEM,
	/* bmaj */	-1
};
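
/*
 * Minor device map (created in mem_drvinit() below):
 *
 *	 0  /dev/mem	  physical memory
 *	 1  /dev/kmem	  kernel virtual memory
 *	 2  /dev/null	  EOF on read, rathole on write
 *	 3  /dev/random	  entropy pool; may block or return short reads
 *	 4  /dev/urandom  entropy pool; never blocks
 *	12  /dev/zero	  zeroes on read, rathole on write
 *	14  /dev/io	  raises IOPL for direct port I/O
 */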

static int rand_bolt;
static caddr_t	zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct thread *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct thread *));

struct mem_range_softc mem_range_softc;


static int
mmclose(dev_t dev, int flags, int fmt, struct thread *td)
{
	struct proc *p = td->td_proc;

	switch (minor(dev)) {
	case 14:
		p->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

static int
mmopen(dev_t dev, int flags, int fmt, struct thread *td)
{
	int error;
	struct proc *p = td->td_proc;

	switch (minor(dev)) {
	case 0:
	case 1:
		if ((flags & FWRITE) && securelevel > 0)
			return (EPERM);
		break;
	case 14:
		error = suser(td);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		p->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}
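
/*
 * Usage sketch (hypothetical userland fragment, not part of this file):
 * opening /dev/io sets PSL_IOPL in the caller's eflags, after which the
 * process may perform port I/O directly, e.g. with the inlines from
 * <machine/cpufunc.h>; mmclose() drops IOPL again on close:
 *
 *	int fd = open("/dev/io", O_RDWR);	(root only, securelevel <= 0)
 *
 *	if (fd >= 0)
 *		outb(0x80, 0x42);		(write a byte to the POST port)
 */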

static int
mmrw(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	register int o;
	register u_int c, v;
	u_int poolsize;
	register struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

/* minor device 0 is physical memory */
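/*
 * Transfers are clamped so they never cross a page boundary: the target
 * physical page is mapped at the transient kernel window `ptvmmap' with
 * pmap_kenter(), copied through uiomove(), and unmapped again before the
 * next iteration.
 */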
		case 0:
			v = uio->uio_offset;
			v &= ~PAGE_MASK;
			pmap_kenter((vm_offset_t)ptvmmap, v);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_kremove((vm_offset_t)ptvmmap);
			continue;

/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently
			 * resident so that we don't create any zero-fill
			 * pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ?
			    VM_PROT_READ : VM_PROT_WRITE))
				return (EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

/* minor device 3 (/dev/random) is source of filth on read, rathole on write */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				if ((flags & IO_NDELAY) != 0)
					return (EWOULDBLOCK);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 4 (/dev/urandom) is source of muck on read, rathole on write */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&rand_bolt,
				    PZERO | PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 12 (/dev/zero) is source of nulls on read, rathole on write */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

		default:
			return (ENODEV);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}
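
/*
 * Usage sketch (hypothetical userland fragment): the PAGE_SIZE clamping
 * above is per loop iteration only and invisible to callers; a single
 * read() may ask for any amount:
 *
 *	char key[32];
 *	int fd = open("/dev/urandom", O_RDONLY);
 *
 *	if (fd >= 0 && read(fd, key, sizeof(key)) == sizeof(key))
 *		;	(key now holds 32 bytes from the entropy pool)
 */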

/*******************************************************\
* allow user processes to MMAP some memory sections	*
* instead of going through read/write			*
\*******************************************************/
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev))
	{

/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}
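
/*
 * Usage sketch (hypothetical userland fragment): memmmap() only turns a
 * device offset into a physical page number; the VM system calls it once
 * per faulted page.  Mapping the legacy VGA window, for example:
 *
 *	int fd = open("/dev/mem", O_RDWR);
 *	void *vga = mmap(NULL, 0x20000, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0xa0000);	(aliases physical 0xa0000-0xbffff)
 */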

static int
mmioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, td);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, td);
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return (ENOTTY);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return (ENXIO);

	switch (cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
			    malloc(nd * sizeof(struct mem_range_desc),
				   M_MEMDESC, M_WAITOK);
			error = mem_range_attr_get(md, &nd);
			if (!error)
				error = copyout(md, mo->mo_desc,
					nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)
		    malloc(sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* clamp description string */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;
	}
	return (error);
}
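
/*
 * Usage sketch (hypothetical userland fragment) for the MEMRANGE_GET path
 * above: pass mo_arg[0] == 0 to learn how many descriptors exist, then
 * call again with a buffer of that size (fd is an open /dev/mem):
 *
 *	struct mem_range_op mo;
 *	struct mem_range_desc *md;
 *
 *	mo.mo_arg[0] = 0;
 *	ioctl(fd, MEMRANGE_GET, &mo);
 *	md = malloc(mo.mo_arg[0] * sizeof(*md));
 *	mo.mo_desc = md;
 *	ioctl(fd, MEMRANGE_GET, &mo);
 */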

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
int
mem_range_attr_get(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	if (*arg == 0) {
		/* query: report how many descriptors are available */
		*arg = mem_range_softc.mr_ndesc;
	} else {
		/* caller must supply room for *arg descriptors */
		bcopy(mem_range_softc.mr_desc, mrd,
		    (*arg) * sizeof(struct mem_range_desc));
	}
	return (0);
}

int
mem_range_attr_set(mrd, arg)
	struct mem_range_desc *mrd;
	int *arg;
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

#ifdef SMP
void
mem_range_AP_init(void)
{
	/* initAP returns void, so don't 'return' its value */
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif

static int
random_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business and the fcntl() stuff.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ
	    && cmd != FIONBIO && cmd != FIOASYNC)
		return (ENOTTY);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	intr = *(int16_t *)data;
	interrupt_mask = 1 << intr;
	switch (cmd) {
	/* Really handled in upper layer */
	case FIOASYNC:
	case FIONBIO:
		break;
	case MEM_SETIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		register_randintr(intr);
		break;
	case MEM_CLEARIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		unregister_randintr(intr);
		break;
	case MEM_RETURNIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		*(u_int16_t *)data = interrupt_allowed;
		break;
	}
	return (0);
}
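
/*
 * Usage sketch (hypothetical userland fragment): the argument really is
 * 16-bit data, per the historical botch noted above.  To start gathering
 * entropy from irq 1 (fd is an open /dev/random, caller must be root):
 *
 *	int16_t irq = 1;
 *
 *	if (ioctl(fd, MEM_SETIRQ, &irq) == 0)
 *		;	(irq 1 is now registered via register_randintr())
 */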

int
mmpoll(dev_t dev, int events, struct thread *td)
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, td);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, td);
	}
}

int
iszerodev(dev)
	dev_t dev;
{
	return ((major(dev) == mem_cdevsw.d_maj)
	    && minor(dev) == 12);
}

static void
mem_drvinit(void *unused)
{
	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM, 0640, "mem");
	make_dev(&mem_cdevsw, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
	make_dev(&mem_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "null");
	make_dev(&mem_cdevsw, 3, UID_ROOT, GID_WHEEL, 0644, "random");
	make_dev(&mem_cdevsw, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
	make_dev(&mem_cdevsw, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
	make_dev(&mem_cdevsw, 14, UID_ROOT, GID_WHEEL, 0600, "io");
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)