/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
 * $DragonFly: src/sys/i386/i386/Attic/mem.c,v 1.8 2003/07/26 19:07:47 rob Exp $
 */

/*
 * Memory special file
 */
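
/*
 * One driver implements several special files, selected by minor number:
 *
 *	 0	/dev/mem	physical memory
 *	 1	/dev/kmem	kernel virtual memory
 *	 2	/dev/null	EOF on read, rathole on write
 *	 3	/dev/random	blocking entropy source
 *	 4	/dev/urandom	non-blocking entropy source
 *	12	/dev/zero	zeroes on read, rathole on write
 *	14	/dev/io		I/O privilege (raises IOPL on open)
 */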

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <i386/isa/intr_machdep.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

static	d_open_t	mmopen;
static	d_close_t	mmclose;
static	d_read_t	mmrw;
static	d_ioctl_t	mmioctl;
static	d_mmap_t	memmmap;
static	d_poll_t	mmpoll;

#define CDEV_MAJOR 2
static struct cdevsw mem_cdevsw = {
	/* name */	"mem",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_MEM,
	/* port */	NULL,
	/* autoq */	0,

	/* open */	mmopen,
	/* close */	mmclose,
	/* read */	mmrw,
	/* write */	mmrw,
	/* ioctl */	mmioctl,
	/* poll */	mmpoll,
	/* mmap */	memmmap,
	/* strategy */	nostrategy,
	/* dump */	nodump,
	/* psize */	nopsize
};

static int rand_bolt;
static caddr_t zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct thread *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct thread *));

struct mem_range_softc mem_range_softc;

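/*
 * Closing /dev/io (minor 14) revokes the I/O privilege granted by mmopen():
 * PSL_IOPL is cleared in the process's saved eflags, so user-mode port
 * access will fault again.
 */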
static int
mmclose(dev_t dev, int flags, int fmt, struct thread *td)
{
	struct proc *p = td->td_proc;

	switch (minor(dev)) {
	case 14:
		p->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

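/*
 * Opening /dev/mem or /dev/kmem for writing is refused once the system
 * securelevel is positive.  Opening /dev/io requires superuser privilege
 * and grants direct I/O port access by setting PSL_IOPL in eflags.
 */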
static int
mmopen(dev_t dev, int flags, int fmt, struct thread *td)
{
	int error;
	struct proc *p = td->td_proc;

	switch (minor(dev)) {
	case 0:
	case 1:
		if ((flags & FWRITE) && securelevel > 0)
			return (EPERM);
		break;
	case 14:
		error = suser(td);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		p->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

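/*
 * Common read/write handler for all minors.  Each pass around the loop
 * consumes at most one iovec (and, for the memory devices, at most one
 * page) and advances the uio until the request is exhausted or an error
 * occurs.
 */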
static int
mmrw(dev_t dev, struct uio *uio, int flags)
{
	int o;
	u_int c, v;
	u_int poolsize;
	struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

/* minor device 0 is physical memory */
		case 0:
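			/*
			 * Map the target physical page into the transient
			 * ptvmmap window, bound the copy so it stays within
			 * one page on both the source and destination sides
			 * (and within the iovec), then tear the mapping
			 * down again.
			 */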
			v = uio->uio_offset;
			v &= ~PAGE_MASK;
			pmap_kenter((vm_offset_t)ptvmmap, v);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_kremove((vm_offset_t)ptvmmap);
			continue;

/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently
			 * resident so that we don't create any zero-fill
			 * pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

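			/*
			 * Only the kernel virtual range
			 * [VADDR(PTDPTDI, 0), VADDR(APTDPTDI, 0)) may be
			 * accessed; reject anything outside it before
			 * probing the pages.
			 */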
			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ?
			    VM_PROT_READ : VM_PROT_WRITE))
				return (EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

/* minor device 3 (/dev/random) is source of filth on read, rathole on write */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
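			/*
			 * Deliver at most one page per pass.  read_random()
			 * returns how many bytes the entropy pool could
			 * actually supply; zero means the pool is dry, in
			 * which case non-blocking callers get EWOULDBLOCK
			 * and everyone else gets a zero-length read.
			 */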
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				if ((flags & IO_NDELAY) != 0)
					return (EWOULDBLOCK);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 4 (/dev/urandom) is source of muck on read, rathole on write */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&rand_bolt, PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

/* minor device 12 (/dev/zero) is source of nulls on read, rathole on write */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
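			/*
			 * Reads are served from a single page of zeroes,
			 * allocated lazily and shared by all readers.
			 */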
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

		default:
			return (ENODEV);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}

/*******************************************************\
* allow user processes to MMAP some memory sections	*
* instead of going through read/write			*
\*******************************************************/
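/*
 * The return value is a physical page index (-1 on failure); the VM
 * system, not this routine, installs the actual mapping.
 */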
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev)) {

/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}

static int
mmioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, td);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, td);
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
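/*
 * For illustration, a hedged userland sketch of the MEMRANGE_GET two-call
 * handshake (hypothetical snippet, not part of this driver; error handling
 * omitted):
 *
 *	struct mem_range_op mo;
 *	struct mem_range_desc *md;
 *	int fd = open("/dev/mem", O_RDONLY);
 *
 *	mo.mo_arg[0] = 0;
 *	ioctl(fd, MEMRANGE_GET, &mo);		-- arg[0] now holds the count
 *	md = calloc(mo.mo_arg[0], sizeof(*md));
 *	mo.mo_desc = md;
 *	ioctl(fd, MEMRANGE_GET, &mo);		-- copies out the descriptors
 */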
static int
mem_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return (ENOTTY);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return (ENXIO);

	switch (cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
			    malloc(nd * sizeof(struct mem_range_desc),
			    M_MEMDESC, M_WAITOK);
			error = mem_range_attr_get(md, &nd);
			if (!error)
				error = copyout(md, mo->mo_desc,
				    nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)
		    malloc(sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* clamp description string */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;
	}
	return (error);
}

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
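/*
 * Convention: a *arg of 0 asks only for the number of descriptors, which
 * is returned in *arg; otherwise *arg descriptors are copied out to mrd.
 */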
int
mem_range_attr_get(struct mem_range_desc *mrd, int *arg)
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	if (*arg == 0) {
		*arg = mem_range_softc.mr_ndesc;
	} else {
		bcopy(mem_range_softc.mr_desc, mrd,
		    (*arg) * sizeof(struct mem_range_desc));
	}
	return (0);
}

int
mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
{
	/* can we handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

#ifdef SMP
void
mem_range_AP_init(void)
{
	/* note: no return of a value here -- this function is void */
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif

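/*
 * interrupt_allowed is a bitmask of the IRQs currently registered as
 * entropy sources; MEM_SETIRQ and MEM_CLEARIRQ flip individual bits and
 * register/unregister the corresponding interrupt with the randomness code.
 */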
static int
random_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td)
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business and the fcntl() stuff.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ
	    && cmd != FIONBIO && cmd != FIOASYNC)
		return (ENOTTY);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	intr = *(int16_t *)data;
	switch (cmd) {
	/* Really handled in upper layer */
	case FIOASYNC:
	case FIONBIO:
		break;
	case MEM_SETIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		/* compute the mask only after validating intr so the
		   shift is always defined */
		interrupt_mask = 1 << intr;
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		register_randintr(intr);
		break;
	case MEM_CLEARIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		if (intr < 0 || intr >= 16)
			return (EINVAL);
		interrupt_mask = 1 << intr;
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		unregister_randintr(intr);
		break;
	case MEM_RETURNIRQ:
		error = suser(td);
		if (error != 0)
			return (error);
		*(u_int16_t *)data = interrupt_allowed;
		break;
	}
	return (0);
}

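/*
 * Only /dev/random may block: its poll is deferred to random_poll(), which
 * reports readability according to the state of the entropy pool.  Every
 * other minor is always ready.
 */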
int
mmpoll(dev_t dev, int events, struct thread *td)
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, td);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, td);
	}
}

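/*
 * Let the rest of the kernel (e.g. the mmap path) recognize /dev/zero by
 * its device number.
 */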
int
iszerodev(dev_t dev)
{
	return ((major(dev) == mem_cdevsw.d_maj) && minor(dev) == 12);
}

static void
mem_drvinit(void *unused)
{
	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM, 0640, "mem");
	make_dev(&mem_cdevsw, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
	make_dev(&mem_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "null");
	make_dev(&mem_cdevsw, 3, UID_ROOT, GID_WHEEL, 0644, "random");
	make_dev(&mem_cdevsw, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
	make_dev(&mem_cdevsw, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
	make_dev(&mem_cdevsw, 14, UID_ROOT, GID_WHEEL, 0600, "io");
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)