| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1988 University of Utah. | |
| 3 | * Copyright (c) 1991, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * the Systems Programming Group of the University of Utah Computer | |
| 8 | * Science Department. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ | |
| 39 | * | |
| 40 | * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 | |
| 41 | * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.108.2.6 2002/07/02 20:06:19 dillon Exp $ | |
| ba39e2e0 | 42 | * $DragonFly: src/sys/vm/vm_mmap.c,v 1.39 2007/04/30 07:18:57 dillon Exp $ |
| 984263bc MD |
43 | */ |
| 44 | ||
| 45 | /* | |
| 46 | * Mapped file (mmap) interface to VM | |
| 47 | */ | |
| 48 | ||
| 984263bc MD |
49 | #include <sys/param.h> |
| 50 | #include <sys/kernel.h> | |
| 51 | #include <sys/systm.h> | |
| 52 | #include <sys/sysproto.h> | |
| 53 | #include <sys/filedesc.h> | |
| a0ff68c9 | 54 | #include <sys/kern_syscall.h> |
| 984263bc | 55 | #include <sys/proc.h> |
| 895c1f85 | 56 | #include <sys/priv.h> |
| 984263bc MD |
57 | #include <sys/resource.h> |
| 58 | #include <sys/resourcevar.h> | |
| 59 | #include <sys/vnode.h> | |
| 60 | #include <sys/fcntl.h> | |
| 61 | #include <sys/file.h> | |
| 62 | #include <sys/mman.h> | |
| 63 | #include <sys/conf.h> | |
| 64 | #include <sys/stat.h> | |
| 65 | #include <sys/vmmeter.h> | |
| 66 | #include <sys/sysctl.h> | |
| 67 | ||
| 68 | #include <vm/vm.h> | |
| 69 | #include <vm/vm_param.h> | |
| 70 | #include <sys/lock.h> | |
| 71 | #include <vm/pmap.h> | |
| 72 | #include <vm/vm_map.h> | |
| 73 | #include <vm/vm_object.h> | |
| 74 | #include <vm/vm_page.h> | |
| 75 | #include <vm/vm_pager.h> | |
| 76 | #include <vm/vm_pageout.h> | |
| 77 | #include <vm/vm_extern.h> | |
| 78 | #include <vm/vm_page.h> | |
| 79 | #include <vm/vm_kern.h> | |
| 80 | ||
| dadab5e9 | 81 | #include <sys/file2.h> |
| 654a39f0 | 82 | #include <sys/thread2.h> |
| dadab5e9 | 83 | |
| 984263bc MD |
84 | static int max_proc_mmap; |
| 85 | SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, ""); | |
| 568e6804 MD |
86 | int vkernel_enable; |
| 87 | SYSCTL_INT(_vm, OID_AUTO, vkernel_enable, CTLFLAG_RW, &vkernel_enable, 0, ""); | |
| 984263bc MD |
88 | |
| 89 | /* | |
| 90 | * Set the maximum number of vm_map_entry structures per process. Roughly | |
| 91 | * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100 | |
| 92 | * of our KVM malloc space still results in generous limits. We want a | |
| 93 | * default that is good enough to prevent the kernel running out of resources | |
| 94 | * if attacked from compromised user account but generous enough such that | |
| 95 | * multi-threaded processes are not unduly inconvenienced. | |
| 96 | */ | |
| 97 | ||
| 1388df65 | 98 | static void vmmapentry_rsrc_init (void *); |
| ba39e2e0 | 99 | SYSINIT(vmmersrc, SI_BOOT1_POST, SI_ORDER_ANY, vmmapentry_rsrc_init, NULL) |
| 984263bc MD |
100 | |
| 101 | static void | |
| 57e43348 | 102 | vmmapentry_rsrc_init(void *dummy) |
| 984263bc | 103 | { |
| c439ad8f | 104 | max_proc_mmap = KvaSize / sizeof(struct vm_map_entry); |
| 984263bc MD |
105 | max_proc_mmap /= 100; |
| 106 | } | |
| 107 | ||
/*
 * sbrk system call handler.
 *
 * This is a stub: the argument structure is never examined and every
 * call fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sys_sbrk(struct sbrk_args *uap)
{
	int error = EOPNOTSUPP;		/* Not yet implemented */

	return (error);
}
| 115 | ||
/*
 * sstk_args(int incr)
 *
 * sstk system call handler.  This is a stub: the argument structure is
 * never examined and every call fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sys_sstk(struct sstk_args *uap)
{
	int error = EOPNOTSUPP;		/* Not yet implemented */

	return (error);
}
| 126 | ||
| 984263bc | 127 | /* |
| 41c20dac MD |
128 | * mmap_args(void *addr, size_t len, int prot, int flags, int fd, |
| 129 | * long pad, off_t pos) | |
| 130 | * | |
| 984263bc MD |
131 | * Memory Map (mmap) system call. Note that the file offset |
| 132 | * and address are allowed to be NOT page aligned, though if | |
| 133 | * the MAP_FIXED flag is set, both must have the same remainder | |
| 134 | * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not | |
| 135 | * page-aligned, the actual mapping starts at trunc_page(addr) | |
| 136 | * and the return value is adjusted up by the page offset. | |
| 137 | * | |
| 138 | * Generally speaking, only character devices which are themselves | |
| 139 | * memory-based, such as a video framebuffer, can be mmap'd. Otherwise | |
| 140 | * there would be no cache coherency between a descriptor and a VM mapping | |
| 141 | * both to the same character device. | |
| 142 | * | |
| 143 | * Block devices can be mmap'd no matter what they represent. Cache coherency | |
| 144 | * is maintained as long as you do not write directly to the underlying | |
| 145 | * character device. | |
| 146 | */ | |
| 984263bc MD |
147 | |
| 148 | int | |
| d3313941 MD |
149 | kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen, |
| 150 | int uprot, int uflags, int fd, off_t upos, void **res) | |
| 984263bc | 151 | { |
| dadab5e9 MD |
152 | struct thread *td = curthread; |
| 153 | struct proc *p = td->td_proc; | |
| 41c20dac | 154 | struct file *fp = NULL; |
| 984263bc MD |
155 | struct vnode *vp; |
| 156 | vm_offset_t addr; | |
| e54488bb | 157 | vm_offset_t tmpaddr; |
| 984263bc MD |
158 | vm_size_t size, pageoff; |
| 159 | vm_prot_t prot, maxprot; | |
| 160 | void *handle; | |
| 161 | int flags, error; | |
| 162 | int disablexworkaround; | |
| 163 | off_t pos; | |
| 984263bc MD |
164 | vm_object_t obj; |
| 165 | ||
| dadab5e9 MD |
166 | KKASSERT(p); |
| 167 | ||
| a0ff68c9 DRJ |
168 | addr = (vm_offset_t) uaddr; |
| 169 | size = ulen; | |
| 170 | prot = uprot & VM_PROT_ALL; | |
| 171 | flags = uflags; | |
| 172 | pos = upos; | |
| 984263bc | 173 | |
| e54488bb MD |
174 | /* |
| 175 | * Make sure mapping fits into numeric range etc. | |
| 176 | * | |
| 177 | * NOTE: We support the full unsigned range for size now. | |
| 178 | */ | |
| 179 | if (((flags & MAP_ANON) && fd != -1)) | |
| 984263bc MD |
180 | return (EINVAL); |
| 181 | ||
| 182 | if (flags & MAP_STACK) { | |
| a0ff68c9 | 183 | if ((fd != -1) || |
| 984263bc MD |
184 | ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) |
| 185 | return (EINVAL); | |
| 186 | flags |= MAP_ANON; | |
| 187 | pos = 0; | |
| 188 | } | |
| 189 | ||
| 190 | /* | |
| 568e6804 | 191 | * Virtual page tables cannot be used with MAP_STACK. Apart from |
| afeabdca | 192 | * it not making any sense, the aux union is used by both |
| 568e6804 MD |
193 | * types. |
| 194 | * | |
| 195 | * Because the virtual page table is stored in the backing object | |
| 196 | * and might be updated by the kernel, the mapping must be R+W. | |
| 197 | */ | |
| 198 | if (flags & MAP_VPAGETABLE) { | |
| 199 | if (vkernel_enable == 0) | |
| 200 | return (EOPNOTSUPP); | |
| 201 | if (flags & MAP_STACK) | |
| 202 | return (EINVAL); | |
| 203 | if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE)) | |
| 204 | return (EINVAL); | |
| 205 | } | |
| 206 | ||
| 207 | /* | |
| 984263bc MD |
208 | * Align the file position to a page boundary, |
| 209 | * and save its page offset component. | |
| 210 | */ | |
| 211 | pageoff = (pos & PAGE_MASK); | |
| 212 | pos -= pageoff; | |
| 213 | ||
| 214 | /* Adjust size for rounding (on both ends). */ | |
| 215 | size += pageoff; /* low end... */ | |
| 216 | size = (vm_size_t) round_page(size); /* hi end */ | |
| e54488bb MD |
217 | if (size < ulen) /* wrap */ |
| 218 | return(EINVAL); | |
| 984263bc MD |
219 | |
| 220 | /* | |
| 221 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 222 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 223 | */ | |
| c809941b | 224 | if (flags & (MAP_FIXED | MAP_TRYFIXED)) { |
| 984263bc MD |
225 | /* |
| 226 | * The specified address must have the same remainder | |
| 227 | * as the file offset taken modulo PAGE_SIZE, so it | |
| 228 | * should be aligned after adjustment by pageoff. | |
| 229 | */ | |
| 230 | addr -= pageoff; | |
| 231 | if (addr & PAGE_MASK) | |
| 232 | return (EINVAL); | |
| e54488bb MD |
233 | |
| 234 | /* | |
| 235 | * Address range must be all in user VM space and not wrap. | |
| 236 | */ | |
| 237 | tmpaddr = addr + size; | |
| 238 | if (tmpaddr < addr) | |
| 239 | return (EINVAL); | |
| 240 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) | |
| 984263bc | 241 | return (EINVAL); |
| 88181b08 | 242 | if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) |
| 984263bc | 243 | return (EINVAL); |
| c809941b | 244 | } else { |
| 459f5c1e | 245 | /* |
| c809941b MD |
246 | * Set a reasonable start point for the hint if it was |
| 247 | * not specified or if it falls within the heap space. | |
| 248 | * Hinted mmap()s do not allocate out of the heap space. | |
| 459f5c1e SS |
249 | */ |
| 250 | if (addr == 0 || | |
| 251 | (addr >= round_page((vm_offset_t)vms->vm_taddr) && | |
| 252 | addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))) | |
| 253 | addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz); | |
| 984263bc | 254 | } |
| 984263bc MD |
255 | |
| 256 | if (flags & MAP_ANON) { | |
| 257 | /* | |
| 258 | * Mapping blank space is trivial. | |
| 259 | */ | |
| 260 | handle = NULL; | |
| 261 | maxprot = VM_PROT_ALL; | |
| 262 | pos = 0; | |
| 263 | } else { | |
| 264 | /* | |
| 265 | * Mapping file, get fp for validation. Obtain vnode and make | |
| 266 | * sure it is of appropriate type. | |
| 267 | */ | |
| 228b401d MD |
268 | fp = holdfp(p->p_fd, fd, -1); |
| 269 | if (fp == NULL) | |
| 984263bc | 270 | return (EBADF); |
| 228b401d MD |
271 | if (fp->f_type != DTYPE_VNODE) { |
| 272 | error = EINVAL; | |
| 273 | goto done; | |
| 274 | } | |
| 984263bc MD |
275 | /* |
| 276 | * POSIX shared-memory objects are defined to have | |
| 277 | * kernel persistence, and are not defined to support | |
| 278 | * read(2)/write(2) -- or even open(2). Thus, we can | |
| 279 | * use MAP_ASYNC to trade on-disk coherence for speed. | |
| 280 | * The shm_open(3) library routine turns on the FPOSIXSHM | |
| 281 | * flag to request this behavior. | |
| 282 | */ | |
| 283 | if (fp->f_flag & FPOSIXSHM) | |
| 284 | flags |= MAP_NOSYNC; | |
| 285 | vp = (struct vnode *) fp->f_data; | |
| 339fa1ed MD |
286 | |
| 287 | /* | |
| 288 | * Validate the vnode for the operation. | |
| 289 | */ | |
| 290 | switch(vp->v_type) { | |
| 291 | case VREG: | |
| 984263bc MD |
292 | /* |
| 293 | * Get the proper underlying object | |
| 294 | */ | |
| 228b401d MD |
295 | if ((obj = vp->v_object) == NULL) { |
| 296 | error = EINVAL; | |
| 297 | goto done; | |
| 298 | } | |
| 339fa1ed MD |
299 | KKASSERT((struct vnode *)obj->handle == vp); |
| 300 | break; | |
| 301 | case VCHR: | |
| 302 | /* | |
| 303 | * Make sure a device has not been revoked. | |
| 304 | * Mappability is handled by the device layer. | |
| 305 | */ | |
| 306 | if (vp->v_rdev == NULL) { | |
| 307 | error = EBADF; | |
| 308 | goto done; | |
| 309 | } | |
| 310 | break; | |
| 311 | default: | |
| 312 | /* | |
| 313 | * Nothing else is mappable. | |
| 314 | */ | |
| 315 | error = EINVAL; | |
| 316 | goto done; | |
| 984263bc MD |
317 | } |
| 318 | ||
| 319 | /* | |
| 984263bc MD |
320 | * XXX hack to handle use of /dev/zero to map anon memory (ala |
| 321 | * SunOS). | |
| 322 | */ | |
| 323 | if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { | |
| 324 | handle = NULL; | |
| 325 | maxprot = VM_PROT_ALL; | |
| 326 | flags |= MAP_ANON; | |
| 327 | pos = 0; | |
| 328 | } else { | |
| 329 | /* | |
| 330 | * cdevs does not provide private mappings of any kind. | |
| 331 | */ | |
| 332 | /* | |
| 333 | * However, for XIG X server to continue to work, | |
| 334 | * we should allow the superuser to do it anyway. | |
| 335 | * We only allow it at securelevel < 1. | |
| 336 | * (Because the XIG X server writes directly to video | |
| 337 | * memory via /dev/mem, it should never work at any | |
| 338 | * other securelevel. | |
| 339 | * XXX this will have to go | |
| 340 | */ | |
| 341 | if (securelevel >= 1) | |
| 342 | disablexworkaround = 1; | |
| 343 | else | |
| 895c1f85 | 344 | disablexworkaround = priv_check(td, PRIV_ROOT); |
| 984263bc MD |
345 | if (vp->v_type == VCHR && disablexworkaround && |
| 346 | (flags & (MAP_PRIVATE|MAP_COPY))) { | |
| 347 | error = EINVAL; | |
| 348 | goto done; | |
| 349 | } | |
| 350 | /* | |
| 351 | * Ensure that file and memory protections are | |
| 352 | * compatible. Note that we only worry about | |
| 353 | * writability if mapping is shared; in this case, | |
| 354 | * current and max prot are dictated by the open file. | |
| 355 | * XXX use the vnode instead? Problem is: what | |
| 356 | * credentials do we use for determination? What if | |
| 357 | * proc does a setuid? | |
| 358 | */ | |
| 359 | maxprot = VM_PROT_EXECUTE; /* ??? */ | |
| 360 | if (fp->f_flag & FREAD) { | |
| 361 | maxprot |= VM_PROT_READ; | |
| 362 | } else if (prot & PROT_READ) { | |
| 363 | error = EACCES; | |
| 364 | goto done; | |
| 365 | } | |
| 366 | /* | |
| 367 | * If we are sharing potential changes (either via | |
| 368 | * MAP_SHARED or via the implicit sharing of character | |
| 369 | * device mappings), and we are trying to get write | |
| 370 | * permission although we opened it without asking | |
| 371 | * for it, bail out. Check for superuser, only if | |
| 372 | * we're at securelevel < 1, to allow the XIG X server | |
| 373 | * to continue to work. | |
| 374 | */ | |
| 375 | ||
| 376 | if ((flags & MAP_SHARED) != 0 || | |
| 377 | (vp->v_type == VCHR && disablexworkaround)) { | |
| 378 | if ((fp->f_flag & FWRITE) != 0) { | |
| 379 | struct vattr va; | |
| 87de5057 | 380 | if ((error = VOP_GETATTR(vp, &va))) { |
| 984263bc MD |
381 | goto done; |
| 382 | } | |
| 383 | if ((va.va_flags & | |
| 384 | (IMMUTABLE|APPEND)) == 0) { | |
| 385 | maxprot |= VM_PROT_WRITE; | |
| 386 | } else if (prot & PROT_WRITE) { | |
| 387 | error = EPERM; | |
| 388 | goto done; | |
| 389 | } | |
| 390 | } else if ((prot & PROT_WRITE) != 0) { | |
| 391 | error = EACCES; | |
| 392 | goto done; | |
| 393 | } | |
| 394 | } else { | |
| 395 | maxprot |= VM_PROT_WRITE; | |
| 396 | } | |
| 397 | handle = (void *)vp; | |
| 398 | } | |
| 399 | } | |
| 400 | ||
| 401 | /* | |
| 402 | * Do not allow more then a certain number of vm_map_entry structures | |
| 403 | * per process. Scale with the number of rforks sharing the map | |
| 404 | * to make the limit reasonable for threads. | |
| 405 | */ | |
| 406 | if (max_proc_mmap && | |
| e3161323 | 407 | vms->vm_map.nentries >= max_proc_mmap * vms->vm_sysref.refcnt) { |
| 984263bc MD |
408 | error = ENOMEM; |
| 409 | goto done; | |
| 410 | } | |
| 411 | ||
| 412 | error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, | |
| e54488bb | 413 | flags, handle, pos); |
| 984263bc | 414 | if (error == 0) |
| a0ff68c9 | 415 | *res = (void *)(addr + pageoff); |
| 984263bc MD |
416 | done: |
| 417 | if (fp) | |
| 9f87144f | 418 | fdrop(fp); |
| 984263bc MD |
419 | return (error); |
| 420 | } | |
| 421 | ||
| 984263bc | 422 | int |
| 753fd850 | 423 | sys_mmap(struct mmap_args *uap) |
| 984263bc | 424 | { |
| a0ff68c9 | 425 | int error; |
| 984263bc | 426 | |
| d3313941 MD |
427 | error = kern_mmap(curproc->p_vmspace, uap->addr, uap->len, |
| 428 | uap->prot, uap->flags, | |
| 429 | uap->fd, uap->pos, &uap->sysmsg_resultp); | |
| 984263bc | 430 | |
| a0ff68c9 | 431 | return (error); |
| 984263bc | 432 | } |
| 984263bc | 433 | |
| 41c20dac | 434 | /* |
| e54488bb | 435 | * msync_args(void *addr, size_t len, int flags) |
| 41c20dac | 436 | */ |
| 984263bc | 437 | int |
| 753fd850 | 438 | sys_msync(struct msync_args *uap) |
| 984263bc | 439 | { |
| 41c20dac | 440 | struct proc *p = curproc; |
| 984263bc | 441 | vm_offset_t addr; |
| e54488bb | 442 | vm_offset_t tmpaddr; |
| 984263bc MD |
443 | vm_size_t size, pageoff; |
| 444 | int flags; | |
| 445 | vm_map_t map; | |
| 446 | int rv; | |
| 447 | ||
| 448 | addr = (vm_offset_t) uap->addr; | |
| 449 | size = uap->len; | |
| 450 | flags = uap->flags; | |
| 451 | ||
| 452 | pageoff = (addr & PAGE_MASK); | |
| 453 | addr -= pageoff; | |
| 454 | size += pageoff; | |
| 455 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
456 | if (size < uap->len) /* wrap */ |
| 457 | return(EINVAL); | |
| 458 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 459 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
460 | return(EINVAL); |
| 461 | ||
| 462 | if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) | |
| 463 | return (EINVAL); | |
| 464 | ||
| 465 | map = &p->p_vmspace->vm_map; | |
| 466 | ||
| 467 | /* | |
| 468 | * XXX Gak! If size is zero we are supposed to sync "all modified | |
| 469 | * pages with the region containing addr". Unfortunately, we don't | |
| 470 | * really keep track of individual mmaps so we approximate by flushing | |
| 471 | * the range of the map entry containing addr. This can be incorrect | |
| 472 | * if the region splits or is coalesced with a neighbor. | |
| 473 | */ | |
| 474 | if (size == 0) { | |
| 475 | vm_map_entry_t entry; | |
| 476 | ||
| 477 | vm_map_lock_read(map); | |
| 478 | rv = vm_map_lookup_entry(map, addr, &entry); | |
| 479 | vm_map_unlock_read(map); | |
| 480 | if (rv == FALSE) | |
| 481 | return (EINVAL); | |
| 482 | addr = entry->start; | |
| 483 | size = entry->end - entry->start; | |
| 484 | } | |
| 485 | ||
| 486 | /* | |
| 487 | * Clean the pages and interpret the return value. | |
| 488 | */ | |
| 489 | rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, | |
| 490 | (flags & MS_INVALIDATE) != 0); | |
| 491 | ||
| 492 | switch (rv) { | |
| 493 | case KERN_SUCCESS: | |
| 494 | break; | |
| 495 | case KERN_INVALID_ADDRESS: | |
| 496 | return (EINVAL); /* Sun returns ENOMEM? */ | |
| 497 | case KERN_FAILURE: | |
| 498 | return (EIO); | |
| 499 | default: | |
| 500 | return (EINVAL); | |
| 501 | } | |
| 502 | ||
| 503 | return (0); | |
| 504 | } | |
| 505 | ||
| 41c20dac MD |
506 | /* |
| 507 | * munmap_args(void *addr, size_t len) | |
| 508 | */ | |
| 984263bc | 509 | int |
| 753fd850 | 510 | sys_munmap(struct munmap_args *uap) |
| 984263bc | 511 | { |
| 41c20dac | 512 | struct proc *p = curproc; |
| 984263bc | 513 | vm_offset_t addr; |
| e54488bb | 514 | vm_offset_t tmpaddr; |
| 984263bc MD |
515 | vm_size_t size, pageoff; |
| 516 | vm_map_t map; | |
| 517 | ||
| 518 | addr = (vm_offset_t) uap->addr; | |
| 519 | size = uap->len; | |
| 520 | ||
| 521 | pageoff = (addr & PAGE_MASK); | |
| 522 | addr -= pageoff; | |
| 523 | size += pageoff; | |
| 524 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
525 | if (size < uap->len) /* wrap */ |
| 526 | return(EINVAL); | |
| 527 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 528 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
529 | return(EINVAL); |
| 530 | ||
| 531 | if (size == 0) | |
| 532 | return (0); | |
| 533 | ||
| 534 | /* | |
| 535 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 536 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 537 | */ | |
| e54488bb | 538 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| 984263bc | 539 | return (EINVAL); |
| 88181b08 | 540 | if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) |
| 984263bc | 541 | return (EINVAL); |
| 984263bc MD |
542 | map = &p->p_vmspace->vm_map; |
| 543 | /* | |
| 544 | * Make sure entire range is allocated. | |
| 545 | */ | |
| 546 | if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) | |
| 547 | return (EINVAL); | |
| 548 | /* returns nothing but KERN_SUCCESS anyway */ | |
| 418ff780 | 549 | vm_map_remove(map, addr, addr + size); |
| 984263bc MD |
550 | return (0); |
| 551 | } | |
| 552 | ||
| 41c20dac MD |
553 | /* |
| 554 | * mprotect_args(const void *addr, size_t len, int prot) | |
| 555 | */ | |
| 984263bc | 556 | int |
| 753fd850 | 557 | sys_mprotect(struct mprotect_args *uap) |
| 984263bc | 558 | { |
| 41c20dac | 559 | struct proc *p = curproc; |
| 984263bc | 560 | vm_offset_t addr; |
| e54488bb | 561 | vm_offset_t tmpaddr; |
| 984263bc | 562 | vm_size_t size, pageoff; |
| 5f910b2f | 563 | vm_prot_t prot; |
| 984263bc MD |
564 | |
| 565 | addr = (vm_offset_t) uap->addr; | |
| 566 | size = uap->len; | |
| 567 | prot = uap->prot & VM_PROT_ALL; | |
| 568 | #if defined(VM_PROT_READ_IS_EXEC) | |
| 569 | if (prot & VM_PROT_READ) | |
| 570 | prot |= VM_PROT_EXECUTE; | |
| 571 | #endif | |
| 572 | ||
| 573 | pageoff = (addr & PAGE_MASK); | |
| 574 | addr -= pageoff; | |
| 575 | size += pageoff; | |
| 576 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
577 | if (size < uap->len) /* wrap */ |
| 578 | return(EINVAL); | |
| 579 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 580 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
581 | return(EINVAL); |
| 582 | ||
| 583 | switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, | |
| 584 | FALSE)) { | |
| 585 | case KERN_SUCCESS: | |
| 586 | return (0); | |
| 587 | case KERN_PROTECTION_FAILURE: | |
| 588 | return (EACCES); | |
| 589 | } | |
| 590 | return (EINVAL); | |
| 591 | } | |
| 592 | ||
| 41c20dac MD |
593 | /* |
| 594 | * minherit_args(void *addr, size_t len, int inherit) | |
| 595 | */ | |
| 984263bc | 596 | int |
| 753fd850 | 597 | sys_minherit(struct minherit_args *uap) |
| 984263bc | 598 | { |
| 41c20dac | 599 | struct proc *p = curproc; |
| 984263bc | 600 | vm_offset_t addr; |
| e54488bb | 601 | vm_offset_t tmpaddr; |
| 984263bc | 602 | vm_size_t size, pageoff; |
| 5f910b2f | 603 | vm_inherit_t inherit; |
| 984263bc MD |
604 | |
| 605 | addr = (vm_offset_t)uap->addr; | |
| 606 | size = uap->len; | |
| 607 | inherit = uap->inherit; | |
| 608 | ||
| 609 | pageoff = (addr & PAGE_MASK); | |
| 610 | addr -= pageoff; | |
| 611 | size += pageoff; | |
| 612 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
613 | if (size < uap->len) /* wrap */ |
| 614 | return(EINVAL); | |
| 615 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 616 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
617 | return(EINVAL); |
| 618 | ||
| 619 | switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, | |
| 620 | inherit)) { | |
| 621 | case KERN_SUCCESS: | |
| 622 | return (0); | |
| 623 | case KERN_PROTECTION_FAILURE: | |
| 624 | return (EACCES); | |
| 625 | } | |
| 626 | return (EINVAL); | |
| 627 | } | |
| 628 | ||
| 41c20dac MD |
629 | /* |
| 630 | * madvise_args(void *addr, size_t len, int behav) | |
| 631 | */ | |
| 984263bc MD |
632 | /* ARGSUSED */ |
| 633 | int | |
| 753fd850 | 634 | sys_madvise(struct madvise_args *uap) |
| 984263bc | 635 | { |
| 41c20dac | 636 | struct proc *p = curproc; |
| 984263bc | 637 | vm_offset_t start, end; |
| e54488bb | 638 | vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len; |
| 984263bc MD |
639 | |
| 640 | /* | |
| 641 | * Check for illegal behavior | |
| 642 | */ | |
| afeabdca | 643 | if (uap->behav < 0 || uap->behav >= MADV_CONTROL_END) |
| 984263bc MD |
644 | return (EINVAL); |
| 645 | /* | |
| 646 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 647 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 648 | */ | |
| e54488bb | 649 | if (tmpaddr < (vm_offset_t)uap->addr) |
| 984263bc | 650 | return (EINVAL); |
| e54488bb | 651 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| 984263bc | 652 | return (EINVAL); |
| e54488bb | 653 | if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) |
| 984263bc MD |
654 | return (EINVAL); |
| 655 | ||
| 656 | /* | |
| 657 | * Since this routine is only advisory, we default to conservative | |
| 658 | * behavior. | |
| 659 | */ | |
| e54488bb MD |
660 | start = trunc_page((vm_offset_t)uap->addr); |
| 661 | end = round_page(tmpaddr); | |
| 984263bc | 662 | |
| afeabdca | 663 | return (vm_map_madvise(&p->p_vmspace->vm_map, start, end, |
| e54488bb | 664 | uap->behav, 0)); |
| afeabdca MD |
665 | } |
| 666 | ||
| 667 | /* | |
| 668 | * mcontrol_args(void *addr, size_t len, int behav, off_t value) | |
| 669 | */ | |
| 670 | /* ARGSUSED */ | |
| 671 | int | |
| 672 | sys_mcontrol(struct mcontrol_args *uap) | |
| 673 | { | |
| 674 | struct proc *p = curproc; | |
| 675 | vm_offset_t start, end; | |
| e54488bb | 676 | vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len; |
| afeabdca MD |
677 | |
| 678 | /* | |
| 679 | * Check for illegal behavior | |
| 680 | */ | |
| 681 | if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) | |
| 984263bc | 682 | return (EINVAL); |
| afeabdca MD |
683 | /* |
| 684 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 685 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 686 | */ | |
| e54488bb | 687 | if (tmpaddr < (vm_offset_t) uap->addr) |
| afeabdca | 688 | return (EINVAL); |
| e54488bb | 689 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| afeabdca | 690 | return (EINVAL); |
| e54488bb | 691 | if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) |
| afeabdca MD |
692 | return (EINVAL); |
| 693 | ||
| 694 | /* | |
| 695 | * Since this routine is only advisory, we default to conservative | |
| 696 | * behavior. | |
| 697 | */ | |
| e54488bb MD |
698 | start = trunc_page((vm_offset_t)uap->addr); |
| 699 | end = round_page(tmpaddr); | |
| afeabdca MD |
700 | |
| 701 | return (vm_map_madvise(&p->p_vmspace->vm_map, start, end, | |
| e54488bb | 702 | uap->behav, uap->value)); |
| 984263bc MD |
703 | } |
| 704 | ||
| afeabdca | 705 | |
| 41c20dac MD |
706 | /* |
| 707 | * mincore_args(const void *addr, size_t len, char *vec) | |
| 708 | */ | |
| 984263bc MD |
709 | /* ARGSUSED */ |
| 710 | int | |
| 753fd850 | 711 | sys_mincore(struct mincore_args *uap) |
| 984263bc | 712 | { |
| 41c20dac | 713 | struct proc *p = curproc; |
| 984263bc MD |
714 | vm_offset_t addr, first_addr; |
| 715 | vm_offset_t end, cend; | |
| 716 | pmap_t pmap; | |
| 717 | vm_map_t map; | |
| 718 | char *vec; | |
| 719 | int error; | |
| 720 | int vecindex, lastvecindex; | |
| 5f910b2f | 721 | vm_map_entry_t current; |
| 984263bc MD |
722 | vm_map_entry_t entry; |
| 723 | int mincoreinfo; | |
| 724 | unsigned int timestamp; | |
| 725 | ||
| 726 | /* | |
| 727 | * Make sure that the addresses presented are valid for user | |
| 728 | * mode. | |
| 729 | */ | |
| 730 | first_addr = addr = trunc_page((vm_offset_t) uap->addr); | |
| 731 | end = addr + (vm_size_t)round_page(uap->len); | |
| 984263bc MD |
732 | if (end < addr) |
| 733 | return (EINVAL); | |
| e54488bb MD |
734 | if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS) |
| 735 | return (EINVAL); | |
| 984263bc MD |
736 | |
| 737 | /* | |
| 738 | * Address of byte vector | |
| 739 | */ | |
| 740 | vec = uap->vec; | |
| 741 | ||
| 742 | map = &p->p_vmspace->vm_map; | |
| 743 | pmap = vmspace_pmap(p->p_vmspace); | |
| 744 | ||
| 745 | vm_map_lock_read(map); | |
| 746 | RestartScan: | |
| 747 | timestamp = map->timestamp; | |
| 748 | ||
| 749 | if (!vm_map_lookup_entry(map, addr, &entry)) | |
| 750 | entry = entry->next; | |
| 751 | ||
| 752 | /* | |
| 753 | * Do this on a map entry basis so that if the pages are not | |
| 754 | * in the current processes address space, we can easily look | |
| 755 | * up the pages elsewhere. | |
| 756 | */ | |
| 757 | lastvecindex = -1; | |
| 758 | for(current = entry; | |
| 759 | (current != &map->header) && (current->start < end); | |
| 760 | current = current->next) { | |
| 761 | ||
| 762 | /* | |
| 763 | * ignore submaps (for now) or null objects | |
| 764 | */ | |
| 1b874851 MD |
765 | if (current->maptype != VM_MAPTYPE_NORMAL && |
| 766 | current->maptype != VM_MAPTYPE_VPAGETABLE) { | |
| 767 | continue; | |
| 768 | } | |
| 769 | if (current->object.vm_object == NULL) | |
| 984263bc MD |
770 | continue; |
| 771 | ||
| 772 | /* | |
| 773 | * limit this scan to the current map entry and the | |
| 774 | * limits for the mincore call | |
| 775 | */ | |
| 776 | if (addr < current->start) | |
| 777 | addr = current->start; | |
| 778 | cend = current->end; | |
| 779 | if (cend > end) | |
| 780 | cend = end; | |
| 781 | ||
| 782 | /* | |
| 783 | * scan this entry one page at a time | |
| 784 | */ | |
| 06ecca5a | 785 | while (addr < cend) { |
| 984263bc MD |
786 | /* |
| 787 | * Check pmap first, it is likely faster, also | |
| 788 | * it can provide info as to whether we are the | |
| 789 | * one referencing or modifying the page. | |
| 1b874851 MD |
790 | * |
| 791 | * If we have to check the VM object, only mess | |
| 792 | * around with normal maps. Do not mess around | |
| 793 | * with virtual page tables (XXX). | |
| 984263bc MD |
794 | */ |
| 795 | mincoreinfo = pmap_mincore(pmap, addr); | |
| 1b874851 MD |
796 | if (mincoreinfo == 0 && |
| 797 | current->maptype == VM_MAPTYPE_NORMAL) { | |
| 984263bc MD |
798 | vm_pindex_t pindex; |
| 799 | vm_ooffset_t offset; | |
| 800 | vm_page_t m; | |
| 06ecca5a | 801 | |
| 984263bc MD |
802 | /* |
| 803 | * calculate the page index into the object | |
| 804 | */ | |
| 805 | offset = current->offset + (addr - current->start); | |
| 806 | pindex = OFF_TO_IDX(offset); | |
| 06ecca5a | 807 | |
| 984263bc | 808 | /* |
| 06ecca5a MD |
809 | * if the page is resident, then gather |
| 810 | * information about it. spl protection is | |
| 811 | * required to maintain the object | |
| 812 | * association. And XXX what if the page is | |
| 813 | * busy? What's the deal with that? | |
| 984263bc | 814 | */ |
| 654a39f0 | 815 | crit_enter(); |
| 06ecca5a MD |
816 | m = vm_page_lookup(current->object.vm_object, |
| 817 | pindex); | |
| 2ff71562 | 818 | if (m && m->valid) { |
| 984263bc MD |
819 | mincoreinfo = MINCORE_INCORE; |
| 820 | if (m->dirty || | |
| 821 | pmap_is_modified(m)) | |
| 822 | mincoreinfo |= MINCORE_MODIFIED_OTHER; | |
| 823 | if ((m->flags & PG_REFERENCED) || | |
| 824 | pmap_ts_referenced(m)) { | |
| 825 | vm_page_flag_set(m, PG_REFERENCED); | |
| 826 | mincoreinfo |= MINCORE_REFERENCED_OTHER; | |
| 827 | } | |
| 828 | } | |
| 654a39f0 | 829 | crit_exit(); |
| 984263bc MD |
830 | } |
| 831 | ||
| 832 | /* | |
| 833 | * subyte may page fault. In case it needs to modify | |
| 834 | * the map, we release the lock. | |
| 835 | */ | |
| 836 | vm_map_unlock_read(map); | |
| 837 | ||
| 838 | /* | |
| 839 | * calculate index into user supplied byte vector | |
| 840 | */ | |
| 841 | vecindex = OFF_TO_IDX(addr - first_addr); | |
| 842 | ||
| 843 | /* | |
| 844 | * If we have skipped map entries, we need to make sure that | |
| 845 | * the byte vector is zeroed for those skipped entries. | |
| 846 | */ | |
| 847 | while((lastvecindex + 1) < vecindex) { | |
| 848 | error = subyte( vec + lastvecindex, 0); | |
| 849 | if (error) { | |
| 850 | return (EFAULT); | |
| 851 | } | |
| 852 | ++lastvecindex; | |
| 853 | } | |
| 854 | ||
| 855 | /* | |
| 856 | * Pass the page information to the user | |
| 857 | */ | |
| 858 | error = subyte( vec + vecindex, mincoreinfo); | |
| 859 | if (error) { | |
| 860 | return (EFAULT); | |
| 861 | } | |
| 862 | ||
| 863 | /* | |
| 864 | * If the map has changed, due to the subyte, the previous | |
| 865 | * output may be invalid. | |
| 866 | */ | |
| 867 | vm_map_lock_read(map); | |
| 868 | if (timestamp != map->timestamp) | |
| 869 | goto RestartScan; | |
| 870 | ||
| 871 | lastvecindex = vecindex; | |
| 872 | addr += PAGE_SIZE; | |
| 873 | } | |
| 874 | } | |
| 875 | ||
| 876 | /* | |
| 877 | * subyte may page fault. In case it needs to modify | |
| 878 | * the map, we release the lock. | |
| 879 | */ | |
| 880 | vm_map_unlock_read(map); | |
| 881 | ||
| 882 | /* | |
| 883 | * Zero the last entries in the byte vector. | |
| 884 | */ | |
| 885 | vecindex = OFF_TO_IDX(end - first_addr); | |
| 886 | while((lastvecindex + 1) < vecindex) { | |
| 887 | error = subyte( vec + lastvecindex, 0); | |
| 888 | if (error) { | |
| 889 | return (EFAULT); | |
| 890 | } | |
| 891 | ++lastvecindex; | |
| 892 | } | |
| 893 | ||
| 894 | /* | |
| 895 | * If the map has changed, due to the subyte, the previous | |
| 896 | * output may be invalid. | |
| 897 | */ | |
| 898 | vm_map_lock_read(map); | |
| 899 | if (timestamp != map->timestamp) | |
| 900 | goto RestartScan; | |
| 901 | vm_map_unlock_read(map); | |
| 902 | ||
| 903 | return (0); | |
| 904 | } | |
| 905 | ||
| 41c20dac MD |
906 | /* |
| 907 | * mlock_args(const void *addr, size_t len) | |
| 908 | */ | |
| 984263bc | 909 | int |
| 753fd850 | 910 | sys_mlock(struct mlock_args *uap) |
| 984263bc MD |
911 | { |
| 912 | vm_offset_t addr; | |
| e54488bb | 913 | vm_offset_t tmpaddr; |
| 984263bc MD |
914 | vm_size_t size, pageoff; |
| 915 | int error; | |
| 41c20dac | 916 | struct proc *p = curproc; |
| 984263bc MD |
917 | |
| 918 | addr = (vm_offset_t) uap->addr; | |
| 919 | size = uap->len; | |
| 920 | ||
| 921 | pageoff = (addr & PAGE_MASK); | |
| 922 | addr -= pageoff; | |
| 923 | size += pageoff; | |
| 924 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
925 | if (size < uap->len) /* wrap */ |
| 926 | return(EINVAL); | |
| 927 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 928 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
929 | return (EINVAL); |
| 930 | ||
| 12e4aaff | 931 | if (atop(size) + vmstats.v_wire_count > vm_page_max_wired) |
| 984263bc MD |
932 | return (EAGAIN); |
| 933 | ||
| 934 | #ifdef pmap_wired_count | |
| 935 | if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > | |
| 936 | p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) | |
| 937 | return (ENOMEM); | |
| 938 | #else | |
| 895c1f85 | 939 | error = priv_check_cred(p->p_ucred, PRIV_ROOT, 0); |
| 984263bc MD |
940 | if (error) |
| 941 | return (error); | |
| 942 | #endif | |
| 943 | ||
| cde87949 | 944 | error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, FALSE); |
| 984263bc MD |
945 | return (error == KERN_SUCCESS ? 0 : ENOMEM); |
| 946 | } | |
| 947 | ||
/*
 * mlockall_args(int how)
 *
 * Stub implementation: the arguments are not examined and nothing is
 * done; the call unconditionally reports success.
 */
int
sys_mlockall(struct mlockall_args *uap)
{
	return (0);
}
| 956 | ||
/*
 * munlockall_args(void)
 *
 * Stub implementation: nothing is done and the call unconditionally
 * reports success.
 */
int
sys_munlockall(struct munlockall_args *uap)
{
	return (0);
}
| 965 | ||
| 41c20dac MD |
966 | /* |
| 967 | * munlock_args(const void *addr, size_t len) | |
| 968 | */ | |
| 984263bc | 969 | int |
| 753fd850 | 970 | sys_munlock(struct munlock_args *uap) |
| 984263bc | 971 | { |
| dadab5e9 MD |
972 | struct thread *td = curthread; |
| 973 | struct proc *p = td->td_proc; | |
| 984263bc | 974 | vm_offset_t addr; |
| e54488bb | 975 | vm_offset_t tmpaddr; |
| 984263bc MD |
976 | vm_size_t size, pageoff; |
| 977 | int error; | |
| 978 | ||
| 979 | addr = (vm_offset_t) uap->addr; | |
| 980 | size = uap->len; | |
| 981 | ||
| 982 | pageoff = (addr & PAGE_MASK); | |
| 983 | addr -= pageoff; | |
| 984 | size += pageoff; | |
| 985 | size = (vm_size_t) round_page(size); | |
| 986 | ||
| e54488bb MD |
987 | tmpaddr = addr + size; |
| 988 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
989 | return (EINVAL); |
| 990 | ||
| 991 | #ifndef pmap_wired_count | |
| 895c1f85 | 992 | error = priv_check(td, PRIV_ROOT); |
| 984263bc MD |
993 | if (error) |
| 994 | return (error); | |
| 995 | #endif | |
| 996 | ||
| cde87949 | 997 | error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, TRUE); |
| 984263bc MD |
998 | return (error == KERN_SUCCESS ? 0 : ENOMEM); |
| 999 | } | |
| 1000 | ||
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * map      - address map to install the mapping in
 * addr     - in: requested address or placement hint; also passed by
 *            reference to vm_map_find() for the non-stack cases
 * size     - length in bytes; rounded up to a page multiple here
 * prot     - initial protection
 * maxprot  - maximum protection
 * flags    - MAP_* flags
 * handle   - vnode pointer for file/device mappings, otherwise see above
 * foff     - offset into the backing object; must be page aligned
 *
 * Returns 0 on success (including a zero-length request), otherwise an
 * errno: EINVAL, ENOMEM, EACCES, or the error returned by VOP_GETATTR().
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	vm_offset_t eaddr;
	vm_size_t esize;
	struct vnode *vp;
	struct thread *td = curthread;
	struct proc *p;
	objtype_t type;
	int rv = KERN_SUCCESS;
	off_t objsize;
	int docow;

	/*
	 * A zero-length request succeeds without mapping anything.
	 */
	if (size == 0)
		return (0);

	/*
	 * Round the length up to a page multiple.  A rounded value that
	 * compares smaller than the input means the rounding wrapped.
	 */
	objsize = round_page(size);
	if (objsize < size)
		return (EINVAL);
	size = objsize;

	/*
	 * XXX messy code, fixme
	 *
	 * When mapping into the current process's own address space,
	 * refuse requests that would wrap the map size or push it past
	 * RLIMIT_VMEM.
	 *
	 * NOTE: Overflow checks require discrete statements or GCC4
	 * will optimize it out.
	 */
	if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) {
		esize = map->size + size;	/* workaround gcc4 opt */
		if (esize < map->size ||
		    esize > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
			return(ENOMEM);
		}
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) {
		/*
		 * No fixed placement requested: treat *addr as a hint,
		 * rounded up to a page boundary.
		 */
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		/*
		 * Fixed placement: the address must already be page
		 * aligned and the range must not wrap.
		 *
		 * NOTE: Overflow checks require discrete statements or
		 * GCC4 will optimize it out.
		 */
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		eaddr = *addr + size;
		if (eaddr < *addr)
			return (EINVAL);
		fitit = FALSE;
		/*
		 * Plain MAP_FIXED (without MAP_TRYFIXED) first removes
		 * whatever is currently mapped in the target range.
		 */
		if ((flags & MAP_TRYFIXED) == 0)
			vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == NULL)
			foff = 0;
		vp = NULL;
	} else {
		/*
		 * Non-anonymous mappings interpret the handle as a vnode.
		 */
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			/*
			 * Character device: the handle given to the pager
			 * becomes the vnode's v_rdev instead of the vnode.
			 */
			type = OBJT_DEVICE;
			handle = (void *)(intptr_t)vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			/*
			 * Other vnodes: the object size passed to the
			 * pager is the file size reported by the
			 * attributes, not the mapping length.
			 */
			error = VOP_GETATTR(vp, &vat);
			if (error)
				return (error);
			objsize = vat.va_size;
			type = OBJT_VNODE;
			/*
			 * if it is a regular file without any references
			 * we do not need to sync it.
			 */
			if (vp->v_type == VREG && vat.va_nlink == 0) {
				flags |= MAP_NOSYNC;
			}
		}
	}

	/*
	 * With no handle there is no backing object to allocate here.
	 * Otherwise obtain the object from the pager and request partial
	 * prefaulting.
	 */
	if (handle == NULL) {
		object = NULL;
		docow = 0;
	} else {
		object = vm_pager_allocate(type, handle, objsize, prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE || type == OBJT_PHYS) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	/*
	 * Translate the MAP_* request flags into the cow flags handed to
	 * the map layer.  Copy-on-write is requested only when the mapping
	 * is neither anonymous nor shared.
	 */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
	/*
	 * On configurations defining VM_PROT_READ_IS_EXEC, read permission
	 * also grants execute permission.
	 */
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	/*
	 * This may place the area in its own page directory if (size) is
	 * large enough, otherwise it typically returns its argument.
	 */
	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	/*
	 * Stack mappings need special attention.
	 *
	 * Mappings that use virtual page tables will default to storing
	 * the page table at offset 0.
	 *
	 * MAP_STACK takes precedence over MAP_VPAGETABLE; everything else
	 * becomes a normal map entry.
	 */
	if (flags & MAP_STACK) {
		rv = vm_map_stack(map, *addr, size, flags,
				  prot, maxprot, docow);
	} else if (flags & MAP_VPAGETABLE) {
		rv = vm_map_find(map, object, foff, addr, size, fitit,
				 VM_MAPTYPE_VPAGETABLE, prot, maxprot, docow);
	} else {
		rv = vm_map_find(map, object, foff, addr, size, fitit,
				 VM_MAPTYPE_NORMAL, prot, maxprot, docow);
	}

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.  If the inheritance
	 * change fails, the freshly created mapping is removed again.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}

	/*
	 * Set the access time on the vnode
	 */
	if (vp != NULL)
		vn_mark_atime(vp, td);
out:
	/*
	 * Convert the KERN_* status from the map layer into an errno.
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}