| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1988 University of Utah. | |
| 3 | * Copyright (c) 1991, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * the Systems Programming Group of the University of Utah Computer | |
| 8 | * Science Department. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ | |
| 39 | * | |
| 40 | * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 | |
| 41 | * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.108.2.6 2002/07/02 20:06:19 dillon Exp $ | |
| ba39e2e0 | 42 | * $DragonFly: src/sys/vm/vm_mmap.c,v 1.39 2007/04/30 07:18:57 dillon Exp $ |
| 984263bc MD |
43 | */ |
| 44 | ||
| 45 | /* | |
| 46 | * Mapped file (mmap) interface to VM | |
| 47 | */ | |
| 48 | ||
| 984263bc MD |
49 | #include <sys/param.h> |
| 50 | #include <sys/kernel.h> | |
| 51 | #include <sys/systm.h> | |
| 52 | #include <sys/sysproto.h> | |
| 53 | #include <sys/filedesc.h> | |
| a0ff68c9 | 54 | #include <sys/kern_syscall.h> |
| 984263bc | 55 | #include <sys/proc.h> |
| 895c1f85 | 56 | #include <sys/priv.h> |
| 984263bc MD |
57 | #include <sys/resource.h> |
| 58 | #include <sys/resourcevar.h> | |
| 59 | #include <sys/vnode.h> | |
| 60 | #include <sys/fcntl.h> | |
| 61 | #include <sys/file.h> | |
| 62 | #include <sys/mman.h> | |
| 63 | #include <sys/conf.h> | |
| 64 | #include <sys/stat.h> | |
| 65 | #include <sys/vmmeter.h> | |
| 66 | #include <sys/sysctl.h> | |
| 67 | ||
| 68 | #include <vm/vm.h> | |
| 69 | #include <vm/vm_param.h> | |
| 70 | #include <sys/lock.h> | |
| 71 | #include <vm/pmap.h> | |
| 72 | #include <vm/vm_map.h> | |
| 73 | #include <vm/vm_object.h> | |
| 74 | #include <vm/vm_page.h> | |
| 75 | #include <vm/vm_pager.h> | |
| 76 | #include <vm/vm_pageout.h> | |
| 77 | #include <vm/vm_extern.h> | |
| 78 | #include <vm/vm_page.h> | |
| 79 | #include <vm/vm_kern.h> | |
| 80 | ||
| dadab5e9 | 81 | #include <sys/file2.h> |
| 654a39f0 | 82 | #include <sys/thread2.h> |
| 684a93c4 | 83 | #include <sys/mplock2.h> |
| dadab5e9 | 84 | |
| 984263bc MD |
/*
 * Per-process cap on the number of vm_map_entry structures.  kern_mmap()
 * multiplies it by the vmspace reference count and fails with ENOMEM once
 * the map's entry count reaches the product; a value of 0 disables that
 * check.  The default is computed by vmmapentry_rsrc_init() and the
 * variable is exported read/write through SYSCTL_INT under _vm.
 */
static int max_proc_mmap;
SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");
/*
 * While this is 0, kern_mmap() rejects MAP_VPAGETABLE requests with
 * EOPNOTSUPP.  Exported read/write through SYSCTL_INT under _vm.
 */
int vkernel_enable;
SYSCTL_INT(_vm, OID_AUTO, vkernel_enable, CTLFLAG_RW, &vkernel_enable, 0, "");
| 984263bc MD |
89 | |
| 90 | /* | |
| 91 | * Set the maximum number of vm_map_entry structures per process. Roughly | |
| 92 | * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100 | |
| 93 | * of our KVM malloc space still results in generous limits. We want a | |
| 94 | * default that is good enough to prevent the kernel running out of resources | |
| 95 | * if attacked from a compromised user account but generous enough such that | |
| 96 | * multi-threaded processes are not unduly inconvenienced. | |
| 97 | */ | |
| 98 | ||
| 1388df65 | 99 | static void vmmapentry_rsrc_init (void *); |
| ba39e2e0 | 100 | SYSINIT(vmmersrc, SI_BOOT1_POST, SI_ORDER_ANY, vmmapentry_rsrc_init, NULL) |
| 984263bc MD |
101 | |
| 102 | static void | |
| 57e43348 | 103 | vmmapentry_rsrc_init(void *dummy) |
| 984263bc | 104 | { |
| c439ad8f | 105 | max_proc_mmap = KvaSize / sizeof(struct vm_map_entry); |
| 984263bc MD |
106 | max_proc_mmap /= 100; |
| 107 | } | |
| 108 | ||
| 3919ced0 MD |
/*
 * sbrk system call entry point.  The call is not implemented: the
 * argument block is ignored and EOPNOTSUPP is always returned.
 *
 * MPSAFE
 */
int
sys_sbrk(struct sbrk_args *uap)
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
| 118 | ||
| 41c20dac MD |
/*
 * sstk_args(int incr)
 *
 * sstk system call entry point.  The call is not implemented: the
 * argument block is ignored and EOPNOTSUPP is always returned.
 *
 * MPSAFE
 */
int
sys_sstk(struct sstk_args *uap)
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
| 130 | ||
| 984263bc | 131 | /* |
| 41c20dac MD |
132 | * mmap_args(void *addr, size_t len, int prot, int flags, int fd, |
| 133 | * long pad, off_t pos) | |
| 134 | * | |
| 984263bc MD |
135 | * Memory Map (mmap) system call. Note that the file offset |
| 136 | * and address are allowed to be NOT page aligned, though if | |
| 137 | * the MAP_FIXED flag is set, both must have the same remainder | |
| 138 | * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not | |
| 139 | * page-aligned, the actual mapping starts at trunc_page(addr) | |
| 140 | * and the return value is adjusted up by the page offset. | |
| 141 | * | |
| 142 | * Generally speaking, only character devices which are themselves | |
| 143 | * memory-based, such as a video framebuffer, can be mmap'd. Otherwise | |
| 144 | * there would be no cache coherency between a descriptor and a VM mapping | |
| 145 | * both to the same character device. | |
| 146 | * | |
| 147 | * Block devices can be mmap'd no matter what they represent. Cache coherency | |
| 148 | * is maintained as long as you do not write directly to the underlying | |
| 149 | * character device. | |
| 150 | */ | |
| 984263bc MD |
151 | |
/*
 * Kernel-side implementation of mmap on the address space `vms'.
 *
 *	vms	- vmspace whose vm_map receives the mapping
 *	uaddr	- requested address (a hint unless MAP_FIXED/MAP_TRYFIXED)
 *	ulen	- requested length in bytes
 *	uprot	- requested protection, masked with VM_PROT_ALL
 *	uflags	- MAP_* flags
 *	fd	- descriptor to map; must be -1 for MAP_ANON and MAP_STACK
 *	upos	- file offset; need not be page aligned
 *	res	- on success receives the mapped address plus the page
 *		  offset of upos; not written on failure
 *
 * Returns 0 on success or an errno value.  Once a file pointer has been
 * obtained with holdfp() every exit goes through the `done' label so the
 * reference is released with fdrop().
 */
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp = NULL;
	struct vnode *vp;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	int disablexworkaround;
	off_t pos;
	vm_object_t obj;

	KKASSERT(p);

	addr = (vm_offset_t) uaddr;
	size = ulen;
	prot = uprot & VM_PROT_ALL;
	flags = uflags;
	pos = upos;

	/*
	 * Make sure mapping fits into numeric range etc.
	 *
	 * NOTE: We support the full unsigned range for size now.
	 *
	 * An anonymous mapping must not name a descriptor.
	 */
	if (((flags & MAP_ANON) && fd != -1))
		return (EINVAL);

	/*
	 * A stack mapping takes no descriptor and must be readable and
	 * writable; it is then handled as an anonymous mapping at offset 0.
	 */
	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page tables cannot be used with MAP_STACK.  Apart from
	 * it not making any sense, the aux union is used by both
	 * types.
	 *
	 * Because the virtual page table is stored in the backing object
	 * and might be updated by the kernel, the mapping must be R+W.
	 */
	if (flags & MAP_VPAGETABLE) {
		if (vkernel_enable == 0)
			return (EOPNOTSUPP);
		if (flags & MAP_STACK)
			return (EINVAL);
		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
			return (EINVAL);
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.  pageoff is added back
	 * to the address returned through *res.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */
	if (size < ulen)			/* wrap */
		return(EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & (MAP_FIXED | MAP_TRYFIXED)) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/*
		 * Address range must be all in user VM space and not wrap.
		 */
		tmpaddr = addr + size;
		if (tmpaddr < addr)
			return (EINVAL);
		if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
	} else {
		/*
		 * Set a reasonable start point for the hint if it was
		 * not specified or if it falls within the heap space.
		 * Hinted mmap()s do not allocate out of the heap space.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		     addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz)))
			addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
	}

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial: no backing handle, all
		 * protections allowed, offset forced to 0.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.  From here on failures
		 * must `goto done' so the held fp is dropped.
		 */
		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;

		/*
		 * Validate the vnode for the operation.
		 */
		switch(vp->v_type) {
		case VREG:
			/*
			 * Get the proper underlying object; a regular file
			 * without a VM object cannot be mapped.
			 */
			if ((obj = vp->v_object) == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)obj->handle == vp);
			break;
		case VCHR:
			/*
			 * Make sure a device has not been revoked.
			 * Mappability is handled by the device layer.
			 */
			if (vp->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			/*
			 * Nothing else is mappable.
			 */
			error = EINVAL;
			goto done;
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			/*
			 * However, for XIG X server to continue to work,
			 * we should allow the superuser to do it anyway.
			 * We only allow it at securelevel < 1.
			 * (Because the XIG X server writes directly to video
			 * memory via /dev/mem, it should never work at any
			 * other securelevel.)
			 * XXX this will have to go
			 *
			 * disablexworkaround is nonzero when the workaround
			 * is NOT available: securelevel >= 1, or priv_check()
			 * returned nonzero.
			 */
			if (securelevel >= 1)
				disablexworkaround = 1;
			else
				disablexworkaround = priv_check(td, PRIV_ROOT);
			if (vp->v_type == VCHR && disablexworkaround &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */

			if ((flags & MAP_SHARED) != 0 ||
			    (vp->v_type == VCHR && disablexworkaround)) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error = VOP_GETATTR(vp, &va))) {
						goto done;
					}
					/*
					 * Files flagged IMMUTABLE or APPEND
					 * never get write in maxprot; asking
					 * for PROT_WRITE on them is EPERM.
					 */
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;
					} else if (prot & PROT_WRITE) {
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				/* Non-shared mapping: write is always allowed */
				maxprot |= VM_PROT_WRITE;
			}
			handle = (void *)vp;
		}
	}

	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.  A max_proc_mmap of 0
	 * disables the check.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vms->vm_sysref.refcnt) {
		error = ENOMEM;
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
			flags, handle, pos);
	if (error == 0)
		*res = (void *)(addr + pageoff);
done:
	/* fp is non-NULL only if holdfp() succeeded above */
	if (fp)
		fdrop(fp);
	return (error);
}
| 425 | ||
| 3919ced0 MD |
426 | /* |
| 427 | * MPALMOSTSAFE | |
| 428 | */ | |
| 984263bc | 429 | int |
| 753fd850 | 430 | sys_mmap(struct mmap_args *uap) |
| 984263bc | 431 | { |
| a0ff68c9 | 432 | int error; |
| 984263bc | 433 | |
| 3919ced0 | 434 | get_mplock(); |
| d3313941 MD |
435 | error = kern_mmap(curproc->p_vmspace, uap->addr, uap->len, |
| 436 | uap->prot, uap->flags, | |
| 437 | uap->fd, uap->pos, &uap->sysmsg_resultp); | |
| 3919ced0 | 438 | rel_mplock(); |
| 984263bc | 439 | |
| a0ff68c9 | 440 | return (error); |
| 984263bc | 441 | } |
| 984263bc | 442 | |
| 41c20dac | 443 | /* |
| e54488bb | 444 | * msync_args(void *addr, size_t len, int flags) |
| 3919ced0 MD |
445 | * |
| 446 | * MPALMOSTSAFE | |
| 41c20dac | 447 | */ |
| 984263bc | 448 | int |
| 753fd850 | 449 | sys_msync(struct msync_args *uap) |
| 984263bc | 450 | { |
| 41c20dac | 451 | struct proc *p = curproc; |
| 984263bc | 452 | vm_offset_t addr; |
| e54488bb | 453 | vm_offset_t tmpaddr; |
| 984263bc MD |
454 | vm_size_t size, pageoff; |
| 455 | int flags; | |
| 456 | vm_map_t map; | |
| 457 | int rv; | |
| 458 | ||
| 459 | addr = (vm_offset_t) uap->addr; | |
| 460 | size = uap->len; | |
| 461 | flags = uap->flags; | |
| 462 | ||
| 463 | pageoff = (addr & PAGE_MASK); | |
| 464 | addr -= pageoff; | |
| 465 | size += pageoff; | |
| 466 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
467 | if (size < uap->len) /* wrap */ |
| 468 | return(EINVAL); | |
| 469 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 470 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
471 | return(EINVAL); |
| 472 | ||
| 473 | if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) | |
| 474 | return (EINVAL); | |
| 475 | ||
| 3919ced0 | 476 | get_mplock(); |
| 984263bc MD |
477 | map = &p->p_vmspace->vm_map; |
| 478 | ||
| 479 | /* | |
| 480 | * XXX Gak! If size is zero we are supposed to sync "all modified | |
| 481 | * pages with the region containing addr". Unfortunately, we don't | |
| 482 | * really keep track of individual mmaps so we approximate by flushing | |
| 483 | * the range of the map entry containing addr. This can be incorrect | |
| 484 | * if the region splits or is coalesced with a neighbor. | |
| 485 | */ | |
| 486 | if (size == 0) { | |
| 487 | vm_map_entry_t entry; | |
| 488 | ||
| 489 | vm_map_lock_read(map); | |
| 490 | rv = vm_map_lookup_entry(map, addr, &entry); | |
| 3919ced0 MD |
491 | if (rv == FALSE) { |
| 492 | vm_map_unlock_read(map); | |
| 493 | rv = KERN_INVALID_ADDRESS; | |
| 494 | goto done; | |
| 495 | } | |
| 984263bc MD |
496 | addr = entry->start; |
| 497 | size = entry->end - entry->start; | |
| 3919ced0 | 498 | vm_map_unlock_read(map); |
| 984263bc MD |
499 | } |
| 500 | ||
| 501 | /* | |
| 502 | * Clean the pages and interpret the return value. | |
| 503 | */ | |
| 504 | rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, | |
| 3919ced0 MD |
505 | (flags & MS_INVALIDATE) != 0); |
| 506 | done: | |
| 507 | rel_mplock(); | |
| 984263bc MD |
508 | |
| 509 | switch (rv) { | |
| 510 | case KERN_SUCCESS: | |
| 511 | break; | |
| 512 | case KERN_INVALID_ADDRESS: | |
| 513 | return (EINVAL); /* Sun returns ENOMEM? */ | |
| 514 | case KERN_FAILURE: | |
| 515 | return (EIO); | |
| 516 | default: | |
| 517 | return (EINVAL); | |
| 518 | } | |
| 519 | ||
| 520 | return (0); | |
| 521 | } | |
| 522 | ||
| 41c20dac MD |
523 | /* |
| 524 | * munmap_args(void *addr, size_t len) | |
| 3919ced0 MD |
525 | * |
| 526 | * MPALMOSTSAFE | |
| 41c20dac | 527 | */ |
| 984263bc | 528 | int |
| 753fd850 | 529 | sys_munmap(struct munmap_args *uap) |
| 984263bc | 530 | { |
| 41c20dac | 531 | struct proc *p = curproc; |
| 984263bc | 532 | vm_offset_t addr; |
| e54488bb | 533 | vm_offset_t tmpaddr; |
| 984263bc MD |
534 | vm_size_t size, pageoff; |
| 535 | vm_map_t map; | |
| 536 | ||
| 537 | addr = (vm_offset_t) uap->addr; | |
| 538 | size = uap->len; | |
| 539 | ||
| 540 | pageoff = (addr & PAGE_MASK); | |
| 541 | addr -= pageoff; | |
| 542 | size += pageoff; | |
| 543 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
544 | if (size < uap->len) /* wrap */ |
| 545 | return(EINVAL); | |
| 546 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 547 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
548 | return(EINVAL); |
| 549 | ||
| 550 | if (size == 0) | |
| 551 | return (0); | |
| 552 | ||
| 553 | /* | |
| 554 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 555 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 556 | */ | |
| e54488bb | 557 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| 984263bc | 558 | return (EINVAL); |
| 88181b08 | 559 | if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) |
| 984263bc | 560 | return (EINVAL); |
| 3919ced0 MD |
561 | |
| 562 | get_mplock(); | |
| 984263bc MD |
563 | map = &p->p_vmspace->vm_map; |
| 564 | /* | |
| 565 | * Make sure entire range is allocated. | |
| 566 | */ | |
| 3919ced0 MD |
567 | if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) { |
| 568 | rel_mplock(); | |
| 984263bc | 569 | return (EINVAL); |
| 3919ced0 | 570 | } |
| 984263bc | 571 | /* returns nothing but KERN_SUCCESS anyway */ |
| 418ff780 | 572 | vm_map_remove(map, addr, addr + size); |
| 3919ced0 | 573 | rel_mplock(); |
| 984263bc MD |
574 | return (0); |
| 575 | } | |
| 576 | ||
| 41c20dac MD |
577 | /* |
| 578 | * mprotect_args(const void *addr, size_t len, int prot) | |
| 3919ced0 MD |
579 | * |
| 580 | * MPALMOSTSAFE | |
| 41c20dac | 581 | */ |
| 984263bc | 582 | int |
| 753fd850 | 583 | sys_mprotect(struct mprotect_args *uap) |
| 984263bc | 584 | { |
| 41c20dac | 585 | struct proc *p = curproc; |
| 984263bc | 586 | vm_offset_t addr; |
| e54488bb | 587 | vm_offset_t tmpaddr; |
| 984263bc | 588 | vm_size_t size, pageoff; |
| 5f910b2f | 589 | vm_prot_t prot; |
| 3919ced0 | 590 | int error; |
| 984263bc MD |
591 | |
| 592 | addr = (vm_offset_t) uap->addr; | |
| 593 | size = uap->len; | |
| 594 | prot = uap->prot & VM_PROT_ALL; | |
| 595 | #if defined(VM_PROT_READ_IS_EXEC) | |
| 596 | if (prot & VM_PROT_READ) | |
| 597 | prot |= VM_PROT_EXECUTE; | |
| 598 | #endif | |
| 599 | ||
| 600 | pageoff = (addr & PAGE_MASK); | |
| 601 | addr -= pageoff; | |
| 602 | size += pageoff; | |
| 603 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
604 | if (size < uap->len) /* wrap */ |
| 605 | return(EINVAL); | |
| 606 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 607 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
608 | return(EINVAL); |
| 609 | ||
| 3919ced0 MD |
610 | get_mplock(); |
| 611 | switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, | |
| 612 | prot, FALSE)) { | |
| 984263bc | 613 | case KERN_SUCCESS: |
| 3919ced0 MD |
614 | error = 0; |
| 615 | break; | |
| 984263bc | 616 | case KERN_PROTECTION_FAILURE: |
| 3919ced0 MD |
617 | error = EACCES; |
| 618 | break; | |
| 619 | default: | |
| 620 | error = EINVAL; | |
| 621 | break; | |
| 984263bc | 622 | } |
| 3919ced0 MD |
623 | rel_mplock(); |
| 624 | return (error); | |
| 984263bc MD |
625 | } |
| 626 | ||
| 41c20dac MD |
627 | /* |
| 628 | * minherit_args(void *addr, size_t len, int inherit) | |
| 3919ced0 MD |
629 | * |
| 630 | * MPALMOSTSAFE | |
| 41c20dac | 631 | */ |
| 984263bc | 632 | int |
| 753fd850 | 633 | sys_minherit(struct minherit_args *uap) |
| 984263bc | 634 | { |
| 41c20dac | 635 | struct proc *p = curproc; |
| 984263bc | 636 | vm_offset_t addr; |
| e54488bb | 637 | vm_offset_t tmpaddr; |
| 984263bc | 638 | vm_size_t size, pageoff; |
| 5f910b2f | 639 | vm_inherit_t inherit; |
| 3919ced0 | 640 | int error; |
| 984263bc MD |
641 | |
| 642 | addr = (vm_offset_t)uap->addr; | |
| 643 | size = uap->len; | |
| 644 | inherit = uap->inherit; | |
| 645 | ||
| 646 | pageoff = (addr & PAGE_MASK); | |
| 647 | addr -= pageoff; | |
| 648 | size += pageoff; | |
| 649 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
650 | if (size < uap->len) /* wrap */ |
| 651 | return(EINVAL); | |
| 652 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 653 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
654 | return(EINVAL); |
| 655 | ||
| 3919ced0 MD |
656 | get_mplock(); |
| 657 | ||
| 658 | switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, | |
| 659 | addr + size, inherit)) { | |
| 984263bc | 660 | case KERN_SUCCESS: |
| 3919ced0 MD |
661 | error = 0; |
| 662 | break; | |
| 984263bc | 663 | case KERN_PROTECTION_FAILURE: |
| 3919ced0 MD |
664 | error = EACCES; |
| 665 | break; | |
| 666 | default: | |
| 667 | error = EINVAL; | |
| 668 | break; | |
| 984263bc | 669 | } |
| 3919ced0 MD |
670 | rel_mplock(); |
| 671 | return (error); | |
| 984263bc MD |
672 | } |
| 673 | ||
| 41c20dac MD |
674 | /* |
| 675 | * madvise_args(void *addr, size_t len, int behav) | |
| 3919ced0 MD |
676 | * |
| 677 | * MPALMOSTSAFE | |
| 41c20dac | 678 | */ |
| 984263bc | 679 | int |
| 753fd850 | 680 | sys_madvise(struct madvise_args *uap) |
| 984263bc | 681 | { |
| 41c20dac | 682 | struct proc *p = curproc; |
| 984263bc | 683 | vm_offset_t start, end; |
| e54488bb | 684 | vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len; |
| 3919ced0 | 685 | int error; |
| 984263bc MD |
686 | |
| 687 | /* | |
| 688 | * Check for illegal behavior | |
| 689 | */ | |
| afeabdca | 690 | if (uap->behav < 0 || uap->behav >= MADV_CONTROL_END) |
| 984263bc MD |
691 | return (EINVAL); |
| 692 | /* | |
| 693 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 694 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 695 | */ | |
| e54488bb | 696 | if (tmpaddr < (vm_offset_t)uap->addr) |
| 984263bc | 697 | return (EINVAL); |
| e54488bb | 698 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| 984263bc | 699 | return (EINVAL); |
| e54488bb | 700 | if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) |
| 984263bc MD |
701 | return (EINVAL); |
| 702 | ||
| 703 | /* | |
| 704 | * Since this routine is only advisory, we default to conservative | |
| 705 | * behavior. | |
| 706 | */ | |
| e54488bb MD |
707 | start = trunc_page((vm_offset_t)uap->addr); |
| 708 | end = round_page(tmpaddr); | |
| 3919ced0 MD |
709 | |
| 710 | get_mplock(); | |
| 711 | error = vm_map_madvise(&p->p_vmspace->vm_map, start, end, | |
| 712 | uap->behav, 0); | |
| 713 | rel_mplock(); | |
| 714 | return (error); | |
| afeabdca MD |
715 | } |
| 716 | ||
| 717 | /* | |
| 718 | * mcontrol_args(void *addr, size_t len, int behav, off_t value) | |
| 3919ced0 MD |
719 | * |
| 720 | * MPALMOSTSAFE | |
| afeabdca | 721 | */ |
| afeabdca MD |
722 | int |
| 723 | sys_mcontrol(struct mcontrol_args *uap) | |
| 724 | { | |
| 725 | struct proc *p = curproc; | |
| 726 | vm_offset_t start, end; | |
| e54488bb | 727 | vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len; |
| 3919ced0 | 728 | int error; |
| afeabdca MD |
729 | |
| 730 | /* | |
| 731 | * Check for illegal behavior | |
| 732 | */ | |
| 733 | if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) | |
| 984263bc | 734 | return (EINVAL); |
| afeabdca MD |
735 | /* |
| 736 | * Check for illegal addresses. Watch out for address wrap... Note | |
| 737 | * that VM_*_ADDRESS are not constants due to casts (argh). | |
| 738 | */ | |
| e54488bb | 739 | if (tmpaddr < (vm_offset_t) uap->addr) |
| afeabdca | 740 | return (EINVAL); |
| e54488bb | 741 | if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) |
| afeabdca | 742 | return (EINVAL); |
| e54488bb | 743 | if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) |
| afeabdca MD |
744 | return (EINVAL); |
| 745 | ||
| 746 | /* | |
| 747 | * Since this routine is only advisory, we default to conservative | |
| 748 | * behavior. | |
| 749 | */ | |
| e54488bb MD |
750 | start = trunc_page((vm_offset_t)uap->addr); |
| 751 | end = round_page(tmpaddr); | |
| afeabdca | 752 | |
| 3919ced0 MD |
753 | get_mplock(); |
| 754 | error = vm_map_madvise(&p->p_vmspace->vm_map, start, end, | |
| 755 | uap->behav, uap->value); | |
| 756 | rel_mplock(); | |
| 757 | return (error); | |
| 984263bc MD |
758 | } |
| 759 | ||
| afeabdca | 760 | |
| 41c20dac MD |
761 | /* |
| 762 | * mincore_args(const void *addr, size_t len, char *vec) | |
| 3919ced0 MD |
763 | * |
| 764 | * MPALMOSTSAFE | |
| 41c20dac | 765 | */ |
| 984263bc | 766 | int |
| 753fd850 | 767 | sys_mincore(struct mincore_args *uap) |
| 984263bc | 768 | { |
| 41c20dac | 769 | struct proc *p = curproc; |
| 984263bc MD |
770 | vm_offset_t addr, first_addr; |
| 771 | vm_offset_t end, cend; | |
| 772 | pmap_t pmap; | |
| 773 | vm_map_t map; | |
| 774 | char *vec; | |
| 775 | int error; | |
| 776 | int vecindex, lastvecindex; | |
| 5f910b2f | 777 | vm_map_entry_t current; |
| 984263bc MD |
778 | vm_map_entry_t entry; |
| 779 | int mincoreinfo; | |
| 780 | unsigned int timestamp; | |
| 781 | ||
| 782 | /* | |
| 783 | * Make sure that the addresses presented are valid for user | |
| 784 | * mode. | |
| 785 | */ | |
| 786 | first_addr = addr = trunc_page((vm_offset_t) uap->addr); | |
| 787 | end = addr + (vm_size_t)round_page(uap->len); | |
| 984263bc MD |
788 | if (end < addr) |
| 789 | return (EINVAL); | |
| e54488bb MD |
790 | if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS) |
| 791 | return (EINVAL); | |
| 984263bc MD |
792 | |
| 793 | /* | |
| 794 | * Address of byte vector | |
| 795 | */ | |
| 796 | vec = uap->vec; | |
| 797 | ||
| 798 | map = &p->p_vmspace->vm_map; | |
| 799 | pmap = vmspace_pmap(p->p_vmspace); | |
| 800 | ||
| 3919ced0 | 801 | get_mplock(); |
| 984263bc MD |
802 | vm_map_lock_read(map); |
| 803 | RestartScan: | |
| 804 | timestamp = map->timestamp; | |
| 805 | ||
| 806 | if (!vm_map_lookup_entry(map, addr, &entry)) | |
| 807 | entry = entry->next; | |
| 808 | ||
| 809 | /* | |
| 810 | * Do this on a map entry basis so that if the pages are not | |
| 811 | * in the current processes address space, we can easily look | |
| 812 | * up the pages elsewhere. | |
| 813 | */ | |
| 814 | lastvecindex = -1; | |
| 815 | for(current = entry; | |
| 816 | (current != &map->header) && (current->start < end); | |
| 817 | current = current->next) { | |
| 818 | ||
| 819 | /* | |
| 820 | * ignore submaps (for now) or null objects | |
| 821 | */ | |
| 1b874851 MD |
822 | if (current->maptype != VM_MAPTYPE_NORMAL && |
| 823 | current->maptype != VM_MAPTYPE_VPAGETABLE) { | |
| 824 | continue; | |
| 825 | } | |
| 826 | if (current->object.vm_object == NULL) | |
| 984263bc MD |
827 | continue; |
| 828 | ||
| 829 | /* | |
| 830 | * limit this scan to the current map entry and the | |
| 831 | * limits for the mincore call | |
| 832 | */ | |
| 833 | if (addr < current->start) | |
| 834 | addr = current->start; | |
| 835 | cend = current->end; | |
| 836 | if (cend > end) | |
| 837 | cend = end; | |
| 838 | ||
| 839 | /* | |
| 840 | * scan this entry one page at a time | |
| 841 | */ | |
| 06ecca5a | 842 | while (addr < cend) { |
| 984263bc MD |
843 | /* |
| 844 | * Check pmap first, it is likely faster, also | |
| 845 | * it can provide info as to whether we are the | |
| 846 | * one referencing or modifying the page. | |
| 1b874851 MD |
847 | * |
| 848 | * If we have to check the VM object, only mess | |
| 849 | * around with normal maps. Do not mess around | |
| 850 | * with virtual page tables (XXX). | |
| 984263bc MD |
851 | */ |
| 852 | mincoreinfo = pmap_mincore(pmap, addr); | |
| 1b874851 MD |
853 | if (mincoreinfo == 0 && |
| 854 | current->maptype == VM_MAPTYPE_NORMAL) { | |
| 984263bc MD |
855 | vm_pindex_t pindex; |
| 856 | vm_ooffset_t offset; | |
| 857 | vm_page_t m; | |
| 06ecca5a | 858 | |
| 984263bc MD |
859 | /* |
| 860 | * calculate the page index into the object | |
| 861 | */ | |
| 862 | offset = current->offset + (addr - current->start); | |
| 863 | pindex = OFF_TO_IDX(offset); | |
| 06ecca5a | 864 | |
| 984263bc | 865 | /* |
| 06ecca5a MD |
866 | * if the page is resident, then gather |
| 867 | * information about it. spl protection is | |
| 868 | * required to maintain the object | |
| 869 | * association. And XXX what if the page is | |
| 870 | * busy? What's the deal with that? | |
| 984263bc | 871 | */ |
| 654a39f0 | 872 | crit_enter(); |
| 06ecca5a MD |
873 | m = vm_page_lookup(current->object.vm_object, |
| 874 | pindex); | |
| 2ff71562 | 875 | if (m && m->valid) { |
| 984263bc MD |
876 | mincoreinfo = MINCORE_INCORE; |
| 877 | if (m->dirty || | |
| 878 | pmap_is_modified(m)) | |
| 879 | mincoreinfo |= MINCORE_MODIFIED_OTHER; | |
| 880 | if ((m->flags & PG_REFERENCED) || | |
| 881 | pmap_ts_referenced(m)) { | |
| 882 | vm_page_flag_set(m, PG_REFERENCED); | |
| 883 | mincoreinfo |= MINCORE_REFERENCED_OTHER; | |
| 884 | } | |
| 885 | } | |
| 654a39f0 | 886 | crit_exit(); |
| 984263bc MD |
887 | } |
| 888 | ||
| 889 | /* | |
| 890 | * subyte may page fault. In case it needs to modify | |
| 891 | * the map, we release the lock. | |
| 892 | */ | |
| 893 | vm_map_unlock_read(map); | |
| 894 | ||
| 895 | /* | |
| 896 | * calculate index into user supplied byte vector | |
| 897 | */ | |
| 898 | vecindex = OFF_TO_IDX(addr - first_addr); | |
| 899 | ||
| 900 | /* | |
| 901 | * If we have skipped map entries, we need to make sure that | |
| 902 | * the byte vector is zeroed for those skipped entries. | |
| 903 | */ | |
| 904 | while((lastvecindex + 1) < vecindex) { | |
| 905 | error = subyte( vec + lastvecindex, 0); | |
| 906 | if (error) { | |
| 3919ced0 MD |
907 | error = EFAULT; |
| 908 | goto done; | |
| 984263bc MD |
909 | } |
| 910 | ++lastvecindex; | |
| 911 | } | |
| 912 | ||
| 913 | /* | |
| 914 | * Pass the page information to the user | |
| 915 | */ | |
| 916 | error = subyte( vec + vecindex, mincoreinfo); | |
| 917 | if (error) { | |
| 3919ced0 MD |
918 | error = EFAULT; |
| 919 | goto done; | |
| 984263bc MD |
920 | } |
| 921 | ||
| 922 | /* | |
| 923 | * If the map has changed, due to the subyte, the previous | |
| 924 | * output may be invalid. | |
| 925 | */ | |
| 926 | vm_map_lock_read(map); | |
| 927 | if (timestamp != map->timestamp) | |
| 928 | goto RestartScan; | |
| 929 | ||
| 930 | lastvecindex = vecindex; | |
| 931 | addr += PAGE_SIZE; | |
| 932 | } | |
| 933 | } | |
| 934 | ||
| 935 | /* | |
| 936 | * subyte may page fault. In case it needs to modify | |
| 937 | * the map, we release the lock. | |
| 938 | */ | |
| 939 | vm_map_unlock_read(map); | |
| 940 | ||
| 941 | /* | |
| 942 | * Zero the last entries in the byte vector. | |
| 943 | */ | |
| 944 | vecindex = OFF_TO_IDX(end - first_addr); | |
| 945 | while((lastvecindex + 1) < vecindex) { | |
| 946 | error = subyte( vec + lastvecindex, 0); | |
| 947 | if (error) { | |
| 3919ced0 MD |
948 | error = EFAULT; |
| 949 | goto done; | |
| 984263bc MD |
950 | } |
| 951 | ++lastvecindex; | |
| 952 | } | |
| 953 | ||
| 954 | /* | |
| 955 | * If the map has changed, due to the subyte, the previous | |
| 956 | * output may be invalid. | |
| 957 | */ | |
| 958 | vm_map_lock_read(map); | |
| 959 | if (timestamp != map->timestamp) | |
| 960 | goto RestartScan; | |
| 961 | vm_map_unlock_read(map); | |
| 962 | ||
| 3919ced0 MD |
963 | error = 0; |
| 964 | done: | |
| 965 | rel_mplock(); | |
| 966 | return (error); | |
| 984263bc MD |
967 | } |
| 968 | ||
| 41c20dac MD |
969 | /* |
| 970 | * mlock_args(const void *addr, size_t len) | |
| 3919ced0 MD |
971 | * |
| 972 | * MPALMOSTSAFE | |
| 41c20dac | 973 | */ |
| 984263bc | 974 | int |
| 753fd850 | 975 | sys_mlock(struct mlock_args *uap) |
| 984263bc MD |
976 | { |
| 977 | vm_offset_t addr; | |
| e54488bb | 978 | vm_offset_t tmpaddr; |
| 984263bc | 979 | vm_size_t size, pageoff; |
| 9910d07b MD |
980 | struct thread *td = curthread; |
| 981 | struct proc *p = td->td_proc; | |
| 3919ced0 | 982 | int error; |
| 984263bc MD |
983 | |
| 984 | addr = (vm_offset_t) uap->addr; | |
| 985 | size = uap->len; | |
| 986 | ||
| 987 | pageoff = (addr & PAGE_MASK); | |
| 988 | addr -= pageoff; | |
| 989 | size += pageoff; | |
| 990 | size = (vm_size_t) round_page(size); | |
| e54488bb MD |
991 | if (size < uap->len) /* wrap */ |
| 992 | return(EINVAL); | |
| 993 | tmpaddr = addr + size; /* workaround gcc4 opt */ | |
| 994 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
995 | return (EINVAL); |
| 996 | ||
| 12e4aaff | 997 | if (atop(size) + vmstats.v_wire_count > vm_page_max_wired) |
| 984263bc MD |
998 | return (EAGAIN); |
| 999 | ||
| 3919ced0 | 1000 | get_mplock(); |
| 984263bc MD |
1001 | #ifdef pmap_wired_count |
| 1002 | if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > | |
| 3919ced0 MD |
1003 | p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) { |
| 1004 | rel_mplock(); | |
| 984263bc | 1005 | return (ENOMEM); |
| 3919ced0 | 1006 | } |
| 984263bc | 1007 | #else |
| 9910d07b | 1008 | error = priv_check_cred(td->td_ucred, PRIV_ROOT, 0); |
| 3919ced0 MD |
1009 | if (error) { |
| 1010 | rel_mplock(); | |
| 984263bc | 1011 | return (error); |
| 3919ced0 | 1012 | } |
| 984263bc | 1013 | #endif |
| cde87949 | 1014 | error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, FALSE); |
| 3919ced0 | 1015 | rel_mplock(); |
| 984263bc MD |
1016 | return (error == KERN_SUCCESS ? 0 : ENOMEM); |
| 1017 | } | |
| 1018 | ||
/*
 * mlockall_args(int how)
 *
 * Placeholder system call handler: the argument structure is never
 * examined and the call always fails with ENOSYS.
 *
 * MPSAFE
 */
int
sys_mlockall(struct mlockall_args *uap)
{
	/* not implemented */
	return (ENOSYS);
}
| 1031 | ||
/*
 * munlockall_args(void)
 *
 * Placeholder system call handler: the argument structure is never
 * examined and the call always fails with ENOSYS.
 *
 * MPSAFE
 */
int
sys_munlockall(struct munlockall_args *uap)
{
	/* not implemented */
	return (ENOSYS);
}
| 1044 | ||
| 41c20dac MD |
1045 | /* |
| 1046 | * munlock_args(const void *addr, size_t len) | |
| 3919ced0 MD |
1047 | * |
| 1048 | * MPALMOSTSAFE | |
| 41c20dac | 1049 | */ |
| 984263bc | 1050 | int |
| 753fd850 | 1051 | sys_munlock(struct munlock_args *uap) |
| 984263bc | 1052 | { |
| dadab5e9 MD |
1053 | struct thread *td = curthread; |
| 1054 | struct proc *p = td->td_proc; | |
| 984263bc | 1055 | vm_offset_t addr; |
| e54488bb | 1056 | vm_offset_t tmpaddr; |
| 984263bc MD |
1057 | vm_size_t size, pageoff; |
| 1058 | int error; | |
| 1059 | ||
| 1060 | addr = (vm_offset_t) uap->addr; | |
| 1061 | size = uap->len; | |
| 1062 | ||
| 1063 | pageoff = (addr & PAGE_MASK); | |
| 1064 | addr -= pageoff; | |
| 1065 | size += pageoff; | |
| 1066 | size = (vm_size_t) round_page(size); | |
| 1067 | ||
| e54488bb MD |
1068 | tmpaddr = addr + size; |
| 1069 | if (tmpaddr < addr) /* wrap */ | |
| 984263bc MD |
1070 | return (EINVAL); |
| 1071 | ||
| 1072 | #ifndef pmap_wired_count | |
| 895c1f85 | 1073 | error = priv_check(td, PRIV_ROOT); |
| 984263bc MD |
1074 | if (error) |
| 1075 | return (error); | |
| 1076 | #endif | |
| 1077 | ||
| 3919ced0 | 1078 | get_mplock(); |
| cde87949 | 1079 | error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, TRUE); |
| 3919ced0 | 1080 | rel_mplock(); |
| 984263bc MD |
1081 | return (error == KERN_SUCCESS ? 0 : ENOMEM); |
| 1082 | } | |
| 1083 | ||
| 1084 | /* | |
| 1085 | * Internal version of mmap. | |
| 1086 | * Currently used by mmap, exec, and sys5 shared memory. | |
| 1087 | * Handle is either a vnode pointer or NULL for MAP_ANON. | |
| 1088 | */ | |
| 1089 | int | |
| 1090 | vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, | |
| c809941b | 1091 | vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff) |
| 984263bc MD |
1092 | { |
| 1093 | boolean_t fitit; | |
| 1094 | vm_object_t object; | |
| 85d25bcf MD |
1095 | vm_offset_t eaddr; |
| 1096 | vm_size_t esize; | |
| a6e41612 | 1097 | struct vnode *vp; |
| 349433c9 | 1098 | struct thread *td = curthread; |
| d3313941 | 1099 | struct proc *p; |
| 984263bc | 1100 | int rv = KERN_SUCCESS; |
| 57f7b636 | 1101 | off_t objsize; |
| 984263bc | 1102 | int docow; |
| 984263bc MD |
1103 | |
| 1104 | if (size == 0) | |
| 1105 | return (0); | |
| 1106 | ||
| e54488bb MD |
1107 | objsize = round_page(size); |
| 1108 | if (objsize < size) | |
| 1109 | return (EINVAL); | |
| 1110 | size = objsize; | |
| 984263bc | 1111 | |
| d3313941 MD |
1112 | /* |
| 1113 | * XXX messy code, fixme | |
| 85d25bcf MD |
1114 | * |
| 1115 | * NOTE: Overflow checks require discrete statements or GCC4 | |
| 1116 | * will optimize it out. | |
| d3313941 MD |
1117 | */ |
| 1118 | if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) { | |
| e54488bb | 1119 | esize = map->size + size; /* workaround gcc4 opt */ |
| 85d25bcf MD |
1120 | if (esize < map->size || |
| 1121 | esize > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { | |
| d3313941 | 1122 | return(ENOMEM); |
| 85d25bcf | 1123 | } |
| 984263bc MD |
1124 | } |
| 1125 | ||
| 1126 | /* | |
| 1127 | * We currently can only deal with page aligned file offsets. | |
| 1128 | * The check is here rather than in the syscall because the | |
| 1129 | * kernel calls this function internally for other mmaping | |
| 1130 | * operations (such as in exec) and non-aligned offsets will | |
| 1131 | * cause pmap inconsistencies...so we want to be sure to | |
| 1132 | * disallow this in all cases. | |
| 85d25bcf MD |
1133 | * |
| 1134 | * NOTE: Overflow checks require discrete statements or GCC4 | |
| 1135 | * will optimize it out. | |
| 984263bc MD |
1136 | */ |
| 1137 | if (foff & PAGE_MASK) | |
| 1138 | return (EINVAL); | |
| 1139 | ||
| c809941b | 1140 | if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) { |
| 984263bc MD |
1141 | fitit = TRUE; |
| 1142 | *addr = round_page(*addr); | |
| 1143 | } else { | |
| 1144 | if (*addr != trunc_page(*addr)) | |
| 1145 | return (EINVAL); | |
| 85d25bcf MD |
1146 | eaddr = *addr + size; |
| 1147 | if (eaddr < *addr) | |
| 1148 | return (EINVAL); | |
| 984263bc | 1149 | fitit = FALSE; |
| c809941b MD |
1150 | if ((flags & MAP_TRYFIXED) == 0) |
| 1151 | vm_map_remove(map, *addr, *addr + size); | |
| 984263bc MD |
1152 | } |
| 1153 | ||
| 1154 | /* | |
| 1155 | * Lookup/allocate object. | |
| 1156 | */ | |
| 1157 | if (flags & MAP_ANON) { | |
| 984263bc MD |
1158 | /* |
| 1159 | * Unnamed anonymous regions always start at 0. | |
| 1160 | */ | |
| 5a648714 MD |
1161 | if (handle) { |
| 1162 | /* | |
| 1163 | * Default memory object | |
| 1164 | */ | |
| 1165 | object = default_pager_alloc(handle, objsize, | |
| 1166 | prot, foff); | |
| 1167 | if (object == NULL) | |
| 1168 | return(ENOMEM); | |
| 1169 | docow = MAP_PREFAULT_PARTIAL; | |
| 1170 | } else { | |
| 1171 | /* | |
| 1172 | * Implicit single instance of a default memory | |
| 1173 | * object, so we don't need a VM object yet. | |
| 1174 | */ | |
| 984263bc | 1175 | foff = 0; |
| 5a648714 MD |
1176 | object = NULL; |
| 1177 | docow = 0; | |
| 1178 | } | |
| a6e41612 | 1179 | vp = NULL; |
| 984263bc | 1180 | } else { |
| a6e41612 | 1181 | vp = (struct vnode *)handle; |
| 984263bc | 1182 | if (vp->v_type == VCHR) { |
| 5a648714 MD |
1183 | /* |
| 1184 | * Device mappings (device size unknown?). | |
| 1185 | * Force them to be shared. | |
| 1186 | */ | |
| 984263bc | 1187 | handle = (void *)(intptr_t)vp->v_rdev; |
| 5a648714 MD |
1188 | object = dev_pager_alloc(handle, objsize, prot, foff); |
| 1189 | if (object == NULL) | |
| 1190 | return(EINVAL); | |
| 1191 | docow = MAP_PREFAULT_PARTIAL; | |
| 1192 | flags &= ~(MAP_PRIVATE|MAP_COPY); | |
| 1193 | flags |= MAP_SHARED; | |
| 984263bc | 1194 | } else { |
| 5a648714 MD |
1195 | /* |
| 1196 | * Regular file mapping (typically). The attribute | |
| 1197 | * check is for the link count test only. Mmapble | |
| 1198 | * vnodes must already have a VM object assigned. | |
| 1199 | */ | |
| f9cae863 | 1200 | struct vattr vat; |
| 984263bc MD |
1201 | int error; |
| 1202 | ||
| 87de5057 | 1203 | error = VOP_GETATTR(vp, &vat); |
| 984263bc MD |
1204 | if (error) |
| 1205 | return (error); | |
| 5a648714 MD |
1206 | docow = MAP_PREFAULT_PARTIAL; |
| 1207 | object = vnode_pager_reference(vp); | |
| 1208 | if (object == NULL && vp->v_type == VREG) { | |
| 1209 | kprintf("Warning: cannot mmap vnode %p, no " | |
| 1210 | "object\n", vp); | |
| 1211 | return(EINVAL); | |
| 1212 | } | |
| 1213 | ||
| 984263bc | 1214 | /* |
| 5a648714 | 1215 | * If it is a regular file without any references |
| 984263bc MD |
1216 | * we do not need to sync it. |
| 1217 | */ | |
| 1218 | if (vp->v_type == VREG && vat.va_nlink == 0) { | |
| 1219 | flags |= MAP_NOSYNC; | |
| 1220 | } | |
| 1221 | } | |
| 1222 | } | |
| 1223 | ||
| 984263bc | 1224 | /* |
| 5a648714 | 1225 | * Deal with the adjusted flags |
| 984263bc | 1226 | */ |
| 984263bc MD |
1227 | if ((flags & (MAP_ANON|MAP_SHARED)) == 0) |
| 1228 | docow |= MAP_COPY_ON_WRITE; | |
| 1229 | if (flags & MAP_NOSYNC) | |
| 1230 | docow |= MAP_DISABLE_SYNCER; | |
| 1231 | if (flags & MAP_NOCORE) | |
| 1232 | docow |= MAP_DISABLE_COREDUMP; | |
| 1233 | ||
| 1234 | #if defined(VM_PROT_READ_IS_EXEC) | |
| 1235 | if (prot & VM_PROT_READ) | |
| 1236 | prot |= VM_PROT_EXECUTE; | |
| 1237 | ||
| 1238 | if (maxprot & VM_PROT_READ) | |
| 1239 | maxprot |= VM_PROT_EXECUTE; | |
| 1240 | #endif | |
| 1241 | ||
| c809941b MD |
1242 | /* |
| 1243 | * This may place the area in its own page directory if (size) is | |
| 1244 | * large enough, otherwise it typically returns its argument. | |
| 1245 | */ | |
| 984263bc MD |
1246 | if (fitit) { |
| 1247 | *addr = pmap_addr_hint(object, *addr, size); | |
| 1248 | } | |
| 1249 | ||
| 568e6804 | 1250 | /* |
| c809941b MD |
1251 | * Stack mappings need special attention. |
| 1252 | * | |
| 1253 | * Mappings that use virtual page tables will default to storing | |
| 1254 | * the page table at offset 0. | |
| 568e6804 | 1255 | */ |
| 1b874851 | 1256 | if (flags & MAP_STACK) { |
| c809941b | 1257 | rv = vm_map_stack(map, *addr, size, flags, |
| 85d25bcf | 1258 | prot, maxprot, docow); |
| 568e6804 | 1259 | } else if (flags & MAP_VPAGETABLE) { |
| 9388fcaa MD |
1260 | rv = vm_map_find(map, object, foff, addr, size, PAGE_SIZE, |
| 1261 | fitit, VM_MAPTYPE_VPAGETABLE, | |
| 1262 | prot, maxprot, docow); | |
| 1b874851 | 1263 | } else { |
| 9388fcaa MD |
1264 | rv = vm_map_find(map, object, foff, addr, size, PAGE_SIZE, |
| 1265 | fitit, VM_MAPTYPE_NORMAL, | |
| 1266 | prot, maxprot, docow); | |
| 1b874851 | 1267 | } |
| 984263bc MD |
1268 | |
| 1269 | if (rv != KERN_SUCCESS) { | |
| 1270 | /* | |
| 1271 | * Lose the object reference. Will destroy the | |
| 1272 | * object if it's an unnamed anonymous mapping | |
| 1273 | * or named anonymous without other references. | |
| 1274 | */ | |
| 1275 | vm_object_deallocate(object); | |
| 1276 | goto out; | |
| 1277 | } | |
| 1278 | ||
| 1279 | /* | |
| 1280 | * Shared memory is also shared with children. | |
| 1281 | */ | |
| 1282 | if (flags & (MAP_SHARED|MAP_INHERIT)) { | |
| 1283 | rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); | |
| 1284 | if (rv != KERN_SUCCESS) { | |
| 418ff780 | 1285 | vm_map_remove(map, *addr, *addr + size); |
| 984263bc MD |
1286 | goto out; |
| 1287 | } | |
| 1288 | } | |
| 349433c9 MD |
1289 | |
| 1290 | /* | |
| 1291 | * Set the access time on the vnode | |
| 1292 | */ | |
| a6e41612 MD |
1293 | if (vp != NULL) |
| 1294 | vn_mark_atime(vp, td); | |
| 984263bc MD |
1295 | out: |
| 1296 | switch (rv) { | |
| 1297 | case KERN_SUCCESS: | |
| 1298 | return (0); | |
| 1299 | case KERN_INVALID_ADDRESS: | |
| 1300 | case KERN_NO_SPACE: | |
| 1301 | return (ENOMEM); | |
| 1302 | case KERN_PROTECTION_FAILURE: | |
| 1303 | return (EACCES); | |
| 1304 | default: | |
| 1305 | return (EINVAL); | |
| 1306 | } | |
| 1307 | } |