/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.108.2.6 2002/07/02 20:06:19 dillon Exp $
 * $DragonFly: src/sys/vm/vm_mmap.c,v 1.39 2007/04/30 07:18:57 dillon Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kern_syscall.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <sys/file2.h>
#include <sys/thread2.h>
static int max_proc_mmap;
SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");
int vkernel_enable;
SYSCTL_INT(_vm, OID_AUTO, vkernel_enable, CTLFLAG_RW, &vkernel_enable, 0, "");
/*
 * Set the maximum number of vm_map_entry structures per process.  Roughly
 * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100
 * of our KVM malloc space still results in generous limits.  We want a
 * default that is good enough to prevent the kernel running out of resources
 * if attacked from a compromised user account but generous enough such that
 * multi-threaded processes are not unduly inconvenienced.
 */

static void vmmapentry_rsrc_init (void *);
SYSINIT(vmmersrc, SI_BOOT1_POST, SI_ORDER_ANY, vmmapentry_rsrc_init, NULL)

static void
vmmapentry_rsrc_init(void *dummy)
{
	max_proc_mmap = KvaSize / sizeof(struct vm_map_entry);
	max_proc_mmap /= 100;
}
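
/*
 * Worked example of the sizing above (hypothetical numbers, not measured
 * values): with KvaSize = 1 GB and sizeof(struct vm_map_entry) = 64 bytes,
 * the first line yields 1073741824 / 64 = 16777216, and the /= 100 leaves
 * a per-process default of roughly 167,772 map entries.
 */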
/*
 * sbrk_args(int incr)
 */
int
sys_sbrk(struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/*
 * sstk_args(int incr)
 */
int
sys_sstk(struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}
/*
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *		long pad, off_t pos)
 *
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * device.
 */
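
/*
 * Worked example of the alignment rule above (illustrative values only):
 * with PAGE_SIZE = 4096, a call such as mmap(0, 100, prot, flags, fd, 0x1234)
 * has pageoff = 0x234.  The mapping itself starts at file offset 0x1000
 * (pos - pageoff) on a page-aligned address, and the pointer returned to the
 * caller is that address plus 0x234, so it still refers to file byte 0x1234.
 * Under MAP_FIXED the supplied address must share the same 0x234 remainder
 * modulo PAGE_SIZE or kern_mmap() returns EINVAL.
 */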
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp = NULL;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	int disablexworkaround;
	off_t pos;
	vm_object_t obj;

	addr = (vm_offset_t) uaddr;
	size = ulen;
	prot = uprot & VM_PROT_ALL;
	flags = uflags;
	pos = upos;

	/* make sure mapping fits into numeric range etc */
	if ((ssize_t) ulen < 0 || ((flags & MAP_ANON) && fd != -1))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page tables cannot be used with MAP_STACK.  Apart from
	 * it not making any sense, the aux union is used by both
	 * types.
	 *
	 * Because the virtual page table is stored in the backing object
	 * and might be updated by the kernel, the mapping must be R+W.
	 */
	if (flags & MAP_VPAGETABLE) {
		if (vkernel_enable == 0)
			return (EOPNOTSUPP);
		if (flags & MAP_STACK)
			return (EINVAL);
		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
			return (EINVAL);
	}
	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);
		/* Address range must be all in user VM space. */
		if (VM_MAX_USER_ADDRESS > 0 && addr + size > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else if ((flags & MAP_TRYFIXED) == 0) {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		     addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz)))
			addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		fp = holdfp(p->p_fd, fd, -1);
		if (fp == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;

		/*
		 * Validate the vnode for the operation.
		 */
		switch(vp->v_type) {
		case VREG:
			/*
			 * Get the proper underlying object
			 */
			if ((obj = vp->v_object) == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)obj->handle == vp);
			break;
		case VCHR:
			/*
			 * Make sure a device has not been revoked.
			 * Mappability is handled by the device layer.
			 */
			if (vp->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			/*
			 * Nothing else is mappable.
			 */
			error = EINVAL;
			goto done;
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 *
			 * However, for the XIG X server to continue to work,
			 * we should allow the superuser to do it anyway.
			 * We only allow it at securelevel < 1.
			 * (Because the XIG X server writes directly to video
			 * memory via /dev/mem, it should never work at any
			 * other securelevel.)
			 * XXX this will have to go
			 */
			if (securelevel >= 1)
				disablexworkaround = 1;
			else
				disablexworkaround = suser(td);
			if (vp->v_type == VCHR && disablexworkaround &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */
			if ((flags & MAP_SHARED) != 0 ||
			    (vp->v_type == VCHR && disablexworkaround)) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;

					if ((error = VOP_GETATTR(vp, &va))) {
						goto done;
					}
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;
					} else if (prot & PROT_WRITE) {
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}
			handle = (void *)vp;
		}
	}
	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vms->vm_sysref.refcnt) {
		error = ENOMEM;
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
			flags, handle, pos);
	if (error == 0)
		*res = (void *)(addr + pageoff);
done:
	if (fp)
		fdrop(fp);
	return (error);
}
int
sys_mmap(struct mmap_args *uap)
{
	int error;

	error = kern_mmap(curproc->p_vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->pos, &uap->sysmsg_resultp);

	return (error);
}
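
/*
 * Example of the argument plumbing (illustrative only): a userland call
 *
 *	p = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 * arrives here with uap->addr = NULL, uap->len = 8192, uap->prot = PROT_READ,
 * uap->flags = MAP_PRIVATE, uap->fd = fd and uap->pos = 0.  kern_mmap() does
 * the validation and descriptor handling, vm_mmap() below performs the actual
 * vm_map insertion, and the resulting user address comes back through
 * uap->sysmsg_resultp.
 */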
/*
 * msync_args(void *addr, int len, int flags)
 */
int
sys_msync(struct msync_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
			  (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}
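
/*
 * Summary of the flag handling above (derived from the code, for clarity):
 * MS_SYNC or no flag makes the vm_map_clean() call synchronous because
 * (flags & MS_ASYNC) == 0 evaluates TRUE; MS_ASYNC requests an asynchronous
 * flush; MS_INVALIDATE additionally invalidates cached pages; and the
 * combination MS_ASYNC|MS_INVALIDATE is rejected with EINVAL before the
 * clean is issued.
 */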
/*
 * munmap_args(void *addr, size_t len)
 */
int
sys_munmap(struct munmap_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);
	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAX_USER_ADDRESS > 0 && addr + size > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	vm_map_remove(map, addr, addr + size);
	return (0);
}
/*
 * mprotect_args(const void *addr, size_t len, int prot)
 */
int
sys_mprotect(struct mprotect_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
			       FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
/*
 * minherit_args(void *addr, size_t len, int inherit)
 */
int
sys_minherit(struct minherit_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
			       inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
/*
 * madvise_args(void *addr, size_t len, int behav)
 */
int
sys_madvise(struct madvise_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t start, end;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav >= MADV_CONTROL_END)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAX_USER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	return (vm_map_madvise(&p->p_vmspace->vm_map, start, end,
			       uap->behav, 0));
}
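
/*
 * Usage note (illustrative): ordinary advice such as
 * madvise(addr, len, MADV_WILLNEED) or MADV_DONTNEED is passed straight
 * through to vm_map_madvise() with a zero value argument; behaviors that
 * need an operand must use the extended mcontrol() entry point below.
 */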
/*
 * mcontrol_args(void *addr, size_t len, int behav, off_t value)
 */
int
sys_mcontrol(struct mcontrol_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t start, end;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAX_USER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	return (vm_map_madvise(&p->p_vmspace->vm_map, start, end,
			       uap->behav, uap->value));
}
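
/*
 * Usage sketch (illustrative, assumes a vkernel-style caller): a region
 * created with MAP_VPAGETABLE would typically be followed by something like
 *
 *	mcontrol(base, len, MADV_SETMAP, pte_base);
 *
 * to tell the VM system where the virtual page table for that range begins;
 * plain madvise() cannot express this because it carries no value argument.
 */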
/*
 * mincore_args(const void *addr, size_t len, char *vec)
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;
	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {
		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 */
				crit_enter();
				m = vm_page_lookup(current->object.vm_object,
						   pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty || pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				crit_exit();
			}
			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				return (EFAULT);
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}
	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	return (0);
}
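
/*
 * Example of the result format (illustrative): for a three page range the
 * caller's vector might come back as {0x01, 0x00, 0x03}, meaning page 0 is
 * resident (MINCORE_INCORE), page 1 is not resident, and page 2 is resident
 * and referenced (MINCORE_INCORE | MINCORE_REFERENCED).
 */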
/*
 * mlock_args(const void *addr, size_t len)
 */
int
sys_mlock(struct mlock_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;
	struct proc *p = curproc;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + vmstats.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (ENOMEM);
#else
	error = suser_cred(p->p_ucred, 0);
	if (error)
		return (error);
#endif

	/* FALSE = make the range non-pageable (wired); see sys_munlock() */
	error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
/*
 * mlockall_args(int how)
 */
int
sys_mlockall(struct mlockall_args *uap)
{
	return 0;
}

/*
 * munlockall_args(void)
 */
int
sys_munlockall(struct munlockall_args *uap)
{
	return 0;
}
/*
 * munlock_args(const void *addr, size_t len)
 */
int
sys_munlock(struct munlock_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(td);
	if (error)
		return (error);
#endif

	/* TRUE = make the range pageable again (unwired) */
	error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
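
/*
 * Illustrative internal call (a sketch, not code from this file): an exec
 * or SysV shm path maps a backing vnode roughly as
 *
 *	error = vm_mmap(&vms->vm_map, &addr, size, VM_PROT_READ,
 *			VM_PROT_ALL, MAP_SHARED, (void *)vp, foff);
 *
 * where 'foff' must already be page aligned; the PAGE_MASK check below
 * rejects unaligned offsets for these internal callers as well.
 */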
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * XXX messy code, fixme
	 */
	if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) {
		if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur)
			return (ENOMEM);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		vm_map_remove(map, *addr, *addr + size);
	}
	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == NULL)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (void *)(intptr_t)vp->v_rdev;
		} else {
			struct vattr vat, tsvat;
			int error;

			error = VOP_GETATTR(vp, &vat);
			if (error)
				return (error);

			/* Update access time */
			VATTR_NULL(&tsvat);
			vfs_timestamp(&tsvat.va_atime);
			VOP_SETATTR(vp, &tsvat,
				    curproc != NULL ? curproc->p_ucred : NULL);

			objsize = vat.va_size;
			type = OBJT_VNODE;
			/*
			 * if it is a regular file without any references
			 * we do not need to sync it.
			 */
			if (vp->v_type == VREG && vat.va_nlink == 0) {
				flags |= MAP_NOSYNC;
			}
		}
	}
	if (handle == NULL) {
		object = NULL;
		docow = 0;
	} else {
		object = vm_pager_allocate(type, handle, objsize, prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE || type == OBJT_PHYS) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit)
		*addr = pmap_addr_hint(object, *addr, size);
	/*
	 * Stack mappings need special attention.  Mappings that use virtual
	 * page tables will default to storing the page table at offset 0.
	 */
	if (flags & MAP_STACK) {
		rv = vm_map_stack(map, *addr, size, prot, maxprot, docow);
	} else if (flags & MAP_VPAGETABLE) {
		rv = vm_map_find(map, object, foff, addr, size, fitit,
				 VM_MAPTYPE_VPAGETABLE, prot, maxprot, docow);
	} else {
		rv = vm_map_find(map, object, foff, addr, size, fitit,
				 VM_MAPTYPE_NORMAL, prot, maxprot, docow);
	}

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}

out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}