/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $
 * $DragonFly: src/sys/vm/vm_map.c,v 1.22 2004/03/01 06:33:24 dillon Exp $
 */

/*
 * Virtual memory mapping module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/shm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>
#include <vm/vm_zone.h>

#include <sys/thread2.h>

/*
 * Virtual memory maps provide for the mapping, protection,
 * and sharing of virtual memory objects.  In addition,
 * this module provides for an efficient virtual copy of
 * memory from one map to another.
 *
 * Synchronization is required prior to most operations.
 *
 * Maps consist of an ordered doubly-linked list of simple
 * entries; a single hint is used to speed up lookups.
 *
 * Since portions of maps are specified by start/end addresses,
 * which may not align with existing map entries, all
 * routines merely "clip" entries to these start/end values.
 * [That is, an entry is split into two, bordering at a
 * start or end value.]  Note that these clippings may not
 * always be necessary (as the two resulting entries are then
 * not changed); however, the clipping is done for convenience.
 *
 * As mentioned above, virtual copy operations are performed
 * by copying VM object references from one map to
 * another, and then marking both regions as copy-on-write.
 */
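
/*
 * Illustrative sketch (not part of the original code): clipping an
 * entry that covers [0x1000, 0x4000) at address 0x2000 splits it into
 * two adjacent entries, [0x1000, 0x2000) and [0x2000, 0x4000), with
 * the second entry's object offset advanced by 0x1000 so both halves
 * continue to map the same backing pages.
 */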

/*
 * vm_map_startup:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from the general
 *	purpose memory pool with some exceptions:
 *
 *	- The kernel map and kmem submap are allocated statically.
 *	- Kernel map entries are allocated out of a static pool.
 *
 *	These restrictions are necessary since malloc() uses the
 *	maps and requires map entries.
 */

static struct vm_zone mapentzone_store, mapzone_store;
static vm_zone_t mapentzone, mapzone, vmspace_zone;
static struct vm_object mapentobj, mapobj;

static struct vm_map_entry map_entry_init[MAX_MAPENT];
static struct vm_map map_init[MAX_KMAP];

static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *);
static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *);
static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *);
static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t);
static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t,
		vm_map_entry_t);
static void vm_map_split (vm_map_entry_t);
static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry,
		vm_offset_t start, vm_offset_t end, int *count, int flags);

void
vm_map_startup(void)
{
	mapzone = &mapzone_store;
	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
		map_init, MAX_KMAP);
	mapentzone = &mapentzone_store;
	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
		map_entry_init, MAX_MAPENT);
}

/*
 * Allocate a vmspace structure, including a vm_map and pmap,
 * and initialize those structures.  The refcnt is set to 1.
 * The remaining fields must be initialized by the caller.
 */
struct vmspace *
vmspace_alloc(vm_offset_t min, vm_offset_t max)
{
	struct vmspace *vm;

	vm = zalloc(vmspace_zone);
	vm_map_init(&vm->vm_map, min, max);
	pmap_pinit(vmspace_pmap(vm));
	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
	vm->vm_refcnt = 1;
	vm->vm_shm = NULL;
	vm->vm_exitingcnt = 0;
	return (vm);
}

void
vm_init2(void)
{
	zinitna(mapentzone, &mapentobj, NULL, 0, 0, ZONE_USE_RESERVE, 1);
	zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1);
	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
	pmap_init2();
	vm_object_init2();
}

static __inline void
vmspace_dofree(struct vmspace *vm)
{
	int count;

	/*
	 * Make sure any SysV shm is freed; it might not have been
	 * freed in exit1().
	 */
	shmexit(vm);

	KKASSERT(vm->vm_upcalls == NULL);

	/*
	 * Lock the map, to wait out all other references to it.
	 * Delete all of the mappings and pages they hold, then call
	 * the pmap module to reclaim anything left.
	 */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(&vm->vm_map);
	vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
		vm->vm_map.max_offset, &count);
	vm_map_unlock(&vm->vm_map);
	vm_map_entry_release(count);

	pmap_release(vmspace_pmap(vm));
	zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{
	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	vm = p->p_vmspace;
	p->p_vmspace = NULL;

	/*
	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
	 * may not be 0 (e.g. fork() and child exits without exec()ing).
	 * exitingcnt may increment above 0 and drop back down to zero
	 * several times while vm_refcnt is held non-zero.  vm_refcnt
	 * may also increment above 0 and drop back down to zero several
	 * times while vm_exitingcnt is held non-zero.
	 *
	 * The last wait on the exiting child's vmspace will clean up
	 * the remainder of the vmspace.
	 */
	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
		vmspace_dofree(vm);
}

/*
 * vmspace_swap_count() - count the approximate swap usage in pages for a
 *			  vmspace.
 *
 *	Swap usage is determined by taking the proportional swap used by
 *	VM objects backing the VM map.  To make up for fractional losses,
 *	if the VM object has any swap use at all the associated map entries
 *	count for at least 1 swap page.
 */
int
vmspace_swap_count(struct vmspace *vmspace)
{
	vm_map_t map = &vmspace->vm_map;
	vm_map_entry_t cur;
	int count = 0;

	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
		vm_object_t object;

		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
		    (object = cur->object.vm_object) != NULL &&
		    object->type == OBJT_SWAP
		) {
			int n = (cur->end - cur->start) / PAGE_SIZE;

			if (object->un_pager.swp.swp_bcount) {
				count += object->un_pager.swp.swp_bcount *
				    SWAP_META_PAGES * n / object->size + 1;
			}
		}
	}
	return(count);
}
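
/*
 * Worked example of the proportional estimate above (illustrative
 * numbers only): for a 256 page OBJT_SWAP object with swp_bcount == 4,
 * and assuming SWAP_META_PAGES == 16, an entry mapping 64 of those
 * pages contributes 4 * 16 * 64 / 256 + 1 = 17 pages to the count.
 * The "+ 1" keeps an entry whose object has any swap use at all from
 * rounding down to zero.
 */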

/*
 * vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
{
	vm_map_t result;

	result = zalloc(mapzone);
	vm_map_init(result, min, max);
	result->pmap = pmap;
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 * The pmap is set elsewhere.
 */
void
vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max)
{
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->system_map = 0;
	map->infork = 0;
	map->min_offset = min;
	map->max_offset = max;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE);
}

/*
 * vm_map_entry_reserve_cpu_init:
 *
 *	Set an initial negative count so the first attempt to reserve
 *	space preloads a bunch of vm_map_entry's for this cpu.  This
 *	routine is called in early boot so we cannot just call
 *	vm_map_entry_reserve().
 *
 *	May be called for a gd other than mycpu.
 */
void
vm_map_entry_reserve_cpu_init(globaldata_t gd)
{
	gd->gd_vme_avail -= MAP_RESERVE_COUNT * 2;
}

/*
 * vm_map_entry_reserve:
 *
 *	Reserves vm_map_entry structures so code later on can manipulate
 *	map_entry structures within a locked map without blocking trying
 *	to allocate a new vm_map_entry.
 */
int
vm_map_entry_reserve(int count)
{
	struct globaldata *gd = mycpu;
	vm_map_entry_t entry;

	crit_enter();
	gd->gd_vme_avail -= count;

	/*
	 * Make sure we have enough structures in gd_vme_base to handle
	 * the reservation request.
	 */
	while (gd->gd_vme_avail < 0) {
		entry = zalloc(mapentzone);
		entry->next = gd->gd_vme_base;
		gd->gd_vme_base = entry;
		++gd->gd_vme_avail;
	}
	crit_exit();
	return(count);
}

/*
 * vm_map_entry_release:
 *
 *	Releases previously reserved vm_map_entry structures that were not
 *	used.  If we have too much junk in our per-cpu cache, clean some of
 *	it out.
 */
void
vm_map_entry_release(int count)
{
	struct globaldata *gd = mycpu;
	vm_map_entry_t entry;

	crit_enter();
	gd->gd_vme_avail += count;
	while (gd->gd_vme_avail > MAP_RESERVE_SLOP) {
		entry = gd->gd_vme_base;
		KKASSERT(entry != NULL);
		gd->gd_vme_base = entry->next;
		--gd->gd_vme_avail;
		crit_exit();
		zfree(mapentzone, entry);
		crit_enter();
	}
	crit_exit();
}
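
/*
 * The canonical usage pattern for the reserve/release pair above, as
 * used throughout this file (e.g. vmspace_dofree() and vm_map_find()):
 * entries are reserved before the map lock is taken so that entry
 * allocation can never block while the map is locked, and the unused
 * remainder is returned afterwards:
 *
 *	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	vm_map_lock(map);
 *	... clip / insert / delete entries, passing &count ...
 *	vm_map_unlock(map);
 *	vm_map_entry_release(count);
 */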

/*
 * vm_map_entry_kreserve:
 *
 *	Reserve map entry structures for use in kernel_map or (if it exists)
 *	kmem_map.  These entries have *ALREADY* been reserved on a per-cpu
 *	basis when the map was inited.  This function is used by zalloc()
 *	to avoid a recursion when zalloc() itself needs to allocate additional
 *	kernel memory.
 *
 *	This function should only be used when the caller intends to later
 *	call vm_map_entry_reserve() to 'normalize' the reserve cache.
 */
int
vm_map_entry_kreserve(int count)
{
	struct globaldata *gd = mycpu;

	crit_enter();
	gd->gd_vme_kdeficit += count;
	crit_exit();
	KKASSERT(gd->gd_vme_base != NULL);
	return(count);
}

/*
 * vm_map_entry_krelease:
 *
 *	Release previously reserved map entries for kernel_map or kmem_map
 *	use.  This routine determines how many entries were actually used and
 *	replenishes the kernel reserve supply from vme_avail.
 *
 *	If there is insufficient supply vme_avail will go negative, which is
 *	ok.  We cannot safely call zalloc in this function without getting
 *	into a recursion deadlock.  zalloc() will call vm_map_entry_reserve()
 *	to regenerate the lost entries.
 */
void
vm_map_entry_krelease(int count)
{
	struct globaldata *gd = mycpu;

	crit_enter();
	gd->gd_vme_kdeficit -= count;
	gd->gd_vme_avail -= gd->gd_vme_kdeficit;	/* can go negative */
	gd->gd_vme_kdeficit = 0;
	crit_exit();
}
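
/*
 * Sketch of the kernel-reservation variant in use, mirroring
 * vm_map_wire() below: callers allocating on behalf of zalloc()
 * pass KM_KRESERVE so the pre-reserved per-cpu entries are consumed
 * instead of recursing into zalloc():
 *
 *	if (kmflags & KM_KRESERVE)
 *		count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
 *	else
 *		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	... locked map manipulation ...
 *	if (kmflags & KM_KRESERVE)
 *		vm_map_entry_krelease(count);
 *	else
 *		vm_map_entry_release(count);
 */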

/*
 * vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.  No entry fields are filled
 *	in.
 *
 *	This routine may be called from an interrupt thread but not a FAST
 *	interrupt.  This routine may recurse the map lock.
 */
static vm_map_entry_t
vm_map_entry_create(vm_map_t map, int *countp)
{
	struct globaldata *gd = mycpu;
	vm_map_entry_t entry;

	KKASSERT(*countp > 0);
	--*countp;
	crit_enter();
	entry = gd->gd_vme_base;
	KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp));
	gd->gd_vme_base = entry->next;
	crit_exit();
	return(entry);
}

/*
 * vm_map_entry_dispose:	[ internal use only ]
 *
 *	Dispose of a vm_map_entry that is no longer being referenced.  This
 *	function may be called from an interrupt.
 */
static void
vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp)
{
	struct globaldata *gd = mycpu;

	++*countp;
	crit_enter();
	entry->next = gd->gd_vme_base;
	gd->gd_vme_base = entry;
	crit_exit();
}

/*
 * vm_map_entry_{un,}link:
 *
 *	Insert/remove entries from maps.
 */
static __inline void
vm_map_entry_link(vm_map_t map,
		  vm_map_entry_t after_where,
		  vm_map_entry_t entry)
{
	map->nentries++;
	entry->prev = after_where;
	entry->next = after_where->next;
	entry->next->prev = entry;
	after_where->next = entry;
}

static __inline void
vm_map_entry_unlink(vm_map_t map,
		    vm_map_entry_t entry)
{
	vm_map_entry_t prev;
	vm_map_entry_t next;

	if (entry->eflags & MAP_ENTRY_IN_TRANSITION)
		panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry);
	prev = entry->prev;
	next = entry->next;
	next->prev = prev;
	prev->next = next;
	map->nentries--;
}

/*
 * SAVE_HINT:
 *
 *	Saves the specified entry as the hint for
 *	future lookups.
 */
#define	SAVE_HINT(map,value) \
		(map)->hint = (value);

/*
 * vm_map_lookup_entry:	[ internal use only ]
 *
 *	Finds the map entry containing (or
 *	immediately preceding) the specified address
 *	in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(vm_map_t map, vm_offset_t address,
		    vm_map_entry_t *entry /* OUT */)
{
	vm_map_entry_t cur;
	vm_map_entry_t last;

	/*
	 * Start looking either from the head of the list, or from the hint.
	 */

	cur = map->hint;

	if (cur == &map->header)
		cur = cur->next;

	if (address >= cur->start) {
		/*
		 * Go from hint to end of list.
		 *
		 * But first, make a quick check to see if we are already
		 * looking at the entry we want (which is usually the case).
		 * Note also that we don't need to save the hint here... it
		 * is the same hint (unless we are at the header, in which
		 * case the hint didn't buy us anything anyway).
		 */
		last = &map->header;
		if ((cur != last) && (cur->end > address)) {
			*entry = cur;
			return (TRUE);
		}
	} else {
		/*
		 * Go from start to hint, *inclusively*
		 */
		last = cur->next;
		cur = map->header.next;
	}

	/*
	 * Search linearly
	 */

	while (cur != last) {
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * Save this lookup for future hints, and
				 * return
				 */

				*entry = cur;
				SAVE_HINT(map, cur);
				return (TRUE);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
	SAVE_HINT(map, *entry);
	return (FALSE);
}
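
/*
 * Typical lookup pattern used by the range operations later in this
 * file (vm_map_protect(), vm_map_inherit(), etc.): on a hit, clip the
 * found entry at the start address; on a miss, the returned entry
 * precedes the hole and its successor is the first entry that can
 * intersect the range:
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start, &count);
 *	else
 *		entry = entry->next;
 */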

/*
 * vm_map_insert:
 *
 *	Inserts the given whole VM object into the target
 *	map at the specified address range.  The object's
 *	size should match that of the address range.
 *
 *	Requires that the map be locked, and leaves it so.  Requires that
 *	sufficient vm_map_entry structures have been reserved and tracks
 *	the use via countp.
 *
 *	If object is non-NULL, ref count must be bumped by caller
 *	prior to making call to account for the new entry.
 */
int
vm_map_insert(vm_map_t map, int *countp,
	      vm_object_t object, vm_ooffset_t offset,
	      vm_offset_t start, vm_offset_t end,
	      vm_prot_t prot, vm_prot_t max, int cow)
{
	vm_map_entry_t new_entry;
	vm_map_entry_t prev_entry;
	vm_map_entry_t temp_entry;
	vm_eflags_t protoeflags;

	/*
	 * Check that the start and end points are not bogus.
	 */

	if ((start < map->min_offset) || (end > map->max_offset) ||
	    (start >= end))
		return (KERN_INVALID_ADDRESS);

	/*
	 * Find the entry prior to the proposed starting address; if it's part
	 * of an existing entry, this range is bogus.
	 */

	if (vm_map_lookup_entry(map, start, &temp_entry))
		return (KERN_NO_SPACE);

	prev_entry = temp_entry;

	/*
	 * Assert that the next entry doesn't overlap the end point.
	 */

	if ((prev_entry->next != &map->header) &&
	    (prev_entry->next->start < end))
		return (KERN_NO_SPACE);

	protoeflags = 0;

	if (cow & MAP_COPY_ON_WRITE)
		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;

	if (cow & MAP_NOFAULT) {
		protoeflags |= MAP_ENTRY_NOFAULT;

		KASSERT(object == NULL,
			("vm_map_insert: paradoxical MAP_NOFAULT request"));
	}
	if (cow & MAP_DISABLE_SYNCER)
		protoeflags |= MAP_ENTRY_NOSYNC;
	if (cow & MAP_DISABLE_COREDUMP)
		protoeflags |= MAP_ENTRY_NOCOREDUMP;

	if (object) {
		/*
		 * When object is non-NULL, it could be shared with another
		 * process.  We have to set or clear OBJ_ONEMAPPING
		 * appropriately.
		 */
		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
			vm_object_clear_flag(object, OBJ_ONEMAPPING);
		}
	}
	else if ((prev_entry != &map->header) &&
		 (prev_entry->eflags == protoeflags) &&
		 (prev_entry->end == start) &&
		 (prev_entry->wired_count == 0) &&
		 ((prev_entry->object.vm_object == NULL) ||
		  vm_object_coalesce(prev_entry->object.vm_object,
				     OFF_TO_IDX(prev_entry->offset),
				     (vm_size_t)(prev_entry->end - prev_entry->start),
				     (vm_size_t)(end - prev_entry->end)))) {
		/*
		 * We were able to extend the object.  Determine if we
		 * can extend the previous map entry to include the
		 * new range as well.
		 */
		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
		    (prev_entry->protection == prot) &&
		    (prev_entry->max_protection == max)) {
			map->size += (end - prev_entry->end);
			prev_entry->end = end;
			vm_map_simplify_entry(map, prev_entry, countp);
			return (KERN_SUCCESS);
		}

		/*
		 * If we can extend the object but cannot extend the
		 * map entry, we have to create a new map entry.  We
		 * must bump the ref count on the extended object to
		 * account for it.  object may be NULL.
		 */
		object = prev_entry->object.vm_object;
		offset = prev_entry->offset +
			(prev_entry->end - prev_entry->start);
		vm_object_reference(object);
	}

	/*
	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
	 * in things like the buffer map where we manage kva but do not manage
	 * backing objects.
	 */

	/*
	 * Create a new entry
	 */

	new_entry = vm_map_entry_create(map, countp);
	new_entry->start = start;
	new_entry->end = end;

	new_entry->eflags = protoeflags;
	new_entry->object.vm_object = object;
	new_entry->offset = offset;
	new_entry->avail_ssize = 0;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = prot;
	new_entry->max_protection = max;
	new_entry->wired_count = 0;

	/*
	 * Insert the new entry into the list
	 */

	vm_map_entry_link(map, prev_entry, new_entry);
	map->size += new_entry->end - new_entry->start;

	/*
	 * Update the free space hint
	 */
	if ((map->first_free == prev_entry) &&
	    (prev_entry->end >= new_entry->start)) {
		map->first_free = new_entry;
	}

#if 0
	/*
	 * Temporarily removed to avoid MAP_STACK panic, due to
	 * MAP_STACK being a huge hack.  Will be added back in
	 * when MAP_STACK (and the user stack mapping) is fixed.
	 */
	/*
	 * It may be possible to simplify the entry
	 */
	vm_map_simplify_entry(map, new_entry, countp);
#endif

	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
		pmap_object_init_pt(map->pmap, start,
		    object, OFF_TO_IDX(offset), end - start,
		    cow & MAP_PREFAULT_PARTIAL);
	}

	return (KERN_SUCCESS);
}

/*
 * Find sufficient space for `length' bytes in the given map, starting at
 * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
 *
 * This function will return an arbitrarily aligned pointer.  If no
 * particular alignment is required you should pass align as 1.  Note that
 * the map may return PAGE_SIZE aligned pointers if all the lengths used in
 * the map are a multiple of PAGE_SIZE, even if you pass a smaller align
 * argument.
 *
 * 'align' should be a power of 2 but is not required to be.
 */
int
vm_map_findspace(
	vm_map_t map,
	vm_offset_t start,
	vm_size_t length,
	vm_offset_t align,
	vm_offset_t *addr)
{
	vm_map_entry_t entry, next;
	vm_offset_t end;
	vm_offset_t align_mask;

	if (start < map->min_offset)
		start = map->min_offset;
	if (start > map->max_offset)
		return (1);

	/*
	 * If the alignment is not a power of 2 we will have to use
	 * a mod/division, set align_mask to a special value.
	 */
	if ((align | (align - 1)) + 1 != (align << 1))
		align_mask = (vm_offset_t)-1;
	else
		align_mask = align - 1;

retry:
	/*
	 * Look for the first possible address; if there's already something
	 * at this address, we have to start after it.
	 */
	if (start == map->min_offset) {
		if ((entry = map->first_free) != &map->header)
			start = entry->end;
	} else {
		vm_map_entry_t tmp;

		if (vm_map_lookup_entry(map, start, &tmp))
			start = tmp->end;
		entry = tmp;
	}

	/*
	 * Look through the rest of the map, trying to fit a new region in the
	 * gap between existing regions, or after the very last region.
	 */
	for (;; start = (entry = next)->end) {
		/*
		 * Adjust the proposed start by the requested alignment,
		 * be sure that we didn't wrap the address.
		 */
		if (align_mask == (vm_offset_t)-1)
			end = ((start + align - 1) / align) * align;
		else
			end = (start + align_mask) & ~align_mask;
		if (end < start)
			return (1);
		start = end;
		/*
		 * Find the end of the proposed new region.  Be sure we didn't
		 * go beyond the end of the map, or wrap around the address.
		 * Then check to see if this is the last entry or if the
		 * proposed end fits in the gap between this and the next
		 * entry.
		 */
		end = start + length;
		if (end > map->max_offset || end < start)
			return (1);
		next = entry->next;
		if (next == &map->header || next->start >= end)
			break;
	}
	SAVE_HINT(map, entry);
	if (map == kernel_map) {
		vm_offset_t ksize;
		if ((ksize = round_page(start + length)) > kernel_vm_end) {
			pmap_growkernel(ksize);
			goto retry;
		}
	}
	*addr = start;
	return (0);
}
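
/*
 * Example caller (a simplified sketch of vm_map_find() below, which
 * passes an alignment of 1 to accept any address the map produces):
 *
 *	vm_map_lock(map);
 *	if (vm_map_findspace(map, start, length, PAGE_SIZE, &addr)) {
 *		vm_map_unlock(map);
 *		return (KERN_NO_SPACE);		(no hole large enough)
 *	}
 *	... addr now names a PAGE_SIZE aligned hole of length bytes ...
 */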

/*
 * vm_map_find finds an unallocated region in the target address
 * map with the given length.  The search is defined to be
 * first-fit from the specified address; the region found is
 * returned in the same parameter.
 *
 * If object is non-NULL, ref count must be bumped by caller
 * prior to making call to account for the new entry.
 */
int
vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	    vm_offset_t *addr,	/* IN/OUT */
	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
	    vm_prot_t max, int cow)
{
	vm_offset_t start;
	int result;
	int count;

	start = *addr;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (find_space) {
		if (vm_map_findspace(map, start, length, 1, addr)) {
			vm_map_unlock(map);
			vm_map_entry_release(count);
			return (KERN_NO_SPACE);
		}
		start = *addr;
	}
	result = vm_map_insert(map, &count, object, offset,
			       start, start + length, prot, max, cow);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	return (result);
}

/*
 * vm_map_simplify_entry:
 *
 *	Simplify the given map entry by merging with either neighbor.  This
 *	routine also has the ability to merge with both neighbors.
 *
 *	The map must be locked.
 *
 *	This routine guarantees that the passed entry remains valid (though
 *	possibly extended).  When merging, this routine may delete one or
 *	both neighbors.  No action is taken on entries which have their
 *	in-transition flag set.
 */
void
vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp)
{
	vm_map_entry_t next, prev;
	vm_size_t prevsize, esize;

	if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) {
		++mycpu->gd_cnt.v_intrans_coll;
		return;
	}

	prev = entry->prev;
	if (prev != &map->header) {
		prevsize = prev->end - prev->start;
		if ( (prev->end == entry->start) &&
		     (prev->object.vm_object == entry->object.vm_object) &&
		     (!prev->object.vm_object ||
			(prev->offset + prevsize == entry->offset)) &&
		     (prev->eflags == entry->eflags) &&
		     (prev->protection == entry->protection) &&
		     (prev->max_protection == entry->max_protection) &&
		     (prev->inheritance == entry->inheritance) &&
		     (prev->wired_count == entry->wired_count)) {
			if (map->first_free == prev)
				map->first_free = entry;
			if (map->hint == prev)
				map->hint = entry;
			vm_map_entry_unlink(map, prev);
			entry->start = prev->start;
			entry->offset = prev->offset;
			if (prev->object.vm_object)
				vm_object_deallocate(prev->object.vm_object);
			vm_map_entry_dispose(map, prev, countp);
		}
	}

	next = entry->next;
	if (next != &map->header) {
		esize = entry->end - entry->start;
		if ((entry->end == next->start) &&
		    (next->object.vm_object == entry->object.vm_object) &&
		     (!entry->object.vm_object ||
			(entry->offset + esize == next->offset)) &&
		    (next->eflags == entry->eflags) &&
		    (next->protection == entry->protection) &&
		    (next->max_protection == entry->max_protection) &&
		    (next->inheritance == entry->inheritance) &&
		    (next->wired_count == entry->wired_count)) {
			if (map->first_free == next)
				map->first_free = entry;
			if (map->hint == next)
				map->hint = entry;
			vm_map_entry_unlink(map, next);
			entry->end = next->end;
			if (next->object.vm_object)
				vm_object_deallocate(next->object.vm_object);
			vm_map_entry_dispose(map, next, countp);
		}
	}
}

/*
 * vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
#define vm_map_clip_start(map, entry, startaddr, countp) \
{ \
	if (startaddr > entry->start) \
		_vm_map_clip_start(map, entry, startaddr, countp); \
}

/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start,
		   int *countp)
{
	vm_map_entry_t new_entry;

	/*
	 * Split off the front portion -- note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */

	vm_map_simplify_entry(map, entry, countp);

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */

	if (entry->object.vm_object == NULL && !map->system_map) {
		vm_object_t object;
		object = vm_object_allocate(OBJT_DEFAULT,
					    atop(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	new_entry = vm_map_entry_create(map, countp);
	*new_entry = *entry;

	new_entry->end = start;
	entry->offset += (start - entry->start);
	entry->start = start;

	vm_map_entry_link(map, entry->prev, new_entry);

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_reference(new_entry->object.vm_object);
	}
}

/*
 * vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */

#define vm_map_clip_end(map, entry, endaddr, countp) \
{ \
	if (endaddr < entry->end) \
		_vm_map_clip_end(map, entry, endaddr, countp); \
}

/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end,
		 int *countp)
{
	vm_map_entry_t new_entry;

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */

	if (entry->object.vm_object == NULL && !map->system_map) {
		vm_object_t object;
		object = vm_object_allocate(OBJT_DEFAULT,
					    atop(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	/*
	 * Create a new entry and insert it AFTER the specified entry
	 */

	new_entry = vm_map_entry_create(map, countp);
	*new_entry = *entry;

	new_entry->start = entry->end = end;
	new_entry->offset += (end - entry->start);

	vm_map_entry_link(map, entry, new_entry);

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_reference(new_entry->object.vm_object);
	}
}

/*
 * VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
{ \
	if (start < vm_map_min(map)) \
		start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
		end = vm_map_max(map); \
	if (start > end) \
		start = end; \
}

/*
 * vm_map_transition_wait:	[ kernel use only ]
 *
 *	Used to block when an in-transition collision occurs.  The map
 *	is unlocked for the sleep and relocked before the return.
 */
static
void
vm_map_transition_wait(vm_map_t map)
{
	vm_map_unlock(map);
	tsleep(map, 0, "vment", 0);
	vm_map_lock(map);
}

/*
 * CLIP_CHECK_BACK
 * CLIP_CHECK_FWD
 *
 *	When we do blocking operations with the map lock held it is
 *	possible that a clip might have occurred on our in-transit entry,
 *	requiring an adjustment to the entry in our loop.  These macros
 *	help the pageable and clip_range code deal with the case.  The
 *	conditional costs virtually nothing if no clipping has occurred.
 */

#define CLIP_CHECK_BACK(entry, save_start)		\
    do {						\
	    while (entry->start != save_start) {	\
		    entry = entry->prev;		\
		    KASSERT(entry != &map->header, ("bad entry clip")); \
	    }						\
    } while(0)

#define CLIP_CHECK_FWD(entry, save_end)			\
    do {						\
	    while (entry->end != save_end) {		\
		    entry = entry->next;		\
		    KASSERT(entry != &map->header, ("bad entry clip")); \
	    }						\
    } while(0)
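
/*
 * Usage sketch, matching the wiring loops later in this file: record
 * the entry bounds, block (which requires unlocking the map), then
 * walk back/forward across any clips that occurred while we slept:
 *
 *	save_start = entry->start;
 *	save_end = entry->end;
 *	vm_map_unlock(map);
 *	rv = vm_fault_user_wire(map, save_start, save_end);
 *	vm_map_lock(map);
 *	CLIP_CHECK_BACK(entry, save_start);
 */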

/*
 * vm_map_clip_range:	[ kernel use only ]
 *
 *	Clip the specified range and return the base entry.  The
 *	range may cover several entries starting at the returned base
 *	and the first and last entry in the covering sequence will be
 *	properly clipped to the requested start and end address.
 *
 *	If no holes are allowed you should pass the MAP_CLIP_NO_HOLES
 *	flag.
 *
 *	The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
 *	covered by the requested range.
 *
 *	The map must be exclusively locked on entry and will remain locked
 *	on return.  If no range exists or the range contains holes and you
 *	specified that no holes were allowed, NULL will be returned.  This
 *	routine may temporarily unlock the map in order to avoid a deadlock
 *	when sleeping.
 */
static
vm_map_entry_t
vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end,
		  int *countp, int flags)
{
	vm_map_entry_t start_entry;
	vm_map_entry_t entry;

	/*
	 * Locate the entry and effect initial clipping.  The in-transition
	 * case does not occur very often so do not try to optimize it.
	 */
again:
	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
		return (NULL);
	entry = start_entry;
	if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
		entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
		++mycpu->gd_cnt.v_intrans_coll;
		++mycpu->gd_cnt.v_intrans_wait;
		vm_map_transition_wait(map);
		/*
		 * entry and/or start_entry may have been clipped while
		 * we slept, or may have gone away entirely.  We have
		 * to restart from the lookup.
		 */
		goto again;
	}
	/*
	 * Since we hold an exclusive map lock we do not have to restart
	 * after clipping, even though clipping may block in zalloc.
	 */
	vm_map_clip_start(map, entry, start, countp);
	vm_map_clip_end(map, entry, end, countp);
	entry->eflags |= MAP_ENTRY_IN_TRANSITION;

	/*
	 * Scan entries covered by the range.  When working on the next
	 * entry a restart need only re-loop on the current entry which
	 * we have already locked, since 'next' may have changed.  Also,
	 * even though entry is safe, it may have been clipped so we
	 * have to iterate forwards through the clip after sleeping.
	 */
	while (entry->next != &map->header && entry->next->start < end) {
		vm_map_entry_t next = entry->next;

		if (flags & MAP_CLIP_NO_HOLES) {
			if (next->start > entry->end) {
				vm_map_unclip_range(map, start_entry,
					start, entry->end, countp, flags);
				return(NULL);
			}
		}

		if (next->eflags & MAP_ENTRY_IN_TRANSITION) {
			vm_offset_t save_end = entry->end;
			next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			++mycpu->gd_cnt.v_intrans_coll;
			++mycpu->gd_cnt.v_intrans_wait;
			vm_map_transition_wait(map);
			/*
			 * clips might have occurred while we blocked.
			 */
			CLIP_CHECK_FWD(entry, save_end);
			CLIP_CHECK_BACK(start_entry, start);
			continue;
		}
		/*
		 * No restart necessary even though clip_end may block, we
		 * are holding the map lock.
		 */
		vm_map_clip_end(map, next, end, countp);
		next->eflags |= MAP_ENTRY_IN_TRANSITION;
		entry = next;
	}
	if (flags & MAP_CLIP_NO_HOLES) {
		if (entry->end != end) {
			vm_map_unclip_range(map, start_entry,
				start, entry->end, countp, flags);
			return(NULL);
		}
	}
	return(start_entry);
}

/*
 * vm_map_unclip_range:	[ kernel use only ]
 *
 *	Undo the effect of vm_map_clip_range().  You should pass the same
 *	flags and the same range that you passed to vm_map_clip_range().
 *	This code will clear the in-transition flag on the entries and
 *	wake up anyone waiting.  This code will also simplify the sequence
 *	and attempt to merge it with entries before and after the sequence.
 *
 *	The map must be locked on entry and will remain locked on return.
 *
 *	Note that you should also pass the start_entry returned by
 *	vm_map_clip_range().  However, if you block between the two calls
 *	with the map unlocked please be aware that the start_entry may
 *	have been clipped and you may need to scan it backwards to find
 *	the entry corresponding with the original start address.  You are
 *	responsible for this, vm_map_unclip_range() expects the correct
 *	start_entry to be passed to it and will KASSERT otherwise.
 */
static
void
vm_map_unclip_range(
	vm_map_t map,
	vm_map_entry_t start_entry,
	vm_offset_t start,
	vm_offset_t end,
	int *countp,
	int flags)
{
	vm_map_entry_t entry;

	entry = start_entry;

	KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
	while (entry != &map->header && entry->start < end) {
		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
			("in-transition flag not set during unclip on: %p",
			entry));
		KASSERT(entry->end <= end,
			("unclip_range: tail wasn't clipped"));
		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
			wakeup(map);
		}
		entry = entry->next;
	}

	/*
	 * Simplification does not block so there is no restart case.
	 */
	entry = start_entry;
	while (entry != &map->header && entry->start < end) {
		vm_map_simplify_entry(map, entry, countp);
		entry = entry->next;
	}
}
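
/*
 * vm_map_clip_range() and vm_map_unclip_range() are used as a
 * bracketing pair, as in vm_map_unwire() below:
 *
 *	start_entry = vm_map_clip_range(map, start, end, &count,
 *					MAP_CLIP_NO_HOLES);
 *	if (start_entry == NULL)
 *		return (KERN_INVALID_ADDRESS);	(after unlocking the map)
 *	... operate on the in-transition entries ...
 *	vm_map_unclip_range(map, start_entry, start, real_end, &count,
 *			    MAP_CLIP_NO_HOLES);
 */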

/*
 * vm_map_submap:	[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find,
 *	and no other operations may have been performed on this
 *	range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *	vm_fault
 *	[Don't try vm_map_copy!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
int
vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap)
{
	vm_map_entry_t entry;
	int result = KERN_INVALID_ARGUMENT;
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start, &count);
	} else {
		entry = entry->next;
	}

	vm_map_clip_end(map, entry, end, &count);

	if ((entry->start == start) && (entry->end == end) &&
	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
	    (entry->object.vm_object == NULL)) {
		entry->object.sub_map = submap;
		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);
	vm_map_entry_release(count);

	return (result);
}

/*
 * vm_map_protect:
 *
 *	Sets the protection of the specified address
 *	region in the target map.  If "set_max" is
 *	specified, the maximum protection is to be set;
 *	otherwise, only the current protection is affected.
 */
int
vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_prot_t new_prot, boolean_t set_max)
{
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start, &count);
	} else {
		entry = entry->next;
	}

	/*
	 * Make a first pass to check for protection violations.
	 */

	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock(map);
			vm_map_entry_release(count);
			return (KERN_INVALID_ARGUMENT);
		}
		if ((new_prot & current->max_protection) != new_prot) {
			vm_map_unlock(map);
			vm_map_entry_release(count);
			return (KERN_PROTECTION_FAILURE);
		}
		current = current->next;
	}

	/*
	 * Go back and fix up protections. [Note that clipping is not
	 * necessary the second time.]
	 */
	current = entry;

	while ((current != &map->header) && (current->start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end, &count);

		old_prot = current->protection;
		if (set_max)
			current->protection =
			    (current->max_protection = new_prot) &
			    old_prot;
		else
			current->protection = new_prot;

		/*
		 * Update physical map if necessary. Worry about copy-on-write
		 * here -- CHECK THIS XXX
		 */

		if (current->protection != old_prot) {
#define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
							VM_PROT_ALL)

			pmap_protect(map->pmap, current->start,
			    current->end,
			    current->protection & MASK(current));
#undef	MASK
		}

		vm_map_simplify_entry(map, current, &count);

		current = current->next;
	}

	vm_map_unlock(map);
	vm_map_entry_release(count);
	return (KERN_SUCCESS);
}

/*
 * vm_map_madvise:
 *
 *	This routine traverses a process's map handling the madvise
 *	system call.  Advisories are classified as either those affecting
 *	the vm_map_entry structure, or those affecting the underlying
 *	objects.
 */

int
vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav)
{
	vm_map_entry_t current, entry;
	int modify_map = 0;
	int count;

	/*
	 * Some madvise calls directly modify the vm_map_entry, in which case
	 * we need to use an exclusive lock on the map and we need to perform
	 * various clipping operations.  Otherwise we only need a read-lock
	 * on the map.
	 */

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

	switch(behav) {
	case MADV_NORMAL:
	case MADV_SEQUENTIAL:
	case MADV_RANDOM:
	case MADV_NOSYNC:
	case MADV_AUTOSYNC:
	case MADV_NOCORE:
	case MADV_CORE:
		modify_map = 1;
		vm_map_lock(map);
		break;
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_FREE:
		vm_map_lock_read(map);
		break;
	default:
		vm_map_entry_release(count);
		return (KERN_INVALID_ARGUMENT);
	}

	/*
	 * Locate starting entry and clip if necessary.
	 */

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		if (modify_map)
			vm_map_clip_start(map, entry, start, &count);
	} else {
		entry = entry->next;
	}

	if (modify_map) {
		/*
		 * madvise behaviors that are implemented in the vm_map_entry.
		 *
		 * We clip the vm_map_entry so that behavioral changes are
		 * limited to the specified address range.
		 */
		for (current = entry;
		     (current != &map->header) && (current->start < end);
		     current = current->next
		) {
			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
				continue;

			vm_map_clip_end(map, current, end, &count);

			switch (behav) {
			case MADV_NORMAL:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
				break;
			case MADV_SEQUENTIAL:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
				break;
			case MADV_RANDOM:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
				break;
			case MADV_NOSYNC:
				current->eflags |= MAP_ENTRY_NOSYNC;
				break;
			case MADV_AUTOSYNC:
				current->eflags &= ~MAP_ENTRY_NOSYNC;
				break;
			case MADV_NOCORE:
				current->eflags |= MAP_ENTRY_NOCOREDUMP;
				break;
			case MADV_CORE:
				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
				break;
			default:
				break;
			}
			vm_map_simplify_entry(map, current, &count);
		}
		vm_map_unlock(map);
	} else {
		vm_pindex_t pindex;
		int count;

		/*
		 * madvise behaviors that are implemented in the underlying
		 * vm_object.
		 *
		 * Since we don't clip the vm_map_entry, we have to clip
		 * the vm_object pindex and count.
		 */
		for (current = entry;
		     (current != &map->header) && (current->start < end);
		     current = current->next
		) {
			vm_offset_t useStart;

			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
				continue;

			pindex = OFF_TO_IDX(current->offset);
			count = atop(current->end - current->start);
			useStart = current->start;

			if (current->start < start) {
				pindex += atop(start - current->start);
				count -= atop(start - current->start);
				useStart = start;
			}
			if (current->end > end)
				count -= atop(current->end - end);

			if (count <= 0)
				continue;

			vm_object_madvise(current->object.vm_object,
					  pindex, count, behav);
			if (behav == MADV_WILLNEED) {
				pmap_object_init_pt(
				    map->pmap,
				    useStart,
				    current->object.vm_object,
				    pindex,
				    (count << PAGE_SHIFT),
				    MAP_PREFAULT_MADVISE
				);
			}
		}
		vm_map_unlock_read(map);
	}
	vm_map_entry_release(count);
	return(0);
}

/*
 * vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
int
vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_inherit_t new_inheritance)
{
	vm_map_entry_t entry;
	vm_map_entry_t temp_entry;
	int count;

	switch (new_inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
		vm_map_clip_start(map, entry, start, &count);
	} else
		entry = temp_entry->next;

	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_clip_end(map, entry, end, &count);

		entry->inheritance = new_inheritance;

		vm_map_simplify_entry(map, entry, &count);

		entry = entry->next;
	}
	vm_map_unlock(map);
	vm_map_entry_release(count);
	return (KERN_SUCCESS);
}

/*
 * Implement the semantics of mlock
 */
int
vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
	      boolean_t new_pageable)
{
	vm_map_entry_t entry;
	vm_map_entry_t start_entry;
	vm_offset_t end;
	int rv = KERN_SUCCESS;
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, real_end);
	end = real_end;

	start_entry = vm_map_clip_range(map, start, end, &count,
					MAP_CLIP_NO_HOLES);
	if (start_entry == NULL) {
		vm_map_unlock(map);
		vm_map_entry_release(count);
		return (KERN_INVALID_ADDRESS);
	}

	if (new_pageable == 0) {
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			vm_offset_t save_start;
			vm_offset_t save_end;

			/*
			 * Already user wired or hard wired (trivial cases)
			 */
			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
				entry = entry->next;
				continue;
			}
			if (entry->wired_count != 0) {
				entry->wired_count++;
				entry->eflags |= MAP_ENTRY_USER_WIRED;
				entry = entry->next;
				continue;
			}

			/*
			 * A new wiring requires instantiation of appropriate
			 * management structures and the faulting in of the
			 * page.
			 */
			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {

					vm_object_shadow(&entry->object.vm_object,
					    &entry->offset,
					    atop(entry->end - entry->start));
					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;

				} else if (entry->object.vm_object == NULL &&
					   !map->system_map) {

					entry->object.vm_object =
					    vm_object_allocate(OBJT_DEFAULT,
						atop(entry->end - entry->start));
					entry->offset = (vm_offset_t) 0;

				}
			}
			entry->wired_count++;
			entry->eflags |= MAP_ENTRY_USER_WIRED;

			/*
			 * Now fault in the area.  The map lock needs to be
			 * manipulated to avoid deadlocks.  The in-transition
			 * flag protects the entries.
			 */
			save_start = entry->start;
			save_end = entry->end;
			vm_map_unlock(map);
			map->timestamp++;
			rv = vm_fault_user_wire(map, save_start, save_end);
			vm_map_lock(map);
			if (rv) {
				CLIP_CHECK_BACK(entry, save_start);
				for (;;) {
					KASSERT(entry->wired_count == 1,
						("bad wired_count on entry"));
					entry->eflags &= ~MAP_ENTRY_USER_WIRED;
					entry->wired_count = 0;
					if (entry->end == save_end)
						break;
					entry = entry->next;
					KASSERT(entry != &map->header,
						("bad entry clip during backout"));
				}
				end = save_start;	/* unwire the rest */
				break;
			}
			/*
			 * note that even though the entry might have been
			 * clipped, the USER_WIRED flag we set prevents
			 * duplication so we do not have to do a
			 * clip check.
			 */
			entry = entry->next;
		}

		/*
		 * If we failed, fall through to the unwiring section to
		 * unwire what we had wired so far.  'end' has already
		 * been adjusted.
		 */
		if (rv)
			new_pageable = 1;

		/*
		 * start_entry might have been clipped if we unlocked the
		 * map and blocked.  No matter how clipped it has gotten
		 * there should be a fragment that is on our start boundary.
		 */
		CLIP_CHECK_BACK(start_entry, start);
	}

	/*
	 * Deal with the unwiring case.
	 */
	if (new_pageable) {
		/*
		 * This is the unwiring case.  We must first ensure that the
		 * range to be unwired is really wired down.  We know there
		 * are no holes.
		 */
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
				rv = KERN_INVALID_ARGUMENT;
				goto done;
			}
			KASSERT(entry->wired_count != 0,
				("wired count was 0 with USER_WIRED set! %p",
				entry));
			entry = entry->next;
		}

		/*
		 * Now decrement the wiring count for each region.  If a region
		 * becomes completely unwired, unwire its physical pages and
		 * mappings.
		 */
		/*
		 * The validation loop above left the entry loop variable
		 * pointing past the range, so it must be reset to
		 * start_entry before the unwiring pass.  If the stale
		 * value were reused, this loop would never be entered
		 * and the pages backing the entries would never be
		 * unwired, leaking wired pages.
		 */
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED,
				("expected USER_WIRED on entry %p", entry));
			entry->eflags &= ~MAP_ENTRY_USER_WIRED;
			entry->wired_count--;
			if (entry->wired_count == 0)
				vm_fault_unwire(map, entry->start, entry->end);
			entry = entry->next;
		}
	}
done:
	vm_map_unclip_range(map, start_entry, start, real_end, &count,
		MAP_CLIP_NO_HOLES);
	map->timestamp++;
	vm_map_unlock(map);
	vm_map_entry_release(count);
	return (rv);
}

/*
 * vm_map_wire:
 *
 *	Sets the pageability of the specified address
 *	range in the target map.  Regions specified
 *	as not pageable require locked-down physical
 *	memory and physical page maps.
 *
 *	The map must not be locked, but a reference
 *	must remain to the map throughout the call.
 *
 *	This function may be called via the zalloc path and must properly
 *	reserve map entries for kernel_map.
 */
int
vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
{
	vm_map_entry_t entry;
	vm_map_entry_t start_entry;
	vm_offset_t end;
	int rv = KERN_SUCCESS;
	int count;
	int s;

	if (kmflags & KM_KRESERVE)
		count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
	else
		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, real_end);
	end = real_end;

	start_entry = vm_map_clip_range(map, start, end, &count,
					MAP_CLIP_NO_HOLES);
	if (start_entry == NULL) {
		vm_map_unlock(map);
		rv = KERN_INVALID_ADDRESS;
		goto failure;
	}
	if ((kmflags & KM_PAGEABLE) == 0) {
		/*
		 * Wiring.
		 *
		 * 1.  Holding the write lock, we create any shadow or zero-fill
		 * objects that need to be created.  Then we clip each map
		 * entry to the region to be wired and increment its wiring
		 * count.  We create objects before clipping the map entries
		 * to avoid object proliferation.
		 *
		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
		 * fault in the pages for any newly wired area (wired_count is
		 * 1).
		 *
		 * Downgrading to a read lock for vm_fault_wire avoids a
		 * possible deadlock with another process that may have faulted
		 * on one of the pages to be wired (it would mark the page busy,
		 * blocking us, then in turn block on the map lock that we
		 * hold).  Because of problems in the recursive lock package,
		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
		 * any actions that require the write lock must be done
		 * beforehand.  Because we keep the read lock on the map, the
		 * copy-on-write status of the entries we modify here cannot
		 * change.
		 */

		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			/*
			 * Trivial case if the entry is already wired
			 */
			if (entry->wired_count) {
				entry->wired_count++;
				entry = entry->next;
				continue;
			}

1933 /*
1934 * The entry is being newly wired, we have to setup
1935 * appropriate management structures. A shadow
1936 * object is required for a copy-on-write region,
1937 * or a normal object for a zero-fill region. We
1938 * do not have to do this for entries that point to sub
1939 * maps because we won't hold the lock on the sub map.
1940 */
1941 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1942 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1943 if (copyflag &&
1944 ((entry->protection & VM_PROT_WRITE) != 0)) {
1945
1946 vm_object_shadow(&entry->object.vm_object,
1947 &entry->offset,
1948 atop(entry->end - entry->start));
1949 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1950 } else if (entry->object.vm_object == NULL &&
1951 !map->system_map) {
1952 entry->object.vm_object =
1953 vm_object_allocate(OBJT_DEFAULT,
1954 atop(entry->end - entry->start));
1955 entry->offset = (vm_offset_t) 0;
1956 }
1957 }
1958
1959 entry->wired_count++;
1960 entry = entry->next;
1961 }
1962
1963 /*
1964 * Pass 2.
1965 */
1966
1967 /*
1968 * HACK HACK HACK HACK
1969 *
1970 * Unlock the map to avoid deadlocks. The in-transit flag
1971 * protects us from most changes but note that
1972 * clipping may still occur. To prevent clipping from
 1973 * occurring after the unlock, except when we are
1974 * blocking in vm_fault_wire, we must run at splvm().
1975 * Otherwise our accesses to entry->start and entry->end
1976 * could be corrupted. We have to set splvm() prior to
1977 * unlocking so start_entry does not change out from
1978 * under us at the very beginning of the loop.
1979 *
1980 * HACK HACK HACK HACK
1981 */
1982
1983 s = splvm();
1984 vm_map_unlock(map);
1985
1986 entry = start_entry;
1987 while (entry != &map->header && entry->start < end) {
1988 /*
1989 * If vm_fault_wire fails for any page we need to undo
1990 * what has been done. We decrement the wiring count
1991 * for those pages which have not yet been wired (now)
1992 * and unwire those that have (later).
1993 */
1994 vm_offset_t save_start = entry->start;
1995 vm_offset_t save_end = entry->end;
1996
1997 if (entry->wired_count == 1)
1998 rv = vm_fault_wire(map, entry->start, entry->end);
1999 if (rv) {
2000 CLIP_CHECK_BACK(entry, save_start);
2001 for (;;) {
2002 KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly"));
2003 entry->wired_count = 0;
2004 if (entry->end == save_end)
2005 break;
2006 entry = entry->next;
2007 KASSERT(entry != &map->header, ("bad entry clip during backout"));
2008 }
2009 end = save_start;
2010 break;
2011 }
2012 CLIP_CHECK_FWD(entry, save_end);
2013 entry = entry->next;
2014 }
2015 splx(s);
2016
2017 /*
2018 * relock. start_entry is still IN_TRANSITION and must
2019 * still exist, but may have been clipped (handled just
2020 * below).
2021 */
2022 vm_map_lock(map);
2023
2024 /*
 2025 * If a failure occurred, undo everything by falling through
2026 * to the unwiring code. 'end' has already been adjusted
2027 * appropriately.
2028 */
2029 if (rv)
2030 kmflags |= KM_PAGEABLE;
2031
2032 /*
2033 * start_entry might have been clipped if we unlocked the
2034 * map and blocked. No matter how clipped it has gotten
2035 * there should be a fragment that is on our start boundary.
2036 */
2037 CLIP_CHECK_BACK(start_entry, start);
2038 }
2039
2040 if (kmflags & KM_PAGEABLE) {
2041 /*
2042 * This is the unwiring case. We must first ensure that the
2043 * range to be unwired is really wired down. We know there
2044 * are no holes.
2045 */
2046 entry = start_entry;
2047 while ((entry != &map->header) && (entry->start < end)) {
2048 if (entry->wired_count == 0) {
2049 rv = KERN_INVALID_ARGUMENT;
2050 goto done;
2051 }
2052 entry = entry->next;
2053 }
2054
2055 /*
2056 * Now decrement the wiring count for each region. If a region
2057 * becomes completely unwired, unwire its physical pages and
2058 * mappings.
2059 */
2060 entry = start_entry;
2061 while ((entry != &map->header) && (entry->start < end)) {
2062 entry->wired_count--;
2063 if (entry->wired_count == 0)
2064 vm_fault_unwire(map, entry->start, entry->end);
2065 entry = entry->next;
2066 }
2067 }
2068done:
2069 vm_map_unclip_range(map, start_entry, start, real_end, &count,
2070 MAP_CLIP_NO_HOLES);
2071 map->timestamp++;
2072 vm_map_unlock(map);
2073failure:
2074 if (kmflags & KM_KRESERVE)
2075 vm_map_entry_krelease(count);
2076 else
2077 vm_map_entry_release(count);
2078 return (rv);
2079}
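
/*
 * Illustrative sketch (not compiled): how a hypothetical kernel caller
 * might wire a range of kernel_map.  The helper name and range are
 * placeholders.  The point is the flag: callers reachable from the
 * zalloc path pass KM_KRESERVE so that vm_map_wire() draws its map
 * entries from the kernel reserve instead of allocating new ones,
 * which could otherwise recurse into the allocator.
 */
#if 0
static int
example_wire_kernel_range(vm_offset_t addr, vm_size_t size)
{
	/* KM_KRESERVE: use the reserved kernel map entries */
	return (vm_map_wire(kernel_map, addr, addr + size, KM_KRESERVE));
}
#endif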
2080
2081/*
2082 * vm_map_set_wired_quick()
2083 *
2084 * Mark a newly allocated address range as wired but do not fault in
2085 * the pages. The caller is expected to load the pages into the object.
2086 *
2087 * The map must be locked on entry and will remain locked on return.
2088 */
2089void
2090vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp)
2091{
2092 vm_map_entry_t scan;
2093 vm_map_entry_t entry;
2094
2095 entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2096 for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) {
 2097 KKASSERT(scan->wired_count == 0);
 2098 scan->wired_count = 1;
2099 }
2100 vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2101}
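
/*
 * Illustrative sketch (not compiled): the reserve/lock/release sequence
 * a hypothetical caller would wrap around vm_map_set_wired_quick().
 * The helper name is a placeholder; the caller is expected to populate
 * the backing object with pages itself afterwards.
 */
#if 0
static void
example_set_wired(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);	/* the function requires the map locked */
	vm_map_set_wired_quick(map, addr, size, &count);
	vm_map_unlock(map);
	vm_map_entry_release(count);
}
#endif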
2102
2103/*
2104 * vm_map_clean
2105 *
2106 * Push any dirty cached pages in the address range to their pager.
2107 * If syncio is TRUE, dirty pages are written synchronously.
2108 * If invalidate is TRUE, any cached pages are freed as well.
2109 *
2110 * Returns an error if any part of the specified range is not mapped.
2111 */
2112int
 2113vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2114 boolean_t syncio, boolean_t invalidate)
 2115{
2120 vm_map_entry_t current;
2121 vm_map_entry_t entry;
2122 vm_size_t size;
2123 vm_object_t object;
2124 vm_ooffset_t offset;
2125
2126 vm_map_lock_read(map);
2127 VM_MAP_RANGE_CHECK(map, start, end);
2128 if (!vm_map_lookup_entry(map, start, &entry)) {
2129 vm_map_unlock_read(map);
2130 return (KERN_INVALID_ADDRESS);
2131 }
2132 /*
2133 * Make a first pass to check for holes.
2134 */
2135 for (current = entry; current->start < end; current = current->next) {
2136 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2137 vm_map_unlock_read(map);
2138 return (KERN_INVALID_ARGUMENT);
2139 }
2140 if (end > current->end &&
2141 (current->next == &map->header ||
2142 current->end != current->next->start)) {
2143 vm_map_unlock_read(map);
2144 return (KERN_INVALID_ADDRESS);
2145 }
2146 }
2147
2148 if (invalidate)
2149 pmap_remove(vm_map_pmap(map), start, end);
2150 /*
2151 * Make a second pass, cleaning/uncaching pages from the indicated
2152 * objects as we go.
2153 */
2154 for (current = entry; current->start < end; current = current->next) {
2155 offset = current->offset + (start - current->start);
2156 size = (end <= current->end ? end : current->end) - start;
2157 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2158 vm_map_t smap;
2159 vm_map_entry_t tentry;
2160 vm_size_t tsize;
2161
2162 smap = current->object.sub_map;
2163 vm_map_lock_read(smap);
2164 (void) vm_map_lookup_entry(smap, offset, &tentry);
2165 tsize = tentry->end - offset;
2166 if (tsize < size)
2167 size = tsize;
2168 object = tentry->object.vm_object;
2169 offset = tentry->offset + (offset - tentry->start);
2170 vm_map_unlock_read(smap);
2171 } else {
2172 object = current->object.vm_object;
2173 }
2174 /*
2175 * Note that there is absolutely no sense in writing out
2176 * anonymous objects, so we track down the vnode object
2177 * to write out.
2178 * We invalidate (remove) all pages from the address space
2179 * anyway, for semantic correctness.
2180 *
2181 * note: certain anonymous maps, such as MAP_NOSYNC maps,
2182 * may start out with a NULL object.
2183 */
 2184 while (object && object->backing_object) {
 2185 /* translate offset into the backer before descending */
 2185 offset += object->backing_object_offset;
 2186 object = object->backing_object;
2187 if (object->size < OFF_TO_IDX( offset + size))
2188 size = IDX_TO_OFF(object->size) - offset;
2189 }
2190 if (object && (object->type == OBJT_VNODE) &&
2191 (current->protection & VM_PROT_WRITE)) {
2192 /*
2193 * Flush pages if writing is allowed, invalidate them
2194 * if invalidation requested. Pages undergoing I/O
2195 * will be ignored by vm_object_page_remove().
2196 *
2197 * We cannot lock the vnode and then wait for paging
2198 * to complete without deadlocking against vm_fault.
2199 * Instead we simply call vm_object_page_remove() and
2200 * allow it to block internally on a page-by-page
2201 * basis when it encounters pages undergoing async
2202 * I/O.
2203 */
2204 int flags;
2205
2206 vm_object_reference(object);
2207 vn_lock(object->handle, NULL,
2208 LK_EXCLUSIVE | LK_RETRY, curthread);
2209 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
2210 flags |= invalidate ? OBJPC_INVAL : 0;
2211 vm_object_page_clean(object,
2212 OFF_TO_IDX(offset),
2213 OFF_TO_IDX(offset + size + PAGE_MASK),
2214 flags);
2215 VOP_UNLOCK(object->handle, NULL, 0, curthread);
2216 vm_object_deallocate(object);
2217 }
2218 if (object && invalidate &&
2219 ((object->type == OBJT_VNODE) ||
2220 (object->type == OBJT_DEVICE))) {
2221 vm_object_reference(object);
2222 vm_object_page_remove(object,
2223 OFF_TO_IDX(offset),
2224 OFF_TO_IDX(offset + size + PAGE_MASK),
2225 FALSE);
2226 vm_object_deallocate(object);
2227 }
2228 start += size;
2229 }
2230
2231 vm_map_unlock_read(map);
2232 return (KERN_SUCCESS);
2233}
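
/*
 * Illustrative sketch (not compiled): an msync(2)-style wrapper mapping
 * MS_SYNC to synchronous writes and MS_INVALIDATE to page invalidation.
 * The helper name is hypothetical; the flag mapping follows the
 * parameter descriptions above.
 */
#if 0
static int
example_msync(struct proc *p, vm_offset_t addr, vm_size_t len, int flags)
{
	return (vm_map_clean(&p->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr + len), (flags & MS_SYNC) != 0,
	    (flags & MS_INVALIDATE) != 0));
}
#endif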
2234
2235/*
2236 * vm_map_entry_unwire: [ internal use only ]
2237 *
2238 * Make the region specified by this entry pageable.
2239 *
2240 * The map in question should be locked.
2241 * [This is the reason for this routine's existence.]
2242 */
2243static void
2244vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2245{
2246 vm_fault_unwire(map, entry->start, entry->end);
2247 entry->wired_count = 0;
2248}
2249
2250/*
2251 * vm_map_entry_delete: [ internal use only ]
2252 *
2253 * Deallocate the given entry from the target map.
2254 */
2255static void
2256vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp)
2257{
2258 vm_map_entry_unlink(map, entry);
2259 map->size -= entry->end - entry->start;
2260
2261 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2262 vm_object_deallocate(entry->object.vm_object);
2263 }
2264
2265 vm_map_entry_dispose(map, entry, countp);
2266}
2267
2268/*
2269 * vm_map_delete: [ internal use only ]
2270 *
2271 * Deallocates the given address range from the target
2272 * map.
2273 */
2274int
2275vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp)
2276{
2277 vm_object_t object;
2278 vm_map_entry_t entry;
2279 vm_map_entry_t first_entry;
2280
2281 /*
2282 * Find the start of the region, and clip it
2283 */
2284
2285again:
2286 if (!vm_map_lookup_entry(map, start, &first_entry))
2287 entry = first_entry->next;
2288 else {
2289 entry = first_entry;
2290 vm_map_clip_start(map, entry, start, countp);
2291 /*
 2292 * Fix the lookup hint now, rather than each time through the
2293 * loop.
2294 */
2295 SAVE_HINT(map, entry->prev);
2296 }
2297
2298 /*
2299 * Save the free space hint
2300 */
2301
2302 if (entry == &map->header) {
2303 map->first_free = &map->header;
2304 } else if (map->first_free->start >= start) {
2305 map->first_free = entry->prev;
2306 }
2307
2308 /*
2309 * Step through all entries in this region
2310 */
2311
2312 while ((entry != &map->header) && (entry->start < end)) {
2313 vm_map_entry_t next;
2314 vm_offset_t s, e;
2315 vm_pindex_t offidxstart, offidxend, count;
2316
2317 /*
2318 * If we hit an in-transition entry we have to sleep and
2319 * retry. It's easier (and not really slower) to just retry
2320 * since this case occurs so rarely and the hint is already
2321 * pointing at the right place. We have to reset the
 2322 * start offset so as not to accidentally delete an entry
2323 * another process just created in vacated space.
2324 */
2325 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2326 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2327 start = entry->start;
2328 ++mycpu->gd_cnt.v_intrans_coll;
2329 ++mycpu->gd_cnt.v_intrans_wait;
2330 vm_map_transition_wait(map);
2331 goto again;
2332 }
2333 vm_map_clip_end(map, entry, end, countp);
2334
2335 s = entry->start;
2336 e = entry->end;
2337 next = entry->next;
2338
2339 offidxstart = OFF_TO_IDX(entry->offset);
2340 count = OFF_TO_IDX(e - s);
2341 object = entry->object.vm_object;
2342
2343 /*
2344 * Unwire before removing addresses from the pmap; otherwise,
2345 * unwiring will put the entries back in the pmap.
2346 */
2347 if (entry->wired_count != 0) {
2348 vm_map_entry_unwire(map, entry);
2349 }
2350
2351 offidxend = offidxstart + count;
2352
2353 if ((object == kernel_object) || (object == kmem_object)) {
2354 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2355 } else {
2356 pmap_remove(map->pmap, s, e);
2357 if (object != NULL &&
2358 object->ref_count != 1 &&
2359 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2360 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2361 vm_object_collapse(object);
2362 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2363 if (object->type == OBJT_SWAP) {
2364 swap_pager_freespace(object, offidxstart, count);
2365 }
2366 if (offidxend >= object->size &&
2367 offidxstart < object->size) {
2368 object->size = offidxstart;
2369 }
2370 }
2371 }
2372
2373 /*
2374 * Delete the entry (which may delete the object) only after
2375 * removing all pmap entries pointing to its pages.
2376 * (Otherwise, its page frames may be reallocated, and any
2377 * modify bits will be set in the wrong object!)
2378 */
2379 vm_map_entry_delete(map, entry, countp);
2380 entry = next;
2381 }
2382 return (KERN_SUCCESS);
2383}
2384
2385/*
2386 * vm_map_remove:
2387 *
2388 * Remove the given address range from the target map.
2389 * This is the exported form of vm_map_delete.
2390 */
2391int
2392vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2393{
2394 int result;
2395 int count;
2396
2397 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2398 vm_map_lock(map);
2399 VM_MAP_RANGE_CHECK(map, start, end);
2400 result = vm_map_delete(map, start, end, &count);
2401 vm_map_unlock(map);
2402 vm_map_entry_release(count);
2403
2404 return (result);
2405}
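
/*
 * Illustrative sketch (not compiled): releasing a kernel virtual
 * address range, much as the kmem allocator does when freeing.  The
 * helper name and range are placeholders.
 */
#if 0
static void
example_release_kva(vm_offset_t addr, vm_size_t size)
{
	(void) vm_map_remove(kernel_map, trunc_page(addr),
	    round_page(addr + size));
}
#endif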
2406
2407/*
2408 * vm_map_check_protection:
2409 *
2410 * Assert that the target map allows the specified
2411 * privilege on the entire address region given.
2412 * The entire region must be allocated.
2413 */
2414boolean_t
2415vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2416 vm_prot_t protection)
2417{
2418 vm_map_entry_t entry;
2419 vm_map_entry_t tmp_entry;
2420
2421 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2422 return (FALSE);
2423 }
2424 entry = tmp_entry;
2425
2426 while (start < end) {
2427 if (entry == &map->header) {
2428 return (FALSE);
2429 }
2430 /*
2431 * No holes allowed!
2432 */
2433
2434 if (start < entry->start) {
2435 return (FALSE);
2436 }
2437 /*
2438 * Check protection associated with entry.
2439 */
2440
2441 if ((entry->protection & protection) != protection) {
2442 return (FALSE);
2443 }
2444 /* go to next entry */
2445
2446 start = entry->end;
2447 entry = entry->next;
2448 }
2449 return (TRUE);
2450}
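
/*
 * Illustrative sketch (not compiled): verifying that an entire user
 * buffer is readable before operating on it, in the spirit of
 * useracc().  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_user_readable(struct proc *p, vm_offset_t addr, vm_size_t len)
{
	return (vm_map_check_protection(&p->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr + len), VM_PROT_READ));
}
#endif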
2451
2452/*
2453 * Split the pages in a map entry into a new object. This affords
2454 * easier removal of unused pages, and keeps object inheritance from
 2455 * having a negative impact on memory usage.
2456 */
2457static void
2458vm_map_split(vm_map_entry_t entry)
2459{
2460 vm_page_t m;
2461 vm_object_t orig_object, new_object, source;
2462 vm_offset_t s, e;
2463 vm_pindex_t offidxstart, offidxend, idx;
2464 vm_size_t size;
2465 vm_ooffset_t offset;
2466
2467 orig_object = entry->object.vm_object;
2468 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2469 return;
2470 if (orig_object->ref_count <= 1)
2471 return;
2472
2473 offset = entry->offset;
2474 s = entry->start;
2475 e = entry->end;
2476
2477 offidxstart = OFF_TO_IDX(offset);
2478 offidxend = offidxstart + OFF_TO_IDX(e - s);
2479 size = offidxend - offidxstart;
2480
2481 new_object = vm_pager_allocate(orig_object->type,
2482 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2483 if (new_object == NULL)
2484 return;
2485
2486 source = orig_object->backing_object;
2487 if (source != NULL) {
2488 vm_object_reference(source); /* Referenced by new_object */
2489 LIST_INSERT_HEAD(&source->shadow_head,
2490 new_object, shadow_list);
2491 vm_object_clear_flag(source, OBJ_ONEMAPPING);
2492 new_object->backing_object_offset =
2493 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2494 new_object->backing_object = source;
2495 source->shadow_count++;
2496 source->generation++;
2497 }
2498
2499 for (idx = 0; idx < size; idx++) {
2500 vm_page_t m;
2501
2502 retry:
2503 m = vm_page_lookup(orig_object, offidxstart + idx);
2504 if (m == NULL)
2505 continue;
2506
2507 /*
2508 * We must wait for pending I/O to complete before we can
2509 * rename the page.
2510 *
2511 * We do not have to VM_PROT_NONE the page as mappings should
2512 * not be changed by this operation.
2513 */
2514 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2515 goto retry;
2516
2517 vm_page_busy(m);
2518 vm_page_rename(m, new_object, idx);
2519 /* page automatically made dirty by rename and cache handled */
2521 }
2522
2523 if (orig_object->type == OBJT_SWAP) {
2524 vm_object_pip_add(orig_object, 1);
2525 /*
2526 * copy orig_object pages into new_object
2527 * and destroy unneeded pages in
2528 * shadow object.
2529 */
2530 swap_pager_copy(orig_object, new_object, offidxstart, 0);
2531 vm_object_pip_wakeup(orig_object);
2532 }
2533
2534 for (idx = 0; idx < size; idx++) {
2535 m = vm_page_lookup(new_object, idx);
2536 if (m) {
2537 vm_page_wakeup(m);
2538 }
2539 }
2540
2541 entry->object.vm_object = new_object;
2542 entry->offset = 0LL;
2543 vm_object_deallocate(orig_object);
2544}
2545
2546/*
2547 * vm_map_copy_entry:
2548 *
2549 * Copies the contents of the source entry to the destination
2550 * entry. The entries *must* be aligned properly.
2551 */
2552static void
2553vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
2554 vm_map_entry_t src_entry, vm_map_entry_t dst_entry)
2555{
2556 vm_object_t src_object;
2557
2558 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2559 return;
2560
2561 if (src_entry->wired_count == 0) {
2562
2563 /*
2564 * If the source entry is marked needs_copy, it is already
2565 * write-protected.
2566 */
2567 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2568 pmap_protect(src_map->pmap,
2569 src_entry->start,
2570 src_entry->end,
2571 src_entry->protection & ~VM_PROT_WRITE);
2572 }
2573
2574 /*
2575 * Make a copy of the object.
2576 */
2577 if ((src_object = src_entry->object.vm_object) != NULL) {
2578
2579 if ((src_object->handle == NULL) &&
2580 (src_object->type == OBJT_DEFAULT ||
2581 src_object->type == OBJT_SWAP)) {
2582 vm_object_collapse(src_object);
2583 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2584 vm_map_split(src_entry);
2585 src_object = src_entry->object.vm_object;
2586 }
2587 }
2588
2589 vm_object_reference(src_object);
2590 vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2591 dst_entry->object.vm_object = src_object;
2592 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2593 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2594 dst_entry->offset = src_entry->offset;
2595 } else {
2596 dst_entry->object.vm_object = NULL;
2597 dst_entry->offset = 0;
2598 }
2599
2600 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2601 dst_entry->end - dst_entry->start, src_entry->start);
2602 } else {
2603 /*
2604 * Of course, wired down pages can't be set copy-on-write.
2605 * Cause wired pages to be copied into the new map by
2606 * simulating faults (the new pages are pageable)
2607 */
2608 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2609 }
2610}
2611
2612/*
2613 * vmspace_fork:
2614 * Create a new process vmspace structure and vm_map
2615 * based on those of an existing process. The new map
2616 * is based on the old map, according to the inheritance
2617 * values on the regions in that map.
2618 *
2619 * The source map must not be locked.
2620 */
2621struct vmspace *
2622vmspace_fork(struct vmspace *vm1)
2623{
2624 struct vmspace *vm2;
2625 vm_map_t old_map = &vm1->vm_map;
2626 vm_map_t new_map;
2627 vm_map_entry_t old_entry;
2628 vm_map_entry_t new_entry;
2629 vm_object_t object;
2630 int count;
2631
2632 vm_map_lock(old_map);
2633 old_map->infork = 1;
2634
2635 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2636 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2637 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2638 new_map = &vm2->vm_map; /* XXX */
2639 new_map->timestamp = 1;
2640
2641 count = 0;
2642 old_entry = old_map->header.next;
2643 while (old_entry != &old_map->header) {
2644 ++count;
2645 old_entry = old_entry->next;
2646 }
2647
2648 count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT);
2649
2650 old_entry = old_map->header.next;
2651 while (old_entry != &old_map->header) {
2652 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2653 panic("vm_map_fork: encountered a submap");
2654
2655 switch (old_entry->inheritance) {
2656 case VM_INHERIT_NONE:
2657 break;
2658
2659 case VM_INHERIT_SHARE:
2660 /*
2661 * Clone the entry, creating the shared object if necessary.
2662 */
2663 object = old_entry->object.vm_object;
2664 if (object == NULL) {
2665 object = vm_object_allocate(OBJT_DEFAULT,
2666 atop(old_entry->end - old_entry->start));
2667 old_entry->object.vm_object = object;
2668 old_entry->offset = (vm_offset_t) 0;
2669 }
2670
2671 /*
2672 * Add the reference before calling vm_object_shadow
 2673 * to ensure that a shadow object is created.
2674 */
2675 vm_object_reference(object);
2676 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2677 vm_object_shadow(&old_entry->object.vm_object,
2678 &old_entry->offset,
2679 atop(old_entry->end - old_entry->start));
2680 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2681 /* Transfer the second reference too. */
2682 vm_object_reference(
2683 old_entry->object.vm_object);
2684 vm_object_deallocate(object);
2685 object = old_entry->object.vm_object;
2686 }
2687 vm_object_clear_flag(object, OBJ_ONEMAPPING);
2688
2689 /*
2690 * Clone the entry, referencing the shared object.
2691 */
2692 new_entry = vm_map_entry_create(new_map, &count);
2693 *new_entry = *old_entry;
2694 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2695 new_entry->wired_count = 0;
2696
2697 /*
2698 * Insert the entry into the new map -- we know we're
2699 * inserting at the end of the new map.
2700 */
2701
2702 vm_map_entry_link(new_map, new_map->header.prev,
2703 new_entry);
2704
2705 /*
2706 * Update the physical map
2707 */
2708
2709 pmap_copy(new_map->pmap, old_map->pmap,
2710 new_entry->start,
2711 (old_entry->end - old_entry->start),
2712 old_entry->start);
2713 break;
2714
2715 case VM_INHERIT_COPY:
2716 /*
2717 * Clone the entry and link into the map.
2718 */
2719 new_entry = vm_map_entry_create(new_map, &count);
2720 *new_entry = *old_entry;
2721 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2722 new_entry->wired_count = 0;
2723 new_entry->object.vm_object = NULL;
2724 vm_map_entry_link(new_map, new_map->header.prev,
2725 new_entry);
2726 vm_map_copy_entry(old_map, new_map, old_entry,
2727 new_entry);
2728 break;
2729 }
2730 old_entry = old_entry->next;
2731 }
2732
2733 new_map->size = old_map->size;
2734 old_map->infork = 0;
2735 vm_map_unlock(old_map);
2736 vm_map_entry_release(count);
2737
2738 return (vm2);
2739}
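
/*
 * Illustrative sketch (not compiled): conceptually, the fork path just
 * clones the parent's vmspace and hands the copy to the child.  The
 * helper below is a hypothetical reduction of that step; the real fork
 * code also handles RFMEM sharing and resource accounting.
 */
#if 0
static void
example_fork_vmspace(struct proc *p1, struct proc *p2)
{
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
}
#endif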
2740
2741int
2742vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2743 vm_prot_t prot, vm_prot_t max, int cow)
2744{
2745 vm_map_entry_t prev_entry;
2746 vm_map_entry_t new_stack_entry;
2747 vm_size_t init_ssize;
2748 int rv;
2749 int count;
2750
2751 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2752 return (KERN_NO_SPACE);
2753
2754 if (max_ssize < sgrowsiz)
2755 init_ssize = max_ssize;
2756 else
2757 init_ssize = sgrowsiz;
2758
2759 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2760 vm_map_lock(map);
2761
2762 /* If addr is already mapped, no go */
2763 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2764 vm_map_unlock(map);
2765 vm_map_entry_release(count);
2766 return (KERN_NO_SPACE);
2767 }
2768
2769 /* If we would blow our VMEM resource limit, no go */
2770 if (map->size + init_ssize >
2771 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2772 vm_map_unlock(map);
2773 vm_map_entry_release(count);
2774 return (KERN_NO_SPACE);
2775 }
2776
 2777 /* If we can't accommodate max_ssize in the current mapping,
2778 * no go. However, we need to be aware that subsequent user
2779 * mappings might map into the space we have reserved for
2780 * stack, and currently this space is not protected.
2781 *
2782 * Hopefully we will at least detect this condition
2783 * when we try to grow the stack.
2784 */
2785 if ((prev_entry->next != &map->header) &&
2786 (prev_entry->next->start < addrbos + max_ssize)) {
2787 vm_map_unlock(map);
2788 vm_map_entry_release(count);
2789 return (KERN_NO_SPACE);
2790 }
2791
2792 /* We initially map a stack of only init_ssize. We will
2793 * grow as needed later. Since this is to be a grow
2794 * down stack, we map at the top of the range.
2795 *
2796 * Note: we would normally expect prot and max to be
2797 * VM_PROT_ALL, and cow to be 0. Possibly we should
2798 * eliminate these as input parameters, and just
2799 * pass these values here in the insert call.
2800 */
2801 rv = vm_map_insert(map, &count,
2802 NULL, 0, addrbos + max_ssize - init_ssize,
2803 addrbos + max_ssize, prot, max, cow);
2804
2805 /* Now set the avail_ssize amount */
2806 if (rv == KERN_SUCCESS){
2807 if (prev_entry != &map->header)
2808 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count);
2809 new_stack_entry = prev_entry->next;
2810 if (new_stack_entry->end != addrbos + max_ssize ||
2811 new_stack_entry->start != addrbos + max_ssize - init_ssize)
2812 panic ("Bad entry start/end for new stack entry");
2813 else
2814 new_stack_entry->avail_ssize = max_ssize - init_ssize;
2815 }
2816
2817 vm_map_unlock(map);
2818 vm_map_entry_release(count);
2819 return (rv);
2820}
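
/*
 * Illustrative sketch (not compiled): how exec-time setup might reserve
 * a grow-down user stack.  USRSTACK and maxssiz are the customary
 * kernel symbols for the stack top and maximum stack size; the exact
 * call site shown here is hypothetical.
 */
#if 0
static int
example_create_stack(struct vmspace *vm)
{
	return (vm_map_stack(&vm->vm_map, USRSTACK - maxssiz, maxssiz,
	    VM_PROT_ALL, VM_PROT_ALL, 0));
}
#endif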
2821
2822/* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the
2823 * desired address is already mapped, or if we successfully grow
2824 * the stack. Also returns KERN_SUCCESS if addr is outside the
2825 * stack range (this is strange, but preserves compatibility with
2826 * the grow function in vm_machdep.c).
2827 */
2828int
2829vm_map_growstack (struct proc *p, vm_offset_t addr)
2830{
2831 vm_map_entry_t prev_entry;
2832 vm_map_entry_t stack_entry;
2833 vm_map_entry_t new_stack_entry;
2834 struct vmspace *vm = p->p_vmspace;
2835 vm_map_t map = &vm->vm_map;
2836 vm_offset_t end;
2837 int grow_amount;
2838 int rv = KERN_SUCCESS;
2839 int is_procstack;
2840 int use_read_lock = 1;
2841 int count;
2842
2843 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2844Retry:
2845 if (use_read_lock)
2846 vm_map_lock_read(map);
2847 else
2848 vm_map_lock(map);
2849
2850 /* If addr is already in the entry range, no need to grow.*/
2851 if (vm_map_lookup_entry(map, addr, &prev_entry))
2852 goto done;
2853
2854 if ((stack_entry = prev_entry->next) == &map->header)
2855 goto done;
2856 if (prev_entry == &map->header)
2857 end = stack_entry->start - stack_entry->avail_ssize;
2858 else
2859 end = prev_entry->end;
2860
2861 /* This next test mimics the old grow function in vm_machdep.c.
2862 * It really doesn't quite make sense, but we do it anyway
2863 * for compatibility.
2864 *
 2865 * If the stack is not growable, return success. This signals
 2866 * the caller to proceed as it normally would with ordinary VM.
2867 */
2868 if (stack_entry->avail_ssize < 1 ||
2869 addr >= stack_entry->start ||
2870 addr < stack_entry->start - stack_entry->avail_ssize) {
2871 goto done;
2872 }
2873
2874 /* Find the minimum grow amount */
2875 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2876 if (grow_amount > stack_entry->avail_ssize) {
2877 rv = KERN_NO_SPACE;
2878 goto done;
2879 }
2880
 2881 /* If there is no longer enough space between the entries,
 2882 * fail and adjust the available space. Note: this
2883 * should only happen if the user has mapped into the
2884 * stack area after the stack was created, and is
2885 * probably an error.
2886 *
2887 * This also effectively destroys any guard page the user
2888 * might have intended by limiting the stack size.
2889 */
2890 if (grow_amount > stack_entry->start - end) {
2891 if (use_read_lock && vm_map_lock_upgrade(map)) {
2892 use_read_lock = 0;
2893 goto Retry;
2894 }
2895 use_read_lock = 0;
2896 stack_entry->avail_ssize = stack_entry->start - end;
2897 rv = KERN_NO_SPACE;
2898 goto done;
2899 }
2900
2901 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2902
2903 /* If this is the main process stack, see if we're over the
2904 * stack limit.
2905 */
2906 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2907 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2908 rv = KERN_NO_SPACE;
2909 goto done;
2910 }
2911
 2912 /* Round the grow amount up to a multiple of sgrowsiz */
2913 grow_amount = roundup (grow_amount, sgrowsiz);
2914 if (grow_amount > stack_entry->avail_ssize) {
2915 grow_amount = stack_entry->avail_ssize;
2916 }
2917 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2918 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2919 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2920 ctob(vm->vm_ssize);
2921 }
2922
2923 /* If we would blow our VMEM resource limit, no go */
2924 if (map->size + grow_amount > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2925 rv = KERN_NO_SPACE;
2926 goto done;
2927 }
2928
2929 if (use_read_lock && vm_map_lock_upgrade(map)) {
2930 use_read_lock = 0;
2931 goto Retry;
2932 }
2933 use_read_lock = 0;
2934
2935 /* Get the preliminary new entry start value */
2936 addr = stack_entry->start - grow_amount;
2937
2938 /* If this puts us into the previous entry, cut back our growth
2939 * to the available space. Also, see the note above.
2940 */
2941 if (addr < end) {
2942 stack_entry->avail_ssize = stack_entry->start - end;
2943 addr = end;
2944 }
2945
2946 rv = vm_map_insert(map, &count,
2947 NULL, 0, addr, stack_entry->start,
2948 VM_PROT_ALL,
2949 VM_PROT_ALL,
2950 0);
2951
2952 /* Adjust the available stack space by the amount we grew. */
2953 if (rv == KERN_SUCCESS) {
2954 if (prev_entry != &map->header)
2955 vm_map_clip_end(map, prev_entry, addr, &count);
2956 new_stack_entry = prev_entry->next;
2957 if (new_stack_entry->end != stack_entry->start ||
2958 new_stack_entry->start != addr)
2959 panic ("Bad stack grow start/end in new stack entry");
2960 else {
2961 new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2962 (new_stack_entry->end -
2963 new_stack_entry->start);
2964 if (is_procstack)
2965 vm->vm_ssize += btoc(new_stack_entry->end -
2966 new_stack_entry->start);
2967 }
2968 }
2969
2970done:
2971 if (use_read_lock)
2972 vm_map_unlock_read(map);
2973 else
2974 vm_map_unlock(map);
2975 vm_map_entry_release(count);
2976 return (rv);
2977}
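
/*
 * Illustrative sketch (not compiled): the user page-fault path gives
 * the stack a chance to grow before taking the fault, roughly as the
 * trap code does.  The helper and its error handling are hypothetical.
 */
#if 0
static int
example_user_fault(struct proc *p, vm_offset_t va, vm_prot_t ftype)
{
	if (vm_map_growstack(p, va) != KERN_SUCCESS)
		return (KERN_FAILURE);
	return (vm_fault(&p->p_vmspace->vm_map, trunc_page(va), ftype,
	    VM_FAULT_NORMAL));
}
#endif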
2978
2979/*
 2980 * Replace the specified process's VM space for exec: a fresh vmspace
 2981 * is created, or the resident vmspace is forked if one was supplied.
2982 */
2983
2984void
2985vmspace_exec(struct proc *p, struct vmspace *vmcopy)
2986{
2987 struct vmspace *oldvmspace = p->p_vmspace;
2988 struct vmspace *newvmspace;
2989 vm_map_t map = &p->p_vmspace->vm_map;
2990
2991 /*
2992 * If we are execing a resident vmspace we fork it, otherwise
2993 * we create a new vmspace.
2994 */
2995 if (vmcopy) {
2996 newvmspace = vmspace_fork(vmcopy);
2997 } else {
2998 newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2999 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
3000 (caddr_t)(newvmspace+1) - (caddr_t) &newvmspace->vm_startcopy);
3001 }
3002
3003 /*
3004 * This code is written like this for prototype purposes. The
3005 * goal is to avoid running down the vmspace here, but let the
 3006 * other processes that are still using the vmspace finally
3007 * run it down. Even though there is little or no chance of blocking
3008 * here, it is a good idea to keep this form for future mods.
3009 */
3010 p->p_vmspace = newvmspace;
3011 pmap_pinit2(vmspace_pmap(newvmspace));
3012 if (p == curproc)
3013 pmap_activate(p);
3014 vmspace_free(oldvmspace);
3015}
3016
3017/*
3018 * Unshare the specified VM space for forcing COW. This
3019 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3020 */
3021
3022void
3023vmspace_unshare(struct proc *p)
3024{
3025 struct vmspace *oldvmspace = p->p_vmspace;
3026 struct vmspace *newvmspace;
3027
3028 if (oldvmspace->vm_refcnt == 1)
3029 return;
3030 newvmspace = vmspace_fork(oldvmspace);
3031 p->p_vmspace = newvmspace;
3032 pmap_pinit2(vmspace_pmap(newvmspace));
3033 if (p == curproc)
3034 pmap_activate(p);
3035 vmspace_free(oldvmspace);
3036}
3037
3038/*
3039 * vm_map_lookup:
3040 *
3041 * Finds the VM object, offset, and
3042 * protection for a given virtual address in the
3043 * specified map, assuming a page fault of the
3044 * type specified.
3045 *
3046 * Leaves the map in question locked for read; return
3047 * values are guaranteed until a vm_map_lookup_done
3048 * call is performed. Note that the map argument
3049 * is in/out; the returned map must be used in
3050 * the call to vm_map_lookup_done.
3051 *
3052 * A handle (out_entry) is returned for use in
3053 * vm_map_lookup_done, to make that fast.
3054 *
3055 * If a lookup is requested with "write protection"
3056 * specified, the map may be changed to perform virtual
3057 * copying operations, although the data referenced will
3058 * remain the same.
3059 */
3060int
3061vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
3062 vm_offset_t vaddr,
3063 vm_prot_t fault_typea,
3064 vm_map_entry_t *out_entry, /* OUT */
3065 vm_object_t *object, /* OUT */
3066 vm_pindex_t *pindex, /* OUT */
3067 vm_prot_t *out_prot, /* OUT */
3068 boolean_t *wired) /* OUT */
3069{
3070 vm_map_entry_t entry;
3071 vm_map_t map = *var_map;
3072 vm_prot_t prot;
3073 vm_prot_t fault_type = fault_typea;
3074 int use_read_lock = 1;
3075 int rv = KERN_SUCCESS;
3076
3077RetryLookup:
3078 if (use_read_lock)
3079 vm_map_lock_read(map);
3080 else
3081 vm_map_lock(map);
3082
3083 /*
 3084 * If the map has an interesting hint, try it before calling the full
3085 * blown lookup routine.
3086 */
3087 entry = map->hint;
3088 *out_entry = entry;
3089
3090 if ((entry == &map->header) ||
3091 (vaddr < entry->start) || (vaddr >= entry->end)) {
3092 vm_map_entry_t tmp_entry;
3093
3094 /*
3095 * Entry was either not a valid hint, or the vaddr was not
3096 * contained in the entry, so do a full lookup.
3097 */
3098 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
3099 rv = KERN_INVALID_ADDRESS;
3100 goto done;
3101 }
3102
3103 entry = tmp_entry;
3104 *out_entry = entry;
3105 }
3106
3107 /*
3108 * Handle submaps.
3109 */
3110
3111 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3112 vm_map_t old_map = map;
3113
3114 *var_map = map = entry->object.sub_map;
3115 if (use_read_lock)
3116 vm_map_unlock_read(old_map);
3117 else
3118 vm_map_unlock(old_map);
3119 use_read_lock = 1;
3120 goto RetryLookup;
3121 }
3122
3123 /*
3124 * Check whether this task is allowed to have this page.
3125 * Note the special case for MAP_ENTRY_COW
3126 * pages with an override. This is to implement a forced
3127 * COW for debuggers.
3128 */
3129
3130 if (fault_type & VM_PROT_OVERRIDE_WRITE)
3131 prot = entry->max_protection;
3132 else
3133 prot = entry->protection;
3134
3135 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
3136 if ((fault_type & prot) != fault_type) {
3137 rv = KERN_PROTECTION_FAILURE;
3138 goto done;
3139 }
3140
3141 if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3142 (entry->eflags & MAP_ENTRY_COW) &&
3143 (fault_type & VM_PROT_WRITE) &&
3144 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
3145 rv = KERN_PROTECTION_FAILURE;
3146 goto done;
3147 }
3148
3149 /*
3150 * If this page is not pageable, we have to get it for all possible
3151 * accesses.
3152 */
3153
3154 *wired = (entry->wired_count != 0);
3155 if (*wired)
3156 prot = fault_type = entry->protection;
3157
3158 /*
 3159 * If the entry was copy-on-write, we either shadow it now (write access) or demote the allowed protection (read access).
3160 */
3161
3162 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3163 /*
3164 * If we want to write the page, we may as well handle that
3165 * now since we've got the map locked.
3166 *
3167 * If we don't need to write the page, we just demote the
3168 * permissions allowed.
3169 */
3170
3171 if (fault_type & VM_PROT_WRITE) {
3172 /*
3173 * Make a new object, and place it in the object
3174 * chain. Note that no new references have appeared
3175 * -- one just moved from the map to the new
3176 * object.
3177 */
3178
3179 if (use_read_lock && vm_map_lock_upgrade(map)) {
3180 use_read_lock = 0;
3181 goto RetryLookup;
3182 }
3183 use_read_lock = 0;
3184
3185 vm_object_shadow(
3186 &entry->object.vm_object,
3187 &entry->offset,
3188 atop(entry->end - entry->start));
3189
3190 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3191 } else {
3192 /*
3193 * We're attempting to read a copy-on-write page --
3194 * don't allow writes.
3195 */
3196
3197 prot &= ~VM_PROT_WRITE;
3198 }
3199 }
3200
3201 /*
3202 * Create an object if necessary.
3203 */
3204 if (entry->object.vm_object == NULL &&
3205 !map->system_map) {
3206 if (use_read_lock && vm_map_lock_upgrade(map)) {
3207 use_read_lock = 0;
3208 goto RetryLookup;
3209 }
3210 use_read_lock = 0;
3211 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
3212 atop(entry->end - entry->start));
3213 entry->offset = 0;
3214 }
3215
3216 /*
3217 * Return the object/offset from this entry. If the entry was
3218 * copy-on-write or empty, it has been fixed up.
3219 */
3220
3221 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3222 *object = entry->object.vm_object;
3223
3224 /*
 3225 * Return the computed protection. On
3226 * success we return with a read lock held on the map. On failure
3227 * we return with the map unlocked.
3228 */
3229 *out_prot = prot;
3230done:
3231 if (rv == KERN_SUCCESS) {
3232 if (use_read_lock == 0)
3233 vm_map_lock_downgrade(map);
3234 } else if (use_read_lock) {
3235 vm_map_unlock_read(map);
3236 } else {
3237 vm_map_unlock(map);
3238 }
3239 return (rv);
3240}
3241
3242/*
3243 * vm_map_lookup_done:
3244 *
3245 * Releases locks acquired by a vm_map_lookup
3246 * (according to the handle returned by that lookup).
3247 */
3248
3249void
3250vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count)
3251{
3252 /*
3253 * Unlock the main-level map
3254 */
3255 vm_map_unlock_read(map);
3256 if (count)
3257 vm_map_entry_release(count);
3258}
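
/*
 * Illustrative sketch (not compiled): the canonical pairing of
 * vm_map_lookup() with vm_map_lookup_done(), as a fault-style consumer
 * might use it.  The helper name is hypothetical and the work done on
 * the returned (object, pindex) pair is elided.
 */
#if 0
static int
example_lookup_page(vm_map_t map, vm_offset_t va)
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
	    &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... use object/pindex while the map remains read-locked ... */
	vm_map_lookup_done(map, entry, 0);	/* no reserved entries held */
	return (KERN_SUCCESS);
}
#endif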
3259
3260#ifdef ENABLE_VFS_IOOPT
3261
3262/*
 3263 * Implement uiomove with VM operations. This (together with its
 3264 * collateral changes) supports every combination of source object
 3265 * modification and COW-type operation.
3266 */
3267int
 3268vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
 3269 vm_offset_t uaddra, int *npages)
 3270{
3276 vm_map_t map;
3277 vm_object_t first_object, oldobject, object;
3278 vm_map_entry_t entry;
3279 vm_prot_t prot;
3280 boolean_t wired;
3281 int tcnt, rv;
3282 vm_offset_t uaddr, start, end, tend;
3283 vm_pindex_t first_pindex, osize, oindex;
3284 off_t ooffset;
3285 int cnt;
3286 int count;
3287
3288 if (npages)
3289 *npages = 0;
3290
3291 cnt = cnta;
3292 uaddr = uaddra;
3293
3294 while (cnt > 0) {
3295 map = mapa;
3296
3297 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
3298
3299 if ((vm_map_lookup(&map, uaddr,
3300 VM_PROT_READ, &entry, &first_object,
3301 &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
3302 return EFAULT;
3303 }
3304
3305 vm_map_clip_start(map, entry, uaddr, &count);
3306
3307 tcnt = cnt;
3308 tend = uaddr + tcnt;
3309 if (tend > entry->end) {
3310 tcnt = entry->end - uaddr;
3311 tend = entry->end;
3312 }
3313
3314 vm_map_clip_end(map, entry, tend, &count);
3315
3316 start = entry->start;
3317 end = entry->end;
3318
3319 osize = atop(tcnt);
3320
3321 oindex = OFF_TO_IDX(cp);
3322 if (npages) {
3323 vm_pindex_t idx;
3324 for (idx = 0; idx < osize; idx++) {
3325 vm_page_t m;
3326 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
3327 vm_map_lookup_done(map, entry, count);
3328 return 0;
3329 }
3330 /*
3331 * disallow busy or invalid pages, but allow
3332 * m->busy pages if they are entirely valid.
3333 */
3334 if ((m->flags & PG_BUSY) ||
3335 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
3336 vm_map_lookup_done(map, entry, count);
3337 return 0;
3338 }
3339 }
3340 }
3341
3342/*
3343 * If we are changing an existing map entry, just redirect
3344 * the object, and change mappings.
3345 */
3346 if ((first_object->type == OBJT_VNODE) &&
3347 ((oldobject = entry->object.vm_object) == first_object)) {
3348
3349 if ((entry->offset != cp) || (oldobject != srcobject)) {
3350 /*
3351 * Remove old window into the file
3352 */
3353 pmap_remove (map->pmap, uaddr, tend);
3354
3355 /*
3356 * Force copy on write for mmaped regions
3357 */
3358 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3359
3360 /*
3361 * Point the object appropriately
3362 */
3363 if (oldobject != srcobject) {
3364
3365 /*
3366 * Set the object optimization hint flag
3367 */
3368 vm_object_set_flag(srcobject, OBJ_OPT);
3369 vm_object_reference(srcobject);
3370 entry->object.vm_object = srcobject;
3371
3372 if (oldobject) {
3373 vm_object_deallocate(oldobject);
3374 }
3375 }
3376
3377 entry->offset = cp;
3378 map->timestamp++;
3379 } else {
3380 pmap_remove (map->pmap, uaddr, tend);
3381 }
3382
3383 } else if ((first_object->ref_count == 1) &&
3384 (first_object->size == osize) &&
3385 ((first_object->type == OBJT_DEFAULT) ||
3386 (first_object->type == OBJT_SWAP)) ) {
3387
3388 oldobject = first_object->backing_object;
3389
3390 if ((first_object->backing_object_offset != cp) ||
3391 (oldobject != srcobject)) {
3392 /*
3393 * Remove old window into the file
3394 */
3395 pmap_remove (map->pmap, uaddr, tend);
3396
3397 /*
3398 * Remove unneeded old pages
3399 */
3400 vm_object_page_remove(first_object, 0, 0, 0);
3401
3402 /*
3403 * Invalidate swap space
3404 */
3405 if (first_object->type == OBJT_SWAP) {
3406 swap_pager_freespace(first_object,
3407 0,
3408 first_object->size);
3409 }
3410
3411 /*
3412 * Force copy on write for mmaped regions
3413 */
3414 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3415
3416 /*
3417 * Point the object appropriately
3418 */
3419 if (oldobject != srcobject) {
3420
3421 /*
3422 * Set the object optimization hint flag
3423 */
3424 vm_object_set_flag(srcobject, OBJ_OPT);
3425 vm_object_reference(srcobject);
3426
3427 if (oldobject) {
3428 LIST_REMOVE(
3429 first_object, shadow_list);
3430 oldobject->shadow_count--;
3431 /* XXX bump generation? */
3432 vm_object_deallocate(oldobject);
3433 }
3434
3435 LIST_INSERT_HEAD(&srcobject->shadow_head,
3436 first_object, shadow_list);
3437 srcobject->shadow_count++;
3438 /* XXX bump generation? */
3439
3440 first_object->backing_object = srcobject;
3441 }
3442 first_object->backing_object_offset = cp;
3443 map->timestamp++;
3444 } else {
3445 pmap_remove (map->pmap, uaddr, tend);
3446 }
3447/*
3448 * Otherwise, we have to do a logical mmap.
3449 */
3450 } else {
3451
3452 vm_object_set_flag(srcobject, OBJ_OPT);
3453 vm_object_reference(srcobject);
3454
3455 pmap_remove (map->pmap, uaddr, tend);
3456
3457 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3458 vm_map_lock_upgrade(map);
3459
3460 if (entry == &map->header) {
3461 map->first_free = &map->header;
3462 } else if (map->first_free->start >= start) {
3463 map->first_free = entry->prev;
3464 }
3465
3466 SAVE_HINT(map, entry->prev);
3467 vm_map_entry_delete(map, entry, &count);
3468
3469 object = srcobject;
3470 ooffset = cp;
3471
3472 rv = vm_map_insert(map, &count,
3473 object, ooffset, start, tend,
3474 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3475
3476 if (rv != KERN_SUCCESS)
3477 panic("vm_uiomove: could not insert new entry: %d", rv);
3478 }
3479
3480/*
3481 * Map the window directly, if it is already in memory
3482 */
3483 pmap_object_init_pt(map->pmap, uaddr,
3484 srcobject, oindex, tcnt, 0);
3485
3486 map->timestamp++;
3487 vm_map_unlock(map);
3488 vm_map_entry_release(count);
3489
3490 cnt -= tcnt;
3491 uaddr += tcnt;
3492 cp += tcnt;
3493 if (npages)
3494 *npages += osize;
3495 }
3496 return 0;
3497}
3498
3499#endif
3500
3501/*
3502 * Performs the copy_on_write operations necessary to allow the virtual copies
3503 * into user space to work. This has to be called for write(2) system calls
3504 * from other processes, file unlinking, and file size shrinkage.
3505 */
3506void
 3507vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
 3508{
3511 int rv;
3512 vm_object_t robject;
3513 vm_pindex_t idx;
3514
3515 if ((object == NULL) ||
3516 ((object->flags & OBJ_OPT) == 0))
3517 return;
3518
3519 if (object->shadow_count > object->ref_count)
3520 panic("vm_freeze_copyopts: sc > rc");
3521
3522 while((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
3523 vm_pindex_t bo_pindex;
3524 vm_page_t m_in, m_out;
3525
3526 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3527
3528 vm_object_reference(robject);
3529
3530 vm_object_pip_wait(robject, "objfrz");
3531
3532 if (robject->ref_count == 1) {
3533 vm_object_deallocate(robject);
3534 continue;
3535 }
3536
3537 vm_object_pip_add(robject, 1);
3538
3539 for (idx = 0; idx < robject->size; idx++) {
3540
3541 m_out = vm_page_grab(robject, idx,
3542 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3543
3544 if (m_out->valid == 0) {
3545 m_in = vm_page_grab(object, bo_pindex + idx,
3546 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3547 if (m_in->valid == 0) {
3548 rv = vm_pager_get_pages(object, &m_in, 1, 0);
3549 if (rv != VM_PAGER_OK) {
3550 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3551 continue;
3552 }
3553 vm_page_deactivate(m_in);
3554 }
3555
3556 vm_page_protect(m_in, VM_PROT_NONE);
3557 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3558 m_out->valid = m_in->valid;
3559 vm_page_dirty(m_out);
3560 vm_page_activate(m_out);
3561 vm_page_wakeup(m_in);
3562 }
3563 vm_page_wakeup(m_out);
3564 }
3565
3566 object->shadow_count--;
3567 object->ref_count--;
3568 LIST_REMOVE(robject, shadow_list);
3569 robject->backing_object = NULL;
3570 robject->backing_object_offset = 0;
3571
3572 vm_object_pip_wakeup(robject);
3573 vm_object_deallocate(robject);
3574 }
3575
3576 vm_object_clear_flag(object, OBJ_OPT);
3577}
3578
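/*
 * Illustrative sketch (not compiled): a write(2)-style path freezing
 * copy optimizations on a vnode's VM object before modifying file
 * data, per the comment above vm_freeze_copyopts().  The vnode-to-
 * object step is a hypothetical reduction.
 */
#if 0
static void
example_freeze_before_write(struct vnode *vp, off_t offset, size_t len)
{
	vm_object_t object = vp->v_object;

	if (object != NULL)
		vm_freeze_copyopts(object, OFF_TO_IDX(offset),
		    OFF_TO_IDX(offset + len + PAGE_MASK));
}
#endif
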
3579#include "opt_ddb.h"
3580#ifdef DDB
3581#include <sys/kernel.h>
3582
3583#include <ddb/ddb.h>
3584
3585/*
3586 * vm_map_print: [ debug ]
3587 */
3588DB_SHOW_COMMAND(map, vm_map_print)
3589{
3590 static int nlines;
3591 /* XXX convert args. */
3592 vm_map_t map = (vm_map_t)addr;
3593 boolean_t full = have_addr;
3594
3595 vm_map_entry_t entry;
3596
3597 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3598 (void *)map,
3599 (void *)map->pmap, map->nentries, map->timestamp);
3600 nlines++;
3601
3602 if (!full && db_indent)
3603 return;
3604
3605 db_indent += 2;
3606 for (entry = map->header.next; entry != &map->header;
3607 entry = entry->next) {
3608 db_iprintf("map entry %p: start=%p, end=%p\n",
3609 (void *)entry, (void *)entry->start, (void *)entry->end);
3610 nlines++;
3611 {
3612 static char *inheritance_name[4] =
3613 {"share", "copy", "none", "donate_copy"};
3614
3615 db_iprintf(" prot=%x/%x/%s",
3616 entry->protection,
3617 entry->max_protection,
3618 inheritance_name[(int)(unsigned char)entry->inheritance]);
3619 if (entry->wired_count != 0)
3620 db_printf(", wired");
3621 }
3622 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3623 /* XXX no %qd in kernel. Truncate entry->offset. */
3624 db_printf(", share=%p, offset=0x%lx\n",
3625 (void *)entry->object.sub_map,
3626 (long)entry->offset);
3627 nlines++;
3628 if ((entry->prev == &map->header) ||
3629 (entry->prev->object.sub_map !=
3630 entry->object.sub_map)) {
3631 db_indent += 2;
3632 vm_map_print((db_expr_t)(intptr_t)
3633 entry->object.sub_map,
3634 full, 0, (char *)0);
3635 db_indent -= 2;
3636 }
3637 } else {
3638 /* XXX no %qd in kernel. Truncate entry->offset. */
3639 db_printf(", object=%p, offset=0x%lx",
3640 (void *)entry->object.vm_object,
3641 (long)entry->offset);
3642 if (entry->eflags & MAP_ENTRY_COW)
3643 db_printf(", copy (%s)",
3644 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3645 db_printf("\n");
3646 nlines++;
3647
3648 if ((entry->prev == &map->header) ||
3649 (entry->prev->object.vm_object !=
3650 entry->object.vm_object)) {
3651 db_indent += 2;
3652 vm_object_print((db_expr_t)(intptr_t)
3653 entry->object.vm_object,
3654 full, 0, (char *)0);
3655 nlines += 4;
3656 db_indent -= 2;
3657 }
3658 }
3659 }
3660 db_indent -= 2;
3661 if (db_indent == 0)
3662 nlines = 0;
3663}
3664
3665
3666DB_SHOW_COMMAND(procvm, procvm)
3667{
3668 struct proc *p;
3669
3670 if (have_addr) {
3671 p = (struct proc *) addr;
3672 } else {
3673 p = curproc;
3674 }
3675
3676 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3677 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3678 (void *)vmspace_pmap(p->p_vmspace));
3679
3680 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3681}
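
/*
 * Typical usage from the DDB prompt (illustrative):
 *
 *	db> show map <address-of-vm_map>
 *	db> show procvm <address-of-struct-proc>
 *
 * "show procvm" without an address dumps curproc's vmspace and then
 * recurses into vm_map_print() for its map.
 */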
3682
3683#endif /* DDB */