1/*
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 *
64 * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $
65 * $DragonFly: src/sys/vm/vm_map.c,v 1.36 2004/12/21 02:42:41 hsu Exp $
66 */
67
68/*
69 * Virtual memory mapping module.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/proc.h>
75#include <sys/lock.h>
76#include <sys/vmmeter.h>
77#include <sys/mman.h>
78#include <sys/vnode.h>
79#include <sys/resourcevar.h>
80#include <sys/shm.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/pmap.h>
85#include <vm/vm_map.h>
86#include <vm/vm_page.h>
87#include <vm/vm_object.h>
88#include <vm/vm_pager.h>
89#include <vm/vm_kern.h>
90#include <vm/vm_extern.h>
91#include <vm/swap_pager.h>
92#include <vm/vm_zone.h>
93
94#include <sys/thread2.h>
95
96/*
97 * Virtual memory maps provide for the mapping, protection,
98 * and sharing of virtual memory objects. In addition,
99 * this module provides for an efficient virtual copy of
100 * memory from one map to another.
101 *
102 * Synchronization is required prior to most operations.
103 *
104 * Maps consist of an ordered doubly-linked list of simple
105 * entries; a single hint is used to speed up lookups.
106 *
107 * Since portions of maps are specified by start/end addresses,
108 * which may not align with existing map entries, all
109 * routines merely "clip" entries to these start/end values.
110 * [That is, an entry is split into two, bordering at a
111 * start or end value.] Note that these clippings may not
112 * always be necessary (as the two resulting entries are then
113 * not changed); however, the clipping is done for convenience.
114 *
115 * As mentioned above, virtual copy operations are performed
116 * by copying VM object references from one map to
117 * another, and then marking both regions as copy-on-write.
118 */
119
120/*
121 * vm_map_startup:
122 *
123 * Initialize the vm_map module. Must be called before
124 * any other vm_map routines.
125 *
126 * Map and entry structures are allocated from the general
127 * purpose memory pool with some exceptions:
128 *
129 * - The kernel map and kmem submap are allocated statically.
130 * - Kernel map entries are allocated out of a static pool.
131 *
132 * These restrictions are necessary since malloc() uses the
133 * maps and requires map entries.
134 */
135
136#define VMEPERCPU 2
137
138static struct vm_zone mapentzone_store, mapzone_store;
139static vm_zone_t mapentzone, mapzone, vmspace_zone;
140static struct vm_object mapentobj, mapobj;
141
142static struct vm_map_entry map_entry_init[MAX_MAPENT];
143static struct vm_map_entry cpu_map_entry_init[MAXCPU][VMEPERCPU];
144static struct vm_map map_init[MAX_KMAP];
145
146static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *);
147static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *);
148static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
149static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
150static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *);
151static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t);
152static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t,
153 vm_map_entry_t);
154static void vm_map_split (vm_map_entry_t);
155static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int *count, int flags);
156
157void
158vm_map_startup(void)
159{
160 mapzone = &mapzone_store;
161 zbootinit(mapzone, "MAP", sizeof (struct vm_map),
162 map_init, MAX_KMAP);
163 mapentzone = &mapentzone_store;
164 zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
165 map_entry_init, MAX_MAPENT);
166}
167
168/*
169 * Allocate a vmspace structure, including a vm_map and pmap,
170 * and initialize those structures. The refcnt is set to 1.
171 * The remaining fields must be initialized by the caller.
172 */
173struct vmspace *
174vmspace_alloc(vm_offset_t min, vm_offset_t max)
175{
176 struct vmspace *vm;
177
178 vm = zalloc(vmspace_zone);
179 vm_map_init(&vm->vm_map, min, max);
180 pmap_pinit(vmspace_pmap(vm));
181 vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */
182 vm->vm_refcnt = 1;
183 vm->vm_shm = NULL;
184 vm->vm_exitingcnt = 0;
185 return (vm);
186}
187
188void
189vm_init2(void)
190{
191 zinitna(mapentzone, &mapentobj, NULL, 0, 0,
192 ZONE_USE_RESERVE | ZONE_SPECIAL, 1);
193 zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1);
194 vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
195 pmap_init2();
196 vm_object_init2();
197}
198
199static __inline void
200vmspace_dofree(struct vmspace *vm)
201{
202 int count;
203
204 /*
205	 * Make sure any SysV shm is freed; it might not have been freed in
206	 * exit1().
207 */
208 shmexit(vm);
209
210 KKASSERT(vm->vm_upcalls == NULL);
211
212 /*
213 * Lock the map, to wait out all other references to it.
214 * Delete all of the mappings and pages they hold, then call
215 * the pmap module to reclaim anything left.
216 */
217 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
218 vm_map_lock(&vm->vm_map);
219 vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
220 vm->vm_map.max_offset, &count);
221 vm_map_unlock(&vm->vm_map);
222 vm_map_entry_release(count);
223
224 pmap_release(vmspace_pmap(vm));
225 zfree(vmspace_zone, vm);
226}
227
228void
229vmspace_free(struct vmspace *vm)
230{
231 if (vm->vm_refcnt == 0)
232 panic("vmspace_free: attempt to free already freed vmspace");
233
234 if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
235 vmspace_dofree(vm);
236}
237
238void
239vmspace_exitfree(struct proc *p)
240{
241 struct vmspace *vm;
242
243 vm = p->p_vmspace;
244 p->p_vmspace = NULL;
245
246 /*
247 * cleanup by parent process wait()ing on exiting child. vm_refcnt
248 * may not be 0 (e.g. fork() and child exits without exec()ing).
249 * exitingcnt may increment above 0 and drop back down to zero
250 * several times while vm_refcnt is held non-zero. vm_refcnt
251 * may also increment above 0 and drop back down to zero several
252 * times while vm_exitingcnt is held non-zero.
253 *
254 * The last wait on the exiting child's vmspace will clean up
255 * the remainder of the vmspace.
256 */
257 if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
258 vmspace_dofree(vm);
259}
260
261/*
262 * vmspace_swap_count() - count the approximate swap usage in pages for a
263 * vmspace.
264 *
265 * Swap usage is determined by taking the proportional swap used by
266 * VM objects backing the VM map. To make up for fractional losses,
267 * if the VM object has any swap use at all the associated map entries
268 * count for at least 1 swap page.
269 */
270int
271vmspace_swap_count(struct vmspace *vmspace)
272{
273 vm_map_t map = &vmspace->vm_map;
274 vm_map_entry_t cur;
275 int count = 0;
276
277 for (cur = map->header.next; cur != &map->header; cur = cur->next) {
278 vm_object_t object;
279
280 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
281 (object = cur->object.vm_object) != NULL &&
282 object->type == OBJT_SWAP
283 ) {
284 int n = (cur->end - cur->start) / PAGE_SIZE;
285
286 if (object->un_pager.swp.swp_bcount) {
287 count += object->un_pager.swp.swp_bcount *
288 SWAP_META_PAGES * n / object->size + 1;
289 }
290 }
291 }
292 return(count);
293}
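/*
 * Illustrative note (not part of the original source): for an entry
 * covering n = (end - start) / PAGE_SIZE pages of a swap-backed
 * object of object->size pages holding swp_bcount swap blocks, the
 * loop above charges
 *
 *	swp_bcount * SWAP_META_PAGES * n / object->size + 1
 *
 * pages of swap.  E.g. an entry mapping half of the object is charged
 * roughly half of the object's swapped pages, plus one page to absorb
 * the fractional loss mentioned in the comment above.
 */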
294
295
296/*
297 * vm_map_create:
298 *
299 * Creates and returns a new empty VM map with
300 * the given physical map structure, and having
301 * the given lower and upper address bounds.
302 */
303vm_map_t
304vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
305{
306 vm_map_t result;
307
308 result = zalloc(mapzone);
309 vm_map_init(result, min, max);
310 result->pmap = pmap;
311 return (result);
312}
313
314/*
315 * Initialize an existing vm_map structure
316 * such as that in the vmspace structure.
317 * The pmap is set elsewhere.
318 */
319void
320vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max)
321{
322 map->header.next = map->header.prev = &map->header;
323 map->nentries = 0;
324 map->size = 0;
325 map->system_map = 0;
326 map->infork = 0;
327 map->min_offset = min;
328 map->max_offset = max;
329 map->first_free = &map->header;
330 map->hint = &map->header;
331 map->timestamp = 0;
332 lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE);
333}
334
335/*
336 * vm_map_entry_reserve_cpu_init:
337 *
338 * Set an initial negative count so the first attempt to reserve
339 * space preloads a bunch of vm_map_entry's for this cpu. Also
340 * pre-allocate 2 vm_map_entries which will be needed by zalloc() to
341 * map a new page for vm_map_entry structures. SMP systems are
342 * particularly sensitive.
343 *
344 * This routine is called in early boot so we cannot just call
345 * vm_map_entry_reserve().
346 *
347 * May be called for a gd other than mycpu, but may only be called
348 * during early boot.
349 */
350void
351vm_map_entry_reserve_cpu_init(globaldata_t gd)
352{
353 vm_map_entry_t entry;
354 int i;
355
356 gd->gd_vme_avail -= MAP_RESERVE_COUNT * 2;
357 entry = &cpu_map_entry_init[gd->gd_cpuid][0];
358 for (i = 0; i < VMEPERCPU; ++i, ++entry) {
359 entry->next = gd->gd_vme_base;
360 gd->gd_vme_base = entry;
361 }
362}
363
364/*
365 * vm_map_entry_reserve:
366 *
367 * Reserves vm_map_entry structures so code later on can manipulate
368 * map_entry structures within a locked map without blocking trying
369 * to allocate a new vm_map_entry.
370 */
371int
372vm_map_entry_reserve(int count)
373{
374 struct globaldata *gd = mycpu;
375 vm_map_entry_t entry;
376
377 crit_enter();
378
379 /*
380 * Make sure we have enough structures in gd_vme_base to handle
381 * the reservation request.
382 */
383 while (gd->gd_vme_avail < count) {
384 entry = zalloc(mapentzone);
385 entry->next = gd->gd_vme_base;
386 gd->gd_vme_base = entry;
387 ++gd->gd_vme_avail;
388 }
389 gd->gd_vme_avail -= count;
390 crit_exit();
391 return(count);
392}
393
394/*
395 * vm_map_entry_release:
396 *
397 * Releases previously reserved vm_map_entry structures that were not
398 * used. If we have too much junk in our per-cpu cache, clean some of
399 * it out.
400 */
401void
402vm_map_entry_release(int count)
403{
404 struct globaldata *gd = mycpu;
405 vm_map_entry_t entry;
406
407 crit_enter();
408 gd->gd_vme_avail += count;
409 while (gd->gd_vme_avail > MAP_RESERVE_SLOP) {
410 entry = gd->gd_vme_base;
411 KKASSERT(entry != NULL);
412 gd->gd_vme_base = entry->next;
413 --gd->gd_vme_avail;
414 crit_exit();
415 zfree(mapentzone, entry);
416 crit_enter();
417 }
418 crit_exit();
419}
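/*
 * A minimal usage sketch (not part of the original source) of the
 * reserve/release bracket provided by the two functions above.  It
 * mirrors the pattern used by vmspace_dofree() and vm_map_find():
 * reserve entries before taking the map lock so map manipulation
 * cannot block in zalloc(), then return whatever was not consumed.
 */
#if 0
static void
example_reserve_bracket(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	vm_map_delete(map, start, end, &count);	/* consumes/returns entries */
	vm_map_unlock(map);
	vm_map_entry_release(count);
}
#endif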
420
421/*
422 * vm_map_entry_kreserve:
423 *
424 * Reserve map entry structures for use in kernel_map itself. These
425 * entries have *ALREADY* been reserved on a per-cpu basis when the map
426 * was inited. This function is used by zalloc() to avoid a recursion
427 * when zalloc() itself needs to allocate additional kernel memory.
428 *
429 * This function works like the normal reserve but does not load the
430 * vm_map_entry cache (because that would result in an infinite
431 * recursion). Note that gd_vme_avail may go negative. This is expected.
432 *
433 * Any caller of this function must be sure to renormalize after
434 * potentially eating entries to ensure that the reserve supply
435 * remains intact.
436 */
437int
438vm_map_entry_kreserve(int count)
439{
440 struct globaldata *gd = mycpu;
441
442 crit_enter();
443 gd->gd_vme_avail -= count;
444 crit_exit();
445 KASSERT(gd->gd_vme_base != NULL, ("no reserved entries left, gd_vme_avail = %d\n", gd->gd_vme_avail));
446 return(count);
447}
448
449/*
450 * vm_map_entry_krelease:
451 *
452 * Release previously reserved map entries for kernel_map. We do not
453 * attempt to clean up like the normal release function as this would
454 * cause an unnecessary (but probably not fatal) deep procedure call.
455 */
456void
457vm_map_entry_krelease(int count)
458{
459 struct globaldata *gd = mycpu;
460
461 crit_enter();
462 gd->gd_vme_avail += count;
463 crit_exit();
464}
465
466/*
467 * vm_map_entry_create: [ internal use only ]
468 *
469 * Allocates a VM map entry for insertion. No entry fields are filled
470 * in.
471 *
472 * This routine may be called from an interrupt thread but not a FAST
473 * interrupt. This routine may recurse the map lock.
474 */
475static vm_map_entry_t
476vm_map_entry_create(vm_map_t map, int *countp)
477{
478 struct globaldata *gd = mycpu;
479 vm_map_entry_t entry;
480
481 KKASSERT(*countp > 0);
482 --*countp;
483 crit_enter();
484 entry = gd->gd_vme_base;
485 KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp));
486 gd->gd_vme_base = entry->next;
487 crit_exit();
488 return(entry);
489}
490
491/*
492 * vm_map_entry_dispose: [ internal use only ]
493 *
494 * Dispose of a vm_map_entry that is no longer being referenced. This
495 * function may be called from an interrupt.
496 */
497static void
498vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp)
499{
500 struct globaldata *gd = mycpu;
501
502 ++*countp;
503 crit_enter();
504 entry->next = gd->gd_vme_base;
505 gd->gd_vme_base = entry;
506 crit_exit();
507}
508
509
510/*
511 * vm_map_entry_{un,}link:
512 *
513 * Insert/remove entries from maps.
514 */
515static __inline void
516vm_map_entry_link(vm_map_t map,
517 vm_map_entry_t after_where,
518 vm_map_entry_t entry)
519{
520 map->nentries++;
521 entry->prev = after_where;
522 entry->next = after_where->next;
523 entry->next->prev = entry;
524 after_where->next = entry;
525}
526
527static __inline void
528vm_map_entry_unlink(vm_map_t map,
529 vm_map_entry_t entry)
530{
531 vm_map_entry_t prev;
532 vm_map_entry_t next;
533
534 if (entry->eflags & MAP_ENTRY_IN_TRANSITION)
535 panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry);
536 prev = entry->prev;
537 next = entry->next;
538 next->prev = prev;
539 prev->next = next;
540 map->nentries--;
541}
542
543/*
544 * SAVE_HINT:
545 *
546 * Saves the specified entry as the hint for
547 * future lookups.
548 */
549#define SAVE_HINT(map,value) \
550 (map)->hint = (value);
551
552/*
553 * vm_map_lookup_entry: [ internal use only ]
554 *
555 * Finds the map entry containing (or
556 * immediately preceding) the specified address
557 * in the given map; the entry is returned
558 * in the "entry" parameter. The boolean
559 * result indicates whether the address is
560 * actually contained in the map.
561 */
562boolean_t
563vm_map_lookup_entry(vm_map_t map, vm_offset_t address,
564 vm_map_entry_t *entry /* OUT */)
565{
566 vm_map_entry_t cur;
567 vm_map_entry_t last;
568
569 /*
570 * Start looking either from the head of the list, or from the hint.
571 */
572
573 cur = map->hint;
574
575 if (cur == &map->header)
576 cur = cur->next;
577
578 if (address >= cur->start) {
579 /*
580 * Go from hint to end of list.
581 *
582 * But first, make a quick check to see if we are already looking
583 * at the entry we want (which is usually the case). Note also
584 * that we don't need to save the hint here... it is the same
585 * hint (unless we are at the header, in which case the hint
586 * didn't buy us anything anyway).
587 */
588 last = &map->header;
589 if ((cur != last) && (cur->end > address)) {
590 *entry = cur;
591 return (TRUE);
592 }
593 } else {
594 /*
595 * Go from start to hint, *inclusively*
596 */
597 last = cur->next;
598 cur = map->header.next;
599 }
600
601 /*
602 * Search linearly
603 */
604
605 while (cur != last) {
606 if (cur->end > address) {
607 if (address >= cur->start) {
608 /*
609 * Save this lookup for future hints, and
610 * return
611 */
612
613 *entry = cur;
614 SAVE_HINT(map, cur);
615 return (TRUE);
616 }
617 break;
618 }
619 cur = cur->next;
620 }
621 *entry = cur->prev;
622 SAVE_HINT(map, *entry);
623 return (FALSE);
624}
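/*
 * Illustrative sketch (not part of the original source) of the common
 * lookup-and-clip idiom used by vm_map_submap(), vm_map_protect() and
 * friends.  The map must be locked by the caller.  On a miss the
 * entry returned is the one preceding the address, so callers step
 * forward to reach the first entry at or beyond 'start'.
 */
#if 0
static void
example_lookup(vm_map_t map, vm_offset_t start, int *countp)
{
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, start, &entry))
		vm_map_clip_start(map, entry, start, countp);
	else
		entry = entry->next;
	/* 'entry' now begins at or after 'start' (or is &map->header) */
}
#endif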
625
626/*
627 * vm_map_insert:
628 *
629 * Inserts the given whole VM object into the target
630 * map at the specified address range. The object's
631 * size should match that of the address range.
632 *
633 * Requires that the map be locked, and leaves it so. Requires that
634 * sufficient vm_map_entry structures have been reserved and tracks
635 * the use via countp.
636 *
637 * If object is non-NULL, ref count must be bumped by caller
638 * prior to making call to account for the new entry.
639 */
640int
641vm_map_insert(vm_map_t map, int *countp,
642 vm_object_t object, vm_ooffset_t offset,
643 vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
644 int cow)
645{
646 vm_map_entry_t new_entry;
647 vm_map_entry_t prev_entry;
648 vm_map_entry_t temp_entry;
649 vm_eflags_t protoeflags;
650
651 /*
652 * Check that the start and end points are not bogus.
653 */
654
655 if ((start < map->min_offset) || (end > map->max_offset) ||
656 (start >= end))
657 return (KERN_INVALID_ADDRESS);
658
659 /*
660 * Find the entry prior to the proposed starting address; if it's part
661 * of an existing entry, this range is bogus.
662 */
663
664 if (vm_map_lookup_entry(map, start, &temp_entry))
665 return (KERN_NO_SPACE);
666
667 prev_entry = temp_entry;
668
669 /*
670 * Assert that the next entry doesn't overlap the end point.
671 */
672
673 if ((prev_entry->next != &map->header) &&
674 (prev_entry->next->start < end))
675 return (KERN_NO_SPACE);
676
677 protoeflags = 0;
678
679 if (cow & MAP_COPY_ON_WRITE)
680 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
681
682 if (cow & MAP_NOFAULT) {
683 protoeflags |= MAP_ENTRY_NOFAULT;
684
685 KASSERT(object == NULL,
686 ("vm_map_insert: paradoxical MAP_NOFAULT request"));
687 }
688 if (cow & MAP_DISABLE_SYNCER)
689 protoeflags |= MAP_ENTRY_NOSYNC;
690 if (cow & MAP_DISABLE_COREDUMP)
691 protoeflags |= MAP_ENTRY_NOCOREDUMP;
692
693 if (object) {
694 /*
695 * When object is non-NULL, it could be shared with another
696 * process. We have to set or clear OBJ_ONEMAPPING
697 * appropriately.
698 */
699 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
700 vm_object_clear_flag(object, OBJ_ONEMAPPING);
701 }
702 }
703 else if ((prev_entry != &map->header) &&
704 (prev_entry->eflags == protoeflags) &&
705 (prev_entry->end == start) &&
706 (prev_entry->wired_count == 0) &&
707 ((prev_entry->object.vm_object == NULL) ||
708 vm_object_coalesce(prev_entry->object.vm_object,
709 OFF_TO_IDX(prev_entry->offset),
710 (vm_size_t)(prev_entry->end - prev_entry->start),
711 (vm_size_t)(end - prev_entry->end)))) {
712 /*
713 * We were able to extend the object. Determine if we
714 * can extend the previous map entry to include the
715 * new range as well.
716 */
717 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
718 (prev_entry->protection == prot) &&
719 (prev_entry->max_protection == max)) {
720 map->size += (end - prev_entry->end);
721 prev_entry->end = end;
722 vm_map_simplify_entry(map, prev_entry, countp);
723 return (KERN_SUCCESS);
724 }
725
726 /*
727 * If we can extend the object but cannot extend the
728 * map entry, we have to create a new map entry. We
729 * must bump the ref count on the extended object to
730 * account for it. object may be NULL.
731 */
732 object = prev_entry->object.vm_object;
733 offset = prev_entry->offset +
734 (prev_entry->end - prev_entry->start);
735 vm_object_reference(object);
736 }
737
738 /*
739 * NOTE: if conditionals fail, object can be NULL here. This occurs
740 * in things like the buffer map where we manage kva but do not manage
741 * backing objects.
742 */
743
744 /*
745 * Create a new entry
746 */
747
748 new_entry = vm_map_entry_create(map, countp);
749 new_entry->start = start;
750 new_entry->end = end;
751
752 new_entry->eflags = protoeflags;
753 new_entry->object.vm_object = object;
754 new_entry->offset = offset;
755 new_entry->avail_ssize = 0;
756
757 new_entry->inheritance = VM_INHERIT_DEFAULT;
758 new_entry->protection = prot;
759 new_entry->max_protection = max;
760 new_entry->wired_count = 0;
761
762 /*
763 * Insert the new entry into the list
764 */
765
766 vm_map_entry_link(map, prev_entry, new_entry);
767 map->size += new_entry->end - new_entry->start;
768
769 /*
770 * Update the free space hint
771 */
772 if ((map->first_free == prev_entry) &&
773 (prev_entry->end >= new_entry->start)) {
774 map->first_free = new_entry;
775 }
776
777#if 0
778 /*
779 * Temporarily removed to avoid MAP_STACK panic, due to
780 * MAP_STACK being a huge hack. Will be added back in
781 * when MAP_STACK (and the user stack mapping) is fixed.
782 */
783 /*
784 * It may be possible to simplify the entry
785 */
786 vm_map_simplify_entry(map, new_entry, countp);
787#endif
788
789 if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
790 pmap_object_init_pt(map->pmap, start, prot,
791 object, OFF_TO_IDX(offset), end - start,
792 cow & MAP_PREFAULT_PARTIAL);
793 }
794
795 return (KERN_SUCCESS);
796}
797
798/*
799 * Find sufficient space for `length' bytes in the given map, starting at
800 * `start'. The map must be locked. Returns 0 on success, 1 on no space.
801 *
802 * This function will return an arbitrarily aligned pointer. If no
803 * particular alignment is required you should pass align as 1. Note that
804 * the map may return PAGE_SIZE aligned pointers if all the lengths used in
805 * the map are a multiple of PAGE_SIZE, even if you pass a smaller align
806 * argument.
807 *
808 * 'align' should be a power of 2 but is not required to be.
809 */
810int
811vm_map_findspace(
812 vm_map_t map,
813 vm_offset_t start,
814 vm_size_t length,
815 vm_offset_t align,
816 vm_offset_t *addr)
817{
818 vm_map_entry_t entry, next;
819 vm_offset_t end;
820 vm_offset_t align_mask;
821
822 if (start < map->min_offset)
823 start = map->min_offset;
824 if (start > map->max_offset)
825 return (1);
826
827 /*
828 * If the alignment is not a power of 2 we will have to use
829 * a mod/division, set align_mask to a special value.
830 */
831 if ((align | (align - 1)) + 1 != (align << 1))
832 align_mask = (vm_offset_t)-1;
833 else
834 align_mask = align - 1;
835
836retry:
837 /*
838 * Look for the first possible address; if there's already something
839 * at this address, we have to start after it.
840 */
841 if (start == map->min_offset) {
842 if ((entry = map->first_free) != &map->header)
843 start = entry->end;
844 } else {
845 vm_map_entry_t tmp;
846
847 if (vm_map_lookup_entry(map, start, &tmp))
848 start = tmp->end;
849 entry = tmp;
850 }
851
852 /*
853 * Look through the rest of the map, trying to fit a new region in the
854 * gap between existing regions, or after the very last region.
855 */
856 for (;; start = (entry = next)->end) {
857 /*
858 * Adjust the proposed start by the requested alignment,
859 * be sure that we didn't wrap the address.
860 */
861 if (align_mask == (vm_offset_t)-1)
862 end = ((start + align - 1) / align) * align;
863 else
864 end = (start + align_mask) & ~align_mask;
865 if (end < start)
866 return (1);
867 start = end;
868 /*
869 * Find the end of the proposed new region. Be sure we didn't
870 * go beyond the end of the map, or wrap around the address.
871 * Then check to see if this is the last entry or if the
872 * proposed end fits in the gap between this and the next
873 * entry.
874 */
875 end = start + length;
876 if (end > map->max_offset || end < start)
877 return (1);
878 next = entry->next;
879 if (next == &map->header || next->start >= end)
880 break;
881 }
882 SAVE_HINT(map, entry);
883 if (map == kernel_map) {
884 vm_offset_t ksize;
885 if ((ksize = round_page(start + length)) > kernel_vm_end) {
886 pmap_growkernel(ksize);
887 goto retry;
888 }
889 }
890 *addr = start;
891 return (0);
892}
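/*
 * Worked example (not part of the original source) of the alignment
 * handling above.  For a power-of-2 align such as 4096:
 * (align | (align - 1)) + 1 == 8192 == (align << 1), so the cheap
 * mask form (start + align_mask) & ~align_mask is used.  For a
 * non-power-of-2 align such as 3 * PAGE_SIZE the test fails,
 * align_mask is set to -1, and the divide/multiply form
 * ((start + align - 1) / align) * align is used instead.
 */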
893
894/*
895 * vm_map_find finds an unallocated region in the target address
896 * map with the given length. The search is defined to be
897 * first-fit from the specified address; the region found is
898 * returned in the same parameter.
899 *
900 * If object is non-NULL, ref count must be bumped by caller
901 * prior to making call to account for the new entry.
902 */
903int
904vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
905 vm_offset_t *addr, /* IN/OUT */
906 vm_size_t length, boolean_t find_space, vm_prot_t prot,
907 vm_prot_t max, int cow)
908{
909 vm_offset_t start;
910 int result;
911 int count;
912
913 start = *addr;
914
915 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
916 vm_map_lock(map);
917 if (find_space) {
918 if (vm_map_findspace(map, start, length, 1, addr)) {
919 vm_map_unlock(map);
920 vm_map_entry_release(count);
921 return (KERN_NO_SPACE);
922 }
923 start = *addr;
924 }
925 result = vm_map_insert(map, &count, object, offset,
926 start, start + length, prot, max, cow);
927 vm_map_unlock(map);
928 vm_map_entry_release(count);
929
930 return (result);
931}
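/*
 * A hedged usage sketch (not part of the original source): carving a
 * page-rounded, anonymous region out of a map with vm_map_find().
 * As noted above, if a backing object were passed its reference count
 * would have to be bumped by the caller first.
 */
#if 0
static vm_offset_t
example_find(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr = vm_map_min(map);

	if (vm_map_find(map, NULL, 0, &addr, round_page(size), TRUE,
			VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
		return (0);
	return (addr);
}
#endif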
932
933/*
934 * vm_map_simplify_entry:
935 *
936 * Simplify the given map entry by merging with either neighbor. This
937 * routine also has the ability to merge with both neighbors.
938 *
939 * The map must be locked.
940 *
941 * This routine guarantees that the passed entry remains valid (though
942 * possibly extended). When merging, this routine may delete one or
943 * both neighbors. No action is taken on entries which have their
944 * in-transition flag set.
945 */
946void
947vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp)
948{
949 vm_map_entry_t next, prev;
950 vm_size_t prevsize, esize;
951
952 if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) {
953 ++mycpu->gd_cnt.v_intrans_coll;
954 return;
955 }
956
957 prev = entry->prev;
958 if (prev != &map->header) {
959 prevsize = prev->end - prev->start;
960 if ( (prev->end == entry->start) &&
961 (prev->object.vm_object == entry->object.vm_object) &&
962 (!prev->object.vm_object ||
963 (prev->offset + prevsize == entry->offset)) &&
964 (prev->eflags == entry->eflags) &&
965 (prev->protection == entry->protection) &&
966 (prev->max_protection == entry->max_protection) &&
967 (prev->inheritance == entry->inheritance) &&
968 (prev->wired_count == entry->wired_count)) {
969 if (map->first_free == prev)
970 map->first_free = entry;
971 if (map->hint == prev)
972 map->hint = entry;
973 vm_map_entry_unlink(map, prev);
974 entry->start = prev->start;
975 entry->offset = prev->offset;
976 if (prev->object.vm_object)
977 vm_object_deallocate(prev->object.vm_object);
978 vm_map_entry_dispose(map, prev, countp);
979 }
980 }
981
982 next = entry->next;
983 if (next != &map->header) {
984 esize = entry->end - entry->start;
985 if ((entry->end == next->start) &&
986 (next->object.vm_object == entry->object.vm_object) &&
987 (!entry->object.vm_object ||
988 (entry->offset + esize == next->offset)) &&
989 (next->eflags == entry->eflags) &&
990 (next->protection == entry->protection) &&
991 (next->max_protection == entry->max_protection) &&
992 (next->inheritance == entry->inheritance) &&
993 (next->wired_count == entry->wired_count)) {
994 if (map->first_free == next)
995 map->first_free = entry;
996 if (map->hint == next)
997 map->hint = entry;
998 vm_map_entry_unlink(map, next);
999 entry->end = next->end;
1000 if (next->object.vm_object)
1001 vm_object_deallocate(next->object.vm_object);
1002 vm_map_entry_dispose(map, next, countp);
1003 }
1004 }
1005}
1006/*
1007 * vm_map_clip_start: [ internal use only ]
1008 *
1009 * Asserts that the given entry begins at or after
1010 * the specified address; if necessary,
1011 * it splits the entry into two.
1012 */
1013#define vm_map_clip_start(map, entry, startaddr, countp) \
1014{ \
1015 if (startaddr > entry->start) \
1016 _vm_map_clip_start(map, entry, startaddr, countp); \
1017}
1018
1019/*
1020 * This routine is called only when it is known that
1021 * the entry must be split.
1022 */
1023static void
1024_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start, int *countp)
1025{
1026 vm_map_entry_t new_entry;
1027
1028 /*
1029 * Split off the front portion -- note that we must insert the new
1030 * entry BEFORE this one, so that this entry has the specified
1031 * starting address.
1032 */
1033
1034 vm_map_simplify_entry(map, entry, countp);
1035
1036 /*
1037 * If there is no object backing this entry, we might as well create
1038 * one now. If we defer it, an object can get created after the map
1039 * is clipped, and individual objects will be created for the split-up
1040 * map. This is a bit of a hack, but is also about the best place to
1041 * put this improvement.
1042 */
1043
1044 if (entry->object.vm_object == NULL && !map->system_map) {
1045 vm_object_t object;
1046 object = vm_object_allocate(OBJT_DEFAULT,
1047 atop(entry->end - entry->start));
1048 entry->object.vm_object = object;
1049 entry->offset = 0;
1050 }
1051
1052 new_entry = vm_map_entry_create(map, countp);
1053 *new_entry = *entry;
1054
1055 new_entry->end = start;
1056 entry->offset += (start - entry->start);
1057 entry->start = start;
1058
1059 vm_map_entry_link(map, entry->prev, new_entry);
1060
1061 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1062 vm_object_reference(new_entry->object.vm_object);
1063 }
1064}
1065
1066/*
1067 * vm_map_clip_end: [ internal use only ]
1068 *
1069 * Asserts that the given entry ends at or before
1070 * the specified address; if necessary,
1071 * it splits the entry into two.
1072 */
1073
1074#define vm_map_clip_end(map, entry, endaddr, countp) \
1075{ \
1076 if (endaddr < entry->end) \
1077 _vm_map_clip_end(map, entry, endaddr, countp); \
1078}
1079
1080/*
1081 * This routine is called only when it is known that
1082 * the entry must be split.
1083 */
1084static void
1085_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, int *countp)
1086{
1087 vm_map_entry_t new_entry;
1088
1089 /*
1090 * If there is no object backing this entry, we might as well create
1091 * one now. If we defer it, an object can get created after the map
1092 * is clipped, and individual objects will be created for the split-up
1093 * map. This is a bit of a hack, but is also about the best place to
1094 * put this improvement.
1095 */
1096
1097 if (entry->object.vm_object == NULL && !map->system_map) {
1098 vm_object_t object;
1099 object = vm_object_allocate(OBJT_DEFAULT,
1100 atop(entry->end - entry->start));
1101 entry->object.vm_object = object;
1102 entry->offset = 0;
1103 }
1104
1105 /*
1106 * Create a new entry and insert it AFTER the specified entry
1107 */
1108
1109 new_entry = vm_map_entry_create(map, countp);
1110 *new_entry = *entry;
1111
1112 new_entry->start = entry->end = end;
1113 new_entry->offset += (end - entry->start);
1114
1115 vm_map_entry_link(map, entry, new_entry);
1116
1117 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1118 vm_object_reference(new_entry->object.vm_object);
1119 }
1120}
1121
1122/*
1123 * VM_MAP_RANGE_CHECK: [ internal use only ]
1124 *
1125 * Asserts that the starting and ending region
1126 * addresses fall within the valid range of the map.
1127 */
1128#define VM_MAP_RANGE_CHECK(map, start, end) \
1129 { \
1130 if (start < vm_map_min(map)) \
1131 start = vm_map_min(map); \
1132 if (end > vm_map_max(map)) \
1133 end = vm_map_max(map); \
1134 if (start > end) \
1135 start = end; \
1136 }
1137
1138/*
1139 * vm_map_transition_wait: [ kernel use only ]
1140 *
1141 * Used to block when an in-transition collision occurs. The map
1142 * is unlocked for the sleep and relocked before the return.
1143 */
1144static
1145void
1146vm_map_transition_wait(vm_map_t map)
1147{
1148 vm_map_unlock(map);
1149 tsleep(map, 0, "vment", 0);
1150 vm_map_lock(map);
1151}
1152
1153/*
1154 * CLIP_CHECK_BACK
1155 * CLIP_CHECK_FWD
1156 *
1157 * When we do blocking operations with the map lock held it is
1158 * possible that a clip might have occurred on our in-transit entry,
1159 * requiring an adjustment to the entry in our loop. These macros
1160 * help the pageable and clip_range code deal with the case. The
1161 * conditional costs virtually nothing if no clipping has occurred.
1162 */
1163
1164#define CLIP_CHECK_BACK(entry, save_start) \
1165 do { \
1166 while (entry->start != save_start) { \
1167 entry = entry->prev; \
1168 KASSERT(entry != &map->header, ("bad entry clip")); \
1169 } \
1170 } while(0)
1171
1172#define CLIP_CHECK_FWD(entry, save_end) \
1173 do { \
1174 while (entry->end != save_end) { \
1175 entry = entry->next; \
1176 KASSERT(entry != &map->header, ("bad entry clip")); \
1177 } \
1178 } while(0)
1179
1180
1181/*
1182 * vm_map_clip_range: [ kernel use only ]
1183 *
1184 * Clip the specified range and return the base entry. The
1185 * range may cover several entries starting at the returned base
1186 * and the first and last entry in the covering sequence will be
1187 * properly clipped to the requested start and end address.
1188 *
1189 * If no holes are allowed you should pass the MAP_CLIP_NO_HOLES
1190 * flag.
1191 *
1192 * The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
1193 * covered by the requested range.
1194 *
1195 * The map must be exclusively locked on entry and will remain locked
1196 * on return. If no range exists or the range contains holes and you
1197 * specified that no holes were allowed, NULL will be returned. This
1198 * routine may temporarily unlock the map in order to avoid a deadlock when
1199 * sleeping.
1200 */
1201static
1202vm_map_entry_t
1203vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end,
1204 int *countp, int flags)
1205{
1206 vm_map_entry_t start_entry;
1207 vm_map_entry_t entry;
1208
1209 /*
1210 * Locate the entry and effect initial clipping. The in-transition
1211 * case does not occur very often so do not try to optimize it.
1212 */
1213again:
1214 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
1215 return (NULL);
1216 entry = start_entry;
1217 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1218 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1219 ++mycpu->gd_cnt.v_intrans_coll;
1220 ++mycpu->gd_cnt.v_intrans_wait;
1221 vm_map_transition_wait(map);
1222 /*
1223 * entry and/or start_entry may have been clipped while
1224 * we slept, or may have gone away entirely. We have
1225 * to restart from the lookup.
1226 */
1227 goto again;
1228 }
1229 /*
1230 * Since we hold an exclusive map lock we do not have to restart
1231 * after clipping, even though clipping may block in zalloc.
1232 */
1233 vm_map_clip_start(map, entry, start, countp);
1234 vm_map_clip_end(map, entry, end, countp);
1235 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1236
1237 /*
1238 * Scan entries covered by the range. When working on the next
1239 * entry a restart need only re-loop on the current entry which
1240 * we have already locked, since 'next' may have changed. Also,
1241 * even though entry is safe, it may have been clipped so we
1242 * have to iterate forwards through the clip after sleeping.
1243 */
1244 while (entry->next != &map->header && entry->next->start < end) {
1245 vm_map_entry_t next = entry->next;
1246
1247 if (flags & MAP_CLIP_NO_HOLES) {
1248 if (next->start > entry->end) {
1249 vm_map_unclip_range(map, start_entry,
1250 start, entry->end, countp, flags);
1251 return(NULL);
1252 }
1253 }
1254
1255 if (next->eflags & MAP_ENTRY_IN_TRANSITION) {
1256 vm_offset_t save_end = entry->end;
1257 next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1258 ++mycpu->gd_cnt.v_intrans_coll;
1259 ++mycpu->gd_cnt.v_intrans_wait;
1260 vm_map_transition_wait(map);
1261
1262 /*
1263 * clips might have occurred while we blocked.
1264 */
1265 CLIP_CHECK_FWD(entry, save_end);
1266 CLIP_CHECK_BACK(start_entry, start);
1267 continue;
1268 }
1269 /*
1270 * No restart necessary even though clip_end may block, we
1271 * are holding the map lock.
1272 */
1273 vm_map_clip_end(map, next, end, countp);
1274 next->eflags |= MAP_ENTRY_IN_TRANSITION;
1275 entry = next;
1276 }
1277 if (flags & MAP_CLIP_NO_HOLES) {
1278 if (entry->end != end) {
1279 vm_map_unclip_range(map, start_entry,
1280 start, entry->end, countp, flags);
1281 return(NULL);
1282 }
1283 }
1284 return(start_entry);
1285}
1286
1287/*
1288 * vm_map_unclip_range: [ kernel use only ]
1289 *
1290 * Undo the effect of vm_map_clip_range(). You should pass the same
1291 * flags and the same range that you passed to vm_map_clip_range().
1292 * This code will clear the in-transition flag on the entries and
1293 * wake up anyone waiting. This code will also simplify the sequence
1294 * and attempt to merge it with entries before and after the sequence.
1295 *
1296 * The map must be locked on entry and will remain locked on return.
1297 *
1298 * Note that you should also pass the start_entry returned by
1299 * vm_map_clip_range(). However, if you block between the two calls
1300 * with the map unlocked please be aware that the start_entry may
1301 * have been clipped and you may need to scan it backwards to find
1302 * the entry corresponding with the original start address. You are
1303 * responsible for this, vm_map_unclip_range() expects the correct
1304 * start_entry to be passed to it and will KASSERT otherwise.
1305 */
1306static
1307void
1308vm_map_unclip_range(
1309 vm_map_t map,
1310 vm_map_entry_t start_entry,
1311 vm_offset_t start,
1312 vm_offset_t end,
1313 int *countp,
1314 int flags)
1315{
1316 vm_map_entry_t entry;
1317
1318 entry = start_entry;
1319
1320 KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
1321 while (entry != &map->header && entry->start < end) {
1322 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry));
1323 KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped"));
1324 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1325 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1326 entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1327 wakeup(map);
1328 }
1329 entry = entry->next;
1330 }
1331
1332 /*
1333 * Simplification does not block so there is no restart case.
1334 */
1335 entry = start_entry;
1336 while (entry != &map->header && entry->start < end) {
1337 vm_map_simplify_entry(map, entry, countp);
1338 entry = entry->next;
1339 }
1340}
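/*
 * Illustrative sketch (not part of the original source) of the
 * clip/unclip bracket used by vm_map_unwire() and vm_map_wire():
 * clip the range into in-transition entries, operate on them
 * (possibly blocking with the map lock temporarily dropped), then
 * unclip to clear the flag, wake any waiters, and re-merge entries.
 */
#if 0
static int
example_clip_bracket(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	vm_map_entry_t start_entry;
	int count;
	int rv = KERN_SUCCESS;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	start_entry = vm_map_clip_range(map, start, end, &count,
					MAP_CLIP_NO_HOLES);
	if (start_entry == NULL) {
		rv = KERN_INVALID_ADDRESS;
	} else {
		/* ... operate on the in-transition entries here ... */
		vm_map_unclip_range(map, start_entry, start, end, &count,
				    MAP_CLIP_NO_HOLES);
	}
	vm_map_unlock(map);
	vm_map_entry_release(count);
	return (rv);
}
#endif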
1341
1342/*
1343 * vm_map_submap: [ kernel use only ]
1344 *
1345 * Mark the given range as handled by a subordinate map.
1346 *
1347 * This range must have been created with vm_map_find,
1348 * and no other operations may have been performed on this
1349 * range prior to calling vm_map_submap.
1350 *
1351 * Only a limited number of operations can be performed
1352 * within this range after calling vm_map_submap:
1353 * vm_fault
1354 * [Don't try vm_map_copy!]
1355 *
1356 * To remove a submapping, one must first remove the
1357 * range from the superior map, and then destroy the
1358 * submap (if desired). [Better yet, don't try it.]
1359 */
1360int
1361vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap)
1362{
1363 vm_map_entry_t entry;
1364 int result = KERN_INVALID_ARGUMENT;
1365 int count;
1366
1367 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1368 vm_map_lock(map);
1369
1370 VM_MAP_RANGE_CHECK(map, start, end);
1371
1372 if (vm_map_lookup_entry(map, start, &entry)) {
1373 vm_map_clip_start(map, entry, start, &count);
1374 } else {
1375 entry = entry->next;
1376 }
1377
1378 vm_map_clip_end(map, entry, end, &count);
1379
1380 if ((entry->start == start) && (entry->end == end) &&
1381 ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1382 (entry->object.vm_object == NULL)) {
1383 entry->object.sub_map = submap;
1384 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1385 result = KERN_SUCCESS;
1386 }
1387 vm_map_unlock(map);
1388 vm_map_entry_release(count);
1389
1390 return (result);
1391}
1392
1393/*
1394 * vm_map_protect:
1395 *
1396 * Sets the protection of the specified address
1397 * region in the target map. If "set_max" is
1398 * specified, the maximum protection is to be set;
1399 * otherwise, only the current protection is affected.
1400 */
1401int
1402vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1403 vm_prot_t new_prot, boolean_t set_max)
1404{
1405 vm_map_entry_t current;
1406 vm_map_entry_t entry;
1407 int count;
1408
1409 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1410 vm_map_lock(map);
1411
1412 VM_MAP_RANGE_CHECK(map, start, end);
1413
1414 if (vm_map_lookup_entry(map, start, &entry)) {
1415 vm_map_clip_start(map, entry, start, &count);
1416 } else {
1417 entry = entry->next;
1418 }
1419
1420 /*
1421 * Make a first pass to check for protection violations.
1422 */
1423
1424 current = entry;
1425 while ((current != &map->header) && (current->start < end)) {
1426 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1427 vm_map_unlock(map);
1428 vm_map_entry_release(count);
1429 return (KERN_INVALID_ARGUMENT);
1430 }
1431 if ((new_prot & current->max_protection) != new_prot) {
1432 vm_map_unlock(map);
1433 vm_map_entry_release(count);
1434 return (KERN_PROTECTION_FAILURE);
1435 }
1436 current = current->next;
1437 }
1438
1439 /*
1440 * Go back and fix up protections. [Note that clipping is not
1441 * necessary the second time.]
1442 */
1443 current = entry;
1444
1445 while ((current != &map->header) && (current->start < end)) {
1446 vm_prot_t old_prot;
1447
1448 vm_map_clip_end(map, current, end, &count);
1449
1450 old_prot = current->protection;
1451 if (set_max)
1452 current->protection =
1453 (current->max_protection = new_prot) &
1454 old_prot;
1455 else
1456 current->protection = new_prot;
1457
1458 /*
1459 * Update physical map if necessary. Worry about copy-on-write
1460 * here -- CHECK THIS XXX
1461 */
1462
1463 if (current->protection != old_prot) {
1464#define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1465 VM_PROT_ALL)
1466
1467 pmap_protect(map->pmap, current->start,
1468 current->end,
1469 current->protection & MASK(current));
1470#undef MASK
1471 }
1472
1473 vm_map_simplify_entry(map, current, &count);
1474
1475 current = current->next;
1476 }
1477
1478 vm_map_unlock(map);
1479 vm_map_entry_release(count);
1480 return (KERN_SUCCESS);
1481}
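/*
 * Descriptive note (not part of the original source): the MASK()
 * macro above preserves copy-on-write semantics.  For an entry with
 * MAP_ENTRY_COW set, pmap_protect() is given the new protection with
 * VM_PROT_WRITE masked off, so even a protection change that grants
 * write access leaves the physical mappings read-only until the
 * fault path performs the actual copy.
 */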
1482
1483/*
1484 * vm_map_madvise:
1485 *
1486 * This routine traverses a process's map handling the madvise
1487 * system call. Advisories are classified as either those affecting
1488 * the vm_map_entry structure, or those affecting the underlying
1489 * objects.
1490 */
1491
1492int
1493vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav)
1494{
1495 vm_map_entry_t current, entry;
1496 int modify_map = 0;
1497 int count;
1498
1499 /*
1500 * Some madvise calls directly modify the vm_map_entry, in which case
1501 * we need to use an exclusive lock on the map and we need to perform
1502 * various clipping operations. Otherwise we only need a read-lock
1503 * on the map.
1504 */
1505
1506 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1507
1508 switch(behav) {
1509 case MADV_NORMAL:
1510 case MADV_SEQUENTIAL:
1511 case MADV_RANDOM:
1512 case MADV_NOSYNC:
1513 case MADV_AUTOSYNC:
1514 case MADV_NOCORE:
1515 case MADV_CORE:
1516 modify_map = 1;
1517 vm_map_lock(map);
1518 break;
1519 case MADV_WILLNEED:
1520 case MADV_DONTNEED:
1521 case MADV_FREE:
1522 vm_map_lock_read(map);
1523 break;
1524 default:
1525 vm_map_entry_release(count);
1526 return (KERN_INVALID_ARGUMENT);
1527 }
1528
1529 /*
1530 * Locate starting entry and clip if necessary.
1531 */
1532
1533 VM_MAP_RANGE_CHECK(map, start, end);
1534
1535 if (vm_map_lookup_entry(map, start, &entry)) {
1536 if (modify_map)
1537 vm_map_clip_start(map, entry, start, &count);
1538 } else {
1539 entry = entry->next;
1540 }
1541
1542 if (modify_map) {
1543 /*
1544 * madvise behaviors that are implemented in the vm_map_entry.
1545 *
1546 * We clip the vm_map_entry so that behavioral changes are
1547 * limited to the specified address range.
1548 */
1549 for (current = entry;
1550 (current != &map->header) && (current->start < end);
1551 current = current->next
1552 ) {
1553 if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1554 continue;
1555
1556 vm_map_clip_end(map, current, end, &count);
1557
1558 switch (behav) {
1559 case MADV_NORMAL:
1560 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1561 break;
1562 case MADV_SEQUENTIAL:
1563 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1564 break;
1565 case MADV_RANDOM:
1566 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1567 break;
1568 case MADV_NOSYNC:
1569 current->eflags |= MAP_ENTRY_NOSYNC;
1570 break;
1571 case MADV_AUTOSYNC:
1572 current->eflags &= ~MAP_ENTRY_NOSYNC;
1573 break;
1574 case MADV_NOCORE:
1575 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1576 break;
1577 case MADV_CORE:
1578 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1579 break;
1580 default:
1581 break;
1582 }
1583 vm_map_simplify_entry(map, current, &count);
1584 }
1585 vm_map_unlock(map);
1586 } else {
1587 vm_pindex_t pindex;
1588 int count;
1589
1590 /*
1591 * madvise behaviors that are implemented in the underlying
1592 * vm_object.
1593 *
1594 * Since we don't clip the vm_map_entry, we have to clip
1595 * the vm_object pindex and count.
1596 */
1597 for (current = entry;
1598 (current != &map->header) && (current->start < end);
1599 current = current->next
1600 ) {
1601 vm_offset_t useStart;
1602
1603 if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1604 continue;
1605
1606 pindex = OFF_TO_IDX(current->offset);
1607 count = atop(current->end - current->start);
1608 useStart = current->start;
1609
1610 if (current->start < start) {
1611 pindex += atop(start - current->start);
1612 count -= atop(start - current->start);
1613 useStart = start;
1614 }
1615 if (current->end > end)
1616 count -= atop(current->end - end);
1617
1618 if (count <= 0)
1619 continue;
1620
1621 vm_object_madvise(current->object.vm_object,
1622 pindex, count, behav);
1623 if (behav == MADV_WILLNEED) {
1624 pmap_object_init_pt(
1625 map->pmap,
1626 useStart,
1627 current->protection,
1628 current->object.vm_object,
1629 pindex,
1630 (count << PAGE_SHIFT),
1631 MAP_PREFAULT_MADVISE
1632 );
1633 }
1634 }
1635 vm_map_unlock_read(map);
1636 }
1637 vm_map_entry_release(count);
1638 return(0);
1639}
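/*
 * Worked example (not part of the original source) of the pindex
 * clipping above: suppose an entry maps object pages [8, 24) and the
 * madvise range covers only the second half of the entry.  pindex
 * starts at OFF_TO_IDX(offset) == 8 with count == 16; the leading
 * trim adds atop(start - current->start) == 8 to pindex and subtracts
 * 8 from count, leaving pindex == 16 and count == 8 -- exactly the
 * object pages that fall inside the requested range.
 */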
1640
1641
1642/*
1643 * vm_map_inherit:
1644 *
1645 * Sets the inheritance of the specified address
1646 * range in the target map. Inheritance
1647 * affects how the map will be shared with
1648 * child maps at the time of vm_map_fork.
1649 */
1650int
1651vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1652 vm_inherit_t new_inheritance)
1653{
1654 vm_map_entry_t entry;
1655 vm_map_entry_t temp_entry;
1656 int count;
1657
1658 switch (new_inheritance) {
1659 case VM_INHERIT_NONE:
1660 case VM_INHERIT_COPY:
1661 case VM_INHERIT_SHARE:
1662 break;
1663 default:
1664 return (KERN_INVALID_ARGUMENT);
1665 }
1666
1667 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1668 vm_map_lock(map);
1669
1670 VM_MAP_RANGE_CHECK(map, start, end);
1671
1672 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1673 entry = temp_entry;
1674 vm_map_clip_start(map, entry, start, &count);
1675 } else
1676 entry = temp_entry->next;
1677
1678 while ((entry != &map->header) && (entry->start < end)) {
1679 vm_map_clip_end(map, entry, end, &count);
1680
1681 entry->inheritance = new_inheritance;
1682
1683 vm_map_simplify_entry(map, entry, &count);
1684
1685 entry = entry->next;
1686 }
1687 vm_map_unlock(map);
1688 vm_map_entry_release(count);
1689 return (KERN_SUCCESS);
1690}
1691
1692/*
1693 * Implement the semantics of mlock
1694 */
1695int
1696vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
1697 boolean_t new_pageable)
1698{
1699 vm_map_entry_t entry;
1700 vm_map_entry_t start_entry;
1701 vm_offset_t end;
1702 int rv = KERN_SUCCESS;
1703 int count;
1704
1705 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1706 vm_map_lock(map);
1707 VM_MAP_RANGE_CHECK(map, start, real_end);
1708 end = real_end;
1709
1710 start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
1711 if (start_entry == NULL) {
1712 vm_map_unlock(map);
1713 vm_map_entry_release(count);
1714 return (KERN_INVALID_ADDRESS);
1715 }
1716
1717 if (new_pageable == 0) {
1718 entry = start_entry;
1719 while ((entry != &map->header) && (entry->start < end)) {
1720 vm_offset_t save_start;
1721 vm_offset_t save_end;
1722
1723 /*
1724 * Already user wired or hard wired (trivial cases)
1725 */
1726 if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1727 entry = entry->next;
1728 continue;
1729 }
1730 if (entry->wired_count != 0) {
1731 entry->wired_count++;
1732 entry->eflags |= MAP_ENTRY_USER_WIRED;
1733 entry = entry->next;
1734 continue;
1735 }
1736
1737 /*
1738 * A new wiring requires instantiation of appropriate
1739 * management structures and the faulting in of the
1740 * page.
1741 */
1742 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1743 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1744 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1745
1746 vm_object_shadow(&entry->object.vm_object,
1747 &entry->offset,
1748 atop(entry->end - entry->start));
1749 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1750
1751 } else if (entry->object.vm_object == NULL &&
1752 !map->system_map) {
1753
1754 entry->object.vm_object =
1755 vm_object_allocate(OBJT_DEFAULT,
1756 atop(entry->end - entry->start));
1757 entry->offset = (vm_offset_t) 0;
1758
1759 }
1760 }
1761 entry->wired_count++;
1762 entry->eflags |= MAP_ENTRY_USER_WIRED;
1763
1764 /*
1765 * Now fault in the area. Note that vm_fault_wire()
1766 * may release the map lock temporarily, it will be
1767 * relocked on return. The in-transition
1768 * flag protects the entries.
1769 */
1770 save_start = entry->start;
1771 save_end = entry->end;
1772 rv = vm_fault_wire(map, entry, TRUE);
1773 if (rv) {
1774 CLIP_CHECK_BACK(entry, save_start);
1775 for (;;) {
1776 KASSERT(entry->wired_count == 1, ("bad wired_count on entry"));
1777 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1778 entry->wired_count = 0;
1779 if (entry->end == save_end)
1780 break;
1781 entry = entry->next;
1782 KASSERT(entry != &map->header, ("bad entry clip during backout"));
1783 }
1784 end = save_start; /* unwire the rest */
1785 break;
1786 }
1787 /*
1788 * note that even though the entry might have been
1789 * clipped, the USER_WIRED flag we set prevents
1790 * duplication so we do not have to do a
1791 * clip check.
1792 */
1793 entry = entry->next;
1794 }
1795
1796 /*
1797 * If we failed fall through to the unwiring section to
1798 * unwire what we had wired so far. 'end' has already
1799 * been adjusted.
1800 */
1801 if (rv)
1802 new_pageable = 1;
1803
1804 /*
1805 * start_entry might have been clipped if we unlocked the
1806 * map and blocked. No matter how clipped it has gotten
1807 * there should be a fragment that is on our start boundary.
1808 */
1809 CLIP_CHECK_BACK(start_entry, start);
1810 }
1811
1812 /*
1813 * Deal with the unwiring case.
1814 */
1815 if (new_pageable) {
1816 /*
1817 * This is the unwiring case. We must first ensure that the
1818 * range to be unwired is really wired down. We know there
1819 * are no holes.
1820 */
1821 entry = start_entry;
1822 while ((entry != &map->header) && (entry->start < end)) {
1823 if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1824 rv = KERN_INVALID_ARGUMENT;
1825 goto done;
1826 }
1827 KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry));
1828 entry = entry->next;
1829 }
1830
1831 /*
1832 * Now decrement the wiring count for each region. If a region
1833 * becomes completely unwired, unwire its physical pages and
1834 * mappings.
1835 */
1836		/*
1837		 * The entries are processed in a second loop that restarts at
1838		 * start_entry and walks the same range again.  Each entry is
1839		 * asserted to still carry MAP_ENTRY_USER_WIRED and a non-zero
1840		 * wired count before the flag is cleared and the count is
1841		 * decremented.  When an entry's wired count drops to zero,
1842		 * vm_fault_unwire() releases its backing pages and mappings.
1843		 * This way the unwiring path does not leave behind pages that
1844		 * were wired above.
1845		 */
1846 entry = start_entry;
1847 while ((entry != &map->header) && (entry->start < end)) {
1848 KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED,
1849 ("expected USER_WIRED on entry %p", entry));
1850 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1851 entry->wired_count--;
1852 if (entry->wired_count == 0)
1853 vm_fault_unwire(map, entry);
1854 entry = entry->next;
1855 }
1856 }
1857done:
1858 vm_map_unclip_range(map, start_entry, start, real_end, &count,
1859 MAP_CLIP_NO_HOLES);
1860 map->timestamp++;
1861 vm_map_unlock(map);
1862 vm_map_entry_release(count);
1863 return (rv);
1864}
1865
1866/*
1867 * vm_map_wire:
1868 *
1869 * Sets the pageability of the specified address
1870 * range in the target map. Regions specified
1871 * as not pageable require locked-down physical
1872 * memory and physical page maps.
1873 *
1874 * The map must not be locked, but a reference
1875 * must remain to the map throughout the call.
1876 *
1877 * This function may be called via the zalloc path and must properly
1878 * reserve map entries for kernel_map.
1879 */
1880int
1881vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
1882{
1883 vm_map_entry_t entry;
1884 vm_map_entry_t start_entry;
1885 vm_offset_t end;
1886 int rv = KERN_SUCCESS;
1887 int count;
1888
1889 if (kmflags & KM_KRESERVE)
1890 count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
1891 else
1892 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1893 vm_map_lock(map);
1894 VM_MAP_RANGE_CHECK(map, start, real_end);
1895 end = real_end;
1896
1897 start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
1898 if (start_entry == NULL) {
1899 vm_map_unlock(map);
1900 rv = KERN_INVALID_ADDRESS;
1901 goto failure;
1902 }
1903 if ((kmflags & KM_PAGEABLE) == 0) {
1904 /*
1905 * Wiring.
1906 *
1907 * 1. Holding the write lock, we create any shadow or zero-fill
1908 * objects that need to be created. Then we clip each map
1909 * entry to the region to be wired and increment its wiring
1910 * count. We create objects before clipping the map entries
1911 * to avoid object proliferation.
1912 *
1913 * 2. We downgrade to a read lock, and call vm_fault_wire to
1914 * fault in the pages for any newly wired area (wired_count is
1915 * 1).
1916 *
1917 * Downgrading to a read lock for vm_fault_wire avoids a
1918 * possible deadlock with another process that may have faulted
1919 * on one of the pages to be wired (it would mark the page busy,
1920 * blocking us, then in turn block on the map lock that we
1921 * hold). Because of problems in the recursive lock package,
1922 * we cannot upgrade to a write lock in vm_map_lookup. Thus,
1923 * any actions that require the write lock must be done
1924 * beforehand. Because we keep the read lock on the map, the
1925 * copy-on-write status of the entries we modify here cannot
1926 * change.
1927 */
1928
1929 entry = start_entry;
1930 while ((entry != &map->header) && (entry->start < end)) {
1931 /*
1932 * Trivial case if the entry is already wired
1933 */
1934 if (entry->wired_count) {
1935 entry->wired_count++;
1936 entry = entry->next;
1937 continue;
1938 }
1939
1940 /*
1941 * The entry is being newly wired; we have to set up
1942 * appropriate management structures. A shadow
1943 * object is required for a copy-on-write region,
1944 * or a normal object for a zero-fill region. We
1945 * do not have to do this for entries that point to sub
1946 * maps because we won't hold the lock on the sub map.
1947 */
1948 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1949 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1950 if (copyflag &&
1951 ((entry->protection & VM_PROT_WRITE) != 0)) {
1952
1953 vm_object_shadow(&entry->object.vm_object,
1954 &entry->offset,
1955 atop(entry->end - entry->start));
1956 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1957 } else if (entry->object.vm_object == NULL &&
1958 !map->system_map) {
1959 entry->object.vm_object =
1960 vm_object_allocate(OBJT_DEFAULT,
1961 atop(entry->end - entry->start));
1962 entry->offset = (vm_offset_t) 0;
1963 }
1964 }
1965
1966 entry->wired_count++;
1967 entry = entry->next;
1968 }
1969
1970 /*
1971 * Pass 2.
1972 */
1973
1974 /*
1975 * HACK HACK HACK HACK
1976 *
1977 * Unlock the map to avoid deadlocks. The in-transit flag
1978 * protects us from most changes but note that
1979 * clipping may still occur. To prevent clipping from
1980 * occurring after the unlock, except for when we are
1981 * blocking in vm_fault_wire, we must run in a critical
1982 * section, otherwise our accesses to entry->start and
1983 * entry->end could be corrupted. We have to enter the
1984 * critical section prior to unlocking so start_entry does
1985 * not change out from under us at the very beginning of the
1986 * loop.
1987 *
1988 * HACK HACK HACK HACK
1989 */
1990
1991 crit_enter();
1992
1993 entry = start_entry;
1994 while (entry != &map->header && entry->start < end) {
1995 /*
1996 * If vm_fault_wire fails for any page we need to undo
1997 * what has been done. We decrement the wiring count
1998 * for those pages which have not yet been wired (now)
1999 * and unwire those that have (later).
2000 */
2001 vm_offset_t save_start = entry->start;
2002 vm_offset_t save_end = entry->end;
2003
2004 if (entry->wired_count == 1)
2005 rv = vm_fault_wire(map, entry, FALSE);
2006 if (rv) {
2007 CLIP_CHECK_BACK(entry, save_start);
2008 for (;;) {
2009 KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly"));
2010 entry->wired_count = 0;
2011 if (entry->end == save_end)
2012 break;
2013 entry = entry->next;
2014 KASSERT(entry != &map->header, ("bad entry clip during backout"));
2015 }
2016 end = save_start;
2017 break;
2018 }
2019 CLIP_CHECK_FWD(entry, save_end);
2020 entry = entry->next;
2021 }
2022 crit_exit();
2023
2024 /*
2025 * If a failure occurred, undo everything by falling through
2026 * to the unwiring code. 'end' has already been adjusted
2027 * appropriately.
2028 */
2029 if (rv)
2030 kmflags |= KM_PAGEABLE;
2031
2032 /*
2033 * start_entry is still IN_TRANSITION but may have been
2034 * clipped since vm_fault_wire() unlocks and relocks the
2035 * map. No matter how clipped it has gotten there should
2036 * be a fragment that is on our start boundary.
2037 */
2038 CLIP_CHECK_BACK(start_entry, start);
2039 }
2040
2041 if (kmflags & KM_PAGEABLE) {
2042 /*
2043 * This is the unwiring case. We must first ensure that the
2044 * range to be unwired is really wired down. We know there
2045 * are no holes.
2046 */
2047 entry = start_entry;
2048 while ((entry != &map->header) && (entry->start < end)) {
2049 if (entry->wired_count == 0) {
2050 rv = KERN_INVALID_ARGUMENT;
2051 goto done;
2052 }
2053 entry = entry->next;
2054 }
2055
2056 /*
2057 * Now decrement the wiring count for each region. If a region
2058 * becomes completely unwired, unwire its physical pages and
2059 * mappings.
2060 */
2061 entry = start_entry;
2062 while ((entry != &map->header) && (entry->start < end)) {
2063 entry->wired_count--;
2064 if (entry->wired_count == 0)
2065 vm_fault_unwire(map, entry);
2066 entry = entry->next;
2067 }
2068 }
2069done:
2070 vm_map_unclip_range(map, start_entry, start, real_end, &count,
2071 MAP_CLIP_NO_HOLES);
2072 map->timestamp++;
2073 vm_map_unlock(map);
2074failure:
2075 if (kmflags & KM_KRESERVE)
2076 vm_map_entry_krelease(count);
2077 else
2078 vm_map_entry_release(count);
2079 return (rv);
2080}
2081
2082/*
2083 * vm_map_set_wired_quick()
2084 *
2085 * Mark a newly allocated address range as wired but do not fault in
2086 * the pages. The caller is expected to load the pages into the object.
2087 *
2088 * The map must be locked on entry and will remain locked on return.
2089 */
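/*
 * Usage sketch (illustrative only; names are hypothetical): a caller
 * that has just inserted a range into a locked map and will populate
 * the backing object itself might do:
 *
 *	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	vm_map_lock(map);
 *	... vm_map_insert() the range at addr ...
 *	vm_map_set_wired_quick(map, addr, size, &count);
 *	vm_map_unlock(map);
 *	vm_map_entry_release(count);
 */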
2090void
2091vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp)
2092{
2093 vm_map_entry_t scan;
2094 vm_map_entry_t entry;
2095
2096 entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2097 for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) {
2098 KKASSERT(scan->wired_count == 0);
2099 scan->wired_count = 1;
2100 }
2101 vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2102}
2103
2104/*
2105 * vm_map_clean
2106 *
2107 * Push any dirty cached pages in the address range to their pager.
2108 * If syncio is TRUE, dirty pages are written synchronously.
2109 * If invalidate is TRUE, any cached pages are freed as well.
2110 *
2111 * Returns an error if any part of the specified range is not mapped.
2112 */
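/*
 * Usage sketch (illustrative only, not a statement about any actual
 * caller): an msync(2)-style flush of a user range, synchronous and
 * without invalidation, would look roughly like:
 *
 *	rv = vm_map_clean(&curproc->p_vmspace->vm_map, addr, addr + size,
 *			  TRUE, FALSE);
 */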
2113int
2114vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, boolean_t syncio,
2115 boolean_t invalidate)
2116{
2117 vm_map_entry_t current;
2118 vm_map_entry_t entry;
2119 vm_size_t size;
2120 vm_object_t object;
2121 vm_ooffset_t offset;
2122
2123 vm_map_lock_read(map);
2124 VM_MAP_RANGE_CHECK(map, start, end);
2125 if (!vm_map_lookup_entry(map, start, &entry)) {
2126 vm_map_unlock_read(map);
2127 return (KERN_INVALID_ADDRESS);
2128 }
2129 /*
2130 * Make a first pass to check for holes.
2131 */
2132 for (current = entry; current->start < end; current = current->next) {
2133 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2134 vm_map_unlock_read(map);
2135 return (KERN_INVALID_ARGUMENT);
2136 }
2137 if (end > current->end &&
2138 (current->next == &map->header ||
2139 current->end != current->next->start)) {
2140 vm_map_unlock_read(map);
2141 return (KERN_INVALID_ADDRESS);
2142 }
2143 }
2144
2145 if (invalidate)
2146 pmap_remove(vm_map_pmap(map), start, end);
2147 /*
2148 * Make a second pass, cleaning/uncaching pages from the indicated
2149 * objects as we go.
2150 */
2151 for (current = entry; current->start < end; current = current->next) {
2152 offset = current->offset + (start - current->start);
2153 size = (end <= current->end ? end : current->end) - start;
2154 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2155 vm_map_t smap;
2156 vm_map_entry_t tentry;
2157 vm_size_t tsize;
2158
2159 smap = current->object.sub_map;
2160 vm_map_lock_read(smap);
2161 (void) vm_map_lookup_entry(smap, offset, &tentry);
2162 tsize = tentry->end - offset;
2163 if (tsize < size)
2164 size = tsize;
2165 object = tentry->object.vm_object;
2166 offset = tentry->offset + (offset - tentry->start);
2167 vm_map_unlock_read(smap);
2168 } else {
2169 object = current->object.vm_object;
2170 }
2171 /*
2172 * Note that there is absolutely no sense in writing out
2173 * anonymous objects, so we track down the vnode object
2174 * to write out.
2175 * We invalidate (remove) all pages from the address space
2176 * anyway, for semantic correctness.
2177 *
2178 * note: certain anonymous maps, such as MAP_NOSYNC maps,
2179 * may start out with a NULL object.
2180 */
2181 while (object && object->backing_object) {
2182 offset += object->backing_object_offset;
2183 object = object->backing_object;
2184 if (object->size < OFF_TO_IDX(offset + size))
2185 size = IDX_TO_OFF(object->size) - offset;
2186 }
2187 if (object && (object->type == OBJT_VNODE) &&
2188 (current->protection & VM_PROT_WRITE)) {
2189 /*
2190 * Flush pages if writing is allowed, invalidate them
2191 * if invalidation requested. Pages undergoing I/O
2192 * will be ignored by vm_object_page_remove().
2193 *
2194 * We cannot lock the vnode and then wait for paging
2195 * to complete without deadlocking against vm_fault.
2196 * Instead we simply call vm_object_page_remove() and
2197 * allow it to block internally on a page-by-page
2198 * basis when it encounters pages undergoing async
2199 * I/O.
2200 */
2201 int flags;
2202
2203 vm_object_reference(object);
2204 vn_lock(object->handle,
2205 LK_EXCLUSIVE | LK_RETRY, curthread);
2206 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
2207 flags |= invalidate ? OBJPC_INVAL : 0;
2208 vm_object_page_clean(object,
2209 OFF_TO_IDX(offset),
2210 OFF_TO_IDX(offset + size + PAGE_MASK),
2211 flags);
2212 VOP_UNLOCK(((struct vnode *)object->handle),
2213 0, curthread);
2214 vm_object_deallocate(object);
2215 }
2216 if (object && invalidate &&
2217 ((object->type == OBJT_VNODE) ||
2218 (object->type == OBJT_DEVICE))) {
2219 int clean_only =
2220 (object->type == OBJT_DEVICE) ? FALSE : TRUE;
2221 vm_object_reference(object);
2222 vm_object_page_remove(object,
2223 OFF_TO_IDX(offset),
2224 OFF_TO_IDX(offset + size + PAGE_MASK),
2225 clean_only);
2226 vm_object_deallocate(object);
2227 }
2228 start += size;
2229 }
2230
2231 vm_map_unlock_read(map);
2232 return (KERN_SUCCESS);
2233}
2234
2235/*
2236 * vm_map_entry_unwire: [ internal use only ]
2237 *
2238 * Make the region specified by this entry pageable.
2239 *
2240 * The map in question should be locked.
2241 * [This is the reason for this routine's existence.]
2242 */
2243static void
2244vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2245{
2246 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2247 entry->wired_count = 0;
2248 vm_fault_unwire(map, entry);
2249}
2250
2251/*
2252 * vm_map_entry_delete: [ internal use only ]
2253 *
2254 * Deallocate the given entry from the target map.
2255 */
2256static void
2257vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp)
2258{
2259 vm_map_entry_unlink(map, entry);
2260 map->size -= entry->end - entry->start;
2261
2262 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2263 vm_object_deallocate(entry->object.vm_object);
2264 }
2265
2266 vm_map_entry_dispose(map, entry, countp);
2267}
2268
2269/*
2270 * vm_map_delete: [ internal use only ]
2271 *
2272 * Deallocates the given address range from the target
2273 * map.
2274 */
2275int
2276vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp)
2277{
2278 vm_object_t object;
2279 vm_map_entry_t entry;
2280 vm_map_entry_t first_entry;
2281
2282 /*
2283 * Find the start of the region, and clip it
2284 */
2285
2286again:
2287 if (!vm_map_lookup_entry(map, start, &first_entry)) {
2288 entry = first_entry->next;
2289 } else {
2290 entry = first_entry;
2291 vm_map_clip_start(map, entry, start, countp);
2292 /*
2293 * Fix the lookup hint now, rather than each time through the
2294 * loop.
2295 */
2296 SAVE_HINT(map, entry->prev);
2297 }
2298
2299 /*
2300 * Save the free space hint
2301 */
2302
2303 if (entry == &map->header) {
2304 map->first_free = &map->header;
2305 } else if (map->first_free->start >= start) {
2306 map->first_free = entry->prev;
2307 }
2308
2309 /*
2310 * Step through all entries in this region
2311 */
2312
2313 while ((entry != &map->header) && (entry->start < end)) {
2314 vm_map_entry_t next;
2315 vm_offset_t s, e;
2316 vm_pindex_t offidxstart, offidxend, count;
2317
2318 /*
2319 * If we hit an in-transition entry we have to sleep and
2320 * retry. It's easier (and not really slower) to just retry
2321 * since this case occurs so rarely and the hint is already
2322 * pointing at the right place. We have to reset the
2323 * start offset so as not to accidentally delete an entry
2324 * another process just created in vacated space.
2325 */
2326 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2327 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2328 start = entry->start;
2329 ++mycpu->gd_cnt.v_intrans_coll;
2330 ++mycpu->gd_cnt.v_intrans_wait;
2331 vm_map_transition_wait(map);
2332 goto again;
2333 }
2334 vm_map_clip_end(map, entry, end, countp);
2335
2336 s = entry->start;
2337 e = entry->end;
2338 next = entry->next;
2339
2340 offidxstart = OFF_TO_IDX(entry->offset);
2341 count = OFF_TO_IDX(e - s);
2342 object = entry->object.vm_object;
2343
2344 /*
2345 * Unwire before removing addresses from the pmap; otherwise,
2346 * unwiring will put the entries back in the pmap.
2347 */
2348 if (entry->wired_count != 0)
2349 vm_map_entry_unwire(map, entry);
2350
2351 offidxend = offidxstart + count;
2352
2353 if ((object == kernel_object) || (object == kmem_object)) {
2354 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2355 } else {
2356 pmap_remove(map->pmap, s, e);
2357 if (object != NULL &&
2358 object->ref_count != 1 &&
2359 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2360 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2361 vm_object_collapse(object);
2362 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2363 if (object->type == OBJT_SWAP) {
2364 swap_pager_freespace(object, offidxstart, count);
2365 }
2366 if (offidxend >= object->size &&
2367 offidxstart < object->size) {
2368 object->size = offidxstart;
2369 }
2370 }
2371 }
2372
2373 /*
2374 * Delete the entry (which may delete the object) only after
2375 * removing all pmap entries pointing to its pages.
2376 * (Otherwise, its page frames may be reallocated, and any
2377 * modify bits will be set in the wrong object!)
2378 */
2379 vm_map_entry_delete(map, entry, countp);
2380 entry = next;
2381 }
2382 return (KERN_SUCCESS);
2383}
2384
2385/*
2386 * vm_map_remove:
2387 *
2388 * Remove the given address range from the target map.
2389 * This is the exported form of vm_map_delete.
2390 */
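/*
 * Unlike vm_map_delete(), this routine reserves its own map entries
 * and acquires the map lock itself, so it may be called on an unlocked
 * map, e.g. (hypothetical bounds):
 *
 *	(void) vm_map_remove(map, addr, addr + size);
 */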
2391int
2392vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2393{
2394 int result;
2395 int count;
2396
2397 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2398 vm_map_lock(map);
2399 VM_MAP_RANGE_CHECK(map, start, end);
2400 result = vm_map_delete(map, start, end, &count);
2401 vm_map_unlock(map);
2402 vm_map_entry_release(count);
2403
2404 return (result);
2405}
2406
2407/*
2408 * vm_map_check_protection:
2409 *
2410 * Assert that the target map allows the specified
2411 * privilege on the entire address region given.
2412 * The entire region must be allocated.
2413 */
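/*
 * Usage sketch (illustrative only; bounds and error handling are
 * hypothetical): validating that a range is mapped read/write before
 * operating on it:
 *
 *	if (!vm_map_check_protection(map, addr, addr + size,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return (EFAULT);
 */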
2414boolean_t
2415vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2416 vm_prot_t protection)
2417{
2418 vm_map_entry_t entry;
2419 vm_map_entry_t tmp_entry;
2420
2421 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2422 return (FALSE);
2423 }
2424 entry = tmp_entry;
2425
2426 while (start < end) {
2427 if (entry == &map->header) {
2428 return (FALSE);
2429 }
2430 /*
2431 * No holes allowed!
2432 */
2433
2434 if (start < entry->start) {
2435 return (FALSE);
2436 }
2437 /*
2438 * Check protection associated with entry.
2439 */
2440
2441 if ((entry->protection & protection) != protection) {
2442 return (FALSE);
2443 }
2444 /* go to next entry */
2445
2446 start = entry->end;
2447 entry = entry->next;
2448 }
2449 return (TRUE);
2450}
2451
2452/*
2453 * Split the pages in a map entry into a new object. This affords
2454 * easier removal of unused pages, and keeps object inheritance from
2455 * having a negative impact on memory usage.
2456 */
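/*
 * Within this file vm_map_split() is reached from vm_map_copy_entry()
 * when an unwired entry backed by a handle-less OBJT_DEFAULT or
 * OBJT_SWAP object that is still OBJ_ONEMAPPING (and not OBJ_NOSPLIT)
 * is about to be shared copy-on-write.
 */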
2457static void
2458vm_map_split(vm_map_entry_t entry)
2459{
2460 vm_page_t m;
2461 vm_object_t orig_object, new_object, source;
2462 vm_offset_t s, e;
2463 vm_pindex_t offidxstart, offidxend, idx;
2464 vm_size_t size;
2465 vm_ooffset_t offset;
2466
2467 orig_object = entry->object.vm_object;
2468 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2469 return;
2470 if (orig_object->ref_count <= 1)
2471 return;
2472
2473 offset = entry->offset;
2474 s = entry->start;
2475 e = entry->end;
2476
2477 offidxstart = OFF_TO_IDX(offset);
2478 offidxend = offidxstart + OFF_TO_IDX(e - s);
2479 size = offidxend - offidxstart;
2480
2481 new_object = vm_pager_allocate(orig_object->type,
2482 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2483 if (new_object == NULL)
2484 return;
2485
2486 source = orig_object->backing_object;
2487 if (source != NULL) {
2488 vm_object_reference(source); /* Referenced by new_object */
2489 LIST_INSERT_HEAD(&source->shadow_head,
2490 new_object, shadow_list);
2491 vm_object_clear_flag(source, OBJ_ONEMAPPING);
2492 new_object->backing_object_offset =
2493 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2494 new_object->backing_object = source;
2495 source->shadow_count++;
2496 source->generation++;
2497 }
2498
2499 for (idx = 0; idx < size; idx++) {
2500 vm_page_t m;
2501
2502 /*
2503 * A critical section is required to avoid a race between
2504 * the lookup and an interrupt/unbusy/free and our busy
2505 * check.
2506 */
2507 crit_enter();
2508 retry:
2509 m = vm_page_lookup(orig_object, offidxstart + idx);
2510 if (m == NULL) {
2511 crit_exit();
2512 continue;
2513 }
2514
2515 /*
2516 * We must wait for pending I/O to complete before we can
2517 * rename the page.
2518 *
2519 * We do not have to VM_PROT_NONE the page as mappings should
2520 * not be changed by this operation.
2521 */
2522 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2523 goto retry;
2524 vm_page_busy(m);
2525 vm_page_rename(m, new_object, idx);
2526 /* page automatically made dirty by rename and cache handled */
2527 vm_page_busy(m);
2528 crit_exit();
2529 }
2530
2531 if (orig_object->type == OBJT_SWAP) {
2532 vm_object_pip_add(orig_object, 1);
2533 /*
2534 * copy orig_object pages into new_object
2535 * and destroy unneeded pages in
2536 * shadow object.
2537 */
2538 swap_pager_copy(orig_object, new_object, offidxstart, 0);
2539 vm_object_pip_wakeup(orig_object);
2540 }
2541
2542 /*
2543 * Wakeup the pages we played with. No spl protection is needed
2544 * for a simple wakeup.
2545 */
2546 for (idx = 0; idx < size; idx++) {
2547 m = vm_page_lookup(new_object, idx);
2548 if (m)
2549 vm_page_wakeup(m);
2550 }
2551
2552 entry->object.vm_object = new_object;
2553 entry->offset = 0LL;
2554 vm_object_deallocate(orig_object);
2555}
2556
2557/*
2558 * vm_map_copy_entry:
2559 *
2560 * Copies the contents of the source entry to the destination
2561 * entry. The entries *must* be aligned properly.
2562 */
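/*
 * Within this file this routine is used by vmspace_fork() for
 * VM_INHERIT_COPY regions; wired source entries are copied by
 * simulated faults rather than being marked copy-on-write (see the
 * else branch below).
 */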
2563static void
2564vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
2565 vm_map_entry_t src_entry, vm_map_entry_t dst_entry)
2566{
2567 vm_object_t src_object;
2568
2569 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2570 return;
2571
2572 if (src_entry->wired_count == 0) {
2573
2574 /*
2575 * If the source entry is marked needs_copy, it is already
2576 * write-protected.
2577 */
2578 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2579 pmap_protect(src_map->pmap,
2580 src_entry->start,
2581 src_entry->end,
2582 src_entry->protection & ~VM_PROT_WRITE);
2583 }
2584
2585 /*
2586 * Make a copy of the object.
2587 */
2588 if ((src_object = src_entry->object.vm_object) != NULL) {
2589
2590 if ((src_object->handle == NULL) &&
2591 (src_object->type == OBJT_DEFAULT ||
2592 src_object->type == OBJT_SWAP)) {
2593 vm_object_collapse(src_object);
2594 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2595 vm_map_split(src_entry);
2596 src_object = src_entry->object.vm_object;
2597 }
2598 }
2599
2600 vm_object_reference(src_object);
2601 vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2602 dst_entry->object.vm_object = src_object;
2603 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2604 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2605 dst_entry->offset = src_entry->offset;
2606 } else {
2607 dst_entry->object.vm_object = NULL;
2608 dst_entry->offset = 0;
2609 }
2610
2611 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2612 dst_entry->end - dst_entry->start, src_entry->start);
2613 } else {
2614 /*
2615 * Of course, wired down pages can't be set copy-on-write.
2616 * Cause wired pages to be copied into the new map by
2617 * simulating faults (the new pages are pageable)
2618 */
2619 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2620 }
2621}
2622
2623/*
2624 * vmspace_fork:
2625 * Create a new process vmspace structure and vm_map
2626 * based on those of an existing process. The new map
2627 * is based on the old map, according to the inheritance
2628 * values on the regions in that map.
2629 *
2630 * The source map must not be locked.
2631 */
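/*
 * Usage sketch (illustrative only; the real caller lives in the
 * process-creation code, not in this file):
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * The child map comes back with wired counts cleared and copy-on-write
 * set up according to the inheritance attributes handled below.
 */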
2632struct vmspace *
2633vmspace_fork(struct vmspace *vm1)
2634{
2635 struct vmspace *vm2;
2636 vm_map_t old_map = &vm1->vm_map;
2637 vm_map_t new_map;
2638 vm_map_entry_t old_entry;
2639 vm_map_entry_t new_entry;
2640 vm_object_t object;
2641 int count;
2642
2643 vm_map_lock(old_map);
2644 old_map->infork = 1;
2645
2646 /*
2647 * XXX Note: upcalls are not copied.
2648 */
2649 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2650 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2651 (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy);
2652 new_map = &vm2->vm_map; /* XXX */
2653 new_map->timestamp = 1;
2654
2655 count = 0;
2656 old_entry = old_map->header.next;
2657 while (old_entry != &old_map->header) {
2658 ++count;
2659 old_entry = old_entry->next;
2660 }
2661
2662 count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT);
2663
2664 old_entry = old_map->header.next;
2665 while (old_entry != &old_map->header) {
2666 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2667 panic("vm_map_fork: encountered a submap");
2668
2669 switch (old_entry->inheritance) {
2670 case VM_INHERIT_NONE:
2671 break;
2672
2673 case VM_INHERIT_SHARE:
2674 /*
2675 * Clone the entry, creating the shared object if necessary.
2676 */
2677 object = old_entry->object.vm_object;
2678 if (object == NULL) {
2679 object = vm_object_allocate(OBJT_DEFAULT,
2680 atop(old_entry->end - old_entry->start));
2681 old_entry->object.vm_object = object;
2682 old_entry->offset = (vm_offset_t) 0;
2683 }
2684
2685 /*
2686 * Add the reference before calling vm_object_shadow
2687 * to ensure that a shadow object is created.
2688 */
2689 vm_object_reference(object);
2690 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2691 vm_object_shadow(&old_entry->object.vm_object,
2692 &old_entry->offset,
2693 atop(old_entry->end - old_entry->start));
2694 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2695 /* Transfer the second reference too. */
2696 vm_object_reference(
2697 old_entry->object.vm_object);
2698 vm_object_deallocate(object);
2699 object = old_entry->object.vm_object;
2700 }
2701 vm_object_clear_flag(object, OBJ_ONEMAPPING);
2702
2703 /*
2704 * Clone the entry, referencing the shared object.
2705 */
2706 new_entry = vm_map_entry_create(new_map, &count);
2707 *new_entry = *old_entry;
2708 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2709 new_entry->wired_count = 0;
2710
2711 /*
2712 * Insert the entry into the new map -- we know we're
2713 * inserting at the end of the new map.
2714 */
2715
2716 vm_map_entry_link(new_map, new_map->header.prev,
2717 new_entry);
2718
2719 /*
2720 * Update the physical map
2721 */
2722
2723 pmap_copy(new_map->pmap, old_map->pmap,
2724 new_entry->start,
2725 (old_entry->end - old_entry->start),
2726 old_entry->start);
2727 break;
2728
2729 case VM_INHERIT_COPY:
2730 /*
2731 * Clone the entry and link into the map.
2732 */
2733 new_entry = vm_map_entry_create(new_map, &count);
2734 *new_entry = *old_entry;
2735 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2736 new_entry->wired_count = 0;
2737 new_entry->object.vm_object = NULL;
2738 vm_map_entry_link(new_map, new_map->header.prev,
2739 new_entry);
2740 vm_map_copy_entry(old_map, new_map, old_entry,
2741 new_entry);
2742 break;
2743 }
2744 old_entry = old_entry->next;
2745 }
2746
2747 new_map->size = old_map->size;
2748 old_map->infork = 0;
2749 vm_map_unlock(old_map);
2750 vm_map_entry_release(count);
2751
2752 return (vm2);
2753}
2754
2755int
2756vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2757 vm_prot_t prot, vm_prot_t max, int cow)
2758{
2759 vm_map_entry_t prev_entry;
2760 vm_map_entry_t new_stack_entry;
2761 vm_size_t init_ssize;
2762 int rv;
2763 int count;
2764
2765 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2766 return (KERN_NO_SPACE);
2767
2768 if (max_ssize < sgrowsiz)
2769 init_ssize = max_ssize;
2770 else
2771 init_ssize = sgrowsiz;
2772
2773 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2774 vm_map_lock(map);
2775
2776 /* If addr is already mapped, no go */
2777 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2778 vm_map_unlock(map);
2779 vm_map_entry_release(count);
2780 return (KERN_NO_SPACE);
2781 }
2782
2783 /* If we would blow our VMEM resource limit, no go */
2784 if (map->size + init_ssize >
2785 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2786 vm_map_unlock(map);
2787 vm_map_entry_release(count);
2788 return (KERN_NO_SPACE);
2789 }
2790
2791 /* If we can't accommodate max_ssize in the current mapping,
2792 * no go. However, we need to be aware that subsequent user
2793 * mappings might map into the space we have reserved for
2794 * stack, and currently this space is not protected.
2795 *
2796 * Hopefully we will at least detect this condition
2797 * when we try to grow the stack.
2798 */
2799 if ((prev_entry->next != &map->header) &&
2800 (prev_entry->next->start < addrbos + max_ssize)) {
2801 vm_map_unlock(map);
2802 vm_map_entry_release(count);
2803 return (KERN_NO_SPACE);
2804 }
2805
2806 /* We initially map a stack of only init_ssize. We will
2807 * grow as needed later. Since this is to be a grow
2808 * down stack, we map at the top of the range.
2809 *
2810 * Note: we would normally expect prot and max to be
2811 * VM_PROT_ALL, and cow to be 0. Possibly we should
2812 * eliminate these as input parameters, and just
2813 * pass these values here in the insert call.
2814 */
2815 rv = vm_map_insert(map, &count,
2816 NULL, 0, addrbos + max_ssize - init_ssize,
2817 addrbos + max_ssize, prot, max, cow);
2818
2819 /* Now set the avail_ssize amount */
2820 if (rv == KERN_SUCCESS) {
2821 if (prev_entry != &map->header)
2822 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count);
2823 new_stack_entry = prev_entry->next;
2824 if (new_stack_entry->end != addrbos + max_ssize ||
2825 new_stack_entry->start != addrbos + max_ssize - init_ssize)
2826 panic ("Bad entry start/end for new stack entry");
2827 else
2828 new_stack_entry->avail_ssize = max_ssize - init_ssize;
2829 }
2830
2831 vm_map_unlock(map);
2832 vm_map_entry_release(count);
2833 return (rv);
2834}
2835
2836/* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the
2837 * desired address is already mapped, or if we successfully grow
2838 * the stack. Also returns KERN_SUCCESS if addr is outside the
2839 * stack range (this is strange, but preserves compatibility with
2840 * the grow function in vm_machdep.c).
2841 */
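/* Usage sketch (illustrative only; the real caller is in the fault/trap
 * code, not in this file): on a fault just below a stack entry one
 * would expect something like
 *
 *	if (vm_map_growstack(p, fault_addr) != KERN_SUCCESS)
 *		... treat the fault address as invalid ...
 */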
2842int
2843vm_map_growstack (struct proc *p, vm_offset_t addr)
2844{
2845 vm_map_entry_t prev_entry;
2846 vm_map_entry_t stack_entry;
2847 vm_map_entry_t new_stack_entry;
2848 struct vmspace *vm = p->p_vmspace;
2849 vm_map_t map = &vm->vm_map;
2850 vm_offset_t end;
2851 int grow_amount;
2852 int rv = KERN_SUCCESS;
2853 int is_procstack;
2854 int use_read_lock = 1;
2855 int count;
2856
2857 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2858Retry:
2859 if (use_read_lock)
2860 vm_map_lock_read(map);
2861 else
2862 vm_map_lock(map);
2863
2864 /* If addr is already in the entry range, no need to grow. */
2865 if (vm_map_lookup_entry(map, addr, &prev_entry))
2866 goto done;
2867
2868 if ((stack_entry = prev_entry->next) == &map->header)
2869 goto done;
2870 if (prev_entry == &map->header)
2871 end = stack_entry->start - stack_entry->avail_ssize;
2872 else
2873 end = prev_entry->end;
2874
2875 /* This next test mimics the old grow function in vm_machdep.c.
2876 * It really doesn't quite make sense, but we do it anyway
2877 * for compatibility.
2878 *
2879 * If the stack is not growable, return success. This signals the
2880 * caller to proceed as it normally would with ordinary vm.
2881 */
2882 if (stack_entry->avail_ssize < 1 ||
2883 addr >= stack_entry->start ||
2884 addr < stack_entry->start - stack_entry->avail_ssize) {
2885 goto done;
2886 }
2887
2888 /* Find the minimum grow amount */
2889 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2890 if (grow_amount > stack_entry->avail_ssize) {
2891 rv = KERN_NO_SPACE;
2892 goto done;
2893 }
2894
2895 /* If there is no longer enough space between the entries,
2896 * no go; adjust the available space. Note: this
2897 * should only happen if the user has mapped into the
2898 * stack area after the stack was created, and is
2899 * probably an error.
2900 *
2901 * This also effectively destroys any guard page the user
2902 * might have intended by limiting the stack size.
2903 */
2904 if (grow_amount > stack_entry->start - end) {
2905 if (use_read_lock && vm_map_lock_upgrade(map)) {
2906 use_read_lock = 0;
2907 goto Retry;
2908 }
2909 use_read_lock = 0;
2910 stack_entry->avail_ssize = stack_entry->start - end;
2911 rv = KERN_NO_SPACE;
2912 goto done;
2913 }
2914
2915 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2916
2917 /* If this is the main process stack, see if we're over the
2918 * stack limit.
2919 */
2920 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2921 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2922 rv = KERN_NO_SPACE;
2923 goto done;
2924 }
2925
2926 /* Round up the grow amount to a multiple of sgrowsiz */
2927 grow_amount = roundup (grow_amount, sgrowsiz);
2928 if (grow_amount > stack_entry->avail_ssize) {
2929 grow_amount = stack_entry->avail_ssize;
2930 }
2931 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2932 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2933 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2934 ctob(vm->vm_ssize);
2935 }
2936
2937 /* If we would blow our VMEM resource limit, no go */
2938 if (map->size + grow_amount > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2939 rv = KERN_NO_SPACE;
2940 goto done;
2941 }
2942
2943 if (use_read_lock && vm_map_lock_upgrade(map)) {
2944 use_read_lock = 0;
2945 goto Retry;
2946 }
2947 use_read_lock = 0;
2948
2949 /* Get the preliminary new entry start value */
2950 addr = stack_entry->start - grow_amount;
2951
2952 /* If this puts us into the previous entry, cut back our growth
2953 * to the available space. Also, see the note above.
2954 */
2955 if (addr < end) {
2956 stack_entry->avail_ssize = stack_entry->start - end;
2957 addr = end;
2958 }
2959
2960 rv = vm_map_insert(map, &count,
2961 NULL, 0, addr, stack_entry->start,
2962 VM_PROT_ALL,
2963 VM_PROT_ALL,
2964 0);
2965
2966 /* Adjust the available stack space by the amount we grew. */
2967 if (rv == KERN_SUCCESS) {
2968 if (prev_entry != &map->header)
2969 vm_map_clip_end(map, prev_entry, addr, &count);
2970 new_stack_entry = prev_entry->next;
2971 if (new_stack_entry->end != stack_entry->start ||
2972 new_stack_entry->start != addr)
2973 panic ("Bad stack grow start/end in new stack entry");
2974 else {
2975 new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2976 (new_stack_entry->end -
2977 new_stack_entry->start);
2978 if (is_procstack)
2979 vm->vm_ssize += btoc(new_stack_entry->end -
2980 new_stack_entry->start);
2981 }
2982 }
2983
2984done:
2985 if (use_read_lock)
2986 vm_map_unlock_read(map);
2987 else
2988 vm_map_unlock(map);
2989 vm_map_entry_release(count);
2990 return (rv);
2991}
2992
2993/*
2994 * Unshare the specified VM space for exec. If other processes are
2995 * mapped to it, then create a new one. The new vmspace is null.
2996 */
2997
2998void
2999vmspace_exec(struct proc *p, struct vmspace *vmcopy)
3000{
3001 struct vmspace *oldvmspace = p->p_vmspace;
3002 struct vmspace *newvmspace;
3003 vm_map_t map = &p->p_vmspace->vm_map;
3004
3005 /*
3006 * If we are execing a resident vmspace we fork it, otherwise
3007 * we create a new vmspace. Note that exitingcnt and upcalls
3008 * are not copied to the new vmspace.
3009 */
3010 if (vmcopy) {
3011 newvmspace = vmspace_fork(vmcopy);
3012 } else {
3013 newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
3014 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
3015 (caddr_t)&oldvmspace->vm_endcopy -
3016 (caddr_t)&oldvmspace->vm_startcopy);
3017 }
3018
3019 /*
3020 * This code is written like this for prototype purposes. The
3021 * goal is to avoid running down the vmspace here, but let the
3022 * other processes that are still using the vmspace finally
3023 * run it down. Even though there is little or no chance of blocking
3024 * here, it is a good idea to keep this form for future mods.
3025 */
3026 p->p_vmspace = newvmspace;
3027 pmap_pinit2(vmspace_pmap(newvmspace));
3028 if (p == curproc)
3029 pmap_activate(p);
3030 vmspace_free(oldvmspace);
3031}
3032
3033/*
3034 * Unshare the specified VM space for forcing COW. This
3035 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3036 *
3037 * The exitingcnt test is not strictly necessary but has been
3038 * included for code sanity (to make the code a bit more deterministic).
3039 */
3040
3041void
3042vmspace_unshare(struct proc *p)
3043{
3044 struct vmspace *oldvmspace = p->p_vmspace;
3045 struct vmspace *newvmspace;
3046
3047 if (oldvmspace->vm_refcnt == 1 && oldvmspace->vm_exitingcnt == 0)
3048 return;
3049 newvmspace = vmspace_fork(oldvmspace);
3050 p->p_vmspace = newvmspace;
3051 pmap_pinit2(vmspace_pmap(newvmspace));
3052 if (p == curproc)
3053 pmap_activate(p);
3054 vmspace_free(oldvmspace);
3055}
3056
3057/*
3058 * vm_map_lookup:
3059 *
3060 * Finds the VM object, offset, and
3061 * protection for a given virtual address in the
3062 * specified map, assuming a page fault of the
3063 * type specified.
3064 *
3065 * Leaves the map in question locked for read; return
3066 * values are guaranteed until a vm_map_lookup_done
3067 * call is performed. Note that the map argument
3068 * is in/out; the returned map must be used in
3069 * the call to vm_map_lookup_done.
3070 *
3071 * A handle (out_entry) is returned for use in
3072 * vm_map_lookup_done, to make that fast.
3073 *
3074 * If a lookup is requested with "write protection"
3075 * specified, the map may be changed to perform virtual
3076 * copying operations, although the data referenced will
3077 * remain the same.
3078 */
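/*
 * Usage sketch (illustrative only; variables are hypothetical): a
 * fault-handler style caller brackets the lookup like this:
 *
 *	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry,
 *			   &object, &pindex, &prot, &wired);
 *	if (rv == KERN_SUCCESS) {
 *		... resolve the page from object/pindex ...
 *		vm_map_lookup_done(map, entry, 0);
 *	}
 */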
3079int
3080vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
3081 vm_offset_t vaddr,
3082 vm_prot_t fault_typea,
3083 vm_map_entry_t *out_entry, /* OUT */
3084 vm_object_t *object, /* OUT */
3085 vm_pindex_t *pindex, /* OUT */
3086 vm_prot_t *out_prot, /* OUT */
3087 boolean_t *wired) /* OUT */
3088{
3089 vm_map_entry_t entry;
3090 vm_map_t map = *var_map;
3091 vm_prot_t prot;
3092 vm_prot_t fault_type = fault_typea;
3093 int use_read_lock = 1;
3094 int rv = KERN_SUCCESS;
3095
3096RetryLookup:
3097 if (use_read_lock)
3098 vm_map_lock_read(map);
3099 else
3100 vm_map_lock(map);
3101
3102 /*
3103 * If the map has an interesting hint, try it before calling full
3104 * blown lookup routine.
3105 */
3106 entry = map->hint;
3107 *out_entry = entry;
3108
3109 if ((entry == &map->header) ||
3110 (vaddr < entry->start) || (vaddr >= entry->end)) {
3111 vm_map_entry_t tmp_entry;
3112
3113 /*
3114 * Entry was either not a valid hint, or the vaddr was not
3115 * contained in the entry, so do a full lookup.
3116 */
3117 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
3118 rv = KERN_INVALID_ADDRESS;
3119 goto done;
3120 }
3121
3122 entry = tmp_entry;
3123 *out_entry = entry;
3124 }
3125
3126 /*
3127 * Handle submaps.
3128 */
3129
3130 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3131 vm_map_t old_map = map;
3132
3133 *var_map = map = entry->object.sub_map;
3134 if (use_read_lock)
3135 vm_map_unlock_read(old_map);
3136 else
3137 vm_map_unlock(old_map);
3138 use_read_lock = 1;
3139 goto RetryLookup;
3140 }
3141
3142 /*
3143 * Check whether this task is allowed to have this page.
3144 * Note the special case for MAP_ENTRY_COW
3145 * pages with an override. This is to implement a forced
3146 * COW for debuggers.
3147 */
3148
3149 if (fault_type & VM_PROT_OVERRIDE_WRITE)
3150 prot = entry->max_protection;
3151 else
3152 prot = entry->protection;
3153
3154 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
3155 if ((fault_type & prot) != fault_type) {
3156 rv = KERN_PROTECTION_FAILURE;
3157 goto done;
3158 }
3159
3160 if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3161 (entry->eflags & MAP_ENTRY_COW) &&
3162 (fault_type & VM_PROT_WRITE) &&
3163 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
3164 rv = KERN_PROTECTION_FAILURE;
3165 goto done;
3166 }
3167
3168 /*
3169 * If this page is not pageable, we have to get it for all possible
3170 * accesses.
3171 */
3172
3173 *wired = (entry->wired_count != 0);
3174 if (*wired)
3175 prot = fault_type = entry->protection;
3176
3177 /*
3178 * If the entry was copy-on-write, we either ...
3179 */
3180
3181 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3182 /*
3183 * If we want to write the page, we may as well handle that
3184 * now since we've got the map locked.
3185 *
3186 * If we don't need to write the page, we just demote the
3187 * permissions allowed.
3188 */
3189
3190 if (fault_type & VM_PROT_WRITE) {
3191 /*
3192 * Make a new object, and place it in the object
3193 * chain. Note that no new references have appeared
3194 * -- one just moved from the map to the new
3195 * object.
3196 */
3197
3198 if (use_read_lock && vm_map_lock_upgrade(map)) {
3199 use_read_lock = 0;
3200 goto RetryLookup;
3201 }
3202 use_read_lock = 0;
3203
3204 vm_object_shadow(
3205 &entry->object.vm_object,
3206 &entry->offset,
3207 atop(entry->end - entry->start));
3208
3209 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3210 } else {
3211 /*
3212 * We're attempting to read a copy-on-write page --
3213 * don't allow writes.
3214 */
3215
3216 prot &= ~VM_PROT_WRITE;
3217 }
3218 }
3219
3220 /*
3221 * Create an object if necessary.
3222 */
3223 if (entry->object.vm_object == NULL &&
3224 !map->system_map) {
3225 if (use_read_lock && vm_map_lock_upgrade(map)) {
3226 use_read_lock = 0;
3227 goto RetryLookup;
3228 }
3229 use_read_lock = 0;
3230 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
3231 atop(entry->end - entry->start));
3232 entry->offset = 0;
3233 }
3234
3235 /*
3236 * Return the object/offset from this entry. If the entry was
3237 * copy-on-write or empty, it has been fixed up.
3238 */
3239
3240 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3241 *object = entry->object.vm_object;
3242
3243 /*
3244 * Return whether this is the only map sharing this data. On
3245 * success we return with a read lock held on the map. On failure
3246 * we return with the map unlocked.
3247 */
3248 *out_prot = prot;
3249done:
3250 if (rv == KERN_SUCCESS) {
3251 if (use_read_lock == 0)
3252 vm_map_lock_downgrade(map);
3253 } else if (use_read_lock) {
3254 vm_map_unlock_read(map);
3255 } else {
3256 vm_map_unlock(map);
3257 }
3258 return (rv);
3259}
3260
3261/*
3262 * vm_map_lookup_done:
3263 *
3264 * Releases locks acquired by a vm_map_lookup
3265 * (according to the handle returned by that lookup).
3266 */
3267
3268void
3269vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count)
3270{
3271 /*
3272 * Unlock the main-level map
3273 */
3274 vm_map_unlock_read(map);
3275 if (count)
3276 vm_map_entry_release(count);
3277}
3278
3279/*
3280 * Performs the copy_on_write operations necessary to allow the virtual copies
3281 * into user space to work. This has to be called for write(2) system calls
3282 * from other processes, file unlinking, and file size shrinkage.
3283 */
3284void
3285vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
3286{
3287 int rv;
3288 vm_object_t robject;
3289 vm_pindex_t idx;
3290
3291 if ((object == NULL) ||
3292 ((object->flags & OBJ_OPT) == 0))
3293 return;
3294
3295 if (object->shadow_count > object->ref_count)
3296 panic("vm_freeze_copyopts: sc > rc");
3297
3298 while ((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
3299 vm_pindex_t bo_pindex;
3300 vm_page_t m_in, m_out;
3301
3302 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3303
3304 vm_object_reference(robject);
3305
3306 vm_object_pip_wait(robject, "objfrz");
3307
3308 if (robject->ref_count == 1) {
3309 vm_object_deallocate(robject);
3310 continue;
3311 }
3312
3313 vm_object_pip_add(robject, 1);
3314
3315 for (idx = 0; idx < robject->size; idx++) {
3316
3317 m_out = vm_page_grab(robject, idx,
3318 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3319
3320 if (m_out->valid == 0) {
3321 m_in = vm_page_grab(object, bo_pindex + idx,
3322 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3323 if (m_in->valid == 0) {
3324 rv = vm_pager_get_pages(object, &m_in, 1, 0);
3325 if (rv != VM_PAGER_OK) {
3326 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3327 continue;
3328 }
3329 vm_page_deactivate(m_in);
3330 }
3331
3332 vm_page_protect(m_in, VM_PROT_NONE);
3333 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3334 m_out->valid = m_in->valid;
3335 vm_page_dirty(m_out);
3336 vm_page_activate(m_out);
3337 vm_page_wakeup(m_in);
3338 }
3339 vm_page_wakeup(m_out);
3340 }
3341
3342 object->shadow_count--;
3343 object->ref_count--;
3344 LIST_REMOVE(robject, shadow_list);
3345 robject->backing_object = NULL;
3346 robject->backing_object_offset = 0;
3347
3348 vm_object_pip_wakeup(robject);
3349 vm_object_deallocate(robject);
3350 }
3351
3352 vm_object_clear_flag(object, OBJ_OPT);
3353}
3354
3355#include "opt_ddb.h"
3356#ifdef DDB
3357#include <sys/kernel.h>
3358
3359#include <ddb/ddb.h>
3360
3361/*
3362 * vm_map_print: [ debug ]
3363 */
3364DB_SHOW_COMMAND(map, vm_map_print)
3365{
3366 static int nlines;
3367 /* XXX convert args. */
3368 vm_map_t map = (vm_map_t)addr;
3369 boolean_t full = have_addr;
3370
3371 vm_map_entry_t entry;
3372
3373 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3374 (void *)map,
3375 (void *)map->pmap, map->nentries, map->timestamp);
3376 nlines++;
3377
3378 if (!full && db_indent)
3379 return;
3380
3381 db_indent += 2;
3382 for (entry = map->header.next; entry != &map->header;
3383 entry = entry->next) {
3384 db_iprintf("map entry %p: start=%p, end=%p\n",
3385 (void *)entry, (void *)entry->start, (void *)entry->end);
3386 nlines++;
3387 {
3388 static char *inheritance_name[4] =
3389 {"share", "copy", "none", "donate_copy"};
3390
3391 db_iprintf(" prot=%x/%x/%s",
3392 entry->protection,
3393 entry->max_protection,
3394 inheritance_name[(int)(unsigned char)entry->inheritance]);
3395 if (entry->wired_count != 0)
3396 db_printf(", wired");
3397 }
3398 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3399 /* XXX no %qd in kernel. Truncate entry->offset. */
3400 db_printf(", share=%p, offset=0x%lx\n",
3401 (void *)entry->object.sub_map,
3402 (long)entry->offset);
3403 nlines++;
3404 if ((entry->prev == &map->header) ||
3405 (entry->prev->object.sub_map !=
3406 entry->object.sub_map)) {
3407 db_indent += 2;
3408 vm_map_print((db_expr_t)(intptr_t)
3409 entry->object.sub_map,
3410 full, 0, (char *)0);
3411 db_indent -= 2;
3412 }
3413 } else {
3414 /* XXX no %qd in kernel. Truncate entry->offset. */
3415 db_printf(", object=%p, offset=0x%lx",
3416 (void *)entry->object.vm_object,
3417 (long)entry->offset);
3418 if (entry->eflags & MAP_ENTRY_COW)
3419 db_printf(", copy (%s)",
3420 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3421 db_printf("\n");
3422 nlines++;
3423
3424 if ((entry->prev == &map->header) ||
3425 (entry->prev->object.vm_object !=
3426 entry->object.vm_object)) {
3427 db_indent += 2;
3428 vm_object_print((db_expr_t)(intptr_t)
3429 entry->object.vm_object,
3430 full, 0, (char *)0);
3431 nlines += 4;
3432 db_indent -= 2;
3433 }
3434 }
3435 }
3436 db_indent -= 2;
3437 if (db_indent == 0)
3438 nlines = 0;
3439}
3440
3441
3442DB_SHOW_COMMAND(procvm, procvm)
3443{
3444 struct proc *p;
3445
3446 if (have_addr) {
3447 p = (struct proc *) addr;
3448 } else {
3449 p = curproc;
3450 }
3451
3452 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3453 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3454 (void *)vmspace_pmap(p->p_vmspace));
3455
3456 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3457}
3458
3459#endif /* DDB */