Move flag(s) representing the type of vm_map_entry into its own vm_maptype_t
[dragonfly.git] / sys / vm / vm_map.c
1/*
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 *
64 * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $
65 * $DragonFly: src/sys/vm/vm_map.c,v 1.47 2006/09/11 20:25:31 dillon Exp $
66 */
67
68/*
69 * Virtual memory mapping module.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/proc.h>
75#include <sys/lock.h>
76#include <sys/vmmeter.h>
77#include <sys/mman.h>
78#include <sys/vnode.h>
79#include <sys/resourcevar.h>
80#include <sys/shm.h>
81#include <sys/tree.h>
82
83#include <vm/vm.h>
84#include <vm/vm_param.h>
85#include <vm/pmap.h>
86#include <vm/vm_map.h>
87#include <vm/vm_page.h>
88#include <vm/vm_object.h>
89#include <vm/vm_pager.h>
90#include <vm/vm_kern.h>
91#include <vm/vm_extern.h>
92#include <vm/swap_pager.h>
93#include <vm/vm_zone.h>
94
95#include <sys/thread2.h>
96
97/*
98 * Virtual memory maps provide for the mapping, protection,
99 * and sharing of virtual memory objects. In addition,
100 * this module provides for an efficient virtual copy of
101 * memory from one map to another.
102 *
103 * Synchronization is required prior to most operations.
104 *
105 * Maps consist of an ordered doubly-linked list of simple
106 * entries; a single hint is used to speed up lookups.
107 *
108 * Since portions of maps are specified by start/end addresses,
109 * which may not align with existing map entries, all
110 * routines merely "clip" entries to these start/end values.
111 * [That is, an entry is split into two, bordering at a
112 * start or end value.] Note that these clippings may not
113 * always be necessary (as the two resulting entries are then
114 * not changed); however, the clipping is done for convenience.
115 *
116 * As mentioned above, virtual copy operations are performed
117 * by copying VM object references from one map to
118 * another, and then marking both regions as copy-on-write.
119 */
120
121/*
122 * vm_map_startup:
123 *
124 * Initialize the vm_map module. Must be called before
125 * any other vm_map routines.
126 *
127 * Map and entry structures are allocated from the general
128 * purpose memory pool with some exceptions:
129 *
130 * - The kernel map and kmem submap are allocated statically.
131 * - Kernel map entries are allocated out of a static pool.
132 *
133 * These restrictions are necessary since malloc() uses the
134 * maps and requires map entries.
135 */
136
137#define VMEPERCPU 2
138
139static struct vm_zone mapentzone_store, mapzone_store;
140static vm_zone_t mapentzone, mapzone, vmspace_zone;
141static struct vm_object mapentobj, mapobj;
142
143static struct vm_map_entry map_entry_init[MAX_MAPENT];
144static struct vm_map_entry cpu_map_entry_init[MAXCPU][VMEPERCPU];
145static struct vm_map map_init[MAX_KMAP];
146
147static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *);
148static void vm_map_entry_dispose (vm_map_t map, vm_map_entry_t entry, int *);
149static void _vm_map_clip_end (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
150static void _vm_map_clip_start (vm_map_t, vm_map_entry_t, vm_offset_t, int *);
151static void vm_map_entry_delete (vm_map_t, vm_map_entry_t, int *);
152static void vm_map_entry_unwire (vm_map_t, vm_map_entry_t);
153static void vm_map_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t,
154 vm_map_entry_t);
155static void vm_map_split (vm_map_entry_t);
156static void vm_map_unclip_range (vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int *count, int flags);
157
158void
159vm_map_startup(void)
160{
161 mapzone = &mapzone_store;
162 zbootinit(mapzone, "MAP", sizeof (struct vm_map),
163 map_init, MAX_KMAP);
164 mapentzone = &mapentzone_store;
165 zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
166 map_entry_init, MAX_MAPENT);
167}
168
169/*
170 * Red black tree functions
171 */
172static int rb_vm_map_compare(vm_map_entry_t a, vm_map_entry_t b);
173RB_GENERATE(vm_map_rb_tree, vm_map_entry, rb_entry, rb_vm_map_compare);
174
175/* a->start is the address; it is the only field that needs to be initialized */
176static int
177rb_vm_map_compare(vm_map_entry_t a, vm_map_entry_t b)
178{
179 if (a->start < b->start)
180 return(-1);
181 else if (a->start > b->start)
182 return(1);
183 return(0);
184}
185
186/*
187 * Allocate a vmspace structure, including a vm_map and pmap,
188 * and initialize those structures. The refcnt is set to 1.
189 * The remaining fields must be initialized by the caller.
190 */
191struct vmspace *
192vmspace_alloc(vm_offset_t min, vm_offset_t max)
193{
194 struct vmspace *vm;
195
196 vm = zalloc(vmspace_zone);
197 vm_map_init(&vm->vm_map, min, max);
198 pmap_pinit(vmspace_pmap(vm));
199 vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */
200 vm->vm_refcnt = 1;
201 vm->vm_shm = NULL;
202 vm->vm_exitingcnt = 0;
203 return (vm);
204}
205
206void
207vm_init2(void)
208{
209 zinitna(mapentzone, &mapentobj, NULL, 0, 0,
210 ZONE_USE_RESERVE | ZONE_SPECIAL, 1);
211 zinitna(mapzone, &mapobj, NULL, 0, 0, 0, 1);
212 vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
213 pmap_init2();
214 vm_object_init2();
215}
216
217static __inline void
218vmspace_dofree(struct vmspace *vm)
219{
220 int count;
221
222 /*
223	 * Make sure any SysV shm is freed; it might not have been
224	 * freed in exit1().
225 */
226 shmexit(vm);
227
228 KKASSERT(vm->vm_upcalls == NULL);
229
230 /*
231 * Lock the map, to wait out all other references to it.
232 * Delete all of the mappings and pages they hold, then call
233 * the pmap module to reclaim anything left.
234 */
235 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
236 vm_map_lock(&vm->vm_map);
237 vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
238 vm->vm_map.max_offset, &count);
239 vm_map_unlock(&vm->vm_map);
240 vm_map_entry_release(count);
241
242 pmap_release(vmspace_pmap(vm));
243 zfree(vmspace_zone, vm);
244}
245
246void
247vmspace_free(struct vmspace *vm)
248{
249 if (vm->vm_refcnt == 0)
250 panic("vmspace_free: attempt to free already freed vmspace");
251
252 if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
253 vmspace_dofree(vm);
254}
255
256void
257vmspace_exitfree(struct proc *p)
258{
259 struct vmspace *vm;
260
261 vm = p->p_vmspace;
262 p->p_vmspace = NULL;
263
264 /*
265 * cleanup by parent process wait()ing on exiting child. vm_refcnt
266 * may not be 0 (e.g. fork() and child exits without exec()ing).
267 * exitingcnt may increment above 0 and drop back down to zero
268 * several times while vm_refcnt is held non-zero. vm_refcnt
269 * may also increment above 0 and drop back down to zero several
270 * times while vm_exitingcnt is held non-zero.
271 *
272 * The last wait on the exiting child's vmspace will clean up
273 * the remainder of the vmspace.
274 */
275 if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
276 vmspace_dofree(vm);
277}
278
279/*
280 *	vmspace_swap_count() - count the approximate swap usage in pages for a
281 * vmspace.
282 *
283 *	Swap usage is determined by taking the proportional swap used by
284 *	VM objects backing the VM map.  To make up for fractional losses,
285 *	if the VM object has any swap use at all, the associated map entries
286 * count for at least 1 swap page.
287 */
288int
289vmspace_swap_count(struct vmspace *vmspace)
290{
291 vm_map_t map = &vmspace->vm_map;
292 vm_map_entry_t cur;
293 vm_object_t object;
294 int count = 0;
295 int n;
296
297 for (cur = map->header.next; cur != &map->header; cur = cur->next) {
298 switch(cur->maptype) {
299 case VM_MAPTYPE_NORMAL:
300 case VM_MAPTYPE_VPAGETABLE:
301 if ((object = cur->object.vm_object) == NULL)
302 break;
303 if (object->type != OBJT_SWAP)
304 break;
305 n = (cur->end - cur->start) / PAGE_SIZE;
306 if (object->un_pager.swp.swp_bcount) {
307 count += object->un_pager.swp.swp_bcount *
308 SWAP_META_PAGES * n / object->size + 1;
309 }
310 break;
311 default:
312 break;
313 }
314 }
315 return(count);
316}
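/*
 * Illustrative note (not part of the original file): swp_bcount *
 * SWAP_META_PAGES approximates the number of object pages with swap
 * assigned, and the expression above prorates that by the fraction of
 * the object covered by the entry.  For example, if that approximation
 * yields 512 swapped pages for a 1024-page object and the entry maps
 * 256 of those pages, the entry is charged 512 * 256 / 1024 + 1 = 129
 * pages.
 */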
317
318
319/*
320 * vm_map_create:
321 *
322 * Creates and returns a new empty VM map with
323 * the given physical map structure, and having
324 * the given lower and upper address bounds.
325 */
326vm_map_t
327vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
328{
329 vm_map_t result;
330
331 result = zalloc(mapzone);
332 vm_map_init(result, min, max);
333 result->pmap = pmap;
334 return (result);
335}
336
337/*
338 * Initialize an existing vm_map structure
339 * such as that in the vmspace structure.
340 * The pmap is set elsewhere.
341 */
342void
343vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max)
344{
345 map->header.next = map->header.prev = &map->header;
346 RB_INIT(&map->rb_root);
347 map->nentries = 0;
348 map->size = 0;
349 map->system_map = 0;
350 map->infork = 0;
351 map->min_offset = min;
352 map->max_offset = max;
353 map->first_free = &map->header;
354 map->hint = &map->header;
355 map->timestamp = 0;
356 lockinit(&map->lock, "thrd_sleep", 0, 0);
357}
358
359/*
360 * vm_map_entry_reserve_cpu_init:
361 *
362 * Set an initial negative count so the first attempt to reserve
363 * space preloads a bunch of vm_map_entry's for this cpu. Also
364 * pre-allocate 2 vm_map_entries which will be needed by zalloc() to
365 * map a new page for vm_map_entry structures. SMP systems are
366 * particularly sensitive.
367 *
368 * This routine is called in early boot so we cannot just call
369 * vm_map_entry_reserve().
370 *
371 *	May be called for a gd other than mycpu, but may only be called
372 * during early boot.
373 */
374void
375vm_map_entry_reserve_cpu_init(globaldata_t gd)
376{
377 vm_map_entry_t entry;
378 int i;
379
380 gd->gd_vme_avail -= MAP_RESERVE_COUNT * 2;
381 entry = &cpu_map_entry_init[gd->gd_cpuid][0];
382 for (i = 0; i < VMEPERCPU; ++i, ++entry) {
383 entry->next = gd->gd_vme_base;
384 gd->gd_vme_base = entry;
385 }
386}
387
388/*
389 * vm_map_entry_reserve:
390 *
391 * Reserves vm_map_entry structures so code later on can manipulate
392 *	map_entry structures within a locked map without blocking while
393 *	trying to allocate a new vm_map_entry.
394 */
395int
396vm_map_entry_reserve(int count)
397{
398 struct globaldata *gd = mycpu;
399 vm_map_entry_t entry;
400
401 crit_enter();
402
403 /*
404 * Make sure we have enough structures in gd_vme_base to handle
405 * the reservation request.
406 */
407 while (gd->gd_vme_avail < count) {
408 entry = zalloc(mapentzone);
409 entry->next = gd->gd_vme_base;
410 gd->gd_vme_base = entry;
411 ++gd->gd_vme_avail;
412 }
413 gd->gd_vme_avail -= count;
414 crit_exit();
415 return(count);
416}
417
418/*
419 * vm_map_entry_release:
420 *
421 * Releases previously reserved vm_map_entry structures that were not
422 *	used.  If we have too much junk in our per-cpu cache, clean some of
423 * it out.
424 */
425void
426vm_map_entry_release(int count)
427{
428 struct globaldata *gd = mycpu;
429 vm_map_entry_t entry;
430
431 crit_enter();
432 gd->gd_vme_avail += count;
433 while (gd->gd_vme_avail > MAP_RESERVE_SLOP) {
434 entry = gd->gd_vme_base;
435 KKASSERT(entry != NULL);
436 gd->gd_vme_base = entry->next;
437 --gd->gd_vme_avail;
438 crit_exit();
439 zfree(mapentzone, entry);
440 crit_enter();
441 }
442 crit_exit();
443}
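/*
 * Usage sketch (illustrative note, not part of the original file):
 * callers reserve entries before locking a map so that entry
 * manipulation done while holding the lock never has to block in
 * zalloc().  The typical pattern, as used by vmspace_dofree() above
 * and by most map operations below, is:
 *
 *	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	vm_map_lock(map);
 *	...clip, insert, or delete entries, passing &count...
 *	vm_map_unlock(map);
 *	vm_map_entry_release(count);
 */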
444
445/*
446 * vm_map_entry_kreserve:
447 *
448 * Reserve map entry structures for use in kernel_map itself. These
449 * entries have *ALREADY* been reserved on a per-cpu basis when the map
450 * was inited. This function is used by zalloc() to avoid a recursion
451 * when zalloc() itself needs to allocate additional kernel memory.
452 *
453 * This function works like the normal reserve but does not load the
454 * vm_map_entry cache (because that would result in an infinite
455 * recursion). Note that gd_vme_avail may go negative. This is expected.
456 *
457 * Any caller of this function must be sure to renormalize after
458 * potentially eating entries to ensure that the reserve supply
459 * remains intact.
460 */
461int
462vm_map_entry_kreserve(int count)
463{
464 struct globaldata *gd = mycpu;
465
466 crit_enter();
467 gd->gd_vme_avail -= count;
468 crit_exit();
469 KASSERT(gd->gd_vme_base != NULL, ("no reserved entries left, gd_vme_avail = %d\n", gd->gd_vme_avail));
470 return(count);
471}
472
473/*
474 * vm_map_entry_krelease:
475 *
476 * Release previously reserved map entries for kernel_map. We do not
477 * attempt to clean up like the normal release function as this would
478 * cause an unnecessary (but probably not fatal) deep procedure call.
479 */
480void
481vm_map_entry_krelease(int count)
482{
483 struct globaldata *gd = mycpu;
484
485 crit_enter();
486 gd->gd_vme_avail += count;
487 crit_exit();
488}
489
490/*
491 * vm_map_entry_create: [ internal use only ]
492 *
493 * Allocates a VM map entry for insertion. No entry fields are filled
494 * in.
495 *
496 * This routine may be called from an interrupt thread but not a FAST
497 * interrupt. This routine may recurse the map lock.
498 */
499static vm_map_entry_t
500vm_map_entry_create(vm_map_t map, int *countp)
501{
502 struct globaldata *gd = mycpu;
503 vm_map_entry_t entry;
504
505 KKASSERT(*countp > 0);
506 --*countp;
507 crit_enter();
508 entry = gd->gd_vme_base;
509 KASSERT(entry != NULL, ("gd_vme_base NULL! count %d", *countp));
510 gd->gd_vme_base = entry->next;
511 crit_exit();
512 return(entry);
513}
514
515/*
516 * vm_map_entry_dispose: [ internal use only ]
517 *
518 * Dispose of a vm_map_entry that is no longer being referenced. This
519 * function may be called from an interrupt.
520 */
521static void
522vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry, int *countp)
523{
524 struct globaldata *gd = mycpu;
525
526 KKASSERT(map->hint != entry);
527 KKASSERT(map->first_free != entry);
528
529 ++*countp;
530 crit_enter();
531 entry->next = gd->gd_vme_base;
532 gd->gd_vme_base = entry;
533 crit_exit();
534}
535
536
537/*
538 * vm_map_entry_{un,}link:
539 *
540 * Insert/remove entries from maps.
541 */
542static __inline void
543vm_map_entry_link(vm_map_t map,
544 vm_map_entry_t after_where,
545 vm_map_entry_t entry)
546{
547 map->nentries++;
548 entry->prev = after_where;
549 entry->next = after_where->next;
550 entry->next->prev = entry;
551 after_where->next = entry;
552 if (vm_map_rb_tree_RB_INSERT(&map->rb_root, entry))
553 panic("vm_map_entry_link: dup addr map %p ent %p", map, entry);
554}
555
556static __inline void
557vm_map_entry_unlink(vm_map_t map,
558 vm_map_entry_t entry)
559{
560 vm_map_entry_t prev;
561 vm_map_entry_t next;
562
563 if (entry->eflags & MAP_ENTRY_IN_TRANSITION)
564 panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry);
565 prev = entry->prev;
566 next = entry->next;
567 next->prev = prev;
568 prev->next = next;
569 vm_map_rb_tree_RB_REMOVE(&map->rb_root, entry);
570 map->nentries--;
571}
572
573/*
574 * vm_map_lookup_entry: [ internal use only ]
575 *
576 * Finds the map entry containing (or
577 * immediately preceding) the specified address
578 * in the given map; the entry is returned
579 * in the "entry" parameter. The boolean
580 * result indicates whether the address is
581 * actually contained in the map.
582 */
583boolean_t
584vm_map_lookup_entry(vm_map_t map, vm_offset_t address,
585 vm_map_entry_t *entry /* OUT */)
586{
587 vm_map_entry_t tmp;
588 vm_map_entry_t last;
589
590#if 0
591 /*
592 * XXX TEMPORARILY DISABLED. For some reason our attempt to revive
593 * the hint code with the red-black lookup meets with system crashes
594 * and lockups. We do not yet know why.
595 *
596 * It is possible that the problem is related to the setting
597 * of the hint during map_entry deletion, in the code specified
598 * at the GGG comment later on in this file.
599 */
600 /*
601 * Quickly check the cached hint, there's a good chance of a match.
602 */
603 if (map->hint != &map->header) {
604 tmp = map->hint;
605 if (address >= tmp->start && address < tmp->end) {
606 *entry = tmp;
607 return(TRUE);
608 }
609 }
610#endif
611
612 /*
613 * Locate the record from the top of the tree. 'last' tracks the
614 * closest prior record and is returned if no match is found, which
615 * in binary tree terms means tracking the most recent right-branch
616 * taken. If there is no prior record, &map->header is returned.
617 */
618 last = &map->header;
619 tmp = RB_ROOT(&map->rb_root);
620
621 while (tmp) {
622 if (address >= tmp->start) {
623 if (address < tmp->end) {
624 *entry = tmp;
625 map->hint = tmp;
626 return(TRUE);
627 }
628 last = tmp;
629 tmp = RB_RIGHT(tmp, rb_entry);
630 } else {
631 tmp = RB_LEFT(tmp, rb_entry);
632 }
633 }
634 *entry = last;
635 return (FALSE);
636}
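/*
 * Usage sketch (illustrative note, not part of the original file):
 * callers typically use the boolean result to decide whether to clip
 * the containing entry or to start at the entry that follows, e.g.:
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start, &count);
 *	else
 *		entry = entry->next;
 *
 * This is the pattern used by vm_map_submap(), vm_map_protect(), and
 * vm_map_inherit() below.
 */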
637
638/*
639 * vm_map_insert:
640 *
641 * Inserts the given whole VM object into the target
642 * map at the specified address range. The object's
643 * size should match that of the address range.
644 *
645 * Requires that the map be locked, and leaves it so. Requires that
646 * sufficient vm_map_entry structures have been reserved and tracks
647 * the use via countp.
648 *
649 * If object is non-NULL, ref count must be bumped by caller
650 * prior to making call to account for the new entry.
651 */
652int
653vm_map_insert(vm_map_t map, int *countp,
654 vm_object_t object, vm_ooffset_t offset,
655 vm_offset_t start, vm_offset_t end,
656 vm_maptype_t maptype,
657 vm_prot_t prot, vm_prot_t max,
658 int cow)
659{
660 vm_map_entry_t new_entry;
661 vm_map_entry_t prev_entry;
662 vm_map_entry_t temp_entry;
663 vm_eflags_t protoeflags;
664
665 /*
666 * Check that the start and end points are not bogus.
667 */
668
669 if ((start < map->min_offset) || (end > map->max_offset) ||
670 (start >= end))
671 return (KERN_INVALID_ADDRESS);
672
673 /*
674 * Find the entry prior to the proposed starting address; if it's part
675 * of an existing entry, this range is bogus.
676 */
677
678 if (vm_map_lookup_entry(map, start, &temp_entry))
679 return (KERN_NO_SPACE);
680
681 prev_entry = temp_entry;
682
683 /*
684 * Assert that the next entry doesn't overlap the end point.
685 */
686
687 if ((prev_entry->next != &map->header) &&
688 (prev_entry->next->start < end))
689 return (KERN_NO_SPACE);
690
691 protoeflags = 0;
692
693 if (cow & MAP_COPY_ON_WRITE)
694 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
695
696 if (cow & MAP_NOFAULT) {
697 protoeflags |= MAP_ENTRY_NOFAULT;
698
699 KASSERT(object == NULL,
700 ("vm_map_insert: paradoxical MAP_NOFAULT request"));
701 }
702 if (cow & MAP_DISABLE_SYNCER)
703 protoeflags |= MAP_ENTRY_NOSYNC;
704 if (cow & MAP_DISABLE_COREDUMP)
705 protoeflags |= MAP_ENTRY_NOCOREDUMP;
706
707 if (object) {
708 /*
709 * When object is non-NULL, it could be shared with another
710 * process. We have to set or clear OBJ_ONEMAPPING
711 * appropriately.
712 */
713 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
714 vm_object_clear_flag(object, OBJ_ONEMAPPING);
715 }
716 }
717 else if ((prev_entry != &map->header) &&
718 (prev_entry->eflags == protoeflags) &&
719 (prev_entry->end == start) &&
720 (prev_entry->wired_count == 0) &&
721 prev_entry->maptype == maptype &&
722 ((prev_entry->object.vm_object == NULL) ||
723 vm_object_coalesce(prev_entry->object.vm_object,
724 OFF_TO_IDX(prev_entry->offset),
725 (vm_size_t)(prev_entry->end - prev_entry->start),
726 (vm_size_t)(end - prev_entry->end)))) {
727 /*
728 * We were able to extend the object. Determine if we
729 * can extend the previous map entry to include the
730 * new range as well.
731 */
732 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
733 (prev_entry->protection == prot) &&
734 (prev_entry->max_protection == max)) {
735 map->size += (end - prev_entry->end);
736 prev_entry->end = end;
737 vm_map_simplify_entry(map, prev_entry, countp);
738 return (KERN_SUCCESS);
739 }
740
741 /*
742 * If we can extend the object but cannot extend the
743 * map entry, we have to create a new map entry. We
744 * must bump the ref count on the extended object to
745 * account for it. object may be NULL.
746 */
747 object = prev_entry->object.vm_object;
748 offset = prev_entry->offset +
749 (prev_entry->end - prev_entry->start);
750 vm_object_reference(object);
751 }
752
753 /*
754 * NOTE: if conditionals fail, object can be NULL here. This occurs
755 * in things like the buffer map where we manage kva but do not manage
756 * backing objects.
757 */
758
759 /*
760 * Create a new entry
761 */
762
763 new_entry = vm_map_entry_create(map, countp);
764 new_entry->start = start;
765 new_entry->end = end;
766
767 new_entry->maptype = maptype;
768 new_entry->eflags = protoeflags;
769 new_entry->object.vm_object = object;
770 new_entry->offset = offset;
771 new_entry->avail_ssize = 0;
772
773 new_entry->inheritance = VM_INHERIT_DEFAULT;
774 new_entry->protection = prot;
775 new_entry->max_protection = max;
776 new_entry->wired_count = 0;
777
778 /*
779 * Insert the new entry into the list
780 */
781
782 vm_map_entry_link(map, prev_entry, new_entry);
783 map->size += new_entry->end - new_entry->start;
784
785 /*
786 * Update the free space hint
787 */
788 if ((map->first_free == prev_entry) &&
789 (prev_entry->end >= new_entry->start)) {
790 map->first_free = new_entry;
791 }
792
793#if 0
794 /*
795 * Temporarily removed to avoid MAP_STACK panic, due to
796 * MAP_STACK being a huge hack. Will be added back in
797 * when MAP_STACK (and the user stack mapping) is fixed.
798 */
799 /*
800 * It may be possible to simplify the entry
801 */
802 vm_map_simplify_entry(map, new_entry, countp);
803#endif
804
805 if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
806 pmap_object_init_pt(map->pmap, start, prot,
807 object, OFF_TO_IDX(offset), end - start,
808 cow & MAP_PREFAULT_PARTIAL);
809 }
810
811 return (KERN_SUCCESS);
812}
813
814/*
815 * Find sufficient space for `length' bytes in the given map, starting at
816 * `start'. The map must be locked. Returns 0 on success, 1 on no space.
817 *
818 * This function will return an arbitrarily aligned pointer.  If no
819 * particular alignment is required you should pass align as 1. Note that
820 * the map may return PAGE_SIZE aligned pointers if all the lengths used in
821 * the map are a multiple of PAGE_SIZE, even if you pass a smaller align
822 * argument.
823 *
824 * 'align' should be a power of 2 but is not required to be.
825 */
826int
827vm_map_findspace(
828 vm_map_t map,
829 vm_offset_t start,
830 vm_size_t length,
831 vm_offset_t align,
832 vm_offset_t *addr)
833{
834 vm_map_entry_t entry, next;
835 vm_offset_t end;
836 vm_offset_t align_mask;
837
838 if (start < map->min_offset)
839 start = map->min_offset;
840 if (start > map->max_offset)
841 return (1);
842
843 /*
844 * If the alignment is not a power of 2 we will have to use
845 * a mod/division, set align_mask to a special value.
846 */
847 if ((align | (align - 1)) + 1 != (align << 1))
848 align_mask = (vm_offset_t)-1;
849 else
850 align_mask = align - 1;
851
852retry:
853 /*
854 * Look for the first possible address; if there's already something
855 * at this address, we have to start after it.
856 */
857 if (start == map->min_offset) {
858 if ((entry = map->first_free) != &map->header)
859 start = entry->end;
860 } else {
861 vm_map_entry_t tmp;
862
863 if (vm_map_lookup_entry(map, start, &tmp))
864 start = tmp->end;
865 entry = tmp;
866 }
867
868 /*
869 * Look through the rest of the map, trying to fit a new region in the
870 * gap between existing regions, or after the very last region.
871 */
872 for (;; start = (entry = next)->end) {
873 /*
874 * Adjust the proposed start by the requested alignment,
875 * be sure that we didn't wrap the address.
876 */
877 if (align_mask == (vm_offset_t)-1)
878 end = ((start + align - 1) / align) * align;
879 else
880 end = (start + align_mask) & ~align_mask;
881 if (end < start)
882 return (1);
883 start = end;
884 /*
885 * Find the end of the proposed new region. Be sure we didn't
886 * go beyond the end of the map, or wrap around the address.
887 * Then check to see if this is the last entry or if the
888 * proposed end fits in the gap between this and the next
889 * entry.
890 */
891 end = start + length;
892 if (end > map->max_offset || end < start)
893 return (1);
894 next = entry->next;
895 if (next == &map->header || next->start >= end)
896 break;
897 }
898 map->hint = entry;
899 if (map == kernel_map) {
900 vm_offset_t ksize;
901 if ((ksize = round_page(start + length)) > kernel_vm_end) {
902 pmap_growkernel(ksize);
903 goto retry;
904 }
905 }
906 *addr = start;
907 return (0);
908}
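/*
 * Illustrative note (not part of the original file): for a power-of-2
 * alignment the mask form rounds up without a division, e.g. with
 * align = 16 (align_mask = 0xf) a start of 0x1234 becomes
 * (0x1234 + 0xf) & ~0xf = 0x1240.  The power-of-2 test above relies on
 * (align | (align - 1)) + 1 equaling align << 1 only when align has a
 * single bit set, e.g. (16 | 15) + 1 == 32 == 16 << 1, whereas
 * (24 | 23) + 1 == 32 != 48.
 */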
909
910/*
911 * vm_map_find finds an unallocated region in the target address
912 * map with the given length. The search is defined to be
913 * first-fit from the specified address; the region found is
914 * returned in the same parameter.
915 *
916 * If object is non-NULL, ref count must be bumped by caller
917 * prior to making call to account for the new entry.
918 */
919int
920vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
921 vm_offset_t *addr, vm_size_t length,
922 boolean_t find_space,
923 vm_maptype_t maptype,
924 vm_prot_t prot, vm_prot_t max,
925 int cow)
926{
927 vm_offset_t start;
928 int result;
929 int count;
930
931 start = *addr;
932
933 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
934 vm_map_lock(map);
935 if (find_space) {
936 if (vm_map_findspace(map, start, length, 1, addr)) {
937 vm_map_unlock(map);
938 vm_map_entry_release(count);
939 return (KERN_NO_SPACE);
940 }
941 start = *addr;
942 }
943 result = vm_map_insert(map, &count, object, offset,
944 start, start + length,
945 maptype,
946 prot, max,
947 cow);
948 vm_map_unlock(map);
949 vm_map_entry_release(count);
950
951 return (result);
952}
953
954/*
955 * vm_map_simplify_entry:
956 *
957 * Simplify the given map entry by merging with either neighbor. This
958 * routine also has the ability to merge with both neighbors.
959 *
960 * The map must be locked.
961 *
962 *	This routine guarantees that the passed entry remains valid (though
963 * possibly extended). When merging, this routine may delete one or
964 * both neighbors. No action is taken on entries which have their
965 * in-transition flag set.
966 */
967void
968vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry, int *countp)
969{
970 vm_map_entry_t next, prev;
971 vm_size_t prevsize, esize;
972
973 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
974 ++mycpu->gd_cnt.v_intrans_coll;
975 return;
976 }
977
978 if (entry->maptype == VM_MAPTYPE_SUBMAP)
979 return;
980
981 prev = entry->prev;
982 if (prev != &map->header) {
983 prevsize = prev->end - prev->start;
984 if ( (prev->end == entry->start) &&
985 (prev->maptype == entry->maptype) &&
986 (prev->object.vm_object == entry->object.vm_object) &&
987 (!prev->object.vm_object ||
988 (prev->offset + prevsize == entry->offset)) &&
989 (prev->eflags == entry->eflags) &&
990 (prev->protection == entry->protection) &&
991 (prev->max_protection == entry->max_protection) &&
992 (prev->inheritance == entry->inheritance) &&
993 (prev->wired_count == entry->wired_count)) {
994 if (map->first_free == prev)
995 map->first_free = entry;
996 if (map->hint == prev)
997 map->hint = entry;
998 vm_map_entry_unlink(map, prev);
999 entry->start = prev->start;
1000 entry->offset = prev->offset;
1001 if (prev->object.vm_object)
1002 vm_object_deallocate(prev->object.vm_object);
1003 vm_map_entry_dispose(map, prev, countp);
1004 }
1005 }
1006
1007 next = entry->next;
1008 if (next != &map->header) {
1009 esize = entry->end - entry->start;
1010 if ((entry->end == next->start) &&
1011 (next->maptype == entry->maptype) &&
1012 (next->object.vm_object == entry->object.vm_object) &&
1013 (!entry->object.vm_object ||
1014 (entry->offset + esize == next->offset)) &&
1015 (next->eflags == entry->eflags) &&
1016 (next->protection == entry->protection) &&
1017 (next->max_protection == entry->max_protection) &&
1018 (next->inheritance == entry->inheritance) &&
1019 (next->wired_count == entry->wired_count)) {
1020 if (map->first_free == next)
1021 map->first_free = entry;
1022 if (map->hint == next)
1023 map->hint = entry;
1024 vm_map_entry_unlink(map, next);
1025 entry->end = next->end;
1026 if (next->object.vm_object)
1027 vm_object_deallocate(next->object.vm_object);
1028 vm_map_entry_dispose(map, next, countp);
1029 }
1030 }
1031}
1032/*
1033 * vm_map_clip_start: [ internal use only ]
1034 *
1035 * Asserts that the given entry begins at or after
1036 * the specified address; if necessary,
1037 * it splits the entry into two.
1038 */
1039#define vm_map_clip_start(map, entry, startaddr, countp) \
1040{ \
1041 if (startaddr > entry->start) \
1042 _vm_map_clip_start(map, entry, startaddr, countp); \
1043}
1044
1045/*
1046 * This routine is called only when it is known that
1047 * the entry must be split.
1048 */
1049static void
1050_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start, int *countp)
1051{
1052 vm_map_entry_t new_entry;
1053
1054 /*
1055 * Split off the front portion -- note that we must insert the new
1056 * entry BEFORE this one, so that this entry has the specified
1057 * starting address.
1058 */
1059
1060 vm_map_simplify_entry(map, entry, countp);
1061
1062 /*
1063 * If there is no object backing this entry, we might as well create
1064 * one now. If we defer it, an object can get created after the map
1065 * is clipped, and individual objects will be created for the split-up
1066 * map. This is a bit of a hack, but is also about the best place to
1067 * put this improvement.
1068 */
1069
1070 if (entry->object.vm_object == NULL && !map->system_map) {
1071 vm_object_t object;
1072 object = vm_object_allocate(OBJT_DEFAULT,
1073 atop(entry->end - entry->start));
1074 entry->object.vm_object = object;
1075 entry->offset = 0;
1076 }
1077
1078 new_entry = vm_map_entry_create(map, countp);
1079 *new_entry = *entry;
1080
1081 new_entry->end = start;
1082 entry->offset += (start - entry->start);
1083 entry->start = start;
1084
1085 vm_map_entry_link(map, entry->prev, new_entry);
1086
1087 switch(entry->maptype) {
1088 case VM_MAPTYPE_NORMAL:
1089 case VM_MAPTYPE_VPAGETABLE:
1090 vm_object_reference(new_entry->object.vm_object);
1091 break;
1092 default:
1093 break;
1094 }
1095}
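/*
 * Illustrative note (not part of the original file): clipping an entry
 * covering [0x1000, 0x5000) at start = 0x3000 leaves the original
 * entry describing [0x3000, 0x5000), with its offset advanced by
 * 0x2000, and links a new entry describing [0x1000, 0x3000) in front
 * of it.
 */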
1096
1097/*
1098 * vm_map_clip_end: [ internal use only ]
1099 *
1100 * Asserts that the given entry ends at or before
1101 * the specified address; if necessary,
1102 * it splits the entry into two.
1103 */
1104
1105#define vm_map_clip_end(map, entry, endaddr, countp) \
1106{ \
1107 if (endaddr < entry->end) \
1108 _vm_map_clip_end(map, entry, endaddr, countp); \
1109}
1110
1111/*
1112 * This routine is called only when it is known that
1113 * the entry must be split.
1114 */
1115static void
1116_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, int *countp)
1117{
1118 vm_map_entry_t new_entry;
1119
1120 /*
1121 * If there is no object backing this entry, we might as well create
1122 * one now. If we defer it, an object can get created after the map
1123 * is clipped, and individual objects will be created for the split-up
1124 * map. This is a bit of a hack, but is also about the best place to
1125 * put this improvement.
1126 */
1127
1128 if (entry->object.vm_object == NULL && !map->system_map) {
1129 vm_object_t object;
1130 object = vm_object_allocate(OBJT_DEFAULT,
1131 atop(entry->end - entry->start));
1132 entry->object.vm_object = object;
1133 entry->offset = 0;
1134 }
1135
1136 /*
1137 * Create a new entry and insert it AFTER the specified entry
1138 */
1139
1140 new_entry = vm_map_entry_create(map, countp);
1141 *new_entry = *entry;
1142
1143 new_entry->start = entry->end = end;
1144 new_entry->offset += (end - entry->start);
1145
1146 vm_map_entry_link(map, entry, new_entry);
1147
1148 switch(entry->maptype) {
1149 case VM_MAPTYPE_NORMAL:
1150 case VM_MAPTYPE_VPAGETABLE:
1151 vm_object_reference(new_entry->object.vm_object);
1152 break;
1153 default:
1154 break;
1155 }
1156}
1157
1158/*
1159 * VM_MAP_RANGE_CHECK: [ internal use only ]
1160 *
1161 * Asserts that the starting and ending region
1162 * addresses fall within the valid range of the map.
1163 */
1164#define VM_MAP_RANGE_CHECK(map, start, end) \
1165 { \
1166 if (start < vm_map_min(map)) \
1167 start = vm_map_min(map); \
1168 if (end > vm_map_max(map)) \
1169 end = vm_map_max(map); \
1170 if (start > end) \
1171 start = end; \
1172 }
1173
1174/*
1175 * vm_map_transition_wait: [ kernel use only ]
1176 *
1177 *	Used to block when an in-transition collision occurs.  The map
1178 * is unlocked for the sleep and relocked before the return.
1179 */
1180static
1181void
1182vm_map_transition_wait(vm_map_t map)
1183{
1184 vm_map_unlock(map);
1185 tsleep(map, 0, "vment", 0);
1186 vm_map_lock(map);
1187}
1188
1189/*
1190 * CLIP_CHECK_BACK
1191 * CLIP_CHECK_FWD
1192 *
1193 * When we do blocking operations with the map lock held it is
1194 *	possible that a clip might have occurred on our in-transition entry,
1195 * requiring an adjustment to the entry in our loop. These macros
1196 * help the pageable and clip_range code deal with the case. The
1197 *	conditional costs virtually nothing if no clipping has occurred.
1198 */
1199
1200#define CLIP_CHECK_BACK(entry, save_start) \
1201 do { \
1202 while (entry->start != save_start) { \
1203 entry = entry->prev; \
1204 KASSERT(entry != &map->header, ("bad entry clip")); \
1205 } \
1206 } while(0)
1207
1208#define CLIP_CHECK_FWD(entry, save_end) \
1209 do { \
1210 while (entry->end != save_end) { \
1211 entry = entry->next; \
1212 KASSERT(entry != &map->header, ("bad entry clip")); \
1213 } \
1214 } while(0)
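/*
 * Usage sketch (illustrative note, not part of the original file):
 * a caller records the boundary of interest before a potentially
 * blocking call and then re-finds the matching fragment, e.g.:
 *
 *	save_start = entry->start;
 *	rv = vm_fault_wire(map, entry, TRUE);	(may block and clip)
 *	CLIP_CHECK_BACK(entry, save_start);
 *
 * This mirrors the way vm_map_unwire() and vm_map_clip_range() use
 * these macros below.
 */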
1215
1216
1217/*
1218 * vm_map_clip_range: [ kernel use only ]
1219 *
1220 * Clip the specified range and return the base entry. The
1221 * range may cover several entries starting at the returned base
1222 * and the first and last entry in the covering sequence will be
1223 * properly clipped to the requested start and end address.
1224 *
1225 * If no holes are allowed you should pass the MAP_CLIP_NO_HOLES
1226 * flag.
1227 *
1228 * The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
1229 * covered by the requested range.
1230 *
1231 * The map must be exclusively locked on entry and will remain locked
1232 * on return. If no range exists or the range contains holes and you
1233 * specified that no holes were allowed, NULL will be returned. This
1234 *	routine may temporarily unlock the map in order to avoid a deadlock when
1235 * sleeping.
1236 */
1237static
1238vm_map_entry_t
1239vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end,
1240 int *countp, int flags)
1241{
1242 vm_map_entry_t start_entry;
1243 vm_map_entry_t entry;
1244
1245 /*
1246 * Locate the entry and effect initial clipping. The in-transition
1247 * case does not occur very often so do not try to optimize it.
1248 */
1249again:
1250 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
1251 return (NULL);
1252 entry = start_entry;
1253 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1254 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1255 ++mycpu->gd_cnt.v_intrans_coll;
1256 ++mycpu->gd_cnt.v_intrans_wait;
1257 vm_map_transition_wait(map);
1258 /*
1259 * entry and/or start_entry may have been clipped while
1260 * we slept, or may have gone away entirely. We have
1261 * to restart from the lookup.
1262 */
1263 goto again;
1264 }
1265 /*
1266 * Since we hold an exclusive map lock we do not have to restart
1267 * after clipping, even though clipping may block in zalloc.
1268 */
1269 vm_map_clip_start(map, entry, start, countp);
1270 vm_map_clip_end(map, entry, end, countp);
1271 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1272
1273 /*
1274 * Scan entries covered by the range. When working on the next
1275 * entry a restart need only re-loop on the current entry which
1276 * we have already locked, since 'next' may have changed. Also,
1277 * even though entry is safe, it may have been clipped so we
1278 * have to iterate forwards through the clip after sleeping.
1279 */
1280 while (entry->next != &map->header && entry->next->start < end) {
1281 vm_map_entry_t next = entry->next;
1282
1283 if (flags & MAP_CLIP_NO_HOLES) {
1284 if (next->start > entry->end) {
1285 vm_map_unclip_range(map, start_entry,
1286 start, entry->end, countp, flags);
1287 return(NULL);
1288 }
1289 }
1290
1291 if (next->eflags & MAP_ENTRY_IN_TRANSITION) {
1292 vm_offset_t save_end = entry->end;
1293 next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1294 ++mycpu->gd_cnt.v_intrans_coll;
1295 ++mycpu->gd_cnt.v_intrans_wait;
1296 vm_map_transition_wait(map);
1297
1298 /*
1299			 * clips might have occurred while we blocked.
1300 */
1301 CLIP_CHECK_FWD(entry, save_end);
1302 CLIP_CHECK_BACK(start_entry, start);
1303 continue;
1304 }
1305 /*
1306 * No restart necessary even though clip_end may block, we
1307 * are holding the map lock.
1308 */
1309 vm_map_clip_end(map, next, end, countp);
1310 next->eflags |= MAP_ENTRY_IN_TRANSITION;
1311 entry = next;
1312 }
1313 if (flags & MAP_CLIP_NO_HOLES) {
1314 if (entry->end != end) {
1315 vm_map_unclip_range(map, start_entry,
1316 start, entry->end, countp, flags);
1317 return(NULL);
1318 }
1319 }
1320 return(start_entry);
1321}
1322
1323/*
1324 * vm_map_unclip_range: [ kernel use only ]
1325 *
1326 * Undo the effect of vm_map_clip_range(). You should pass the same
1327 * flags and the same range that you passed to vm_map_clip_range().
1328 * This code will clear the in-transition flag on the entries and
1329 * wake up anyone waiting. This code will also simplify the sequence
1330 * and attempt to merge it with entries before and after the sequence.
1331 *
1332 * The map must be locked on entry and will remain locked on return.
1333 *
1334 * Note that you should also pass the start_entry returned by
1335 * vm_map_clip_range(). However, if you block between the two calls
1336 *	with the map unlocked, please be aware that the start_entry may
1337 * have been clipped and you may need to scan it backwards to find
1338 * the entry corresponding with the original start address. You are
1339 * responsible for this, vm_map_unclip_range() expects the correct
1340 * start_entry to be passed to it and will KASSERT otherwise.
1341 */
1342static
1343void
1344vm_map_unclip_range(
1345 vm_map_t map,
1346 vm_map_entry_t start_entry,
1347 vm_offset_t start,
1348 vm_offset_t end,
1349 int *countp,
1350 int flags)
1351{
1352 vm_map_entry_t entry;
1353
1354 entry = start_entry;
1355
1356 KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
1357 while (entry != &map->header && entry->start < end) {
1358 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry));
1359 KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped"));
1360 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1361 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1362 entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1363 wakeup(map);
1364 }
1365 entry = entry->next;
1366 }
1367
1368 /*
1369 * Simplification does not block so there is no restart case.
1370 */
1371 entry = start_entry;
1372 while (entry != &map->header && entry->start < end) {
1373 vm_map_simplify_entry(map, entry, countp);
1374 entry = entry->next;
1375 }
1376}
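/*
 * Usage sketch (illustrative note, not part of the original file):
 * the clip/unclip pair brackets an operation over a whole address
 * range, e.g.:
 *
 *	start_entry = vm_map_clip_range(map, start, end, &count,
 *					MAP_CLIP_NO_HOLES);
 *	if (start_entry == NULL)
 *		return (KERN_INVALID_ADDRESS);
 *	...operate on the in-transition entries covering the range...
 *	vm_map_unclip_range(map, start_entry, start, end, &count,
 *			    MAP_CLIP_NO_HOLES);
 *
 * This is the pattern used by vm_map_unwire() and vm_map_wire() below.
 */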
1377
1378/*
1379 * vm_map_submap: [ kernel use only ]
1380 *
1381 * Mark the given range as handled by a subordinate map.
1382 *
1383 * This range must have been created with vm_map_find,
1384 * and no other operations may have been performed on this
1385 * range prior to calling vm_map_submap.
1386 *
1387 * Only a limited number of operations can be performed
1388 *	within this range after calling vm_map_submap:
1389 * vm_fault
1390 * [Don't try vm_map_copy!]
1391 *
1392 * To remove a submapping, one must first remove the
1393 * range from the superior map, and then destroy the
1394 * submap (if desired). [Better yet, don't try it.]
1395 */
1396int
1397vm_map_submap(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_map_t submap)
1398{
1399 vm_map_entry_t entry;
1400 int result = KERN_INVALID_ARGUMENT;
1401 int count;
1402
1403 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1404 vm_map_lock(map);
1405
1406 VM_MAP_RANGE_CHECK(map, start, end);
1407
1408 if (vm_map_lookup_entry(map, start, &entry)) {
1409 vm_map_clip_start(map, entry, start, &count);
1410 } else {
1411 entry = entry->next;
1412 }
1413
1414 vm_map_clip_end(map, entry, end, &count);
1415
1416 if ((entry->start == start) && (entry->end == end) &&
1417 ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1418 (entry->object.vm_object == NULL)) {
1419 entry->object.sub_map = submap;
1420 entry->maptype = VM_MAPTYPE_SUBMAP;
1421 result = KERN_SUCCESS;
1422 }
1423 vm_map_unlock(map);
1424 vm_map_entry_release(count);
1425
1426 return (result);
1427}
1428
1429/*
1430 * vm_map_protect:
1431 *
1432 * Sets the protection of the specified address region in the target map.
1433 * If "set_max" is specified, the maximum protection is to be set;
1434 * otherwise, only the current protection is affected.
1435 *
1436 * The protection is not applicable to submaps, but is applicable to normal
1437 * maps and maps governed by virtual page tables. For example, when operating
1438 * on a virtual page table our protection basically controls how COW occurs
1439 * on a virtual page table our protection basically controls how COW occurs
1440 * on the backing object, whereas the virtual page table itself is an
1441 * abstraction for userland.
1441 */
1442int
1443vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1444 vm_prot_t new_prot, boolean_t set_max)
1445{
1446 vm_map_entry_t current;
1447 vm_map_entry_t entry;
1448 int count;
1449
1450 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1451 vm_map_lock(map);
1452
1453 VM_MAP_RANGE_CHECK(map, start, end);
1454
1455 if (vm_map_lookup_entry(map, start, &entry)) {
1456 vm_map_clip_start(map, entry, start, &count);
1457 } else {
1458 entry = entry->next;
1459 }
1460
1461 /*
1462 * Make a first pass to check for protection violations.
1463 */
1464 current = entry;
1465 while ((current != &map->header) && (current->start < end)) {
1466 if (current->maptype == VM_MAPTYPE_SUBMAP) {
1467 vm_map_unlock(map);
1468 vm_map_entry_release(count);
1469 return (KERN_INVALID_ARGUMENT);
1470 }
1471 if ((new_prot & current->max_protection) != new_prot) {
1472 vm_map_unlock(map);
1473 vm_map_entry_release(count);
1474 return (KERN_PROTECTION_FAILURE);
1475 }
1476 current = current->next;
1477 }
1478
1479 /*
1480 * Go back and fix up protections. [Note that clipping is not
1481 * necessary the second time.]
1482 */
1483 current = entry;
1484
1485 while ((current != &map->header) && (current->start < end)) {
1486 vm_prot_t old_prot;
1487
1488 vm_map_clip_end(map, current, end, &count);
1489
1490 old_prot = current->protection;
1491 if (set_max) {
1492 current->protection =
1493 (current->max_protection = new_prot) &
1494 old_prot;
1495 } else {
1496 current->protection = new_prot;
1497 }
1498
1499 /*
1500 * Update physical map if necessary. Worry about copy-on-write
1501 * here -- CHECK THIS XXX
1502 */
1503
1504 if (current->protection != old_prot) {
1505#define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1506 VM_PROT_ALL)
1507
1508 pmap_protect(map->pmap, current->start,
1509 current->end,
1510 current->protection & MASK(current));
1511#undef MASK
1512 }
1513
1514 vm_map_simplify_entry(map, current, &count);
1515
1516 current = current->next;
1517 }
1518
1519 vm_map_unlock(map);
1520 vm_map_entry_release(count);
1521 return (KERN_SUCCESS);
1522}
1523
1524/*
1525 * vm_map_madvise:
1526 *
1527 *	This routine traverses a process's map handling the madvise
1528 *	system call.  Advisories are classified as either those affecting
1529 *	the vm_map_entry structure, or those affecting the underlying
1530 * objects.
1531 */
1532
1533int
1534vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, int behav)
1535{
1536 vm_map_entry_t current, entry;
1537 int modify_map = 0;
1538 int count;
1539
1540 /*
1541 * Some madvise calls directly modify the vm_map_entry, in which case
1542 * we need to use an exclusive lock on the map and we need to perform
1543 * various clipping operations. Otherwise we only need a read-lock
1544 * on the map.
1545 */
1546
1547 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1548
1549 switch(behav) {
1550 case MADV_NORMAL:
1551 case MADV_SEQUENTIAL:
1552 case MADV_RANDOM:
1553 case MADV_NOSYNC:
1554 case MADV_AUTOSYNC:
1555 case MADV_NOCORE:
1556 case MADV_CORE:
1557 modify_map = 1;
1558 vm_map_lock(map);
1559 break;
1560 case MADV_WILLNEED:
1561 case MADV_DONTNEED:
1562 case MADV_FREE:
1563 vm_map_lock_read(map);
1564 break;
1565 default:
1566 vm_map_entry_release(count);
1567 return (KERN_INVALID_ARGUMENT);
1568 }
1569
1570 /*
1571 * Locate starting entry and clip if necessary.
1572 */
1573
1574 VM_MAP_RANGE_CHECK(map, start, end);
1575
1576 if (vm_map_lookup_entry(map, start, &entry)) {
1577 if (modify_map)
1578 vm_map_clip_start(map, entry, start, &count);
1579 } else {
1580 entry = entry->next;
1581 }
1582
1583 if (modify_map) {
1584 /*
1585 * madvise behaviors that are implemented in the vm_map_entry.
1586 *
1587 * We clip the vm_map_entry so that behavioral changes are
1588 * limited to the specified address range.
1589 */
1590 for (current = entry;
1591 (current != &map->header) && (current->start < end);
1592 current = current->next
1593 ) {
1594 if (current->maptype == VM_MAPTYPE_SUBMAP)
1595 continue;
1596
1597 vm_map_clip_end(map, current, end, &count);
1598
1599 switch (behav) {
1600 case MADV_NORMAL:
1601 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1602 break;
1603 case MADV_SEQUENTIAL:
1604 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1605 break;
1606 case MADV_RANDOM:
1607 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1608 break;
1609 case MADV_NOSYNC:
1610 current->eflags |= MAP_ENTRY_NOSYNC;
1611 break;
1612 case MADV_AUTOSYNC:
1613 current->eflags &= ~MAP_ENTRY_NOSYNC;
1614 break;
1615 case MADV_NOCORE:
1616 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1617 break;
1618 case MADV_CORE:
1619 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1620 break;
1621 default:
1622 break;
1623 }
1624 vm_map_simplify_entry(map, current, &count);
1625 }
1626 vm_map_unlock(map);
1627 } else {
1628 vm_pindex_t pindex;
1629 int count;
1630
1631 /*
1632 * madvise behaviors that are implemented in the underlying
1633 * vm_object.
1634 *
1635 * Since we don't clip the vm_map_entry, we have to clip
1636 * the vm_object pindex and count.
1637 *
1638 * NOTE! We currently do not support these functions on
1639 * virtual page tables.
1640 */
1641 for (current = entry;
1642 (current != &map->header) && (current->start < end);
1643 current = current->next
1644 ) {
1645 vm_offset_t useStart;
1646
1647 if (current->maptype != VM_MAPTYPE_NORMAL)
1648 continue;
1649
1650 pindex = OFF_TO_IDX(current->offset);
1651 count = atop(current->end - current->start);
1652 useStart = current->start;
1653
1654 if (current->start < start) {
1655 pindex += atop(start - current->start);
1656 count -= atop(start - current->start);
1657 useStart = start;
1658 }
1659 if (current->end > end)
1660 count -= atop(current->end - end);
1661
1662 if (count <= 0)
1663 continue;
1664
1665 vm_object_madvise(current->object.vm_object,
1666 pindex, count, behav);
1667 if (behav == MADV_WILLNEED) {
1668 pmap_object_init_pt(
1669 map->pmap,
1670 useStart,
1671 current->protection,
1672 current->object.vm_object,
1673 pindex,
1674 (count << PAGE_SHIFT),
1675 MAP_PREFAULT_MADVISE
1676 );
1677 }
1678 }
1679 vm_map_unlock_read(map);
1680 }
1681 vm_map_entry_release(count);
1682 return(0);
1683}
1684
1685
1686/*
1687 * vm_map_inherit:
1688 *
1689 * Sets the inheritance of the specified address
1690 * range in the target map. Inheritance
1691 * affects how the map will be shared with
1692 * child maps at the time of vm_map_fork.
1693 */
1694int
1695vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1696 vm_inherit_t new_inheritance)
1697{
1698 vm_map_entry_t entry;
1699 vm_map_entry_t temp_entry;
1700 int count;
1701
1702 switch (new_inheritance) {
1703 case VM_INHERIT_NONE:
1704 case VM_INHERIT_COPY:
1705 case VM_INHERIT_SHARE:
1706 break;
1707 default:
1708 return (KERN_INVALID_ARGUMENT);
1709 }
1710
1711 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1712 vm_map_lock(map);
1713
1714 VM_MAP_RANGE_CHECK(map, start, end);
1715
1716 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1717 entry = temp_entry;
1718 vm_map_clip_start(map, entry, start, &count);
1719 } else
1720 entry = temp_entry->next;
1721
1722 while ((entry != &map->header) && (entry->start < end)) {
1723 vm_map_clip_end(map, entry, end, &count);
1724
1725 entry->inheritance = new_inheritance;
1726
1727 vm_map_simplify_entry(map, entry, &count);
1728
1729 entry = entry->next;
1730 }
1731 vm_map_unlock(map);
1732 vm_map_entry_release(count);
1733 return (KERN_SUCCESS);
1734}
1735
1736/*
1737 * Implement the semantics of mlock
1738 */
1739int
1740vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
1741 boolean_t new_pageable)
1742{
1743 vm_map_entry_t entry;
1744 vm_map_entry_t start_entry;
1745 vm_offset_t end;
1746 int rv = KERN_SUCCESS;
1747 int count;
1748
1749 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1750 vm_map_lock(map);
1751 VM_MAP_RANGE_CHECK(map, start, real_end);
1752 end = real_end;
1753
1754 start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
1755 if (start_entry == NULL) {
1756 vm_map_unlock(map);
1757 vm_map_entry_release(count);
1758 return (KERN_INVALID_ADDRESS);
1759 }
1760
1761 if (new_pageable == 0) {
1762 entry = start_entry;
1763 while ((entry != &map->header) && (entry->start < end)) {
1764 vm_offset_t save_start;
1765 vm_offset_t save_end;
1766
1767 /*
1768 * Already user wired or hard wired (trivial cases)
1769 */
1770 if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1771 entry = entry->next;
1772 continue;
1773 }
1774 if (entry->wired_count != 0) {
1775 entry->wired_count++;
1776 entry->eflags |= MAP_ENTRY_USER_WIRED;
1777 entry = entry->next;
1778 continue;
1779 }
1780
1781 /*
1782 * A new wiring requires instantiation of appropriate
1783 * management structures and the faulting in of the
1784 * page.
1785 */
1786 if (entry->maptype != VM_MAPTYPE_SUBMAP) {
1787 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1788 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1789
1790 vm_object_shadow(&entry->object.vm_object,
1791 &entry->offset,
1792 atop(entry->end - entry->start));
1793 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1794
1795 } else if (entry->object.vm_object == NULL &&
1796 !map->system_map) {
1797
1798 entry->object.vm_object =
1799 vm_object_allocate(OBJT_DEFAULT,
1800 atop(entry->end - entry->start));
1801 entry->offset = (vm_offset_t) 0;
1802
1803 }
1804 }
1805 entry->wired_count++;
1806 entry->eflags |= MAP_ENTRY_USER_WIRED;
1807
1808 /*
1809 * Now fault in the area. Note that vm_fault_wire()
1810 * may release the map lock temporarily, it will be
1811 * relocked on return. The in-transition
1812 * flag protects the entries.
1813 */
1814 save_start = entry->start;
1815 save_end = entry->end;
1816 rv = vm_fault_wire(map, entry, TRUE);
1817 if (rv) {
1818 CLIP_CHECK_BACK(entry, save_start);
1819 for (;;) {
1820 KASSERT(entry->wired_count == 1, ("bad wired_count on entry"));
1821 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1822 entry->wired_count = 0;
1823 if (entry->end == save_end)
1824 break;
1825 entry = entry->next;
1826 KASSERT(entry != &map->header, ("bad entry clip during backout"));
1827 }
1828 end = save_start; /* unwire the rest */
1829 break;
1830 }
1831 /*
1832 * note that even though the entry might have been
1833 * clipped, the USER_WIRED flag we set prevents
1834 * duplication so we do not have to do a
1835 * clip check.
1836 */
1837 entry = entry->next;
1838 }
1839
1840 /*
1841 * If we failed fall through to the unwiring section to
1842 * unwire what we had wired so far. 'end' has already
1843 * been adjusted.
1844 */
1845 if (rv)
1846 new_pageable = 1;
1847
1848 /*
1849 * start_entry might have been clipped if we unlocked the
1850		 * map and blocked.  No matter how clipped it has gotten,
1851 * there should be a fragment that is on our start boundary.
1852 */
1853 CLIP_CHECK_BACK(start_entry, start);
1854 }
1855
1856 /*
1857 * Deal with the unwiring case.
1858 */
1859 if (new_pageable) {
1860 /*
1861 * This is the unwiring case. We must first ensure that the
1862 * range to be unwired is really wired down. We know there
1863 * are no holes.
1864 */
1865 entry = start_entry;
1866 while ((entry != &map->header) && (entry->start < end)) {
1867 if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1868 rv = KERN_INVALID_ARGUMENT;
1869 goto done;
1870 }
1871 KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry));
1872 entry = entry->next;
1873 }
1874
1875 /*
1876 * Now decrement the wiring count for each region. If a region
1877 * becomes completely unwired, unwire its physical pages and
1878 * mappings.
1879 */
1880 /*
1881 * The map entries are processed in a loop, checking to
1882 * make sure the entry is wired and asserting it has a wired
1883 * count. However, another loop was inserted more-or-less in
1884 * the middle of the unwiring path. This loop picks up the
1885 * "entry" loop variable from the first loop without first
1886		 * setting it to start_entry.  Naturally, the second loop
1887 * is never entered and the pages backing the entries are
1888 * never unwired. This can lead to a leak of wired pages.
1889 */
1890 entry = start_entry;
1891 while ((entry != &map->header) && (entry->start < end)) {
1892 KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED,
1893 ("expected USER_WIRED on entry %p", entry));
1894 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1895 entry->wired_count--;
1896 if (entry->wired_count == 0)
1897 vm_fault_unwire(map, entry);
1898 entry = entry->next;
1899 }
1900 }
1901done:
1902 vm_map_unclip_range(map, start_entry, start, real_end, &count,
1903 MAP_CLIP_NO_HOLES);
1904 map->timestamp++;
1905 vm_map_unlock(map);
1906 vm_map_entry_release(count);
1907 return (rv);
1908}
1909
1910/*
1911 * vm_map_wire:
1912 *
1913 * Sets the pageability of the specified address
1914 * range in the target map. Regions specified
1915 * as not pageable require locked-down physical
1916 * memory and physical page maps.
1917 *
1918 * The map must not be locked, but a reference
1919 * must remain to the map throughout the call.
1920 *
1921 * This function may be called via the zalloc path and must properly
1922 * reserve map entries for kernel_map.
1923 */
1924int
1925vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
1926{
1927 vm_map_entry_t entry;
1928 vm_map_entry_t start_entry;
1929 vm_offset_t end;
1930 int rv = KERN_SUCCESS;
1931 int count;
1932
1933 if (kmflags & KM_KRESERVE)
1934 count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
1935 else
1936 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
1937 vm_map_lock(map);
1938 VM_MAP_RANGE_CHECK(map, start, real_end);
1939 end = real_end;
1940
1941 start_entry = vm_map_clip_range(map, start, end, &count, MAP_CLIP_NO_HOLES);
1942 if (start_entry == NULL) {
1943 vm_map_unlock(map);
1944 rv = KERN_INVALID_ADDRESS;
1945 goto failure;
1946 }
1947 if ((kmflags & KM_PAGEABLE) == 0) {
1948 /*
1949 * Wiring.
1950 *
1951 * 1. Holding the write lock, we create any shadow or zero-fill
1952 * objects that need to be created. Then we clip each map
1953 * entry to the region to be wired and increment its wiring
1954 * count. We create objects before clipping the map entries
1955 * to avoid object proliferation.
1956 *
1957 * 2. We downgrade to a read lock, and call vm_fault_wire to
1958 * fault in the pages for any newly wired area (wired_count is
1959 * 1).
1960 *
1961 * Downgrading to a read lock for vm_fault_wire avoids a
1962 * possible deadlock with another process that may have faulted
1963 * on one of the pages to be wired (it would mark the page busy,
1964 * blocking us, then in turn block on the map lock that we
1965 * hold). Because of problems in the recursive lock package,
1966 * we cannot upgrade to a write lock in vm_map_lookup. Thus,
1967 * any actions that require the write lock must be done
1968 * beforehand. Because we keep the read lock on the map, the
1969 * copy-on-write status of the entries we modify here cannot
1970 * change.
1971 */
1972
1973 entry = start_entry;
1974 while ((entry != &map->header) && (entry->start < end)) {
1975 /*
1976 * Trivial case if the entry is already wired
1977 */
1978 if (entry->wired_count) {
1979 entry->wired_count++;
1980 entry = entry->next;
1981 continue;
1982 }
1983
1984 /*
1985 * The entry is being newly wired, we have to setup
1986 * appropriate management structures. A shadow
1987 * object is required for a copy-on-write region,
1988 * or a normal object for a zero-fill region. We
1989 * do not have to do this for entries that point to sub
1990 * maps because we won't hold the lock on the sub map.
1991 */
1992 if (entry->maptype != VM_MAPTYPE_SUBMAP) {
1993 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1994 if (copyflag &&
1995 ((entry->protection & VM_PROT_WRITE) != 0)) {
1996
1997 vm_object_shadow(&entry->object.vm_object,
1998 &entry->offset,
1999 atop(entry->end - entry->start));
2000 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2001 } else if (entry->object.vm_object == NULL &&
2002 !map->system_map) {
2003 entry->object.vm_object =
2004 vm_object_allocate(OBJT_DEFAULT,
2005 atop(entry->end - entry->start));
2006 entry->offset = (vm_offset_t) 0;
2007 }
2008 }
2009
2010 entry->wired_count++;
2011 entry = entry->next;
2012 }
2013
2014 /*
2015 * Pass 2.
2016 */
2017
2018 /*
2019 * HACK HACK HACK HACK
2020 *
2021 * Unlock the map to avoid deadlocks. The in-transit flag
2022 * protects us from most changes but note that
2023 * clipping may still occur. To prevent clipping from
2024 * occurring after the unlock, except for when we are
2025 * blocking in vm_fault_wire, we must run in a critical
2026 * section, otherwise our accesses to entry->start and
2027 * entry->end could be corrupted. We have to enter the
2028 * critical section prior to unlocking so start_entry does
2029 * not change out from under us at the very beginning of the
2030 * loop.
2031 *
2032 * HACK HACK HACK HACK
2033 */
2034
2035 crit_enter();
2036
2037 entry = start_entry;
2038 while (entry != &map->header && entry->start < end) {
2039 /*
2040 * If vm_fault_wire fails for any page we need to undo
2041 * what has been done. We decrement the wiring count
2042 * for those pages which have not yet been wired (now)
2043 * and unwire those that have (later).
2044 */
2045 vm_offset_t save_start = entry->start;
2046 vm_offset_t save_end = entry->end;
2047
2048 if (entry->wired_count == 1)
2049 rv = vm_fault_wire(map, entry, FALSE);
2050 if (rv) {
2051 CLIP_CHECK_BACK(entry, save_start);
2052 for (;;) {
2053 KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly"));
2054 entry->wired_count = 0;
2055 if (entry->end == save_end)
2056 break;
2057 entry = entry->next;
2058 KASSERT(entry != &map->header, ("bad entry clip during backout"));
2059 }
2060 end = save_start;
2061 break;
2062 }
2063 CLIP_CHECK_FWD(entry, save_end);
2064 entry = entry->next;
2065 }
2066 crit_exit();
2067
2068 /*
2069 * If a failure occurred, undo everything by falling through
2070 * to the unwiring code. 'end' has already been adjusted
2071 * appropriately.
2072 */
2073 if (rv)
2074 kmflags |= KM_PAGEABLE;
2075
2076 /*
2077 * start_entry is still IN_TRANSITION but may have been
2078 * clipped since vm_fault_wire() unlocks and relocks the
2079 * map. No matter how clipped it has gotten there should
2080 * be a fragment that is on our start boundary.
2081 */
2082 CLIP_CHECK_BACK(start_entry, start);
2083 }
2084
2085 if (kmflags & KM_PAGEABLE) {
2086 /*
2087 * This is the unwiring case. We must first ensure that the
2088 * range to be unwired is really wired down. We know there
2089 * are no holes.
2090 */
2091 entry = start_entry;
2092 while ((entry != &map->header) && (entry->start < end)) {
2093 if (entry->wired_count == 0) {
2094 rv = KERN_INVALID_ARGUMENT;
2095 goto done;
2096 }
2097 entry = entry->next;
2098 }
2099
2100 /*
2101 * Now decrement the wiring count for each region. If a region
2102 * becomes completely unwired, unwire its physical pages and
2103 * mappings.
2104 */
2105 entry = start_entry;
2106 while ((entry != &map->header) && (entry->start < end)) {
2107 entry->wired_count--;
2108 if (entry->wired_count == 0)
2109 vm_fault_unwire(map, entry);
2110 entry = entry->next;
2111 }
2112 }
2113done:
2114 vm_map_unclip_range(map, start_entry, start, real_end, &count,
2115 MAP_CLIP_NO_HOLES);
2116 map->timestamp++;
2117 vm_map_unlock(map);
2118failure:
2119 if (kmflags & KM_KRESERVE)
2120 vm_map_entry_krelease(count);
2121 else
2122 vm_map_entry_release(count);
2123 return (rv);
2124}
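
/*
 * Hypothetical usage sketch (illustrative only, not compiled): a caller
 * that already owns a page-aligned range [addr, addr + size) in 'map'
 * could wire it down and later release the wiring roughly as follows.
 * 'map', 'addr' and 'size' are assumed to be supplied by the caller.
 *
 *	int error;
 *
 *	error = vm_map_wire(map, addr, addr + size, 0);
 *	if (error != KERN_SUCCESS)
 *		return (error);
 *	... use the wired range ...
 *	vm_map_wire(map, addr, addr + size, KM_PAGEABLE);
 *
 * A kernel caller on the zalloc path might also pass KM_KRESERVE so the
 * map entry reservation is taken from the kernel reserve, per the
 * function comment above.
 */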
2125
2126/*
2127 * vm_map_set_wired_quick()
2128 *
2129 * Mark a newly allocated address range as wired but do not fault in
2130 * the pages. The caller is expected to load the pages into the object.
2131 *
2132 * The map must be locked on entry and will remain locked on return.
2133 */
2134void
2135vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *countp)
2136{
2137 vm_map_entry_t scan;
2138 vm_map_entry_t entry;
2139
2140 entry = vm_map_clip_range(map, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2141 for (scan = entry; scan != &map->header && scan->start < addr + size; scan = scan->next) {
2142 KKASSERT(scan->wired_count == 0);
2143 scan->wired_count = 1;
2144 }
2145 vm_map_unclip_range(map, entry, addr, addr + size, countp, MAP_CLIP_NO_HOLES);
2146}
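
/*
 * Hypothetical usage sketch (illustrative only, not compiled): a caller
 * that populates the backing object itself might mark a freshly
 * allocated range wired like this; 'map', 'addr' and 'size' are assumed
 * to come from the caller, and the count follows the reserve/release
 * pattern used throughout this file.
 *
 *	int count;
 *
 *	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
 *	vm_map_lock(map);
 *	vm_map_set_wired_quick(map, addr, size, &count);
 *	vm_map_unlock(map);
 *	vm_map_entry_release(count);
 */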
2147
2148/*
2149 * vm_map_clean
2150 *
2151 * Push any dirty cached pages in the address range to their pager.
2152 * If syncio is TRUE, dirty pages are written synchronously.
2153 * If invalidate is TRUE, any cached pages are freed as well.
2154 *
2155 * Returns an error if any part of the specified range is not mapped.
2156 */
2157int
2158vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, boolean_t syncio,
2159 boolean_t invalidate)
2160{
2161 vm_map_entry_t current;
2162 vm_map_entry_t entry;
2163 vm_size_t size;
2164 vm_object_t object;
2165 vm_ooffset_t offset;
2166
2167 vm_map_lock_read(map);
2168 VM_MAP_RANGE_CHECK(map, start, end);
2169 if (!vm_map_lookup_entry(map, start, &entry)) {
2170 vm_map_unlock_read(map);
2171 return (KERN_INVALID_ADDRESS);
2172 }
2173 /*
2174 * Make a first pass to check for holes.
2175 */
2176 for (current = entry; current->start < end; current = current->next) {
2177 if (current->maptype == VM_MAPTYPE_SUBMAP) {
2178 vm_map_unlock_read(map);
2179 return (KERN_INVALID_ARGUMENT);
2180 }
2181 if (end > current->end &&
2182 (current->next == &map->header ||
2183 current->end != current->next->start)) {
2184 vm_map_unlock_read(map);
2185 return (KERN_INVALID_ADDRESS);
2186 }
2187 }
2188
2189 if (invalidate)
2190 pmap_remove(vm_map_pmap(map), start, end);
2191 /*
2192 * Make a second pass, cleaning/uncaching pages from the indicated
2193 * objects as we go.
2194 */
2195 for (current = entry; current->start < end; current = current->next) {
2196 offset = current->offset + (start - current->start);
2197 size = (end <= current->end ? end : current->end) - start;
2198 if (current->maptype == VM_MAPTYPE_SUBMAP) {
2199 vm_map_t smap;
2200 vm_map_entry_t tentry;
2201 vm_size_t tsize;
2202
2203 smap = current->object.sub_map;
2204 vm_map_lock_read(smap);
2205 vm_map_lookup_entry(smap, offset, &tentry);
2206 tsize = tentry->end - offset;
2207 if (tsize < size)
2208 size = tsize;
2209 object = tentry->object.vm_object;
2210 offset = tentry->offset + (offset - tentry->start);
2211 vm_map_unlock_read(smap);
2212 } else {
2213 object = current->object.vm_object;
2214 }
2215 /*
2216 * Note that there is absolutely no sense in writing out
2217 * anonymous objects, so we track down the vnode object
2218 * to write out.
2219 * We invalidate (remove) all pages from the address space
2220 * anyway, for semantic correctness.
2221 *
2222 * note: certain anonymous maps, such as MAP_NOSYNC maps,
2223 * may start out with a NULL object.
2224 */
2225 while (object && object->backing_object) {
2226 offset += object->backing_object_offset;
2227 object = object->backing_object;
2228 if (object->size < OFF_TO_IDX( offset + size))
2229 size = IDX_TO_OFF(object->size) - offset;
2230 }
2231 if (object && (object->type == OBJT_VNODE) &&
2232 (current->protection & VM_PROT_WRITE)) {
2233 /*
2234 * Flush pages if writing is allowed, invalidate them
2235 * if invalidation requested. Pages undergoing I/O
2236 * will be ignored by vm_object_page_remove().
2237 *
2238 * We cannot lock the vnode and then wait for paging
2239 * to complete without deadlocking against vm_fault.
2240 * Instead we simply call vm_object_page_remove() and
2241 * allow it to block internally on a page-by-page
2242 * basis when it encounters pages undergoing async
2243 * I/O.
2244 */
2245 int flags;
2246
2247 vm_object_reference(object);
2248 vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY);
2249 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
2250 flags |= invalidate ? OBJPC_INVAL : 0;
2251
2252 /*
2253 * When operating on a virtual page table just
2254 * flush the whole object. XXX we probably ought
2255 * to
2256 */
2257 switch(current->maptype) {
2258 case VM_MAPTYPE_NORMAL:
2259 vm_object_page_clean(object,
2260 OFF_TO_IDX(offset),
2261 OFF_TO_IDX(offset + size + PAGE_MASK),
2262 flags);
2263 break;
2264 case VM_MAPTYPE_VPAGETABLE:
2265 vm_object_page_clean(object, 0, 0, flags);
2266 break;
2267 }
2268 vn_unlock(((struct vnode *)object->handle));
2269 vm_object_deallocate(object);
2270 }
2271 if (object && invalidate &&
2272 ((object->type == OBJT_VNODE) ||
2273 (object->type == OBJT_DEVICE))) {
2274 int clean_only =
2275 (object->type == OBJT_DEVICE) ? FALSE : TRUE;
2276 vm_object_reference(object);
2277 switch(current->maptype) {
2278 case VM_MAPTYPE_NORMAL:
2279 vm_object_page_remove(object,
2280 OFF_TO_IDX(offset),
2281 OFF_TO_IDX(offset + size + PAGE_MASK),
2282 clean_only);
2283 break;
2284 case VM_MAPTYPE_VPAGETABLE:
2285 vm_object_page_remove(object, 0, 0, clean_only);
2286 break;
2287 }
2288 vm_object_deallocate(object);
2289 }
2290 start += size;
2291 }
2292
2293 vm_map_unlock_read(map);
2294 return (KERN_SUCCESS);
2295}
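
/*
 * Hypothetical usage sketch (illustrative only, not compiled): an
 * msync()-style caller flushing a user range synchronously and
 * invalidating any cached pages might do the following, where 'p',
 * 'addr' and 'len' are assumed to be supplied (and page aligned):
 *
 *	int rv;
 *
 *	rv = vm_map_clean(&p->p_vmspace->vm_map, addr, addr + len,
 *			  TRUE, TRUE);
 *	if (rv != KERN_SUCCESS)
 *		... translate the KERN_* code into an errno ...
 */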
2296
2297/*
2298 * vm_map_entry_unwire: [ internal use only ]
2299 *
2300 * Make the region specified by this entry pageable.
2301 *
2302 * The map in question should be locked.
2303 * [This is the reason for this routine's existence.]
2304 */
2305static void
2306vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2307{
2308 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2309 entry->wired_count = 0;
2310 vm_fault_unwire(map, entry);
2311}
2312
2313/*
2314 * vm_map_entry_delete: [ internal use only ]
2315 *
2316 * Deallocate the given entry from the target map.
2317 */
2318static void
2319vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry, int *countp)
2320{
2321 vm_map_entry_unlink(map, entry);
2322 map->size -= entry->end - entry->start;
2323
2324 switch(entry->maptype) {
2325 case VM_MAPTYPE_NORMAL:
2326 case VM_MAPTYPE_VPAGETABLE:
2327 vm_object_deallocate(entry->object.vm_object);
2328 break;
2329 default:
2330 break;
2331 }
2332
2333 vm_map_entry_dispose(map, entry, countp);
2334}
2335
2336/*
2337 * vm_map_delete: [ internal use only ]
2338 *
2339 * Deallocates the given address range from the target
2340 * map.
2341 */
2342int
2343vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end, int *countp)
2344{
2345 vm_object_t object;
2346 vm_map_entry_t entry;
2347 vm_map_entry_t first_entry;
2348
2349again:
2350 /*
2351 * Find the start of the region, and clip it. Set entry to point
2352 * at the first record containing the requested address or, if no
2353 * such record exists, the next record with a greater address. The
2354 * loop will run from this point until a record beyond the termination
2355 * address is encountered.
2356 *
2357 * map->hint must be adjusted to not point to anything we delete,
2358 * so set it to the entry prior to the one being deleted.
2359 *
2360 * GGG see other GGG comment.
2361 */
2362 if (vm_map_lookup_entry(map, start, &first_entry)) {
2363 entry = first_entry;
2364 vm_map_clip_start(map, entry, start, countp);
2365 map->hint = entry->prev; /* possible problem XXX */
2366 } else {
2367 map->hint = first_entry; /* possible problem XXX */
2368 entry = first_entry->next;
2369 }
2370
2371 /*
2372 * If a hole opens up prior to the current first_free then
2373 * adjust first_free. As with map->hint, map->first_free
2374 * cannot be left set to anything we might delete.
2375 */
2376 if (entry == &map->header) {
2377 map->first_free = &map->header;
2378 } else if (map->first_free->start >= start) {
2379 map->first_free = entry->prev;
2380 }
2381
2382 /*
2383 * Step through all entries in this region
2384 */
2385
2386 while ((entry != &map->header) && (entry->start < end)) {
2387 vm_map_entry_t next;
2388 vm_offset_t s, e;
2389 vm_pindex_t offidxstart, offidxend, count;
2390
2391 /*
2392 * If we hit an in-transition entry we have to sleep and
2393 * retry. It's easier (and not really slower) to just retry
2394 * since this case occurs so rarely and the hint is already
2395 * pointing at the right place. We have to reset the
2396 * start offset so as not to accidentally delete an entry
2397 * another process just created in vacated space.
2398 */
2399 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2400 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2401 start = entry->start;
2402 ++mycpu->gd_cnt.v_intrans_coll;
2403 ++mycpu->gd_cnt.v_intrans_wait;
2404 vm_map_transition_wait(map);
2405 goto again;
2406 }
2407 vm_map_clip_end(map, entry, end, countp);
2408
2409 s = entry->start;
2410 e = entry->end;
2411 next = entry->next;
2412
2413 offidxstart = OFF_TO_IDX(entry->offset);
2414 count = OFF_TO_IDX(e - s);
2415 object = entry->object.vm_object;
2416
2417 /*
2418 * Unwire before removing addresses from the pmap; otherwise,
2419 * unwiring will put the entries back in the pmap.
2420 */
2421 if (entry->wired_count != 0)
2422 vm_map_entry_unwire(map, entry);
2423
2424 offidxend = offidxstart + count;
2425
2426 if ((object == kernel_object) || (object == kmem_object)) {
2427 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2428 } else {
2429 pmap_remove(map->pmap, s, e);
2430 if (object != NULL &&
2431 object->ref_count != 1 &&
2432 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2433 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2434 vm_object_collapse(object);
2435 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2436 if (object->type == OBJT_SWAP) {
2437 swap_pager_freespace(object, offidxstart, count);
2438 }
2439 if (offidxend >= object->size &&
2440 offidxstart < object->size) {
2441 object->size = offidxstart;
2442 }
2443 }
2444 }
2445
2446 /*
2447 * Delete the entry (which may delete the object) only after
2448 * removing all pmap entries pointing to its pages.
2449 * (Otherwise, its page frames may be reallocated, and any
2450 * modify bits will be set in the wrong object!)
2451 */
2452 vm_map_entry_delete(map, entry, countp);
2453 entry = next;
2454 }
2455 return (KERN_SUCCESS);
2456}
2457
2458/*
2459 * vm_map_remove:
2460 *
2461 * Remove the given address range from the target map.
2462 * This is the exported form of vm_map_delete.
2463 */
2464int
2465vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2466{
2467 int result;
2468 int count;
2469
2470 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2471 vm_map_lock(map);
2472 VM_MAP_RANGE_CHECK(map, start, end);
2473 result = vm_map_delete(map, start, end, &count);
2474 vm_map_unlock(map);
2475 vm_map_entry_release(count);
2476
2477 return (result);
2478}
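
/*
 * Hypothetical usage sketch (illustrative only, not compiled): tearing
 * down a previously established mapping of 'size' bytes at 'addr' in
 * 'map' is a single call; both values are assumed page aligned:
 *
 *	int rv;
 *
 *	rv = vm_map_remove(map, addr, addr + size);
 *	KKASSERT(rv == KERN_SUCCESS);
 */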
2479
2480/*
2481 * vm_map_check_protection:
2482 *
2483 * Assert that the target map allows the specified
2484 * privilege on the entire address region given.
2485 * The entire region must be allocated.
2486 */
2487boolean_t
2488vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2489 vm_prot_t protection)
2490{
2491 vm_map_entry_t entry;
2492 vm_map_entry_t tmp_entry;
2493
2494 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2495 return (FALSE);
2496 }
2497 entry = tmp_entry;
2498
2499 while (start < end) {
2500 if (entry == &map->header) {
2501 return (FALSE);
2502 }
2503 /*
2504 * No holes allowed!
2505 */
2506
2507 if (start < entry->start) {
2508 return (FALSE);
2509 }
2510 /*
2511 * Check protection associated with entry.
2512 */
2513
2514 if ((entry->protection & protection) != protection) {
2515 return (FALSE);
2516 }
2517 /* go to next entry */
2518
2519 start = entry->end;
2520 entry = entry->next;
2521 }
2522 return (TRUE);
2523}
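
/*
 * Hypothetical usage sketch (illustrative only, not compiled): a caller
 * validating that a user buffer is readable and writable before
 * touching it directly might check, with 'map', 'uaddr' and 'len'
 * assumed to be supplied by the caller:
 *
 *	if (!vm_map_check_protection(map, uaddr, uaddr + len,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return (EFAULT);
 */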
2524
2525/*
2526 * Split the pages in a map entry into a new object. This affords
2527 * easier removal of unused pages, and keeps object inheritance from
2528 * being a negative impact on memory usage.
2529 */
2530static void
2531vm_map_split(vm_map_entry_t entry)
2532{
2533 vm_page_t m;
2534 vm_object_t orig_object, new_object, source;
2535 vm_offset_t s, e;
2536 vm_pindex_t offidxstart, offidxend, idx;
2537 vm_size_t size;
2538 vm_ooffset_t offset;
2539
2540 orig_object = entry->object.vm_object;
2541 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2542 return;
2543 if (orig_object->ref_count <= 1)
2544 return;
2545
2546 offset = entry->offset;
2547 s = entry->start;
2548 e = entry->end;
2549
2550 offidxstart = OFF_TO_IDX(offset);
2551 offidxend = offidxstart + OFF_TO_IDX(e - s);
2552 size = offidxend - offidxstart;
2553
2554 new_object = vm_pager_allocate(orig_object->type, NULL,
2555 IDX_TO_OFF(size), VM_PROT_ALL, 0);
2556 if (new_object == NULL)
2557 return;
2558
2559 source = orig_object->backing_object;
2560 if (source != NULL) {
2561 vm_object_reference(source); /* Referenced by new_object */
2562 LIST_INSERT_HEAD(&source->shadow_head,
2563 new_object, shadow_list);
2564 vm_object_clear_flag(source, OBJ_ONEMAPPING);
2565 new_object->backing_object_offset =
2566 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2567 new_object->backing_object = source;
2568 source->shadow_count++;
2569 source->generation++;
2570 }
2571
2572 for (idx = 0; idx < size; idx++) {
2575 /*
2576 * A critical section is required to avoid a race between
2577 * the lookup and an interrupt/unbusy/free and our busy
2578 * check.
2579 */
2580 crit_enter();
2581 retry:
2582 m = vm_page_lookup(orig_object, offidxstart + idx);
2583 if (m == NULL) {
2584 crit_exit();
2585 continue;
2586 }
2587
2588 /*
2589 * We must wait for pending I/O to complete before we can
2590 * rename the page.
2591 *
2592 * We do not have to VM_PROT_NONE the page as mappings should
2593 * not be changed by this operation.
2594 */
2595 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2596 goto retry;
2597 vm_page_busy(m);
2598 vm_page_rename(m, new_object, idx);
2599 /* page automatically made dirty by rename and cache handled */
2600 vm_page_busy(m);
2601 crit_exit();
2602 }
2603
2604 if (orig_object->type == OBJT_SWAP) {
2605 vm_object_pip_add(orig_object, 1);
2606 /*
2607 * copy orig_object pages into new_object
2608 * and destroy unneeded pages in
2609 * shadow object.
2610 */
2611 swap_pager_copy(orig_object, new_object, offidxstart, 0);
2612 vm_object_pip_wakeup(orig_object);
2613 }
2614
2615 /*
2616 * Wakeup the pages we played with. No spl protection is needed
2617 * for a simple wakeup.
2618 */
2619 for (idx = 0; idx < size; idx++) {
2620 m = vm_page_lookup(new_object, idx);
2621 if (m)
2622 vm_page_wakeup(m);
2623 }
2624
2625 entry->object.vm_object = new_object;
2626 entry->offset = 0LL;
2627 vm_object_deallocate(orig_object);
2628}
2629
2630/*
2631 * vm_map_copy_entry:
2632 *
2633 * Copies the contents of the source entry to the destination
2634 * entry. The entries *must* be aligned properly.
2635 */
2636static void
2637vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
2638 vm_map_entry_t src_entry, vm_map_entry_t dst_entry)
2639{
2640 vm_object_t src_object;
2641
2642 if (dst_entry->maptype == VM_MAPTYPE_SUBMAP)
2643 return;
2644 if (src_entry->maptype == VM_MAPTYPE_SUBMAP)
2645 return;
2646
2647 if (src_entry->wired_count == 0) {
2648 /*
2649 * If the source entry is marked needs_copy, it is already
2650 * write-protected.
2651 */
2652 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2653 pmap_protect(src_map->pmap,
2654 src_entry->start,
2655 src_entry->end,
2656 src_entry->protection & ~VM_PROT_WRITE);
2657 }
2658
2659 /*
2660 * Make a copy of the object.
2661 */
2662 if ((src_object = src_entry->object.vm_object) != NULL) {
2663 if ((src_object->handle == NULL) &&
2664 (src_object->type == OBJT_DEFAULT ||
2665 src_object->type == OBJT_SWAP)) {
2666 vm_object_collapse(src_object);
2667 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2668 vm_map_split(src_entry);
2669 src_object = src_entry->object.vm_object;
2670 }
2671 }
2672
2673 vm_object_reference(src_object);
2674 vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2675 dst_entry->object.vm_object = src_object;
2676 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2677 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2678 dst_entry->offset = src_entry->offset;
2679 } else {
2680 dst_entry->object.vm_object = NULL;
2681 dst_entry->offset = 0;
2682 }
2683
2684 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2685 dst_entry->end - dst_entry->start, src_entry->start);
2686 } else {
2687 /*
2688 * Of course, wired down pages can't be set copy-on-write.
2689 * Cause wired pages to be copied into the new map by
2690 * simulating faults (the new pages are pageable)
2691 */
2692 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2693 }
2694}
2695
2696/*
2697 * vmspace_fork:
2698 * Create a new process vmspace structure and vm_map
2699 * based on those of an existing process. The new map
2700 * is based on the old map, according to the inheritance
2701 * values on the regions in that map.
2702 *
2703 * The source map must not be locked.
2704 */
2705struct vmspace *
2706vmspace_fork(struct vmspace *vm1)
2707{
2708 struct vmspace *vm2;
2709 vm_map_t old_map = &vm1->vm_map;
2710 vm_map_t new_map;
2711 vm_map_entry_t old_entry;
2712 vm_map_entry_t new_entry;
2713 vm_object_t object;
2714 int count;
2715
2716 vm_map_lock(old_map);
2717 old_map->infork = 1;
2718
2719 /*
2720 * XXX Note: upcalls are not copied.
2721 */
2722 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2723 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2724 (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy);
2725 new_map = &vm2->vm_map; /* XXX */
2726 new_map->timestamp = 1;
2727
2728 count = 0;
2729 old_entry = old_map->header.next;
2730 while (old_entry != &old_map->header) {
2731 ++count;
2732 old_entry = old_entry->next;
2733 }
2734
2735 count = vm_map_entry_reserve(count + MAP_RESERVE_COUNT);
2736
2737 old_entry = old_map->header.next;
2738 while (old_entry != &old_map->header) {
2739 if (old_entry->maptype == VM_MAPTYPE_SUBMAP)
2740 panic("vm_map_fork: encountered a submap");
2741
2742 switch (old_entry->inheritance) {
2743 case VM_INHERIT_NONE:
2744 break;
2745
2746 case VM_INHERIT_SHARE:
2747 /*
2748 * Clone the entry, creating the shared object if
2749 * necessary.
2750 */
2751 object = old_entry->object.vm_object;
2752 if (object == NULL) {
2753 object = vm_object_allocate(OBJT_DEFAULT,
2754 atop(old_entry->end - old_entry->start));
2755 old_entry->object.vm_object = object;
2756 old_entry->offset = (vm_offset_t) 0;
2757 }
2758
2759 /*
2760 * Add the reference before calling vm_object_shadow
2761 * to ensure that a shadow object is created.
2762 */
2763 vm_object_reference(object);
2764 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2765 vm_object_shadow(&old_entry->object.vm_object,
2766 &old_entry->offset,
2767 atop(old_entry->end - old_entry->start));
2768 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2769 /* Transfer the second reference too. */
2770 vm_object_reference(
2771 old_entry->object.vm_object);
2772 vm_object_deallocate(object);
2773 object = old_entry->object.vm_object;
2774 }
2775 vm_object_clear_flag(object, OBJ_ONEMAPPING);
2776
2777 /*
2778 * Clone the entry, referencing the shared object.
2779 */
2780 new_entry = vm_map_entry_create(new_map, &count);
2781 *new_entry = *old_entry;
2782 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2783 new_entry->wired_count = 0;
2784
2785 /*
2786 * Insert the entry into the new map -- we know we're
2787 * inserting at the end of the new map.
2788 */
2789
2790 vm_map_entry_link(new_map, new_map->header.prev,
2791 new_entry);
2792
2793 /*
2794 * Update the physical map
2795 */
2796
2797 pmap_copy(new_map->pmap, old_map->pmap,
2798 new_entry->start,
2799 (old_entry->end - old_entry->start),
2800 old_entry->start);
2801 break;
2802
2803 case VM_INHERIT_COPY:
2804 /*
2805 * Clone the entry and link into the map.
2806 */
2807 new_entry = vm_map_entry_create(new_map, &count);
2808 *new_entry = *old_entry;
2809 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2810 new_entry->wired_count = 0;
2811 new_entry->object.vm_object = NULL;
2812 vm_map_entry_link(new_map, new_map->header.prev,
2813 new_entry);
2814 vm_map_copy_entry(old_map, new_map, old_entry,
2815 new_entry);
2816 break;
2817 }
2818 old_entry = old_entry->next;
2819 }
2820
2821 new_map->size = old_map->size;
2822 old_map->infork = 0;
2823 vm_map_unlock(old_map);
2824 vm_map_entry_release(count);
2825
2826 return (vm2);
2827}
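
/*
 * Hypothetical usage sketch (illustrative only, not compiled): a fork
 * path hands the child a copied address space roughly like this, with
 * 'p1' the parent and 'p2' the child:
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *	pmap_pinit2(vmspace_pmap(p2->p_vmspace));
 */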
2828
2829int
2830vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2831 vm_prot_t prot, vm_prot_t max, int cow)
2832{
2833 vm_map_entry_t prev_entry;
2834 vm_map_entry_t new_stack_entry;
2835 vm_size_t init_ssize;
2836 int rv;
2837 int count;
2838
2839 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2840 return (KERN_NO_SPACE);
2841
2842 if (max_ssize < sgrowsiz)
2843 init_ssize = max_ssize;
2844 else
2845 init_ssize = sgrowsiz;
2846
2847 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2848 vm_map_lock(map);
2849
2850 /* If addr is already mapped, no go */
2851 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2852 vm_map_unlock(map);
2853 vm_map_entry_release(count);
2854 return (KERN_NO_SPACE);
2855 }
2856
2857 /* If we would blow our VMEM resource limit, no go */
2858 if (map->size + init_ssize >
2859 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2860 vm_map_unlock(map);
2861 vm_map_entry_release(count);
2862 return (KERN_NO_SPACE);
2863 }
2864
2865 /* If we can't accommodate max_ssize in the current mapping,
2866 * no go. However, we need to be aware that subsequent user
2867 * mappings might map into the space we have reserved for
2868 * stack, and currently this space is not protected.
2869 *
2870 * Hopefully we will at least detect this condition
2871 * when we try to grow the stack.
2872 */
2873 if ((prev_entry->next != &map->header) &&
2874 (prev_entry->next->start < addrbos + max_ssize)) {
2875 vm_map_unlock(map);
2876 vm_map_entry_release(count);
2877 return (KERN_NO_SPACE);
2878 }
2879
2880 /* We initially map a stack of only init_ssize. We will
2881 * grow as needed later. Since this is to be a grow
2882 * down stack, we map at the top of the range.
2883 *
2884 * Note: we would normally expect prot and max to be
2885 * VM_PROT_ALL, and cow to be 0. Possibly we should
2886 * eliminate these as input parameters, and just
2887 * pass these values here in the insert call.
2888 */
2889 rv = vm_map_insert(map, &count,
2890 NULL, 0, addrbos + max_ssize - init_ssize,
2891 addrbos + max_ssize,
2892 VM_MAPTYPE_NORMAL,
2893 prot, max,
2894 cow);
2895
2896 /* Now set the avail_ssize amount */
2897 if (rv == KERN_SUCCESS) {
2898 if (prev_entry != &map->header)
2899 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize, &count);
2900 new_stack_entry = prev_entry->next;
2901 if (new_stack_entry->end != addrbos + max_ssize ||
2902 new_stack_entry->start != addrbos + max_ssize - init_ssize)
2903 panic ("Bad entry start/end for new stack entry");
2904 else
2905 new_stack_entry->avail_ssize = max_ssize - init_ssize;
2906 }
2907
2908 vm_map_unlock(map);
2909 vm_map_entry_release(count);
2910 return (rv);
2911}
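
/*
 * Hypothetical usage sketch (illustrative only, not compiled): creating
 * a grow-down stack region whose bottom-of-stack is 'bos' and whose
 * maximum size is 'maxssize', using the prot/max/cow values the comment
 * inside vm_map_stack() says are the ones normally expected. 'vm' is
 * assumed to be the relevant struct vmspace pointer.
 *
 *	int rv;
 *
 *	rv = vm_map_stack(&vm->vm_map, bos, maxssize,
 *			  VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		... report the failure ...
 */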
2912
2913/* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the
2914 * desired address is already mapped, or if we successfully grow
2915 * the stack. Also returns KERN_SUCCESS if addr is outside the
2916 * stack range (this is strange, but preserves compatibility with
2917 * the grow function in vm_machdep.c).
2918 */
2919int
2920vm_map_growstack (struct proc *p, vm_offset_t addr)
2921{
2922 vm_map_entry_t prev_entry;
2923 vm_map_entry_t stack_entry;
2924 vm_map_entry_t new_stack_entry;
2925 struct vmspace *vm = p->p_vmspace;
2926 vm_map_t map = &vm->vm_map;
2927 vm_offset_t end;
2928 int grow_amount;
2929 int rv = KERN_SUCCESS;
2930 int is_procstack;
2931 int use_read_lock = 1;
2932 int count;
2933
2934 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
2935Retry:
2936 if (use_read_lock)
2937 vm_map_lock_read(map);
2938 else
2939 vm_map_lock(map);
2940
2941 /* If addr is already in the entry range, no need to grow.*/
2942 if (vm_map_lookup_entry(map, addr, &prev_entry))
2943 goto done;
2944
2945 if ((stack_entry = prev_entry->next) == &map->header)
2946 goto done;
2947 if (prev_entry == &map->header)
2948 end = stack_entry->start - stack_entry->avail_ssize;
2949 else
2950 end = prev_entry->end;
2951
2952 /* This next test mimics the old grow function in vm_machdep.c.
2953 * It really doesn't quite make sense, but we do it anyway
2954 * for compatibility.
2955 *
2956 * If the stack is not growable, return success. This signals
2957 * the caller to proceed as it normally would with normal vm.
2958 */
2959 if (stack_entry->avail_ssize < 1 ||
2960 addr >= stack_entry->start ||
2961 addr < stack_entry->start - stack_entry->avail_ssize) {
2962 goto done;
2963 }
2964
2965 /* Find the minimum grow amount */
2966 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2967 if (grow_amount > stack_entry->avail_ssize) {
2968 rv = KERN_NO_SPACE;
2969 goto done;
2970 }
2971
2972 /* If there is no longer enough space between the entries,
2973 * no go; adjust the available space. Note: this
2974 * should only happen if the user has mapped into the
2975 * stack area after the stack was created, and is
2976 * probably an error.
2977 *
2978 * This also effectively destroys any guard page the user
2979 * might have intended by limiting the stack size.
2980 */
2981 if (grow_amount > stack_entry->start - end) {
2982 if (use_read_lock && vm_map_lock_upgrade(map)) {
2983 use_read_lock = 0;
2984 goto Retry;
2985 }
2986 use_read_lock = 0;
2987 stack_entry->avail_ssize = stack_entry->start - end;
2988 rv = KERN_NO_SPACE;
2989 goto done;
2990 }
2991
2992 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2993
2994 /* If this is the main process stack, see if we're over the
2995 * stack limit.
2996 */
2997 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2998 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2999 rv = KERN_NO_SPACE;
3000 goto done;
3001 }
3002
3003 /* Round up the grow amount modulo SGROWSIZ */
3004 grow_amount = roundup (grow_amount, sgrowsiz);
3005 if (grow_amount > stack_entry->avail_ssize) {
3006 grow_amount = stack_entry->avail_ssize;
3007 }
3008 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
3009 p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
3010 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
3011 ctob(vm->vm_ssize);
3012 }
3013
3014 /* If we would blow our VMEM resource limit, no go */
3015 if (map->size + grow_amount > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
3016 rv = KERN_NO_SPACE;
3017 goto done;
3018 }
3019
3020 if (use_read_lock && vm_map_lock_upgrade(map)) {
3021 use_read_lock = 0;
3022 goto Retry;
3023 }
3024 use_read_lock = 0;
3025
3026 /* Get the preliminary new entry start value */
3027 addr = stack_entry->start - grow_amount;
3028
3029 /* If this puts us into the previous entry, cut back our growth
3030 * to the available space. Also, see the note above.
3031 */
3032 if (addr < end) {
3033 stack_entry->avail_ssize = stack_entry->start - end;
3034 addr = end;
3035 }
3036
3037 rv = vm_map_insert(map, &count,
3038 NULL, 0, addr, stack_entry->start,
3039 VM_MAPTYPE_NORMAL,
3040 VM_PROT_ALL, VM_PROT_ALL,
3041 0);
3042
3043 /* Adjust the available stack space by the amount we grew. */
3044 if (rv == KERN_SUCCESS) {
3045 if (prev_entry != &map->header)
3046 vm_map_clip_end(map, prev_entry, addr, &count);
3047 new_stack_entry = prev_entry->next;
3048 if (new_stack_entry->end != stack_entry->start ||
3049 new_stack_entry->start != addr)
3050 panic ("Bad stack grow start/end in new stack entry");
3051 else {
3052 new_stack_entry->avail_ssize = stack_entry->avail_ssize -
3053 (new_stack_entry->end -
3054 new_stack_entry->start);
3055 if (is_procstack)
3056 vm->vm_ssize += btoc(new_stack_entry->end -
3057 new_stack_entry->start);
3058 }
3059 }
3060
3061done:
3062 if (use_read_lock)
3063 vm_map_unlock_read(map);
3064 else
3065 vm_map_unlock(map);
3066 vm_map_entry_release(count);
3067 return (rv);
3068}
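
/*
 * Hypothetical usage sketch (illustrative only, not compiled): a fault
 * path typically gives the stack a chance to grow toward the faulting
 * address before treating the fault as an error, e.g.:
 *
 *	if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *		return (KERN_FAILURE);
 */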
3069
3070/*
3071 * Unshare the specified VM space for exec. If other processes are
3072 * mapped to it, then create a new one. The new vmspace is null.
3073 */
3074
3075void
3076vmspace_exec(struct proc *p, struct vmspace *vmcopy)
3077{
3078 struct vmspace *oldvmspace = p->p_vmspace;
3079 struct vmspace *newvmspace;
3080 vm_map_t map = &p->p_vmspace->vm_map;
3081
3082 /*
3083 * If we are execing a resident vmspace we fork it, otherwise
3084 * we create a new vmspace. Note that exitingcnt and upcalls
3085 * are not copied to the new vmspace.
3086 */
3087 if (vmcopy) {
3088 newvmspace = vmspace_fork(vmcopy);
3089 } else {
3090 newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
3091 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
3092 (caddr_t)&oldvmspace->vm_endcopy -
3093 (caddr_t)&oldvmspace->vm_startcopy);
3094 }
3095
3096 /*
3097 * This code is written like this for prototype purposes. The
3098 * goal is to avoid running down the vmspace here, but let the
3099 * other processes that are still using the vmspace finally
3100 * run it down. Even though there is little or no chance of blocking
3101 * here, it is a good idea to keep this form for future mods.
3102 */
3103 p->p_vmspace = newvmspace;
3104 pmap_pinit2(vmspace_pmap(newvmspace));
3105 if (p == curproc)
3106 pmap_activate(p);
3107 vmspace_free(oldvmspace);
3108}
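
/*
 * Hypothetical usage sketch (illustrative only, not compiled): an exec
 * path that simply wants a fresh address space, with no resident
 * vmspace image to clone, passes NULL for vmcopy:
 *
 *	vmspace_exec(p, NULL);
 */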
3109
3110/*
3111 * Unshare the specified VM space for forcing COW. This
3112 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3113 *
3114 * The exitingcnt test is not strictly necessary but has been
3115 * included for code sanity (to make the code a bit more deterministic).
3116 */
3117
3118void
3119vmspace_unshare(struct proc *p)
3120{
3121 struct vmspace *oldvmspace = p->p_vmspace;
3122 struct vmspace *newvmspace;
3123
3124 if (oldvmspace->vm_refcnt == 1 && oldvmspace->vm_exitingcnt == 0)
3125 return;
3126 newvmspace = vmspace_fork(oldvmspace);
3127 p->p_vmspace = newvmspace;
3128 pmap_pinit2(vmspace_pmap(newvmspace));
3129 if (p == curproc)
3130 pmap_activate(p);
3131 vmspace_free(oldvmspace);
3132}
3133
3134/*
3135 * vm_map_lookup:
3136 *
3137 * Finds the VM object, offset, and
3138 * protection for a given virtual address in the
3139 * specified map, assuming a page fault of the
3140 * type specified.
3141 *
3142 * Leaves the map in question locked for read; return
3143 * values are guaranteed until a vm_map_lookup_done
3144 * call is performed. Note that the map argument
3145 * is in/out; the returned map must be used in
3146 * the call to vm_map_lookup_done.
3147 *
3148 * A handle (out_entry) is returned for use in
3149 * vm_map_lookup_done, to make that fast.
3150 *
3151 * If a lookup is requested with "write protection"
3152 * specified, the map may be changed to perform virtual
3153 * copying operations, although the data referenced will
3154 * remain the same.
3155 */
3156int
3157vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
3158 vm_offset_t vaddr,
3159 vm_prot_t fault_typea,
3160 vm_map_entry_t *out_entry, /* OUT */
3161 vm_object_t *object, /* OUT */
3162 vm_pindex_t *pindex, /* OUT */
3163 vm_prot_t *out_prot, /* OUT */
3164 boolean_t *wired) /* OUT */
3165{
3166 vm_map_entry_t entry;
3167 vm_map_t map = *var_map;
3168 vm_prot_t prot;
3169 vm_prot_t fault_type = fault_typea;
3170 int use_read_lock = 1;
3171 int rv = KERN_SUCCESS;
3172
3173RetryLookup:
3174 if (use_read_lock)
3175 vm_map_lock_read(map);
3176 else
3177 vm_map_lock(map);
3178
3179 /*
3180 * If the map has an interesting hint, try it before calling the
3181 * full-blown lookup routine.
3182 */
3183 entry = map->hint;
3184 *out_entry = entry;
3185
3186 if ((entry == &map->header) ||
3187 (vaddr < entry->start) || (vaddr >= entry->end)) {
3188 vm_map_entry_t tmp_entry;
3189
3190 /*
3191 * Entry was either not a valid hint, or the vaddr was not
3192 * contained in the entry, so do a full lookup.
3193 */
3194 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
3195 rv = KERN_INVALID_ADDRESS;
3196 goto done;
3197 }
3198
3199 entry = tmp_entry;
3200 *out_entry = entry;
3201 }
3202
3203 /*
3204 * Handle submaps.
3205 */
3206 if (entry->maptype == VM_MAPTYPE_SUBMAP) {
3207 vm_map_t old_map = map;
3208
3209 *var_map = map = entry->object.sub_map;
3210 if (use_read_lock)
3211 vm_map_unlock_read(old_map);
3212 else
3213 vm_map_unlock(old_map);
3214 use_read_lock = 1;
3215 goto RetryLookup;
3216 }
3217
3218 /*
3219 * Check whether this task is allowed to have this page.
3220 * Note the special case for MAP_ENTRY_COW
3221 * pages with an override. This is to implement a forced
3222 * COW for debuggers.
3223 */
3224
3225 if (fault_type & VM_PROT_OVERRIDE_WRITE)
3226 prot = entry->max_protection;
3227 else
3228 prot = entry->protection;
3229
3230 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
3231 if ((fault_type & prot) != fault_type) {
3232 rv = KERN_PROTECTION_FAILURE;
3233 goto done;
3234 }
3235
3236 if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
3237 (entry->eflags & MAP_ENTRY_COW) &&
3238 (fault_type & VM_PROT_WRITE) &&
3239 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
3240 rv = KERN_PROTECTION_FAILURE;
3241 goto done;
3242 }
3243
3244 /*
3245 * If this page is not pageable, we have to get it for all possible
3246 * accesses.
3247 */
3248
3249 *wired = (entry->wired_count != 0);
3250 if (*wired)
3251 prot = fault_type = entry->protection;
3252
3253 /*
3254 * If the entry was copy-on-write, we either ...
3255 */
3256
3257 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3258 /*
3259 * If we want to write the page, we may as well handle that
3260 * now since we've got the map locked.
3261 *
3262 * If we don't need to write the page, we just demote the
3263 * permissions allowed.
3264 */
3265
3266 if (fault_type & VM_PROT_WRITE) {
3267 /*
3268 * Make a new object, and place it in the object
3269 * chain. Note that no new references have appeared
3270 * -- one just moved from the map to the new
3271 * object.
3272 */
3273
3274 if (use_read_lock && vm_map_lock_upgrade(map)) {
3275 use_read_lock = 0;
3276 goto RetryLookup;
3277 }
3278 use_read_lock = 0;
3279
3280 vm_object_shadow(
3281 &entry->object.vm_object,
3282 &entry->offset,
3283 atop(entry->end - entry->start));
3284
3285 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3286 } else {
3287 /*
3288 * We're attempting to read a copy-on-write page --
3289 * don't allow writes.
3290 */
3291
3292 prot &= ~VM_PROT_WRITE;
3293 }
3294 }
3295
3296 /*
3297 * Create an object if necessary.
3298 */
3299 if (entry->object.vm_object == NULL &&
3300 !map->system_map) {
3301 if (use_read_lock && vm_map_lock_upgrade(map)) {
3302 use_read_lock = 0;
3303 goto RetryLookup;
3304 }
3305 use_read_lock = 0;
3306 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
3307 atop(entry->end - entry->start));
3308 entry->offset = 0;
3309 }
3310
3311 /*
3312 * Return the object/offset from this entry. If the entry was
3313 * copy-on-write or empty, it has been fixed up.
3314 */
3315
3316 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
3317 *object = entry->object.vm_object;
3318
3319 /*
3320 * Return whether this is the only map sharing this data. On
3321 * success we return with a read lock held on the map. On failure
3322 * we return with the map unlocked.
3323 */
3324 *out_prot = prot;
3325done:
3326 if (rv == KERN_SUCCESS) {
3327 if (use_read_lock == 0)
3328 vm_map_lock_downgrade(map);
3329 } else if (use_read_lock) {
3330 vm_map_unlock_read(map);
3331 } else {
3332 vm_map_unlock(map);
3333 }
3334 return (rv);
3335}
3336
3337/*
3338 * vm_map_lookup_done:
3339 *
3340 * Releases locks acquired by a vm_map_lookup
3341 * (according to the handle returned by that lookup).
3342 */
3343
3344void
3345vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count)
3346{
3347 /*
3348 * Unlock the main-level map
3349 */
3350 vm_map_unlock_read(map);
3351 if (count)
3352 vm_map_entry_release(count);
3353}
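
/*
 * Hypothetical usage sketch (illustrative only, not compiled): the
 * lookup/done pair brackets use of the returned object, as described
 * in the comment above vm_map_lookup(). 'map' and 'va' are assumed to
 * be supplied by the caller; the final 0 means no map entry reservation
 * is being released.
 *
 *	vm_map_entry_t entry;
 *	vm_object_t object;
 *	vm_pindex_t pindex;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	int rv;
 *
 *	rv = vm_map_lookup(&map, va, VM_PROT_READ,
 *			   &entry, &object, &pindex, &prot, &wired);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	... use object + pindex while the map read lock is held ...
 *	vm_map_lookup_done(map, entry, 0);
 */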
3354
3355#include "opt_ddb.h"
3356#ifdef DDB
3357#include <sys/kernel.h>
3358
3359#include <ddb/ddb.h>
3360
3361/*
3362 * vm_map_print: [ debug ]
3363 */
3364DB_SHOW_COMMAND(map, vm_map_print)
3365{
3366 static int nlines;
3367 /* XXX convert args. */
3368 vm_map_t map = (vm_map_t)addr;
3369 boolean_t full = have_addr;
3370
3371 vm_map_entry_t entry;
3372
3373 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3374 (void *)map,
3375 (void *)map->pmap, map->nentries, map->timestamp);
3376 nlines++;
3377
3378 if (!full && db_indent)
3379 return;
3380
3381 db_indent += 2;
3382 for (entry = map->header.next; entry != &map->header;
3383 entry = entry->next) {
3384 db_iprintf("map entry %p: start=%p, end=%p\n",
3385 (void *)entry, (void *)entry->start, (void *)entry->end);
3386 nlines++;
3387 {
3388 static char *inheritance_name[4] =
3389 {"share", "copy", "none", "donate_copy"};
3390
3391 db_iprintf(" prot=%x/%x/%s",
3392 entry->protection,
3393 entry->max_protection,
3394 inheritance_name[(int)(unsigned char)entry->inheritance]);
3395 if (entry->wired_count != 0)
3396 db_printf(", wired");
3397 }
3398 if (entry->maptype == VM_MAPTYPE_SUBMAP) {
3399 /* XXX no %qd in kernel. Truncate entry->offset. */
3400 db_printf(", share=%p, offset=0x%lx\n",
3401 (void *)entry->object.sub_map,
3402 (long)entry->offset);
3403 nlines++;
3404 if ((entry->prev == &map->header) ||
3405 (entry->prev->object.sub_map !=
3406 entry->object.sub_map)) {
3407 db_indent += 2;
3408 vm_map_print((db_expr_t)(intptr_t)
3409 entry->object.sub_map,
3410 full, 0, (char *)0);
3411 db_indent -= 2;
3412 }
3413 } else {
3414 /* XXX no %qd in kernel. Truncate entry->offset. */
3415 db_printf(", object=%p, offset=0x%lx",
3416 (void *)entry->object.vm_object,
3417 (long)entry->offset);
3418 if (entry->eflags & MAP_ENTRY_COW)
3419 db_printf(", copy (%s)",
3420 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3421 db_printf("\n");
3422 nlines++;
3423
3424 if ((entry->prev == &map->header) ||
3425 (entry->prev->object.vm_object !=
3426 entry->object.vm_object)) {
3427 db_indent += 2;
3428 vm_object_print((db_expr_t)(intptr_t)
3429 entry->object.vm_object,
3430 full, 0, (char *)0);
3431 nlines += 4;
3432 db_indent -= 2;
3433 }
3434 }
3435 }
3436 db_indent -= 2;
3437 if (db_indent == 0)
3438 nlines = 0;
3439}
3440
3441
3442DB_SHOW_COMMAND(procvm, procvm)
3443{
3444 struct proc *p;
3445
3446 if (have_addr) {
3447 p = (struct proc *) addr;
3448 } else {
3449 p = curproc;
3450 }
3451
3452 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3453 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3454 (void *)vmspace_pmap(p->p_vmspace));
3455
3456 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3457}
3458
3459#endif /* DDB */