gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1991, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* This code is derived from software contributed to Berkeley by
	6	* The Mach Operating System project at Carnegie-Mellon University.
	7	*
	8	* Redistribution and use in source and binary forms, with or without
	9	* modification, are permitted provided that the following conditions
	10	* are met:
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in the
	15	* documentation and/or other materials provided with the distribution.
	16	* 3. All advertising materials mentioning features or use of this software
	17	* must display the following acknowledgement:
	18	* This product includes software developed by the University of
	19	* California, Berkeley and its contributors.
	20	* 4. Neither the name of the University nor the names of its contributors
	21	* may be used to endorse or promote products derived from this software
	22	* without specific prior written permission.
	23	*
	24	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	25	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	26	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	27	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	28	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	29	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	30	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	31	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	32	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	33	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	34	* SUCH DAMAGE.
	35	*
	36	* from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94
	37	*
	38	*
	39	* Copyright (c) 1987, 1990 Carnegie-Mellon University.
	40	* All rights reserved.
	41	*
	42	* Authors: Avadis Tevanian, Jr., Michael Wayne Young
	43	*
	44	* Permission to use, copy, modify and distribute this software and
	45	* its documentation is hereby granted, provided that both the copyright
	46	* notice and this permission notice appear in all copies of the
	47	* software, derivative works or modified versions, and any portions
	48	* thereof, and that both notices appear in supporting documentation.
	49	*
	50	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	51	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
	52	* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	53	*
	54	* Carnegie Mellon requests users of this software to return to
	55	*
	56	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	57	* School of Computer Science
	58	* Carnegie Mellon University
	59	* Pittsburgh PA 15213-3890
	60	*
	61	* any improvements or extensions that they make and grant Carnegie the
	62	* rights to redistribute these changes.
	63	*
	64	* $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
	65	* $DragonFly: src/sys/vm/vm_object.c,v 1.3 2003/06/25 03:56:13 dillon Exp $
	66	*/
	67
	68	/*
	69	* Virtual memory object module.
	70	*/
	71
	72	#include <sys/param.h>
	73	#include <sys/systm.h>
	74	#include <sys/proc.h> /* for curproc, pageproc */
	75	#include <sys/vnode.h>
	76	#include <sys/vmmeter.h>
	77	#include <sys/mman.h>
	78	#include <sys/mount.h>
	79	#include <sys/kernel.h>
	80	#include <sys/sysctl.h>
	81
	82	#include <vm/vm.h>
	83	#include <vm/vm_param.h>
	84	#include <vm/pmap.h>
	85	#include <vm/vm_map.h>
	86	#include <vm/vm_object.h>
	87	#include <vm/vm_page.h>
	88	#include <vm/vm_pageout.h>
	89	#include <vm/vm_pager.h>
	90	#include <vm/swap_pager.h>
	91	#include <vm/vm_kern.h>
	92	#include <vm/vm_extern.h>
	93	#include <vm/vm_zone.h>
	94
	95	#define EASY_SCAN_FACTOR 8
	96
	97	#define MSYNC_FLUSH_HARDSEQ 0x01
	98	#define MSYNC_FLUSH_SOFTSEQ 0x02
	99
	100	static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ \| MSYNC_FLUSH_SOFTSEQ;
	101	SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
	102	CTLFLAG_RW, &msync_flush_flags, 0, "");
	103
	104	static void vm_object_qcollapse (vm_object_t object);
	105	static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
	106
	107	/*
	108	* Virtual memory objects maintain the actual data
	109	* associated with allocated virtual memory. A given
	110	* page of memory exists within exactly one object.
	111	*
	112	* An object is only deallocated when all "references"
	113	* are given up. Only one "reference" to a given
	114	* region of an object should be writeable.
	115	*
	116	* Associated with each object is a list of all resident
	117	* memory pages belonging to that object; this list is
	118	* maintained by the "vm_page" module, and locked by the object's
	119	* lock.
	120	*
	121	* Each object also records a "pager" routine which is
	122	* used to retrieve (and store) pages to the proper backing
	123	* storage. In addition, objects may be backed by other
	124	* objects from which they were virtual-copied.
	125	*
	126	* The only items within the object structure which are
	127	* modified after time of creation are:
	128	* reference count locked by object's lock
	129	* pager routine locked by object's lock
	130	*
	131	*/
	132
	133	struct object_q vm_object_list;
	134	#ifndef NULL_SIMPLELOCKS
	135	static struct simplelock vm_object_list_lock;
	136	#endif
	137	static long vm_object_count; /* count of all objects */
	138	vm_object_t kernel_object;
	139	vm_object_t kmem_object;
	140	static struct vm_object kernel_object_store;
	141	static struct vm_object kmem_object_store;
	142	extern int vm_pageout_page_count;
	143
	144	static long object_collapses;
	145	static long object_bypasses;
	146	static int next_index;
	147	static vm_zone_t obj_zone;
	148	static struct vm_zone obj_zone_store;
	149	static int object_hash_rand;
	150	#define VM_OBJECTS_INIT 256
	151	static struct vm_object vm_objects_init[VM_OBJECTS_INIT];
	152
	153	void
	154	_vm_object_allocate(type, size, object)
	155	objtype_t type;
	156	vm_size_t size;
	157	vm_object_t object;
	158	{
	159	int incr;
	160	TAILQ_INIT(&object->memq);
	161	LIST_INIT(&object->shadow_head);
	162
	163	object->type = type;
	164	object->size = size;
	165	object->ref_count = 1;
	166	object->flags = 0;
	167	if ((object->type == OBJT_DEFAULT) \|\| (object->type == OBJT_SWAP))
	168	vm_object_set_flag(object, OBJ_ONEMAPPING);
	169	object->paging_in_progress = 0;
	170	object->resident_page_count = 0;
	171	object->shadow_count = 0;
	172	object->pg_color = next_index;
	173	if ( size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
	174	incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
	175	else
	176	incr = size;
	177	next_index = (next_index + incr) & PQ_L2_MASK;
	178	object->handle = NULL;
	179	object->backing_object = NULL;
	180	object->backing_object_offset = (vm_ooffset_t) 0;
	181	/*
	182	* Try to generate a number that will spread objects out in the
	183	* hash table. We 'wipe' new objects across the hash in 128 page
	184	* increments plus 1 more to offset it a little more by the time
	185	* it wraps around.
	186	*/
	187	object->hash_rand = object_hash_rand - 129;
	188
	189	object->generation++;
	190
	191	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	192	vm_object_count++;
	193	object_hash_rand = object->hash_rand;
	194	}
	195
	196	/*
	197	* vm_object_init:
	198	*
	199	* Initialize the VM objects module.
	200	*/
	201	void
	202	vm_object_init()
	203	{
	204	TAILQ_INIT(&vm_object_list);
	205	simple_lock_init(&vm_object_list_lock);
	206	vm_object_count = 0;
	207
	208	kernel_object = &kernel_object_store;
	209	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	210	kernel_object);
	211
	212	kmem_object = &kmem_object_store;
	213	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	214	kmem_object);
	215
	216	obj_zone = &obj_zone_store;
	217	zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
	218	vm_objects_init, VM_OBJECTS_INIT);
	219	}
	220
	221	void
	222	vm_object_init2() {
	223	zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1);
	224	}
	225
	226	/*
	227	* vm_object_allocate:
	228	*
	229	* Returns a new object with the given size.
	230	*/
	231
	232	vm_object_t
	233	vm_object_allocate(type, size)
	234	objtype_t type;
	235	vm_size_t size;
	236	{
	237	vm_object_t result;
	238
	239	result = (vm_object_t) zalloc(obj_zone);
	240
	241	_vm_object_allocate(type, size, result);
	242
	243	return (result);
	244	}
	245
	246
	247	/*
	248	* vm_object_reference:
	249	*
	250	* Gets another reference to the given object.
	251	*/
	252	void
	253	vm_object_reference(object)
	254	vm_object_t object;
	255	{
	256	if (object == NULL)
	257	return;
	258
	259	#if 0
	260	/* object can be re-referenced during final cleaning */
	261	KASSERT(!(object->flags & OBJ_DEAD),
	262	("vm_object_reference: attempting to reference dead obj"));
	263	#endif
	264
	265	object->ref_count++;
	266	if (object->type == OBJT_VNODE) {
	267	while (vget((struct vnode *) object->handle, LK_RETRY\|LK_NOOBJ, curthread)) {
	268	printf("vm_object_reference: delay in getting object\n");
	269	}
	270	}
	271	}
	272
	273	void
	274	vm_object_vndeallocate(object)
	275	vm_object_t object;
	276	{
	277	struct vnode vp = (struct vnode ) object->handle;
	278
	279	KASSERT(object->type == OBJT_VNODE,
	280	("vm_object_vndeallocate: not a vnode object"));
	281	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
	282	#ifdef INVARIANTS
	283	if (object->ref_count == 0) {
	284	vprint("vm_object_vndeallocate", vp);
	285	panic("vm_object_vndeallocate: bad object reference count");
	286	}
	287	#endif
	288
	289	object->ref_count--;
	290	if (object->ref_count == 0) {
	291	vp->v_flag &= ~VTEXT;
	292	vm_object_clear_flag(object, OBJ_OPT);
	293	}
	294	vrele(vp);
	295	}
	296
	297	/*
	298	* vm_object_deallocate:
	299	*
	300	* Release a reference to the specified object,
	301	* gained either through a vm_object_allocate
	302	* or a vm_object_reference call. When all references
	303	* are gone, storage associated with this object
	304	* may be relinquished.
	305	*
	306	* No object may be locked.
	307	*/
	308	void
	309	vm_object_deallocate(object)
	310	vm_object_t object;
	311	{
	312	vm_object_t temp;
	313
	314	while (object != NULL) {
	315
	316	if (object->type == OBJT_VNODE) {
	317	vm_object_vndeallocate(object);
	318	return;
	319	}
	320
	321	if (object->ref_count == 0) {
	322	panic("vm_object_deallocate: object deallocated too many times: %d", object->type);
	323	} else if (object->ref_count > 2) {
	324	object->ref_count--;
	325	return;
	326	}
	327
	328	/*
	329	* Here on ref_count of one or two, which are special cases for
	330	* objects.
	331	*/
	332	if ((object->ref_count == 2) && (object->shadow_count == 0)) {
	333	vm_object_set_flag(object, OBJ_ONEMAPPING);
	334	object->ref_count--;
	335	return;
	336	} else if ((object->ref_count == 2) && (object->shadow_count == 1)) {
	337	object->ref_count--;
	338	if ((object->handle == NULL) &&
	339	(object->type == OBJT_DEFAULT \|\|
	340	object->type == OBJT_SWAP)) {
	341	vm_object_t robject;
	342
	343	robject = LIST_FIRST(&object->shadow_head);
	344	KASSERT(robject != NULL,
	345	("vm_object_deallocate: ref_count: %d, shadow_count: %d",
	346	object->ref_count,
	347	object->shadow_count));
	348	if ((robject->handle == NULL) &&
	349	(robject->type == OBJT_DEFAULT \|\|
	350	robject->type == OBJT_SWAP)) {
	351
	352	robject->ref_count++;
	353
	354	while (
	355	robject->paging_in_progress \|\|
	356	object->paging_in_progress
	357	) {
	358	vm_object_pip_sleep(robject, "objde1");
	359	vm_object_pip_sleep(object, "objde2");
	360	}
	361
	362	if (robject->ref_count == 1) {
	363	robject->ref_count--;
	364	object = robject;
	365	goto doterm;
	366	}
	367
	368	object = robject;
	369	vm_object_collapse(object);
	370	continue;
	371	}
	372	}
	373
	374	return;
	375
	376	} else {
	377	object->ref_count--;
	378	if (object->ref_count != 0)
	379	return;
	380	}
	381
	382	doterm:
	383
	384	temp = object->backing_object;
	385	if (temp) {
	386	LIST_REMOVE(object, shadow_list);
	387	temp->shadow_count--;
	388	if (temp->ref_count == 0)
	389	vm_object_clear_flag(temp, OBJ_OPT);
	390	temp->generation++;
	391	object->backing_object = NULL;
	392	}
	393
	394	/*
	395	* Don't double-terminate, we could be in a termination
	396	* recursion due to the terminate having to sync data
	397	* to disk.
	398	*/
	399	if ((object->flags & OBJ_DEAD) == 0)
	400	vm_object_terminate(object);
	401	object = temp;
	402	}
	403	}
	404
	405	/*
	406	* vm_object_terminate actually destroys the specified object, freeing
	407	* up all previously used resources.
	408	*
	409	* The object must be locked.
	410	* This routine may block.
	411	*/
	412	void
	413	vm_object_terminate(object)
	414	vm_object_t object;
	415	{
	416	vm_page_t p;
	417	int s;
	418
	419	/*
	420	* Make sure no one uses us.
	421	*/
	422	vm_object_set_flag(object, OBJ_DEAD);
	423
	424	/*
	425	* wait for the pageout daemon to be done with the object
	426	*/
	427	vm_object_pip_wait(object, "objtrm");
	428
	429	KASSERT(!object->paging_in_progress,
	430	("vm_object_terminate: pageout in progress"));
	431
	432	/*
	433	* Clean and free the pages, as appropriate. All references to the
	434	* object are gone, so we don't need to lock it.
	435	*/
	436	if (object->type == OBJT_VNODE) {
	437	struct vnode *vp;
	438
	439	/*
	440	* Freeze optimized copies.
	441	*/
	442	vm_freeze_copyopts(object, 0, object->size);
	443
	444	/*
	445	* Clean pages and flush buffers.
	446	*/
	447	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
	448
	449	vp = (struct vnode *) object->handle;
	450	vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	451	}
	452
	453	/*
	454	* Wait for any I/O to complete, after which there had better not
	455	* be any references left on the object.
	456	*/
	457	vm_object_pip_wait(object, "objtrm");
	458
	459	if (object->ref_count != 0)
	460	panic("vm_object_terminate: object with references, ref_count=%d", object->ref_count);
	461
	462	/*
	463	* Now free any remaining pages. For internal objects, this also
	464	* removes them from paging queues. Don't free wired pages, just
	465	* remove them from the object.
	466	*/
	467	s = splvm();
	468	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
	469	if (p->busy \|\| (p->flags & PG_BUSY))
	470	panic("vm_object_terminate: freeing busy page %p\n", p);
	471	if (p->wire_count == 0) {
	472	vm_page_busy(p);
	473	vm_page_free(p);
	474	cnt.v_pfree++;
	475	} else {
	476	vm_page_busy(p);
	477	vm_page_remove(p);
	478	}
	479	}
	480	splx(s);
	481
	482	/*
	483	* Let the pager know object is dead.
	484	*/
	485	vm_pager_deallocate(object);
	486
	487	/*
	488	* Remove the object from the global object list.
	489	*/
	490	simple_lock(&vm_object_list_lock);
	491	TAILQ_REMOVE(&vm_object_list, object, object_list);
	492	simple_unlock(&vm_object_list_lock);
	493
	494	wakeup(object);
	495
	496	/*
	497	* Free the space for the object.
	498	*/
	499	zfree(obj_zone, object);
	500	}
	501
	502	/*
	503	* vm_object_page_clean
	504	*
	505	* Clean all dirty pages in the specified range of object. Leaves page
	506	* on whatever queue it is currently on. If NOSYNC is set then do not
	507	* write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
	508	* leaving the object dirty.
	509	*
	510	* When stuffing pages asynchronously, allow clustering. XXX we need a
	511	* synchronous clustering mode implementation.
	512	*
	513	* Odd semantics: if start == end, we clean everything.
	514	*
	515	* The object must be locked.
	516	*/
	517
	518	void
	519	vm_object_page_clean(object, start, end, flags)
	520	vm_object_t object;
	521	vm_pindex_t start;
	522	vm_pindex_t end;
	523	int flags;
	524	{
	525	vm_page_t p, np;
	526	vm_offset_t tstart, tend;
	527	vm_pindex_t pi;
	528	struct vnode *vp;
	529	int clearobjflags;
	530	int pagerflags;
	531	int curgeneration;
	532
	533	if (object->type != OBJT_VNODE \|\|
	534	(object->flags & OBJ_MIGHTBEDIRTY) == 0)
	535	return;
	536
	537	pagerflags = (flags & (OBJPC_SYNC \| OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	538	pagerflags \|= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
	539
	540	vp = object->handle;
	541
	542	vm_object_set_flag(object, OBJ_CLEANING);
	543
	544	/*
	545	* Handle 'entire object' case
	546	*/
	547	tstart = start;
	548	if (end == 0) {
	549	tend = object->size;
	550	} else {
	551	tend = end;
	552	}
	553
	554	/*
	555	* If the caller is smart and only msync()s a range he knows is
	556	* dirty, we may be able to avoid an object scan. This results in
	557	* a phenominal improvement in performance. We cannot do this
	558	* as a matter of course because the object may be huge - e.g.
	559	* the size might be in the gigabytes or terrabytes.
	560	*/
	561	if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
	562	vm_offset_t tscan;
	563	int scanlimit;
	564	int scanreset;
	565
	566	scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
	567	if (scanreset < 16)
	568	scanreset = 16;
	569	pagerflags \|= VM_PAGER_IGNORE_CLEANCHK;
	570
	571	scanlimit = scanreset;
	572	tscan = tstart;
	573	while (tscan < tend) {
	574	curgeneration = object->generation;
	575	p = vm_page_lookup(object, tscan);
	576	if (p == NULL \|\| p->valid == 0 \|\|
	577	(p->queue - p->pc) == PQ_CACHE) {
	578	if (--scanlimit == 0)
	579	break;
	580	++tscan;
	581	continue;
	582	}
	583	vm_page_test_dirty(p);
	584	if ((p->dirty & p->valid) == 0) {
	585	if (--scanlimit == 0)
	586	break;
	587	++tscan;
	588	continue;
	589	}
	590	/*
	591	* If we have been asked to skip nosync pages and
	592	* this is a nosync page, we can't continue.
	593	*/
	594	if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
	595	if (--scanlimit == 0)
	596	break;
	597	++tscan;
	598	continue;
	599	}
	600	scanlimit = scanreset;
	601
	602	/*
	603	* This returns 0 if it was unable to busy the first
	604	* page (i.e. had to sleep).
	605	*/
	606	tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
	607	}
	608
	609	/*
	610	* If everything was dirty and we flushed it successfully,
	611	* and the requested range is not the entire object, we
	612	* don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
	613	* return immediately.
	614	*/
	615	if (tscan >= tend && (tstart \|\| tend < object->size)) {
	616	vm_object_clear_flag(object, OBJ_CLEANING);
	617	return;
	618	}
	619	pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
	620	}
	621
	622	/*
	623	* Generally set CLEANCHK interlock and make the page read-only so
	624	* we can then clear the object flags.
	625	*
	626	* However, if this is a nosync mmap then the object is likely to
	627	* stay dirty so do not mess with the page and do not clear the
	628	* object flags.
	629	*/
	630
	631	clearobjflags = 1;
	632
	633	for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
	634	vm_page_flag_set(p, PG_CLEANCHK);
	635	if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
	636	clearobjflags = 0;
	637	else
	638	vm_page_protect(p, VM_PROT_READ);
	639	}
	640
	641	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
	642	struct vnode *vp;
	643
	644	vm_object_clear_flag(object, OBJ_WRITEABLE\|OBJ_MIGHTBEDIRTY);
	645	if (object->type == OBJT_VNODE &&
	646	(vp = (struct vnode *)object->handle) != NULL) {
	647	if (vp->v_flag & VOBJDIRTY) {
	648	simple_lock(&vp->v_interlock);
	649	vp->v_flag &= ~VOBJDIRTY;
	650	simple_unlock(&vp->v_interlock);
	651	}
	652	}
	653	}
	654
	655	rescan:
	656	curgeneration = object->generation;
	657
	658	for(p = TAILQ_FIRST(&object->memq); p; p = np) {
	659	int n;
	660
	661	np = TAILQ_NEXT(p, listq);
	662
	663	again:
	664	pi = p->pindex;
	665	if (((p->flags & PG_CLEANCHK) == 0) \|\|
	666	(pi < tstart) \|\| (pi >= tend) \|\|
	667	(p->valid == 0) \|\|
	668	((p->queue - p->pc) == PQ_CACHE)) {
	669	vm_page_flag_clear(p, PG_CLEANCHK);
	670	continue;
	671	}
	672
	673	vm_page_test_dirty(p);
	674	if ((p->dirty & p->valid) == 0) {
	675	vm_page_flag_clear(p, PG_CLEANCHK);
	676	continue;
	677	}
	678
	679	/*
	680	* If we have been asked to skip nosync pages and this is a
	681	* nosync page, skip it. Note that the object flags were
	682	* not cleared in this case so we do not have to set them.
	683	*/
	684	if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
	685	vm_page_flag_clear(p, PG_CLEANCHK);
	686	continue;
	687	}
	688
	689	n = vm_object_page_collect_flush(object, p,
	690	curgeneration, pagerflags);
	691	if (n == 0)
	692	goto rescan;
	693	if (object->generation != curgeneration)
	694	goto rescan;
	695
	696	/*
	697	* Try to optimize the next page. If we can't we pick up
	698	* our (random) scan where we left off.
	699	*/
	700	if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
	701	if ((p = vm_page_lookup(object, pi + n)) != NULL)
	702	goto again;
	703	}
	704	}
	705
	706	#if 0
	707	VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
	708	#endif
	709
	710	vm_object_clear_flag(object, OBJ_CLEANING);
	711	return;
	712	}
	713
	714	static int
	715	vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
	716	{
	717	int runlen;
	718	int s;
	719	int maxf;
	720	int chkb;
	721	int maxb;
	722	int i;
	723	vm_pindex_t pi;
	724	vm_page_t maf[vm_pageout_page_count];
	725	vm_page_t mab[vm_pageout_page_count];
	726	vm_page_t ma[vm_pageout_page_count];
	727
	728	s = splvm();
	729	pi = p->pindex;
	730	while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
	731	if (object->generation != curgeneration) {
	732	splx(s);
	733	return(0);
	734	}
	735	}
	736
	737	maxf = 0;
	738	for(i = 1; i < vm_pageout_page_count; i++) {
	739	vm_page_t tp;
	740
	741	if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
	742	if ((tp->flags & PG_BUSY) \|\|
	743	((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
	744	(tp->flags & PG_CLEANCHK) == 0) \|\|
	745	(tp->busy != 0))
	746	break;
	747	if((tp->queue - tp->pc) == PQ_CACHE) {
	748	vm_page_flag_clear(tp, PG_CLEANCHK);
	749	break;
	750	}
	751	vm_page_test_dirty(tp);
	752	if ((tp->dirty & tp->valid) == 0) {
	753	vm_page_flag_clear(tp, PG_CLEANCHK);
	754	break;
	755	}
	756	maf[ i - 1 ] = tp;
	757	maxf++;
	758	continue;
	759	}
	760	break;
	761	}
	762
	763	maxb = 0;
	764	chkb = vm_pageout_page_count - maxf;
	765	if (chkb) {
	766	for(i = 1; i < chkb;i++) {
	767	vm_page_t tp;
	768
	769	if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
	770	if ((tp->flags & PG_BUSY) \|\|
	771	((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
	772	(tp->flags & PG_CLEANCHK) == 0) \|\|
	773	(tp->busy != 0))
	774	break;
	775	if((tp->queue - tp->pc) == PQ_CACHE) {
	776	vm_page_flag_clear(tp, PG_CLEANCHK);
	777	break;
	778	}
	779	vm_page_test_dirty(tp);
	780	if ((tp->dirty & tp->valid) == 0) {
	781	vm_page_flag_clear(tp, PG_CLEANCHK);
	782	break;
	783	}
	784	mab[ i - 1 ] = tp;
	785	maxb++;
	786	continue;
	787	}
	788	break;
	789	}
	790	}
	791
	792	for(i = 0; i < maxb; i++) {
	793	int index = (maxb - i) - 1;
	794	ma[index] = mab[i];
	795	vm_page_flag_clear(ma[index], PG_CLEANCHK);
	796	}
	797	vm_page_flag_clear(p, PG_CLEANCHK);
	798	ma[maxb] = p;
	799	for(i = 0; i < maxf; i++) {
	800	int index = (maxb + i) + 1;
	801	ma[index] = maf[i];
	802	vm_page_flag_clear(ma[index], PG_CLEANCHK);
	803	}
	804	runlen = maxb + maxf + 1;
	805
	806	splx(s);
	807	vm_pageout_flush(ma, runlen, pagerflags);
	808	for (i = 0; i < runlen; i++) {
	809	if (ma[i]->valid & ma[i]->dirty) {
	810	vm_page_protect(ma[i], VM_PROT_READ);
	811	vm_page_flag_set(ma[i], PG_CLEANCHK);
	812
	813	/*
	814	* maxf will end up being the actual number of pages
	815	* we wrote out contiguously, non-inclusive of the
	816	* first page. We do not count look-behind pages.
	817	*/
	818	if (i >= maxb + 1 && (maxf > i - maxb - 1))
	819	maxf = i - maxb - 1;
	820	}
	821	}
	822	return(maxf + 1);
	823	}
	824
	825	#ifdef not_used
	826	/* XXX I cannot tell if this should be an exported symbol */
	827	/*
	828	* vm_object_deactivate_pages
	829	*
	830	* Deactivate all pages in the specified object. (Keep its pages
	831	* in memory even though it is no longer referenced.)
	832	*
	833	* The object must be locked.
	834	*/
	835	static void
	836	vm_object_deactivate_pages(object)
	837	vm_object_t object;
	838	{
	839	vm_page_t p, next;
	840
	841	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
	842	next = TAILQ_NEXT(p, listq);
	843	vm_page_deactivate(p);
	844	}
	845	}
	846	#endif
	847
	848	/*
	849	* Same as vm_object_pmap_copy, except range checking really
	850	* works, and is meant for small sections of an object.
	851	*
	852	* This code protects resident pages by making them read-only
	853	* and is typically called on a fork or split when a page
	854	* is converted to copy-on-write.
	855	*
	856	* NOTE: If the page is already at VM_PROT_NONE, calling
	857	* vm_page_protect will have no effect.
	858	*/
	859
	860	void
	861	vm_object_pmap_copy_1(object, start, end)
	862	vm_object_t object;
	863	vm_pindex_t start;
	864	vm_pindex_t end;
	865	{
	866	vm_pindex_t idx;
	867	vm_page_t p;
	868
	869	if (object == NULL \|\| (object->flags & OBJ_WRITEABLE) == 0)
	870	return;
	871
	872	for (idx = start; idx < end; idx++) {
	873	p = vm_page_lookup(object, idx);
	874	if (p == NULL)
	875	continue;
	876	vm_page_protect(p, VM_PROT_READ);
	877	}
	878	}
	879
	880	/*
	881	* vm_object_pmap_remove:
	882	*
	883	* Removes all physical pages in the specified
	884	* object range from all physical maps.
	885	*
	886	* The object must not be locked.
	887	*/
	888	void
	889	vm_object_pmap_remove(object, start, end)
	890	vm_object_t object;
	891	vm_pindex_t start;
	892	vm_pindex_t end;
	893	{
	894	vm_page_t p;
	895
	896	if (object == NULL)
	897	return;
	898	for (p = TAILQ_FIRST(&object->memq);
	899	p != NULL;
	900	p = TAILQ_NEXT(p, listq)) {
	901	if (p->pindex >= start && p->pindex < end)
	902	vm_page_protect(p, VM_PROT_NONE);
	903	}
	904	if ((start == 0) && (object->size == end))
	905	vm_object_clear_flag(object, OBJ_WRITEABLE);
	906	}
	907
	908	/*
	909	* vm_object_madvise:
	910	*
	911	* Implements the madvise function at the object/page level.
	912	*
	913	* MADV_WILLNEED (any object)
	914	*
	915	* Activate the specified pages if they are resident.
	916	*
	917	* MADV_DONTNEED (any object)
	918	*
	919	* Deactivate the specified pages if they are resident.
	920	*
	921	* MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects,
	922	* OBJ_ONEMAPPING only)
	923	*
	924	* Deactivate and clean the specified pages if they are
	925	* resident. This permits the process to reuse the pages
	926	* without faulting or the kernel to reclaim the pages
	927	* without I/O.
	928	*/
	929	void
	930	vm_object_madvise(object, pindex, count, advise)
	931	vm_object_t object;
	932	vm_pindex_t pindex;
	933	int count;
	934	int advise;
	935	{
	936	vm_pindex_t end, tpindex;
	937	vm_object_t tobject;
	938	vm_page_t m;
	939
	940	if (object == NULL)
	941	return;
	942
	943	end = pindex + count;
	944
	945	/*
	946	* Locate and adjust resident pages
	947	*/
	948
	949	for (; pindex < end; pindex += 1) {
	950	relookup:
	951	tobject = object;
	952	tpindex = pindex;
	953	shadowlookup:
	954	/*
	955	* MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
	956	* and those pages must be OBJ_ONEMAPPING.
	957	*/
	958	if (advise == MADV_FREE) {
	959	if ((tobject->type != OBJT_DEFAULT &&
	960	tobject->type != OBJT_SWAP) \|\|
	961	(tobject->flags & OBJ_ONEMAPPING) == 0) {
	962	continue;
	963	}
	964	}
	965
	966	m = vm_page_lookup(tobject, tpindex);
	967
	968	if (m == NULL) {
	969	/*
	970	* There may be swap even if there is no backing page
	971	*/
	972	if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
	973	swap_pager_freespace(tobject, tpindex, 1);
	974
	975	/*
	976	* next object
	977	*/
	978	tobject = tobject->backing_object;
	979	if (tobject == NULL)
	980	continue;
	981	tpindex += OFF_TO_IDX(tobject->backing_object_offset);
	982	goto shadowlookup;
	983	}
	984
	985	/*
	986	* If the page is busy or not in a normal active state,
	987	* we skip it. If the page is not managed there are no
	988	* page queues to mess with. Things can break if we mess
	989	* with pages in any of the below states.
	990	*/
	991	if (
	992	m->hold_count \|\|
	993	m->wire_count \|\|
	994	(m->flags & PG_UNMANAGED) \|\|
	995	m->valid != VM_PAGE_BITS_ALL
	996	) {
	997	continue;
	998	}
	999
	1000	if (vm_page_sleep_busy(m, TRUE, "madvpo"))
	1001	goto relookup;
	1002
	1003	if (advise == MADV_WILLNEED) {
	1004	vm_page_activate(m);
	1005	} else if (advise == MADV_DONTNEED) {
	1006	vm_page_dontneed(m);
	1007	} else if (advise == MADV_FREE) {
	1008	/*
	1009	* Mark the page clean. This will allow the page
	1010	* to be freed up by the system. However, such pages
	1011	* are often reused quickly by malloc()/free()
	1012	* so we do not do anything that would cause
	1013	* a page fault if we can help it.
	1014	*
	1015	* Specifically, we do not try to actually free
	1016	* the page now nor do we try to put it in the
	1017	* cache (which would cause a page fault on reuse).
	1018	*
	1019	* But we do make the page is freeable as we
	1020	* can without actually taking the step of unmapping
	1021	* it.
	1022	*/
	1023	pmap_clear_modify(m);
	1024	m->dirty = 0;
	1025	m->act_count = 0;
	1026	vm_page_dontneed(m);
	1027	if (tobject->type == OBJT_SWAP)
	1028	swap_pager_freespace(tobject, tpindex, 1);
	1029	}
	1030	}
	1031	}
	1032
	1033	/*
	1034	* vm_object_shadow:
	1035	*
	1036	* Create a new object which is backed by the
	1037	* specified existing object range. The source
	1038	* object reference is deallocated.
	1039	*
	1040	* The new object and offset into that object
	1041	* are returned in the source parameters.
	1042	*/
	1043
	1044	void
	1045	vm_object_shadow(object, offset, length)
	1046	vm_object_t object; / IN/OUT */
	1047	vm_ooffset_t offset; / IN/OUT */
	1048	vm_size_t length;
	1049	{
	1050	vm_object_t source;
	1051	vm_object_t result;
	1052
	1053	source = *object;
	1054
	1055	/*
	1056	* Don't create the new object if the old object isn't shared.
	1057	*/
	1058
	1059	if (source != NULL &&
	1060	source->ref_count == 1 &&
	1061	source->handle == NULL &&
	1062	(source->type == OBJT_DEFAULT \|\|
	1063	source->type == OBJT_SWAP))
	1064	return;
	1065
	1066	/*
	1067	* Allocate a new object with the given length
	1068	*/
	1069
	1070	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
	1071	panic("vm_object_shadow: no object for shadowing");
	1072
	1073	/*
	1074	* The new object shadows the source object, adding a reference to it.
	1075	* Our caller changes his reference to point to the new object,
	1076	* removing a reference to the source object. Net result: no change
	1077	* of reference count.
	1078	*
	1079	* Try to optimize the result object's page color when shadowing
	1080	* in order to maintain page coloring consistency in the combined
	1081	* shadowed object.
	1082	*/
	1083	result->backing_object = source;
	1084	if (source) {
	1085	LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
	1086	source->shadow_count++;
	1087	source->generation++;
	1088	result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK;
	1089	}
	1090
	1091	/*
	1092	* Store the offset into the source object, and fix up the offset into
	1093	* the new object.
	1094	*/
	1095
	1096	result->backing_object_offset = *offset;
	1097
	1098	/*
	1099	* Return the new things
	1100	*/
	1101
	1102	*offset = 0;
	1103	*object = result;
	1104	}
	1105
		/*
		 * Operation bits accepted by vm_object_backing_scan().  Each one is
		 * tested with a bitwise AND against the op argument.
		 */
	1106	#define OBSC_TEST_ALL_SHADOWED 0x0001
	1107	#define OBSC_COLLAPSE_NOWAIT 0x0002
	1108	#define OBSC_COLLAPSE_WAIT 0x0004
	1109
		/*
		 * vm_object_backing_scan:
		 *
		 * Walk the resident page queue (memq) of object->backing_object at
		 * splvm and do the work selected by the bits in op:
		 *
		 * OBSC_TEST_ALL_SHADOWED
		 *	For every backing page that lies inside the parent's window,
		 *	require that the parent holds a valid resident page or that
		 *	the parent's pager has the page.  Returns 0 at once when the
		 *	backing object is not OBJT_DEFAULT, and stops with 0 at the
		 *	first page that is not covered.
		 * OBSC_COLLAPSE_NOWAIT
		 *	Collapse pages into the parent, skipping any page that is
		 *	busy, invalid, held or wired.
		 * OBSC_COLLAPSE_WAIT
		 *	Mark the backing object OBJ_DEAD, then collapse pages into
		 *	the parent, sleeping on busy pages and restarting the scan
		 *	from the head of the queue after each sleep.
		 *
		 * In both collapse modes an eligible page is busied, its swap is
		 * released when the backing object is OBJT_SWAP, and the page is then
		 * either freed (outside the parent's range, or already present in
		 * the parent or the parent's pager) or renamed into the parent at
		 * the translated index.
		 *
		 * Returns 1 unless the OBSC_TEST_ALL_SHADOWED test failed, in which
		 * case 0 is returned.  object->backing_object is dereferenced
		 * without a NULL check.
		 */
	1110	static __inline int
	1111	vm_object_backing_scan(vm_object_t object, int op)
	1112	{
	1113	int s;
	1114	int r = 1;
	1115	vm_page_t p;
	1116	vm_object_t backing_object;
	1117	vm_pindex_t backing_offset_index;
	1118
	1119	s = splvm();
	1120
	1121	backing_object = object->backing_object;
	1122	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
	1123
	1124	/*
	1125	* Initial conditions
	1126	*/
	1127
	1128	if (op & OBSC_TEST_ALL_SHADOWED) {
	1129	/*
	1130	* We do not want to have to test for the existence of
	1131	* swap pages in the backing object. XXX but with the
	1132	* new swapper this would be pretty easy to do.
	1133	*
	1134	* XXX what about anonymous MAP_SHARED memory that hasn't
	1135	* been ZFOD faulted yet? If we do not test for this, the
	1136	* shadow test may succeed! XXX
	1137	*/
	1138	if (backing_object->type != OBJT_DEFAULT) {
	1139	splx(s);
	1140	return(0);
	1141	}
	1142	}
	1143	if (op & OBSC_COLLAPSE_WAIT) {
	1144	vm_object_set_flag(backing_object, OBJ_DEAD);
	1145	}
	1146
	1147	/*
	1148	* Our scan
	1149	*/
	1150
	1151	p = TAILQ_FIRST(&backing_object->memq);
	1152	while (p) {
		/*
		 * Fetch the successor first: p may be freed or renamed into
		 * the parent further down.  new_pindex is only meaningful when
		 * p->pindex >= backing_offset_index; both users below test
		 * that before relying on it.
		 */
	1153	vm_page_t next = TAILQ_NEXT(p, listq);
	1154	vm_pindex_t new_pindex = p->pindex - backing_offset_index;
	1155
	1156	if (op & OBSC_TEST_ALL_SHADOWED) {
	1157	vm_page_t pp;
	1158
	1159	/*
	1160	* Ignore pages outside the parent object's range
	1161	* and outside the parent object's mapping of the
	1162	* backing object.
	1163	*
	1164	* note that we do not busy the backing object's
	1165	* page.
	1166	*/
	1167
	1168	if (
	1169	p->pindex < backing_offset_index \|\|
	1170	new_pindex >= object->size
	1171	) {
	1172	p = next;
	1173	continue;
	1174	}
	1175
	1176	/*
	1177	* See if the parent has the page or if the parent's
	1178	* object pager has the page. If the parent has the
	1179	* page but the page is not valid, the parent's
	1180	* object pager must have the page.
	1181	*
	1182	* If this fails, the parent does not completely shadow
	1183	* the object and we might as well give up now.
	1184	*/
	1185
	1186	pp = vm_page_lookup(object, new_pindex);
	1187	if (
	1188	(pp == NULL \|\| pp->valid == 0) &&
	1189	!vm_pager_has_page(object, new_pindex, NULL, NULL)
	1190	) {
	1191	r = 0;
	1192	break;
	1193	}
	1194	}
	1195
	1196	/*
	1197	* Check for busy page
	1198	*/
	1199
	1200	if (op & (OBSC_COLLAPSE_WAIT \| OBSC_COLLAPSE_NOWAIT)) {
	1201	vm_page_t pp;
	1202
	1203	if (op & OBSC_COLLAPSE_NOWAIT) {
		/*
		 * Non-blocking mode: leave any page that is busy, invalid,
		 * held or wired where it is and move on.
		 */
	1204	if (
	1205	(p->flags & PG_BUSY) \|\|
	1206	!p->valid \|\|
	1207	p->hold_count \|\|
	1208	p->wire_count \|\|
	1209	p->busy
	1210	) {
	1211	p = next;
	1212	continue;
	1213	}
	1214	} else if (op & OBSC_COLLAPSE_WAIT) {
	1215	if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
	1216	/*
	1217	* If we slept, anything could have
	1218	* happened. Since the object is
	1219	* marked dead, the backing offset
	1220	* should not have changed so we
	1221	* just restart our scan.
	1222	*/
	1223	p = TAILQ_FIRST(&backing_object->memq);
	1224	continue;
	1225	}
	1226	}
	1227
	1228	/*
	1229	* Busy the page
	1230	*/
	1231	vm_page_busy(p);
	1232
		/*
		 * NOTE(review): the assertion text names vm_object_qcollapse()
		 * although this scan also runs on behalf of vm_object_collapse().
		 */
	1233	KASSERT(
	1234	p->object == backing_object,
	1235	("vm_object_qcollapse(): object mismatch")
	1236	);
	1237
	1238	/*
	1239	* Destroy any associated swap
	1240	*/
	1241	if (backing_object->type == OBJT_SWAP) {
	1242	swap_pager_freespace(
	1243	backing_object,
	1244	p->pindex,
	1245	1
	1246	);
	1247	}
	1248
	1249	if (
	1250	p->pindex < backing_offset_index \|\|
	1251	new_pindex >= object->size
	1252	) {
	1253	/*
	1254	* Page is out of the parent object's range, we
	1255	* can simply destroy it.
	1256	*/
	1257	vm_page_protect(p, VM_PROT_NONE);
	1258	vm_page_free(p);
	1259	p = next;
	1260	continue;
	1261	}
	1262
	1263	pp = vm_page_lookup(object, new_pindex);
	1264	if (
	1265	pp != NULL \|\|
	1266	vm_pager_has_page(object, new_pindex, NULL, NULL)
	1267	) {
	1268	/*
	1269	* page already exists in parent OR swap exists
	1270	* for this location in the parent. Destroy
	1271	* the original page from the backing object.
	1272	*
	1273	* Leave the parent's page alone
	1274	*/
	1275	vm_page_protect(p, VM_PROT_NONE);
	1276	vm_page_free(p);
	1277	p = next;
	1278	continue;
	1279	}
	1280
	1281	/*
	1282	* Page does not exist in parent, rename the
	1283	* page from the backing object to the main object.
	1284	*
	1285	* If the page was mapped to a process, it can remain
	1286	* mapped through the rename.
	1287	*/
	1288	if ((p->queue - p->pc) == PQ_CACHE)
	1289	vm_page_deactivate(p);
	1290
	1291	vm_page_rename(p, object, new_pindex);
	1292	/* page automatically made dirty by rename */
	1293	}
	1294	p = next;
	1295	}
	1296	splx(s);
	1297	return(r);
	1298	}
	1299
	1300
	1301	/*
	1302	* this version of collapse allows the operation to occur earlier and
	1303	* when paging_in_progress is true for an object... This is not a complete
	1304	* operation, but should plug 99.9% of the rest of the leaks.
	1305	*/
	1306	static void
	1307	vm_object_qcollapse(object)
	1308	vm_object_t object;
	1309	{
	1310	vm_object_t backing_object = object->backing_object;
	1311
	1312	if (backing_object->ref_count != 1)
	1313	return;
	1314
	1315	backing_object->ref_count += 2;
	1316
	1317	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
	1318
	1319	backing_object->ref_count -= 2;
	1320	}
	1321
	1322	/*
	1323	* vm_object_collapse:
	1324	*
	1325	* Collapse an object with the object backing it.
	1326	* Pages in the backing object are moved into the
	1327	* parent, and the backing object is deallocated.
		*
		* The routine loops, each pass working on object and its current
		* backing object, until one of these stops it:
		*  - object is NULL or has no backing object;
		*  - either object has a handle, is neither OBJT_DEFAULT nor
		*    OBJT_SWAP, or is flagged OBJ_DEAD;
		*  - paging is in progress on either object (only the partial
		*    vm_object_qcollapse() is attempted in that case);
		*  - the backing object has other references and the parent does
		*    not shadow all of it.
		*
		* When the backing object has exactly one reference it is merged
		* into the parent and freed (counted in object_collapses).
		* Otherwise, if the parent shadows it completely, the parent is
		* re-pointed at the next object down the chain (counted in
		* object_bypasses).
	1328	*/
	1329	void
	1330	vm_object_collapse(object)
	1331	vm_object_t object;
	1332	{
	1333	while (TRUE) {
	1334	vm_object_t backing_object;
	1335
	1336	/*
	1337	* Verify that the conditions are right for collapse:
	1338	*
	1339	* The object exists and the backing object exists.
	1340	*/
	1341	if (object == NULL)
	1342	break;
	1343
	1344	if ((backing_object = object->backing_object) == NULL)
	1345	break;
	1346
	1347	/*
	1348	* we check the backing object first, because it is most likely
	1349	* not collapsable.
	1350	*/
	1351	if (backing_object->handle != NULL \|\|
	1352	(backing_object->type != OBJT_DEFAULT &&
	1353	backing_object->type != OBJT_SWAP) \|\|
	1354	(backing_object->flags & OBJ_DEAD) \|\|
	1355	object->handle != NULL \|\|
	1356	(object->type != OBJT_DEFAULT &&
	1357	object->type != OBJT_SWAP) \|\|
	1358	(object->flags & OBJ_DEAD)) {
	1359	break;
	1360	}
	1361
		/*
		 * With paging in flight on either object only the non-blocking
		 * partial collapse is attempted, and the loop ends.
		 */
	1362	if (
	1363	object->paging_in_progress != 0 \|\|
	1364	backing_object->paging_in_progress != 0
	1365	) {
	1366	vm_object_qcollapse(object);
	1367	break;
	1368	}
	1369
	1370	/*
	1371	* We know that we can either collapse the backing object (if
	1372	* the parent is the only reference to it) or (perhaps) have
	1373	* the parent bypass the object if the parent happens to shadow
	1374	* all the resident pages in the entire backing object.
	1375	*
	1376	* This is ignoring pager-backed pages such as swap pages.
	1377	* vm_object_backing_scan fails the shadowing test in this
	1378	* case.
	1379	*/
	1380
	1381	if (backing_object->ref_count == 1) {
	1382	/*
	1383	* If there is exactly one reference to the backing
	1384	* object, we can collapse it into the parent.
	1385	*/
	1386
	1387	vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
	1388
	1389	/*
	1390	* Move the pager from backing_object to object.
	1391	*/
	1392
	1393	if (backing_object->type == OBJT_SWAP) {
	1394	vm_object_pip_add(backing_object, 1);
	1395
	1396	/*
	1397	* scrap the paging_offset junk and do a
	1398	* discrete copy. This also removes major
	1399	* assumptions about how the swap-pager
	1400	* works from where it doesn't belong. The
	1401	* new swapper is able to optimize the
	1402	* destroy-source case.
	1403	*/
	1404
	1405	vm_object_pip_add(object, 1);
	1406	swap_pager_copy(
	1407	backing_object,
	1408	object,
	1409	OFF_TO_IDX(object->backing_object_offset), TRUE);
	1410	vm_object_pip_wakeup(object);
	1411
	1412	vm_object_pip_wakeup(backing_object);
	1413	}
	1414	/*
	1415	* Object now shadows whatever backing_object did.
	1416	* Note that the reference to
	1417	* backing_object->backing_object moves from within
	1418	* backing_object to within object.
	1419	*/
	1420
		/*
		 * object->backing_object still equals backing_object at this
		 * point; it is reassigned a few lines further down.
		 */
	1421	LIST_REMOVE(object, shadow_list);
	1422	object->backing_object->shadow_count--;
	1423	object->backing_object->generation++;
	1424	if (backing_object->backing_object) {
	1425	LIST_REMOVE(backing_object, shadow_list);
	1426	backing_object->backing_object->shadow_count--;
	1427	backing_object->backing_object->generation++;
	1428	}
	1429	object->backing_object = backing_object->backing_object;
	1430	if (object->backing_object) {
	1431	LIST_INSERT_HEAD(
	1432	&object->backing_object->shadow_head,
	1433	object,
	1434	shadow_list
	1435	);
	1436	object->backing_object->shadow_count++;
	1437	object->backing_object->generation++;
	1438	}
	1439
		/*
		 * The parent's offset is now expressed relative to the new
		 * backing object, so the removed layer's offset is added in.
		 */
	1440	object->backing_object_offset +=
	1441	backing_object->backing_object_offset;
	1442
	1443	/*
	1444	* Discard backing_object.
	1445	*
	1446	* Since the backing object has no pages, no pager left,
	1447	* and no object references within it, all that is
	1448	* necessary is to dispose of it.
	1449	*/
	1450
	1451	KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
	1452	KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));
	1453	TAILQ_REMOVE(
	1454	&vm_object_list,
	1455	backing_object,
	1456	object_list
	1457	);
	1458	vm_object_count--;
	1459
	1460	zfree(obj_zone, backing_object);
	1461
	1462	object_collapses++;
	1463	} else {
	1464	vm_object_t new_backing_object;
	1465
	1466	/*
	1467	* If we do not entirely shadow the backing object,
	1468	* there is nothing we can do so we give up.
	1469	*/
	1470
	1471	if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
	1472	break;
	1473	}
	1474
	1475	/*
	1476	* Make the parent shadow the next object in the
	1477	* chain. Deallocating backing_object will not remove
	1478	* it, since its reference count is at least 2.
	1479	*/
	1480
	1481	LIST_REMOVE(object, shadow_list);
	1482	backing_object->shadow_count--;
	1483	backing_object->generation++;
	1484
		/*
		 * The parent takes its own reference on the next object down;
		 * the offset is only adjusted when such an object exists.
		 */
	1485	new_backing_object = backing_object->backing_object;
	1486	if ((object->backing_object = new_backing_object) != NULL) {
	1487	vm_object_reference(new_backing_object);
	1488	LIST_INSERT_HEAD(
	1489	&new_backing_object->shadow_head,
	1490	object,
	1491	shadow_list
	1492	);
	1493	new_backing_object->shadow_count++;
	1494	new_backing_object->generation++;
	1495	object->backing_object_offset +=
	1496	backing_object->backing_object_offset;
	1497	}
	1498
	1499	/*
	1500	* Drop the reference count on backing_object. Since
	1501	* its ref_count was at least 2, it will not vanish;
	1502	* so we don't need to call vm_object_deallocate, but
	1503	* we do anyway.
	1504	*/
	1505	vm_object_deallocate(backing_object);
	1506	object_bypasses++;
	1507	}
	1508
	1509	/*
	1510	* Try again with this object's new backing object.
	1511	*/
	1512	}
	1513	}
	1514
	1515	/*
	1516	* vm_object_page_remove: [internal]
	1517	*
	1518	* Removes all physical pages in the specified
	1519	* object range from the object's list of pages.
	1520	*
	1521	* The object must be locked.
	1522	*/
	1523	void
	1524	vm_object_page_remove(object, start, end, clean_only)
	1525	vm_object_t object;
	1526	vm_pindex_t start;
	1527	vm_pindex_t end;
	1528	boolean_t clean_only;
	1529	{
	1530	vm_page_t p, next;
	1531	unsigned int size;
	1532	int all;
	1533
	1534	if (object == NULL \|\|
	1535	object->resident_page_count == 0)
	1536	return;
	1537
	1538	all = ((end == 0) && (start == 0));
	1539
	1540	/*
	1541	* Since physically-backed objects do not use managed pages, we can't
	1542	* remove pages from the object (we must instead remove the page
	1543	* references, and then destroy the object).
	1544	*/
	1545	KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
	1546
	1547	vm_object_pip_add(object, 1);
	1548	again:
	1549	size = end - start;
	1550	if (all \|\| size > object->resident_page_count / 4) {
	1551	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
	1552	next = TAILQ_NEXT(p, listq);
	1553	if (all \|\| ((start <= p->pindex) && (p->pindex < end))) {
	1554	if (p->wire_count != 0) {
	1555	vm_page_protect(p, VM_PROT_NONE);
	1556	if (!clean_only)
	1557	p->valid = 0;
	1558	continue;
	1559	}
	1560
	1561	/*
	1562	* The busy flags are only cleared at
	1563	* interrupt -- minimize the spl transitions
	1564	*/
	1565
	1566	if (vm_page_sleep_busy(p, TRUE, "vmopar"))
	1567	goto again;
	1568
	1569	if (clean_only && p->valid) {
	1570	vm_page_test_dirty(p);
	1571	if (p->valid & p->dirty)
	1572	continue;
	1573	}
	1574
	1575	vm_page_busy(p);
	1576	vm_page_protect(p, VM_PROT_NONE);
	1577	vm_page_free(p);
	1578	}
	1579	}
	1580	} else {
	1581	while (size > 0) {
	1582	if ((p = vm_page_lookup(object, start)) != 0) {
	1583
	1584	if (p->wire_count != 0) {
	1585	vm_page_protect(p, VM_PROT_NONE);
	1586	if (!clean_only)
	1587	p->valid = 0;
	1588	start += 1;
	1589	size -= 1;
	1590	continue;
	1591	}
	1592
	1593	/*
	1594	* The busy flags are only cleared at
	1595	* interrupt -- minimize the spl transitions
	1596	*/
	1597	if (vm_page_sleep_busy(p, TRUE, "vmopar"))
	1598	goto again;
	1599
	1600	if (clean_only && p->valid) {
	1601	vm_page_test_dirty(p);
	1602	if (p->valid & p->dirty) {
	1603	start += 1;
	1604	size -= 1;
	1605	continue;
	1606	}
	1607	}
	1608
	1609	vm_page_busy(p);
	1610	vm_page_protect(p, VM_PROT_NONE);
	1611	vm_page_free(p);
	1612	}
	1613	start += 1;
	1614	size -= 1;
	1615	}
	1616	}
	1617	vm_object_pip_wakeup(object);
	1618	}
	1619
	1620	/*
	1621	* Routine: vm_object_coalesce
	1622	* Function: Coalesces two objects backing up adjoining
	1623	* regions of memory into a single object.
	1624	*
	1625	* returns TRUE if objects were combined.
	1626	*
	1627	* NOTE: Only works at the moment if the second object is NULL -
	1628	* if it's not, which object do we lock first?
	1629	*
	1630	* Parameters:
	1631	* prev_object First object to coalesce
	1632	* prev_offset Offset into prev_object
	1633	* next_object Second object into coalesce
	1634	* next_offset Offset into next_object
	1635	*
	1636	* prev_size Size of reference to prev_object
	1637	* next_size Size of reference to next_object
	1638	*
	1639	* Conditions:
	1640	* The object must not be locked.
	1641	*/
	1642	boolean_t
	1643	vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
	1644	vm_object_t prev_object;
	1645	vm_pindex_t prev_pindex;
	1646	vm_size_t prev_size, next_size;
	1647	{
	1648	vm_pindex_t next_pindex;
	1649
	1650	if (prev_object == NULL) {
	1651	return (TRUE);
	1652	}
	1653
	1654	if (prev_object->type != OBJT_DEFAULT &&
	1655	prev_object->type != OBJT_SWAP) {
	1656	return (FALSE);
	1657	}
	1658
	1659	/*
	1660	* Try to collapse the object first
	1661	*/
	1662	vm_object_collapse(prev_object);
	1663
	1664	/*
	1665	* Can't coalesce if: . more than one reference . paged out . shadows
	1666	* another object . has a copy elsewhere (any of which mean that the
	1667	* pages not mapped to prev_entry may be in use anyway)
	1668	*/
	1669
	1670	if (prev_object->backing_object != NULL) {
	1671	return (FALSE);
	1672	}
	1673
	1674	prev_size >>= PAGE_SHIFT;
	1675	next_size >>= PAGE_SHIFT;
	1676	next_pindex = prev_pindex + prev_size;
	1677
	1678	if ((prev_object->ref_count > 1) &&
	1679	(prev_object->size != next_pindex)) {
	1680	return (FALSE);
	1681	}
	1682
	1683	/*
	1684	* Remove any pages that may still be in the object from a previous
	1685	* deallocation.
	1686	*/
	1687	if (next_pindex < prev_object->size) {
	1688	vm_object_page_remove(prev_object,
	1689	next_pindex,
	1690	next_pindex + next_size, FALSE);
	1691	if (prev_object->type == OBJT_SWAP)
	1692	swap_pager_freespace(prev_object,
	1693	next_pindex, next_size);
	1694	}
	1695
	1696	/*
	1697	* Extend the object if necessary.
	1698	*/
	1699	if (next_pindex + next_size > prev_object->size)
	1700	prev_object->size = next_pindex + next_size;
	1701
	1702	return (TRUE);
	1703	}
	1704
	1705	void
	1706	vm_object_set_writeable_dirty(vm_object_t object)
	1707	{
	1708	struct vnode *vp;
	1709
	1710	vm_object_set_flag(object, OBJ_WRITEABLE\|OBJ_MIGHTBEDIRTY);
	1711	if (object->type == OBJT_VNODE &&
	1712	(vp = (struct vnode *)object->handle) != NULL) {
	1713	if ((vp->v_flag & VOBJDIRTY) == 0) {
	1714	simple_lock(&vp->v_interlock);
	1715	vp->v_flag \|= VOBJDIRTY;
	1716	simple_unlock(&vp->v_interlock);
	1717	}
	1718	}
	1719	}
	1720
	1721
	1722
	1723	#include "opt_ddb.h"
	1724	#ifdef DDB
	1725	#include <sys/kernel.h>
	1726
	1727	#include <sys/cons.h>
	1728
	1729	#include <ddb/ddb.h>
	1730
	1731	static int _vm_object_in_map __P((vm_map_t map, vm_object_t object,
	1732	vm_map_entry_t entry));
	1733	static int vm_object_in_map __P((vm_object_t object));
	1734
	1735	static int
	1736	_vm_object_in_map(map, object, entry)
	1737	vm_map_t map;
	1738	vm_object_t object;
	1739	vm_map_entry_t entry;
	1740	{
	1741	vm_map_t tmpm;
	1742	vm_map_entry_t tmpe;
	1743	vm_object_t obj;
	1744	int entcount;
	1745
	1746	if (map == 0)
	1747	return 0;
	1748
	1749	if (entry == 0) {
	1750	tmpe = map->header.next;
	1751	entcount = map->nentries;
	1752	while (entcount-- && (tmpe != &map->header)) {
	1753	if( _vm_object_in_map(map, object, tmpe)) {
	1754	return 1;
	1755	}
	1756	tmpe = tmpe->next;
	1757	}
	1758	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
	1759	tmpm = entry->object.sub_map;
	1760	tmpe = tmpm->header.next;
	1761	entcount = tmpm->nentries;
	1762	while (entcount-- && tmpe != &tmpm->header) {
	1763	if( _vm_object_in_map(tmpm, object, tmpe)) {
	1764	return 1;
	1765	}
	1766	tmpe = tmpe->next;
	1767	}
	1768	} else if ((obj = entry->object.vm_object) != NULL) {
	1769	for(; obj; obj=obj->backing_object)
	1770	if( obj == object) {
	1771	return 1;
	1772	}
	1773	}
	1774	return 0;
	1775	}
	1776
	1777	static int
	1778	vm_object_in_map( object)
	1779	vm_object_t object;
	1780	{
	1781	struct proc *p;
	1782	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
	1783	if( !p->p_vmspace /* \|\| (p->p_flag & (P_SYSTEM\|P_WEXIT)) */)
	1784	continue;
	1785	if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
	1786	return 1;
	1787	}
	1788	if( _vm_object_in_map( kernel_map, object, 0))
	1789	return 1;
	1790	if( _vm_object_in_map( kmem_map, object, 0))
	1791	return 1;
	1792	if( _vm_object_in_map( pager_map, object, 0))
	1793	return 1;
	1794	if( _vm_object_in_map( buffer_map, object, 0))
	1795	return 1;
	1796	if( _vm_object_in_map( mb_map, object, 0))
	1797	return 1;
	1798	return 0;
	1799	}
	1800
	1801	DB_SHOW_COMMAND(vmochk, vm_object_check)
	1802	{
	1803	vm_object_t object;
	1804
	1805	/*
	1806	* make sure that internal objs are in a map somewhere
	1807	* and none have zero ref counts.
	1808	*/
	1809	for (object = TAILQ_FIRST(&vm_object_list);
	1810	object != NULL;
	1811	object = TAILQ_NEXT(object, object_list)) {
	1812	if (object->handle == NULL &&
	1813	(object->type == OBJT_DEFAULT \|\| object->type == OBJT_SWAP)) {
	1814	if (object->ref_count == 0) {
	1815	db_printf("vmochk: internal obj has zero ref count: %ld\n",
	1816	(long)object->size);
	1817	}
	1818	if (!vm_object_in_map(object)) {
	1819	db_printf(
	1820	"vmochk: internal obj is not in a map: "
	1821	"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
	1822	object->ref_count, (u_long)object->size,
	1823	(u_long)object->size,
	1824	(void *)object->backing_object);
	1825	}
	1826	}
	1827	}
	1828	}
	1829
	1830	/*
	1831	* vm_object_print: [ debug ]
	1832	*/
	1833	DB_SHOW_COMMAND(object, vm_object_print_static)
	1834	{
	1835	/* XXX convert args. */
	1836	vm_object_t object = (vm_object_t)addr;
	1837	boolean_t full = have_addr;
	1838
	1839	vm_page_t p;
	1840
	1841	/* XXX count is an (unused) arg. Avoid shadowing it. */
	1842	#define count was_count
	1843
	1844	int count;
	1845
	1846	if (object == NULL)
	1847	return;
	1848
	1849	db_iprintf(
	1850	"Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
	1851	object, (int)object->type, (u_long)object->size,
	1852	object->resident_page_count, object->ref_count, object->flags);
	1853	/*
	1854	* XXX no %qd in kernel. Truncate object->backing_object_offset.
	1855	*/
	1856	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
	1857	object->shadow_count,
	1858	object->backing_object ? object->backing_object->ref_count : 0,
	1859	object->backing_object, (long)object->backing_object_offset);
	1860
	1861	if (!full)
	1862	return;
	1863
	1864	db_indent += 2;
	1865	count = 0;
	1866	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) {
	1867	if (count == 0)
	1868	db_iprintf("memory:=");
	1869	else if (count == 6) {
	1870	db_printf("\n");
	1871	db_iprintf(" ...");
	1872	count = 0;
	1873	} else
	1874	db_printf(",");
	1875	count++;
	1876
	1877	db_printf("(off=0x%lx,page=0x%lx)",
	1878	(u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
	1879	}
	1880	if (count != 0)
	1881	db_printf("\n");
	1882	db_indent -= 2;
	1883	}
	1884
	1885	/* XXX. */
	1886	#undef count
	1887
	1888	/* XXX need this non-static entry for calling from vm_map_print. */
		/*
		 * vm_object_print:
		 *
		 * Externally visible wrapper that forwards all four arguments
		 * unchanged to vm_object_print_static(), the handler declared with
		 * DB_SHOW_COMMAND(object, ...).  addr and count are declared long
		 * here; the inline comments mark them as standing in for db_expr_t.
		 */
	1889	void
	1890	vm_object_print(addr, have_addr, count, modif)
	1891	/* db_expr_t */ long addr;
	1892	boolean_t have_addr;
	1893	/* db_expr_t */ long count;
	1894	char *modif;
	1895	{
	1896	vm_object_print_static(addr, have_addr, count, modif);
	1897	}
	1898
	1899	DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
	1900	{
	1901	vm_object_t object;
	1902	int nl = 0;
	1903	int c;
	1904	for (object = TAILQ_FIRST(&vm_object_list);
	1905	object != NULL;
	1906	object = TAILQ_NEXT(object, object_list)) {
	1907	vm_pindex_t idx, fidx;
	1908	vm_pindex_t osize;
	1909	vm_offset_t pa = -1, padiff;
	1910	int rcount;
	1911	vm_page_t m;
	1912
	1913	db_printf("new object: %p\n", (void *)object);
	1914	if ( nl > 18) {
	1915	c = cngetc();
	1916	if (c != ' ')
	1917	return;
	1918	nl = 0;
	1919	}
	1920	nl++;
	1921	rcount = 0;
	1922	fidx = 0;
	1923	osize = object->size;
	1924	if (osize > 128)
	1925	osize = 128;
	1926	for(idx=0;idx<osize;idx++) {
	1927	m = vm_page_lookup(object, idx);
	1928	if (m == NULL) {
	1929	if (rcount) {
	1930	db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
	1931	(long)fidx, rcount, (long)pa);
	1932	if ( nl > 18) {
	1933	c = cngetc();
	1934	if (c != ' ')
	1935	return;
	1936	nl = 0;
	1937	}
	1938	nl++;
	1939	rcount = 0;
	1940	}
	1941	continue;
	1942	}
	1943
	1944
	1945	if (rcount &&
	1946	(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
	1947	++rcount;
	1948	continue;
	1949	}
	1950	if (rcount) {
	1951	padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
	1952	padiff >>= PAGE_SHIFT;
	1953	padiff &= PQ_L2_MASK;
	1954	if (padiff == 0) {
	1955	pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
	1956	++rcount;
	1957	continue;
	1958	}
	1959	db_printf(" index(%ld)run(%d)pa(0x%lx)",
	1960	(long)fidx, rcount, (long)pa);
	1961	db_printf("pd(%ld)\n", (long)padiff);
	1962	if ( nl > 18) {
	1963	c = cngetc();
	1964	if (c != ' ')
	1965	return;
	1966	nl = 0;
	1967	}
	1968	nl++;
	1969	}
	1970	fidx = idx;
	1971	pa = VM_PAGE_TO_PHYS(m);
	1972	rcount = 1;
	1973	}
	1974	if (rcount) {
	1975	db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
	1976	(long)fidx, rcount, (long)pa);
	1977	if ( nl > 18) {
	1978	c = cngetc();
	1979	if (c != ' ')
	1980	return;
	1981	nl = 0;
	1982	}
	1983	nl++;
	1984	}
	1985	}
	1986	}
	1987	#endif /* DDB */