Initial import from FreeBSD RELENG_4:
[dragonfly.git] / sys / vm / vm_pageout.c
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91
41 *
42 *
43 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
44 * All rights reserved.
45 *
46 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 *
68 * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $
69 */
70
71/*
72 * The proverbial page-out daemon.
73 */
74
75#include "opt_vm.h"
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79#include <sys/proc.h>
80#include <sys/kthread.h>
81#include <sys/resourcevar.h>
82#include <sys/signalvar.h>
83#include <sys/vnode.h>
84#include <sys/vmmeter.h>
85#include <sys/sysctl.h>
86
87#include <vm/vm.h>
88#include <vm/vm_param.h>
89#include <sys/lock.h>
90#include <vm/vm_object.h>
91#include <vm/vm_page.h>
92#include <vm/vm_map.h>
93#include <vm/vm_pageout.h>
94#include <vm/vm_pager.h>
95#include <vm/swap_pager.h>
96#include <vm/vm_extern.h>
97
98/*
99 * System initialization
100 */
101
102/* the kernel process "vm_pageout"*/
103static void vm_pageout __P((void));
104static int vm_pageout_clean __P((vm_page_t));
105static void vm_pageout_scan __P((int pass));
106static int vm_pageout_free_page_calc __P((vm_size_t count));
107struct proc *pageproc;
108
109static struct kproc_desc page_kp = {
110 "pagedaemon",
111 vm_pageout,
112 &pageproc
113};
114SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)
115
116#if !defined(NO_SWAPPING)
117/* the kernel process "vm_daemon"*/
118static void vm_daemon __P((void));
119static struct proc *vmproc;
120
121static struct kproc_desc vm_kp = {
122 "vmdaemon",
123 vm_daemon,
124 &vmproc
125};
126SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
127#endif
128
129
130int vm_pages_needed=0; /* Event on which pageout daemon sleeps */
131int vm_pageout_deficit=0; /* Estimated number of pages deficit */
132int vm_pageout_pages_needed=0; /* flag saying that the pageout daemon needs pages */
133
134#if !defined(NO_SWAPPING)
135static int vm_pageout_req_swapout; /* XXX */
136static int vm_daemon_needed;
137#endif
138extern int vm_swap_size;
139static int vm_max_launder = 32;
140static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
141static int vm_pageout_full_stats_interval = 0;
142static int vm_pageout_stats_free_max=0, vm_pageout_algorithm=0;
143static int defer_swap_pageouts=0;
144static int disable_swap_pageouts=0;
145
146#if defined(NO_SWAPPING)
147static int vm_swap_enabled=0;
148static int vm_swap_idle_enabled=0;
149#else
150static int vm_swap_enabled=1;
151static int vm_swap_idle_enabled=0;
152#endif
153
154SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
155 CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt");
156
157SYSCTL_INT(_vm, OID_AUTO, max_launder,
158 CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
159
160SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
161 CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");
162
163SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
164 CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");
165
166SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
167 CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");
168
169SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
170 CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");
171
172#if defined(NO_SWAPPING)
173SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
174 CTLFLAG_RD, &vm_swap_enabled, 0, "");
175SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
176 CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
177#else
178SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
179 CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
180SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
181 CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
182#endif
183
184SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
185 CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
186
187SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
188 CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
189
190static int pageout_lock_miss;
191SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
192 CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
193
194#define VM_PAGEOUT_PAGE_COUNT 16
195int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
196
197int vm_page_max_wired; /* XXX max # of wired pages system-wide */
198
199#if !defined(NO_SWAPPING)
200typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int));
201static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t));
202static freeer_fcn_t vm_pageout_object_deactivate_pages;
203static void vm_req_vmdaemon __P((void));
204#endif
205static void vm_pageout_page_stats(void);
206
207/*
208 * vm_pageout_clean:
209 *
210 * Clean the page and remove it from the laundry.
211 *
212 * We set the busy bit to cause potential page faults on this page to
213 * block. Note the careful timing, however: the busy bit isn't set until
214 * late, so we cannot do anything that will mess with the page before then.
215 */
216
217static int
218vm_pageout_clean(m)
219 vm_page_t m;
220{
221 register vm_object_t object;
222 vm_page_t mc[2*vm_pageout_page_count];
223 int pageout_count;
224 int ib, is, page_base;
225 vm_pindex_t pindex = m->pindex;
226
227 object = m->object;
228
229 /*
230 * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
231 * with the new swapper, but we could have serious problems paging
232 * out other object types if there is insufficient memory.
233 *
234 * Unfortunately, checking free memory here is far too late, so the
235 * check has been moved up a procedural level.
236 */
237
238 /*
239 * Don't mess with the page if it's busy, held, or special
240 */
241 if ((m->hold_count != 0) ||
242 ((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
243 return 0;
244 }
245
246 mc[vm_pageout_page_count] = m;
247 pageout_count = 1;
248 page_base = vm_pageout_page_count;
249 ib = 1;
250 is = 1;
251
252 /*
253 * Scan object for clusterable pages.
254 *
255 * We can cluster ONLY if: ->> the page is NOT
256 * clean, wired, busy, held, or mapped into a
257 * buffer, and one of the following:
258 * 1) The page is inactive, or a seldom used
259 * active page.
260 * -or-
261 * 2) we force the issue.
262 *
263 * During heavy mmap/modification loads the pageout
264 * daemon can really fragment the underlying file
265 * due to flushing pages out of order and not trying to
266 * align the clusters (which leaves sporadic out-of-order
267 * holes). To solve this problem we do the reverse scan
268 * first and attempt to align our cluster, then do a
269 * forward scan if room remains.
270 */
271
272more:
273 while (ib && pageout_count < vm_pageout_page_count) {
274 vm_page_t p;
275
276 if (ib > pindex) {
277 ib = 0;
278 break;
279 }
280
281 if ((p = vm_page_lookup(object, pindex - ib)) == NULL) {
282 ib = 0;
283 break;
284 }
285 if (((p->queue - p->pc) == PQ_CACHE) ||
286 (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
287 ib = 0;
288 break;
289 }
290 vm_page_test_dirty(p);
291 if ((p->dirty & p->valid) == 0 ||
292 p->queue != PQ_INACTIVE ||
293 p->wire_count != 0 || /* may be held by buf cache */
294 p->hold_count != 0) { /* may be undergoing I/O */
295 ib = 0;
296 break;
297 }
298 mc[--page_base] = p;
299 ++pageout_count;
300 ++ib;
301 /*
302 * alignment boundary, stop here and switch directions. Do
303 * not clear ib.
304 */
305 if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
306 break;
307 }
308
309 while (pageout_count < vm_pageout_page_count &&
310 pindex + is < object->size) {
311 vm_page_t p;
312
313 if ((p = vm_page_lookup(object, pindex + is)) == NULL)
314 break;
315 if (((p->queue - p->pc) == PQ_CACHE) ||
316 (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
317 break;
318 }
319 vm_page_test_dirty(p);
320 if ((p->dirty & p->valid) == 0 ||
321 p->queue != PQ_INACTIVE ||
322 p->wire_count != 0 || /* may be held by buf cache */
323 p->hold_count != 0) { /* may be undergoing I/O */
324 break;
325 }
326 mc[page_base + pageout_count] = p;
327 ++pageout_count;
328 ++is;
329 }
330
331 /*
332 * If we exhausted our forward scan, continue with the reverse scan
333 * when possible, even past a page boundary. This catches boundary
334 * conditions.
335 */
336 if (ib && pageout_count < vm_pageout_page_count)
337 goto more;
338
339 /*
340 * we allow reads during pageouts...
341 */
342 return vm_pageout_flush(&mc[page_base], pageout_count, 0);
343}
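
/*
 * Illustrative sketch, not part of the original file: a simplified,
 * stand-alone rendition of the cluster-window arithmetic used by
 * vm_pageout_clean() above.  It assumes every neighboring page is
 * eligible (no clean/busy/held checks) and omits the second reverse
 * pass; the function and parameter names are hypothetical, and the
 * block is under #if 0 so it is never compiled.
 */
#if 0
static void
cluster_window_sketch(int pindex, int cluster_size, int object_size,
    int *startp, int *endp)
{
	int start = pindex;		/* lowest index in the cluster */
	int end = pindex + 1;		/* one past the highest index */

	/*
	 * Reverse scan: walk back toward the previous cluster_size
	 * boundary so the flushed cluster stays aligned in the file.
	 */
	while (start > 0 && (end - start) < cluster_size &&
	    (start % cluster_size) != 0)
		--start;

	/* Forward scan: fill whatever room remains in the cluster. */
	while (end < object_size && (end - start) < cluster_size)
		++end;

	/* e.g. pindex 13, cluster_size 16, large object -> window [0, 16) */
	*startp = start;
	*endp = end;
}
#endif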
344
345/*
346 * vm_pageout_flush() - launder the given pages
347 *
348 * The given pages are laundered. Note that we set up for the start of
349 * I/O (i.e. busy the page), mark it read-only, and bump the object
350 * reference count all in here rather than in the parent. If we want
351 * the parent to do more sophisticated things we may have to change
352 * the ordering.
353 */
354
355int
356vm_pageout_flush(mc, count, flags)
357 vm_page_t *mc;
358 int count;
359 int flags;
360{
361 register vm_object_t object;
362 int pageout_status[count];
363 int numpagedout = 0;
364 int i;
365
366 /*
367 * Initiate I/O. Bump the vm_page_t->busy counter and
368 * mark the pages read-only.
369 *
370 * We do not have to fixup the clean/dirty bits here... we can
371 * allow the pager to do it after the I/O completes.
372 */
373
374 for (i = 0; i < count; i++) {
375 KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially invalid page", mc[i], i, count));
376 vm_page_io_start(mc[i]);
377 vm_page_protect(mc[i], VM_PROT_READ);
378 }
379
380 object = mc[0]->object;
381 vm_object_pip_add(object, count);
382
383 vm_pager_put_pages(object, mc, count,
384 (flags | ((object == kernel_object) ? VM_PAGER_PUT_SYNC : 0)),
385 pageout_status);
386
387 for (i = 0; i < count; i++) {
388 vm_page_t mt = mc[i];
389
390 switch (pageout_status[i]) {
391 case VM_PAGER_OK:
392 numpagedout++;
393 break;
394 case VM_PAGER_PEND:
395 numpagedout++;
396 break;
397 case VM_PAGER_BAD:
398 /*
399 * Page outside of range of object. Right now we
400 * essentially lose the changes by pretending it
401 * worked.
402 */
403 pmap_clear_modify(mt);
404 vm_page_undirty(mt);
405 break;
406 case VM_PAGER_ERROR:
407 case VM_PAGER_FAIL:
408 /*
409 * If the page couldn't be paged out, then reactivate the
410 * page so it doesn't clog the inactive list. (We
411 * will try paging it out again later).
412 */
413 vm_page_activate(mt);
414 break;
415 case VM_PAGER_AGAIN:
416 break;
417 }
418
419 /*
420 * If the operation is still going, leave the page busy to
421 * block all other accesses. Also, leave the paging in
422 * progress indicator set so that we don't attempt an object
423 * collapse.
424 */
425 if (pageout_status[i] != VM_PAGER_PEND) {
426 vm_object_pip_wakeup(object);
427 vm_page_io_finish(mt);
428 if (!vm_page_count_severe() || !vm_page_try_to_cache(mt))
429 vm_page_protect(mt, VM_PROT_READ);
430 }
431 }
432 return numpagedout;
433}
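
/*
 * Illustrative sketch, not part of the original file: how the pageout
 * daemon consumes the result of vm_pageout_flush() (by way of
 * vm_pageout_clean()).  A non-zero return means at least one page was
 * handed to the pager (VM_PAGER_OK or VM_PAGER_PEND), so the caller
 * credits one (future) clean page against its shortage and charges the
 * flush against its laundering budget.  The function name is
 * hypothetical; the block is under #if 0 and never compiled.
 */
#if 0
static void
launder_one_sketch(vm_page_t m, int *page_shortage, int *maxlaunder)
{
	if (vm_pageout_clean(m) != 0) {
		--*page_shortage;	/* account for the future clean page */
		--*maxlaunder;		/* limit dirty flushes per scan */
	}
}
#endif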
434
435#if !defined(NO_SWAPPING)
436/*
437 * vm_pageout_object_deactivate_pages
438 *
439 * deactivate enough pages to satisfy the inactive target
440 * requirements or, if vm_page_proc_limit is set,
441 * deactivate all of the pages in the object and its
442 * backing_objects.
443 *
444 * The object and map must be locked.
445 */
446static void
447vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
448 vm_map_t map;
449 vm_object_t object;
450 vm_pindex_t desired;
451 int map_remove_only;
452{
453 register vm_page_t p, next;
454 int rcount;
455 int remove_mode;
456 int s;
457
458 if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
459 return;
460
461 while (object) {
462 if (pmap_resident_count(vm_map_pmap(map)) <= desired)
463 return;
464 if (object->paging_in_progress)
465 return;
466
467 remove_mode = map_remove_only;
468 if (object->shadow_count > 1)
469 remove_mode = 1;
470 /*
471 * scan the object's entire memory queue
472 */
473 rcount = object->resident_page_count;
474 p = TAILQ_FIRST(&object->memq);
475 while (p && (rcount-- > 0)) {
476 int actcount;
477 if (pmap_resident_count(vm_map_pmap(map)) <= desired)
478 return;
479 next = TAILQ_NEXT(p, listq);
480 cnt.v_pdpages++;
481 if (p->wire_count != 0 ||
482 p->hold_count != 0 ||
483 p->busy != 0 ||
484 (p->flags & (PG_BUSY|PG_UNMANAGED)) ||
485 !pmap_page_exists_quick(vm_map_pmap(map), p)) {
486 p = next;
487 continue;
488 }
489
490 actcount = pmap_ts_referenced(p);
491 if (actcount) {
492 vm_page_flag_set(p, PG_REFERENCED);
493 } else if (p->flags & PG_REFERENCED) {
494 actcount = 1;
495 }
496
497 if ((p->queue != PQ_ACTIVE) &&
498 (p->flags & PG_REFERENCED)) {
499 vm_page_activate(p);
500 p->act_count += actcount;
501 vm_page_flag_clear(p, PG_REFERENCED);
502 } else if (p->queue == PQ_ACTIVE) {
503 if ((p->flags & PG_REFERENCED) == 0) {
504 p->act_count -= min(p->act_count, ACT_DECLINE);
505 if (!remove_mode && (vm_pageout_algorithm || (p->act_count == 0))) {
506 vm_page_protect(p, VM_PROT_NONE);
507 vm_page_deactivate(p);
508 } else {
509 s = splvm();
510 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
511 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
512 splx(s);
513 }
514 } else {
515 vm_page_activate(p);
516 vm_page_flag_clear(p, PG_REFERENCED);
517 if (p->act_count < (ACT_MAX - ACT_ADVANCE))
518 p->act_count += ACT_ADVANCE;
519 s = splvm();
520 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
521 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
522 splx(s);
523 }
524 } else if (p->queue == PQ_INACTIVE) {
525 vm_page_protect(p, VM_PROT_NONE);
526 }
527 p = next;
528 }
529 object = object->backing_object;
530 }
531 return;
532}
533
534/*
535 * Deactivate some number of pages in a map; try to do it fairly, but
536 * that is really hard to do.
537 */
538static void
539vm_pageout_map_deactivate_pages(map, desired)
540 vm_map_t map;
541 vm_pindex_t desired;
542{
543 vm_map_entry_t tmpe;
544 vm_object_t obj, bigobj;
545 int nothingwired;
546
547 if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, (void *)0, curproc)) {
548 return;
549 }
550
551 bigobj = NULL;
552 nothingwired = TRUE;
553
554 /*
555 * first, search out the biggest object, and try to free pages from
556 * that.
557 */
558 tmpe = map->header.next;
559 while (tmpe != &map->header) {
560 if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
561 obj = tmpe->object.vm_object;
562 if ((obj != NULL) && (obj->shadow_count <= 1) &&
563 ((bigobj == NULL) ||
564 (bigobj->resident_page_count < obj->resident_page_count))) {
565 bigobj = obj;
566 }
567 }
568 if (tmpe->wired_count > 0)
569 nothingwired = FALSE;
570 tmpe = tmpe->next;
571 }
572
573 if (bigobj)
574 vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);
575
576 /*
577 * Next, hunt around for other pages to deactivate. We actually
578 * do this search sort of wrong -- .text first is not the best idea.
579 */
580 tmpe = map->header.next;
581 while (tmpe != &map->header) {
582 if (pmap_resident_count(vm_map_pmap(map)) <= desired)
583 break;
584 if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
585 obj = tmpe->object.vm_object;
586 if (obj)
587 vm_pageout_object_deactivate_pages(map, obj, desired, 0);
588 }
589 tmpe = tmpe->next;
590 };
591
592 /*
593 * Remove all mappings if a process is swapped out; this will free page
594 * table pages.
595 */
596 if (desired == 0 && nothingwired)
597 pmap_remove(vm_map_pmap(map),
598 VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
599 vm_map_unlock(map);
600 return;
601}
602#endif
603
604/*
605 * Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
606 * to vnode deadlocks. We only do it for OBJT_DEFAULT and OBJT_SWAP objects
607 * which we know can be trivially freed.
608 */
609
610void
611vm_pageout_page_free(vm_page_t m) {
612 vm_object_t object = m->object;
613 int type = object->type;
614
615 if (type == OBJT_SWAP || type == OBJT_DEFAULT)
616 vm_object_reference(object);
617 vm_page_busy(m);
618 vm_page_protect(m, VM_PROT_NONE);
619 vm_page_free(m);
620 if (type == OBJT_SWAP || type == OBJT_DEFAULT)
621 vm_object_deallocate(object);
622}
623
624/*
625 * vm_pageout_scan does the dirty work for the pageout daemon.
626 */
627static void
628vm_pageout_scan(int pass)
629{
630 vm_page_t m, next;
631 struct vm_page marker;
632 int page_shortage, maxscan, pcount;
633 int addl_page_shortage, addl_page_shortage_init;
634 struct proc *p, *bigproc;
635 vm_offset_t size, bigsize;
636 vm_object_t object;
637 int actcount;
638 int vnodes_skipped = 0;
639 int maxlaunder;
640 int s;
641
642 /*
643 * Do whatever cleanup that the pmap code can.
644 */
645 pmap_collect();
646
647 addl_page_shortage_init = vm_pageout_deficit;
648 vm_pageout_deficit = 0;
649
650 /*
651 * Calculate the number of pages we want to either free or move
652 * to the cache.
653 */
654 page_shortage = vm_paging_target() + addl_page_shortage_init;
655
656 /*
657 * Initialize our marker
658 */
659 bzero(&marker, sizeof(marker));
660 marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
661 marker.queue = PQ_INACTIVE;
662 marker.wire_count = 1;
663
664 /*
665 * Start scanning the inactive queue for pages we can move to the
666 * cache or free. The scan will stop when the target is reached or
667 * we have scanned the entire inactive queue. Note that m->act_count
668 * is not used to form decisions for the inactive queue, only for the
669 * active queue.
670 *
671 * maxlaunder limits the number of dirty pages we flush per scan.
672 * For most systems a smaller value (16 or 32) is more robust under
673 * extreme memory and disk pressure because any unnecessary writes
674 * to disk can result in extreme performance degradation. However,
675 * systems with excessive dirty pages (especially when MAP_NOSYNC is
676 * used) will die horribly with limited laundering. If the pageout
677 * daemon cannot clean enough pages in the first pass, we let it go
678 * all out in succeeding passes.
679 */
680 if ((maxlaunder = vm_max_launder) <= 1)
681 maxlaunder = 1;
682 if (pass)
683 maxlaunder = 10000;
684
685rescan0:
686 addl_page_shortage = addl_page_shortage_init;
687 maxscan = cnt.v_inactive_count;
688 for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
689 m != NULL && maxscan-- > 0 && page_shortage > 0;
690 m = next) {
691
692 cnt.v_pdpages++;
693
694 if (m->queue != PQ_INACTIVE) {
695 goto rescan0;
696 }
697
698 next = TAILQ_NEXT(m, pageq);
699
700 /*
701 * skip marker pages
702 */
703 if (m->flags & PG_MARKER)
704 continue;
705
706 /*
707 * A held page may be undergoing I/O, so skip it.
708 */
709 if (m->hold_count) {
710 s = splvm();
711 TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
712 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
713 splx(s);
714 addl_page_shortage++;
715 continue;
716 }
717 /*
718 * Don't mess with busy pages; keep them at the front of the
719 * queue, as they are most likely being paged out.
720 */
721 if (m->busy || (m->flags & PG_BUSY)) {
722 addl_page_shortage++;
723 continue;
724 }
725
726 /*
727 * If the object is not being used, we ignore previous
728 * references.
729 */
730 if (m->object->ref_count == 0) {
731 vm_page_flag_clear(m, PG_REFERENCED);
732 pmap_clear_reference(m);
733
734 /*
735 * Otherwise, if the page has been referenced while in the
736 * inactive queue, we bump the "activation count" upwards,
737 * making it less likely that the page will be added back to
738 * the inactive queue prematurely again. Here we check the
739 * page tables (or emulated bits, if any), since the upper
740 * level VM system does not know anything about existing
741 * references.
742 */
743 } else if (((m->flags & PG_REFERENCED) == 0) &&
744 (actcount = pmap_ts_referenced(m))) {
745 vm_page_activate(m);
746 m->act_count += (actcount + ACT_ADVANCE);
747 continue;
748 }
749
750 /*
751 * If the upper level VM system knows about any page
752 * references, we activate the page. We also set the
753 * "activation count" higher than normal so that we are less
754 * likely to place pages back onto the inactive queue again.
755 */
756 if ((m->flags & PG_REFERENCED) != 0) {
757 vm_page_flag_clear(m, PG_REFERENCED);
758 actcount = pmap_ts_referenced(m);
759 vm_page_activate(m);
760 m->act_count += (actcount + ACT_ADVANCE + 1);
761 continue;
762 }
763
764 /*
765 * If the upper level VM system doesn't know anything about
766 * the page being dirty, we have to check for it again. As
767 * far as the VM code knows, any partially dirty pages are
768 * fully dirty.
769 */
770 if (m->dirty == 0) {
771 vm_page_test_dirty(m);
772 } else {
773 vm_page_dirty(m);
774 }
775
776 /*
777 * Invalid pages can be easily freed
778 */
779 if (m->valid == 0) {
780 vm_pageout_page_free(m);
781 cnt.v_dfree++;
782 --page_shortage;
783
784 /*
785 * Clean pages can be placed onto the cache queue. This
786 * effectively frees them.
787 */
788 } else if (m->dirty == 0) {
789 /*
790 * Clean pages can be immediately freed to the cache.
791 */
792 vm_page_cache(m);
793 --page_shortage;
794 } else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) {
795 /*
796 * Dirty pages need to be paged out, but flushing
797 * a page is extremely expensive versus freeing
798 * a clean page. Rather than artificially limiting
799 * the number of pages we can flush, we instead give
800 * dirty pages extra priority on the inactive queue
801 * by forcing them to be cycled through the queue
802 * twice before being flushed, after which the
803 * (now clean) page will cycle through once more
804 * before being freed. This significantly extends
805 * the thrash point for a heavily loaded machine.
806 */
807 s = splvm();
808 vm_page_flag_set(m, PG_WINATCFLS);
809 TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
810 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
811 splx(s);
812 } else if (maxlaunder > 0) {
813 /*
814 * We always want to try to flush some dirty pages if
815 * we encounter them, to keep the system stable.
816 * Normally this number is small, but under extreme
817 * pressure where there are insufficient clean pages
818 * on the inactive queue, we may have to go all out.
819 */
820 int swap_pageouts_ok;
821 struct vnode *vp = NULL;
822
823 object = m->object;
824
825 if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
826 swap_pageouts_ok = 1;
827 } else {
828 swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
829 swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
830 vm_page_count_min());
831
832 }
833
834 /*
835 * We don't bother paging objects that are "dead".
836 * Those objects are in a "rundown" state.
837 */
838 if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
839 s = splvm();
840 TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
841 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
842 splx(s);
843 continue;
844 }
845
846 /*
847 * The object is already known NOT to be dead. It
848 * is possible for the vget() to block the whole
849 * pageout daemon, but the new low-memory handling
850 * code should prevent it.
851 *
852 * The previous code skipped locked vnodes and, worse,
853 * reordered pages in the queue. This results in
854 * completely non-deterministic operation because,
855 * quite often, a vm_fault has initiated an I/O and
856 * is holding a locked vnode at just the point where
857 * the pageout daemon is woken up.
858 *
859 * We can't wait forever for the vnode lock, we might
860 * deadlock due to a vn_read() getting stuck in
861 * vm_wait while holding this vnode. We skip the
862 * vnode if we can't get it in a reasonable amount
863 * of time.
864 */
865
866 if (object->type == OBJT_VNODE) {
867 vp = object->handle;
868
869 if (vget(vp, LK_EXCLUSIVE|LK_NOOBJ|LK_TIMELOCK, curproc)) {
870 ++pageout_lock_miss;
871 if (object->flags & OBJ_MIGHTBEDIRTY)
872 vnodes_skipped++;
873 continue;
874 }
875
876 /*
877 * The page might have been moved to another
878 * queue during potential blocking in vget()
879 * above. The page might have been freed and
880 * reused for another vnode. The object might
881 * have been reused for another vnode.
882 */
883 if (m->queue != PQ_INACTIVE ||
884 m->object != object ||
885 object->handle != vp) {
886 if (object->flags & OBJ_MIGHTBEDIRTY)
887 vnodes_skipped++;
888 vput(vp);
889 continue;
890 }
891
892 /*
893 * The page may have been busied during the
894 * blocking in vget(). We don't move the
895 * page back onto the end of the queue; the
896 * statistics are more correct if we don't.
897 */
898 if (m->busy || (m->flags & PG_BUSY)) {
899 vput(vp);
900 continue;
901 }
902
903 /*
904 * If the page has become held it might
905 * be undergoing I/O, so skip it
906 */
907 if (m->hold_count) {
908 s = splvm();
909 TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
910 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
911 splx(s);
912 if (object->flags & OBJ_MIGHTBEDIRTY)
913 vnodes_skipped++;
914 vput(vp);
915 continue;
916 }
917 }
918
919 /*
920 * If a page is dirty, then it is either being washed
921 * (but not yet cleaned) or it is still in the
922 * laundry. If it is still in the laundry, then we
923 * start the cleaning operation.
924 *
925 * This operation may cluster, invalidating the 'next'
926 * pointer. To prevent an inordinate number of
927 * restarts we use our marker to remember our place.
928 *
929 * decrement page_shortage on success to account for
930 * the (future) cleaned page. Otherwise we could wind
931 * up laundering or cleaning too many pages.
932 */
933 s = splvm();
934 TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
935 splx(s);
936 if (vm_pageout_clean(m) != 0) {
937 --page_shortage;
938 --maxlaunder;
939 }
940 s = splvm();
941 next = TAILQ_NEXT(&marker, pageq);
942 TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
943 splx(s);
944 if (vp != NULL)
945 vput(vp);
946 }
947 }
948
949 /*
950 * Compute the number of pages we want to try to move from the
951 * active queue to the inactive queue.
952 */
953 page_shortage = vm_paging_target() +
954 cnt.v_inactive_target - cnt.v_inactive_count;
955 page_shortage += addl_page_shortage;
956
957 /*
958 * Scan the active queue for things we can deactivate. We nominally
959 * track the per-page activity counter and use it to locate
960 * deactivation candidates.
961 */
962
963 pcount = cnt.v_active_count;
964 m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
965
966 while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
967
968 /*
969 * This is a consistency check, and should likely be a panic
970 * or warning.
971 */
972 if (m->queue != PQ_ACTIVE) {
973 break;
974 }
975
976 next = TAILQ_NEXT(m, pageq);
977 /*
978 * Don't deactivate pages that are busy.
979 */
980 if ((m->busy != 0) ||
981 (m->flags & PG_BUSY) ||
982 (m->hold_count != 0)) {
983 s = splvm();
984 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
985 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
986 splx(s);
987 m = next;
988 continue;
989 }
990
991 /*
992 * The count for pagedaemon pages is done after checking the
993 * page for eligibility...
994 */
995 cnt.v_pdpages++;
996
997 /*
998 * Check to see "how much" the page has been used.
999 */
1000 actcount = 0;
1001 if (m->object->ref_count != 0) {
1002 if (m->flags & PG_REFERENCED) {
1003 actcount += 1;
1004 }
1005 actcount += pmap_ts_referenced(m);
1006 if (actcount) {
1007 m->act_count += ACT_ADVANCE + actcount;
1008 if (m->act_count > ACT_MAX)
1009 m->act_count = ACT_MAX;
1010 }
1011 }
1012
1013 /*
1014 * Since we have "tested" this bit, we need to clear it now.
1015 */
1016 vm_page_flag_clear(m, PG_REFERENCED);
1017
1018 /*
1019 * Only if an object is currently being used, do we use the
1020 * page activation count stats.
1021 */
1022 if (actcount && (m->object->ref_count != 0)) {
1023 s = splvm();
1024 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1025 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1026 splx(s);
1027 } else {
1028 m->act_count -= min(m->act_count, ACT_DECLINE);
1029 if (vm_pageout_algorithm ||
1030 m->object->ref_count == 0 ||
1031 m->act_count == 0) {
1032 page_shortage--;
1033 if (m->object->ref_count == 0) {
1034 vm_page_protect(m, VM_PROT_NONE);
1035 if (m->dirty == 0)
1036 vm_page_cache(m);
1037 else
1038 vm_page_deactivate(m);
1039 } else {
1040 vm_page_deactivate(m);
1041 }
1042 } else {
1043 s = splvm();
1044 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1045 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1046 splx(s);
1047 }
1048 }
1049 m = next;
1050 }
1051
1052 s = splvm();
1053
1054 /*
1055 * We try to maintain some *really* free pages; this allows interrupt
1056 * code to be guaranteed space. Since both cache and free queues
1057 * are considered basically 'free', moving pages from cache to free
1058 * does not affect other calculations.
1059 */
1060
1061 while (cnt.v_free_count < cnt.v_free_reserved) {
1062 static int cache_rover = 0;
1063 m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
1064 if (!m)
1065 break;
1066 if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
1067 m->busy ||
1068 m->hold_count ||
1069 m->wire_count) {
1070#ifdef INVARIANTS
1071 printf("Warning: busy page %p found in cache\n", m);
1072#endif
1073 vm_page_deactivate(m);
1074 continue;
1075 }
1076 cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
1077 vm_pageout_page_free(m);
1078 cnt.v_dfree++;
1079 }
1080 splx(s);
1081
1082#if !defined(NO_SWAPPING)
1083 /*
1084 * Idle process swapout -- run once per second.
1085 */
1086 if (vm_swap_idle_enabled) {
1087 static long lsec;
1088 if (time_second != lsec) {
1089 vm_pageout_req_swapout |= VM_SWAP_IDLE;
1090 vm_req_vmdaemon();
1091 lsec = time_second;
1092 }
1093 }
1094#endif
1095
1096 /*
1097 * If we didn't get enough free pages, and we have skipped a vnode
1098 * in a writeable object, wake up the sync daemon. Also kick off swapout
1099 * if we did not get enough free pages.
1100 */
1101 if (vm_paging_target() > 0) {
1102 if (vnodes_skipped && vm_page_count_min())
1103 (void) speedup_syncer();
1104#if !defined(NO_SWAPPING)
1105 if (vm_swap_enabled && vm_page_count_target()) {
1106 vm_req_vmdaemon();
1107 vm_pageout_req_swapout |= VM_SWAP_NORMAL;
1108 }
1109#endif
1110 }
1111
1112 /*
1113 * If we are out of swap and were not able to reach our paging
1114 * target, kill the largest process.
1115 */
1116 if ((vm_swap_size < 64 && vm_page_count_min()) ||
1117 (swap_pager_full && vm_paging_target() > 0)) {
1118#if 0
1119 if ((vm_swap_size < 64 || swap_pager_full) && vm_page_count_min()) {
1120#endif
1121 bigproc = NULL;
1122 bigsize = 0;
1123 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1124 /*
1125 * if this is a system process, skip it
1126 */
1127 if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
1128 ((p->p_pid < 48) && (vm_swap_size != 0))) {
1129 continue;
1130 }
1131 /*
1132 * if the process is in a non-running type state,
1133 * don't touch it.
1134 */
1135 if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1136 continue;
1137 }
1138 /*
1139 * get the process size
1140 */
1141 size = vmspace_resident_count(p->p_vmspace) +
1142 vmspace_swap_count(p->p_vmspace);
1143 /*
1144 * if this process is bigger than the biggest one,
1145 * remember it.
1146 */
1147 if (size > bigsize) {
1148 bigproc = p;
1149 bigsize = size;
1150 }
1151 }
1152 if (bigproc != NULL) {
1153 killproc(bigproc, "out of swap space");
1154 bigproc->p_estcpu = 0;
1155 bigproc->p_nice = PRIO_MIN;
1156 resetpriority(bigproc);
1157 wakeup(&cnt.v_free_count);
1158 }
1159 }
1160}
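
/*
 * Illustrative sketch, not part of the original file: the act_count
 * aging applied by the active-queue scan above, reduced to a single
 * page.  A referenced page gains ACT_ADVANCE plus one point per
 * referenced mapping, capped at ACT_MAX; an unreferenced page loses
 * ACT_DECLINE and becomes a deactivation candidate once the count
 * reaches zero.  The function name and the referenced/mappings inputs
 * are hypothetical; the block is under #if 0 and never compiled.
 */
#if 0
static int			/* non-zero: page should be deactivated */
age_act_count_sketch(int *act_count, int referenced, int mappings)
{
	if (referenced) {
		*act_count += ACT_ADVANCE + mappings;
		if (*act_count > ACT_MAX)
			*act_count = ACT_MAX;
		return 0;
	}
	*act_count -= min(*act_count, ACT_DECLINE);
	return (*act_count == 0);
}
#endif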
1161
1162/*
1163 * This routine tries to maintain the pseudo-LRU active queue
1164 * so that, during long periods of time when there is no paging,
1165 * some statistics accumulation still occurs. This code
1166 * helps the situation where paging just starts to occur.
1167 */
1168static void
1169vm_pageout_page_stats()
1170{
1171 int s;
1172 vm_page_t m, next;
1173 int pcount, tpcount; /* Number of pages to check */
1174 static int fullintervalcount = 0;
1175 int page_shortage;
1176 int s0;
1177
1178 page_shortage =
1179 (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
1180 (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
1181
1182 if (page_shortage <= 0)
1183 return;
1184
1185 s0 = splvm();
1186
1187 pcount = cnt.v_active_count;
1188 fullintervalcount += vm_pageout_stats_interval;
1189 if (fullintervalcount < vm_pageout_full_stats_interval) {
1190 tpcount = (vm_pageout_stats_max * cnt.v_active_count) / cnt.v_page_count;
1191 if (pcount > tpcount)
1192 pcount = tpcount;
1193 } else {
1194 fullintervalcount = 0;
1195 }
1196
1197 m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
1198 while ((m != NULL) && (pcount-- > 0)) {
1199 int actcount;
1200
1201 if (m->queue != PQ_ACTIVE) {
1202 break;
1203 }
1204
1205 next = TAILQ_NEXT(m, pageq);
1206 /*
1207 * Don't deactivate pages that are busy.
1208 */
1209 if ((m->busy != 0) ||
1210 (m->flags & PG_BUSY) ||
1211 (m->hold_count != 0)) {
1212 s = splvm();
1213 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1214 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1215 splx(s);
1216 m = next;
1217 continue;
1218 }
1219
1220 actcount = 0;
1221 if (m->flags & PG_REFERENCED) {
1222 vm_page_flag_clear(m, PG_REFERENCED);
1223 actcount += 1;
1224 }
1225
1226 actcount += pmap_ts_referenced(m);
1227 if (actcount) {
1228 m->act_count += ACT_ADVANCE + actcount;
1229 if (m->act_count > ACT_MAX)
1230 m->act_count = ACT_MAX;
1231 s = splvm();
1232 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1233 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1234 splx(s);
1235 } else {
1236 if (m->act_count == 0) {
1237 /*
1238 * We turn off page access, so that we have
1239 * more accurate RSS stats. We don't do this
1240 * in the normal page deactivation when the
1241 * system is loaded VM-wise, because the
1242 * cost of the large number of page protect
1243 * operations would be higher than the value
1244 * of doing the operation.
1245 */
1246 vm_page_protect(m, VM_PROT_NONE);
1247 vm_page_deactivate(m);
1248 } else {
1249 m->act_count -= min(m->act_count, ACT_DECLINE);
1250 s = splvm();
1251 TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1252 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1253 splx(s);
1254 }
1255 }
1256
1257 m = next;
1258 }
1259 splx(s0);
1260}
1261
1262static int
1263vm_pageout_free_page_calc(count)
1264vm_size_t count;
1265{
1266 if (count < cnt.v_page_count)
1267 return 0;
1268 /*
1269 * free_reserved needs to include enough for the largest swap pager
1270 * structures plus enough for any pv_entry structs when paging.
1271 */
1272 if (cnt.v_page_count > 1024)
1273 cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
1274 else
1275 cnt.v_free_min = 4;
1276 cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
1277 cnt.v_interrupt_free_min;
1278 cnt.v_free_reserved = vm_pageout_page_count +
1279 cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
1280 cnt.v_free_severe = cnt.v_free_min / 2;
1281 cnt.v_free_min += cnt.v_free_reserved;
1282 cnt.v_free_severe += cnt.v_free_reserved;
1283 return 1;
1284}
1285
1286
1287/*
1288 * vm_pageout is the high level pageout daemon.
1289 */
1290static void
1291vm_pageout()
1292{
1293 int pass;
1294
1295 /*
1296 * Initialize some paging parameters.
1297 */
1298
1299 cnt.v_interrupt_free_min = 2;
1300 if (cnt.v_page_count < 2000)
1301 vm_pageout_page_count = 8;
1302
1303 vm_pageout_free_page_calc(cnt.v_page_count);
1304 /*
1305 * v_free_target and v_cache_min control pageout hysteresis. Note
1306 * that these are more a measure of the VM cache queue hysteresis
1307 * than the VM free queue. Specifically, v_free_target is the
1308 * high water mark (free+cache pages).
1309 *
1310 * v_free_reserved + v_cache_min (mostly means v_cache_min) is the
1311 * low water mark, while v_free_min is the stop. v_cache_min must
1312 * be big enough to handle memory needs while the pageout daemon
1313 * is signalled and run to free more pages.
1314 */
1315 if (cnt.v_free_count > 6144)
1316 cnt.v_free_target = 4 * cnt.v_free_min + cnt.v_free_reserved;
1317 else
1318 cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved;
1319
1320 if (cnt.v_free_count > 2048) {
1321 cnt.v_cache_min = cnt.v_free_target;
1322 cnt.v_cache_max = 2 * cnt.v_cache_min;
1323 cnt.v_inactive_target = (3 * cnt.v_free_target) / 2;
1324 } else {
1325 cnt.v_cache_min = 0;
1326 cnt.v_cache_max = 0;
1327 cnt.v_inactive_target = cnt.v_free_count / 4;
1328 }
1329 if (cnt.v_inactive_target > cnt.v_free_count / 3)
1330 cnt.v_inactive_target = cnt.v_free_count / 3;
1331
1332 /* XXX does not really belong here */
1333 if (vm_page_max_wired == 0)
1334 vm_page_max_wired = cnt.v_free_count / 3;
1335
1336 if (vm_pageout_stats_max == 0)
1337 vm_pageout_stats_max = cnt.v_free_target;
1338
1339 /*
1340 * Set interval in seconds for stats scan.
1341 */
1342 if (vm_pageout_stats_interval == 0)
1343 vm_pageout_stats_interval = 5;
1344 if (vm_pageout_full_stats_interval == 0)
1345 vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
1346
1347
1348 /*
1349 * Set maximum free per pass
1350 */
1351 if (vm_pageout_stats_free_max == 0)
1352 vm_pageout_stats_free_max = 5;
1353
1354 swap_pager_swap_init();
1355 pass = 0;
1356 /*
1357 * The pageout daemon is never done, so loop forever.
1358 */
1359 while (TRUE) {
1360 int error;
1361 int s = splvm();
1362
1363 /*
1364 * If we have enough free memory, wakeup waiters. Do
1365 * not clear vm_pages_needed until we reach our target;
1366 * otherwise we may be woken up over and over again and
1367 * waste a lot of cpu.
1368 */
1369 if (vm_pages_needed && !vm_page_count_min()) {
1370 if (vm_paging_needed() <= 0)
1371 vm_pages_needed = 0;
1372 wakeup(&cnt.v_free_count);
1373 }
1374 if (vm_pages_needed) {
1375 /*
1376 * Still not done, take a second pass without waiting
1377 * (unlimited dirty cleaning), otherwise sleep a bit
1378 * and try again.
1379 */
1380 ++pass;
1381 if (pass > 1)
1382 tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
1383 } else {
1384 /*
1385 * Good enough, sleep & handle stats. Prime the pass
1386 * for the next run.
1387 */
1388 if (pass > 1)
1389 pass = 1;
1390 else
1391 pass = 0;
1392 error = tsleep(&vm_pages_needed,
1393 PVM, "psleep", vm_pageout_stats_interval * hz);
1394 if (error && !vm_pages_needed) {
1395 splx(s);
1396 pass = 0;
1397 vm_pageout_page_stats();
1398 continue;
1399 }
1400 }
1401
1402 if (vm_pages_needed)
1403 cnt.v_pdwakeups++;
1404 splx(s);
1405 vm_pageout_scan(pass);
1406 vm_pageout_deficit = 0;
1407 }
1408}
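
/*
 * Illustrative sketch, not part of the original file: the paging
 * hysteresis set up at the top of vm_pageout(), for a machine whose
 * v_free_count is above both the 2048 and 6144 page break points.
 * Values are expressed in terms of the already computed v_free_min and
 * v_free_reserved; the function name is hypothetical and the block is
 * under #if 0, never compiled.
 */
#if 0
static void
paging_targets_sketch(void)
{
	int free_target, cache_min, cache_max, inactive_target;

	/* high water mark for free + cache pages */
	free_target = 4 * cnt.v_free_min + cnt.v_free_reserved;

	/* cache queue hysteresis band */
	cache_min = free_target;
	cache_max = 2 * cache_min;

	/* keep the inactive queue about 1.5x the free target, capped */
	inactive_target = (3 * free_target) / 2;
	if (inactive_target > cnt.v_free_count / 3)
		inactive_target = cnt.v_free_count / 3;
}
#endif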
1409
1410void
1411pagedaemon_wakeup()
1412{
1413 if (!vm_pages_needed && curproc != pageproc) {
1414 vm_pages_needed++;
1415 wakeup(&vm_pages_needed);
1416 }
1417}
1418
1419#if !defined(NO_SWAPPING)
1420static void
1421vm_req_vmdaemon()
1422{
1423 static int lastrun = 0;
1424
1425 if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
1426 wakeup(&vm_daemon_needed);
1427 lastrun = ticks;
1428 }
1429}
1430
1431static void
1432vm_daemon()
1433{
1434 struct proc *p;
1435
1436 while (TRUE) {
1437 tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
1438 if (vm_pageout_req_swapout) {
1439 swapout_procs(vm_pageout_req_swapout);
1440 vm_pageout_req_swapout = 0;
1441 }
1442 /*
1443 * scan the processes for any that exceed their rlimits or
1444 * are swapped out -- deactivate their pages
1445 */
1446
1447 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1448 vm_pindex_t limit, size;
1449
1450 /*
1451 * if this is a system process or if we have already
1452 * looked at this process, skip it.
1453 */
1454 if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
1455 continue;
1456 }
1457 /*
1458 * if the process is in a non-running type state,
1459 * don't touch it.
1460 */
1461 if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1462 continue;
1463 }
1464 /*
1465 * get a limit
1466 */
1467 limit = OFF_TO_IDX(
1468 qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
1469 p->p_rlimit[RLIMIT_RSS].rlim_max));
1470
1471 /*
1472 * let processes that are swapped out really be
1473 * swapped out: set the limit to nothing (this will force a
1474 * swap-out).
1475 */
1476 if ((p->p_flag & P_INMEM) == 0)
1477 limit = 0; /* XXX */
1478
1479 size = vmspace_resident_count(p->p_vmspace);
1480 if (limit >= 0 && size >= limit) {
1481 vm_pageout_map_deactivate_pages(
1482 &p->p_vmspace->vm_map, limit);
1483 }
1484 }
1485 }
1486}
1487#endif
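
/*
 * Illustrative sketch, not part of the original file: the per-process
 * RSS enforcement performed by vm_daemon() above.  The limit is the
 * smaller of the soft and hard RLIMIT_RSS values converted to pages,
 * forced to zero for processes that are already swapped out, and any
 * process at or over its limit has pages deactivated back down toward
 * it.  The function name is hypothetical; the block is under #if 0
 * and never compiled.
 */
#if 0
static void
enforce_rss_limit_sketch(struct proc *p)
{
	vm_pindex_t limit, size;

	limit = OFF_TO_IDX(qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
	    p->p_rlimit[RLIMIT_RSS].rlim_max));
	if ((p->p_flag & P_INMEM) == 0)
		limit = 0;		/* force a full swap-out */

	size = vmspace_resident_count(p->p_vmspace);
	if (size >= limit)
		vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, limit);
}
#endif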