gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1991 Regents of the University of California.
	3	* All rights reserved.
	4	* Copyright (c) 1994 John S. Dyson
	5	* All rights reserved.
	6	* Copyright (c) 1994 David Greenman
	7	* All rights reserved.
	8	*
	9	* This code is derived from software contributed to Berkeley by
	10	* The Mach Operating System project at Carnegie-Mellon University.
	11	*
	12	* Redistribution and use in source and binary forms, with or without
	13	* modification, are permitted provided that the following conditions
	14	* are met:
	15	* 1. Redistributions of source code must retain the above copyright
	16	* notice, this list of conditions and the following disclaimer.
	17	* 2. Redistributions in binary form must reproduce the above copyright
	18	* notice, this list of conditions and the following disclaimer in the
	19	* documentation and/or other materials provided with the distribution.
	20	* 3. All advertising materials mentioning features or use of this software
	21	* must display the following acknowledgement:
	22	* This product includes software developed by the University of
	23	* California, Berkeley and its contributors.
	24	* 4. Neither the name of the University nor the names of its contributors
	25	* may be used to endorse or promote products derived from this software
	26	* without specific prior written permission.
	27	*
	28	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	29	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	30	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	31	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	32	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	33	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	34	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	35	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	37	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	38	* SUCH DAMAGE.
	39	*
	40	* from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91
	41	*
	42	*
	43	* Copyright (c) 1987, 1990 Carnegie-Mellon University.
	44	* All rights reserved.
	45	*
	46	* Authors: Avadis Tevanian, Jr., Michael Wayne Young
	47	*
	48	* Permission to use, copy, modify and distribute this software and
	49	* its documentation is hereby granted, provided that both the copyright
	50	* notice and this permission notice appear in all copies of the
	51	* software, derivative works or modified versions, and any portions
	52	* thereof, and that both notices appear in supporting documentation.
	53	*
	54	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
	55	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
	56	* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
	57	*
	58	* Carnegie Mellon requests users of this software to return to
	59	*
	60	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
	61	* School of Computer Science
	62	* Carnegie Mellon University
	63	* Pittsburgh PA 15213-3890
	64	*
	65	* any improvements or extensions that they make and grant Carnegie the
	66	* rights to redistribute these changes.
	67	*
	68	* $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $
	69	* $DragonFly: src/sys/vm/vm_pageout.c,v 1.10 2004/03/23 22:54:32 dillon Exp $
	70	*/
	71
	72	/*
	73	* The proverbial page-out daemon.
	74	*/
	75
	76	#include "opt_vm.h"
	77	#include <sys/param.h>
	78	#include <sys/systm.h>
	79	#include <sys/kernel.h>
	80	#include <sys/proc.h>
	81	#include <sys/kthread.h>
	82	#include <sys/resourcevar.h>
	83	#include <sys/signalvar.h>
	84	#include <sys/vnode.h>
	85	#include <sys/vmmeter.h>
	86	#include <sys/sysctl.h>
	87
	88	#include <vm/vm.h>
	89	#include <vm/vm_param.h>
	90	#include <sys/lock.h>
	91	#include <vm/vm_object.h>
	92	#include <vm/vm_page.h>
	93	#include <vm/vm_map.h>
	94	#include <vm/vm_pageout.h>
	95	#include <vm/vm_pager.h>
	96	#include <vm/swap_pager.h>
	97	#include <vm/vm_extern.h>
	98	#include <vm/vm_page2.h>
	99
	100	/*
	101	* System initialization
	102	*/
	103
	104	/* the kernel process "vm_pageout" (registered below under the name "pagedaemon") */
	105	static void vm_pageout (void);
	106	static int vm_pageout_clean (vm_page_t);
	107	static void vm_pageout_scan (int pass);
	108	static int vm_pageout_free_page_calc (vm_size_t count);
	109	struct thread *pagethread;
	110
	/*
	 * Start-up descriptor for the pageout daemon: name string, entry point
	 * (vm_pageout) and the address of pagethread.  The SYSINIT entry passes
	 * this descriptor to kproc_start at SI_SUB_KTHREAD_PAGE / SI_ORDER_FIRST.
	 * NOTE(review): presumably kproc_start stores the created thread in
	 * pagethread -- confirm against sys/kthread.h.
	 */
	111	static struct kproc_desc page_kp = {
	112	"pagedaemon",
	113	vm_pageout,
	114	&pagethread
	115	};
	116	SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)
	117
	118	#if !defined(NO_SWAPPING)
	119	/* the kernel process "vm_daemon" (registered below under the name "vmdaemon") */
	120	static void vm_daemon (void);
	121	static struct thread *vmthread;
	122
	/*
	 * Same descriptor layout as page_kp, for the swap daemon.  It is only
	 * compiled when NO_SWAPPING is not defined and is registered at
	 * SI_SUB_KTHREAD_VM / SI_ORDER_FIRST.
	 */
	123	static struct kproc_desc vm_kp = {
	124	"vmdaemon",
	125	vm_daemon,
	126	&vmthread
	127	};
	128	SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
	129	#endif
	130
	131
	132	int vm_pages_needed=0; /* Event on which pageout daemon sleeps */
	/*
	 * vm_pageout_scan() reads vm_pageout_deficit into its additional page
	 * shortage and then resets it to zero.
	 */
	133	int vm_pageout_deficit=0; /* Estimated number of pages deficit */
	134	int vm_pageout_pages_needed=0; /* flag saying that the pageout daemon needs pages */
	135
	136	#if !defined(NO_SWAPPING)
	137	static int vm_pageout_req_swapout; /* XXX */
	138	static int vm_daemon_needed;
	139	#endif
	/*
	 * vm_swap_size is defined outside this file.  The static ints that
	 * follow are the pageout tunables; most are exported through the
	 * SYSCTL_INT entries further down in this file.
	 */
	140	extern int vm_swap_size;
	/* Starting value of maxlaunder in vm_pageout_scan() (clamped to >= 1 there). */
	141	static int vm_max_launder = 32;
	142	static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
	143	static int vm_pageout_full_stats_interval = 0;
	144	static int vm_pageout_stats_free_max=0, vm_pageout_algorithm=0;
	145	static int defer_swap_pageouts=0;
	146	static int disable_swap_pageouts=0;
	147
	/*
	 * Whole-process swapout defaults: vm_swap_enabled is 0 when NO_SWAPPING
	 * is defined and 1 otherwise; vm_swap_idle_enabled starts at 0 either way.
	 */
	148	#if defined(NO_SWAPPING)
	149	static int vm_swap_enabled=0;
	150	static int vm_swap_idle_enabled=0;
	151	#else
	152	static int vm_swap_enabled=1;
	153	static int vm_swap_idle_enabled=0;
	154	#endif
	155
	/*
	 * sysctl exports under the _vm node for the pageout tunables.  Each
	 * entry binds a name to the address of one of this file's static ints;
	 * the final string is the description attached to the entry.
	 */
	156	SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
	157	CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt");
	158
	159	SYSCTL_INT(_vm, OID_AUTO, max_launder,
	160	CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
	161
	162	SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
	163	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");
	164
	165	SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
	166	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");
	167
	168	SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
	169	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");
	170
	171	SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
	172	CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");
	173
	/*
	 * With NO_SWAPPING the two swap switches are exported with CTLFLAG_RD
	 * and empty descriptions; otherwise they are exported with CTLFLAG_RW.
	 */
	174	#if defined(NO_SWAPPING)
	175	SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	176	CTLFLAG_RD, &vm_swap_enabled, 0, "");
	177	SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	178	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
	179	#else
	180	SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	181	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
	182	SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	183	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
	184	#endif
	185
	186	SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
	187	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
	188
	189	SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
	190	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
	191
	/*
	 * Counter bumped by vm_pageout_scan() each time vget() on a vnode-backed
	 * object fails; exported with CTLFLAG_RD.
	 */
	192	static int pageout_lock_miss;
	193	SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
	194	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
	195
	/*
	 * Pageout cluster size.  vm_pageout_clean() stops collecting once it
	 * has vm_pageout_page_count pages and sizes its on-stack page array as
	 * twice this value.
	 */
	196	#define VM_PAGEOUT_PAGE_COUNT 16
	197	int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
	198
	199	int vm_page_max_wired; /* XXX max # of wired pages system-wide */
	200
	/*
	 * Prototypes for the helpers that only exist when swapping is compiled
	 * in.  freeer_fcn_t is the function type used to declare
	 * vm_pageout_object_deactivate_pages.
	 */
	201	#if !defined(NO_SWAPPING)
	202	typedef void freeer_fcn_t (vm_map_t, vm_object_t, vm_pindex_t, int);
	203	static void vm_pageout_map_deactivate_pages (vm_map_t, vm_pindex_t);
	204	static freeer_fcn_t vm_pageout_object_deactivate_pages;
	205	static void vm_req_vmdaemon (void);
	206	#endif
	207	static void vm_pageout_page_stats(void);
	208
	209	/*
	210	* vm_pageout_clean:
	211	*
	212	* Clean the page and remove it from the laundry.
	213	*
	214	* We set the busy bit to cause potential page faults on this page to
	215	* block. Note the careful timing, however, the busy bit isn't set till
	216	* late and we cannot do anything that will mess with the page.
	217	*/
	218
	219	static int
	220	vm_pageout_clean(vm_page_t m)
	221	{
	222	vm_object_t object;
	223	vm_page_t mc[2*vm_pageout_page_count];
	224	int pageout_count;
	225	int ib, is, page_base;
	226	vm_pindex_t pindex = m->pindex;
	227
	228	object = m->object;
	229
	230	/*
	231	* It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
	232	* with the new swapper, but we could have serious problems paging
	233	* out other object types if there is insufficient memory.
	234	*
	235	* Unfortunately, checking free memory here is far too late, so the
	236	* check has been moved up a procedural level.
	237	*/
	238
	239	/*
	240	* Don't mess with the page if it's busy, held, or special
	241	*/
	242	if ((m->hold_count != 0) \|\|
	243	((m->busy != 0) \|\| (m->flags & (PG_BUSY\|PG_UNMANAGED)))) {
	244	return 0;
	245	}
	246
	247	mc[vm_pageout_page_count] = m;
	248	pageout_count = 1;
	249	page_base = vm_pageout_page_count;
	250	ib = 1;
	251	is = 1;
	252
	253	/*
	254	* Scan object for clusterable pages.
	255	*
	256	* We can cluster ONLY if: ->> the page is NOT
	257	* clean, wired, busy, held, or mapped into a
	258	* buffer, and one of the following:
	259	* 1) The page is inactive, or a seldom used
	260	* active page.
	261	* -or-
	262	* 2) we force the issue.
	263	*
	264	* During heavy mmap/modification loads the pageout
	265	* daemon can really fragment the underlying file
	266	* due to flushing pages out of order and not trying
	267	* align the clusters (which leave sporatic out-of-order
	268	* holes). To solve this problem we do the reverse scan
	269	* first and attempt to align our cluster, then do a
	270	* forward scan if room remains.
	271	*/
	272
	273	more:
	274	while (ib && pageout_count < vm_pageout_page_count) {
	275	vm_page_t p;
	276
	277	if (ib > pindex) {
	278	ib = 0;
	279	break;
	280	}
	281
	282	if ((p = vm_page_lookup(object, pindex - ib)) == NULL) {
	283	ib = 0;
	284	break;
	285	}
	286	if (((p->queue - p->pc) == PQ_CACHE) \|\|
	287	(p->flags & (PG_BUSY\|PG_UNMANAGED)) \|\| p->busy) {
	288	ib = 0;
	289	break;
	290	}
	291	vm_page_test_dirty(p);
	292	if ((p->dirty & p->valid) == 0 \|\|
	293	p->queue != PQ_INACTIVE \|\|
	294	p->wire_count != 0 \|\| /* may be held by buf cache */
	295	p->hold_count != 0) { /* may be undergoing I/O */
	296	ib = 0;
	297	break;
	298	}
	299	mc[--page_base] = p;
	300	++pageout_count;
	301	++ib;
	302	/*
	303	* alignment boundry, stop here and switch directions. Do
	304	* not clear ib.
	305	*/
	306	if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
	307	break;
	308	}
	309
	310	while (pageout_count < vm_pageout_page_count &&
	311	pindex + is < object->size) {
	312	vm_page_t p;
	313
	314	if ((p = vm_page_lookup(object, pindex + is)) == NULL)
	315	break;
	316	if (((p->queue - p->pc) == PQ_CACHE) \|\|
	317	(p->flags & (PG_BUSY\|PG_UNMANAGED)) \|\| p->busy) {
	318	break;
	319	}
	320	vm_page_test_dirty(p);
	321	if ((p->dirty & p->valid) == 0 \|\|
	322	p->queue != PQ_INACTIVE \|\|
	323	p->wire_count != 0 \|\| /* may be held by buf cache */
	324	p->hold_count != 0) { /* may be undergoing I/O */
	325	break;
	326	}
	327	mc[page_base + pageout_count] = p;
	328	++pageout_count;
	329	++is;
	330	}
	331
	332	/*
	333	* If we exhausted our forward scan, continue with the reverse scan
	334	* when possible, even past a page boundry. This catches boundry
	335	* conditions.
	336	*/
	337	if (ib && pageout_count < vm_pageout_page_count)
	338	goto more;
	339
	340	/*
	341	* we allow reads during pageouts...
	342	*/
	343	return vm_pageout_flush(&mc[page_base], pageout_count, 0);
	344	}
	345
	346	/*
	347	* vm_pageout_flush() - launder the given pages
	348	*
	349	* The given pages are laundered. Note that we setup for the start of
	350	* I/O ( i.e. busy the page ), mark it read-only, and bump the object
	351	* reference count all in here rather then in the parent. If we want
	352	* the parent to do more sophisticated things we may have to change
	353	* the ordering.
	354	*/
	355
	356	int
	357	vm_pageout_flush(vm_page_t *mc, int count, int flags)
	358	{
	359	vm_object_t object;
	360	int pageout_status[count];
	361	int numpagedout = 0;
	362	int i;
	363
	364	/*
	365	* Initiate I/O. Bump the vm_page_t->busy counter and
	366	* mark the pages read-only.
	367	*
	368	* We do not have to fixup the clean/dirty bits here... we can
	369	* allow the pager to do it after the I/O completes.
	370	*/
	371
	372	for (i = 0; i < count; i++) {
	373	KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially invalid page", mc[i], i, count));
	374	vm_page_io_start(mc[i]);
	375	vm_page_protect(mc[i], VM_PROT_READ);
	376	}
	377
	378	object = mc[0]->object;
	379	vm_object_pip_add(object, count);
	380
	381	vm_pager_put_pages(object, mc, count,
	382	(flags \| ((object == kernel_object) ? VM_PAGER_PUT_SYNC : 0)),
	383	pageout_status);
	384
	385	for (i = 0; i < count; i++) {
	386	vm_page_t mt = mc[i];
	387
	388	switch (pageout_status[i]) {
	389	case VM_PAGER_OK:
	390	numpagedout++;
	391	break;
	392	case VM_PAGER_PEND:
	393	numpagedout++;
	394	break;
	395	case VM_PAGER_BAD:
	396	/*
	397	* Page outside of range of object. Right now we
	398	* essentially lose the changes by pretending it
	399	* worked.
	400	*/
	401	pmap_clear_modify(mt);
	402	vm_page_undirty(mt);
	403	break;
	404	case VM_PAGER_ERROR:
	405	case VM_PAGER_FAIL:
	406	/*
	407	* If page couldn't be paged out, then reactivate the
	408	* page so it doesn't clog the inactive list. (We
	409	* will try paging out it again later).
	410	*/
	411	vm_page_activate(mt);
	412	break;
	413	case VM_PAGER_AGAIN:
	414	break;
	415	}
	416
	417	/*
	418	* If the operation is still going, leave the page busy to
	419	* block all other accesses. Also, leave the paging in
	420	* progress indicator set so that we don't attempt an object
	421	* collapse.
	422	*/
	423	if (pageout_status[i] != VM_PAGER_PEND) {
	424	vm_object_pip_wakeup(object);
	425	vm_page_io_finish(mt);
	426	if (!vm_page_count_severe() \|\| !vm_page_try_to_cache(mt))
	427	vm_page_protect(mt, VM_PROT_READ);
	428	}
	429	}
	430	return numpagedout;
	431	}
	432
	433	#if !defined(NO_SWAPPING)
	434	/*
	435	* vm_pageout_object_deactivate_pages
	436	*
	437	* deactivate enough pages to satisfy the inactive target
	438	* requirements or if vm_page_proc_limit is set, then
	439	* deactivate all of the pages in the object and its
	440	* backing_objects.
	441	*
	442	* The object and map must be locked.
	443	*/
	444	static void
	445	vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object,
	446	vm_pindex_t desired, int map_remove_only)
	447	{
	448	vm_page_t p, next;
	449	int rcount;
	450	int remove_mode;
	451	int s;
	452
	453	if (object->type == OBJT_DEVICE \|\| object->type == OBJT_PHYS)
	454	return;
	455
	456	while (object) {
	457	if (pmap_resident_count(vm_map_pmap(map)) <= desired)
	458	return;
	459	if (object->paging_in_progress)
	460	return;
	461
	462	remove_mode = map_remove_only;
	463	if (object->shadow_count > 1)
	464	remove_mode = 1;
	465	/*
	466	* scan the objects entire memory queue
	467	*/
	468	rcount = object->resident_page_count;
	469	p = TAILQ_FIRST(&object->memq);
	470	while (p && (rcount-- > 0)) {
	471	int actcount;
	472	if (pmap_resident_count(vm_map_pmap(map)) <= desired)
	473	return;
	474	next = TAILQ_NEXT(p, listq);
	475	mycpu->gd_cnt.v_pdpages++;
	476	if (p->wire_count != 0 \|\|
	477	p->hold_count != 0 \|\|
	478	p->busy != 0 \|\|
	479	(p->flags & (PG_BUSY\|PG_UNMANAGED)) \|\|
	480	!pmap_page_exists_quick(vm_map_pmap(map), p)) {
	481	p = next;
	482	continue;
	483	}
	484
	485	actcount = pmap_ts_referenced(p);
	486	if (actcount) {
	487	vm_page_flag_set(p, PG_REFERENCED);
	488	} else if (p->flags & PG_REFERENCED) {
	489	actcount = 1;
	490	}
	491
	492	if ((p->queue != PQ_ACTIVE) &&
	493	(p->flags & PG_REFERENCED)) {
	494	vm_page_activate(p);
	495	p->act_count += actcount;
	496	vm_page_flag_clear(p, PG_REFERENCED);
	497	} else if (p->queue == PQ_ACTIVE) {
	498	if ((p->flags & PG_REFERENCED) == 0) {
	499	p->act_count -= min(p->act_count, ACT_DECLINE);
	500	if (!remove_mode && (vm_pageout_algorithm \|\| (p->act_count == 0))) {
	501	vm_page_protect(p, VM_PROT_NONE);
	502	vm_page_deactivate(p);
	503	} else {
	504	s = splvm();
	505	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
	506	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
	507	splx(s);
	508	}
	509	} else {
	510	vm_page_activate(p);
	511	vm_page_flag_clear(p, PG_REFERENCED);
	512	if (p->act_count < (ACT_MAX - ACT_ADVANCE))
	513	p->act_count += ACT_ADVANCE;
	514	s = splvm();
	515	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
	516	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
	517	splx(s);
	518	}
	519	} else if (p->queue == PQ_INACTIVE) {
	520	vm_page_protect(p, VM_PROT_NONE);
	521	}
	522	p = next;
	523	}
	524	object = object->backing_object;
	525	}
	526	return;
	527	}
	528
	529	/*
	530	* deactivate some number of pages in a map, try to do it fairly, but
	531	* that is really hard to do.
	532	*/
	533	static void
	534	vm_pageout_map_deactivate_pages(vm_map_t map, vm_pindex_t desired)
	535	{
	536	vm_map_entry_t tmpe;
	537	vm_object_t obj, bigobj;
	538	int nothingwired;
	539
	540	if (lockmgr(&map->lock, LK_EXCLUSIVE \| LK_NOWAIT, NULL, curthread)) {
	541	return;
	542	}
	543
	544	bigobj = NULL;
	545	nothingwired = TRUE;
	546
	547	/*
	548	* first, search out the biggest object, and try to free pages from
	549	* that.
	550	*/
	551	tmpe = map->header.next;
	552	while (tmpe != &map->header) {
	553	if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
	554	obj = tmpe->object.vm_object;
	555	if ((obj != NULL) && (obj->shadow_count <= 1) &&
	556	((bigobj == NULL) \|\|
	557	(bigobj->resident_page_count < obj->resident_page_count))) {
	558	bigobj = obj;
	559	}
	560	}
	561	if (tmpe->wired_count > 0)
	562	nothingwired = FALSE;
	563	tmpe = tmpe->next;
	564	}
	565
	566	if (bigobj)
	567	vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);
	568
	569	/*
	570	* Next, hunt around for other pages to deactivate. We actually
	571	* do this search sort of wrong -- .text first is not the best idea.
	572	*/
	573	tmpe = map->header.next;
	574	while (tmpe != &map->header) {
	575	if (pmap_resident_count(vm_map_pmap(map)) <= desired)
	576	break;
	577	if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
	578	obj = tmpe->object.vm_object;
	579	if (obj)
	580	vm_pageout_object_deactivate_pages(map, obj, desired, 0);
	581	}
	582	tmpe = tmpe->next;
	583	};
	584
	585	/*
	586	* Remove all mappings if a process is swapped out, this will free page
	587	* table pages.
	588	*/
	589	if (desired == 0 && nothingwired)
	590	pmap_remove(vm_map_pmap(map),
	591	VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	592	vm_map_unlock(map);
	593	return;
	594	}
	595	#endif
	596
	597	/*
	598	* Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
	599	* to vnode deadlocks. We only do it for OBJT_DEFAULT and OBJT_SWAP objects
	600	* which we know can be trivially freed.
	601	*/
	602
	603	void
	604	vm_pageout_page_free(vm_page_t m) {
	605	vm_object_t object = m->object;
	606	int type = object->type;
	607
	608	if (type == OBJT_SWAP \|\| type == OBJT_DEFAULT)
	609	vm_object_reference(object);
	610	vm_page_busy(m);
	611	vm_page_protect(m, VM_PROT_NONE);
	612	vm_page_free(m);
	613	if (type == OBJT_SWAP \|\| type == OBJT_DEFAULT)
	614	vm_object_deallocate(object);
	615	}
	616
	617	/*
	618	* vm_pageout_scan does the dirty work for the pageout daemon.
	619	*/
	620	static void
	621	vm_pageout_scan(int pass)
	622	{
	623	vm_page_t m, next;
	624	struct vm_page marker;
	625	int page_shortage, maxscan, pcount;
	626	int addl_page_shortage, addl_page_shortage_init;
	627	struct proc p, bigproc;
	628	vm_offset_t size, bigsize;
	629	vm_object_t object;
	630	int actcount;
	631	int vnodes_skipped = 0;
	632	int maxlaunder;
	633	int s;
	634
	635	/*
	636	* Do whatever cleanup that the pmap code can.
	637	*/
	638	pmap_collect();
	639
	640	addl_page_shortage_init = vm_pageout_deficit;
	641	vm_pageout_deficit = 0;
	642
	643	/*
	644	* Calculate the number of pages we want to either free or move
	645	* to the cache.
	646	*/
	647	page_shortage = vm_paging_target() + addl_page_shortage_init;
	648
	649	/*
	650	* Initialize our marker
	651	*/
	652	bzero(&marker, sizeof(marker));
	653	marker.flags = PG_BUSY \| PG_FICTITIOUS \| PG_MARKER;
	654	marker.queue = PQ_INACTIVE;
	655	marker.wire_count = 1;
	656
	657	/*
	658	* Start scanning the inactive queue for pages we can move to the
	659	* cache or free. The scan will stop when the target is reached or
	660	* we have scanned the entire inactive queue. Note that m->act_count
	661	* is not used to form decisions for the inactive queue, only for the
	662	* active queue.
	663	*
	664	* maxlaunder limits the number of dirty pages we flush per scan.
	665	* For most systems a smaller value (16 or 32) is more robust under
	666	* extreme memory and disk pressure because any unnecessary writes
	667	* to disk can result in extreme performance degradation. However,
	668	* systems with excessive dirty pages (especially when MAP_NOSYNC is
	669	* used) will die horribly with limited laundering. If the pageout
	670	* daemon cannot clean enough pages in the first pass, we let it go
	671	* all out in succeeding passes.
	672	*/
	673	if ((maxlaunder = vm_max_launder) <= 1)
	674	maxlaunder = 1;
	675	if (pass)
	676	maxlaunder = 10000;
	677
	678	rescan0:
	679	addl_page_shortage = addl_page_shortage_init;
	680	maxscan = vmstats.v_inactive_count;
	681	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
	682	m != NULL && maxscan-- > 0 && page_shortage > 0;
	683	m = next) {
	684
	685	mycpu->gd_cnt.v_pdpages++;
	686
	687	if (m->queue != PQ_INACTIVE) {
	688	goto rescan0;
	689	}
	690
	691	next = TAILQ_NEXT(m, pageq);
	692
	693	/*
	694	* skip marker pages
	695	*/
	696	if (m->flags & PG_MARKER)
	697	continue;
	698
	699	/*
	700	* A held page may be undergoing I/O, so skip it.
	701	*/
	702	if (m->hold_count) {
	703	s = splvm();
	704	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	705	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	706	splx(s);
	707	addl_page_shortage++;
	708	continue;
	709	}
	710	/*
	711	* Dont mess with busy pages, keep in the front of the
	712	* queue, most likely are being paged out.
	713	*/
	714	if (m->busy \|\| (m->flags & PG_BUSY)) {
	715	addl_page_shortage++;
	716	continue;
	717	}
	718
	719	/*
	720	* If the object is not being used, we ignore previous
	721	* references.
	722	*/
	723	if (m->object->ref_count == 0) {
	724	vm_page_flag_clear(m, PG_REFERENCED);
	725	pmap_clear_reference(m);
	726
	727	/*
	728	* Otherwise, if the page has been referenced while in the
	729	* inactive queue, we bump the "activation count" upwards,
	730	* making it less likely that the page will be added back to
	731	* the inactive queue prematurely again. Here we check the
	732	* page tables (or emulated bits, if any), given the upper
	733	* level VM system not knowing anything about existing
	734	* references.
	735	*/
	736	} else if (((m->flags & PG_REFERENCED) == 0) &&
	737	(actcount = pmap_ts_referenced(m))) {
	738	vm_page_activate(m);
	739	m->act_count += (actcount + ACT_ADVANCE);
	740	continue;
	741	}
	742
	743	/*
	744	* If the upper level VM system knows about any page
	745	* references, we activate the page. We also set the
	746	* "activation count" higher than normal so that we will less
	747	* likely place pages back onto the inactive queue again.
	748	*/
	749	if ((m->flags & PG_REFERENCED) != 0) {
	750	vm_page_flag_clear(m, PG_REFERENCED);
	751	actcount = pmap_ts_referenced(m);
	752	vm_page_activate(m);
	753	m->act_count += (actcount + ACT_ADVANCE + 1);
	754	continue;
	755	}
	756
	757	/*
	758	* If the upper level VM system doesn't know anything about
	759	* the page being dirty, we have to check for it again. As
	760	* far as the VM code knows, any partially dirty pages are
	761	* fully dirty.
	762	*
	763	* Pages marked PG_WRITEABLE may be mapped into the user
	764	* address space of a process running on another cpu. A
	765	* user process (without holding the MP lock) running on
	766	* another cpu may be able to touch the page while we are
	767	* trying to remove it. To prevent this from occurring we
	768	* must call pmap_remove_all() or otherwise make the page
	769	* read-only. If the race occurred pmap_remove_all() is
	770	* responsible for setting m->dirty.
	771	*/
	772	if (m->dirty == 0) {
	773	vm_page_test_dirty(m);
	774	#if 0
	775	if (m->dirty == 0 && (m->flags & PG_WRITEABLE) != 0)
	776	pmap_remove_all(m);
	777	#endif
	778	} else {
	779	vm_page_dirty(m);
	780	}
	781
	782	if (m->valid == 0) {
	783	/*
	784	* Invalid pages can be easily freed
	785	*/
	786	vm_pageout_page_free(m);
	787	mycpu->gd_cnt.v_dfree++;
	788	--page_shortage;
	789	} else if (m->dirty == 0) {
	790	/*
	791	* Clean pages can be placed onto the cache queue.
	792	* This effectively frees them.
	793	*/
	794	vm_page_cache(m);
	795	--page_shortage;
	796	} else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) {
	797	/*
	798	* Dirty pages need to be paged out, but flushing
	799	* a page is extremely expensive versus freeing
	800	* a clean page. Rather than artificially limiting
	801	* the number of pages we can flush, we instead give
	802	* dirty pages extra priority on the inactive queue
	803	* by forcing them to be cycled through the queue
	804	* twice before being flushed, after which the
	805	* (now clean) page will cycle through once more
	806	* before being freed. This significantly extends
	807	* the thrash point for a heavily loaded machine.
	808	*/
	809	s = splvm();
	810	vm_page_flag_set(m, PG_WINATCFLS);
	811	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	812	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	813	splx(s);
	814	} else if (maxlaunder > 0) {
	815	/*
	816	* We always want to try to flush some dirty pages if
	817	* we encounter them, to keep the system stable.
	818	* Normally this number is small, but under extreme
	819	* pressure where there are insufficient clean pages
	820	* on the inactive queue, we may have to go all out.
	821	*/
	822	int swap_pageouts_ok;
	823	struct vnode *vp = NULL;
	824
	825	object = m->object;
	826
	827	if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
	828	swap_pageouts_ok = 1;
	829	} else {
	830	swap_pageouts_ok = !(defer_swap_pageouts \|\| disable_swap_pageouts);
	831	swap_pageouts_ok \|= (!disable_swap_pageouts && defer_swap_pageouts &&
	832	vm_page_count_min());
	833
	834	}
	835
	836	/*
	837	* We don't bother paging objects that are "dead".
	838	* Those objects are in a "rundown" state.
	839	*/
	840	if (!swap_pageouts_ok \|\| (object->flags & OBJ_DEAD)) {
	841	s = splvm();
	842	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	843	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	844	splx(s);
	845	continue;
	846	}
	847
	848	/*
	849	* The object is already known NOT to be dead. It
	850	* is possible for the vget() to block the whole
	851	* pageout daemon, but the new low-memory handling
	852	* code should prevent it.
	853	*
	854	* The previous code skipped locked vnodes and, worse,
	855	* reordered pages in the queue. This results in
	856	* completely non-deterministic operation because,
	857	* quite often, a vm_fault has initiated an I/O and
	858	* is holding a locked vnode at just the point where
	859	* the pageout daemon is woken up.
	860	*
	861	* We can't wait forever for the vnode lock, we might
	862	* deadlock due to a vn_read() getting stuck in
	863	* vm_wait while holding this vnode. We skip the
	864	* vnode if we can't get it in a reasonable amount
	865	* of time.
	866	*/
	867
	868	if (object->type == OBJT_VNODE) {
	869	vp = object->handle;
	870
	871	if (vget(vp, NULL, LK_EXCLUSIVE\|LK_NOOBJ\|LK_TIMELOCK, curthread)) {
	872	++pageout_lock_miss;
	873	if (object->flags & OBJ_MIGHTBEDIRTY)
	874	vnodes_skipped++;
	875	continue;
	876	}
	877
	878	/*
	879	* The page might have been moved to another
	880	* queue during potential blocking in vget()
	881	* above. The page might have been freed and
	882	* reused for another vnode. The object might
	883	* have been reused for another vnode.
	884	*/
	885	if (m->queue != PQ_INACTIVE \|\|
	886	m->object != object \|\|
	887	object->handle != vp) {
	888	if (object->flags & OBJ_MIGHTBEDIRTY)
	889	vnodes_skipped++;
	890	vput(vp);
	891	continue;
	892	}
	893
	894	/*
	895	* The page may have been busied during the
	896	* blocking in vput(); We don't move the
	897	* page back onto the end of the queue so that
	898	* statistics are more correct if we don't.
	899	*/
	900	if (m->busy \|\| (m->flags & PG_BUSY)) {
	901	vput(vp);
	902	continue;
	903	}
	904
	905	/*
	906	* If the page has become held it might
	907	* be undergoing I/O, so skip it
	908	*/
	909	if (m->hold_count) {
	910	s = splvm();
	911	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	912	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
	913	splx(s);
	914	if (object->flags & OBJ_MIGHTBEDIRTY)
	915	vnodes_skipped++;
	916	vput(vp);
	917	continue;
	918	}
	919	}
	920
	921	/*
	922	* If a page is dirty, then it is either being washed
	923	* (but not yet cleaned) or it is still in the
	924	* laundry. If it is still in the laundry, then we
	925	* start the cleaning operation.
	926	*
	927	* This operation may cluster, invalidating the 'next'
	928	* pointer. To prevent an inordinate number of
	929	* restarts we use our marker to remember our place.
	930	*
	931	* decrement page_shortage on success to account for
	932	* the (future) cleaned page. Otherwise we could wind
	933	* up laundering or cleaning too many pages.
	934	*/
	935	s = splvm();
	936	TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
	937	splx(s);
	938	if (vm_pageout_clean(m) != 0) {
	939	--page_shortage;
	940	--maxlaunder;
	941	}
	942	s = splvm();
	943	next = TAILQ_NEXT(&marker, pageq);
	944	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
	945	splx(s);
	946	if (vp != NULL)
	947	vput(vp);
	948	}
	949	}
	950
	951	/*
	952	* Compute the number of pages we want to try to move from the
	953	* active queue to the inactive queue.
	954	*/
	955	page_shortage = vm_paging_target() +
	956	vmstats.v_inactive_target - vmstats.v_inactive_count;
	957	page_shortage += addl_page_shortage;
	958
	959	/*
	960	* Scan the active queue for things we can deactivate. We nominally
	961	* track the per-page activity counter and use it to locate
	962	* deactivation candidates.
	963	*/
	964
	965	pcount = vmstats.v_active_count;
	966	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
	967
	968	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
	969
	970	/*
	971	* This is a consistency check, and should likely be a panic
	972	* or warning.
	973	*/
	974	if (m->queue != PQ_ACTIVE) {
	975	break;
	976	}
	977
	978	next = TAILQ_NEXT(m, pageq);
	979	/*
	980	* Don't deactivate pages that are busy.
	981	*/
	982	if ((m->busy != 0) \|\|
	983	(m->flags & PG_BUSY) \|\|
	984	(m->hold_count != 0)) {
	985	s = splvm();
	986	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	987	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	988	splx(s);
	989	m = next;
	990	continue;
	991	}
	992
	993	/*
	994	* The count for pagedaemon pages is done after checking the
	995	* page for eligibility...
	996	*/
	997	mycpu->gd_cnt.v_pdpages++;
	998
	999	/*
	1000	* Check to see "how much" the page has been used.
	1001	*/
	1002	actcount = 0;
	1003	if (m->object->ref_count != 0) {
	1004	if (m->flags & PG_REFERENCED) {
	1005	actcount += 1;
	1006	}
	1007	actcount += pmap_ts_referenced(m);
	1008	if (actcount) {
	1009	m->act_count += ACT_ADVANCE + actcount;
	1010	if (m->act_count > ACT_MAX)
	1011	m->act_count = ACT_MAX;
	1012	}
	1013	}
	1014
	1015	/*
	1016	* Since we have "tested" this bit, we need to clear it now.
	1017	*/
	1018	vm_page_flag_clear(m, PG_REFERENCED);
	1019
	1020	/*
	1021	* Only if an object is currently being used, do we use the
	1022	* page activation count stats.
	1023	*/
	1024	if (actcount && (m->object->ref_count != 0)) {
	1025	s = splvm();
	1026	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1027	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1028	splx(s);
	1029	} else {
	1030	m->act_count -= min(m->act_count, ACT_DECLINE);
	1031	if (vm_pageout_algorithm \|\|
	1032	m->object->ref_count == 0 \|\|
	1033	m->act_count == 0) {
	1034	page_shortage--;
	1035	if (m->object->ref_count == 0) {
	1036	vm_page_protect(m, VM_PROT_NONE);
	1037	if (m->dirty == 0)
	1038	vm_page_cache(m);
	1039	else
	1040	vm_page_deactivate(m);
	1041	} else {
	1042	vm_page_deactivate(m);
	1043	}
	1044	} else {
	1045	s = splvm();
	1046	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1047	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1048	splx(s);
	1049	}
	1050	}
	1051	m = next;
	1052	}
	1053
	1054	s = splvm();
	1055
	1056	/*
	1057	* We try to maintain some really free pages, this allows interrupt
	1058	* code to be guaranteed space. Since both cache and free queues
	1059	* are considered basically 'free', moving pages from cache to free
	1060	* does not affect other calculations.
	1061	*/
	1062
	1063	while (vmstats.v_free_count < vmstats.v_free_reserved) {
	1064	static int cache_rover = 0;
	1065	m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
	1066	if (!m)
	1067	break;
	1068	if ((m->flags & (PG_BUSY\|PG_UNMANAGED)) \|\|
	1069	m->busy \|\|
	1070	m->hold_count \|\|
	1071	m->wire_count) {
	1072	#ifdef INVARIANTS
	1073	printf("Warning: busy page %p found in cache\n", m);
	1074	#endif
	1075	vm_page_deactivate(m);
	1076	continue;
	1077	}
	1078	cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
	1079	vm_pageout_page_free(m);
	1080	mycpu->gd_cnt.v_dfree++;
	1081	}
	1082	splx(s);
	1083
	1084	#if !defined(NO_SWAPPING)
	1085	/*
	1086	* Idle process swapout -- run once per second.
	1087	*/
	1088	if (vm_swap_idle_enabled) {
	1089	static long lsec;
	1090	if (time_second != lsec) {
	1091	vm_pageout_req_swapout \|= VM_SWAP_IDLE;
	1092	vm_req_vmdaemon();
	1093	lsec = time_second;
	1094	}
	1095	}
	1096	#endif
	1097
	1098	/*
	1099	* If we didn't get enough free pages, and we have skipped a vnode
	1100	* in a writeable object, wakeup the sync daemon. And kick swapout
	1101	* if we did not get enough free pages.
	1102	*/
	1103	if (vm_paging_target() > 0) {
	1104	if (vnodes_skipped && vm_page_count_min())
	1105	(void) speedup_syncer();
	1106	#if !defined(NO_SWAPPING)
	1107	if (vm_swap_enabled && vm_page_count_target()) {
	1108	vm_req_vmdaemon();
	1109	vm_pageout_req_swapout \|= VM_SWAP_NORMAL;
	1110	}
	1111	#endif
	1112	}
	1113
	1114	/*
	1115	* If we are out of swap and were not able to reach our paging
	1116	* target, kill the largest process.
	1117	*/
	1118	if ((vm_swap_size < 64 && vm_page_count_min()) \|\|
	1119	(swap_pager_full && vm_paging_target() > 0)) {
	1120	#if 0
	1121	if ((vm_swap_size < 64 \|\| swap_pager_full) && vm_page_count_min()) {
	1122	#endif
	1123	bigproc = NULL;
	1124	bigsize = 0;
	1125	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
	1126	/*
	1127	* if this is a system process, skip it
	1128	*/
	1129	if ((p->p_flag & P_SYSTEM) \|\| (p->p_pid == 1) \|\|
	1130	((p->p_pid < 48) && (vm_swap_size != 0))) {
	1131	continue;
	1132	}
	1133	/*
	1134	* if the process is in a non-running type state,
	1135	* don't touch it.
	1136	*/
	1137	if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
	1138	continue;
	1139	}
	1140	/*
	1141	* get the process size
	1142	*/
	1143	size = vmspace_resident_count(p->p_vmspace) +
	1144	vmspace_swap_count(p->p_vmspace);
	1145	/*
	1146	* if this process is bigger than the biggest one
	1147	* remember it.
	1148	*/
	1149	if (size > bigsize) {
	1150	bigproc = p;
	1151	bigsize = size;
	1152	}
	1153	}
	1154	if (bigproc != NULL) {
	1155	killproc(bigproc, "out of swap space");
	1156	bigproc->p_estcpu = 0;
	1157	bigproc->p_nice = PRIO_MIN;
	1158	resetpriority(bigproc);
	1159	wakeup(&vmstats.v_free_count);
	1160	}
	1161	}
	1162	}
	1163
	1164	/*
	1165	* This routine tries to maintain the pseudo LRU active queue,
	1166	* so that during long periods of time where there is no paging,
	1167	* that some statistic accumulation still occurs. This code
	1168	* helps the situation where paging just starts to occur.
	1169	*/
	1170	static void
	1171	vm_pageout_page_stats(void)
	1172	{
	1173	int s;
	1174	vm_page_t m,next;
	1175	int pcount,tpcount; /* Number of pages to check */
	1176	static int fullintervalcount = 0;
	1177	int page_shortage;
	1178	int s0;
	1179
	1180	page_shortage =
	1181	(vmstats.v_inactive_target + vmstats.v_cache_max + vmstats.v_free_min) -
	1182	(vmstats.v_free_count + vmstats.v_inactive_count + vmstats.v_cache_count);
	1183
	1184	if (page_shortage <= 0)
	1185	return;
	1186
	1187	s0 = splvm();
	1188
	1189	pcount = vmstats.v_active_count;
	1190	fullintervalcount += vm_pageout_stats_interval;
	1191	if (fullintervalcount < vm_pageout_full_stats_interval) {
	1192	tpcount = (vm_pageout_stats_max * vmstats.v_active_count) / vmstats.v_page_count;
	1193	if (pcount > tpcount)
	1194	pcount = tpcount;
	1195	} else {
	1196	fullintervalcount = 0;
	1197	}
	1198
	1199	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
	1200	while ((m != NULL) && (pcount-- > 0)) {
	1201	int actcount;
	1202
	1203	if (m->queue != PQ_ACTIVE) {
	1204	break;
	1205	}
	1206
	1207	next = TAILQ_NEXT(m, pageq);
	1208	/*
	1209	* Don't deactivate pages that are busy.
	1210	*/
	1211	if ((m->busy != 0) \|\|
	1212	(m->flags & PG_BUSY) \|\|
	1213	(m->hold_count != 0)) {
	1214	s = splvm();
	1215	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1216	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1217	splx(s);
	1218	m = next;
	1219	continue;
	1220	}
	1221
	1222	actcount = 0;
	1223	if (m->flags & PG_REFERENCED) {
	1224	vm_page_flag_clear(m, PG_REFERENCED);
	1225	actcount += 1;
	1226	}
	1227
	1228	actcount += pmap_ts_referenced(m);
	1229	if (actcount) {
	1230	m->act_count += ACT_ADVANCE + actcount;
	1231	if (m->act_count > ACT_MAX)
	1232	m->act_count = ACT_MAX;
	1233	s = splvm();
	1234	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1235	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1236	splx(s);
	1237	} else {
	1238	if (m->act_count == 0) {
	1239	/*
	1240	* We turn off page access, so that we have
	1241	* more accurate RSS stats. We don't do this
	1242	* in the normal page deactivation when the
	1243	* system is loaded VM wise, because the
	1244	* cost of the large number of page protect
	1245	* operations would be higher than the value
	1246	* of doing the operation.
	1247	*/
	1248	vm_page_protect(m, VM_PROT_NONE);
	1249	vm_page_deactivate(m);
	1250	} else {
	1251	m->act_count -= min(m->act_count, ACT_DECLINE);
	1252	s = splvm();
	1253	TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1254	TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
	1255	splx(s);
	1256	}
	1257	}
	1258
	1259	m = next;
	1260	}
	1261	splx(s0);
	1262	}
	1263
	1264	static int
	1265	vm_pageout_free_page_calc(vm_size_t count)
	1266	{
	1267	if (count < vmstats.v_page_count)
	1268	return 0;
	1269	/*
	1270	* free_reserved needs to include enough for the largest swap pager
	1271	* structures plus enough for any pv_entry structs when paging.
	1272	*/
	1273	if (vmstats.v_page_count > 1024)
	1274	vmstats.v_free_min = 4 + (vmstats.v_page_count - 1024) / 200;
	1275	else
	1276	vmstats.v_free_min = 4;
	1277	vmstats.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
	1278	vmstats.v_interrupt_free_min;
	1279	vmstats.v_free_reserved = vm_pageout_page_count +
	1280	vmstats.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
	1281	vmstats.v_free_severe = vmstats.v_free_min / 2;
	1282	vmstats.v_free_min += vmstats.v_free_reserved;
	1283	vmstats.v_free_severe += vmstats.v_free_reserved;
	1284	return 1;
	1285	}
	1286
	1287
	1288	/*
	1289	* vm_pageout is the high level pageout daemon.
	1290	*/
	1291	static void
	1292	vm_pageout(void)
	1293	{
	1294	int pass;
	1295
	1296	/*
	1297	* Initialize some paging parameters.
	1298	*/
	1299
	1300	vmstats.v_interrupt_free_min = 2;
	1301	if (vmstats.v_page_count < 2000)
	1302	vm_pageout_page_count = 8;
	1303
	1304	vm_pageout_free_page_calc(vmstats.v_page_count);
	1305	/*
	1306	* v_free_target and v_cache_min control pageout hysteresis. Note
	1307	* that these are more a measure of the VM cache queue hysteresis
	1308	* then the VM free queue. Specifically, v_free_target is the
	1309	* high water mark (free+cache pages).
	1310	*
	1311	* v_free_reserved + v_cache_min (mostly means v_cache_min) is the
	1312	* low water mark, while v_free_min is the stop. v_cache_min must
	1313	* be big enough to handle memory needs while the pageout daemon
	1314	* is signalled and run to free more pages.
	1315	*/
	1316	if (vmstats.v_free_count > 6144)
	1317	vmstats.v_free_target = 4 * vmstats.v_free_min + vmstats.v_free_reserved;
	1318	else
	1319	vmstats.v_free_target = 2 * vmstats.v_free_min + vmstats.v_free_reserved;
	1320
	1321	if (vmstats.v_free_count > 2048) {
	1322	vmstats.v_cache_min = vmstats.v_free_target;
	1323	vmstats.v_cache_max = 2 * vmstats.v_cache_min;
	1324	vmstats.v_inactive_target = (3 * vmstats.v_free_target) / 2;
	1325	} else {
	1326	vmstats.v_cache_min = 0;
	1327	vmstats.v_cache_max = 0;
	1328	vmstats.v_inactive_target = vmstats.v_free_count / 4;
	1329	}
	1330	if (vmstats.v_inactive_target > vmstats.v_free_count / 3)
	1331	vmstats.v_inactive_target = vmstats.v_free_count / 3;
	1332
	1333	/* XXX does not really belong here */
	1334	if (vm_page_max_wired == 0)
	1335	vm_page_max_wired = vmstats.v_free_count / 3;
	1336
	1337	if (vm_pageout_stats_max == 0)
	1338	vm_pageout_stats_max = vmstats.v_free_target;
	1339
	1340	/*
	1341	* Set interval in seconds for stats scan.
	1342	*/
	1343	if (vm_pageout_stats_interval == 0)
	1344	vm_pageout_stats_interval = 5;
	1345	if (vm_pageout_full_stats_interval == 0)
	1346	vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
	1347
	1348
	1349	/*
	1350	* Set maximum free per pass
	1351	*/
	1352	if (vm_pageout_stats_free_max == 0)
	1353	vm_pageout_stats_free_max = 5;
	1354
	1355	swap_pager_swap_init();
	1356	pass = 0;
	1357	/*
	1358	* The pageout daemon is never done, so loop forever.
	1359	*/
	1360	while (TRUE) {
	1361	int error;
	1362	int s = splvm();
	1363
	1364	/*
	1365	* If we have enough free memory, wakeup waiters. Do
	1366	* not clear vm_pages_needed until we reach our target,
	1367	* otherwise we may be woken up over and over again and
	1368	* waste a lot of cpu.
	1369	*/
	1370	if (vm_pages_needed && !vm_page_count_min()) {
	1371	if (vm_paging_needed() <= 0)
	1372	vm_pages_needed = 0;
	1373	wakeup(&vmstats.v_free_count);
	1374	}
	1375	if (vm_pages_needed) {
	1376	/*
	1377	* Still not done, take a second pass without waiting
	1378	* (unlimited dirty cleaning), otherwise sleep a bit
	1379	* and try again.
	1380	*/
	1381	++pass;
	1382	if (pass > 1)
	1383	tsleep(&vm_pages_needed, 0, "psleep", hz/2);
	1384	} else {
	1385	/*
	1386	* Good enough, sleep & handle stats. Prime the pass
	1387	* for the next run.
	1388	*/
	1389	if (pass > 1)
	1390	pass = 1;
	1391	else
	1392	pass = 0;
	1393	error = tsleep(&vm_pages_needed,
	1394	0, "psleep", vm_pageout_stats_interval * hz);
	1395	if (error && !vm_pages_needed) {
	1396	splx(s);
	1397	pass = 0;
	1398	vm_pageout_page_stats();
	1399	continue;
	1400	}
	1401	}
	1402
	1403	if (vm_pages_needed)
	1404	mycpu->gd_cnt.v_pdwakeups++;
	1405	splx(s);
	1406	vm_pageout_scan(pass);
	1407	vm_pageout_deficit = 0;
	1408	}
	1409	}
	1410
	1411	void
	1412	pagedaemon_wakeup(void)
	1413	{
	1414	if (!vm_pages_needed && curthread != pagethread) {
	1415	vm_pages_needed++;
	1416	wakeup(&vm_pages_needed);
	1417	}
	1418	}
	1419
	1420	#if !defined(NO_SWAPPING)
	1421	static void
	1422	vm_req_vmdaemon(void)
	1423	{
	1424	static int lastrun = 0;
	1425
	1426	if ((ticks > (lastrun + hz)) \|\| (ticks < lastrun)) {
	1427	wakeup(&vm_daemon_needed);
	1428	lastrun = ticks;
	1429	}
	1430	}
	1431
	1432	static void
	1433	vm_daemon(void)
	1434	{
	1435	struct proc *p;
	1436
	1437	while (TRUE) {
	1438	tsleep(&vm_daemon_needed, 0, "psleep", 0);
	1439	if (vm_pageout_req_swapout) {
	1440	swapout_procs(vm_pageout_req_swapout);
	1441	vm_pageout_req_swapout = 0;
	1442	}
	1443	/*
	1444	* scan the processes for exceeding their rlimits or if
	1445	* process is swapped out -- deactivate pages
	1446	*/
	1447
	1448	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
	1449	vm_pindex_t limit, size;
	1450
	1451	/*
	1452	* if this is a system process or if we have already
	1453	* looked at this process, skip it.
	1454	*/
	1455	if (p->p_flag & (P_SYSTEM \| P_WEXIT)) {
	1456	continue;
	1457	}
	1458	/*
	1459	* if the process is in a non-running type state,
	1460	* don't touch it.
	1461	*/
	1462	if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
	1463	continue;
	1464	}
	1465	/*
	1466	* get a limit
	1467	*/
	1468	limit = OFF_TO_IDX(
	1469	qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
	1470	p->p_rlimit[RLIMIT_RSS].rlim_max));
	1471
	1472	/*
	1473	* let processes that are swapped out really be
	1474	* swapped out set the limit to nothing (will force a
	1475	* swap-out.)
	1476	*/
	1477	if ((p->p_flag & P_INMEM) == 0)
	1478	limit = 0; /* XXX */
	1479
	1480	size = vmspace_resident_count(p->p_vmspace);
	1481	if (limit >= 0 && size >= limit) {
	1482	vm_pageout_map_deactivate_pages(
	1483	&p->p_vmspace->vm_map, limit);
	1484	}
	1485	}
	1486	}
	1487	}
	1488	#endif