/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $
 * $DragonFly: src/sys/vm/vm_pageout.c,v 1.36 2008/07/01 02:02:56 dillon Exp $
 */

/*
 * The proverbial page-out daemon.
 */

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

/*
 * System initialization
 */

/* the kernel process "vm_pageout"*/
static void vm_pageout (void);
static int vm_pageout_clean (vm_page_t);
static void vm_pageout_scan (int pass);
static int vm_pageout_free_page_calc (vm_size_t count);
struct thread *pagethread;

static struct kproc_desc page_kp = {
	"pagedaemon",
	vm_pageout,
	&pagethread
};
SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)

#if !defined(NO_SWAPPING)
/* the kernel process "vm_daemon"*/
static void vm_daemon (void);
static struct thread *vmthread;

static struct kproc_desc vm_kp = {
	"vmdaemon",
	vm_daemon,
	&vmthread
};
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
#endif


int vm_pages_needed=0;		/* Event on which pageout daemon sleeps */
int vm_pageout_deficit=0;	/* Estimated number of pages deficit */
int vm_pageout_pages_needed=0;	/* flag saying that the pageout daemon needs pages */

#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout;	/* XXX */
static int vm_daemon_needed;
#endif
extern int vm_swap_size;
static int vm_max_launder = 32;
static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
static int vm_pageout_full_stats_interval = 0;
static int vm_pageout_stats_free_max=0, vm_pageout_algorithm=0;
static int defer_swap_pageouts=0;
static int disable_swap_pageouts=0;

#if defined(NO_SWAPPING)
static int vm_swap_enabled=0;
static int vm_swap_idle_enabled=0;
#else
static int vm_swap_enabled=1;
static int vm_swap_idle_enabled=0;
#endif

SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
	CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt");

SYSCTL_INT(_vm, OID_AUTO, max_launder,
	CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");

SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
	CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");

#if defined(NO_SWAPPING)
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	CTLFLAG_RD, &vm_swap_enabled, 0, "");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
#else
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
#endif

SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");

SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");

static int pageout_lock_miss;
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");

int vm_load;
SYSCTL_INT(_vm, OID_AUTO, vm_load,
	CTLFLAG_RD, &vm_load, 0, "load on the VM system");
int vm_load_enable = 1;
SYSCTL_INT(_vm, OID_AUTO, vm_load_enable,
	CTLFLAG_RW, &vm_load_enable, 0, "enable vm_load rate limiting");
#ifdef INVARIANTS
int vm_load_debug;
SYSCTL_INT(_vm, OID_AUTO, vm_load_debug,
	CTLFLAG_RW, &vm_load_debug, 0, "debug vm_load");
#endif

#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;

int vm_page_max_wired;		/* XXX max # of wired pages system-wide */

#if !defined(NO_SWAPPING)
typedef void freeer_fcn_t (vm_map_t, vm_object_t, vm_pindex_t, int);
static void vm_pageout_map_deactivate_pages (vm_map_t, vm_pindex_t);
static freeer_fcn_t vm_pageout_object_deactivate_pages;
static void vm_req_vmdaemon (void);
#endif
static void vm_pageout_page_stats(void);
/*
 * Update the vm_load statistic, which is used to rate limit page faults
 * while the pageout daemon is trying to recover memory.
 */
void
vm_fault_ratecheck(void)
{
	if (vm_pages_needed) {
		if (vm_load < 1000)
			++vm_load;
	} else {
		if (vm_load > 0)
			--vm_load;
	}
}
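
/*
 * Illustrative sketch (assumes one ratecheck call per page fault):
 * while the pageout daemon is active (vm_pages_needed != 0) vm_load
 * climbs toward its cap of 1000; once the daemon goes idle it decays
 * back toward 0, one step per call.  With vm_load_enable set, the
 * fault path can consult this value to throttle faults under VM load.
 */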

/*
 * vm_pageout_clean:
 *
 * Clean the page and remove it from the laundry.  The page must not be
 * busy when this is called.
 *
 * We set the busy bit to cause potential page faults on this page to
 * block.  Note the careful timing, however, the busy bit isn't set till
 * late and we cannot do anything that will mess with the page.
 */

static int
vm_pageout_clean(vm_page_t m)
{
	vm_object_t object;
	vm_page_t mc[2*vm_pageout_page_count];
	int pageout_count;
	int ib, is, page_base;
	vm_pindex_t pindex = m->pindex;

	object = m->object;

	/*
	 * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
	 * with the new swapper, but we could have serious problems paging
	 * out other object types if there is insufficient memory.
	 *
	 * Unfortunately, checking free memory here is far too late, so the
	 * check has been moved up a procedural level.
	 */

	/*
	 * Don't mess with the page if it's busy, held, or special
	 */
	if ((m->hold_count != 0) ||
	    ((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
		return 0;
	}

	mc[vm_pageout_page_count] = m;
	pageout_count = 1;
	page_base = vm_pageout_page_count;
	ib = 1;
	is = 1;

	/*
	 * Scan object for clusterable pages.
	 *
	 * We can cluster ONLY if the page is NOT clean, wired, busy,
	 * held, or mapped into a buffer, and one of the following:
	 * 1) The page is inactive, or a seldom used active page.
	 * -or-
	 * 2) we force the issue.
	 *
	 * During heavy mmap/modification loads the pageout
	 * daemon can really fragment the underlying file
	 * due to flushing pages out of order and not trying
	 * to align the clusters (which leaves sporadic out-of-order
	 * holes).  To solve this problem we do the reverse scan
	 * first and attempt to align our cluster, then do a
	 * forward scan if room remains.
	 */

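	/*
	 * Illustrative layout (assuming the default vm_pageout_page_count
	 * of 16): mc[] is a window centered on m,
	 *
	 *	mc[0] ........ mc[16] ........ mc[31]
	 *	      <-reverse   m    forward->
	 *
	 * The reverse scan stores pages at mc[--page_base], the forward
	 * scan at mc[page_base + pageout_count], and the final flush
	 * submits pageout_count pages starting at &mc[page_base].
	 */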
more:
	while (ib && pageout_count < vm_pageout_page_count) {
		vm_page_t p;

		if (ib > pindex) {
			ib = 0;
			break;
		}

		if ((p = vm_page_lookup(object, pindex - ib)) == NULL) {
			ib = 0;
			break;
		}
		if (((p->queue - p->pc) == PQ_CACHE) ||
		    (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
			ib = 0;
			break;
		}
		vm_page_test_dirty(p);
		if ((p->dirty & p->valid) == 0 ||
		    p->queue != PQ_INACTIVE ||
		    p->wire_count != 0 ||	/* may be held by buf cache */
		    p->hold_count != 0) {	/* may be undergoing I/O */
			ib = 0;
			break;
		}
		mc[--page_base] = p;
		++pageout_count;
		++ib;
		/*
		 * alignment boundary, stop here and switch directions.  Do
		 * not clear ib.
		 */
		if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
			break;
	}

	while (pageout_count < vm_pageout_page_count &&
	    pindex + is < object->size) {
		vm_page_t p;

		if ((p = vm_page_lookup(object, pindex + is)) == NULL)
			break;
		if (((p->queue - p->pc) == PQ_CACHE) ||
		    (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
			break;
		}
		vm_page_test_dirty(p);
		if ((p->dirty & p->valid) == 0 ||
		    p->queue != PQ_INACTIVE ||
		    p->wire_count != 0 ||	/* may be held by buf cache */
		    p->hold_count != 0) {	/* may be undergoing I/O */
			break;
		}
		mc[page_base + pageout_count] = p;
		++pageout_count;
		++is;
	}

	/*
	 * If we exhausted our forward scan, continue with the reverse scan
	 * when possible, even past a page boundary.  This catches boundary
	 * conditions.
	 */
	if (ib && pageout_count < vm_pageout_page_count)
		goto more;

	/*
	 * we allow reads during pageouts...
	 */
	return vm_pageout_flush(&mc[page_base], pageout_count, 0);
}

/*
 * vm_pageout_flush() - launder the given pages
 *
 * The given pages are laundered.  Note that we set up for the start of
 * I/O ( i.e. busy the page ), mark it read-only, and bump the object
 * reference count all in here rather than in the parent.  If we want
 * the parent to do more sophisticated things we may have to change
 * the ordering.
 */

int
vm_pageout_flush(vm_page_t *mc, int count, int flags)
{
	vm_object_t object;
	int pageout_status[count];
	int numpagedout = 0;
	int i;

	/*
	 * Initiate I/O.  Bump the vm_page_t->busy counter.
	 */
	for (i = 0; i < count; i++) {
		KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially invalid page", mc[i], i, count));
		vm_page_io_start(mc[i]);
	}

	/*
	 * We must make the pages read-only.  This will also force the
	 * modified bit in the related pmaps to be cleared.  The pager
	 * cannot clear the bit for us since the I/O completion code
	 * typically runs from an interrupt.  The act of making the page
	 * read-only handles the case for us.
	 */
	for (i = 0; i < count; i++) {
		vm_page_protect(mc[i], VM_PROT_READ);
	}

	object = mc[0]->object;
	vm_object_pip_add(object, count);

	vm_pager_put_pages(object, mc, count,
	    (flags | ((object == &kernel_object) ? VM_PAGER_PUT_SYNC : 0)),
	    pageout_status);

	for (i = 0; i < count; i++) {
		vm_page_t mt = mc[i];

		switch (pageout_status[i]) {
		case VM_PAGER_OK:
			numpagedout++;
			break;
		case VM_PAGER_PEND:
			numpagedout++;
			break;
		case VM_PAGER_BAD:
			/*
			 * Page outside of range of object.  Right now we
			 * essentially lose the changes by pretending it
			 * worked.
			 */
			pmap_clear_modify(mt);
			vm_page_undirty(mt);
			break;
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/*
			 * A page typically cannot be paged out when we
			 * have run out of swap.  We leave the page
			 * marked inactive and will try to page it out
			 * again later.
			 *
			 * Starvation of the active page list is used to
			 * determine when the system is massively memory
			 * starved.
			 */
			break;
		case VM_PAGER_AGAIN:
			break;
		}

		/*
		 * If the operation is still going, leave the page busy to
		 * block all other accesses.  Also, leave the paging in
		 * progress indicator set so that we don't attempt an object
		 * collapse.
		 *
		 * For any pages which have completed synchronously,
		 * deactivate the page if we are under a severe deficit.
		 * Do not try to enter them into the cache, though, they
		 * might still be read-heavy.
		 */
		if (pageout_status[i] != VM_PAGER_PEND) {
			vm_object_pip_wakeup(object);
			vm_page_io_finish(mt);
			if (vm_page_count_severe())
				vm_page_deactivate(mt);
#if 0
			if (!vm_page_count_severe() || !vm_page_try_to_cache(mt))
				vm_page_protect(mt, VM_PROT_READ);
#endif
		}
	}
	return numpagedout;
}

#if !defined(NO_SWAPPING)
/*
 * vm_pageout_object_deactivate_pages
 *
 * deactivate enough pages to satisfy the inactive target
 * requirements or if vm_page_proc_limit is set, then
 * deactivate all of the pages in the object and its
 * backing_objects.
 *
 * The object and map must be locked.
 */
static int vm_pageout_object_deactivate_pages_callback(vm_page_t, void *);

static void
vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object,
	vm_pindex_t desired, int map_remove_only)
{
	struct rb_vm_page_scan_info info;
	int remove_mode;

	if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
		return;

	while (object) {
		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
			return;
		if (object->paging_in_progress)
			return;

		remove_mode = map_remove_only;
		if (object->shadow_count > 1)
			remove_mode = 1;

		/*
		 * scan the object's entire memory queue.  spl protection is
		 * required to avoid an interrupt unbusy/free race against
		 * our busy check.
		 */
		crit_enter();
		info.limit = remove_mode;
		info.map = map;
		info.desired = desired;
		vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
				vm_pageout_object_deactivate_pages_callback,
				&info
		);
		crit_exit();
		object = object->backing_object;
	}
}

static int
vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data)
{
	struct rb_vm_page_scan_info *info = data;
	int actcount;

	if (pmap_resident_count(vm_map_pmap(info->map)) <= info->desired) {
		return(-1);
	}
	mycpu->gd_cnt.v_pdpages++;
	if (p->wire_count != 0 || p->hold_count != 0 || p->busy != 0 ||
	    (p->flags & (PG_BUSY|PG_UNMANAGED)) ||
	    !pmap_page_exists_quick(vm_map_pmap(info->map), p)) {
		return(0);
	}

	actcount = pmap_ts_referenced(p);
	if (actcount) {
		vm_page_flag_set(p, PG_REFERENCED);
	} else if (p->flags & PG_REFERENCED) {
		actcount = 1;
	}

	if ((p->queue != PQ_ACTIVE) &&
	    (p->flags & PG_REFERENCED)) {
		vm_page_activate(p);
		p->act_count += actcount;
		vm_page_flag_clear(p, PG_REFERENCED);
	} else if (p->queue == PQ_ACTIVE) {
		if ((p->flags & PG_REFERENCED) == 0) {
			p->act_count -= min(p->act_count, ACT_DECLINE);
			if (!info->limit && (vm_pageout_algorithm || (p->act_count == 0))) {
				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_wakeup(p);
				vm_page_deactivate(p);
			} else {
				TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
			}
		} else {
			vm_page_activate(p);
			vm_page_flag_clear(p, PG_REFERENCED);
			if (p->act_count < (ACT_MAX - ACT_ADVANCE))
				p->act_count += ACT_ADVANCE;
			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
		}
	} else if (p->queue == PQ_INACTIVE) {
		vm_page_busy(p);
		vm_page_protect(p, VM_PROT_NONE);
		vm_page_wakeup(p);
	}
	return(0);
}

/*
 * Deactivate some number of pages in a map; try to do it fairly, though
 * that is really hard to do.
 */
static void
vm_pageout_map_deactivate_pages(vm_map_t map, vm_pindex_t desired)
{
	vm_map_entry_t tmpe;
	vm_object_t obj, bigobj;
	int nothingwired;

	if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT)) {
		return;
	}

	bigobj = NULL;
	nothingwired = TRUE;

	/*
	 * first, search out the biggest object, and try to free pages from
	 * that.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		switch(tmpe->maptype) {
		case VM_MAPTYPE_NORMAL:
		case VM_MAPTYPE_VPAGETABLE:
			obj = tmpe->object.vm_object;
			if ((obj != NULL) && (obj->shadow_count <= 1) &&
			    ((bigobj == NULL) ||
			     (bigobj->resident_page_count < obj->resident_page_count))) {
				bigobj = obj;
			}
			break;
		default:
			break;
		}
		if (tmpe->wired_count > 0)
			nothingwired = FALSE;
		tmpe = tmpe->next;
	}

	if (bigobj)
		vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);

	/*
	 * Next, hunt around for other pages to deactivate.  We actually
	 * do this search sort of wrong -- .text first is not the best idea.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
			break;
		switch(tmpe->maptype) {
		case VM_MAPTYPE_NORMAL:
		case VM_MAPTYPE_VPAGETABLE:
			obj = tmpe->object.vm_object;
			if (obj)
				vm_pageout_object_deactivate_pages(map, obj, desired, 0);
			break;
		default:
			break;
		}
		tmpe = tmpe->next;
	}

	/*
	 * Remove all mappings if a process is swapped out; this will free
	 * page table pages.
	 */
	if (desired == 0 && nothingwired)
		pmap_remove(vm_map_pmap(map),
			VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_unlock(map);
}
#endif

/*
 * Don't try to be fancy - being fancy can lead to vnode deadlocks.   We
 * only do it for OBJT_DEFAULT and OBJT_SWAP objects which we know can
 * be trivially freed.
 */
void
vm_pageout_page_free(vm_page_t m)
{
	vm_object_t object = m->object;
	int type = object->type;

	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
		vm_object_reference(object);
	vm_page_busy(m);
	vm_page_protect(m, VM_PROT_NONE);
	vm_page_free(m);
	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
		vm_object_deallocate(object);
}

/*
 * vm_pageout_scan does the dirty work for the pageout daemon.
 */

struct vm_pageout_scan_info {
	struct proc *bigproc;
	vm_offset_t bigsize;
};

static int vm_pageout_scan_callback(struct proc *p, void *data);

static void
vm_pageout_scan(int pass)
{
	struct vm_pageout_scan_info info;
	vm_page_t m, next;
	struct vm_page marker;
	int page_shortage, maxscan, pcount;
	int addl_page_shortage, addl_page_shortage_init;
	vm_object_t object;
	int actcount;
	int vnodes_skipped = 0;
	int pages_freed = 0;
	int maxlaunder;

	/*
	 * Do whatever cleanup that the pmap code can.
	 */
	pmap_collect();

	addl_page_shortage_init = vm_pageout_deficit;
	vm_pageout_deficit = 0;

	/*
	 * Calculate the number of pages we want to either free or move
	 * to the cache.
	 */
	page_shortage = vm_paging_target() + addl_page_shortage_init;

	/*
	 * Initialize our marker
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = PQ_INACTIVE;
	marker.wire_count = 1;

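	/*
	 * Sketch of how the marker is used further below (illustrative):
	 * it is inserted after a page before an operation that may block
	 * or cluster, then used to re-acquire our place in the queue:
	 *
	 *	TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m,
	 *			   &marker, pageq);
	 *	...				(may block or cluster)
	 *	next = TAILQ_NEXT(&marker, pageq);
	 *	TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
	 *
	 * PG_MARKER lets the queue scans skip the marker itself.
	 */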
	/*
	 * Start scanning the inactive queue for pages we can move to the
	 * cache or free.  The scan will stop when the target is reached or
	 * we have scanned the entire inactive queue.  Note that m->act_count
	 * is not used to form decisions for the inactive queue, only for the
	 * active queue.
	 *
	 * maxlaunder limits the number of dirty pages we flush per scan.
	 * For most systems a smaller value (16 or 32) is more robust under
	 * extreme memory and disk pressure because any unnecessary writes
	 * to disk can result in extreme performance degradation.  However,
	 * systems with excessive dirty pages (especially when MAP_NOSYNC is
	 * used) will die horribly with limited laundering.  If the pageout
	 * daemon cannot clean enough pages in the first pass, we let it go
	 * all out in succeeding passes.
	 */
	if ((maxlaunder = vm_max_launder) <= 1)
		maxlaunder = 1;
	if (pass)
		maxlaunder = 10000;

	/*
	 * We will generally be in a critical section throughout the
	 * scan, but we can release it temporarily when we are sitting on a
	 * non-busy page without fear.  This is required to prevent an
	 * interrupt from unbusying or freeing a page prior to our busy
	 * check, leaving us on the wrong queue or checking the wrong
	 * page.
	 */
	crit_enter();
rescan0:
	addl_page_shortage = addl_page_shortage_init;
	maxscan = vmstats.v_inactive_count;
	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
	     m != NULL && maxscan-- > 0 && page_shortage > 0;
	     m = next
	) {
		mycpu->gd_cnt.v_pdpages++;

		/*
		 * Give interrupts a chance
		 */
		crit_exit();
		crit_enter();

		/*
		 * It's easier for some of the conditions below to just loop
		 * and catch queue changes here rather than check everywhere
		 * else.
		 */
		if (m->queue != PQ_INACTIVE)
			goto rescan0;
		next = TAILQ_NEXT(m, pageq);

		/*
		 * skip marker pages
		 */
		if (m->flags & PG_MARKER)
			continue;

		/*
		 * A held page may be undergoing I/O, so skip it.
		 */
		if (m->hold_count) {
			TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
			addl_page_shortage++;
			continue;
		}

		/*
		 * Don't mess with busy pages; keep them near the front of
		 * the queue since they are most likely being paged out.
		 */
		if (m->busy || (m->flags & PG_BUSY)) {
			addl_page_shortage++;
			continue;
		}

		if (m->object->ref_count == 0) {
			/*
			 * If the object is not being used, we ignore previous
			 * references.
			 */
			vm_page_flag_clear(m, PG_REFERENCED);
			pmap_clear_reference(m);

		} else if (((m->flags & PG_REFERENCED) == 0) &&
			   (actcount = pmap_ts_referenced(m))) {
			/*
			 * Otherwise, if the page has been referenced while
			 * in the inactive queue, we bump the "activation
			 * count" upwards, making it less likely that the
			 * page will be added back to the inactive queue
			 * prematurely again.  Here we check the page tables
			 * (or emulated bits, if any), since the upper level
			 * VM system does not know anything about existing
			 * references.
			 */
			vm_page_activate(m);
			m->act_count += (actcount + ACT_ADVANCE);
			continue;
		}

		/*
		 * If the upper level VM system knows about any page
		 * references, we activate the page.  We also set the
		 * "activation count" higher than normal so that we are
		 * less likely to place pages back onto the inactive
		 * queue again.
		 */
		if ((m->flags & PG_REFERENCED) != 0) {
			vm_page_flag_clear(m, PG_REFERENCED);
			actcount = pmap_ts_referenced(m);
			vm_page_activate(m);
			m->act_count += (actcount + ACT_ADVANCE + 1);
			continue;
		}

		/*
		 * If the upper level VM system doesn't know anything about
		 * the page being dirty, we have to check for it again.  As
		 * far as the VM code knows, any partially dirty pages are
		 * fully dirty.
		 *
		 * Pages marked PG_WRITEABLE may be mapped into the user
		 * address space of a process running on another cpu.  A
		 * user process (without holding the MP lock) running on
		 * another cpu may be able to touch the page while we are
		 * trying to remove it.  vm_page_cache() will handle this
		 * case for us.
		 */
		if (m->dirty == 0) {
			vm_page_test_dirty(m);
		} else {
			vm_page_dirty(m);
		}

		if (m->valid == 0) {
			/*
			 * Invalid pages can be easily freed
			 */
			vm_pageout_page_free(m);
			mycpu->gd_cnt.v_dfree++;
			--page_shortage;
			++pages_freed;
		} else if (m->dirty == 0) {
			/*
			 * Clean pages can be placed onto the cache queue.
			 * This effectively frees them.
			 */
			vm_page_cache(m);
			--page_shortage;
			++pages_freed;
		} else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) {
			/*
			 * Dirty pages need to be paged out, but flushing
			 * a page is extremely expensive versus freeing
			 * a clean page.  Rather than artificially limiting
			 * the number of pages we can flush, we instead give
			 * dirty pages extra priority on the inactive queue
			 * by forcing them to be cycled through the queue
			 * twice before being flushed, after which the
			 * (now clean) page will cycle through once more
			 * before being freed.  This significantly extends
			 * the thrash point for a heavily loaded machine.
			 */
			vm_page_flag_set(m, PG_WINATCFLS);
			TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
		} else if (maxlaunder > 0) {
			/*
			 * We always want to try to flush some dirty pages if
			 * we encounter them, to keep the system stable.
			 * Normally this number is small, but under extreme
			 * pressure where there are insufficient clean pages
			 * on the inactive queue, we may have to go all out.
			 */
			int swap_pageouts_ok;
			struct vnode *vp = NULL;

			object = m->object;

			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
				swap_pageouts_ok = 1;
			} else {
				swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
				swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
				    vm_page_count_min());
			}

			/*
			 * We don't bother paging objects that are "dead".
			 * Those objects are in a "rundown" state.
			 */
			if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
				TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
				continue;
			}

			/*
			 * The object is already known NOT to be dead.   It
			 * is possible for the vget() to block the whole
			 * pageout daemon, but the new low-memory handling
			 * code should prevent it.
			 *
			 * The previous code skipped locked vnodes and, worse,
			 * reordered pages in the queue.  This results in
			 * completely non-deterministic operation because,
			 * quite often, a vm_fault has initiated an I/O and
			 * is holding a locked vnode at just the point where
			 * the pageout daemon is woken up.
			 *
			 * We can't wait forever for the vnode lock, we might
			 * deadlock due to a vn_read() getting stuck in
			 * vm_wait while holding this vnode.  We skip the
			 * vnode if we can't get it in a reasonable amount
			 * of time.
			 */

			if (object->type == OBJT_VNODE) {
				vp = object->handle;

				if (vget(vp, LK_EXCLUSIVE|LK_NOOBJ|LK_TIMELOCK)) {
					++pageout_lock_miss;
					if (object->flags & OBJ_MIGHTBEDIRTY)
						    vnodes_skipped++;
					continue;
				}

				/*
				 * The page might have been moved to another
				 * queue during potential blocking in vget()
				 * above.  The page might have been freed and
				 * reused for another vnode.  The object might
				 * have been reused for another vnode.
				 */
				if (m->queue != PQ_INACTIVE ||
				    m->object != object ||
				    object->handle != vp) {
					if (object->flags & OBJ_MIGHTBEDIRTY)
						vnodes_skipped++;
					vput(vp);
					continue;
				}

				/*
				 * The page may have been busied during the
				 * blocking in vget().  We don't move the
				 * page back onto the end of the queue so that
				 * statistics are more correct if we don't.
				 */
				if (m->busy || (m->flags & PG_BUSY)) {
					vput(vp);
					continue;
				}

				/*
				 * If the page has become held it might
				 * be undergoing I/O, so skip it
				 */
				if (m->hold_count) {
					TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
					if (object->flags & OBJ_MIGHTBEDIRTY)
						vnodes_skipped++;
					vput(vp);
					continue;
				}
			}

			/*
			 * If a page is dirty, then it is either being washed
			 * (but not yet cleaned) or it is still in the
			 * laundry.  If it is still in the laundry, then we
			 * start the cleaning operation.
			 *
			 * This operation may cluster, invalidating the 'next'
			 * pointer.  To prevent an inordinate number of
			 * restarts we use our marker to remember our place.
			 *
			 * decrement page_shortage on success to account for
			 * the (future) cleaned page.  Otherwise we could wind
			 * up laundering or cleaning too many pages.
			 */
			TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
			if (vm_pageout_clean(m) != 0) {
				--page_shortage;
				--maxlaunder;
			} else {
				addl_page_shortage++;
			}
			next = TAILQ_NEXT(&marker, pageq);
			TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
			if (vp != NULL)
				vput(vp);
		}
	}

	/*
	 * Compute the number of pages we want to try to move from the
	 * active queue to the inactive queue.
	 */
	page_shortage = vm_paging_target() +
		vmstats.v_inactive_target - vmstats.v_inactive_count;
	page_shortage += addl_page_shortage;

	/*
	 * If the system is running out of swap, or has none, a large
	 * backlog can accumulate in the inactive list.  Continue moving
	 * pages to the inactive list even though its 'target' has been
	 * met due to being unable to drain.  We can then use a low active
	 * count to measure stress and out-of-memory conditions.
	 */
	if (page_shortage < addl_page_shortage)
		page_shortage = addl_page_shortage;

	/*
	 * Scan the active queue for things we can deactivate.  We nominally
	 * track the per-page activity counter and use it to locate
	 * deactivation candidates.
	 *
	 * NOTE: we are still in a critical section.
	 */
	pcount = vmstats.v_active_count;
	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);

	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
		/*
		 * Give interrupts a chance.
		 */
		crit_exit();
		crit_enter();

		/*
		 * If the page was ripped out from under us, just stop.
		 */
		if (m->queue != PQ_ACTIVE)
			break;
		next = TAILQ_NEXT(m, pageq);

		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			m = next;
			continue;
		}

		/*
		 * The count for pagedaemon pages is done after checking the
		 * page for eligibility...
		 */
		mycpu->gd_cnt.v_pdpages++;

		/*
		 * Check to see "how much" the page has been used.
		 */
		actcount = 0;
		if (m->object->ref_count != 0) {
			if (m->flags & PG_REFERENCED) {
				actcount += 1;
			}
			actcount += pmap_ts_referenced(m);
			if (actcount) {
				m->act_count += ACT_ADVANCE + actcount;
				if (m->act_count > ACT_MAX)
					m->act_count = ACT_MAX;
			}
		}

		/*
		 * Since we have "tested" this bit, we need to clear it now.
		 */
		vm_page_flag_clear(m, PG_REFERENCED);

		/*
		 * Only if an object is currently being used, do we use the
		 * page activation count stats.
		 */
		if (actcount && (m->object->ref_count != 0)) {
			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
		} else {
			m->act_count -= min(m->act_count, ACT_DECLINE);
			if (vm_pageout_algorithm ||
			    m->object->ref_count == 0 ||
			    m->act_count < pass) {
				page_shortage--;
				if (m->object->ref_count == 0) {
					vm_page_busy(m);
					vm_page_protect(m, VM_PROT_NONE);
					vm_page_wakeup(m);
					if (m->dirty == 0) {
						++pages_freed;
						vm_page_cache(m);
					} else {
						vm_page_deactivate(m);
					}
				} else {
					vm_page_deactivate(m);
				}
			} else {
				TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			}
		}
		m = next;
	}

	/*
	 * We try to maintain some *really* free pages, this allows interrupt
	 * code to be guaranteed space.  Since both cache and free queues
	 * are considered basically 'free', moving pages from cache to free
	 * does not affect other calculations.
	 *
	 * NOTE: we are still in a critical section.
	 *
	 * Pages moved from PQ_CACHE to totally free are not counted in the
	 * pages_freed counter.
	 */

	while (vmstats.v_free_count < vmstats.v_free_reserved) {
		static int cache_rover = 0;
		m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
		if (!m)
			break;
		if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
		    m->busy ||
		    m->hold_count ||
		    m->wire_count) {
#ifdef INVARIANTS
			kprintf("Warning: busy page %p found in cache\n", m);
#endif
			vm_page_deactivate(m);
			continue;
		}
		KKASSERT((m->flags & PG_MAPPED) == 0);
		KKASSERT(m->dirty == 0);
		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
		vm_pageout_page_free(m);
		mycpu->gd_cnt.v_dfree++;
	}
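	/*
	 * Note on cache_rover (illustrative): PQ_PRIME2 is a stride chosen
	 * relatively prime to the number of PQ_CACHE colors, so successive
	 * iterations cycle vm_page_list_find() through all PQ_L2_MASK + 1
	 * colors before repeating, spreading the frees across colors
	 * rather than draining one color at a time.
	 */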

	crit_exit();

#if !defined(NO_SWAPPING)
	/*
	 * Idle process swapout -- run once per second.
	 */
	if (vm_swap_idle_enabled) {
		static long lsec;
		if (time_second != lsec) {
			vm_pageout_req_swapout |= VM_SWAP_IDLE;
			vm_req_vmdaemon();
			lsec = time_second;
		}
	}
#endif

	/*
	 * If we didn't get enough free pages, and we have skipped a vnode
	 * in a writeable object, wakeup the sync daemon.  And kick swapout
	 * if we did not get enough free pages.
	 */
	if (vm_paging_target() > 0) {
		if (vnodes_skipped && vm_page_count_min())
			speedup_syncer();
#if !defined(NO_SWAPPING)
		if (vm_swap_enabled && vm_page_count_target()) {
			vm_req_vmdaemon();
			vm_pageout_req_swapout |= VM_SWAP_NORMAL;
		}
#endif
	}

	/*
	 * If we are out of swap space (or have no swap) then we
	 * can detect when the system has completely run out of
	 * memory by observing several variables:
	 *
	 * - swap_pager_full is set if insufficient swap was
	 *   available to satisfy a requested pageout.
	 *
	 * - vm_page_count_min() means we could not recover
	 *   enough pages to meet our bare minimum free page
	 *   count target.
	 *
	 * - the active page count.  A memory-starved system
	 *   will reduce the active count as pages are deactivated
	 *   but cannot be cleaned.
	 *
	 * If under these circumstances our paging target exceeds
	 * 1/4 the number of active pages we have a very serious
	 * problem: the deactivation of pages has failed to solve
	 * it and we must start killing things.
	 */
	if (swap_pager_full && vm_page_count_min())
		kprintf("Warning: system low on memory+swap!\n");
	if (swap_pager_full && vm_page_count_min() &&
	    vm_paging_target() > vmstats.v_active_count / 4) {
		info.bigproc = NULL;
		info.bigsize = 0;
		allproc_scan(vm_pageout_scan_callback, &info);
		if (info.bigproc != NULL) {
			killproc(info.bigproc, "out of swap space");
			info.bigproc->p_nice = PRIO_MIN;
			info.bigproc->p_usched->resetpriority(
				FIRST_LWP_IN_PROC(info.bigproc));
			wakeup(&vmstats.v_free_count);
			PRELE(info.bigproc);
		}
	}
}

static int
vm_pageout_scan_callback(struct proc *p, void *data)
{
	struct vm_pageout_scan_info *info = data;
	vm_offset_t size;

	/*
	 * if this is a system process, skip it
	 */
	if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
	    ((p->p_pid < 48) && (vm_swap_size != 0))) {
		return (0);
	}

	/*
	 * if the process is in a non-running type state,
	 * don't touch it.
	 */
	if (p->p_stat != SACTIVE && p->p_stat != SSTOP) {
		return (0);
	}

	/*
	 * get the process size
	 */
	size = vmspace_resident_count(p->p_vmspace) +
		vmspace_swap_count(p->p_vmspace);

	/*
	 * If this process is bigger than the biggest one seen so far,
	 * remember it.
	 */
	if (size > info->bigsize) {
		if (info->bigproc)
			PRELE(info->bigproc);
		PHOLD(p);
		info->bigproc = p;
		info->bigsize = size;
	}
	return(0);
}

/*
 * This routine tries to maintain the pseudo LRU active queue, so that
 * some statistics accumulation still occurs during long periods of time
 * in which there is no paging.  This code helps the situation where
 * paging just starts to occur.
 */
static void
vm_pageout_page_stats(void)
{
	vm_page_t m,next;
	int pcount,tpcount;		/* Number of pages to check */
	static int fullintervalcount = 0;
	int page_shortage;

	page_shortage =
	    (vmstats.v_inactive_target + vmstats.v_cache_max + vmstats.v_free_min) -
	    (vmstats.v_free_count + vmstats.v_inactive_count + vmstats.v_cache_count);

	if (page_shortage <= 0)
		return;

	crit_enter();

	pcount = vmstats.v_active_count;
	fullintervalcount += vm_pageout_stats_interval;
	if (fullintervalcount < vm_pageout_full_stats_interval) {
		tpcount = (vm_pageout_stats_max * vmstats.v_active_count) / vmstats.v_page_count;
		if (pcount > tpcount)
			pcount = tpcount;
	} else {
		fullintervalcount = 0;
	}

	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
	while ((m != NULL) && (pcount-- > 0)) {
		int actcount;

		if (m->queue != PQ_ACTIVE) {
			break;
		}

		next = TAILQ_NEXT(m, pageq);
		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			m = next;
			continue;
		}

		actcount = 0;
		if (m->flags & PG_REFERENCED) {
			vm_page_flag_clear(m, PG_REFERENCED);
			actcount += 1;
		}

		actcount += pmap_ts_referenced(m);
		if (actcount) {
			m->act_count += ACT_ADVANCE + actcount;
			if (m->act_count > ACT_MAX)
				m->act_count = ACT_MAX;
			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
		} else {
			if (m->act_count == 0) {
				/*
				 * We turn off page access, so that we have
				 * more accurate RSS stats.  We don't do this
				 * in the normal page deactivation when the
				 * system is loaded VM wise, because the
				 * cost of the large number of page protect
				 * operations would outweigh the benefit.
				 */
				vm_page_busy(m);
				vm_page_protect(m, VM_PROT_NONE);
				vm_page_wakeup(m);
				vm_page_deactivate(m);
			} else {
				m->act_count -= min(m->act_count, ACT_DECLINE);
				TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
			}
		}

		m = next;
	}
	crit_exit();
}

static int
vm_pageout_free_page_calc(vm_size_t count)
{
	if (count < vmstats.v_page_count)
		 return 0;
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	if (vmstats.v_page_count > 1024)
		vmstats.v_free_min = 4 + (vmstats.v_page_count - 1024) / 200;
	else
		vmstats.v_free_min = 4;
	vmstats.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
		vmstats.v_interrupt_free_min;
	vmstats.v_free_reserved = vm_pageout_page_count +
		vmstats.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
	vmstats.v_free_severe = vmstats.v_free_min / 2;
	vmstats.v_free_min += vmstats.v_free_reserved;
	vmstats.v_free_severe += vmstats.v_free_reserved;
	return 1;
}
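
/*
 * Worked example (illustrative only; assumes 4KB pages, a 64KB MAXBSIZE,
 * PQ_L2_SIZE of 256, vm_pageout_page_count of 16, and
 * v_interrupt_free_min of 2 as set by vm_pageout() below): with
 * v_page_count = count = 262144 (1GB of ram), integer arithmetic gives
 *
 *	v_free_min          = 4 + (262144 - 1024) / 200    = 1309
 *	v_pageout_free_min  = (2 * 65536) / 4096 + 2       = 34
 *	v_free_reserved     = 16 + 34 + 262144 / 768 + 256 = 647
 *	v_free_severe       = 1309 / 2 + 647               = 1301
 *	v_free_min (final)  = 1309 + 647                   = 1956
 *
 * i.e. the daemon tries to keep roughly 2000 pages (~8MB) in reserve.
 */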


/*
 * vm_pageout is the high level pageout daemon.
 */
static void
vm_pageout(void)
{
	int pass;

	/*
	 * Initialize some paging parameters.
	 */
	curthread->td_flags |= TDF_SYSTHREAD;

	vmstats.v_interrupt_free_min = 2;
	if (vmstats.v_page_count < 2000)
		vm_pageout_page_count = 8;

	vm_pageout_free_page_calc(vmstats.v_page_count);
	/*
	 * v_free_target and v_cache_min control pageout hysteresis.  Note
	 * that these are more a measure of the VM cache queue hysteresis
	 * than the VM free queue.  Specifically, v_free_target is the
	 * high water mark (free+cache pages).
	 *
	 * v_free_reserved + v_cache_min (mostly means v_cache_min) is the
	 * low water mark, while v_free_min is the stop.  v_cache_min must
	 * be big enough to handle memory needs while the pageout daemon
	 * is signalled and run to free more pages.
	 */
	if (vmstats.v_free_count > 6144)
		vmstats.v_free_target = 4 * vmstats.v_free_min + vmstats.v_free_reserved;
	else
		vmstats.v_free_target = 2 * vmstats.v_free_min + vmstats.v_free_reserved;

	if (vmstats.v_free_count > 2048) {
		vmstats.v_cache_min = vmstats.v_free_target;
		vmstats.v_cache_max = 2 * vmstats.v_cache_min;
		vmstats.v_inactive_target = (3 * vmstats.v_free_target) / 2;
	} else {
		vmstats.v_cache_min = 0;
		vmstats.v_cache_max = 0;
		vmstats.v_inactive_target = vmstats.v_free_count / 4;
	}
	if (vmstats.v_inactive_target > vmstats.v_free_count / 3)
		vmstats.v_inactive_target = vmstats.v_free_count / 3;
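	/*
	 * Worked example (illustrative, continuing the 1GB sketch above
	 * with v_free_min = 1956 and v_free_reserved = 647): a machine
	 * booting with more than 6144 free pages gets
	 *
	 *	v_free_target     = 4 * 1956 + 647 = 8471
	 *	v_cache_min       = 8471
	 *	v_cache_max       = 2 * 8471 = 16942
	 *	v_inactive_target = (3 * 8471) / 2 = 12706
	 *
	 * with v_inactive_target then clamped to v_free_count / 3.
	 */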

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = vmstats.v_free_count / 3;

	if (vm_pageout_stats_max == 0)
		vm_pageout_stats_max = vmstats.v_free_target;

	/*
	 * Set interval in seconds for stats scan.
	 */
	if (vm_pageout_stats_interval == 0)
		vm_pageout_stats_interval = 5;
	if (vm_pageout_full_stats_interval == 0)
		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;

	/*
	 * Set maximum free per pass
	 */
	if (vm_pageout_stats_free_max == 0)
		vm_pageout_stats_free_max = 5;

	swap_pager_swap_init();
	pass = 0;
	/*
	 * The pageout daemon is never done, so loop forever.
	 */
	while (TRUE) {
		int error;

		/*
		 * If we have enough free memory, wakeup waiters.  Do
		 * not clear vm_pages_needed until we reach our target,
		 * otherwise we may be woken up over and over again and
		 * waste a lot of cpu.
		 */
		crit_enter();
		if (vm_pages_needed && !vm_page_count_min()) {
			if (vm_paging_needed() <= 0)
				vm_pages_needed = 0;
			wakeup(&vmstats.v_free_count);
		}
		if (vm_pages_needed) {
			/*
			 * Still not done, take a second pass without waiting
			 * (unlimited dirty cleaning), otherwise sleep a bit
			 * and try again.
			 */
			++pass;
			if (pass > 1)
				tsleep(&vm_pages_needed, 0, "psleep", hz/2);
		} else {
			/*
			 * Good enough, sleep & handle stats.  Prime the pass
			 * for the next run.
			 */
			if (pass > 1)
				pass = 1;
			else
				pass = 0;
			error = tsleep(&vm_pages_needed,
				0, "psleep", vm_pageout_stats_interval * hz);
			if (error && !vm_pages_needed) {
				crit_exit();
				pass = 0;
				vm_pageout_page_stats();
				continue;
			}
		}

		if (vm_pages_needed)
			mycpu->gd_cnt.v_pdwakeups++;
		crit_exit();
		vm_pageout_scan(pass);
		vm_pageout_deficit = 0;
	}
}

void
pagedaemon_wakeup(void)
{
	if (!vm_pages_needed && curthread != pagethread) {
		vm_pages_needed++;
		wakeup(&vm_pages_needed);
	}
}

#if !defined(NO_SWAPPING)
static void
vm_req_vmdaemon(void)
{
	static int lastrun = 0;

	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
		wakeup(&vm_daemon_needed);
		lastrun = ticks;
	}
}
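
/*
 * Note (illustrative): the (ticks < lastrun) clause handles wraparound
 * of the ticks counter, so the vm_daemon is still woken at most roughly
 * once per second even after ticks overflows to a small value.
 */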

static int vm_daemon_callback(struct proc *p, void *data __unused);

static void
vm_daemon(void)
{
	while (TRUE) {
		tsleep(&vm_daemon_needed, 0, "psleep", 0);
		if (vm_pageout_req_swapout) {
			swapout_procs(vm_pageout_req_swapout);
			vm_pageout_req_swapout = 0;
		}
		/*
		 * scan the processes for exceeding their rlimits or if
		 * process is swapped out -- deactivate pages
		 */
		allproc_scan(vm_daemon_callback, NULL);
	}
}

static int
vm_daemon_callback(struct proc *p, void *data __unused)
{
	vm_pindex_t limit, size;

	/*
	 * if this is a system process or it is already exiting,
	 * skip it.
	 */
	if (p->p_flag & (P_SYSTEM | P_WEXIT))
		return (0);

	/*
	 * if the process is in a non-running type state,
	 * don't touch it.
	 */
	if (p->p_stat != SACTIVE && p->p_stat != SSTOP)
		return (0);

	/*
	 * get a limit
	 */
	limit = OFF_TO_IDX(qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
			        p->p_rlimit[RLIMIT_RSS].rlim_max));

	/*
	 * let processes that are swapped out really be
	 * swapped out.  Set the limit to nothing to get as
	 * many pages out to swap as possible.
	 */
	if (p->p_flag & P_SWAPPEDOUT)
		limit = 0;

	size = vmspace_resident_count(p->p_vmspace);
	if (limit >= 0 && size >= limit) {
		vm_pageout_map_deactivate_pages(
		    &p->p_vmspace->vm_map, limit);
	}
	return (0);
}

#endif