| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1991, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * | |
| 5 | * This code is derived from software contributed to Berkeley by | |
| 6 | * The Mach Operating System project at Carnegie-Mellon University. | |
| 7 | * | |
| 8 | * Redistribution and use in source and binary forms, with or without | |
| 9 | * modification, are permitted provided that the following conditions | |
| 10 | * are met: | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in the | |
| 15 | * documentation and/or other materials provided with the distribution. | |
| 16 | * 3. All advertising materials mentioning features or use of this software | |
| 17 | * must display the following acknowledgement: | |
| 18 | * This product includes software developed by the University of | |
| 19 | * California, Berkeley and its contributors. | |
| 20 | * 4. Neither the name of the University nor the names of its contributors | |
| 21 | * may be used to endorse or promote products derived from this software | |
| 22 | * without specific prior written permission. | |
| 23 | * | |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 34 | * SUCH DAMAGE. | |
| 35 | * | |
| 36 | * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 | |
| 37 | * | |
| 38 | * | |
| 39 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
| 40 | * All rights reserved. | |
| 41 | * | |
| 42 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
| 43 | * | |
| 44 | * Permission to use, copy, modify and distribute this software and | |
| 45 | * its documentation is hereby granted, provided that both the copyright | |
| 46 | * notice and this permission notice appear in all copies of the | |
| 47 | * software, derivative works or modified versions, and any portions | |
| 48 | * thereof, and that both notices appear in supporting documentation. | |
| 49 | * | |
| 50 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
| 51 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
| 52 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
| 53 | * | |
| 54 | * Carnegie Mellon requests users of this software to return to | |
| 55 | * | |
| 56 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
| 57 | * School of Computer Science | |
| 58 | * Carnegie Mellon University | |
| 59 | * Pittsburgh PA 15213-3890 | |
| 60 | * | |
| 61 | * any improvements or extensions that they make and grant Carnegie the | |
| 62 | * rights to redistribute these changes. | |
| 63 | * | |
| 64 | * $FreeBSD: src/sys/vm/vm_page.h,v 1.75.2.8 2002/03/06 01:07:09 dillon Exp $ | |
| 17cde63e | 65 | * $DragonFly: src/sys/vm/vm_page.h,v 1.28 2008/05/09 07:24:48 dillon Exp $ |
| 984263bc MD |
66 | */ |
| 67 | ||
| 68 | /* | |
| 69 | * Resident memory system definitions. | |
| 70 | */ | |
| 71 | ||
| 1bd40720 MD |
72 | #ifndef _VM_VM_PAGE_H_ |
| 73 | #define _VM_VM_PAGE_H_ | |
| 984263bc | 74 | |
| 3c923499 | 75 | #if !defined(KLD_MODULE) && defined(_KERNEL) |
| 984263bc MD |
76 | #include "opt_vmpage.h" |
| 77 | #endif | |
| 78 | ||
| 1bd40720 MD |
79 | #ifndef _SYS_TYPES_H_ |
| 80 | #include <sys/types.h> | |
| 81 | #endif | |
| 1f804340 MD |
82 | #ifndef _SYS_TREE_H_ |
| 83 | #include <sys/tree.h> | |
| 84 | #endif | |
| 1bd40720 MD |
85 | #ifndef _MACHINE_PMAP_H_ |
| 86 | #include <machine/pmap.h> | |
| 87 | #endif | |
| 88 | #ifndef _VM_PMAP_H_ | |
| 984263bc | 89 | #include <vm/pmap.h> |
| 1bd40720 MD |
90 | #endif |
| 91 | #ifndef _MACHINE_ATOMIC_H_ | |
| 984263bc | 92 | #include <machine/atomic.h> |
| 1bd40720 MD |
93 | #endif |
| 94 | ||
| 668b1228 | 95 | #ifdef _KERNEL |
| 1bd40720 | 96 | |
| 03d6a592 MD |
97 | #ifndef _SYS_SYSTM_H_ |
| 98 | #include <sys/systm.h> | |
| 99 | #endif | |
| 1bd40720 | 100 | #ifndef _SYS_THREAD2_H_ |
| cdd46d2e | 101 | #include <sys/thread2.h> |
| 668b1228 | 102 | #endif |
| 984263bc | 103 | |
| b2b3ffcd | 104 | #ifdef __x86_64__ |
| 973c11b9 MD |
105 | #include <machine/vmparam.h> |
| 106 | #endif | |
| 107 | ||
| 1bd40720 MD |
108 | #endif |
| 109 | ||
/*
 * Events that may be associated with a page action.
 */
typedef enum vm_page_event {
	VMEVENT_NONE,
	VMEVENT_COW
} vm_page_event_t;

/*
 * One registered action.  Instances are chained through 'entry'; the
 * list head is the action_list member of struct vm_page.
 *
 * NOTE(review): 'func' is presumably invoked when 'event' is posted
 * against the page, with 'data' as caller-private context -- confirm
 * against vm_page_event_internal().
 */
struct vm_page_action {
	LIST_ENTRY(vm_page_action) entry;
	vm_page_event_t	event;
	void		(*func)(struct vm_page *, struct vm_page_action *);
	void		*data;
};

typedef struct vm_page_action *vm_page_action_t;
| 121 | ||
| 984263bc MD |
122 | /* |
| 123 | * Management of resident (logical) pages. | |
| 124 | * | |
| 125 | * A small structure is kept for each resident | |
| 126 | * page, indexed by page number. Each structure | |
| 127 | * is an element of several lists: | |
| 128 | * | |
| 129 | * A hash table bucket used to quickly | |
| 130 | * perform object/offset lookups | |
| 131 | * | |
| 132 | * A list of all pages for a given object, | |
| 133 | * so they can be quickly deactivated at | |
| 134 | * time of deallocation. | |
| 135 | * | |
| 136 | * An ordered list of pages due for pageout. | |
| 137 | * | |
| 138 | * In addition, the structure contains the object | |
| 139 | * and offset to which this page belongs (for pageout), | |
| 140 | * and sundry status bits. | |
| 141 | * | |
| 142 | * Fields in this structure are locked either by the lock on the | |
| 143 | * object that the page belongs to (O) or by the lock on the page | |
| 144 | * queues (P). | |
| 145 | * | |
| 146 | * The 'valid' and 'dirty' fields are distinct. A page may have dirty | |
| 147 | * bits set without having associated valid bits set. This is used by | |
| 148 | * NFS to implement piecemeal writes. | |
| 149 | */ | |
| 150 | ||
/* Head type for tail queues of pages (elements are struct vm_page). */
TAILQ_HEAD(pglist, vm_page);

struct vm_object;

/*
 * Red-black tree of pages.  The per-page linkage is the rb_entry member
 * of struct vm_page and nodes are ordered by rb_vm_page_compare().
 */
int	rb_vm_page_compare(struct vm_page *, struct vm_page *);

struct vm_page_rb_tree;
RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare,
	      vm_pindex_t);
| 159 | ||
| 984263bc | 160 | struct vm_page { |
| a441ad78 | 161 | TAILQ_ENTRY(vm_page) pageq; /* vm_page_queues[] list (P) */ |
| 1f804340 | 162 | RB_ENTRY(vm_page) rb_entry; /* Red-Black tree based at object */ |
| 984263bc | 163 | |
| 03d6a592 | 164 | struct vm_object *object; /* which object am I in (O,P)*/ |
| 984263bc | 165 | vm_pindex_t pindex; /* offset into object (O,P) */ |
| 6ef943a3 | 166 | vm_paddr_t phys_addr; /* physical address of page */ |
| 984263bc MD |
167 | struct md_page md; /* machine dependant stuff */ |
| 168 | u_short queue; /* page queue index */ | |
| a441ad78 MD |
169 | u_short flags; /* see below */ |
| 170 | u_short pc; /* page color */ | |
| 984263bc MD |
171 | u_char act_count; /* page usage count */ |
| 172 | u_char busy; /* page busy count */ | |
| 10192bae MD |
173 | u_int wire_count; /* wired down maps refs (P) */ |
| 174 | int hold_count; /* page hold count */ | |
| a441ad78 MD |
175 | |
| 176 | /* | |
| 177 | * NOTE that these must support one bit per DEV_BSIZE in a page!!! | |
| 178 | * so, on normal X86 kernels, they must be at least 8 bits wide. | |
| 179 | */ | |
| 984263bc MD |
180 | #if PAGE_SIZE == 4096 |
| 181 | u_char valid; /* map of valid DEV_BSIZE chunks */ | |
| 182 | u_char dirty; /* map of dirty DEV_BSIZE chunks */ | |
| 183 | #elif PAGE_SIZE == 8192 | |
| 184 | u_short valid; /* map of valid DEV_BSIZE chunks */ | |
| 185 | u_short dirty; /* map of dirty DEV_BSIZE chunks */ | |
| 186 | #endif | |
| 10192bae | 187 | LIST_HEAD(,vm_page_action) action_list; |
| 984263bc MD |
188 | }; |
| 189 | ||
/*
 * Pointer typedef for struct vm_page.  Guarded by __VM_PAGE_T_DEFINED__ so
 * that the typedef is emitted only once.
 */
#if !defined(__VM_PAGE_T_DEFINED__)
#define __VM_PAGE_T_DEFINED__
typedef struct vm_page *vm_page_t;
#endif
| 194 | ||
/*
 * Swap block constants.
 *
 * SWAPBLK_MASK has every bit of u_daddr_t set except the top one;
 * SWAPBLK_NONE is SWAPBLK_MASK + 1.
 *
 * Note: SWAPBLK_NONE is currently used as an absolute value rather than
 * as a flag bit.
 */
#define SWAPBLK_MASK	((daddr_t)((u_daddr_t)-1 >> 1))		/* mask */
#define SWAPBLK_NONE	((daddr_t)((u_daddr_t)SWAPBLK_MASK + 1))/* flag */
| 201 | ||
| 984263bc | 202 | /* |
| 74232d8e MD |
203 | * Page coloring parameters. We default to a middle of the road optimization. |
| 204 | * Larger selections would not really hurt us but if a machine does not have | |
| 205 | * a lot of memory it could cause vm_page_alloc() to eat more cpu cycles | |
| 206 | * looking for free pages. | |
| 207 | * | |
| 208 | * Page coloring cannot be disabled. Modules do not have access to most PQ | |
| 209 | * constants because they can change between builds. | |
| 984263bc | 210 | */ |
#if defined(_KERNEL) && !defined(KLD_MODULE)

/*
 * PQ_CACHESIZE selects the coloring tier below.  The largest tier is
 * chosen at 1024 and above (the unit appears to be KB: the >= 1024 tier
 * is tuned for a 1M cache).
 */
#ifndef PQ_CACHESIZE
#define PQ_CACHESIZE	256
#endif

/*
 * Per tier: PQ_L2_SIZE is the number of page colors; PQ_PRIME1 and
 * PQ_PRIME2 were originally described as primes somewhat less than
 * PQ_HASH_SIZE.
 */
#if PQ_CACHESIZE >= 1024
#define PQ_PRIME1	31
#define PQ_PRIME2	23
#define PQ_L2_SIZE	256	/* number of colors, opt for 1M cache */

#elif PQ_CACHESIZE >= 512
#define PQ_PRIME1	31
#define PQ_PRIME2	23
#define PQ_L2_SIZE	128	/* number of colors, opt for 512K cache */

#elif PQ_CACHESIZE >= 256
#define PQ_PRIME1	13
#define PQ_PRIME2	7
#define PQ_L2_SIZE	64	/* number of colors, opt for 256K cache */

#elif PQ_CACHESIZE >= 128
#define PQ_PRIME1	9	/* produces a good PQ_L2_SIZE/3 + PQ_PRIME1 */
#define PQ_PRIME2	5
#define PQ_L2_SIZE	32	/* number of colors, opt for 128K cache */

#else
#define PQ_PRIME1	5
#define PQ_PRIME2	3
#define PQ_L2_SIZE	16	/* reasonable number of colors, opt for 64K cache */

#endif

/* PQ_L2_SIZE is a power of two in every tier, so this is a bit mask. */
#define PQ_L2_MASK	(PQ_L2_SIZE - 1)

#endif /* KERNEL && !KLD_MODULE */
| 247 | ||
| 248 | /* | |
| 249 | * | |
| 250 | * The queue array is always based on PQ_MAXL2_SIZE regardless of the actual | |
| 251 | * cache size chosen in order to present a uniform interface for modules. | |
| 252 | */ | |
#define PQ_MAXL2_SIZE	256	/* fixed maximum (in pages) / module compat */

#if PQ_L2_SIZE > PQ_MAXL2_SIZE
#error "Illegal PQ_L2_SIZE"
#endif

/*
 * Queue indices into vm_page_queues[].  The arithmetic reserves
 * PQ_MAXL2_SIZE consecutive slots starting at PQ_FREE and another
 * PQ_MAXL2_SIZE starting at PQ_CACHE; PQ_COUNT is the array size.
 */
#define PQ_NONE		0
#define PQ_FREE		1
#define PQ_INACTIVE	(1 + 1*PQ_MAXL2_SIZE)
#define PQ_ACTIVE	(2 + 1*PQ_MAXL2_SIZE)
#define PQ_CACHE	(3 + 1*PQ_MAXL2_SIZE)
#define PQ_HOLD		(3 + 2*PQ_MAXL2_SIZE)
#define PQ_COUNT	(4 + 2*PQ_MAXL2_SIZE)
| 984263bc | 266 | |
| 1f804340 MD |
267 | /* |
| 268 | * Scan support | |
| 269 | */ | |
| 270 | struct vm_map; | |
| 271 | ||
| 272 | struct rb_vm_page_scan_info { | |
| 273 | vm_pindex_t start_pindex; | |
| 274 | vm_pindex_t end_pindex; | |
| 275 | int limit; | |
| 276 | int desired; | |
| 277 | int error; | |
| 278 | int pagerflags; | |
| 279 | vm_offset_t addr; | |
| 280 | vm_pindex_t backing_offset_index; | |
| 281 | struct vm_object *object; | |
| 282 | struct vm_object *backing_object; | |
| 283 | struct vm_page *mpte; | |
| 284 | struct pmap *pmap; | |
| 285 | struct vm_map *map; | |
| 286 | }; | |
| 287 | ||
| 288 | int rb_vm_page_scancmp(struct vm_page *, void *); | |
| 289 | ||
| 984263bc MD |
290 | struct vpgqueues { |
| 291 | struct pglist pl; | |
| 292 | int *cnt; | |
| 293 | int lcnt; | |
| 161399b3 | 294 | int flipflop; /* probably not the best place */ |
| 984263bc MD |
295 | }; |
| 296 | ||
| 297 | extern struct vpgqueues vm_page_queues[PQ_COUNT]; | |
| 298 | ||
| 984263bc MD |
299 | /* |
| 300 | * These are the flags defined for vm_page. | |
| 301 | * | |
| 17cde63e MD |
302 | * PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is |
| 303 | * not under PV management but otherwise should be treated as a | |
| 304 | * normal page. Pages not under PV management cannot be paged out | |
| 305 | * via the object/vm_page_t because there is no knowledge of their | |
| 306 | * pte mappings, nor can they be removed from their objects via | |
| 307 | * the object, and such pages are also not on any PQ queue. The | |
| 308 | * PG_MAPPED and PG_WRITEABLE flags are not applicable. | |
| 309 | * | |
| 310 | * PG_MAPPED only applies to managed pages, indicating whether the page | |
| 311 | * is mapped onto one or more pmaps. A page might still be mapped to | |
| 312 | * special pmaps in an unmanaged fashion, for example when mapped into a | |
| 313 | * buffer cache buffer, without setting PG_MAPPED. | |
| 314 | * | |
| 315 | * PG_WRITEABLE indicates that there may be a writeable managed pmap entry | |
| 316 | * somewhere, and that the page can be dirtied by hardware at any time | |
| 317 | * and may have to be tested for that. The modified bit in unmanaged | |
| 318 | * mappings or in the special clean map is not tested. | |
| 67803f3e MD |
319 | * |
| 320 | * PG_SWAPPED indicates that the page is backed by a swap block. Any | |
| 321 | * VM object type other than OBJT_DEFAULT can have swap-backed pages now. | |
| 984263bc MD |
322 | */ |
/*
 * Bits stored in vm_page.flags.  That field is a u_short, so only these
 * 16 bits are available and all of them are assigned.
 */
#define PG_BUSY		0x0001	/* page is in transit (O) */
#define PG_WANTED	0x0002	/* someone is waiting for page (O) */
#define PG_WINATCFLS	0x0004	/* flush dirty page on inactive q */
#define PG_FICTITIOUS	0x0008	/* physical page doesn't exist (O) */
#define PG_WRITEABLE	0x0010	/* page is writeable */
#define PG_MAPPED	0x0020	/* page is mapped (managed) */
#define PG_ZERO		0x0040	/* page is zeroed */
#define PG_REFERENCED	0x0080	/* page has been referenced */
#define PG_CLEANCHK	0x0100	/* page will be checked for cleaning */
#define PG_SWAPINPROG	0x0200	/* swap I/O in progress on page */
#define PG_NOSYNC	0x0400	/* do not collect for syncer */
#define PG_UNMANAGED	0x0800	/* no PV management for page */
#define PG_MARKER	0x1000	/* special queue marker page */
#define PG_RAM		0x2000	/* read ahead mark */
#define PG_SWAPPED	0x4000	/* backed by swap */
#define PG_NOTMETA	0x8000	/* do not back with swap */
| 984263bc MD |
340 | |
| 341 | /* | |
| 342 | * Misc constants. | |
| 343 | */ | |
| 344 | ||
/*
 * NOTE(review): presumably tuning values for vm_page.act_count (the page
 * usage count) -- confirm against the pageout code.
 */
#define ACT_DECLINE	1
#define ACT_ADVANCE	3
#define ACT_INIT	5
#define ACT_MAX		64
| 984263bc MD |
349 | |
| 350 | #ifdef _KERNEL | |
| 351 | /* | |
| 352 | * Each pageable resident page falls into one of four lists: | |
| 353 | * | |
| 354 | * free | |
| 355 | * Available for allocation now. | |
| 356 | * | |
| 357 | * The following are all LRU sorted: | |
| 358 | * | |
| 359 | * cache | |
| 360 | * Almost available for allocation. Still in an | |
| 361 | * object, but clean and immediately freeable at | |
| 362 | * non-interrupt times. | |
| 363 | * | |
| 364 | * inactive | |
| 365 | * Low activity, candidates for reclamation. | |
| 366 | * This is the list of pages that should be | |
| 367 | * paged out next. | |
| 368 | * | |
| 369 | * active | |
| 370 | * Pages that are "active" i.e. they have been | |
| 371 | * recently referenced. | |
| 372 | * | |
| 373 | * zero | |
| 374 | * Pages that are really free and have been pre-zeroed | |
| 375 | * | |
| 376 | */ | |
| 377 | ||
extern int		vm_page_zero_count;
extern struct vm_page	*vm_page_array;		/* first resident page in table */
extern int		vm_page_array_size;	/* number of vm_page_t's */
extern long		first_page;		/* first physical page number */
| 382 | ||
/* Physical address recorded in a page structure. */
#define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)

/*
 * Page structure for a physical address: the entry of vm_page_array at
 * index atop(pa) - first_page.  No range check is performed.
 */
#define PHYS_TO_VM_PAGE(pa)	(&vm_page_array[atop(pa) - first_page])
| 984263bc MD |
388 | |
| 389 | /* | |
| 390 | * Small helpers implemented as inline functions | |
| 391 | */ | |
| 392 | ||
| 393 | static __inline void | |
| 394 | vm_page_flag_set(vm_page_t m, unsigned int bits) | |
| 395 | { | |
| 396 | atomic_set_short(&(m)->flags, bits); | |
| 397 | } | |
| 398 | ||
| 399 | static __inline void | |
| 400 | vm_page_flag_clear(vm_page_t m, unsigned int bits) | |
| 401 | { | |
| 402 | atomic_clear_short(&(m)->flags, bits); | |
| 403 | } | |
| 404 | ||
| 984263bc MD |
405 | static __inline void |
| 406 | vm_page_busy(vm_page_t m) | |
| 407 | { | |
| 573fb415 | 408 | ASSERT_LWKT_TOKEN_HELD(&vm_token); |
| a441ad78 MD |
409 | KASSERT((m->flags & PG_BUSY) == 0, |
| 410 | ("vm_page_busy: page already busy!!!")); | |
| 984263bc MD |
411 | vm_page_flag_set(m, PG_BUSY); |
| 412 | } | |
| 413 | ||
| 414 | /* | |
| 415 | * vm_page_flash: | |
| 416 | * | |
| 417 | * wakeup anyone waiting for the page. | |
| 418 | */ | |
| 419 | ||
| 420 | static __inline void | |
| 421 | vm_page_flash(vm_page_t m) | |
| 422 | { | |
| 9ad0147b | 423 | lwkt_gettoken(&vm_token); |
| 984263bc MD |
424 | if (m->flags & PG_WANTED) { |
| 425 | vm_page_flag_clear(m, PG_WANTED); | |
| 426 | wakeup(m); | |
| 427 | } | |
| 9ad0147b | 428 | lwkt_reltoken(&vm_token); |
| 984263bc MD |
429 | } |
| 430 | ||
| 431 | /* | |
| a441ad78 MD |
432 | * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This |
| 433 | * is typically the last call you make on a page before moving onto | |
| 434 | * other things. | |
| 984263bc | 435 | */ |
| 984263bc MD |
436 | static __inline void |
| 437 | vm_page_wakeup(vm_page_t m) | |
| 438 | { | |
| 439 | KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!")); | |
| 440 | vm_page_flag_clear(m, PG_BUSY); | |
| 441 | vm_page_flash(m); | |
| 442 | } | |
| 443 | ||
| a441ad78 MD |
444 | /* |
| 445 | * These routines manipulate the 'soft busy' count for a page. A soft busy | |
| 446 | * is almost like PG_BUSY except that it allows certain compatible operations | |
| 447 | * to occur on the page while it is busy. For example, a page undergoing a | |
| 448 | * write can still be mapped read-only. | |
| 449 | */ | |
| 984263bc MD |
450 | static __inline void |
| 451 | vm_page_io_start(vm_page_t m) | |
| 452 | { | |
| 453 | atomic_add_char(&(m)->busy, 1); | |
| 454 | } | |
| 455 | ||
| 456 | static __inline void | |
| 457 | vm_page_io_finish(vm_page_t m) | |
| 458 | { | |
| 459 | atomic_subtract_char(&m->busy, 1); | |
| 460 | if (m->busy == 0) | |
| 461 | vm_page_flash(m); | |
| 462 | } | |
| 463 | ||
| 464 | ||
/*
 * Mask with one bit set per DEV_BSIZE chunk of a page; matches the width
 * of the valid/dirty fields of struct vm_page for each PAGE_SIZE.
 */
#if PAGE_SIZE == 4096
#define VM_PAGE_BITS_ALL	0xff
#elif PAGE_SIZE == 8192
#define VM_PAGE_BITS_ALL	0xffff
#endif
| 472 | ||
| dc1fd4b3 MD |
473 | /* |
| 474 | * Note: the code will always use nominally free pages from the free list | |
| 475 | * before trying other flag-specified sources. | |
| 476 | * | |
| 477 | * At least one of VM_ALLOC_NORMAL|VM_ALLOC_SYSTEM|VM_ALLOC_INTERRUPT | |
| 478 | * must be specified. VM_ALLOC_RETRY may only be specified if VM_ALLOC_NORMAL | |
| 479 | * is also specified. | |
| 480 | */ | |
/* Allocation request flags (bit mask). */
#define VM_ALLOC_NORMAL		0x01	/* ok to use cache pages */
#define VM_ALLOC_SYSTEM		0x02	/* ok to exhaust most of free list */
#define VM_ALLOC_INTERRUPT	0x04	/* ok to exhaust entire free list */
#define VM_ALLOC_ZERO		0x08	/* req pre-zero'd memory if avail */
#define VM_ALLOC_QUICK		0x10	/* like NORMAL but do not use cache */
#define VM_ALLOC_RETRY		0x80	/* indefinite block (vm_page_grab()) */
| 984263bc | 487 | |
| 573fb415 MD |
488 | void vm_page_hold(vm_page_t); |
| 489 | void vm_page_unhold(vm_page_t); | |
| 984263bc | 490 | void vm_page_activate (vm_page_t); |
| 03d6a592 MD |
491 | vm_page_t vm_page_alloc (struct vm_object *, vm_pindex_t, int); |
| 492 | vm_page_t vm_page_grab (struct vm_object *, vm_pindex_t, int); | |
| 5f910b2f | 493 | void vm_page_cache (vm_page_t); |
| 984263bc MD |
494 | int vm_page_try_to_cache (vm_page_t); |
| 495 | int vm_page_try_to_free (vm_page_t); | |
| 5f910b2f | 496 | void vm_page_dontneed (vm_page_t); |
| 984263bc | 497 | void vm_page_deactivate (vm_page_t); |
| 03d6a592 MD |
498 | void vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t); |
| 499 | vm_page_t vm_page_lookup (struct vm_object *, vm_pindex_t); | |
| 984263bc | 500 | void vm_page_remove (vm_page_t); |
| 03d6a592 | 501 | void vm_page_rename (vm_page_t, struct vm_object *, vm_pindex_t); |
| 26bcc0c0 | 502 | vm_offset_t vm_page_startup (vm_offset_t); |
| 6ef943a3 | 503 | vm_page_t vm_add_new_page (vm_paddr_t pa); |
| 984263bc MD |
504 | void vm_page_unmanage (vm_page_t); |
| 505 | void vm_page_unwire (vm_page_t, int); | |
| 506 | void vm_page_wire (vm_page_t); | |
| 507 | void vm_page_unqueue (vm_page_t); | |
| 508 | void vm_page_unqueue_nowakeup (vm_page_t); | |
| 509 | void vm_page_set_validclean (vm_page_t, int, int); | |
| 0a8aee15 | 510 | void vm_page_set_validdirty (vm_page_t, int, int); |
| 1a54183b | 511 | void vm_page_set_valid (vm_page_t, int, int); |
| 984263bc MD |
512 | void vm_page_set_dirty (vm_page_t, int, int); |
| 513 | void vm_page_clear_dirty (vm_page_t, int, int); | |
| 514 | void vm_page_set_invalid (vm_page_t, int, int); | |
| 984263bc MD |
515 | int vm_page_is_valid (vm_page_t, int, int); |
| 516 | void vm_page_test_dirty (vm_page_t); | |
| 517 | int vm_page_bits (int, int); | |
| 74232d8e | 518 | vm_page_t vm_page_list_find(int basequeue, int index, boolean_t prefer_zero); |
| 984263bc MD |
519 | void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); |
| 520 | void vm_page_free_toq(vm_page_t m); | |
| bb6811be | 521 | vm_page_t vm_page_free_fromq_fast(void); |
| ba0fefd4 | 522 | vm_offset_t vm_contig_pg_kmap(int, u_long, vm_map_t, int); |
| d1fcdd16 | 523 | void vm_contig_pg_free(int, u_long); |
| 10192bae | 524 | void vm_page_event_internal(vm_page_t, vm_page_event_t); |
| 17cde63e | 525 | void vm_page_dirty(vm_page_t m); |
| d1fcdd16 | 526 | |
| 984263bc | 527 | /* |
| 06ecca5a MD |
528 | * Reduce the protection of a page. This routine never raises the |
| 529 | * protection and therefore can be safely called if the page is already | |
| 530 | * at VM_PROT_NONE (it will effectively be a NOP). | |
| 531 | * | |
| 532 | * VM_PROT_NONE will remove all user mappings of a page. This is often | |
| 533 | * necessary when a page changes state (for example, turns into a copy-on-write | |
| 534 | * page or needs to be frozen for write I/O) in order to force a fault, or | |
| 535 | * to force a page's dirty bits to be synchronized and avoid hardware | |
| 536 | * (modified/accessed) bit update races with pmap changes. | |
| 537 | * | |
| 538 | * Since 'prot' is usually a constant, this inline usually winds up optimizing | |
| 539 | * out the primary conditional. | |
| 17cde63e MD |
540 | * |
| 541 | * WARNING: VM_PROT_NONE can block, but will loop until all mappings have | |
| 542 | * been cleared. Callers should be aware that other page related elements | |
| 543 | * might have changed, however. | |
| 984263bc | 544 | */ |
| 984263bc MD |
545 | static __inline void |
| 546 | vm_page_protect(vm_page_t mem, int prot) | |
| 547 | { | |
| 548 | if (prot == VM_PROT_NONE) { | |
| 549 | if (mem->flags & (PG_WRITEABLE|PG_MAPPED)) { | |
| 550 | pmap_page_protect(mem, VM_PROT_NONE); | |
| 17cde63e | 551 | /* PG_WRITEABLE & PG_MAPPED cleared by call */ |
| 984263bc MD |
552 | } |
| 553 | } else if ((prot == VM_PROT_READ) && (mem->flags & PG_WRITEABLE)) { | |
| 554 | pmap_page_protect(mem, VM_PROT_READ); | |
| 17cde63e | 555 | /* PG_WRITEABLE cleared by call */ |
| 984263bc MD |
556 | } |
| 557 | } | |
| 558 | ||
| 559 | /* | |
| 06ecca5a MD |
560 | * Zero-fill the specified page. The entire contents of the page will be |
| 561 | * zero'd out. | |
| 984263bc MD |
562 | */ |
| 563 | static __inline boolean_t | |
| 06ecca5a | 564 | vm_page_zero_fill(vm_page_t m) |
| 984263bc MD |
565 | { |
| 566 | pmap_zero_page(VM_PAGE_TO_PHYS(m)); | |
| 567 | return (TRUE); | |
| 568 | } | |
| 569 | ||
| 570 | /* | |
| 06ecca5a MD |
571 | * Copy the contents of src_m to dest_m. The pages must be stable but spl |
| 572 | * and other protections depend on context. | |
| 984263bc MD |
573 | */ |
| 574 | static __inline void | |
| 06ecca5a | 575 | vm_page_copy(vm_page_t src_m, vm_page_t dest_m) |
| 984263bc MD |
576 | { |
| 577 | pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); | |
| 578 | dest_m->valid = VM_PAGE_BITS_ALL; | |
| 17cde63e | 579 | dest_m->dirty = VM_PAGE_BITS_ALL; |
| 984263bc MD |
580 | } |
| 581 | ||
| 582 | /* | |
| a441ad78 | 583 | * Free a page. The page must be marked BUSY. |
| 984263bc | 584 | * |
| a441ad78 MD |
585 | * The clearing of PG_ZERO is a temporary safety until the code can be |
| 586 | * reviewed to determine that PG_ZERO is being properly cleared on | |
| 587 | * write faults or maps. PG_ZERO was previously cleared in | |
| 588 | * vm_page_alloc(). | |
| 984263bc MD |
589 | */ |
| 590 | static __inline void | |
| a441ad78 | 591 | vm_page_free(vm_page_t m) |
| 984263bc MD |
592 | { |
| 593 | vm_page_flag_clear(m, PG_ZERO); | |
| 594 | vm_page_free_toq(m); | |
| 595 | } | |
| 596 | ||
| 597 | /* | |
| a441ad78 | 598 | * Free a page to the zerod-pages queue |
| 984263bc MD |
599 | */ |
| 600 | static __inline void | |
| a441ad78 | 601 | vm_page_free_zero(vm_page_t m) |
| 984263bc | 602 | { |
| b2b3ffcd | 603 | #ifdef __x86_64__ |
| 973c11b9 MD |
604 | /* JG DEBUG64 We check if the page is really zeroed. */ |
| 605 | char *p = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); | |
| 606 | int i; | |
| 607 | ||
| 608 | for (i = 0; i < PAGE_SIZE; i++) { | |
| 609 | if (p[i] != 0) { | |
| 610 | panic("non-zero page in vm_page_free_zero()"); | |
| 611 | } | |
| 612 | } | |
| 613 | ||
| 614 | #endif | |
| 984263bc MD |
615 | vm_page_flag_set(m, PG_ZERO); |
| 616 | vm_page_free_toq(m); | |
| 617 | } | |
| 618 | ||
| 619 | /* | |
| a441ad78 MD |
620 | * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE) |
| 621 | * m->busy is zero. Returns TRUE if it had to sleep ( including if | |
| 622 | * it almost had to sleep and made temporary spl*() mods), FALSE | |
| 623 | * otherwise. | |
| 624 | * | |
| 625 | * This routine assumes that interrupts can only remove the busy | |
| 626 | * status from a page, not set the busy status or change it from | |
| 627 | * PG_BUSY to m->busy or vice versa (which would create a timing | |
| 628 | * window). | |
| 629 | * | |
| 630 | * Note: as an inline, 'also_m_busy' is usually a constant and well | |
| 631 | * optimized. | |
| 984263bc | 632 | */ |
| 984263bc MD |
633 | static __inline int |
| 634 | vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg) | |
| 635 | { | |
| 636 | if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) { | |
| 9ad0147b | 637 | lwkt_gettoken(&vm_token); |
| 984263bc MD |
638 | if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) { |
| 639 | /* | |
| 640 | * Page is busy. Wait and retry. | |
| 641 | */ | |
| 642 | vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); | |
| 377d4740 | 643 | tsleep(m, 0, msg, 0); |
| 984263bc | 644 | } |
| 9ad0147b | 645 | lwkt_reltoken(&vm_token); |
| 984263bc MD |
646 | return(TRUE); |
| 647 | /* not reached */ | |
| 648 | } | |
| 649 | return(FALSE); | |
| 650 | } | |
| 651 | ||
| 652 | /* | |
| a441ad78 | 653 | * Set page to not be dirty. Note: does not clear pmap modify bits . |
| 984263bc | 654 | */ |
| 984263bc MD |
655 | static __inline void |
| 656 | vm_page_undirty(vm_page_t m) | |
| 657 | { | |
| 658 | m->dirty = 0; | |
| 659 | } | |
| 660 | ||
| 984263bc | 661 | #endif /* _KERNEL */ |
| 1bd40720 | 662 | #endif /* !_VM_VM_PAGE_H_ */ |