| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1991, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * | |
| 5 | * This code is derived from software contributed to Berkeley by | |
| 6 | * The Mach Operating System project at Carnegie-Mellon University. | |
| 7 | * | |
| 8 | * Redistribution and use in source and binary forms, with or without | |
| 9 | * modification, are permitted provided that the following conditions | |
| 10 | * are met: | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in the | |
| 15 | * documentation and/or other materials provided with the distribution. | |
| 16 | * 3. All advertising materials mentioning features or use of this software | |
| 17 | * must display the following acknowledgement: | |
| 18 | * This product includes software developed by the University of | |
| 19 | * California, Berkeley and its contributors. | |
| 20 | * 4. Neither the name of the University nor the names of its contributors | |
| 21 | * may be used to endorse or promote products derived from this software | |
| 22 | * without specific prior written permission. | |
| 23 | * | |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 34 | * SUCH DAMAGE. | |
| 35 | * | |
| 36 | * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 | |
| 37 | * | |
| 38 | * | |
| 39 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
| 40 | * All rights reserved. | |
| 41 | * | |
| 42 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
| 43 | * | |
| 44 | * Permission to use, copy, modify and distribute this software and | |
| 45 | * its documentation is hereby granted, provided that both the copyright | |
| 46 | * notice and this permission notice appear in all copies of the | |
| 47 | * software, derivative works or modified versions, and any portions | |
| 48 | * thereof, and that both notices appear in supporting documentation. | |
| 49 | * | |
| 50 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
| 51 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
| 52 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
| 53 | * | |
| 54 | * Carnegie Mellon requests users of this software to return to | |
| 55 | * | |
| 56 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
| 57 | * School of Computer Science | |
| 58 | * Carnegie Mellon University | |
| 59 | * Pittsburgh PA 15213-3890 | |
| 60 | * | |
| 61 | * any improvements or extensions that they make and grant Carnegie the | |
| 62 | * rights to redistribute these changes. | |
| 63 | * | |
| 64 | * $FreeBSD: src/sys/vm/vm_page.h,v 1.75.2.8 2002/03/06 01:07:09 dillon Exp $ | |
| 17cde63e | 65 | * $DragonFly: src/sys/vm/vm_page.h,v 1.28 2008/05/09 07:24:48 dillon Exp $ |
| 984263bc MD |
66 | */ |
| 67 | ||
| 68 | /* | |
| 69 | * Resident memory system definitions. | |
| 70 | */ | |
| 71 | ||
| 1bd40720 MD |
72 | #ifndef _VM_VM_PAGE_H_ |
| 73 | #define _VM_VM_PAGE_H_ | |
| 984263bc | 74 | |
| 3c923499 | 75 | #if !defined(KLD_MODULE) && defined(_KERNEL) |
| 984263bc MD |
76 | #include "opt_vmpage.h" |
| 77 | #endif | |
| 78 | ||
| 1bd40720 MD |
79 | #ifndef _SYS_TYPES_H_ |
| 80 | #include <sys/types.h> | |
| 81 | #endif | |
| 1f804340 MD |
82 | #ifndef _SYS_TREE_H_ |
| 83 | #include <sys/tree.h> | |
| 84 | #endif | |
| 1bd40720 MD |
85 | #ifndef _MACHINE_PMAP_H_ |
| 86 | #include <machine/pmap.h> | |
| 87 | #endif | |
| 88 | #ifndef _VM_PMAP_H_ | |
| 984263bc | 89 | #include <vm/pmap.h> |
| 1bd40720 MD |
90 | #endif |
| 91 | #ifndef _MACHINE_ATOMIC_H_ | |
| 984263bc | 92 | #include <machine/atomic.h> |
| 1bd40720 MD |
93 | #endif |
| 94 | ||
| 668b1228 | 95 | #ifdef _KERNEL |
| 1bd40720 | 96 | |
| 03d6a592 MD |
97 | #ifndef _SYS_SYSTM_H_ |
| 98 | #include <sys/systm.h> | |
| 99 | #endif | |
| 1bd40720 | 100 | #ifndef _SYS_THREAD2_H_ |
| cdd46d2e | 101 | #include <sys/thread2.h> |
| 668b1228 | 102 | #endif |
| 984263bc | 103 | |
| 973c11b9 MD |
104 | #ifdef __amd64__ |
| 105 | #include <machine/vmparam.h> | |
| 106 | #endif | |
| 107 | ||
| 1bd40720 MD |
108 | #endif |
| 109 | ||
| 10192bae MD |
110 | typedef enum vm_page_event { VMEVENT_NONE, VMEVENT_COW } vm_page_event_t; |
| 111 | ||
/*
 * A callback record that can be linked onto a page's action_list
 * (struct vm_page declares a LIST_HEAD of these).
 */
struct vm_page_action {
	LIST_ENTRY(vm_page_action) entry;	/* list linkage */
	vm_page_event_t event;			/* VMEVENT_* value for this record */
	/*
	 * Callback taking the page and this record.
	 * NOTE(review): presumably invoked from vm_page_event_internal()
	 * -- confirm in vm_page.c.
	 */
	void (*func)(struct vm_page *,
	struct vm_page_action *);
	void *data;				/* opaque pointer; use not visible here -- TODO confirm */
};
| 119 | ||
| 120 | typedef struct vm_page_action *vm_page_action_t; | |
| 121 | ||
| 984263bc MD |
122 | /* |
| 123 | * Management of resident (logical) pages. | |
| 124 | * | |
| 125 | * A small structure is kept for each resident | |
| 126 | * page, indexed by page number. Each structure | |
| 127 | * is an element of several lists: | |
| 128 | * | |
| 129 | * A hash table bucket used to quickly | |
| 130 | * perform object/offset lookups | |
| 131 | * | |
| 132 | * A list of all pages for a given object, | |
| 133 | * so they can be quickly deactivated at | |
| 134 | * time of deallocation. | |
| 135 | * | |
| 136 | * An ordered list of pages due for pageout. | |
| 137 | * | |
| 138 | * In addition, the structure contains the object | |
| 139 | * and offset to which this page belongs (for pageout), | |
| 140 | * and sundry status bits. | |
| 141 | * | |
| 142 | * Fields in this structure are locked either by the lock on the | |
| 143 | * object that the page belongs to (O) or by the lock on the page | |
| 144 | * queues (P). | |
| 145 | * | |
| 146 | * The 'valid' and 'dirty' fields are distinct. A page may have dirty | |
| 147 | * bits set without having associated valid bits set. This is used by | |
| 148 | * NFS to implement piecemeal writes. | |
| 149 | */ | |
| 150 | ||
| 151 | TAILQ_HEAD(pglist, vm_page); | |
| 152 | ||
| a5d36a1d | 153 | struct msf_buf; |
| 03d6a592 MD |
154 | struct vm_object; |
| 155 | ||
| 1f804340 MD |
156 | int rb_vm_page_compare(struct vm_page *, struct vm_page *); |
| 157 | ||
| 158 | struct vm_page_rb_tree; | |
| 159 | RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, vm_pindex_t); | |
| 160 | ||
/*
 * Descriptor kept for each resident page.  Lock annotations follow the
 * convention used in this file: (O) object lock, (P) page queues lock.
 */
struct vm_page {
	TAILQ_ENTRY(vm_page) pageq;	/* vm_page_queues[] list (P) */
	RB_ENTRY(vm_page) rb_entry;	/* Red-Black tree based at object */

	struct vm_object *object;	/* which object am I in (O,P)*/
	vm_pindex_t pindex;		/* offset into object (O,P) */
	vm_paddr_t phys_addr;		/* physical address of page */
	struct md_page md;		/* machine dependent stuff */
	u_short queue;			/* page queue index */
	u_short flags;			/* PG_* bits, see below */
	u_short pc;			/* page color */
	u_char act_count;		/* page usage count */
	u_char busy;			/* page busy count (the 'soft busy' count) */
	u_int wire_count;		/* wired down maps refs (P) */
	int hold_count;			/* page hold count */

	/*
	 * NOTE that these must support one bit per DEV_BSIZE in a page!!!
	 * so, on normal X86 kernels, they must be at least 8 bits wide.
	 *
	 * NOTE(review): neither field is declared when PAGE_SIZE is
	 * something other than 4096 or 8192.
	 */
#if PAGE_SIZE == 4096
	u_char valid;			/* map of valid DEV_BSIZE chunks */
	u_char dirty;			/* map of dirty DEV_BSIZE chunks */
#elif PAGE_SIZE == 8192
	u_short valid;			/* map of valid DEV_BSIZE chunks */
	u_short dirty;			/* map of dirty DEV_BSIZE chunks */
#endif
	struct msf_buf *msf_hint;	/* first page of an msfbuf map */
	LIST_HEAD(,vm_page_action) action_list;	/* vm_page_action records, linked by their 'entry' */
};
| 191 | ||
| 03d6a592 MD |
192 | #ifndef __VM_PAGE_T_DEFINED__ |
| 193 | #define __VM_PAGE_T_DEFINED__ | |
| 194 | typedef struct vm_page *vm_page_t; | |
| 195 | #endif | |
| 196 | ||
| 984263bc MD |
197 | /* |
| 198 | * note: currently use SWAPBLK_NONE as an absolute value rather than | |
| 199 | * a flag bit. | |
| 200 | */ | |
| 984263bc MD |
201 | #define SWAPBLK_MASK ((daddr_t)((u_daddr_t)-1 >> 1)) /* mask */ |
| 202 | #define SWAPBLK_NONE ((daddr_t)((u_daddr_t)SWAPBLK_MASK + 1))/* flag */ | |
| 203 | ||
| 984263bc | 204 | /* |
| 74232d8e MD |
205 | * Page coloring parameters. We default to a middle of the road optimization. |
| 206 | * Larger selections would not really hurt us but if a machine does not have | |
| 207 | * a lot of memory it could cause vm_page_alloc() to eat more cpu cycles | |
| 208 | * looking for free pages. | |
| 209 | * | |
| 210 | * Page coloring cannot be disabled. Modules do not have access to most PQ | |
| 211 | * constants because they can change between builds. | |
| 984263bc | 212 | */ |
| 74232d8e | 213 | #if defined(_KERNEL) && !defined(KLD_MODULE) |
| 984263bc | 214 | |
| 984263bc | 215 | #if !defined(PQ_CACHESIZE) |
| 74232d8e | 216 | #define PQ_CACHESIZE 256 /* max is 1024 (MB) */ |
| 984263bc MD |
217 | #endif |
| 218 | ||
| 219 | #if PQ_CACHESIZE >= 1024 | |
| 220 | #define PQ_PRIME1 31 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 221 | #define PQ_PRIME2 23 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 222 | #define PQ_L2_SIZE 256 /* A number of colors opt for 1M cache */ | |
| 223 | ||
| 224 | #elif PQ_CACHESIZE >= 512 | |
| 225 | #define PQ_PRIME1 31 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 226 | #define PQ_PRIME2 23 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 227 | #define PQ_L2_SIZE 128 /* A number of colors opt for 512K cache */ | |
| 228 | ||
| 229 | #elif PQ_CACHESIZE >= 256 | |
| 230 | #define PQ_PRIME1 13 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 231 | #define PQ_PRIME2 7 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 232 | #define PQ_L2_SIZE 64 /* A number of colors opt for 256K cache */ | |
| 233 | ||
| 234 | #elif PQ_CACHESIZE >= 128 | |
| 235 | #define PQ_PRIME1 9 /* Produces a good PQ_L2_SIZE/3 + PQ_PRIME1 */ | |
| 236 | #define PQ_PRIME2 5 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 237 | #define PQ_L2_SIZE 32 /* A number of colors opt for 128k cache */ | |
| 238 | ||
| 74232d8e | 239 | #else |
| 984263bc MD |
240 | #define PQ_PRIME1 5 /* Prime number somewhat less than PQ_HASH_SIZE */ |
| 241 | #define PQ_PRIME2 3 /* Prime number somewhat less than PQ_HASH_SIZE */ | |
| 242 | #define PQ_L2_SIZE 16 /* A reasonable number of colors (opt for 64K cache) */ | |
| 243 | ||
| 984263bc MD |
244 | #endif |
| 245 | ||
| 74232d8e | 246 | #define PQ_L2_MASK (PQ_L2_SIZE - 1) |
| 984263bc | 247 | |
| 74232d8e MD |
248 | #endif /* KERNEL && !KLD_MODULE */ |
| 249 | ||
| 250 | /* | |
| 251 | * | |
| 252 | * The queue array is always based on PQ_MAXL2_SIZE regardless of the actual | |
| 253 | * cache size chosen in order to present a uniform interface for modules. | |
| 254 | */ | |
| 255 | #define PQ_MAXL2_SIZE 256 /* fixed maximum (in pages) / module compat */ | |
| 256 | ||
| 257 | #if PQ_L2_SIZE > PQ_MAXL2_SIZE | |
| 258 | #error "Illegal PQ_L2_SIZE" | |
| 259 | #endif | |
| 260 | ||
| 261 | #define PQ_NONE 0 | |
| 262 | #define PQ_FREE 1 | |
| 263 | #define PQ_INACTIVE (1 + 1*PQ_MAXL2_SIZE) | |
| 264 | #define PQ_ACTIVE (2 + 1*PQ_MAXL2_SIZE) | |
| 265 | #define PQ_CACHE (3 + 1*PQ_MAXL2_SIZE) | |
| 266 | #define PQ_HOLD (3 + 2*PQ_MAXL2_SIZE) | |
| 267 | #define PQ_COUNT (4 + 2*PQ_MAXL2_SIZE) | |
| 984263bc | 268 | |
| 1f804340 MD |
269 | /* |
| 270 | * Scan support | |
| 271 | */ | |
| 272 | struct vm_map; | |
| 273 | ||
/*
 * Parameter/state block for page scans.
 *
 * NOTE(review): presumably handed to scan callbacks such as
 * rb_vm_page_scancmp() through their void * argument; the meaning of the
 * individual fields is not visible in this header -- confirm against the
 * callers.
 */
struct rb_vm_page_scan_info {
	vm_pindex_t start_pindex;
	vm_pindex_t end_pindex;
	int limit;
	int desired;
	int error;
	int pagerflags;
	vm_offset_t addr;
	vm_pindex_t backing_offset_index;
	struct vm_object *object;
	struct vm_object *backing_object;
	struct vm_page *mpte;
	struct pmap *pmap;
	struct vm_map *map;
};
| 289 | ||
| 290 | int rb_vm_page_scancmp(struct vm_page *, void *); | |
| 291 | ||
/*
 * One page queue.  vm_page_queues[] is an array of PQ_COUNT of these.
 */
struct vpgqueues {
	struct pglist pl;	/* TAILQ head of struct vm_page */
	int *cnt;		/* NOTE(review): presumably points at a shared page counter -- confirm */
	int lcnt;		/* NOTE(review): presumably a per-queue count -- confirm */
	int flipflop;		/* probably not the best place */
};
| 298 | ||
| 299 | extern struct vpgqueues vm_page_queues[PQ_COUNT]; | |
| 300 | ||
| 984263bc MD |
301 | /* |
| 302 | * These are the flags defined for vm_page. | |
| 303 | * | |
| 17cde63e MD |
304 | * PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is |
| 305 | * not under PV management but otherwise should be treated as a | |
| 306 | * normal page. Pages not under PV management cannot be paged out | |
| 307 | * via the object/vm_page_t because there is no knowledge of their | |
| 308 | * pte mappings, nor can they be removed from their objects via | |
| 309 | * the object, and such pages are also not on any PQ queue. The | |
| 310 | * PG_MAPPED and PG_WRITEABLE flags are not applicable. | |
| 311 | * | |
| 312 | * PG_MAPPED only applies to managed pages, indicating whether the page | |
| 313 | * is mapped onto one or more pmaps. A page might still be mapped to | |
| 314 | * special pmaps in an unmanaged fashion, for example when mapped into a | |
| 315 | * buffer cache buffer, without setting PG_MAPPED. | |
| 316 | * | |
| 317 | * PG_WRITEABLE indicates that there may be a writeable managed pmap entry | |
| 318 | * somewhere, and that the page can be dirtied by hardware at any time | |
| 319 | * and may have to be tested for that. The modified bit in unmanaged | |
| 320 | * mappings or in the special clean map is not tested. | |
| 984263bc MD |
321 | */ |
| 322 | #define PG_BUSY 0x0001 /* page is in transit (O) */ | |
| 323 | #define PG_WANTED 0x0002 /* someone is waiting for page (O) */ | |
| 324 | #define PG_WINATCFLS 0x0004 /* flush dirty page on inactive q */ | |
| 325 | #define PG_FICTITIOUS 0x0008 /* physical page doesn't exist (O) */ | |
| 17cde63e MD |
326 | #define PG_WRITEABLE 0x0010 /* page is writeable */ |
| 327 | #define PG_MAPPED 0x0020 /* page is mapped (managed) */ | |
| 984263bc MD |
328 | #define PG_ZERO 0x0040 /* page is zeroed */ |
| 329 | #define PG_REFERENCED 0x0080 /* page has been referenced */ | |
| 330 | #define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */ | |
| 331 | #define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */ | |
| 332 | #define PG_NOSYNC 0x0400 /* do not collect for syncer */ | |
| 333 | #define PG_UNMANAGED 0x0800 /* No PV management for page */ | |
| 334 | #define PG_MARKER 0x1000 /* special queue marker page */ | |
| 335 | ||
| 336 | /* | |
| 337 | * Misc constants. | |
| 338 | */ | |
| 339 | ||
| 340 | #define ACT_DECLINE 1 | |
| 341 | #define ACT_ADVANCE 3 | |
| 342 | #define ACT_INIT 5 | |
| 343 | #define ACT_MAX 64 | |
| 984263bc MD |
344 | |
| 345 | #ifdef _KERNEL | |
| 346 | /* | |
| 347 | * Each pageable resident page falls into one of the following lists: | |
| 348 | * | |
| 349 | * free | |
| 350 | * Available for allocation now. | |
| 351 | * | |
| 352 | * The following are all LRU sorted: | |
| 353 | * | |
| 354 | * cache | |
| 355 | * Almost available for allocation. Still in an | |
| 356 | * object, but clean and immediately freeable at | |
| 357 | * non-interrupt times. | |
| 358 | * | |
| 359 | * inactive | |
| 360 | * Low activity, candidates for reclamation. | |
| 361 | * This is the list of pages that should be | |
| 362 | * paged out next. | |
| 363 | * | |
| 364 | * active | |
| 365 | * Pages that are "active" i.e. they have been | |
| 366 | * recently referenced. | |
| 367 | * | |
| 368 | * zero | |
| 369 | * Pages that are really free and have been pre-zeroed | |
| 370 | * | |
| 371 | */ | |
| 372 | ||
| 373 | extern int vm_page_zero_count; | |
| 03d6a592 | 374 | extern struct vm_page *vm_page_array; /* First resident page in table */ |
| 984263bc MD |
375 | extern int vm_page_array_size; /* number of vm_page_t's */ |
| 376 | extern long first_page; /* first physical page number */ | |
| 377 | ||
/* Physical address stored in a vm_page (its phys_addr field). */
#define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
| 984263bc | 380 | |
/*
 * Address of the vm_page_array[] slot for physical address 'pa': the page
 * number atop(pa), biased by first_page.  No range check is performed.
 */
#define PHYS_TO_VM_PAGE(pa)	(&vm_page_array[atop(pa) - first_page])
| 984263bc MD |
383 | |
| 384 | /* | |
| 385 | * Functions implemented as static inline functions | |
| 386 | */ | |
| 387 | ||
| 388 | static __inline void | |
| 389 | vm_page_flag_set(vm_page_t m, unsigned int bits) | |
| 390 | { | |
| 391 | atomic_set_short(&(m)->flags, bits); | |
| 392 | } | |
| 393 | ||
| 394 | static __inline void | |
| 395 | vm_page_flag_clear(vm_page_t m, unsigned int bits) | |
| 396 | { | |
| 397 | atomic_clear_short(&(m)->flags, bits); | |
| 398 | } | |
| 399 | ||
| 984263bc MD |
400 | static __inline void |
| 401 | vm_page_busy(vm_page_t m) | |
| 402 | { | |
| a441ad78 MD |
403 | KASSERT((m->flags & PG_BUSY) == 0, |
| 404 | ("vm_page_busy: page already busy!!!")); | |
| 984263bc MD |
405 | vm_page_flag_set(m, PG_BUSY); |
| 406 | } | |
| 407 | ||
| 408 | /* | |
| 409 | * vm_page_flash: | |
| 410 | * | |
| 411 | * wakeup anyone waiting for the page. | |
| 412 | */ | |
| 413 | ||
| 414 | static __inline void | |
| 415 | vm_page_flash(vm_page_t m) | |
| 416 | { | |
| 417 | if (m->flags & PG_WANTED) { | |
| 418 | vm_page_flag_clear(m, PG_WANTED); | |
| 419 | wakeup(m); | |
| 420 | } | |
| 421 | } | |
| 422 | ||
| 423 | /* | |
| a441ad78 MD |
424 | * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This |
| 425 | * is typically the last call you make on a page before moving onto | |
| 426 | * other things. | |
| 984263bc | 427 | */ |
| 984263bc MD |
428 | static __inline void |
| 429 | vm_page_wakeup(vm_page_t m) | |
| 430 | { | |
| 431 | KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!")); | |
| 432 | vm_page_flag_clear(m, PG_BUSY); | |
| 433 | vm_page_flash(m); | |
| 434 | } | |
| 435 | ||
| a441ad78 MD |
436 | /* |
| 437 | * These routines manipulate the 'soft busy' count for a page. A soft busy | |
| 438 | * is almost like PG_BUSY except that it allows certain compatible operations | |
| 439 | * to occur on the page while it is busy. For example, a page undergoing a | |
| 440 | * write can still be mapped read-only. | |
| 441 | */ | |
| 984263bc MD |
442 | static __inline void |
| 443 | vm_page_io_start(vm_page_t m) | |
| 444 | { | |
| 445 | atomic_add_char(&(m)->busy, 1); | |
| 446 | } | |
| 447 | ||
| 448 | static __inline void | |
| 449 | vm_page_io_finish(vm_page_t m) | |
| 450 | { | |
| 451 | atomic_subtract_char(&m->busy, 1); | |
| 452 | if (m->busy == 0) | |
| 453 | vm_page_flash(m); | |
| 454 | } | |
| 455 | ||
| 456 | ||
/*
 * Mask with every valid/dirty bit set; its width tracks the valid/dirty
 * fields of struct vm_page for the configured PAGE_SIZE.
 */
#if PAGE_SIZE == 4096
#define VM_PAGE_BITS_ALL	0xff
#elif PAGE_SIZE == 8192
#define VM_PAGE_BITS_ALL	0xffff
#endif
| 464 | ||
| dc1fd4b3 MD |
465 | /* |
| 466 | * Note: the code will always use nominally free pages from the free list | |
| 467 | * before trying other flag-specified sources. | |
| 468 | * | |
| 469 | * At least one of VM_ALLOC_NORMAL|VM_ALLOC_SYSTEM|VM_ALLOC_INTERRUPT | |
| 470 | * must be specified. VM_ALLOC_RETRY may only be specified if VM_ALLOC_NORMAL | |
| 471 | * is also specified. | |
| 472 | */ | |
| 473 | #define VM_ALLOC_NORMAL 0x01 /* ok to use cache pages */ | |
| 474 | #define VM_ALLOC_SYSTEM 0x02 /* ok to exhaust most of free list */ | |
| 475 | #define VM_ALLOC_INTERRUPT 0x04 /* ok to exhaust entire free list */ | |
| 476 | #define VM_ALLOC_ZERO 0x08 /* req pre-zero'd memory if avail */ | |
| 477 | #define VM_ALLOC_RETRY 0x80 /* indefinite block (vm_page_grab()) */ | |
| 984263bc MD |
478 | |
| 479 | void vm_page_unhold(vm_page_t mem); | |
| 984263bc | 480 | void vm_page_activate (vm_page_t); |
| 03d6a592 MD |
481 | vm_page_t vm_page_alloc (struct vm_object *, vm_pindex_t, int); |
| 482 | vm_page_t vm_page_grab (struct vm_object *, vm_pindex_t, int); | |
| 5f910b2f | 483 | void vm_page_cache (vm_page_t); |
| 984263bc MD |
484 | int vm_page_try_to_cache (vm_page_t); |
| 485 | int vm_page_try_to_free (vm_page_t); | |
| 5f910b2f | 486 | void vm_page_dontneed (vm_page_t); |
| 984263bc | 487 | void vm_page_deactivate (vm_page_t); |
| 03d6a592 MD |
488 | void vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t); |
| 489 | vm_page_t vm_page_lookup (struct vm_object *, vm_pindex_t); | |
| 984263bc | 490 | void vm_page_remove (vm_page_t); |
| 03d6a592 | 491 | void vm_page_rename (vm_page_t, struct vm_object *, vm_pindex_t); |
| 26bcc0c0 | 492 | vm_offset_t vm_page_startup (vm_offset_t); |
| 6ef943a3 | 493 | vm_page_t vm_add_new_page (vm_paddr_t pa); |
| 984263bc MD |
494 | void vm_page_unmanage (vm_page_t); |
| 495 | void vm_page_unwire (vm_page_t, int); | |
| 496 | void vm_page_wire (vm_page_t); | |
| 497 | void vm_page_unqueue (vm_page_t); | |
| 498 | void vm_page_unqueue_nowakeup (vm_page_t); | |
| 499 | void vm_page_set_validclean (vm_page_t, int, int); | |
| 500 | void vm_page_set_dirty (vm_page_t, int, int); | |
| 501 | void vm_page_clear_dirty (vm_page_t, int, int); | |
| 502 | void vm_page_set_invalid (vm_page_t, int, int); | |
| 984263bc MD |
503 | int vm_page_is_valid (vm_page_t, int, int); |
| 504 | void vm_page_test_dirty (vm_page_t); | |
| 505 | int vm_page_bits (int, int); | |
| 74232d8e | 506 | vm_page_t vm_page_list_find(int basequeue, int index, boolean_t prefer_zero); |
| 984263bc MD |
507 | void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); |
| 508 | void vm_page_free_toq(vm_page_t m); | |
| ba0fefd4 | 509 | vm_offset_t vm_contig_pg_kmap(int, u_long, vm_map_t, int); |
| d1fcdd16 | 510 | void vm_contig_pg_free(int, u_long); |
| 10192bae | 511 | void vm_page_event_internal(vm_page_t, vm_page_event_t); |
| 17cde63e | 512 | void vm_page_dirty(vm_page_t m); |
| d1fcdd16 | 513 | |
| 984263bc | 514 | /* |
| 06ecca5a MD |
515 | * Holding a page keeps it from being reused. Other parts of the system |
| 516 | * can still disassociate the page from its current object and free it, or | |
| 517 | * perform read or write I/O on it and/or otherwise manipulate the page, | |
| 518 | * but if the page is held the VM system will leave the page and its data | |
| 519 | * intact and not reuse the page for other purposes until the last hold | |
| 520 | * reference is released. (see vm_page_wire() if you want to prevent the | |
| 521 | * page from being disassociated from its object too). | |
| 522 | * | |
| 523 | * This routine must be called while at splvm() or better. | |
| 524 | * | |
| 525 | * The caller must still validate the contents of the page and, if necessary, | |
| 526 | * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete | |
| 527 | * before manipulating the page. | |
| 984263bc MD |
528 | */ |
| 529 | static __inline void | |
| 530 | vm_page_hold(vm_page_t mem) | |
| 531 | { | |
| 532 | mem->hold_count++; | |
| 533 | } | |
| 534 | ||
| 535 | /* | |
| 06ecca5a MD |
536 | * Reduce the protection of a page. This routine never raises the |
| 537 | * protection and therefore can be safely called if the page is already | |
| 538 | * at VM_PROT_NONE (it will be a NOP effectively ). | |
| 539 | * | |
| 540 | * VM_PROT_NONE will remove all user mappings of a page. This is often | |
| 541 | * necessary when a page changes state (for example, turns into a copy-on-write | |
| 542 | * page or needs to be frozen for write I/O) in order to force a fault, or | |
| 543 | * to force a page's dirty bits to be synchronized and avoid hardware | |
| 544 | * (modified/accessed) bit update races with pmap changes. | |
| 545 | * | |
| 546 | * Since 'prot' is usually a constant, this inline usually winds up optimizing | |
| 547 | * out the primary conditional. | |
| 17cde63e MD |
548 | * |
| 549 | * WARNING: VM_PROT_NONE can block, but will loop until all mappings have | |
| 550 | * been cleared. Callers should be aware that other page related elements | |
| 551 | * might have changed, however. | |
| 984263bc | 552 | */ |
| 984263bc MD |
553 | static __inline void |
| 554 | vm_page_protect(vm_page_t mem, int prot) | |
| 555 | { | |
| 556 | if (prot == VM_PROT_NONE) { | |
| 557 | if (mem->flags & (PG_WRITEABLE|PG_MAPPED)) { | |
| 558 | pmap_page_protect(mem, VM_PROT_NONE); | |
| 17cde63e | 559 | /* PG_WRITEABLE & PG_MAPPED cleared by call */ |
| 984263bc MD |
560 | } |
| 561 | } else if ((prot == VM_PROT_READ) && (mem->flags & PG_WRITEABLE)) { | |
| 562 | pmap_page_protect(mem, VM_PROT_READ); | |
| 17cde63e | 563 | /* PG_WRITEABLE cleared by call */ |
| 984263bc MD |
564 | } |
| 565 | } | |
| 566 | ||
| 567 | /* | |
| 06ecca5a MD |
568 | * Zero-fill the specified page. The entire contents of the page will be |
| 569 | * zero'd out. | |
| 984263bc MD |
570 | */ |
| 571 | static __inline boolean_t | |
| 06ecca5a | 572 | vm_page_zero_fill(vm_page_t m) |
| 984263bc MD |
573 | { |
| 574 | pmap_zero_page(VM_PAGE_TO_PHYS(m)); | |
| 575 | return (TRUE); | |
| 576 | } | |
| 577 | ||
| 578 | /* | |
| 06ecca5a MD |
579 | * Copy the contents of src_m to dest_m. The pages must be stable but spl |
| 580 | * and other protections depend on context. | |
| 984263bc MD |
581 | */ |
| 582 | static __inline void | |
| 06ecca5a | 583 | vm_page_copy(vm_page_t src_m, vm_page_t dest_m) |
| 984263bc MD |
584 | { |
| 585 | pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); | |
| 586 | dest_m->valid = VM_PAGE_BITS_ALL; | |
| 17cde63e | 587 | dest_m->dirty = VM_PAGE_BITS_ALL; |
| 984263bc MD |
588 | } |
| 589 | ||
| 590 | /* | |
| a441ad78 | 591 | * Free a page. The page must be marked BUSY. |
| 984263bc | 592 | * |
| a441ad78 MD |
593 | * The clearing of PG_ZERO is a temporary safety until the code can be |
| 594 | * reviewed to determine that PG_ZERO is being properly cleared on | |
| 595 | * write faults or maps. PG_ZERO was previously cleared in | |
| 596 | * vm_page_alloc(). | |
| 984263bc MD |
597 | */ |
| 598 | static __inline void | |
| a441ad78 | 599 | vm_page_free(vm_page_t m) |
| 984263bc MD |
600 | { |
| 601 | vm_page_flag_clear(m, PG_ZERO); | |
| 602 | vm_page_free_toq(m); | |
| 603 | } | |
| 604 | ||
| 605 | /* | |
| a441ad78 | 606 | * Free a page to the zerod-pages queue |
| 984263bc MD |
607 | */ |
| 608 | static __inline void | |
| a441ad78 | 609 | vm_page_free_zero(vm_page_t m) |
| 984263bc | 610 | { |
| 973c11b9 MD |
611 | #ifdef __amd64__ |
| 612 | /* JG DEBUG64 We check if the page is really zeroed. */ | |
| 613 | char *p = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); | |
| 614 | int i; | |
| 615 | ||
| 616 | for (i = 0; i < PAGE_SIZE; i++) { | |
| 617 | if (p[i] != 0) { | |
| 618 | panic("non-zero page in vm_page_free_zero()"); | |
| 619 | } | |
| 620 | } | |
| 621 | ||
| 622 | #endif | |
| 984263bc MD |
623 | vm_page_flag_set(m, PG_ZERO); |
| 624 | vm_page_free_toq(m); | |
| 625 | } | |
| 626 | ||
| 627 | /* | |
| a441ad78 MD |
628 | * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE) |
| 629 | * m->busy is zero. Returns TRUE if it had to sleep ( including if | |
| 630 | * it almost had to sleep and made temporary spl*() mods), FALSE | |
| 631 | * otherwise. | |
| 632 | * | |
| 633 | * This routine assumes that interrupts can only remove the busy | |
| 634 | * status from a page, not set the busy status or change it from | |
| 635 | * PG_BUSY to m->busy or vise versa (which would create a timing | |
| 636 | * window). | |
| 637 | * | |
| 638 | * Note: as an inline, 'also_m_busy' is usually a constant and well | |
| 639 | * optimized. | |
| 984263bc | 640 | */ |
| 984263bc MD |
641 | static __inline int |
| 642 | vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg) | |
| 643 | { | |
| 644 | if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) { | |
| cdd46d2e | 645 | crit_enter(); |
| 984263bc MD |
646 | if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) { |
| 647 | /* | |
| 648 | * Page is busy. Wait and retry. | |
| 649 | */ | |
| 650 | vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); | |
| 377d4740 | 651 | tsleep(m, 0, msg, 0); |
| 984263bc | 652 | } |
| cdd46d2e | 653 | crit_exit(); |
| 984263bc MD |
654 | return(TRUE); |
| 655 | /* not reached */ | |
| 656 | } | |
| 657 | return(FALSE); | |
| 658 | } | |
| 659 | ||
| 660 | /* | |
| a441ad78 | 661 | * Set page to not be dirty. Note: does not clear pmap modify bits . |
| 984263bc | 662 | */ |
| 984263bc MD |
663 | static __inline void |
| 664 | vm_page_undirty(vm_page_t m) | |
| 665 | { | |
| 666 | m->dirty = 0; | |
| 667 | } | |
| 668 | ||
| 984263bc | 669 | #endif /* _KERNEL */ |
| 1bd40720 | 670 | #endif /* !_VM_VM_PAGE_H_ */ |