| Commit | Line | Data |
|---|---|---|
| 984263bc | 1 | /* |
| 9ad0147b MD |
2 | * (MPSAFE) |
| 3 | * | |
| 984263bc MD |
4 | * Copyright (c) 1991 Regents of the University of California. |
| 5 | * All rights reserved. | |
| 6 | * | |
| 7 | * This code is derived from software contributed to Berkeley by | |
| 8 | * The Mach Operating System project at Carnegie-Mellon University. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 984263bc MD |
18 | * 4. Neither the name of the University nor the names of its contributors |
| 19 | * may be used to endorse or promote products derived from this software | |
| 20 | * without specific prior written permission. | |
| 21 | * | |
| 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| 33 | * | |
| 34 | * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 | |
| 35 | * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $ | |
| 36 | */ | |
| 37 | ||
| 38 | /* | |
| 39 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
| 40 | * All rights reserved. | |
| 41 | * | |
| 42 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
| 43 | * | |
| 44 | * Permission to use, copy, modify and distribute this software and | |
| 45 | * its documentation is hereby granted, provided that both the copyright | |
| 46 | * notice and this permission notice appear in all copies of the | |
| 47 | * software, derivative works or modified versions, and any portions | |
| 48 | * thereof, and that both notices appear in supporting documentation. | |
| 49 | * | |
| 50 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
| 51 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
| 52 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
| 53 | * | |
| 54 | * Carnegie Mellon requests users of this software to return to | |
| 55 | * | |
| 56 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
| 57 | * School of Computer Science | |
| 58 | * Carnegie Mellon University | |
| 59 | * Pittsburgh PA 15213-3890 | |
| 60 | * | |
| 61 | * any improvements or extensions that they make and grant Carnegie the | |
| 62 | * rights to redistribute these changes. | |
| 63 | */ | |
| 984263bc | 64 | /* |
| de71fd3f MD |
65 | * Resident memory management module. The module manipulates 'VM pages'. |
| 66 | * A VM page is the core building block for memory management. | |
| 984263bc MD |
67 | */ |
| 68 | ||
| 69 | #include <sys/param.h> | |
| 70 | #include <sys/systm.h> | |
| 71 | #include <sys/malloc.h> | |
| 72 | #include <sys/proc.h> | |
| 73 | #include <sys/vmmeter.h> | |
| 74 | #include <sys/vnode.h> | |
| cd3c66bd | 75 | #include <sys/kernel.h> |
| 79d182b0 MD |
76 | #include <sys/alist.h> |
| 77 | #include <sys/sysctl.h> | |
| 984263bc MD |
78 | |
| 79 | #include <vm/vm.h> | |
| 80 | #include <vm/vm_param.h> | |
| 81 | #include <sys/lock.h> | |
| 82 | #include <vm/vm_kern.h> | |
| 83 | #include <vm/pmap.h> | |
| 84 | #include <vm/vm_map.h> | |
| 85 | #include <vm/vm_object.h> | |
| 86 | #include <vm/vm_page.h> | |
| 87 | #include <vm/vm_pageout.h> | |
| 88 | #include <vm/vm_pager.h> | |
| 89 | #include <vm/vm_extern.h> | |
| 096e95c0 | 90 | #include <vm/swap_pager.h> |
| 984263bc | 91 | |
| 480c83b6 | 92 | #include <machine/inttypes.h> |
| 8e5e6f1b AH |
93 | #include <machine/md_var.h> |
| 94 | ||
| bb6811be | 95 | #include <vm/vm_page2.h> |
| b12defdc | 96 | #include <sys/spinlock2.h> |
| bb6811be | 97 | |
| 906c754c MD |
98 | #define VMACTION_HSIZE 256 |
| 99 | #define VMACTION_HMASK (VMACTION_HSIZE - 1) | |
| 100 | ||
| de71fd3f MD |
101 | static void vm_page_queue_init(void); |
| 102 | static void vm_page_free_wakeup(void); | |
| 85946b6c | 103 | static vm_page_t vm_page_select_cache(u_short pg_color); |
| 74232d8e | 104 | static vm_page_t _vm_page_list_find2(int basequeue, int index); |
| b12defdc | 105 | static void _vm_page_deactivate_locked(vm_page_t m, int athead); |
| 984263bc | 106 | |
| b12defdc MD |
107 | /* |
| 108 | * Array of tailq lists | |
| 109 | */ | |
| 110 | __cachealign struct vpgqueues vm_page_queues[PQ_COUNT]; | |
| 984263bc | 111 | |
| 906c754c MD |
112 | LIST_HEAD(vm_page_action_list, vm_page_action); |
| 113 | struct vm_page_action_list action_list[VMACTION_HSIZE]; | |
| cd3c66bd | 114 | static volatile int vm_pages_waiting; |
| 906c754c | 115 | |
| 79d182b0 MD |
116 | static struct alist vm_contig_alist; |
| 117 | static struct almeta vm_contig_ameta[ALIST_RECORDS_65536]; | |
| 118 | static struct spinlock vm_contig_spin = SPINLOCK_INITIALIZER(&vm_contig_spin); | |
| 119 | ||
| 120 | static u_long vm_dma_reserved = 0; | |
| 121 | TUNABLE_ULONG("vm.dma_reserved", &vm_dma_reserved); | |
| 122 | SYSCTL_ULONG(_vm, OID_AUTO, dma_reserved, CTLFLAG_RD, &vm_dma_reserved, 0, | |
| 123 | "Memory reserved for DMA"); | |
| 124 | SYSCTL_UINT(_vm, OID_AUTO, dma_free_pages, CTLFLAG_RD, | |
| 125 | &vm_contig_alist.bl_free, 0, "Memory reserved for DMA"); | |
| 906c754c | 126 | |
| ef67e7a3 SZ |
127 | static int vm_contig_verbose = 0; |
| 128 | TUNABLE_INT("vm.contig_verbose", &vm_contig_verbose); | |
| 129 | ||
| 1f804340 MD |
130 | RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, |
| 131 | vm_pindex_t, pindex); | |
| 132 | ||
| 984263bc | 133 | static void |
| de71fd3f MD |
134 | vm_page_queue_init(void) |
| 135 | { | |
| 984263bc MD |
136 | int i; |
| 137 | ||
| de71fd3f | 138 | for (i = 0; i < PQ_L2_SIZE; i++) |
| 12e4aaff | 139 | vm_page_queues[PQ_FREE+i].cnt = &vmstats.v_free_count; |
| de71fd3f MD |
140 | for (i = 0; i < PQ_L2_SIZE; i++) |
| 141 | vm_page_queues[PQ_CACHE+i].cnt = &vmstats.v_cache_count; | |
| 027193eb MD |
142 | for (i = 0; i < PQ_L2_SIZE; i++) |
| 143 | vm_page_queues[PQ_INACTIVE+i].cnt = &vmstats.v_inactive_count; | |
| 144 | for (i = 0; i < PQ_L2_SIZE; i++) | |
| 145 | vm_page_queues[PQ_ACTIVE+i].cnt = &vmstats.v_active_count; | |
| 146 | for (i = 0; i < PQ_L2_SIZE; i++) | |
| 147 | vm_page_queues[PQ_HOLD+i].cnt = &vmstats.v_active_count; | |
| de71fd3f MD |
148 | /* PQ_NONE has no queue */ |
| 149 | ||
| b12defdc | 150 | for (i = 0; i < PQ_COUNT; i++) { |
| 984263bc | 151 | TAILQ_INIT(&vm_page_queues[i].pl); |
| b12defdc MD |
152 | spin_init(&vm_page_queues[i].spin); |
| 153 | } | |
| 906c754c MD |
154 | |
| 155 | for (i = 0; i < VMACTION_HSIZE; i++) | |
| 156 | LIST_INIT(&action_list[i]); | |
| 984263bc MD |
157 | } |
| 158 | ||
| de71fd3f MD |
159 | /* |
| 160 | * note: place in initialized data section? Is this necessary? | |
| 161 | */ | |
| 984263bc | 162 | long first_page = 0; |
| de71fd3f | 163 | int vm_page_array_size = 0; |
| 984263bc | 164 | int vm_page_zero_count = 0; |
| 79d182b0 MD |
165 | vm_page_t vm_page_array = NULL; |
| 166 | vm_paddr_t vm_low_phys_reserved; | |
| 984263bc MD |
167 | |
| 168 | /* | |
| de71fd3f | 169 | * (low level boot) |
| 984263bc | 170 | * |
| de71fd3f MD |
171 | * Sets the page size, perhaps based upon the memory size. |
| 172 | * Must be called before any use of page-size dependent functions. | |
| 984263bc MD |
173 | */ |
| 174 | void | |
| 175 | vm_set_page_size(void) | |
| 176 | { | |
| 12e4aaff MD |
177 | if (vmstats.v_page_size == 0) |
| 178 | vmstats.v_page_size = PAGE_SIZE; | |
| 179 | if (((vmstats.v_page_size - 1) & vmstats.v_page_size) != 0) | |
| 984263bc MD |
180 | panic("vm_set_page_size: page size not a power of two"); |
| 181 | } | |
| 182 | ||
| 183 | /* | |
| de71fd3f | 184 | * (low level boot) |
| 984263bc | 185 | * |
| de71fd3f MD |
186 | * Add a new page to the freelist for use by the system. New pages |
| 187 | * are added to both the head and tail of the associated free page | |
| 188 | * queue in a bottom-up fashion, so both zero'd and non-zero'd page | |
| 189 | * requests pull 'recent' adds (higher physical addresses) first. | |
| 161399b3 | 190 | * |
| bc3396b8 MD |
191 | * Beware that the page zeroing daemon will also be running soon after |
| 192 | * boot, moving pages from the head to the tail of the PQ_FREE queues. | |
| 193 | * | |
| 654a39f0 | 194 | * Must be called in a critical section. |
| 984263bc | 195 | */ |
| 79d182b0 | 196 | static void |
| 6ef943a3 | 197 | vm_add_new_page(vm_paddr_t pa) |
| 984263bc | 198 | { |
| 161399b3 | 199 | struct vpgqueues *vpq; |
| de71fd3f | 200 | vm_page_t m; |
| 984263bc | 201 | |
| 984263bc MD |
202 | m = PHYS_TO_VM_PAGE(pa); |
| 203 | m->phys_addr = pa; | |
| 204 | m->flags = 0; | |
| 205 | m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK; | |
| 85946b6c MD |
206 | #ifdef SMP |
| 207 | /* | |
| bc3396b8 MD |
208 | * Twist for cpu localization in addition to page coloring, so |
| 209 | * different cpus selecting by m->queue get different page colors. | |
| 85946b6c MD |
210 | */ |
| 211 | m->pc ^= ((pa >> PAGE_SHIFT) / PQ_L2_SIZE) & PQ_L2_MASK; | |
| 212 | m->pc ^= ((pa >> PAGE_SHIFT) / (PQ_L2_SIZE * PQ_L2_SIZE)) & PQ_L2_MASK; | |
| 213 | #endif | |
| 79d182b0 MD |
214 | /* |
| 215 | * Reserve a certain number of contiguous low memory pages for | |
| 216 | * contigmalloc() to use. | |
| 217 | */ | |
| 218 | if (pa < vm_low_phys_reserved) { | |
| 219 | atomic_add_int(&vmstats.v_page_count, 1); | |
| 220 | atomic_add_int(&vmstats.v_dma_pages, 1); | |
| 221 | m->queue = PQ_NONE; | |
| 222 | m->wire_count = 1; | |
| 3ae0c654 | 223 | atomic_add_int(&vmstats.v_wire_count, 1); |
| 79d182b0 MD |
224 | alist_free(&vm_contig_alist, pa >> PAGE_SHIFT, 1); |
| 225 | return; | |
| 226 | } | |
| 227 | ||
| 228 | /* | |
| 229 | * General page | |
| 230 | */ | |
| 984263bc | 231 | m->queue = m->pc + PQ_FREE; |
| 26bcc0c0 | 232 | KKASSERT(m->dirty == 0); |
| de71fd3f | 233 | |
| b12defdc MD |
234 | atomic_add_int(&vmstats.v_page_count, 1); |
| 235 | atomic_add_int(&vmstats.v_free_count, 1); | |
| 161399b3 | 236 | vpq = &vm_page_queues[m->queue]; |
| bc3396b8 MD |
237 | if ((vpq->flipflop & 15) == 0) { |
| 238 | pmap_zero_page(VM_PAGE_TO_PHYS(m)); | |
| 239 | m->flags |= PG_ZERO; | |
| 161399b3 | 240 | TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); |
| bc3396b8 MD |
241 | atomic_add_int(&vm_page_zero_count, 1); |
| 242 | } else { | |
| 161399b3 | 243 | TAILQ_INSERT_HEAD(&vpq->pl, m, pageq); |
| bc3396b8 MD |
244 | } |
| 245 | ++vpq->flipflop; | |
| b12defdc | 246 | ++vpq->lcnt; |
| 984263bc MD |
247 | } |
| 248 | ||
| 249 | /* | |
| de71fd3f | 250 | * (low level boot) |
| 984263bc | 251 | * |
| de71fd3f | 252 | * Initializes the resident memory module. |
| 984263bc | 253 | * |
| da23a592 MD |
254 | * Preallocates memory for critical VM structures and arrays prior to |
| 255 | * kernel_map becoming available. | |
| 26bcc0c0 | 256 | * |
| da23a592 MD |
257 | * Memory is allocated from (virtual2_start, virtual2_end) if available, |
| 258 | * otherwise memory is allocated from (virtual_start, virtual_end). | |
| 259 | * | |
| 260 | * On x86-64 (virtual_start, virtual_end) is only 2GB and may not be | |
| 261 | * large enough to hold vm_page_array & other structures for machines with | |
| 262 | * large amounts of ram, so we want to use virtual2* when available. | |
| 984263bc | 263 | */ |
| da23a592 MD |
264 | void |
| 265 | vm_page_startup(void) | |
| 984263bc | 266 | { |
| da23a592 | 267 | vm_offset_t vaddr = virtual2_start ? virtual2_start : virtual_start; |
| 984263bc | 268 | vm_offset_t mapped; |
| 6ef943a3 MD |
269 | vm_size_t npages; |
| 270 | vm_paddr_t page_range; | |
| 271 | vm_paddr_t new_end; | |
| 984263bc | 272 | int i; |
| 6ef943a3 | 273 | vm_paddr_t pa; |
| 984263bc | 274 | int nblocks; |
| 6ef943a3 | 275 | vm_paddr_t last_pa; |
| 6ef943a3 MD |
276 | vm_paddr_t end; |
| 277 | vm_paddr_t biggestone, biggestsize; | |
| 6ef943a3 | 278 | vm_paddr_t total; |
| 984263bc MD |
279 | |
| 280 | total = 0; | |
| 281 | biggestsize = 0; | |
| 282 | biggestone = 0; | |
| 283 | nblocks = 0; | |
| 284 | vaddr = round_page(vaddr); | |
| 285 | ||
| 286 | for (i = 0; phys_avail[i + 1]; i += 2) { | |
| aecf2182 MD |
287 | phys_avail[i] = round_page64(phys_avail[i]); |
| 288 | phys_avail[i + 1] = trunc_page64(phys_avail[i + 1]); | |
| 984263bc MD |
289 | } |
| 290 | ||
| 291 | for (i = 0; phys_avail[i + 1]; i += 2) { | |
| 6ef943a3 | 292 | vm_paddr_t size = phys_avail[i + 1] - phys_avail[i]; |
| 984263bc MD |
293 | |
| 294 | if (size > biggestsize) { | |
| 295 | biggestone = i; | |
| 296 | biggestsize = size; | |
| 297 | } | |
| 298 | ++nblocks; | |
| 299 | total += size; | |
| 300 | } | |
| 301 | ||
| 302 | end = phys_avail[biggestone+1]; | |
| 1f804340 | 303 | end = trunc_page(end); |
| 984263bc MD |
304 | |
| 305 | /* | |
| 306 | * Initialize the queue headers for the free queue, the active queue | |
| 307 | * and the inactive queue. | |
| 308 | */ | |
| 984263bc MD |
309 | vm_page_queue_init(); |
| 310 | ||
| 6abe3bd0 | 311 | #if !defined(_KERNEL_VIRTUAL) |
| 984263bc | 312 | /* |
| b12defdc MD |
313 | * VKERNELs don't support minidumps and as such don't need |
| 314 | * vm_page_dump | |
| 315 | * | |
| 8e5e6f1b AH |
316 | * Allocate a bitmap to indicate that a random physical page |
| 317 | * needs to be included in a minidump. | |
| 318 | * | |
| 319 | * The amd64 port needs this to indicate which direct map pages | |
| 320 | * need to be dumped, via calls to dump_add_page()/dump_drop_page(). | |
| 321 | * | |
| 322 | * However, i386 still needs this workspace internally within the | |
| 323 | * minidump code. In theory, they are not needed on i386, but are | |
| 324 | * included should the sf_buf code decide to use them. | |
| 325 | */ | |
| 326 | page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE; | |
| 327 | vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); | |
| 328 | end -= vm_page_dump_size; | |
| 329 | vm_page_dump = (void *)pmap_map(&vaddr, end, end + vm_page_dump_size, | |
| 330 | VM_PROT_READ | VM_PROT_WRITE); | |
| 331 | bzero((void *)vm_page_dump, vm_page_dump_size); | |
| 6abe3bd0 | 332 | #endif |
| 8e5e6f1b | 333 | /* |
| 984263bc MD |
334 | * Compute the number of pages of memory that will be available for |
| 335 | * use (taking into account the overhead of a page structure per | |
| 336 | * page). | |
| 337 | */ | |
| 984263bc | 338 | first_page = phys_avail[0] / PAGE_SIZE; |
| 984263bc | 339 | page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page; |
| 1f804340 | 340 | npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE; |
| de71fd3f | 341 | |
| 79d182b0 MD |
342 | #ifndef _KERNEL_VIRTUAL |
| 343 | /* | |
| 344 | * (only applies to real kernels) | |
| 345 | * | |
| 346 | * Initialize the contiguous reserve map. We initially reserve up | |
| 347 | * to 1/4 available physical memory or 65536 pages (~256MB), whichever | |
| 348 | * is lower. | |
| 349 | * | |
| 350 | * Once device initialization is complete we return most of the | |
| 351 | * reserved memory back to the normal page queues but leave some | |
| 352 | * in reserve for things like usb attachments. | |
| 353 | */ | |
| 354 | vm_low_phys_reserved = (vm_paddr_t)65536 << PAGE_SHIFT; | |
| 355 | if (vm_low_phys_reserved > total / 4) | |
| 356 | vm_low_phys_reserved = total / 4; | |
| 357 | if (vm_dma_reserved == 0) { | |
| 358 | vm_dma_reserved = 16 * 1024 * 1024; /* 16MB */ | |
| 359 | if (vm_dma_reserved > total / 16) | |
| 360 | vm_dma_reserved = total / 16; | |
| 361 | } | |
| 362 | #endif | |
| 363 | alist_init(&vm_contig_alist, 65536, vm_contig_ameta, | |
| 364 | ALIST_RECORDS_65536); | |
| 365 | ||
| 984263bc MD |
366 | /* |
| 367 | * Initialize the mem entry structures now, and put them in the free | |
| 368 | * queue. | |
| 369 | */ | |
| 984263bc | 370 | new_end = trunc_page(end - page_range * sizeof(struct vm_page)); |
| 79d182b0 | 371 | mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); |
| 8e5e6f1b AH |
372 | vm_page_array = (vm_page_t)mapped; |
| 373 | ||
| 0e6594a8 | 374 | #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL) |
| 8e5e6f1b AH |
375 | /* |
| 376 | * since pmap_map on amd64 returns stuff out of a direct-map region, | |
| 377 | * we have to manually add these pages to the minidump tracking so | |
| 378 | * that they can be dumped, including the vm_page_array. | |
| 379 | */ | |
| 380 | for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE) | |
| 381 | dump_add_page(pa); | |
| 8fdd3267 | 382 | #endif |
| 984263bc MD |
383 | |
| 384 | /* | |
| 385 | * Clear all of the page structures | |
| 386 | */ | |
| 387 | bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); | |
| 388 | vm_page_array_size = page_range; | |
| 389 | ||
| 390 | /* | |
| 161399b3 | 391 | * Construct the free queue(s) in ascending order (by physical |
| 984263bc MD |
392 | * address) so that the first 16MB of physical memory is allocated |
| 393 | * last rather than first. On large-memory machines, this avoids | |
| 394 | * the exhaustion of low physical memory before isa_dmainit has run. | |
| 395 | */ | |
| 12e4aaff MD |
396 | vmstats.v_page_count = 0; |
| 397 | vmstats.v_free_count = 0; | |
| 984263bc MD |
398 | for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { |
| 399 | pa = phys_avail[i]; | |
| 400 | if (i == biggestone) | |
| 401 | last_pa = new_end; | |
| 402 | else | |
| 403 | last_pa = phys_avail[i + 1]; | |
| 404 | while (pa < last_pa && npages-- > 0) { | |
| 405 | vm_add_new_page(pa); | |
| 406 | pa += PAGE_SIZE; | |
| 407 | } | |
| 408 | } | |
| da23a592 MD |
409 | if (virtual2_start) |
| 410 | virtual2_start = vaddr; | |
| 411 | else | |
| 412 | virtual_start = vaddr; | |
| 984263bc MD |
413 | } |
| 414 | ||
| 415 | /* | |
| 79d182b0 MD |
416 | * We tended to reserve a ton of memory for contigmalloc(). Now that most |
| 417 | * drivers have initialized we want to return most the remaining free | |
| 418 | * reserve back to the VM page queues so they can be used for normal | |
| 419 | * allocations. | |
| 420 | * | |
| 421 | * We leave vm_dma_reserved bytes worth of free pages in the reserve pool. | |
| 422 | */ | |
| 423 | static void | |
| 424 | vm_page_startup_finish(void *dummy __unused) | |
| 425 | { | |
| 426 | alist_blk_t blk; | |
| 427 | alist_blk_t rblk; | |
| 428 | alist_blk_t count; | |
| 429 | alist_blk_t xcount; | |
| 430 | alist_blk_t bfree; | |
| 431 | vm_page_t m; | |
| 432 | ||
| 433 | spin_lock(&vm_contig_spin); | |
| 434 | for (;;) { | |
| 435 | bfree = alist_free_info(&vm_contig_alist, &blk, &count); | |
| 436 | if (bfree <= vm_dma_reserved / PAGE_SIZE) | |
| 437 | break; | |
| 438 | if (count == 0) | |
| 439 | break; | |
| 440 | ||
| 441 | /* | |
| 442 | * Figure out how much of the initial reserve we have to | |
| 443 | * free in order to reach our target. | |
| 444 | */ | |
| 445 | bfree -= vm_dma_reserved / PAGE_SIZE; | |
| 446 | if (count > bfree) { | |
| 447 | blk += count - bfree; | |
| 448 | count = bfree; | |
| 449 | } | |
| 450 | ||
| 451 | /* | |
| 452 | * Calculate the nearest power of 2 <= count. | |
| 453 | */ | |
| 454 | for (xcount = 1; xcount <= count; xcount <<= 1) | |
| 455 | ; | |
| 456 | xcount >>= 1; | |
| 457 | blk += count - xcount; | |
| 458 | count = xcount; | |
| 459 | ||
| 460 | /* | |
| 461 | * Allocate the pages from the alist, then free them to | |
| 462 | * the normal VM page queues. | |
| 463 | * | |
| 464 | * Pages allocated from the alist are wired. We have to | |
| 465 | * busy, unwire, and free them. We must also adjust | |
| 466 | * vm_low_phys_reserved before freeing any pages to prevent | |
| 467 | * confusion. | |
| 468 | */ | |
| 469 | rblk = alist_alloc(&vm_contig_alist, blk, count); | |
| 470 | if (rblk != blk) { | |
| 471 | kprintf("vm_page_startup_finish: Unable to return " | |
| 472 | "dma space @0x%08x/%d -> 0x%08x\n", | |
| 473 | blk, count, rblk); | |
| 474 | break; | |
| 475 | } | |
| 476 | atomic_add_int(&vmstats.v_dma_pages, -count); | |
| 477 | spin_unlock(&vm_contig_spin); | |
| 478 | ||
| 479 | m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT); | |
| 480 | vm_low_phys_reserved = VM_PAGE_TO_PHYS(m); | |
| 481 | while (count) { | |
| 482 | vm_page_busy_wait(m, FALSE, "cpgfr"); | |
| 483 | vm_page_unwire(m, 0); | |
| 484 | vm_page_free(m); | |
| 485 | --count; | |
| 486 | ++m; | |
| 487 | } | |
| 488 | spin_lock(&vm_contig_spin); | |
| 489 | } | |
| 490 | spin_unlock(&vm_contig_spin); | |
| 491 | ||
| 492 | /* | |
| 493 | * Print out how much DMA space drivers have already allocated and | |
| 494 | * how much is left over. | |
| 495 | */ | |
| 496 | kprintf("DMA space used: %jdk, remaining available: %jdk\n", | |
| 497 | (intmax_t)(vmstats.v_dma_pages - vm_contig_alist.bl_free) * | |
| 498 | (PAGE_SIZE / 1024), | |
| 499 | (intmax_t)vm_contig_alist.bl_free * (PAGE_SIZE / 1024)); | |
| 500 | } | |
| 501 | SYSINIT(vm_pgend, SI_SUB_PROC0_POST, SI_ORDER_ANY, | |
| 502 | vm_page_startup_finish, NULL) | |
| 503 | ||
| 504 | ||
| 505 | /* | |
| 1f804340 MD |
506 | * Scan comparison function for Red-Black tree scans. An inclusive |
| 507 | * (start,end) is expected. Other fields are not used. | |
| 984263bc | 508 | */ |
| 1f804340 MD |
509 | int |
| 510 | rb_vm_page_scancmp(struct vm_page *p, void *data) | |
| 984263bc | 511 | { |
| 1f804340 | 512 | struct rb_vm_page_scan_info *info = data; |
| 984263bc | 513 | |
| 1f804340 MD |
514 | if (p->pindex < info->start_pindex) |
| 515 | return(-1); | |
| 516 | if (p->pindex > info->end_pindex) | |
| 517 | return(1); | |
| 518 | return(0); | |
| 519 | } | |
| 520 | ||
| 521 | int | |
| 522 | rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2) | |
| 523 | { | |
| 524 | if (p1->pindex < p2->pindex) | |
| 525 | return(-1); | |
| 526 | if (p1->pindex > p2->pindex) | |
| 527 | return(1); | |
| 528 | return(0); | |
| 984263bc MD |
529 | } |
| 530 | ||
| de71fd3f | 531 | /* |
| b12defdc MD |
532 | * Each page queue has its own spin lock, which is fairly optimal for |
| 533 | * allocating and freeing pages at least. | |
| 534 | * | |
| 535 | * The caller must hold the vm_page_spin_lock() before locking a vm_page's | |
| 536 | * queue spinlock via this function. Also note that m->queue cannot change | |
| 537 | * unless both the page and queue are locked. | |
| 538 | */ | |
| 539 | static __inline | |
| 540 | void | |
| 541 | _vm_page_queue_spin_lock(vm_page_t m) | |
| 542 | { | |
| 543 | u_short queue; | |
| 544 | ||
| 545 | queue = m->queue; | |
| 546 | if (queue != PQ_NONE) { | |
| 547 | spin_lock(&vm_page_queues[queue].spin); | |
| 548 | KKASSERT(queue == m->queue); | |
| 549 | } | |
| 550 | } | |
| 551 | ||
| 552 | static __inline | |
| 553 | void | |
| 554 | _vm_page_queue_spin_unlock(vm_page_t m) | |
| 555 | { | |
| 556 | u_short queue; | |
| 557 | ||
| 558 | queue = m->queue; | |
| 559 | cpu_ccfence(); | |
| 560 | if (queue != PQ_NONE) | |
| 561 | spin_unlock(&vm_page_queues[queue].spin); | |
| 562 | } | |
| 563 | ||
| 564 | static __inline | |
| 565 | void | |
| 566 | _vm_page_queues_spin_lock(u_short queue) | |
| 567 | { | |
| 568 | cpu_ccfence(); | |
| 569 | if (queue != PQ_NONE) | |
| 570 | spin_lock(&vm_page_queues[queue].spin); | |
| 571 | } | |
| 572 | ||
| 573 | ||
| 574 | static __inline | |
| 575 | void | |
| 576 | _vm_page_queues_spin_unlock(u_short queue) | |
| 577 | { | |
| 578 | cpu_ccfence(); | |
| 579 | if (queue != PQ_NONE) | |
| 580 | spin_unlock(&vm_page_queues[queue].spin); | |
| 581 | } | |
| 582 | ||
| 583 | void | |
| 584 | vm_page_queue_spin_lock(vm_page_t m) | |
| 585 | { | |
| 586 | _vm_page_queue_spin_lock(m); | |
| 587 | } | |
| 588 | ||
| 589 | void | |
| 590 | vm_page_queues_spin_lock(u_short queue) | |
| 591 | { | |
| 592 | _vm_page_queues_spin_lock(queue); | |
| 593 | } | |
| 594 | ||
| 595 | void | |
| 596 | vm_page_queue_spin_unlock(vm_page_t m) | |
| 597 | { | |
| 598 | _vm_page_queue_spin_unlock(m); | |
| 599 | } | |
| 600 | ||
| 601 | void | |
| 602 | vm_page_queues_spin_unlock(u_short queue) | |
| 603 | { | |
| 604 | _vm_page_queues_spin_unlock(queue); | |
| 605 | } | |
| 606 | ||
| 607 | /* | |
| 608 | * This locks the specified vm_page and its queue in the proper order | |
| 609 | * (page first, then queue). The queue may change so the caller must | |
| 610 | * recheck on return. | |
| 611 | */ | |
| 612 | static __inline | |
| 613 | void | |
| 614 | _vm_page_and_queue_spin_lock(vm_page_t m) | |
| 615 | { | |
| 616 | vm_page_spin_lock(m); | |
| 617 | _vm_page_queue_spin_lock(m); | |
| 618 | } | |
| 619 | ||
| 620 | static __inline | |
| 621 | void | |
| 622 | _vm_page_and_queue_spin_unlock(vm_page_t m) | |
| 623 | { | |
| 624 | _vm_page_queues_spin_unlock(m->queue); | |
| 625 | vm_page_spin_unlock(m); | |
| 626 | } | |
| 627 | ||
| 628 | void | |
| 629 | vm_page_and_queue_spin_unlock(vm_page_t m) | |
| 630 | { | |
| 631 | _vm_page_and_queue_spin_unlock(m); | |
| 632 | } | |
| 633 | ||
| 634 | void | |
| 635 | vm_page_and_queue_spin_lock(vm_page_t m) | |
| 636 | { | |
| 637 | _vm_page_and_queue_spin_lock(m); | |
| 638 | } | |
| 639 | ||
| 640 | /* | |
| 641 | * Helper function removes vm_page from its current queue. | |
| 642 | * Returns the base queue the page used to be on. | |
| 643 | * | |
| 644 | * The vm_page and the queue must be spinlocked. | |
| 645 | * This function will unlock the queue but leave the page spinlocked. | |
| 646 | */ | |
| 647 | static __inline u_short | |
| 648 | _vm_page_rem_queue_spinlocked(vm_page_t m) | |
| 649 | { | |
| 650 | struct vpgqueues *pq; | |
| 651 | u_short queue; | |
| 652 | ||
| 653 | queue = m->queue; | |
| 654 | if (queue != PQ_NONE) { | |
| 655 | pq = &vm_page_queues[queue]; | |
| 656 | TAILQ_REMOVE(&pq->pl, m, pageq); | |
| 657 | atomic_add_int(pq->cnt, -1); | |
| 658 | pq->lcnt--; | |
| 659 | m->queue = PQ_NONE; | |
| 85946b6c | 660 | vm_page_queues_spin_unlock(queue); |
| b12defdc MD |
661 | if ((queue - m->pc) == PQ_FREE && (m->flags & PG_ZERO)) |
| 662 | atomic_subtract_int(&vm_page_zero_count, 1); | |
| b12defdc MD |
663 | if ((queue - m->pc) == PQ_CACHE || (queue - m->pc) == PQ_FREE) |
| 664 | return (queue - m->pc); | |
| 665 | } | |
| 666 | return queue; | |
| 667 | } | |
| 668 | ||
| 669 | /* | |
| 670 | * Helper function places the vm_page on the specified queue. | |
| 671 | * | |
| 672 | * The vm_page must be spinlocked. | |
| 673 | * This function will return with both the page and the queue locked. | |
| 674 | */ | |
| 675 | static __inline void | |
| 676 | _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead) | |
| 677 | { | |
| 678 | struct vpgqueues *pq; | |
| 679 | ||
| 680 | KKASSERT(m->queue == PQ_NONE); | |
| 681 | ||
| 682 | if (queue != PQ_NONE) { | |
| 683 | vm_page_queues_spin_lock(queue); | |
| 684 | pq = &vm_page_queues[queue]; | |
| 685 | ++pq->lcnt; | |
| 686 | atomic_add_int(pq->cnt, 1); | |
| 687 | m->queue = queue; | |
| 688 | ||
| 689 | /* | |
| 690 | * Put zero'd pages on the end ( where we look for zero'd pages | |
| 691 | * first ) and non-zerod pages at the head. | |
| 692 | */ | |
| 693 | if (queue - m->pc == PQ_FREE) { | |
| 694 | if (m->flags & PG_ZERO) { | |
| 695 | TAILQ_INSERT_TAIL(&pq->pl, m, pageq); | |
| 696 | atomic_add_int(&vm_page_zero_count, 1); | |
| 697 | } else { | |
| 698 | TAILQ_INSERT_HEAD(&pq->pl, m, pageq); | |
| 699 | } | |
| 700 | } else if (athead) { | |
| 701 | TAILQ_INSERT_HEAD(&pq->pl, m, pageq); | |
| 702 | } else { | |
| 703 | TAILQ_INSERT_TAIL(&pq->pl, m, pageq); | |
| 704 | } | |
| 705 | /* leave the queue spinlocked */ | |
| 706 | } | |
| 707 | } | |
| 708 | ||
| 709 | /* | |
| 710 | * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE) | |
| 711 | * m->busy is zero. Returns TRUE if it had to sleep, FALSE if we | |
| 712 | * did not. Only one sleep call will be made before returning. | |
| 713 | * | |
| 714 | * This function does NOT busy the page and on return the page is not | |
| 715 | * guaranteed to be available. | |
| 716 | */ | |
| 717 | void | |
| 718 | vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg) | |
| 719 | { | |
| 720 | u_int32_t flags; | |
| 721 | ||
| 722 | for (;;) { | |
| 723 | flags = m->flags; | |
| 724 | cpu_ccfence(); | |
| 725 | ||
| 726 | if ((flags & PG_BUSY) == 0 && | |
| 727 | (also_m_busy == 0 || (flags & PG_SBUSY) == 0)) { | |
| 728 | break; | |
| 729 | } | |
| 730 | tsleep_interlock(m, 0); | |
| 731 | if (atomic_cmpset_int(&m->flags, flags, | |
| 732 | flags | PG_WANTED | PG_REFERENCED)) { | |
| 733 | tsleep(m, PINTERLOCKED, msg, 0); | |
| 734 | break; | |
| 735 | } | |
| 736 | } | |
| 737 | } | |
| 738 | ||
| 739 | /* | |
| 740 | * Wait until PG_BUSY can be set, then set it. If also_m_busy is TRUE we | |
| 741 | * also wait for m->busy to become 0 before setting PG_BUSY. | |
| 742 | */ | |
| 743 | void | |
| 744 | VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m, | |
| 745 | int also_m_busy, const char *msg | |
| 746 | VM_PAGE_DEBUG_ARGS) | |
| 747 | { | |
| 748 | u_int32_t flags; | |
| 749 | ||
| 750 | for (;;) { | |
| 751 | flags = m->flags; | |
| 752 | cpu_ccfence(); | |
| 753 | if (flags & PG_BUSY) { | |
| 754 | tsleep_interlock(m, 0); | |
| 755 | if (atomic_cmpset_int(&m->flags, flags, | |
| 756 | flags | PG_WANTED | PG_REFERENCED)) { | |
| 757 | tsleep(m, PINTERLOCKED, msg, 0); | |
| 758 | } | |
| 759 | } else if (also_m_busy && (flags & PG_SBUSY)) { | |
| 760 | tsleep_interlock(m, 0); | |
| 761 | if (atomic_cmpset_int(&m->flags, flags, | |
| 762 | flags | PG_WANTED | PG_REFERENCED)) { | |
| 763 | tsleep(m, PINTERLOCKED, msg, 0); | |
| 764 | } | |
| 765 | } else { | |
| 766 | if (atomic_cmpset_int(&m->flags, flags, | |
| 767 | flags | PG_BUSY)) { | |
| 768 | #ifdef VM_PAGE_DEBUG | |
| 769 | m->busy_func = func; | |
| 770 | m->busy_line = lineno; | |
| 771 | #endif | |
| 772 | break; | |
| 773 | } | |
| 774 | } | |
| 775 | } | |
| 776 | } | |
| 777 | ||
| 778 | /* | |
| 779 | * Attempt to set PG_BUSY. If also_m_busy is TRUE we only succeed if m->busy | |
| 780 | * is also 0. | |
| 781 | * | |
| 782 | * Returns non-zero on failure. | |
| 783 | */ | |
| 784 | int | |
| 785 | VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, int also_m_busy | |
| 786 | VM_PAGE_DEBUG_ARGS) | |
| 787 | { | |
| 788 | u_int32_t flags; | |
| 789 | ||
| 790 | for (;;) { | |
| 791 | flags = m->flags; | |
| 792 | cpu_ccfence(); | |
| 793 | if (flags & PG_BUSY) | |
| 794 | return TRUE; | |
| 795 | if (also_m_busy && (flags & PG_SBUSY)) | |
| 796 | return TRUE; | |
| 797 | if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { | |
| 798 | #ifdef VM_PAGE_DEBUG | |
| 799 | m->busy_func = func; | |
| 800 | m->busy_line = lineno; | |
| 801 | #endif | |
| 802 | return FALSE; | |
| 803 | } | |
| 804 | } | |
| 805 | } | |
| 806 | ||
| 807 | /* | |
| 808 | * Clear the PG_BUSY flag and return non-zero to indicate to the caller | |
| 809 | * that a wakeup() should be performed. | |
| 810 | * | |
| 811 | * The vm_page must be spinlocked and will remain spinlocked on return. | |
| 812 | * The related queue must NOT be spinlocked (which could deadlock us). | |
| 813 | * | |
| 814 | * (inline version) | |
| 815 | */ | |
| 816 | static __inline | |
| 817 | int | |
| 818 | _vm_page_wakeup(vm_page_t m) | |
| 819 | { | |
| 820 | u_int32_t flags; | |
| 821 | ||
| 822 | for (;;) { | |
| 823 | flags = m->flags; | |
| 824 | cpu_ccfence(); | |
| 825 | if (atomic_cmpset_int(&m->flags, flags, | |
| 826 | flags & ~(PG_BUSY | PG_WANTED))) { | |
| 827 | break; | |
| 828 | } | |
| 829 | } | |
| 830 | return(flags & PG_WANTED); | |
| 831 | } | |
| 832 | ||
| 833 | /* | |
| 834 | * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This | |
| 835 | * is typically the last call you make on a page before moving onto | |
| 836 | * other things. | |
| 837 | */ | |
| 838 | void | |
| 839 | vm_page_wakeup(vm_page_t m) | |
| 840 | { | |
| 841 | KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!")); | |
| 842 | vm_page_spin_lock(m); | |
| 843 | if (_vm_page_wakeup(m)) { | |
| 844 | vm_page_spin_unlock(m); | |
| 845 | wakeup(m); | |
| 846 | } else { | |
| 847 | vm_page_spin_unlock(m); | |
| 848 | } | |
| 849 | } | |
| 850 | ||
| 851 | /* | |
| 573fb415 MD |
852 | * Holding a page keeps it from being reused. Other parts of the system |
| 853 | * can still disassociate the page from its current object and free it, or | |
| 854 | * perform read or write I/O on it and/or otherwise manipulate the page, | |
| 855 | * but if the page is held the VM system will leave the page and its data | |
| 856 | * intact and not reuse the page for other purposes until the last hold | |
| 857 | * reference is released. (see vm_page_wire() if you want to prevent the | |
| 858 | * page from being disassociated from its object too). | |
| 859 | * | |
| 573fb415 MD |
860 | * The caller must still validate the contents of the page and, if necessary, |
| 861 | * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete | |
| 862 | * before manipulating the page. | |
| b12defdc MD |
863 | * |
| 864 | * XXX get vm_page_spin_lock() here and move FREE->HOLD if necessary | |
| 573fb415 MD |
865 | */ |
| 866 | void | |
| 867 | vm_page_hold(vm_page_t m) | |
| 868 | { | |
| b12defdc MD |
869 | vm_page_spin_lock(m); |
| 870 | atomic_add_int(&m->hold_count, 1); | |
| 871 | if (m->queue - m->pc == PQ_FREE) { | |
| 872 | _vm_page_queue_spin_lock(m); | |
| 873 | _vm_page_rem_queue_spinlocked(m); | |
| 027193eb | 874 | _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0); |
| b12defdc MD |
875 | _vm_page_queue_spin_unlock(m); |
| 876 | } | |
| 877 | vm_page_spin_unlock(m); | |
| 573fb415 MD |
878 | } |
| 879 | ||
| 880 | /* | |
| de71fd3f | 881 | * The opposite of vm_page_hold(). A page can be freed while being held, |
| b12defdc MD |
882 | * which places it on the PQ_HOLD queue. If we are able to busy the page |
| 883 | * after the hold count drops to zero we will move the page to the | |
| 884 | * appropriate PQ_FREE queue by calling vm_page_free_toq(). | |
| de71fd3f | 885 | */ |
| 984263bc | 886 | void |
| 573fb415 | 887 | vm_page_unhold(vm_page_t m) |
| 984263bc | 888 | { |
| b12defdc MD |
889 | vm_page_spin_lock(m); |
| 890 | atomic_add_int(&m->hold_count, -1); | |
| 027193eb | 891 | if (m->hold_count == 0 && m->queue - m->pc == PQ_HOLD) { |
| b12defdc MD |
892 | _vm_page_queue_spin_lock(m); |
| 893 | _vm_page_rem_queue_spinlocked(m); | |
| 894 | _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 0); | |
| 895 | _vm_page_queue_spin_unlock(m); | |
| 97edb3b6 | 896 | } |
| b12defdc | 897 | vm_page_spin_unlock(m); |
| 984263bc MD |
898 | } |
| 899 | ||
| 900 | /* | |
| 573fb415 | 901 | * Inserts the given vm_page into the object and object list. |
| 984263bc | 902 | * |
| de71fd3f MD |
903 | * The pagetables are not updated but will presumably fault the page |
| 904 | * in if necessary, or if a kernel page the caller will at some point | |
| 905 | * enter the page into the kernel's pmap. We are not allowed to block | |
| 906 | * here so we *can't* do this anyway. | |
| 984263bc | 907 | * |
| de71fd3f | 908 | * This routine may not block. |
| 398c240d | 909 | * This routine must be called with the vm_object held. |
| 654a39f0 | 910 | * This routine must be called with a critical section held. |
| d2d8515b MD |
911 | * |
| 912 | * This routine returns TRUE if the page was inserted into the object | |
| 913 | * successfully, and FALSE if the page already exists in the object. | |
| 984263bc | 914 | */ |
| d2d8515b | 915 | int |
| 984263bc MD |
916 | vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) |
| 917 | { | |
| b12defdc | 918 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); |
| 984263bc MD |
919 | if (m->object != NULL) |
| 920 | panic("vm_page_insert: already inserted"); | |
| 921 | ||
| b12defdc | 922 | object->generation++; |
| b12defdc | 923 | |
| 984263bc | 924 | /* |
| b12defdc MD |
925 | * Record the object/offset pair in this page and add the |
| 926 | * pv_list_count of the page to the object. | |
| 927 | * | |
| 928 | * The vm_page spin lock is required for interactions with the pmap. | |
| 984263bc | 929 | */ |
| b12defdc | 930 | vm_page_spin_lock(m); |
| 984263bc MD |
931 | m->object = object; |
| 932 | m->pindex = pindex; | |
| d2d8515b MD |
933 | if (vm_page_rb_tree_RB_INSERT(&object->rb_memq, m)) { |
| 934 | m->object = NULL; | |
| 935 | m->pindex = 0; | |
| 936 | vm_page_spin_unlock(m); | |
| 937 | return FALSE; | |
| 938 | } | |
| 939 | object->resident_page_count++; | |
| 85946b6c | 940 | /* atomic_add_int(&object->agg_pv_list_count, m->md.pv_list_count); */ |
| b12defdc | 941 | vm_page_spin_unlock(m); |
| 50a55c46 MD |
942 | |
| 943 | /* | |
| 984263bc MD |
944 | * Since we are inserting a new and possibly dirty page, |
| 945 | * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. | |
| 946 | */ | |
| 17cde63e | 947 | if ((m->valid & m->dirty) || (m->flags & PG_WRITEABLE)) |
| 984263bc | 948 | vm_object_set_writeable_dirty(object); |
| 096e95c0 MD |
949 | |
| 950 | /* | |
| 951 | * Checks for a swap assignment and sets PG_SWAPPED if appropriate. | |
| 952 | */ | |
| 953 | swap_pager_page_inserted(m); | |
| d2d8515b | 954 | return TRUE; |
| 984263bc MD |
955 | } |
| 956 | ||
| 957 | /* | |
| b12defdc | 958 | * Removes the given vm_page_t from the (object,index) table |
| 984263bc | 959 | * |
| de71fd3f MD |
960 | * The underlying pmap entry (if any) is NOT removed here. |
| 961 | * This routine may not block. | |
| 9765affa | 962 | * |
| 9ad0147b MD |
963 | * The page must be BUSY and will remain BUSY on return. |
| 964 | * No other requirements. | |
| 9765affa | 965 | * |
| 9ad0147b MD |
966 | * NOTE: FreeBSD side effect was to unbusy the page on return. We leave |
| 967 | * it busy. | |
| 984263bc | 968 | */ |
| 984263bc MD |
969 | void |
| 970 | vm_page_remove(vm_page_t m) | |
| 971 | { | |
| 972 | vm_object_t object; | |
| 973 | ||
| 654a39f0 | 974 | if (m->object == NULL) { |
| 984263bc | 975 | return; |
| 654a39f0 | 976 | } |
| 984263bc | 977 | |
| de71fd3f | 978 | if ((m->flags & PG_BUSY) == 0) |
| 984263bc | 979 | panic("vm_page_remove: page not busy"); |
| 984263bc | 980 | |
| 984263bc MD |
981 | object = m->object; |
| 982 | ||
| 398c240d VS |
983 | vm_object_hold(object); |
| 984 | ||
| 984263bc | 985 | /* |
| 1f804340 | 986 | * Remove the page from the object and update the object. |
| b12defdc MD |
987 | * |
| 988 | * The vm_page spin lock is required for interactions with the pmap. | |
| 984263bc | 989 | */ |
| b12defdc | 990 | vm_page_spin_lock(m); |
| 1f804340 | 991 | vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m); |
| 984263bc | 992 | object->resident_page_count--; |
| 85946b6c | 993 | /* atomic_add_int(&object->agg_pv_list_count, -m->md.pv_list_count); */ |
| 984263bc | 994 | m->object = NULL; |
| b12defdc | 995 | vm_page_spin_unlock(m); |
| 1f804340 | 996 | |
| b12defdc | 997 | object->generation++; |
| 398c240d | 998 | |
| b12defdc | 999 | vm_object_drop(object); |
| 984263bc MD |
1000 | } |
| 1001 | ||
| 1002 | /* | |
| de71fd3f MD |
1003 | * Locate and return the page at (object, pindex), or NULL if the |
| 1004 | * page could not be found. | |
| 1005 | * | |
| b12defdc | 1006 | * The caller must hold the vm_object token. |
| 984263bc | 1007 | */ |
| 984263bc MD |
1008 | vm_page_t |
| 1009 | vm_page_lookup(vm_object_t object, vm_pindex_t pindex) | |
| 1010 | { | |
| 1011 | vm_page_t m; | |
| 984263bc MD |
1012 | |
| 1013 | /* | |
| 1014 | * Search the hash table for this object/offset pair | |
| 1015 | */ | |
| b12defdc | 1016 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); |
| 1f804340 | 1017 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); |
| 1f804340 MD |
1018 | KKASSERT(m == NULL || (m->object == object && m->pindex == pindex)); |
| 1019 | return(m); | |
| 984263bc MD |
1020 | } |
| 1021 | ||
| b12defdc MD |
1022 | vm_page_t |
| 1023 | VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *object, | |
| 1024 | vm_pindex_t pindex, | |
| 1025 | int also_m_busy, const char *msg | |
| 1026 | VM_PAGE_DEBUG_ARGS) | |
| 1027 | { | |
| 1028 | u_int32_t flags; | |
| 1029 | vm_page_t m; | |
| 1030 | ||
| 1031 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); | |
| 1032 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); | |
| 1033 | while (m) { | |
| 1034 | KKASSERT(m->object == object && m->pindex == pindex); | |
| 1035 | flags = m->flags; | |
| 1036 | cpu_ccfence(); | |
| 1037 | if (flags & PG_BUSY) { | |
| 1038 | tsleep_interlock(m, 0); | |
| 1039 | if (atomic_cmpset_int(&m->flags, flags, | |
| 1040 | flags | PG_WANTED | PG_REFERENCED)) { | |
| 1041 | tsleep(m, PINTERLOCKED, msg, 0); | |
| 1042 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, | |
| 1043 | pindex); | |
| 1044 | } | |
| 1045 | } else if (also_m_busy && (flags & PG_SBUSY)) { | |
| 1046 | tsleep_interlock(m, 0); | |
| 1047 | if (atomic_cmpset_int(&m->flags, flags, | |
| 1048 | flags | PG_WANTED | PG_REFERENCED)) { | |
| 1049 | tsleep(m, PINTERLOCKED, msg, 0); | |
| 1050 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, | |
| 1051 | pindex); | |
| 1052 | } | |
| 1053 | } else if (atomic_cmpset_int(&m->flags, flags, | |
| 1054 | flags | PG_BUSY)) { | |
| 1055 | #ifdef VM_PAGE_DEBUG | |
| 1056 | m->busy_func = func; | |
| 1057 | m->busy_line = lineno; | |
| 1058 | #endif | |
| 1059 | break; | |
| 1060 | } | |
| 1061 | } | |
| 1062 | return m; | |
| 1063 | } | |
| 1064 | ||
| 984263bc | 1065 | /* |
| b12defdc | 1066 | * Attempt to lookup and busy a page. |
| 984263bc | 1067 | * |
| b12defdc | 1068 | * Returns NULL if the page could not be found |
| 984263bc | 1069 | * |
| b12defdc MD |
1070 | * Returns a vm_page and error == TRUE if the page exists but could not |
| 1071 | * be busied. | |
| 984263bc | 1072 | * |
| b12defdc MD |
1073 | * Returns a vm_page and error == FALSE on success. |
| 1074 | */ | |
| 1075 | vm_page_t | |
| 1076 | VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, | |
| 1077 | vm_pindex_t pindex, | |
| 1078 | int also_m_busy, int *errorp | |
| 1079 | VM_PAGE_DEBUG_ARGS) | |
| 1080 | { | |
| 1081 | u_int32_t flags; | |
| 1082 | vm_page_t m; | |
| 1083 | ||
| 1084 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); | |
| 1085 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); | |
| 1086 | *errorp = FALSE; | |
| 1087 | while (m) { | |
| 1088 | KKASSERT(m->object == object && m->pindex == pindex); | |
| 1089 | flags = m->flags; | |
| 1090 | cpu_ccfence(); | |
| 1091 | if (flags & PG_BUSY) { | |
| 1092 | *errorp = TRUE; | |
| 1093 | break; | |
| 1094 | } | |
| 1095 | if (also_m_busy && (flags & PG_SBUSY)) { | |
| 1096 | *errorp = TRUE; | |
| 1097 | break; | |
| 1098 | } | |
| 1099 | if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { | |
| 1100 | #ifdef VM_PAGE_DEBUG | |
| 1101 | m->busy_func = func; | |
| 1102 | m->busy_line = lineno; | |
| 1103 | #endif | |
| 1104 | break; | |
| 1105 | } | |
| 1106 | } | |
| 1107 | return m; | |
| 1108 | } | |
| 1109 | ||
| 1110 | /* | |
| 1111 | * Caller must hold the related vm_object | |
| 1112 | */ | |
| 1113 | vm_page_t | |
| 1114 | vm_page_next(vm_page_t m) | |
| 1115 | { | |
| 1116 | vm_page_t next; | |
| 1117 | ||
| 1118 | next = vm_page_rb_tree_RB_NEXT(m); | |
| 1119 | if (next && next->pindex != m->pindex + 1) | |
| 1120 | next = NULL; | |
| 1121 | return (next); | |
| 1122 | } | |
| 1123 | ||
| 1124 | /* | |
| 1125 | * vm_page_rename() | |
| 1126 | * | |
| 1127 | * Move the given vm_page from its current object to the specified | |
| 1128 | * target object/offset. The page must be busy and will remain so | |
| 1129 | * on return. | |
| 984263bc | 1130 | * |
| b12defdc MD |
1131 | * new_object must be held. |
| 1132 | * This routine might block. XXX ? | |
| 1133 | * | |
| 1134 | * NOTE: Swap associated with the page must be invalidated by the move. We | |
| de71fd3f MD |
1135 | * have to do this for several reasons: (1) we aren't freeing the |
| 1136 | * page, (2) we are dirtying the page, (3) the VM system is probably | |
| 1137 | * moving the page from object A to B, and will then later move | |
| 1138 | * the backing store from A to B and we can't have a conflict. | |
| 984263bc | 1139 | * |
| b12defdc | 1140 | * NOTE: We *always* dirty the page. It is necessary both for the |
| de71fd3f MD |
1141 | * fact that we moved it, and because we may be invalidating |
| 1142 | * swap. If the page is on the cache, we have to deactivate it | |
| 1143 | * or vm_page_dirty() will panic. Dirty pages are not allowed | |
| 1144 | * on the cache. | |
| 984263bc | 1145 | */ |
| 984263bc MD |
1146 | void |
| 1147 | vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) | |
| 1148 | { | |
| b12defdc MD |
1149 | KKASSERT(m->flags & PG_BUSY); |
| 1150 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(new_object)); | |
| 1151 | if (m->object) { | |
| 1152 | ASSERT_LWKT_TOKEN_HELD(vm_object_token(m->object)); | |
| 1153 | vm_page_remove(m); | |
| 1154 | } | |
| d2d8515b | 1155 | if (vm_page_insert(m, new_object, new_pindex) == FALSE) { |
| 480c83b6 | 1156 | panic("vm_page_rename: target exists (%p,%"PRIu64")", |
| d2d8515b MD |
1157 | new_object, new_pindex); |
| 1158 | } | |
| 984263bc MD |
1159 | if (m->queue - m->pc == PQ_CACHE) |
| 1160 | vm_page_deactivate(m); | |
| 1161 | vm_page_dirty(m); | |
| 984263bc MD |
1162 | } |
| 1163 | ||
| 1164 | /* | |
| de71fd3f MD |
1165 | * vm_page_unqueue() without any wakeup. This routine is used when a page |
| 1166 | * is being moved between queues or otherwise is to remain BUSYied by the | |
| 1167 | * caller. | |
| 984263bc | 1168 | * |
| de71fd3f | 1169 | * This routine may not block. |
| 984263bc | 1170 | */ |
| 984263bc MD |
1171 | void |
| 1172 | vm_page_unqueue_nowakeup(vm_page_t m) | |
| 1173 | { | |
| b12defdc MD |
1174 | vm_page_and_queue_spin_lock(m); |
| 1175 | (void)_vm_page_rem_queue_spinlocked(m); | |
| 1176 | vm_page_spin_unlock(m); | |
| 984263bc MD |
1177 | } |
| 1178 | ||
| 1179 | /* | |
| de71fd3f MD |
1180 | * vm_page_unqueue() - Remove a page from its queue, wakeup the pagedemon |
| 1181 | * if necessary. | |
| 984263bc | 1182 | * |
| de71fd3f | 1183 | * This routine may not block. |
| 984263bc | 1184 | */ |
| 984263bc MD |
1185 | void |
| 1186 | vm_page_unqueue(vm_page_t m) | |
| 1187 | { | |
| b12defdc | 1188 | u_short queue; |
| de71fd3f | 1189 | |
| b12defdc MD |
1190 | vm_page_and_queue_spin_lock(m); |
| 1191 | queue = _vm_page_rem_queue_spinlocked(m); | |
| 1192 | if (queue == PQ_FREE || queue == PQ_CACHE) { | |
| 1193 | vm_page_spin_unlock(m); | |
| 1194 | pagedaemon_wakeup(); | |
| 1195 | } else { | |
| 1196 | vm_page_spin_unlock(m); | |
| 984263bc MD |
1197 | } |
| 1198 | } | |
| 1199 | ||
| 984263bc | 1200 | /* |
| de71fd3f | 1201 | * vm_page_list_find() |
| 984263bc | 1202 | * |
| de71fd3f | 1203 | * Find a page on the specified queue with color optimization. |
| 984263bc | 1204 | * |
| de71fd3f MD |
1205 | * The page coloring optimization attempts to locate a page that does |
| 1206 | * not overload other nearby pages in the object in the cpu's L1 or L2 | |
| 1207 | * caches. We need this optimization because cpu caches tend to be | |
| 85946b6c MD |
1208 | * physical caches, while object spaces tend to be virtual. |
| 1209 | * | |
| 1210 | * On MP systems each PQ_FREE and PQ_CACHE color queue has its own spinlock | |
| 1211 | * and the algorithm is adjusted to localize allocations on a per-core basis. | |
| 1212 | * This is done by 'twisting' the colors. | |
| 984263bc | 1213 | * |
| b12defdc MD |
1214 | * The page is returned spinlocked and removed from its queue (it will |
| 1215 | * be on PQ_NONE), or NULL. The page is not PG_BUSY'd. The caller | |
| 1216 | * is responsible for dealing with the busy-page case (usually by | |
| 1217 | * deactivating the page and looping). | |
| 1218 | * | |
| 1219 | * NOTE: This routine is carefully inlined. A non-inlined version | |
| 1220 | * is available for outside callers but the only critical path is | |
| 1221 | * from within this source file. | |
| 984263bc | 1222 | * |
| b12defdc MD |
1223 | * NOTE: This routine assumes that the vm_pages found in PQ_CACHE and PQ_FREE |
| 1224 | * represent stable storage, allowing us to order our locks vm_page | |
| 1225 | * first, then queue. | |
| 984263bc | 1226 | */ |
| 74232d8e | 1227 | static __inline |
| 984263bc | 1228 | vm_page_t |
| 74232d8e MD |
1229 | _vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) |
| 1230 | { | |
| 1231 | vm_page_t m; | |
| 1232 | ||
| b12defdc MD |
1233 | for (;;) { |
| 1234 | if (prefer_zero) | |
| 1235 | m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, pglist); | |
| 1236 | else | |
| 1237 | m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl); | |
| 1238 | if (m == NULL) { | |
| 1239 | m = _vm_page_list_find2(basequeue, index); | |
| 1240 | return(m); | |
| 1241 | } | |
| 1242 | vm_page_and_queue_spin_lock(m); | |
| 1243 | if (m->queue == basequeue + index) { | |
| 1244 | _vm_page_rem_queue_spinlocked(m); | |
| 1245 | /* vm_page_t spin held, no queue spin */ | |
| 1246 | break; | |
| 1247 | } | |
| 1248 | vm_page_and_queue_spin_unlock(m); | |
| 1249 | } | |
| 74232d8e MD |
1250 | return(m); |
| 1251 | } | |
| 1252 | ||
| 1253 | static vm_page_t | |
| 1254 | _vm_page_list_find2(int basequeue, int index) | |
| 984263bc MD |
1255 | { |
| 1256 | int i; | |
| 1257 | vm_page_t m = NULL; | |
| 1258 | struct vpgqueues *pq; | |
| 1259 | ||
| 1260 | pq = &vm_page_queues[basequeue]; | |
| 1261 | ||
| 1262 | /* | |
| 1263 | * Note that for the first loop, index+i and index-i wind up at the | |
| 1264 | * same place. Even though this is not totally optimal, we've already | |
| 1265 | * blown it by missing the cache case so we do not care. | |
| 1266 | */ | |
| b12defdc MD |
1267 | for (i = PQ_L2_SIZE / 2; i > 0; --i) { |
| 1268 | for (;;) { | |
| 1269 | m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl); | |
| 1270 | if (m) { | |
| 1271 | _vm_page_and_queue_spin_lock(m); | |
| 1272 | if (m->queue == | |
| 1273 | basequeue + ((index + i) & PQ_L2_MASK)) { | |
| 1274 | _vm_page_rem_queue_spinlocked(m); | |
| 1275 | return(m); | |
| 1276 | } | |
| 1277 | _vm_page_and_queue_spin_unlock(m); | |
| 1278 | continue; | |
| 1279 | } | |
| 1280 | m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl); | |
| 1281 | if (m) { | |
| 1282 | _vm_page_and_queue_spin_lock(m); | |
| 1283 | if (m->queue == | |
| 1284 | basequeue + ((index - i) & PQ_L2_MASK)) { | |
| 1285 | _vm_page_rem_queue_spinlocked(m); | |
| 1286 | return(m); | |
| 1287 | } | |
| 1288 | _vm_page_and_queue_spin_unlock(m); | |
| 1289 | continue; | |
| 1290 | } | |
| 1291 | break; /* next i */ | |
| 1292 | } | |
| 984263bc MD |
1293 | } |
| 1294 | return(m); | |
| 1295 | } | |
| 1296 | ||
| 573fb415 | 1297 | /* |
| b12defdc MD |
1298 | * Returns a vm_page candidate for allocation. The page is not busied so |
| 1299 | * it can move around. The caller must busy the page (and typically | |
| 1300 | * deactivate it if it cannot be busied!) | |
| 1301 | * | |
| 1302 | * Returns a spinlocked vm_page that has been removed from its queue. | |
| 573fb415 | 1303 | */ |
| 74232d8e MD |
1304 | vm_page_t |
| 1305 | vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) | |
| 1306 | { | |
| 1307 | return(_vm_page_list_find(basequeue, index, prefer_zero)); | |
| 1308 | } | |
| 1309 | ||
| 984263bc | 1310 | /* |
| b12defdc MD |
1311 | * Find a page on the cache queue with color optimization, remove it |
| 1312 | * from the queue, and busy it. The returned page will not be spinlocked. | |
| 1313 | * | |
| 1314 | * A candidate failure will be deactivated. Candidates can fail due to | |
| 1315 | * being busied by someone else, in which case they will be deactivated. | |
| 984263bc | 1316 | * |
| de71fd3f | 1317 | * This routine may not block. |
| b12defdc | 1318 | * |
| 984263bc | 1319 | */ |
| b12defdc | 1320 | static vm_page_t |
| 85946b6c | 1321 | vm_page_select_cache(u_short pg_color) |
| 984263bc MD |
1322 | { |
| 1323 | vm_page_t m; | |
| 1324 | ||
| b12defdc | 1325 | for (;;) { |
| 85946b6c | 1326 | m = _vm_page_list_find(PQ_CACHE, pg_color & PQ_L2_MASK, FALSE); |
| b12defdc MD |
1327 | if (m == NULL) |
| 1328 | break; | |
| 1329 | /* | |
| 1330 | * (m) has been removed from its queue and spinlocked | |
| 1331 | */ | |
| 1332 | if (vm_page_busy_try(m, TRUE)) { | |
| 1333 | _vm_page_deactivate_locked(m, 0); | |
| 1334 | vm_page_spin_unlock(m); | |
| a491077e MD |
1335 | #ifdef INVARIANTS |
| 1336 | kprintf("Warning: busy page %p found in cache\n", m); | |
| 1337 | #endif | |
| b12defdc MD |
1338 | } else { |
| 1339 | /* | |
| 1340 | * We successfully busied the page | |
| 1341 | */ | |
| 9bf025db | 1342 | if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 && |
| b12defdc | 1343 | m->hold_count == 0 && |
| 9bf025db MD |
1344 | m->wire_count == 0 && |
| 1345 | (m->dirty & m->valid) == 0) { | |
| b12defdc MD |
1346 | vm_page_spin_unlock(m); |
| 1347 | pagedaemon_wakeup(); | |
| 1348 | return(m); | |
| 1349 | } | |
| 9bf025db MD |
1350 | |
| 1351 | /* | |
| 1352 | * The page cannot be recycled, deactivate it. | |
| 1353 | */ | |
| b12defdc MD |
1354 | _vm_page_deactivate_locked(m, 0); |
| 1355 | if (_vm_page_wakeup(m)) { | |
| 1356 | vm_page_spin_unlock(m); | |
| 1357 | wakeup(m); | |
| 1358 | } else { | |
| 1359 | vm_page_spin_unlock(m); | |
| 1360 | } | |
| 984263bc | 1361 | } |
| 984263bc | 1362 | } |
| b12defdc | 1363 | return (m); |
| 984263bc MD |
1364 | } |
| 1365 | ||
| 1366 | /* | |
| de71fd3f MD |
1367 | * Find a free or zero page, with specified preference. We attempt to |
| 1368 | * inline the nominal case and fall back to _vm_page_select_free() | |
| b12defdc | 1369 | * otherwise. A busied page is removed from the queue and returned. |
| 984263bc | 1370 | * |
| de71fd3f | 1371 | * This routine may not block. |
| 984263bc | 1372 | */ |
| 984263bc | 1373 | static __inline vm_page_t |
| 85946b6c | 1374 | vm_page_select_free(u_short pg_color, boolean_t prefer_zero) |
| 984263bc MD |
1375 | { |
| 1376 | vm_page_t m; | |
| 1377 | ||
| b12defdc | 1378 | for (;;) { |
| 85946b6c | 1379 | m = _vm_page_list_find(PQ_FREE, pg_color & PQ_L2_MASK, |
| b12defdc MD |
1380 | prefer_zero); |
| 1381 | if (m == NULL) | |
| 1382 | break; | |
| 1383 | if (vm_page_busy_try(m, TRUE)) { | |
| 90244566 MD |
1384 | /* |
| 1385 | * Various mechanisms such as a pmap_collect can | |
| 1386 | * result in a busy page on the free queue. We | |
| 1387 | * have to move the page out of the way so we can | |
| 1388 | * retry the allocation. If the other thread is not | |
| 1389 | * allocating the page then m->valid will remain 0 and | |
| 1390 | * the pageout daemon will free the page later on. | |
| 1391 | * | |
| 1392 | * Since we could not busy the page, however, we | |
| 1393 | * cannot make assumptions as to whether the page | |
| 1394 | * will be allocated by the other thread or not, | |
| 1395 | * so all we can do is deactivate it to move it out | |
| 1396 | * of the way. In particular, if the other thread | |
| 1397 | * wires the page it may wind up on the inactive | |
| 1398 | * queue and the pageout daemon will have to deal | |
| 1399 | * with that case too. | |
| 1400 | */ | |
| b12defdc MD |
1401 | _vm_page_deactivate_locked(m, 0); |
| 1402 | vm_page_spin_unlock(m); | |
| 1403 | #ifdef INVARIANTS | |
| 1404 | kprintf("Warning: busy page %p found in cache\n", m); | |
| 1405 | #endif | |
| 1406 | } else { | |
| 90244566 MD |
1407 | /* |
| 1408 | * Theoretically if we are able to busy the page | |
| 1409 | * atomic with the queue removal (using the vm_page | |
| 1410 | * lock) nobody else should be able to mess with the | |
| 1411 | * page before us. | |
| 1412 | */ | |
| 9bf025db MD |
1413 | KKASSERT((m->flags & (PG_UNMANAGED | |
| 1414 | PG_NEED_COMMIT)) == 0); | |
| b12defdc MD |
1415 | KKASSERT(m->hold_count == 0); |
| 1416 | KKASSERT(m->wire_count == 0); | |
| 1417 | vm_page_spin_unlock(m); | |
| 1418 | pagedaemon_wakeup(); | |
| 1419 | ||
| 1420 | /* return busied and removed page */ | |
| 1421 | return(m); | |
| 1422 | } | |
| 1423 | } | |
| 984263bc MD |
1424 | return(m); |
| 1425 | } | |
| 1426 | ||
/*
 * This implements a per-cpu cache of free, zero'd, ready-to-go pages.
 * The idea is to populate this cache prior to acquiring any locks so
 * we don't wind up potentially zeroing VM pages (under heavy loads) while
 * holding potentially contending locks.
 *
 * Note that we allocate the page uninserted into anything and use a pindex
 * of 0, the vm_page_alloc() will effectively add gd_cpuid so these
 * allocations should wind up being uncontended.  However, we still want
 * to rove across PQ_L2_SIZE.
 *
 * The implementation is currently compiled out (#if 0), so this function
 * is a no-op.  The disabled code is kept for reference; it stops
 * refilling as soon as vm_page_alloc() returns NULL instead of
 * dereferencing the NULL page.
 */
void
vm_page_pcpu_cache(void)
{
#if 0
	globaldata_t gd = mycpu;
	vm_page_t m;

	if (gd->gd_vmpg_count < GD_MINVMPG) {
		crit_enter_gd(gd);
		while (gd->gd_vmpg_count < GD_MAXVMPG) {
			m = vm_page_alloc(NULL, ticks & ~ncpus2_mask,
					  VM_ALLOC_NORMAL | VM_ALLOC_NULL_OK |
					  VM_ALLOC_ZERO);
			if (m == NULL)
				break;
			/*
			 * Re-check the count, the allocation may have
			 * given someone else a chance to fill the array.
			 */
			if (gd->gd_vmpg_count < GD_MAXVMPG) {
				if ((m->flags & PG_ZERO) == 0) {
					pmap_zero_page(VM_PAGE_TO_PHYS(m));
					vm_page_flag_set(m, PG_ZERO);
				}
				gd->gd_vmpg_array[gd->gd_vmpg_count++] = m;
			} else {
				vm_page_free(m);
			}
		}
		crit_exit_gd(gd);
	}
#endif
}
| 1465 | ||
| 1466 | /* | |
| de71fd3f | 1467 | * vm_page_alloc() |
| 984263bc | 1468 | * |
| de71fd3f | 1469 | * Allocate and return a memory cell associated with this VM object/offset |
| 85946b6c | 1470 | * pair. If object is NULL an unassociated page will be allocated. |
| 984263bc | 1471 | * |
| d2d8515b MD |
1472 | * The returned page will be busied and removed from its queues. This |
| 1473 | * routine can block and may return NULL if a race occurs and the page | |
| 1474 | * is found to already exist at the specified (object, pindex). | |
| de71fd3f | 1475 | * |
| dc1fd4b3 | 1476 | * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain |
| 39208dbe | 1477 | * VM_ALLOC_QUICK like normal but cannot use cache |
| dc1fd4b3 MD |
1478 | * VM_ALLOC_SYSTEM greater free drain |
| 1479 | * VM_ALLOC_INTERRUPT allow free list to be completely drained | |
| d2d8515b MD |
1480 | * VM_ALLOC_ZERO advisory request for pre-zero'd page only |
| 1481 | * VM_ALLOC_FORCE_ZERO advisory request for pre-zero'd page only | |
| 1482 | * VM_ALLOC_NULL_OK ok to return NULL on insertion collision | |
| 1483 | * (see vm_page_grab()) | |
| 54341a3b MD |
1484 | * VM_ALLOC_USE_GD ok to use per-gd cache |
| 1485 | * | |
| d2d8515b | 1486 | * The object must be held if not NULL |
| 85946b6c | 1487 | * This routine may not block |
| 984263bc | 1488 | * |
| de71fd3f MD |
1489 | * Additional special handling is required when called from an interrupt |
| 1490 | * (VM_ALLOC_INTERRUPT). We are not allowed to mess with the page cache | |
| 1491 | * in this case. | |
| 984263bc | 1492 | */ |
| 984263bc MD |
1493 | vm_page_t |
| 1494 | vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req) | |
| 1495 | { | |
| 54341a3b MD |
1496 | #ifdef SMP |
| 1497 | globaldata_t gd = mycpu; | |
| 1498 | #endif | |
| 9d494b34 | 1499 | vm_object_t obj; |
| 54341a3b | 1500 | vm_page_t m; |
| 85946b6c | 1501 | u_short pg_color; |
| 984263bc | 1502 | |
| 54341a3b MD |
1503 | #if 0 |
| 1504 | /* | |
| 1505 | * Special per-cpu free VM page cache. The pages are pre-busied | |
| 1506 | * and pre-zerod for us. | |
| 1507 | */ | |
| 1508 | if (gd->gd_vmpg_count && (page_req & VM_ALLOC_USE_GD)) { | |
| 1509 | crit_enter_gd(gd); | |
| 1510 | if (gd->gd_vmpg_count) { | |
| 1511 | m = gd->gd_vmpg_array[--gd->gd_vmpg_count]; | |
| 1512 | crit_exit_gd(gd); | |
| 1513 | goto done; | |
| 1514 | } | |
| 1515 | crit_exit_gd(gd); | |
| 1516 | } | |
| 1517 | #endif | |
| 1518 | m = NULL; | |
| 1519 | ||
| 85946b6c MD |
1520 | #ifdef SMP |
| 1521 | /* | |
| 1522 | * Cpu twist - cpu localization algorithm | |
| 1523 | */ | |
| 1524 | if (object) { | |
| 54341a3b | 1525 | pg_color = gd->gd_cpuid + (pindex & ~ncpus_fit_mask) + |
| 85946b6c | 1526 | (object->pg_color & ~ncpus_fit_mask); |
| 85946b6c | 1527 | } else { |
| 54341a3b | 1528 | pg_color = gd->gd_cpuid + (pindex & ~ncpus_fit_mask); |
| 85946b6c MD |
1529 | } |
| 1530 | #else | |
| 1531 | /* | |
| 1532 | * Normal page coloring algorithm | |
| 1533 | */ | |
| 1534 | if (object) { | |
| 1535 | pg_color = object->pg_color + pindex; | |
| 85946b6c MD |
1536 | } else { |
| 1537 | pg_color = pindex; | |
| 1538 | } | |
| 1539 | #endif | |
| dc1fd4b3 | 1540 | KKASSERT(page_req & |
| 39208dbe MD |
1541 | (VM_ALLOC_NORMAL|VM_ALLOC_QUICK| |
| 1542 | VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); | |
| 984263bc MD |
1543 | |
| 1544 | /* | |
| 4ecf7cc9 MD |
1545 | * Certain system threads (pageout daemon, buf_daemon's) are |
| 1546 | * allowed to eat deeper into the free page list. | |
| 984263bc | 1547 | */ |
| 4ecf7cc9 | 1548 | if (curthread->td_flags & TDF_SYSTHREAD) |
| dc1fd4b3 | 1549 | page_req |= VM_ALLOC_SYSTEM; |
| 984263bc | 1550 | |
| 984263bc | 1551 | loop: |
| dc1fd4b3 MD |
1552 | if (vmstats.v_free_count > vmstats.v_free_reserved || |
| 1553 | ((page_req & VM_ALLOC_INTERRUPT) && vmstats.v_free_count > 0) || | |
| 1554 | ((page_req & VM_ALLOC_SYSTEM) && vmstats.v_cache_count == 0 && | |
| 1555 | vmstats.v_free_count > vmstats.v_interrupt_free_min) | |
| 1556 | ) { | |
| 984263bc | 1557 | /* |
| dc1fd4b3 | 1558 | * The free queue has sufficient free pages to take one out. |
| 984263bc | 1559 | */ |
| d2d8515b | 1560 | if (page_req & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO)) |
| 85946b6c | 1561 | m = vm_page_select_free(pg_color, TRUE); |
| 984263bc | 1562 | else |
| 85946b6c | 1563 | m = vm_page_select_free(pg_color, FALSE); |
| dc1fd4b3 | 1564 | } else if (page_req & VM_ALLOC_NORMAL) { |
| 984263bc | 1565 | /* |
| dc1fd4b3 MD |
1566 | * Allocatable from the cache (non-interrupt only). On |
| 1567 | * success, we must free the page and try again, thus | |
| 1568 | * ensuring that vmstats.v_*_free_min counters are replenished. | |
| 984263bc | 1569 | */ |
| dc1fd4b3 MD |
1570 | #ifdef INVARIANTS |
| 1571 | if (curthread->td_preempted) { | |
| 086c1d7e | 1572 | kprintf("vm_page_alloc(): warning, attempt to allocate" |
| dc1fd4b3 MD |
1573 | " cache page from preempting interrupt\n"); |
| 1574 | m = NULL; | |
| 1575 | } else { | |
| 85946b6c | 1576 | m = vm_page_select_cache(pg_color); |
| dc1fd4b3 MD |
1577 | } |
| 1578 | #else | |
| 85946b6c | 1579 | m = vm_page_select_cache(pg_color); |
| dc1fd4b3 | 1580 | #endif |
| 984263bc | 1581 | /* |
| 9765affa | 1582 | * On success move the page into the free queue and loop. |
| bdea739c MD |
1583 | * |
| 1584 | * Only do this if we can safely acquire the vm_object lock, | |
| 1585 | * because this is effectively a random page and the caller | |
| 1586 | * might be holding the lock shared, we don't want to | |
| 1587 | * deadlock. | |
| 984263bc | 1588 | */ |
| dc1fd4b3 MD |
1589 | if (m != NULL) { |
| 1590 | KASSERT(m->dirty == 0, | |
| d2d8515b | 1591 | ("Found dirty cache page %p", m)); |
| 9d494b34 MD |
1592 | if ((obj = m->object) != NULL) { |
| 1593 | if (vm_object_hold_try(obj)) { | |
| bdea739c MD |
1594 | vm_page_protect(m, VM_PROT_NONE); |
| 1595 | vm_page_free(m); | |
| 9d494b34 MD |
1596 | /* m->object NULL here */ |
| 1597 | vm_object_drop(obj); | |
| bdea739c MD |
1598 | } else { |
| 1599 | vm_page_deactivate(m); | |
| 1600 | vm_page_wakeup(m); | |
| 1601 | } | |
| 1602 | } else { | |
| 1603 | vm_page_protect(m, VM_PROT_NONE); | |
| 1604 | vm_page_free(m); | |
| 1605 | } | |
| dc1fd4b3 MD |
1606 | goto loop; |
| 1607 | } | |
| 1608 | ||
| 1609 | /* | |
| 1610 | * On failure return NULL | |
| 1611 | */ | |
| 984263bc | 1612 | #if defined(DIAGNOSTIC) |
| dc1fd4b3 | 1613 | if (vmstats.v_cache_count > 0) |
| 086c1d7e | 1614 | kprintf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count); |
| 984263bc | 1615 | #endif |
| dc1fd4b3 MD |
1616 | vm_pageout_deficit++; |
| 1617 | pagedaemon_wakeup(); | |
| 1618 | return (NULL); | |
| 984263bc MD |
1619 | } else { |
| 1620 | /* | |
| dc1fd4b3 | 1621 | * No pages available, wakeup the pageout daemon and give up. |
| 984263bc | 1622 | */ |
| 984263bc MD |
1623 | vm_pageout_deficit++; |
| 1624 | pagedaemon_wakeup(); | |
| 1625 | return (NULL); | |
| 1626 | } | |
| 1627 | ||
| 1628 | /* | |
| b12defdc MD |
1629 | * v_free_count can race so loop if we don't find the expected |
| 1630 | * page. | |
| 984263bc | 1631 | */ |
| b12defdc MD |
1632 | if (m == NULL) |
| 1633 | goto loop; | |
| 984263bc MD |
1634 | |
| 1635 | /* | |
| d2d8515b MD |
1636 | * Good page found. The page has already been busied for us and |
| 1637 | * removed from its queues. | |
| 984263bc | 1638 | */ |
| d2d8515b MD |
1639 | KASSERT(m->dirty == 0, |
| 1640 | ("vm_page_alloc: free/cache page %p was dirty", m)); | |
| b12defdc | 1641 | KKASSERT(m->queue == PQ_NONE); |
| 984263bc | 1642 | |
| 54341a3b MD |
1643 | #if 0 |
| 1644 | done: | |
| 1645 | #endif | |
| 984263bc | 1646 | /* |
| d2d8515b MD |
1647 | * Initialize the structure, inheriting some flags but clearing |
| 1648 | * all the rest. The page has already been busied for us. | |
| 984263bc | 1649 | */ |
| d2d8515b | 1650 | vm_page_flag_clear(m, ~(PG_ZERO | PG_BUSY | PG_SBUSY)); |
| b12defdc MD |
1651 | KKASSERT(m->wire_count == 0); |
| 1652 | KKASSERT(m->busy == 0); | |
| 984263bc | 1653 | m->act_count = 0; |
| 984263bc | 1654 | m->valid = 0; |
| 984263bc MD |
1655 | |
| 1656 | /* | |
| b12defdc MD |
1657 | * Caller must be holding the object lock (asserted by |
| 1658 | * vm_page_insert()). | |
| 1659 | * | |
| 1660 | * NOTE: Inserting a page here does not insert it into any pmaps | |
| 1661 | * (which could cause us to block allocating memory). | |
| 85946b6c MD |
1662 | * |
| 1663 | * NOTE: If no object an unassociated page is allocated, m->pindex | |
| 1664 | * can be used by the caller for any purpose. | |
| 984263bc | 1665 | */ |
| d2d8515b MD |
1666 | if (object) { |
| 1667 | if (vm_page_insert(m, object, pindex) == FALSE) { | |
| 480c83b6 | 1668 | kprintf("PAGE RACE (%p:%d,%"PRIu64")\n", |
| d2d8515b MD |
1669 | object, object->type, pindex); |
| 1670 | vm_page_free(m); | |
| 1671 | m = NULL; | |
| 1672 | if ((page_req & VM_ALLOC_NULL_OK) == 0) | |
| 1673 | panic("PAGE RACE"); | |
| 1674 | } | |
| 1675 | } else { | |
| 85946b6c | 1676 | m->pindex = pindex; |
| d2d8515b | 1677 | } |
| 984263bc MD |
1678 | |
| 1679 | /* | |
| 1680 | * Don't wakeup too often - wakeup the pageout daemon when | |
| 1681 | * we would be nearly out of memory. | |
| 1682 | */ | |
| 20479584 | 1683 | pagedaemon_wakeup(); |
| 984263bc | 1684 | |
| 9765affa MD |
1685 | /* |
| 1686 | * A PG_BUSY page is returned. | |
| 1687 | */ | |
| 984263bc MD |
1688 | return (m); |
| 1689 | } | |
| 1690 | ||
| 1691 | /* | |
| 79d182b0 MD |
1692 | * Attempt to allocate contiguous physical memory with the specified |
| 1693 | * requirements. | |
| 1694 | */ | |
| 1695 | vm_page_t | |
| 1696 | vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high, | |
| 1697 | unsigned long alignment, unsigned long boundary, | |
| 1698 | unsigned long size) | |
| 1699 | { | |
| 1700 | alist_blk_t blk; | |
| 1701 | ||
| 1702 | alignment >>= PAGE_SHIFT; | |
| 1703 | if (alignment == 0) | |
| 1704 | alignment = 1; | |
| 1705 | boundary >>= PAGE_SHIFT; | |
| 1706 | if (boundary == 0) | |
| 1707 | boundary = 1; | |
| 1708 | size = (size + PAGE_MASK) >> PAGE_SHIFT; | |
| 1709 | ||
| 1710 | spin_lock(&vm_contig_spin); | |
| 1711 | blk = alist_alloc(&vm_contig_alist, 0, size); | |
| 1712 | if (blk == ALIST_BLOCK_NONE) { | |
| 1713 | spin_unlock(&vm_contig_spin); | |
| 1714 | if (bootverbose) { | |
| 1715 | kprintf("vm_page_alloc_contig: %ldk nospace\n", | |
| 1716 | (size + PAGE_MASK) * (PAGE_SIZE / 1024)); | |
| 1717 | } | |
| 1718 | return(NULL); | |
| 1719 | } | |
| 1720 | if (high && ((vm_paddr_t)(blk + size) << PAGE_SHIFT) > high) { | |
| 1721 | alist_free(&vm_contig_alist, blk, size); | |
| 1722 | spin_unlock(&vm_contig_spin); | |
| 1723 | if (bootverbose) { | |
| 1724 | kprintf("vm_page_alloc_contig: %ldk high " | |
| 1725 | "%016jx failed\n", | |
| 1726 | (size + PAGE_MASK) * (PAGE_SIZE / 1024), | |
| 1727 | (intmax_t)high); | |
| 1728 | } | |
| 1729 | return(NULL); | |
| 1730 | } | |
| 1731 | spin_unlock(&vm_contig_spin); | |
| ef67e7a3 | 1732 | if (vm_contig_verbose) { |
| 79d182b0 MD |
1733 | kprintf("vm_page_alloc_contig: %016jx/%ldk\n", |
| 1734 | (intmax_t)(vm_paddr_t)blk << PAGE_SHIFT, | |
| 1735 | (size + PAGE_MASK) * (PAGE_SIZE / 1024)); | |
| 1736 | } | |
| 1737 | return (PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT)); | |
| 1738 | } | |
| 1739 | ||
| 1740 | /* | |
| 1741 | * Free contiguously allocated pages. The pages will be wired but not busy. | |
| 1742 | * When freeing to the alist we leave them wired and not busy. | |
| 1743 | */ | |
| 1744 | void | |
| 1745 | vm_page_free_contig(vm_page_t m, unsigned long size) | |
| 1746 | { | |
| 1747 | vm_paddr_t pa = VM_PAGE_TO_PHYS(m); | |
| 1748 | vm_pindex_t start = pa >> PAGE_SHIFT; | |
| 1749 | vm_pindex_t pages = (size + PAGE_MASK) >> PAGE_SHIFT; | |
| 1750 | ||
| ef67e7a3 | 1751 | if (vm_contig_verbose) { |
| 79d182b0 MD |
1752 | kprintf("vm_page_free_contig: %016jx/%ldk\n", |
| 1753 | (intmax_t)pa, size / 1024); | |
| 1754 | } | |
| 1755 | if (pa < vm_low_phys_reserved) { | |
| 1756 | KKASSERT(pa + size <= vm_low_phys_reserved); | |
| 1757 | spin_lock(&vm_contig_spin); | |
| 1758 | alist_free(&vm_contig_alist, start, pages); | |
| 1759 | spin_unlock(&vm_contig_spin); | |
| 1760 | } else { | |
| 1761 | while (pages) { | |
| 1762 | vm_page_busy_wait(m, FALSE, "cpgfr"); | |
| 1763 | vm_page_unwire(m, 0); | |
| 1764 | vm_page_free(m); | |
| 1765 | --pages; | |
| 1766 | ++m; | |
| 1767 | } | |
| 1768 | ||
| 1769 | } | |
| 1770 | } | |
| 1771 | ||
| 1772 | ||
/*
 * Keep calling vm_wait() until vm_page_count_min(0) no longer reports a
 * shortage.  Intended for nominal heavy memory use kernel operations.
 *
 * WARNING! Never call this from any vm_pageout code path, doing so will
 *	    trivially deadlock the system.
 */
void
vm_wait_nominal(void)
{
	for (;;) {
		if (vm_page_count_min(0) == 0)
			break;
		vm_wait(0);
	}
}
| 1786 | ||
/*
 * Test whether vm_wait_nominal() would block.
 *
 * Returns 1 if vm_page_count_min(0) reports a shortage, 0 otherwise.
 */
int
vm_test_nominal(void)
{
	return (vm_page_count_min(0) ? 1 : 0);
}
| 1797 | ||
| 1798 | /* | |
| de71fd3f MD |
1799 | * Block until free pages are available for allocation, called in various |
| 1800 | * places before memory allocations. | |
| cd3c66bd MD |
1801 | * |
| 1802 | * The caller may loop if vm_page_count_min() == FALSE so we cannot be | |
| 1803 | * more generous then that. | |
| 984263bc | 1804 | */ |
| 984263bc | 1805 | void |
| 4ecf7cc9 | 1806 | vm_wait(int timo) |
| 984263bc | 1807 | { |
| cd3c66bd MD |
1808 | /* |
| 1809 | * never wait forever | |
| 1810 | */ | |
| 1811 | if (timo == 0) | |
| 1812 | timo = hz; | |
| 9ad0147b | 1813 | lwkt_gettoken(&vm_token); |
| cd3c66bd | 1814 | |
| bc6dffab | 1815 | if (curthread == pagethread) { |
| cd3c66bd MD |
1816 | /* |
| 1817 | * The pageout daemon itself needs pages, this is bad. | |
| 1818 | */ | |
| 1819 | if (vm_page_count_min(0)) { | |
| 1820 | vm_pageout_pages_needed = 1; | |
| 1821 | tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo); | |
| 1822 | } | |
| 984263bc | 1823 | } else { |
| cd3c66bd MD |
1824 | /* |
| 1825 | * Wakeup the pageout daemon if necessary and wait. | |
| 1826 | */ | |
| 1827 | if (vm_page_count_target()) { | |
| 1828 | if (vm_pages_needed == 0) { | |
| 1829 | vm_pages_needed = 1; | |
| 1830 | wakeup(&vm_pages_needed); | |
| 1831 | } | |
| 1832 | ++vm_pages_waiting; /* SMP race ok */ | |
| 1833 | tsleep(&vmstats.v_free_count, 0, "vmwait", timo); | |
| 984263bc | 1834 | } |
| 984263bc | 1835 | } |
| 9ad0147b | 1836 | lwkt_reltoken(&vm_token); |
| 984263bc MD |
1837 | } |
| 1838 | ||
| 1839 | /* | |
| de71fd3f MD |
1840 | * Block until free pages are available for allocation |
| 1841 | * | |
| cd3c66bd | 1842 | * Called only from vm_fault so that processes page faulting can be |
| de71fd3f | 1843 | * easily tracked. |
| 984263bc | 1844 | */ |
| 984263bc MD |
1845 | void |
| 1846 | vm_waitpfault(void) | |
| 1847 | { | |
| cd3c66bd MD |
1848 | /* |
| 1849 | * Wakeup the pageout daemon if necessary and wait. | |
| 1850 | */ | |
| 1851 | if (vm_page_count_target()) { | |
| 1852 | lwkt_gettoken(&vm_token); | |
| 1853 | if (vm_page_count_target()) { | |
| 1854 | if (vm_pages_needed == 0) { | |
| 1855 | vm_pages_needed = 1; | |
| 1856 | wakeup(&vm_pages_needed); | |
| 1857 | } | |
| 1858 | ++vm_pages_waiting; /* SMP race ok */ | |
| 1859 | tsleep(&vmstats.v_free_count, 0, "pfault", hz); | |
| 1860 | } | |
| 1861 | lwkt_reltoken(&vm_token); | |
| 984263bc | 1862 | } |
| 984263bc MD |
1863 | } |
| 1864 | ||
| 1865 | /* | |
| de71fd3f MD |
1866 | * Put the specified page on the active list (if appropriate). Ensure |
| 1867 | * that act_count is at least ACT_INIT but do not otherwise mess with it. | |
| 984263bc | 1868 | * |
| b12defdc | 1869 | * The caller should be holding the page busied ? XXX |
| de71fd3f | 1870 | * This routine may not block. |
| 984263bc MD |
1871 | */ |
| 1872 | void | |
| 1873 | vm_page_activate(vm_page_t m) | |
| 1874 | { | |
| b12defdc | 1875 | u_short oqueue; |
| 984263bc | 1876 | |
| b12defdc | 1877 | vm_page_spin_lock(m); |
| 027193eb | 1878 | if (m->queue - m->pc != PQ_ACTIVE) { |
| b12defdc MD |
1879 | _vm_page_queue_spin_lock(m); |
| 1880 | oqueue = _vm_page_rem_queue_spinlocked(m); | |
| 1881 | /* page is left spinlocked, queue is unlocked */ | |
| 984263bc | 1882 | |
| b12defdc MD |
1883 | if (oqueue == PQ_CACHE) |
| 1884 | mycpu->gd_cnt.v_reactivated++; | |
| 984263bc | 1885 | if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { |
| 984263bc MD |
1886 | if (m->act_count < ACT_INIT) |
| 1887 | m->act_count = ACT_INIT; | |
| 027193eb | 1888 | _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0); |
| 984263bc | 1889 | } |
| b12defdc MD |
1890 | _vm_page_and_queue_spin_unlock(m); |
| 1891 | if (oqueue == PQ_CACHE || oqueue == PQ_FREE) | |
| 1892 | pagedaemon_wakeup(); | |
| 984263bc MD |
1893 | } else { |
| 1894 | if (m->act_count < ACT_INIT) | |
| 1895 | m->act_count = ACT_INIT; | |
| b12defdc | 1896 | vm_page_spin_unlock(m); |
| 984263bc | 1897 | } |
| 984263bc MD |
1898 | } |
| 1899 | ||
| 1900 | /* | |
| de71fd3f MD |
1901 | * Helper routine for vm_page_free_toq() and vm_page_cache(). This |
| 1902 | * routine is called when a page has been added to the cache or free | |
| 1903 | * queues. | |
| 984263bc | 1904 | * |
| de71fd3f | 1905 | * This routine may not block. |
| 984263bc MD |
1906 | */ |
| 1907 | static __inline void | |
| 1908 | vm_page_free_wakeup(void) | |
| 1909 | { | |
| 1910 | /* | |
| cd3c66bd MD |
1911 | * If the pageout daemon itself needs pages, then tell it that |
| 1912 | * there are some free. | |
| 984263bc MD |
1913 | */ |
| 1914 | if (vm_pageout_pages_needed && | |
| de71fd3f MD |
1915 | vmstats.v_cache_count + vmstats.v_free_count >= |
| 1916 | vmstats.v_pageout_free_min | |
| 1917 | ) { | |
| 984263bc MD |
1918 | wakeup(&vm_pageout_pages_needed); |
| 1919 | vm_pageout_pages_needed = 0; | |
| 1920 | } | |
| de71fd3f | 1921 | |
| 984263bc | 1922 | /* |
| cd3c66bd MD |
1923 | * Wakeup processes that are waiting on memory. |
| 1924 | * | |
| 1925 | * NOTE: vm_paging_target() is the pageout daemon's target, while | |
| 1926 | * vm_page_count_target() is somewhere inbetween. We want | |
| 1927 | * to wake processes up prior to the pageout daemon reaching | |
| 1928 | * its target to provide some hysteresis. | |
| 984263bc | 1929 | */ |
| cd3c66bd MD |
1930 | if (vm_pages_waiting) { |
| 1931 | if (!vm_page_count_target()) { | |
| 1932 | /* | |
| 1933 | * Plenty of pages are free, wakeup everyone. | |
| 1934 | */ | |
| 1935 | vm_pages_waiting = 0; | |
| 1936 | wakeup(&vmstats.v_free_count); | |
| 1937 | ++mycpu->gd_cnt.v_ppwakeups; | |
| 1938 | } else if (!vm_page_count_min(0)) { | |
| 1939 | /* | |
| 1940 | * Some pages are free, wakeup someone. | |
| 1941 | */ | |
| 1942 | int wcount = vm_pages_waiting; | |
| 1943 | if (wcount > 0) | |
| 1944 | --wcount; | |
| 1945 | vm_pages_waiting = wcount; | |
| 1946 | wakeup_one(&vmstats.v_free_count); | |
| 1947 | ++mycpu->gd_cnt.v_ppwakeups; | |
| 1948 | } | |
| 984263bc MD |
1949 | } |
| 1950 | } | |
| 1951 | ||
| 1952 | /* | |
| b12defdc MD |
1953 | * Returns the given page to the PQ_FREE or PQ_HOLD list and disassociates |
| 1954 | * it from its VM object. | |
| 984263bc | 1955 | * |
| b12defdc MD |
1956 | * The vm_page must be PG_BUSY on entry. PG_BUSY will be released on |
| 1957 | * return (the page will have been freed). | |
| 984263bc | 1958 | */ |
| 984263bc MD |
1959 | void |
| 1960 | vm_page_free_toq(vm_page_t m) | |
| 1961 | { | |
| 12e4aaff | 1962 | mycpu->gd_cnt.v_tfree++; |
| 17cde63e | 1963 | KKASSERT((m->flags & PG_MAPPED) == 0); |
| b12defdc | 1964 | KKASSERT(m->flags & PG_BUSY); |
| 17cde63e | 1965 | |
| 984263bc | 1966 | if (m->busy || ((m->queue - m->pc) == PQ_FREE)) { |
| 79d182b0 MD |
1967 | kprintf("vm_page_free: pindex(%lu), busy(%d), " |
| 1968 | "PG_BUSY(%d), hold(%d)\n", | |
| 1969 | (u_long)m->pindex, m->busy, | |
| 1970 | ((m->flags & PG_BUSY) ? 1 : 0), m->hold_count); | |
| 984263bc MD |
1971 | if ((m->queue - m->pc) == PQ_FREE) |
| 1972 | panic("vm_page_free: freeing free page"); | |
| 1973 | else | |
| 1974 | panic("vm_page_free: freeing busy page"); | |
| 1975 | } | |
| 1976 | ||
| 1977 | /* | |
| b12defdc MD |
1978 | * Remove from object, spinlock the page and its queues and |
| 1979 | * remove from any queue. No queue spinlock will be held | |
| 1980 | * after this section (because the page was removed from any | |
| 1981 | * queue). | |
| 984263bc | 1982 | */ |
| 984263bc | 1983 | vm_page_remove(m); |
| b12defdc MD |
1984 | vm_page_and_queue_spin_lock(m); |
| 1985 | _vm_page_rem_queue_spinlocked(m); | |
| 984263bc MD |
1986 | |
| 1987 | /* | |
| f2d22ebf MD |
1988 | * No further management of fictitious pages occurs beyond object |
| 1989 | * and queue removal. | |
| 984263bc | 1990 | */ |
| 984263bc | 1991 | if ((m->flags & PG_FICTITIOUS) != 0) { |
| b12defdc | 1992 | vm_page_spin_unlock(m); |
| 9765affa | 1993 | vm_page_wakeup(m); |
| 984263bc MD |
1994 | return; |
| 1995 | } | |
| 1996 | ||
| 1997 | m->valid = 0; | |
| 1998 | vm_page_undirty(m); | |
| 1999 | ||
| 2000 | if (m->wire_count != 0) { | |
| 2001 | if (m->wire_count > 1) { | |
| de71fd3f MD |
2002 | panic( |
| 2003 | "vm_page_free: invalid wire count (%d), pindex: 0x%lx", | |
| 2004 | m->wire_count, (long)m->pindex); | |
| 984263bc | 2005 | } |
| 73c351d1 | 2006 | panic("vm_page_free: freeing wired page"); |
| 984263bc MD |
2007 | } |
| 2008 | ||
| 2009 | /* | |
| 984263bc | 2010 | * Clear the UNMANAGED flag when freeing an unmanaged page. |
| 9bf025db | 2011 | * Clear the NEED_COMMIT flag |
| 984263bc | 2012 | */ |
| 9bf025db | 2013 | if (m->flags & PG_UNMANAGED) |
| b12defdc | 2014 | vm_page_flag_clear(m, PG_UNMANAGED); |
| 9bf025db MD |
2015 | if (m->flags & PG_NEED_COMMIT) |
| 2016 | vm_page_flag_clear(m, PG_NEED_COMMIT); | |
| 984263bc MD |
2017 | |
| 2018 | if (m->hold_count != 0) { | |
| d0aa00e8 | 2019 | vm_page_flag_clear(m, PG_ZERO); |
| 027193eb | 2020 | _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0); |
| de71fd3f | 2021 | } else { |
| b12defdc | 2022 | _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 0); |
| de71fd3f | 2023 | } |
| 984263bc MD |
2024 | |
| 2025 | /* | |
| b12defdc MD |
2026 | * This sequence allows us to clear PG_BUSY while still holding |
| 2027 | * its spin lock, which reduces contention vs allocators. We | |
| 2028 | * must not leave the queue locked or _vm_page_wakeup() may | |
| 2029 | * deadlock. | |
| 984263bc | 2030 | */ |
| b12defdc MD |
2031 | _vm_page_queue_spin_unlock(m); |
| 2032 | if (_vm_page_wakeup(m)) { | |
| 2033 | vm_page_spin_unlock(m); | |
| 2034 | wakeup(m); | |
| 984263bc | 2035 | } else { |
| b12defdc | 2036 | vm_page_spin_unlock(m); |
| 984263bc | 2037 | } |
| 984263bc | 2038 | vm_page_free_wakeup(); |
| 984263bc MD |
2039 | } |
| 2040 | ||
| 2041 | /* | |
| bb6811be MD |
2042 | * vm_page_free_fromq_fast() |
| 2043 | * | |
| 2044 | * Remove a non-zero page from one of the free queues; the page is removed for | |
| 2045 | * zeroing, so do not issue a wakeup. | |
| bb6811be MD |
2046 | */ |
| 2047 | vm_page_t | |
| 2048 | vm_page_free_fromq_fast(void) | |
| 2049 | { | |
| 2050 | static int qi; | |
| 2051 | vm_page_t m; | |
| 2052 | int i; | |
| 2053 | ||
| bb6811be MD |
2054 | for (i = 0; i < PQ_L2_SIZE; ++i) { |
| 2055 | m = vm_page_list_find(PQ_FREE, qi, FALSE); | |
| b12defdc MD |
2056 | /* page is returned spinlocked and removed from its queue */ |
| 2057 | if (m) { | |
| 2058 | if (vm_page_busy_try(m, TRUE)) { | |
| 2059 | /* | |
| 2060 | * We were unable to busy the page, deactivate | |
| 2061 | * it and loop. | |
| 2062 | */ | |
| 2063 | _vm_page_deactivate_locked(m, 0); | |
| 2064 | vm_page_spin_unlock(m); | |
| 90244566 | 2065 | } else if (m->flags & PG_ZERO) { |
| b12defdc MD |
2066 | /* |
| 2067 | * The page is PG_ZERO, requeue it and loop | |
| 2068 | */ | |
| 2069 | _vm_page_add_queue_spinlocked(m, | |
| 2070 | PQ_FREE + m->pc, | |
| 2071 | 0); | |
| 2072 | vm_page_queue_spin_unlock(m); | |
| 2073 | if (_vm_page_wakeup(m)) { | |
| 2074 | vm_page_spin_unlock(m); | |
| 2075 | wakeup(m); | |
| 2076 | } else { | |
| 2077 | vm_page_spin_unlock(m); | |
| 2078 | } | |
| 90244566 MD |
2079 | } else { |
| 2080 | /* | |
| 2081 | * The page is not PG_ZERO'd so return it. | |
| 2082 | */ | |
| 2083 | vm_page_spin_unlock(m); | |
| 9bf025db MD |
2084 | KKASSERT((m->flags & (PG_UNMANAGED | |
| 2085 | PG_NEED_COMMIT)) == 0); | |
| 90244566 MD |
2086 | KKASSERT(m->hold_count == 0); |
| 2087 | KKASSERT(m->wire_count == 0); | |
| 2088 | break; | |
| b12defdc MD |
2089 | } |
| 2090 | m = NULL; | |
| bb6811be | 2091 | } |
| b12defdc | 2092 | qi = (qi + PQ_PRIME2) & PQ_L2_MASK; |
| bb6811be | 2093 | } |
| bb6811be MD |
2094 | return (m); |
| 2095 | } | |
| 2096 | ||
| 2097 | /* | |
| de71fd3f MD |
2098 | * vm_page_unmanage() |
| 2099 | * | |
| 2100 | * Prevent PV management from being done on the page. The page is | |
| 2101 | * removed from the paging queues as if it were wired, and as a | |
| 2102 | * consequence of no longer being managed the pageout daemon will not | |
| 2103 | * touch it (since there is no way to locate the pte mappings for the | |
| 2104 | * page). madvise() calls that mess with the pmap will also no longer | |
| 2105 | * operate on the page. | |
| 2106 | * | |
| 2107 | * Beyond that the page is still reasonably 'normal'. Freeing the page | |
| 2108 | * will clear the flag. | |
| 2109 | * | |
| 2110 | * This routine is used by OBJT_PHYS objects - objects using unswappable | |
| 2111 | * physical memory as backing store rather then swap-backed memory and | |
| 2112 | * will eventually be extended to support 4MB unmanaged physical | |
| 2113 | * mappings. | |
| 654a39f0 | 2114 | * |
| b12defdc | 2115 | * Caller must be holding the page busy. |
| 984263bc | 2116 | */ |
| 984263bc MD |
2117 | void |
| 2118 | vm_page_unmanage(vm_page_t m) | |
| 2119 | { | |
| b12defdc | 2120 | KKASSERT(m->flags & PG_BUSY); |
| 984263bc MD |
2121 | if ((m->flags & PG_UNMANAGED) == 0) { |
| 2122 | if (m->wire_count == 0) | |
| 2123 | vm_page_unqueue(m); | |
| 2124 | } | |
| 2125 | vm_page_flag_set(m, PG_UNMANAGED); | |
| 984263bc MD |
2126 | } |
| 2127 | ||
| 2128 | /* | |
| de71fd3f MD |
2129 | * Mark this page as wired down by yet another map, removing it from |
| 2130 | * paging queues as necessary. | |
| 984263bc | 2131 | * |
| b12defdc | 2132 | * Caller must be holding the page busy. |
| 984263bc MD |
2133 | */ |
| 2134 | void | |
| 2135 | vm_page_wire(vm_page_t m) | |
| 2136 | { | |
| 984263bc MD |
2137 | /* |
| 2138 | * Only bump the wire statistics if the page is not already wired, | |
| 2139 | * and only unqueue the page if it is on some queue (if it is unmanaged | |
| f2d22ebf MD |
2140 | * it is already off the queues). Don't do anything with fictitious |
| 2141 | * pages because they are always wired. | |
| 984263bc | 2142 | */ |
| b12defdc | 2143 | KKASSERT(m->flags & PG_BUSY); |
| f2d22ebf | 2144 | if ((m->flags & PG_FICTITIOUS) == 0) { |
| b12defdc | 2145 | if (atomic_fetchadd_int(&m->wire_count, 1) == 0) { |
| f2d22ebf MD |
2146 | if ((m->flags & PG_UNMANAGED) == 0) |
| 2147 | vm_page_unqueue(m); | |
| b12defdc | 2148 | atomic_add_int(&vmstats.v_wire_count, 1); |
| f2d22ebf | 2149 | } |
| f2d22ebf | 2150 | KASSERT(m->wire_count != 0, |
| 17cde63e | 2151 | ("vm_page_wire: wire_count overflow m=%p", m)); |
| 984263bc | 2152 | } |
| 984263bc MD |
2153 | } |
| 2154 | ||
| 2155 | /* | |
| de71fd3f MD |
2156 | * Release one wiring of this page, potentially enabling it to be paged again. |
| 2157 | * | |
| 2158 | * Many pages placed on the inactive queue should actually go | |
| 2159 | * into the cache, but it is difficult to figure out which. What | |
| 2160 | * we do instead, if the inactive target is well met, is to put | |
| 2161 | * clean pages at the head of the inactive queue instead of the tail. | |
| 2162 | * This will cause them to be moved to the cache more quickly and | |
| 2163 | * if not actively re-referenced, freed more quickly. If we just | |
| 2164 | * stick these pages at the end of the inactive queue, heavy filesystem | |
| 2165 | * meta-data accesses can cause an unnecessary paging load on memory bound | |
| 2166 | * processes. This optimization causes one-time-use metadata to be | |
| 2167 | * reused more quickly. | |
| 2168 | * | |
| f84f7e81 MD |
2169 | * Pages marked PG_NEED_COMMIT are always activated and never placed on |
| 2170 | * the inactive queue. This helps the pageout daemon determine memory | |
| 2171 | * pressure and act on out-of-memory situations more quickly. | |
| 2172 | * | |
| de71fd3f MD |
2173 | * BUT, if we are in a low-memory situation we have no choice but to |
| 2174 | * put clean pages on the cache queue. | |
| 2175 | * | |
| 2176 | * A number of routines use vm_page_unwire() to guarantee that the page | |
| 2177 | * will go into either the inactive or active queues, and will NEVER | |
| 2178 | * be placed in the cache - for example, just after dirtying a page. | |
| 2179 | * dirty pages in the cache are not allowed. | |
| 2180 | * | |
| 2181 | * The page queues must be locked. | |
| 2182 | * This routine may not block. | |
| 984263bc MD |
2183 | */ |
| 2184 | void | |
| 2185 | vm_page_unwire(vm_page_t m, int activate) | |
| 2186 | { | |
| b12defdc | 2187 | KKASSERT(m->flags & PG_BUSY); |
| f2d22ebf MD |
2188 | if (m->flags & PG_FICTITIOUS) { |
| 2189 | /* do nothing */ | |
| 2190 | } else if (m->wire_count <= 0) { | |
| 2191 | panic("vm_page_unwire: invalid wire count: %d", m->wire_count); | |
| 2192 | } else { | |
| b12defdc MD |
2193 | if (atomic_fetchadd_int(&m->wire_count, -1) == 1) { |
| 2194 | atomic_add_int(&vmstats.v_wire_count, -1); | |
| 984263bc MD |
2195 | if (m->flags & PG_UNMANAGED) { |
| 2196 | ; | |
| f84f7e81 | 2197 | } else if (activate || (m->flags & PG_NEED_COMMIT)) { |
| b12defdc | 2198 | vm_page_spin_lock(m); |
| 027193eb MD |
2199 | _vm_page_add_queue_spinlocked(m, |
| 2200 | PQ_ACTIVE + m->pc, 0); | |
| b12defdc | 2201 | _vm_page_and_queue_spin_unlock(m); |
| 984263bc | 2202 | } else { |
| b12defdc | 2203 | vm_page_spin_lock(m); |
| 984263bc | 2204 | vm_page_flag_clear(m, PG_WINATCFLS); |
| 027193eb MD |
2205 | _vm_page_add_queue_spinlocked(m, |
| 2206 | PQ_INACTIVE + m->pc, 0); | |
| e527fb6b | 2207 | ++vm_swapcache_inactive_heuristic; |
| b12defdc | 2208 | _vm_page_and_queue_spin_unlock(m); |
| 984263bc MD |
2209 | } |
| 2210 | } | |
| 984263bc | 2211 | } |
| 984263bc MD |
2212 | } |
| 2213 | ||
| 984263bc MD |
2214 | /* |
| 2215 | * Move the specified page to the inactive queue. If the page has | |
| 2216 | * any associated swap, the swap is deallocated. | |
| 2217 | * | |
| 2218 | * Normally athead is 0 resulting in LRU operation. athead is set | |
| 2219 | * to 1 if we want this page to be 'as if it were placed in the cache', | |
| 2220 | * except without unmapping it from the process address space. | |
| 2221 | * | |
| b12defdc | 2222 | * vm_page's spinlock must be held on entry and will remain held on return. |
| 984263bc MD |
2223 | * This routine may not block. |
| 2224 | */ | |
| b12defdc MD |
2225 | static void |
| 2226 | _vm_page_deactivate_locked(vm_page_t m, int athead) | |
| 984263bc | 2227 | { |
| b12defdc MD |
2228 | u_short oqueue; |
| 2229 | ||
| 984263bc MD |
2230 | /* |
| 2231 | * Ignore if already inactive. | |
| 2232 | */ | |
| 027193eb | 2233 | if (m->queue - m->pc == PQ_INACTIVE) |
| 984263bc | 2234 | return; |
| b12defdc MD |
2235 | _vm_page_queue_spin_lock(m); |
| 2236 | oqueue = _vm_page_rem_queue_spinlocked(m); | |
| 984263bc | 2237 | |
| 984263bc | 2238 | if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { |
| b12defdc | 2239 | if (oqueue == PQ_CACHE) |
| 12e4aaff | 2240 | mycpu->gd_cnt.v_reactivated++; |
| 984263bc | 2241 | vm_page_flag_clear(m, PG_WINATCFLS); |
| 027193eb | 2242 | _vm_page_add_queue_spinlocked(m, PQ_INACTIVE + m->pc, athead); |
| b12defdc | 2243 | if (athead == 0) |
| e527fb6b | 2244 | ++vm_swapcache_inactive_heuristic; |
| 984263bc | 2245 | } |
| b12defdc MD |
2246 | _vm_page_queue_spin_unlock(m); |
| 2247 | /* leaves vm_page spinlocked */ | |
| 984263bc MD |
2248 | } |
| 2249 | ||
| 573fb415 MD |
2250 | /* |
| 2251 | * Attempt to deactivate a page. | |
| 2252 | * | |
| 2253 | * No requirements. | |
| 2254 | */ | |
| 984263bc MD |
2255 | void |
| 2256 | vm_page_deactivate(vm_page_t m) | |
| 2257 | { | |
| b12defdc MD |
2258 | vm_page_spin_lock(m); |
| 2259 | _vm_page_deactivate_locked(m, 0); | |
| 2260 | vm_page_spin_unlock(m); | |
| 2261 | } | |
| 2262 | ||
| 2263 | void | |
| 2264 | vm_page_deactivate_locked(vm_page_t m) | |
| 2265 | { | |
| 2266 | _vm_page_deactivate_locked(m, 0); | |
| 984263bc MD |
2267 | } |
| 2268 | ||
| 2269 | /* | |
| 573fb415 | 2270 | * Attempt to move a page to PQ_CACHE. |
| b12defdc | 2271 | * |
| 984263bc | 2272 | * Returns 0 on failure, 1 on success |
| 573fb415 | 2273 | * |
| b12defdc MD |
2274 | * The page should NOT be busied by the caller. This function will validate |
| 2275 | * whether the page can be safely moved to the cache. | |
| 984263bc MD |
2276 | */ |
| 2277 | int | |
| 2278 | vm_page_try_to_cache(vm_page_t m) | |
| 2279 | { | |
| b12defdc MD |
2280 | vm_page_spin_lock(m); |
| 2281 | if (vm_page_busy_try(m, TRUE)) { | |
| 2282 | vm_page_spin_unlock(m); | |
| 2283 | return(0); | |
| 2284 | } | |
| 2285 | if (m->dirty || m->hold_count || m->wire_count || | |
| 9bf025db | 2286 | (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT))) { |
| b12defdc MD |
2287 | if (_vm_page_wakeup(m)) { |
| 2288 | vm_page_spin_unlock(m); | |
| 2289 | wakeup(m); | |
| 2290 | } else { | |
| 2291 | vm_page_spin_unlock(m); | |
| 2292 | } | |
| 984263bc MD |
2293 | return(0); |
| 2294 | } | |
| b12defdc MD |
2295 | vm_page_spin_unlock(m); |
| 2296 | ||
| 2297 | /* | |
| 2298 | * Page busied by us and no longer spinlocked. Dirty pages cannot | |
| 2299 | * be moved to the cache. | |
| 2300 | */ | |
| 984263bc | 2301 | vm_page_test_dirty(m); |
| 654a39f0 | 2302 | if (m->dirty) { |
| 50e32333 | 2303 | vm_page_wakeup(m); |
| 984263bc | 2304 | return(0); |
| 654a39f0 | 2305 | } |
| 984263bc MD |
2306 | vm_page_cache(m); |
| 2307 | return(1); | |
| 2308 | } | |
| 2309 | ||
| 2310 | /* | |
| de71fd3f MD |
2311 | * Attempt to free the page. If we cannot free it, we do nothing. |
| 2312 | * 1 is returned on success, 0 on failure. | |
| 573fb415 MD |
2313 | * |
| 2314 | * No requirements. | |
| 984263bc | 2315 | */ |
| 984263bc MD |
2316 | int |
| 2317 | vm_page_try_to_free(vm_page_t m) | |
| 2318 | { | |
| b12defdc MD |
2319 | vm_page_spin_lock(m); |
| 2320 | if (vm_page_busy_try(m, TRUE)) { | |
| 2321 | vm_page_spin_unlock(m); | |
| 2322 | return(0); | |
| 2323 | } | |
| 82034c53 MD |
2324 | |
| 2325 | /* | |
| 2326 | * The page can be in any state, including already being on the free | |
| 2327 | * queue. Check to see if it really can be freed. | |
| 2328 | */ | |
| 2329 | if (m->dirty || /* can't free if it is dirty */ | |
| 2330 | m->hold_count || /* or held (XXX may be wrong) */ | |
| 2331 | m->wire_count || /* or wired */ | |
| 9bf025db MD |
2332 | (m->flags & (PG_UNMANAGED | /* or unmanaged */ |
| 2333 | PG_NEED_COMMIT)) || /* or needs a commit */ | |
| 82034c53 MD |
2334 | m->queue - m->pc == PQ_FREE || /* already on PQ_FREE */ |
| 2335 | m->queue - m->pc == PQ_HOLD) { /* already on PQ_HOLD */ | |
| b12defdc MD |
2336 | if (_vm_page_wakeup(m)) { |
| 2337 | vm_page_spin_unlock(m); | |
| 2338 | wakeup(m); | |
| 2339 | } else { | |
| 2340 | vm_page_spin_unlock(m); | |
| 2341 | } | |
| 984263bc MD |
2342 | return(0); |
| 2343 | } | |
| b12defdc MD |
2344 | vm_page_spin_unlock(m); |
| 2345 | ||
| 2346 | /* | |
| 82034c53 MD |
2347 | * We can probably free the page. |
| 2348 | * | |
| b12defdc MD |
2349 | * Page busied by us and no longer spinlocked. Dirty pages will |
| 2350 | * not be freed by this function. We have to re-test the | |
| 2351 | * dirty bit after cleaning out the pmaps. | |
| 2352 | */ | |
| 984263bc | 2353 | vm_page_test_dirty(m); |
| 654a39f0 | 2354 | if (m->dirty) { |
| b12defdc | 2355 | vm_page_wakeup(m); |
| 984263bc | 2356 | return(0); |
| 654a39f0 | 2357 | } |
| 984263bc | 2358 | vm_page_protect(m, VM_PROT_NONE); |
| b12defdc MD |
2359 | if (m->dirty) { |
| 2360 | vm_page_wakeup(m); | |
| 2361 | return(0); | |
| 2362 | } | |
| 984263bc MD |
2363 | vm_page_free(m); |
| 2364 | return(1); | |
| 2365 | } | |
| 2366 | ||
| 984263bc MD |
2367 | /* |
| 2368 | * vm_page_cache | |
| 2369 | * | |
| 2370 | * Put the specified page onto the page cache queue (if appropriate). | |
| 2371 | * | |
| a491077e MD |
2372 | * The page must be busy, and this routine will release the busy and |
| 2373 | * possibly even free the page. | |
| 984263bc MD |
2374 | */ |
| 2375 | void | |
| 2376 | vm_page_cache(vm_page_t m) | |
| 2377 | { | |
| 9bf025db MD |
2378 | if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) || |
| 2379 | m->busy || m->wire_count || m->hold_count) { | |
| 086c1d7e | 2380 | kprintf("vm_page_cache: attempting to cache busy/held page\n"); |
| a491077e | 2381 | vm_page_wakeup(m); |
| 984263bc MD |
2382 | return; |
| 2383 | } | |
| c9ec86b3 MD |
2384 | |
| 2385 | /* | |
| 2386 | * Already in the cache (and thus not mapped) | |
| 2387 | */ | |
| 17cde63e MD |
2388 | if ((m->queue - m->pc) == PQ_CACHE) { |
| 2389 | KKASSERT((m->flags & PG_MAPPED) == 0); | |
| a491077e | 2390 | vm_page_wakeup(m); |
| 984263bc | 2391 | return; |
| 17cde63e | 2392 | } |
| 984263bc MD |
2393 | |
| 2394 | /* | |
| c9ec86b3 MD |
2395 | * Caller is required to test m->dirty, but note that the act of |
| 2396 | * removing the page from its maps can cause it to become dirty | |
| 2397 | * on an SMP system due to another cpu running in usermode. | |
| 984263bc | 2398 | */ |
| c9ec86b3 | 2399 | if (m->dirty) { |
| 984263bc MD |
2400 | panic("vm_page_cache: caching a dirty page, pindex: %ld", |
| 2401 | (long)m->pindex); | |
| 2402 | } | |
| c9ec86b3 MD |
2403 | |
| 2404 | /* | |
| 2405 | * Remove all pmaps and indicate that the page is not | |
| 17cde63e MD |
2406 | * writeable or mapped. Our vm_page_protect() call may |
| 2407 | * have blocked (especially w/ VM_PROT_NONE), so recheck | |
| 2408 | * everything. | |
| c9ec86b3 MD |
2409 | */ |
| 2410 | vm_page_protect(m, VM_PROT_NONE); | |
| 9bf025db MD |
2411 | if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) || |
| 2412 | m->busy || m->wire_count || m->hold_count) { | |
| a491077e | 2413 | vm_page_wakeup(m); |
| 9bf025db | 2414 | } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) { |
| c9ec86b3 | 2415 | vm_page_deactivate(m); |
| a491077e | 2416 | vm_page_wakeup(m); |
| c9ec86b3 | 2417 | } else { |
| b12defdc MD |
2418 | _vm_page_and_queue_spin_lock(m); |
| 2419 | _vm_page_rem_queue_spinlocked(m); | |
| 2420 | _vm_page_add_queue_spinlocked(m, PQ_CACHE + m->pc, 0); | |
| 2421 | _vm_page_queue_spin_unlock(m); | |
| 2422 | if (_vm_page_wakeup(m)) { | |
| 2423 | vm_page_spin_unlock(m); | |
| 2424 | wakeup(m); | |
| 2425 | } else { | |
| 2426 | vm_page_spin_unlock(m); | |
| 2427 | } | |
| c9ec86b3 MD |
2428 | vm_page_free_wakeup(); |
| 2429 | } | |
| 984263bc MD |
2430 | } |
| 2431 | ||
| 2432 | /* | |
| de71fd3f MD |
2433 | * vm_page_dontneed() |
| 2434 | * | |
| 2435 | * Cache, deactivate, or do nothing as appropriate. This routine | |
| 2436 | * is typically used by madvise() MADV_DONTNEED. | |
| 2437 | * | |
| 2438 | * Generally speaking we want to move the page into the cache so | |
| 2439 | * it gets reused quickly. However, this can result in a silly syndrome | |
| 2440 | * due to the page recycling too quickly. Small objects will not be | |
| 2441 | * fully cached. On the otherhand, if we move the page to the inactive | |
| 2442 | * queue we wind up with a problem whereby very large objects | |
| 2443 | * unnecessarily blow away our inactive and cache queues. | |
| 2444 | * | |
| 2445 | * The solution is to move the pages based on a fixed weighting. We | |
| 2446 | * either leave them alone, deactivate them, or move them to the cache, | |
| 2447 | * where moving them to the cache has the highest weighting. | |
| 2448 | * By forcing some pages into other queues we eventually force the | |
| 2449 | * system to balance the queues, potentially recovering other unrelated | |
| 2450 | * space from active. The idea is to not force this to happen too | |
| 2451 | * often. | |
| 573fb415 | 2452 | * |
| b12defdc | 2453 | * The page must be busied. |
| 984263bc | 2454 | */ |
| 984263bc MD |
2455 | void |
| 2456 | vm_page_dontneed(vm_page_t m) | |
| 2457 | { | |
| 2458 | static int dnweight; | |
| 2459 | int dnw; | |
| 2460 | int head; | |
| 2461 | ||
| 2462 | dnw = ++dnweight; | |
| 2463 | ||
| 2464 | /* | |
| 2465 | * occassionally leave the page alone | |
| 2466 | */ | |
| 984263bc | 2467 | if ((dnw & 0x01F0) == 0 || |
| 027193eb | 2468 | m->queue - m->pc == PQ_INACTIVE || |
| 984263bc MD |
2469 | m->queue - m->pc == PQ_CACHE |
| 2470 | ) { | |
| 2471 | if (m->act_count >= ACT_INIT) | |
| 2472 | --m->act_count; | |
| 2473 | return; | |
| 2474 | } | |
| 2475 | ||
| 31da5e4d VS |
2476 | /* |
| 2477 | * If vm_page_dontneed() is inactivating a page, it must clear | |
| 2478 | * the referenced flag; otherwise the pagedaemon will see references | |
| 2479 | * on the page in the inactive queue and reactivate it. Until the | |
| 2480 | * page can move to the cache queue, madvise's job is not done. | |
| 2481 | */ | |
| 2482 | vm_page_flag_clear(m, PG_REFERENCED); | |
| 2483 | pmap_clear_reference(m); | |
| 2484 | ||
| 984263bc MD |
2485 | if (m->dirty == 0) |
| 2486 | vm_page_test_dirty(m); | |
| 2487 | ||
| 2488 | if (m->dirty || (dnw & 0x0070) == 0) { | |
| 2489 | /* | |
| 2490 | * Deactivate the page 3 times out of 32. | |
| 2491 | */ | |
| 2492 | head = 0; | |
| 2493 | } else { | |
| 2494 | /* | |
| 2495 | * Cache the page 28 times out of every 32. Note that | |
| 2496 | * the page is deactivated instead of cached, but placed | |
| 2497 | * at the head of the queue instead of the tail. | |
| 2498 | */ | |
| 2499 | head = 1; | |
| 2500 | } | |
| b12defdc MD |
2501 | vm_page_spin_lock(m); |
| 2502 | _vm_page_deactivate_locked(m, head); | |
| 2503 | vm_page_spin_unlock(m); | |
| 2504 | } | |
| 2505 | ||
| 2506 | /* | |
| 2507 | * These routines manipulate the 'soft busy' count for a page. A soft busy | |
| 2508 | * is almost like PG_BUSY except that it allows certain compatible operations | |
| 2509 | * to occur on the page while it is busy. For example, a page undergoing a | |
| 2510 | * write can still be mapped read-only. | |
| 2511 | * | |
| 2512 | * Because vm_pages can overlap buffers m->busy can be > 1. m->busy is only | |
| 2513 | * adjusted while the vm_page is PG_BUSY so the flash will occur when the | |
| 2514 | * busy bit is cleared. | |
| 2515 | */ | |
| 2516 | void | |
| 2517 | vm_page_io_start(vm_page_t m) | |
| 2518 | { | |
| 2519 | KASSERT(m->flags & PG_BUSY, ("vm_page_io_start: page not busy!!!")); | |
| 2520 | atomic_add_char(&m->busy, 1); | |
| 2521 | vm_page_flag_set(m, PG_SBUSY); | |
| 2522 | } | |
| 2523 | ||
| 2524 | void | |
| 2525 | vm_page_io_finish(vm_page_t m) | |
| 2526 | { | |
| 2527 | KASSERT(m->flags & PG_BUSY, ("vm_page_io_finish: page not busy!!!")); | |
| 2528 | atomic_subtract_char(&m->busy, 1); | |
| 2529 | if (m->busy == 0) | |
| 2530 | vm_page_flag_clear(m, PG_SBUSY); | |
| 984263bc MD |
2531 | } |
| 2532 | ||
| 2533 | /* | |
| 9bf025db MD |
2534 | * Indicate that a clean VM page requires a filesystem commit and cannot |
| 2535 | * be reused. Used by tmpfs. | |
| 2536 | */ | |
| 2537 | void | |
| 2538 | vm_page_need_commit(vm_page_t m) | |
| 2539 | { | |
| 2540 | vm_page_flag_set(m, PG_NEED_COMMIT); | |
| 2541 | } | |
| 2542 | ||
| 2543 | void | |
| 2544 | vm_page_clear_commit(vm_page_t m) | |
| 2545 | { | |
| 2546 | vm_page_flag_clear(m, PG_NEED_COMMIT); | |
| 2547 | } | |
| 2548 | ||
| 2549 | /* | |
| 06ecca5a | 2550 | * Grab a page, blocking if it is busy and allocating a page if necessary. |
| d2d8515b MD |
2551 | * A busy page is returned or NULL. The page may or may not be valid and |
| 2552 | * might not be on a queue (the caller is responsible for the disposition of | |
| 2553 | * the page). | |
| 984263bc | 2554 | * |
| d2d8515b MD |
2555 | * If VM_ALLOC_ZERO is specified and the grab must allocate a new page, the |
| 2556 | * page will be zero'd and marked valid. | |
| b12defdc | 2557 | * |
| d2d8515b MD |
2558 | * If VM_ALLOC_FORCE_ZERO is specified the page will be zero'd and marked |
| 2559 | * valid even if it already exists. | |
| 2560 | * | |
| 2561 | * If VM_ALLOC_RETRY is specified this routine will never return NULL. Also | |
| 2562 | * note that VM_ALLOC_NORMAL must be specified if VM_ALLOC_RETRY is specified. | |
| d149178e | 2563 | * VM_ALLOC_NULL_OK is implied when VM_ALLOC_RETRY is specified. |
| dc1fd4b3 | 2564 | * |
| 06ecca5a MD |
2565 | * This routine may block, but if VM_ALLOC_RETRY is not set then NULL is |
| 2566 | * always returned if we had blocked. | |
| d2d8515b | 2567 | * |
| 06ecca5a | 2568 | * This routine may not be called from an interrupt. |
| 06ecca5a | 2569 | * |
| d2d8515b | 2570 | * PG_ZERO is *ALWAYS* cleared by this routine. |
| 573fb415 | 2571 | * |
| d2d8515b | 2572 | * No other requirements. |
| 984263bc MD |
2573 | */ |
| 2574 | vm_page_t | |
| 2575 | vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) | |
| 2576 | { | |
| 984263bc | 2577 | vm_page_t m; |
| b12defdc | 2578 | int error; |
| 984263bc | 2579 | |
| dc1fd4b3 MD |
2580 | KKASSERT(allocflags & |
| 2581 | (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); | |
| 398c240d | 2582 | vm_object_hold(object); |
| b12defdc MD |
2583 | for (;;) { |
| 2584 | m = vm_page_lookup_busy_try(object, pindex, TRUE, &error); | |
| 2585 | if (error) { | |
| 2586 | vm_page_sleep_busy(m, TRUE, "pgrbwt"); | |
| 2587 | if ((allocflags & VM_ALLOC_RETRY) == 0) { | |
| 2588 | m = NULL; | |
| 2589 | break; | |
| 984263bc | 2590 | } |
| d2d8515b | 2591 | /* retry */ |
| b12defdc | 2592 | } else if (m == NULL) { |
| d149178e MD |
2593 | if (allocflags & VM_ALLOC_RETRY) |
| 2594 | allocflags |= VM_ALLOC_NULL_OK; | |
| b12defdc MD |
2595 | m = vm_page_alloc(object, pindex, |
| 2596 | allocflags & ~VM_ALLOC_RETRY); | |
| 2597 | if (m) | |
| 2598 | break; | |
| 2599 | vm_wait(0); | |
| 2600 | if ((allocflags & VM_ALLOC_RETRY) == 0) | |
| d2d8515b | 2601 | goto failed; |
| 984263bc | 2602 | } else { |
| b12defdc MD |
2603 | /* m found */ |
| 2604 | break; | |
| 984263bc MD |
2605 | } |
| 2606 | } | |
| d2d8515b MD |
2607 | |
| 2608 | /* | |
| 2609 | * If VM_ALLOC_ZERO an invalid page will be zero'd and set valid. | |
| 2610 | * | |
| 2611 | * If VM_ALLOC_FORCE_ZERO the page is unconditionally zero'd and set | |
| 2612 | * valid even if already valid. | |
| 2613 | */ | |
| 2614 | if (m->valid == 0) { | |
| 2615 | if (allocflags & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO)) { | |
| 2616 | if ((m->flags & PG_ZERO) == 0) | |
| 2617 | pmap_zero_page(VM_PAGE_TO_PHYS(m)); | |
| 2618 | m->valid = VM_PAGE_BITS_ALL; | |
| 2619 | } | |
| 2620 | } else if (allocflags & VM_ALLOC_FORCE_ZERO) { | |
| 2621 | pmap_zero_page(VM_PAGE_TO_PHYS(m)); | |
| 2622 | m->valid = VM_PAGE_BITS_ALL; | |
| 2623 | } | |
| 2624 | vm_page_flag_clear(m, PG_ZERO); | |
| 2625 | failed: | |
| 398c240d | 2626 | vm_object_drop(object); |
| 06ecca5a | 2627 | return(m); |
| 984263bc MD |
2628 | } |
| 2629 | ||
| 2630 | /* | |
| 2631 | * Mapping function for valid bits or for dirty bits in | |
| 2632 | * a page. May not block. | |
| 2633 | * | |
| 2634 | * Inputs are required to range within a page. | |
| 573fb415 MD |
2635 | * |
| 2636 | * No requirements. | |
| 2637 | * Non blocking. | |
| 984263bc | 2638 | */ |
| 573fb415 | 2639 | int |
| 984263bc MD |
2640 | vm_page_bits(int base, int size) |
| 2641 | { | |
| 2642 | int first_bit; | |
| 2643 | int last_bit; | |
| 2644 | ||
| 2645 | KASSERT( | |
| 2646 | base + size <= PAGE_SIZE, | |
| 2647 | ("vm_page_bits: illegal base/size %d/%d", base, size) | |
| 2648 | ); | |
| 2649 | ||
| 2650 | if (size == 0) /* handle degenerate case */ | |
| 2651 | return(0); | |
| 2652 | ||
| 2653 | first_bit = base >> DEV_BSHIFT; | |
| 2654 | last_bit = (base + size - 1) >> DEV_BSHIFT; | |
| 2655 | ||
| 2656 | return ((2 << last_bit) - (1 << first_bit)); | |
| 2657 | } | |
| 2658 | ||
| 2659 | /* | |
| de71fd3f MD |
2660 | * Sets portions of a page valid and clean. The arguments are expected |
| 2661 | * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive | |
| 2662 | * of any partial chunks touched by the range. The invalid portion of | |
| 2663 | * such chunks will be zero'd. | |
| 984263bc | 2664 | * |
| c7841cbe MD |
2665 | * NOTE: When truncating a buffer vnode_pager_setsize() will automatically |
| 2666 | * align base to DEV_BSIZE so as not to mark clean a partially | |
| 2667 | * truncated device block. Otherwise the dirty page status might be | |
| 2668 | * lost. | |
| 2669 | * | |
| de71fd3f | 2670 | * This routine may not block. |
| 984263bc | 2671 | * |
| de71fd3f | 2672 | * (base + size) must be less then or equal to PAGE_SIZE. |
| 984263bc | 2673 | */ |
| 1a54183b MD |
2674 | static void |
| 2675 | _vm_page_zero_valid(vm_page_t m, int base, int size) | |
| 984263bc | 2676 | { |
| 984263bc MD |
2677 | int frag; |
| 2678 | int endoff; | |
| 2679 | ||
| 2680 | if (size == 0) /* handle degenerate case */ | |
| 2681 | return; | |
| 2682 | ||
| 2683 | /* | |
| 2684 | * If the base is not DEV_BSIZE aligned and the valid | |
| 2685 | * bit is clear, we have to zero out a portion of the | |
| 2686 | * first block. | |
| 2687 | */ | |
| 2688 | ||
| 2689 | if ((frag = base & ~(DEV_BSIZE - 1)) != base && | |
| 2690 | (m->valid & (1 << (base >> DEV_BSHIFT))) == 0 | |
| 2691 | ) { | |
| 2692 | pmap_zero_page_area( | |
| 2693 | VM_PAGE_TO_PHYS(m), | |
| 2694 | frag, | |
| 2695 | base - frag | |
| 2696 | ); | |
| 2697 | } | |
| 2698 | ||
| 2699 | /* | |
| 2700 | * If the ending offset is not DEV_BSIZE aligned and the | |
| 2701 | * valid bit is clear, we have to zero out a portion of | |
| 2702 | * the last block. | |
| 2703 | */ | |
| 2704 | ||
| 2705 | endoff = base + size; | |
| 2706 | ||
| 2707 | if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && | |
| 2708 | (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0 | |
| 2709 | ) { | |
| 2710 | pmap_zero_page_area( | |
| 2711 | VM_PAGE_TO_PHYS(m), | |
| 2712 | endoff, | |
| 2713 | DEV_BSIZE - (endoff & (DEV_BSIZE - 1)) | |
| 2714 | ); | |
| 2715 | } | |
| 1a54183b | 2716 | } |
| 984263bc | 2717 | |
| 1a54183b MD |
2718 | /* |
| 2719 | * Set valid, clear dirty bits. If validating the entire | |
| 2720 | * page we can safely clear the pmap modify bit. We also | |
| 2721 | * use this opportunity to clear the PG_NOSYNC flag. If a process | |
| 2722 | * takes a write fault on a MAP_NOSYNC memory area the flag will | |
| 2723 | * be set again. | |
| 2724 | * | |
| 2725 | * We set valid bits inclusive of any overlap, but we can only | |
| 2726 | * clear dirty bits for DEV_BSIZE chunks that are fully within | |
| 2727 | * the range. | |
| 573fb415 MD |
2728 | * |
| 2729 | * Page must be busied? | |
| 2730 | * No other requirements. | |
| 1a54183b MD |
2731 | */ |
| 2732 | void | |
| 2733 | vm_page_set_valid(vm_page_t m, int base, int size) | |
| 2734 | { | |
| 2735 | _vm_page_zero_valid(m, base, size); | |
| 2736 | m->valid |= vm_page_bits(base, size); | |
| 2737 | } | |
| 984263bc | 2738 | |
| cb1cf930 MD |
2739 | |
| 2740 | /* | |
| 2741 | * Set valid bits and clear dirty bits. | |
| 2742 | * | |
| 2743 | * NOTE: This function does not clear the pmap modified bit. | |
| 2744 | * Also note that e.g. NFS may use a byte-granular base | |
| 2745 | * and size. | |
| 573fb415 | 2746 | * |
| 9a0cb7b1 MD |
2747 | * WARNING: Page must be busied? But vfs_clean_one_page() will call |
| 2748 | * this without necessarily busying the page (via bdwrite()). | |
| 2749 | * So for now vm_token must also be held. | |
| 2750 | * | |
| 573fb415 | 2751 | * No other requirements. |
| cb1cf930 | 2752 | */ |
| 1a54183b MD |
2753 | void |
| 2754 | vm_page_set_validclean(vm_page_t m, int base, int size) | |
| 2755 | { | |
| 2756 | int pagebits; | |
| 2757 | ||
| 2758 | _vm_page_zero_valid(m, base, size); | |
| 984263bc MD |
2759 | pagebits = vm_page_bits(base, size); |
| 2760 | m->valid |= pagebits; | |
| 984263bc MD |
2761 | m->dirty &= ~pagebits; |
| 2762 | if (base == 0 && size == PAGE_SIZE) { | |
| cb1cf930 | 2763 | /*pmap_clear_modify(m);*/ |
| 984263bc MD |
2764 | vm_page_flag_clear(m, PG_NOSYNC); |
| 2765 | } | |
| 2766 | } | |
| 2767 | ||
| cb1cf930 | 2768 | /* |
| 0a8aee15 | 2769 | * Set valid & dirty. Used by buwrite() |
| 573fb415 | 2770 | * |
| 9a0cb7b1 MD |
2771 | * WARNING: Page must be busied? But vfs_dirty_one_page() will |
| 2772 | * call this function in buwrite() so for now vm_token must | |
| 9bf025db | 2773 | * be held. |
| 9a0cb7b1 | 2774 | * |
| 573fb415 | 2775 | * No other requirements. |
| 0a8aee15 MD |
2776 | */ |
| 2777 | void | |
| 2778 | vm_page_set_validdirty(vm_page_t m, int base, int size) | |
| 2779 | { | |
| 2780 | int pagebits; | |
| 2781 | ||
| 2782 | pagebits = vm_page_bits(base, size); | |
| 2783 | m->valid |= pagebits; | |
| 2784 | m->dirty |= pagebits; | |
| d89ce96a | 2785 | if (m->object) |
| 9bf025db | 2786 | vm_object_set_writeable_dirty(m->object); |
| 0a8aee15 MD |
2787 | } |
| 2788 | ||
| 2789 | /* | |
| cb1cf930 MD |
2790 | * Clear dirty bits. |
| 2791 | * | |
| 2792 | * NOTE: This function does not clear the pmap modified bit. | |
| 2793 | * Also note that e.g. NFS may use a byte-granular base | |
| 2794 | * and size. | |
| 573fb415 MD |
2795 | * |
| 2796 | * Page must be busied? | |
| 2797 | * No other requirements. | |
| cb1cf930 | 2798 | */ |
| 984263bc MD |
2799 | void |
| 2800 | vm_page_clear_dirty(vm_page_t m, int base, int size) | |
| 2801 | { | |
| 2802 | m->dirty &= ~vm_page_bits(base, size); | |
| 1a54183b | 2803 | if (base == 0 && size == PAGE_SIZE) { |
| cb1cf930 | 2804 | /*pmap_clear_modify(m);*/ |
| 1a54183b MD |
2805 | vm_page_flag_clear(m, PG_NOSYNC); |
| 2806 | } | |
| 984263bc MD |
2807 | } |
| 2808 | ||
| 2809 | /* | |
| 17cde63e MD |
2810 | * Make the page all-dirty. |
| 2811 | * | |
| 2812 | * Also make sure the related object and vnode reflect the fact that the | |
| 2813 | * object may now contain a dirty page. | |
| 573fb415 MD |
2814 | * |
| 2815 | * Page must be busied? | |
| 2816 | * No other requirements. | |
| 17cde63e MD |
2817 | */ |
| 2818 | void | |
| 2819 | vm_page_dirty(vm_page_t m) | |
| 2820 | { | |
| 2821 | #ifdef INVARIANTS | |
| 2822 | int pqtype = m->queue - m->pc; | |
| 2823 | #endif | |
| 2824 | KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE, | |
| 2825 | ("vm_page_dirty: page in free/cache queue!")); | |
| 2826 | if (m->dirty != VM_PAGE_BITS_ALL) { | |
| 2827 | m->dirty = VM_PAGE_BITS_ALL; | |
| 2828 | if (m->object) | |
| 2829 | vm_object_set_writeable_dirty(m->object); | |
| 2830 | } | |
| 2831 | } | |
| 2832 | ||
| 2833 | /* | |
| de71fd3f MD |
2834 | * Invalidates DEV_BSIZE'd chunks within a page. Both the |
| 2835 | * valid and dirty bits for the effected areas are cleared. | |
| 984263bc | 2836 | * |
| 573fb415 MD |
2837 | * Page must be busied? |
| 2838 | * Does not block. | |
| 2839 | * No other requirements. | |
| 984263bc MD |
2840 | */ |
| 2841 | void | |
| 2842 | vm_page_set_invalid(vm_page_t m, int base, int size) | |
| 2843 | { | |
| 2844 | int bits; | |
| 2845 | ||
| 2846 | bits = vm_page_bits(base, size); | |
| 2847 | m->valid &= ~bits; | |
| 2848 | m->dirty &= ~bits; | |
| 2849 | m->object->generation++; | |
| 2850 | } | |
| 2851 | ||
| 2852 | /* | |
| de71fd3f MD |
2853 | * The kernel assumes that the invalid portions of a page contain |
| 2854 | * garbage, but such pages can be mapped into memory by user code. | |
| 2855 | * When this occurs, we must zero out the non-valid portions of the | |
| 2856 | * page so user code sees what it expects. | |
| 984263bc | 2857 | * |
| de71fd3f MD |
2858 | * Pages are most often semi-valid when the end of a file is mapped |
| 2859 | * into memory and the file's size is not page aligned. | |
| 573fb415 MD |
2860 | * |
| 2861 | * Page must be busied? | |
| 2862 | * No other requirements. | |
| 984263bc | 2863 | */ |
| 984263bc MD |
2864 | void |
| 2865 | vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) | |
| 2866 | { | |
| 2867 | int b; | |
| 2868 | int i; | |
| 2869 | ||
| 2870 | /* | |
| 2871 | * Scan the valid bits looking for invalid sections that | |
| 2872 | * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the | |
| 2873 | * valid bit may be set ) have already been zerod by | |
| 2874 | * vm_page_set_validclean(). | |
| 2875 | */ | |
| 984263bc MD |
2876 | for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { |
| 2877 | if (i == (PAGE_SIZE / DEV_BSIZE) || | |
| 2878 | (m->valid & (1 << i)) | |
| 2879 | ) { | |
| 2880 | if (i > b) { | |
| 2881 | pmap_zero_page_area( | |
| 2882 | VM_PAGE_TO_PHYS(m), | |
| 2883 | b << DEV_BSHIFT, | |
| 2884 | (i - b) << DEV_BSHIFT | |
| 2885 | ); | |
| 2886 | } | |
| 2887 | b = i + 1; | |
| 2888 | } | |
| 2889 | } | |
| 2890 | ||
| 2891 | /* | |
| 2892 | * setvalid is TRUE when we can safely set the zero'd areas | |
| 2893 | * as being valid. We can do this if there are no cache consistency | |
| 2894 | * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. | |
| 2895 | */ | |
| 984263bc MD |
2896 | if (setvalid) |
| 2897 | m->valid = VM_PAGE_BITS_ALL; | |
| 2898 | } | |
| 2899 | ||
| 2900 | /* | |
| de71fd3f MD |
2901 | * Is a (partial) page valid? Note that the case where size == 0 |
| 2902 | * will return FALSE in the degenerate case where the page is entirely | |
| 2903 | * invalid, and TRUE otherwise. | |
| 984263bc | 2904 | * |
| 573fb415 MD |
2905 | * Does not block. |
| 2906 | * No other requirements. | |
| 984263bc | 2907 | */ |
| 984263bc MD |
2908 | int |
| 2909 | vm_page_is_valid(vm_page_t m, int base, int size) | |
| 2910 | { | |
| 2911 | int bits = vm_page_bits(base, size); | |
| 2912 | ||
| 2913 | if (m->valid && ((m->valid & bits) == bits)) | |
| 2914 | return 1; | |
| 2915 | else | |
| 2916 | return 0; | |
| 2917 | } | |
| 2918 | ||
| 2919 | /* | |
| 2920 | * update dirty bits from pmap/mmu. May not block. | |
| 573fb415 | 2921 | * |
| b12defdc | 2922 | * Caller must hold the page busy |
| 984263bc | 2923 | */ |
| 984263bc MD |
2924 | void |
| 2925 | vm_page_test_dirty(vm_page_t m) | |
| 2926 | { | |
| 2927 | if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) { | |
| 2928 | vm_page_dirty(m); | |
| 2929 | } | |
| 2930 | } | |
| 2931 | ||
| 10192bae | 2932 | /* |
| 906c754c MD |
2933 | * Register an action, associating it with its vm_page |
| 2934 | */ | |
| 2935 | void | |
| 2936 | vm_page_register_action(vm_page_action_t action, vm_page_event_t event) | |
| 2937 | { | |
| 2938 | struct vm_page_action_list *list; | |
| 2939 | int hv; | |
| 2940 | ||
| 2941 | hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK; | |
| 2942 | list = &action_list[hv]; | |
| 2943 | ||
| 2944 | lwkt_gettoken(&vm_token); | |
| 2945 | vm_page_flag_set(action->m, PG_ACTIONLIST); | |
| 2946 | action->event = event; | |
| 2947 | LIST_INSERT_HEAD(list, action, entry); | |
| 2948 | lwkt_reltoken(&vm_token); | |
| 2949 | } | |
| 2950 | ||
| 2951 | /* | |
| 2952 | * Unregister an action, disassociating it from its related vm_page | |
| 2953 | */ | |
| 2954 | void | |
| 2955 | vm_page_unregister_action(vm_page_action_t action) | |
| 2956 | { | |
| 2957 | struct vm_page_action_list *list; | |
| 2958 | int hv; | |
| 2959 | ||
| 2960 | lwkt_gettoken(&vm_token); | |
| 2961 | if (action->event != VMEVENT_NONE) { | |
| 2962 | action->event = VMEVENT_NONE; | |
| 2963 | LIST_REMOVE(action, entry); | |
| 2964 | ||
| 2965 | hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK; | |
| 2966 | list = &action_list[hv]; | |
| 2967 | if (LIST_EMPTY(list)) | |
| 2968 | vm_page_flag_clear(action->m, PG_ACTIONLIST); | |
| 2969 | } | |
| 2970 | lwkt_reltoken(&vm_token); | |
| 2971 | } | |
| 2972 | ||
| 2973 | /* | |
| 10192bae MD |
2974 | * Issue an event on a VM page. Corresponding action structures are |
| 2975 | * removed from the page's list and called. | |
| 906c754c MD |
2976 | * |
| 2977 | * If the vm_page has no more pending action events we clear its | |
| 2978 | * PG_ACTIONLIST flag. | |
| 10192bae MD |
2979 | */ |
| 2980 | void | |
| 2981 | vm_page_event_internal(vm_page_t m, vm_page_event_t event) | |
| 2982 | { | |
| 906c754c MD |
2983 | struct vm_page_action_list *list; |
| 2984 | struct vm_page_action *scan; | |
| 2985 | struct vm_page_action *next; | |
| 2986 | int hv; | |
| 2987 | int all; | |
| 10192bae | 2988 | |
| 906c754c MD |
2989 | hv = (int)((intptr_t)m >> 8) & VMACTION_HMASK; |
| 2990 | list = &action_list[hv]; | |
| 2991 | all = 1; | |
| 2992 | ||
| 2993 | lwkt_gettoken(&vm_token); | |
| 2994 | LIST_FOREACH_MUTABLE(scan, list, entry, next) { | |
| 2995 | if (scan->m == m) { | |
| 2996 | if (scan->event == event) { | |
| 2997 | scan->event = VMEVENT_NONE; | |
| 2998 | LIST_REMOVE(scan, entry); | |
| 2999 | scan->func(m, scan); | |
| 3000 | /* XXX */ | |
| 3001 | } else { | |
| 3002 | all = 0; | |
| 3003 | } | |
| 10192bae MD |
3004 | } |
| 3005 | } | |
| 906c754c MD |
3006 | if (all) |
| 3007 | vm_page_flag_clear(m, PG_ACTIONLIST); | |
| 3008 | lwkt_reltoken(&vm_token); | |
| 10192bae MD |
3009 | } |
| 3010 | ||
| 984263bc MD |
3011 | #include "opt_ddb.h" |
| 3012 | #ifdef DDB | |
| 3013 | #include <sys/kernel.h> | |
| 3014 | ||
| 3015 | #include <ddb/ddb.h> | |
| 3016 | ||
| 3017 | DB_SHOW_COMMAND(page, vm_page_print_page_info) | |
| 3018 | { | |
| 12e4aaff MD |
3019 | db_printf("vmstats.v_free_count: %d\n", vmstats.v_free_count); |
| 3020 | db_printf("vmstats.v_cache_count: %d\n", vmstats.v_cache_count); | |
| 3021 | db_printf("vmstats.v_inactive_count: %d\n", vmstats.v_inactive_count); | |
| 3022 | db_printf("vmstats.v_active_count: %d\n", vmstats.v_active_count); | |
| 3023 | db_printf("vmstats.v_wire_count: %d\n", vmstats.v_wire_count); | |
| 3024 | db_printf("vmstats.v_free_reserved: %d\n", vmstats.v_free_reserved); | |
| 3025 | db_printf("vmstats.v_free_min: %d\n", vmstats.v_free_min); | |
| 3026 | db_printf("vmstats.v_free_target: %d\n", vmstats.v_free_target); | |
| 3027 | db_printf("vmstats.v_cache_min: %d\n", vmstats.v_cache_min); | |
| 3028 | db_printf("vmstats.v_inactive_target: %d\n", vmstats.v_inactive_target); | |
| 984263bc MD |
3029 | } |
| 3030 | ||
| 3031 | DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) | |
| 3032 | { | |
| 3033 | int i; | |
| 3034 | db_printf("PQ_FREE:"); | |
| 3035 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 3036 | db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt); | |
| 3037 | } | |
| 3038 | db_printf("\n"); | |
| 3039 | ||
| 3040 | db_printf("PQ_CACHE:"); | |
| 3041 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 3042 | db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt); | |
| 3043 | } | |
| 3044 | db_printf("\n"); | |
| 3045 | ||
| 027193eb MD |
3046 | db_printf("PQ_ACTIVE:"); |
| 3047 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 3048 | db_printf(" %d", vm_page_queues[PQ_ACTIVE + i].lcnt); | |
| 3049 | } | |
| 3050 | db_printf("\n"); | |
| 3051 | ||
| 3052 | db_printf("PQ_INACTIVE:"); | |
| 3053 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 3054 | db_printf(" %d", vm_page_queues[PQ_INACTIVE + i].lcnt); | |
| 3055 | } | |
| 3056 | db_printf("\n"); | |
| 984263bc MD |
3057 | } |
| 3058 | #endif /* DDB */ |