| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1991 Regents of the University of California. | |
| 3 | * All rights reserved. | |
| 4 | * | |
| 5 | * This code is derived from software contributed to Berkeley by | |
| 6 | * The Mach Operating System project at Carnegie-Mellon University. | |
| 7 | * | |
| 8 | * Redistribution and use in source and binary forms, with or without | |
| 9 | * modification, are permitted provided that the following conditions | |
| 10 | * are met: | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in the | |
| 15 | * documentation and/or other materials provided with the distribution. | |
| 16 | * 3. All advertising materials mentioning features or use of this software | |
| 17 | * must display the following acknowledgement: | |
| 18 | * This product includes software developed by the University of | |
| 19 | * California, Berkeley and its contributors. | |
| 20 | * 4. Neither the name of the University nor the names of its contributors | |
| 21 | * may be used to endorse or promote products derived from this software | |
| 22 | * without specific prior written permission. | |
| 23 | * | |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 34 | * SUCH DAMAGE. | |
| 35 | * | |
| 36 | * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 | |
| 37 | * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $ | |
| cfd17028 | 38 | * $DragonFly: src/sys/vm/vm_page.c,v 1.40 2008/08/25 17:01:42 dillon Exp $ |
| 984263bc MD |
39 | */ |
| 40 | ||
| 41 | /* | |
| 42 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
| 43 | * All rights reserved. | |
| 44 | * | |
| 45 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
| 46 | * | |
| 47 | * Permission to use, copy, modify and distribute this software and | |
| 48 | * its documentation is hereby granted, provided that both the copyright | |
| 49 | * notice and this permission notice appear in all copies of the | |
| 50 | * software, derivative works or modified versions, and any portions | |
| 51 | * thereof, and that both notices appear in supporting documentation. | |
| 52 | * | |
| 53 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
| 54 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
| 55 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
| 56 | * | |
| 57 | * Carnegie Mellon requests users of this software to return to | |
| 58 | * | |
| 59 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
| 60 | * School of Computer Science | |
| 61 | * Carnegie Mellon University | |
| 62 | * Pittsburgh PA 15213-3890 | |
| 63 | * | |
| 64 | * any improvements or extensions that they make and grant Carnegie the | |
| 65 | * rights to redistribute these changes. | |
| 66 | */ | |
| 984263bc | 67 | /* |
| de71fd3f MD |
68 | * Resident memory management module. The module manipulates 'VM pages'. |
| 69 | * A VM page is the core building block for memory management. | |
| 984263bc MD |
70 | */ |
| 71 | ||
| 72 | #include <sys/param.h> | |
| 73 | #include <sys/systm.h> | |
| 74 | #include <sys/malloc.h> | |
| 75 | #include <sys/proc.h> | |
| 76 | #include <sys/vmmeter.h> | |
| 77 | #include <sys/vnode.h> | |
| 78 | ||
| 79 | #include <vm/vm.h> | |
| 80 | #include <vm/vm_param.h> | |
| 81 | #include <sys/lock.h> | |
| 82 | #include <vm/vm_kern.h> | |
| 83 | #include <vm/pmap.h> | |
| 84 | #include <vm/vm_map.h> | |
| 85 | #include <vm/vm_object.h> | |
| 86 | #include <vm/vm_page.h> | |
| 87 | #include <vm/vm_pageout.h> | |
| 88 | #include <vm/vm_pager.h> | |
| 89 | #include <vm/vm_extern.h> | |
| 12e4aaff | 90 | #include <vm/vm_page2.h> |
| 984263bc | 91 | |
| de71fd3f MD |
92 | static void vm_page_queue_init(void); |
| 93 | static void vm_page_free_wakeup(void); | |
| 94 | static vm_page_t vm_page_select_cache(vm_object_t, vm_pindex_t); | |
| 74232d8e | 95 | static vm_page_t _vm_page_list_find2(int basequeue, int index); |
| 984263bc | 96 | |
| de71fd3f | 97 | struct vpgqueues vm_page_queues[PQ_COUNT]; /* Array of tailq lists */ |
| 984263bc | 98 | |
| 654a39f0 MD |
99 | #define ASSERT_IN_CRIT_SECTION() KKASSERT(crit_test(curthread)); |
| 100 | ||
| 1f804340 MD |
101 | RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, |
| 102 | vm_pindex_t, pindex); | |
| 103 | ||
| 984263bc | 104 | static void |
| de71fd3f MD |
105 | vm_page_queue_init(void) |
| 106 | { | |
| 984263bc MD |
107 | int i; |
| 108 | ||
| de71fd3f | 109 | for (i = 0; i < PQ_L2_SIZE; i++) |
| 12e4aaff | 110 | vm_page_queues[PQ_FREE+i].cnt = &vmstats.v_free_count; |
| de71fd3f MD |
111 | for (i = 0; i < PQ_L2_SIZE; i++) |
| 112 | vm_page_queues[PQ_CACHE+i].cnt = &vmstats.v_cache_count; | |
| 984263bc | 113 | |
| de71fd3f | 114 | vm_page_queues[PQ_INACTIVE].cnt = &vmstats.v_inactive_count; |
| 12e4aaff MD |
115 | vm_page_queues[PQ_ACTIVE].cnt = &vmstats.v_active_count; |
| 116 | vm_page_queues[PQ_HOLD].cnt = &vmstats.v_active_count; | |
| de71fd3f MD |
117 | /* PQ_NONE has no queue */ |
| 118 | ||
| 119 | for (i = 0; i < PQ_COUNT; i++) | |
| 984263bc | 120 | TAILQ_INIT(&vm_page_queues[i].pl); |
| 984263bc MD |
121 | } |
| 122 | ||
| de71fd3f MD |
123 | /* |
| 124 | * note: place in initialized data section? Is this necessary? | |
| 125 | */ | |
| 984263bc | 126 | long first_page = 0; |
| de71fd3f | 127 | int vm_page_array_size = 0; |
| 984263bc | 128 | int vm_page_zero_count = 0; |
| de71fd3f | 129 | vm_page_t vm_page_array = 0; |
| 984263bc MD |
130 | |
| 131 | /* | |
| de71fd3f | 132 | * (low level boot) |
| 984263bc | 133 | * |
| de71fd3f MD |
134 | * Sets the page size, perhaps based upon the memory size. |
| 135 | * Must be called before any use of page-size dependent functions. | |
| 984263bc MD |
136 | */ |
| 137 | void | |
| 138 | vm_set_page_size(void) | |
| 139 | { | |
| 12e4aaff MD |
140 | if (vmstats.v_page_size == 0) |
| 141 | vmstats.v_page_size = PAGE_SIZE; | |
| 142 | if (((vmstats.v_page_size - 1) & vmstats.v_page_size) != 0) | |
| 984263bc MD |
143 | panic("vm_set_page_size: page size not a power of two"); |
| 144 | } | |
| 145 | ||
| 146 | /* | |
| de71fd3f | 147 | * (low level boot) |
| 984263bc | 148 | * |
| de71fd3f MD |
149 | * Add a new page to the freelist for use by the system. New pages |
| 150 | * are added to both the head and tail of the associated free page | |
| 151 | * queue in a bottom-up fashion, so both zero'd and non-zero'd page | |
| 152 | * requests pull 'recent' adds (higher physical addresses) first. | |
| 161399b3 | 153 | * |
| 654a39f0 | 154 | * Must be called in a critical section. |
| 984263bc MD |
155 | */ |
| 156 | vm_page_t | |
| 6ef943a3 | 157 | vm_add_new_page(vm_paddr_t pa) |
| 984263bc | 158 | { |
| 161399b3 | 159 | struct vpgqueues *vpq; |
| de71fd3f | 160 | vm_page_t m; |
| 984263bc | 161 | |
| 12e4aaff MD |
162 | ++vmstats.v_page_count; |
| 163 | ++vmstats.v_free_count; | |
| 984263bc MD |
164 | m = PHYS_TO_VM_PAGE(pa); |
| 165 | m->phys_addr = pa; | |
| 166 | m->flags = 0; | |
| 167 | m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK; | |
| 168 | m->queue = m->pc + PQ_FREE; | |
| 26bcc0c0 | 169 | KKASSERT(m->dirty == 0); |
| de71fd3f | 170 | |
| 161399b3 MD |
171 | vpq = &vm_page_queues[m->queue]; |
| 172 | if (vpq->flipflop) | |
| 173 | TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); | |
| 174 | else | |
| 175 | TAILQ_INSERT_HEAD(&vpq->pl, m, pageq); | |
| 176 | vpq->flipflop = 1 - vpq->flipflop; | |
| de71fd3f | 177 | |
| 984263bc MD |
178 | vm_page_queues[m->queue].lcnt++; |
| 179 | return (m); | |
| 180 | } | |
| 181 | ||
| 182 | /* | |
| de71fd3f | 183 | * (low level boot) |
| 984263bc | 184 | * |
| de71fd3f | 185 | * Initializes the resident memory module. |
| 984263bc | 186 | * |
| de71fd3f MD |
187 | * Allocates memory for the page cells, and for the object/offset-to-page |
| 188 | * hash table headers. Each page cell is initialized and placed on the | |
| 189 | * free list. | |
| 26bcc0c0 MD |
190 | * |
| 191 | * starta/enda represents the range of physical memory addresses available | |
| 192 | * for use (skipping memory already used by the kernel), subject to | |
| 193 | * phys_avail[]. Note that phys_avail[] has already mapped out memory | |
| 194 | * already in use by the kernel. | |
| 984263bc | 195 | */ |
| 984263bc | 196 | vm_offset_t |
| 26bcc0c0 | 197 | vm_page_startup(vm_offset_t vaddr) |
| 984263bc MD |
198 | { |
| 199 | vm_offset_t mapped; | |
| 6ef943a3 MD |
200 | vm_size_t npages; |
| 201 | vm_paddr_t page_range; | |
| 202 | vm_paddr_t new_end; | |
| 984263bc | 203 | int i; |
| 6ef943a3 | 204 | vm_paddr_t pa; |
| 984263bc | 205 | int nblocks; |
| 6ef943a3 | 206 | vm_paddr_t last_pa; |
| 6ef943a3 MD |
207 | vm_paddr_t end; |
| 208 | vm_paddr_t biggestone, biggestsize; | |
| 6ef943a3 | 209 | vm_paddr_t total; |
| 984263bc MD |
210 | |
| 211 | total = 0; | |
| 212 | biggestsize = 0; | |
| 213 | biggestone = 0; | |
| 214 | nblocks = 0; | |
| 215 | vaddr = round_page(vaddr); | |
| 216 | ||
| 217 | for (i = 0; phys_avail[i + 1]; i += 2) { | |
| 218 | phys_avail[i] = round_page(phys_avail[i]); | |
| 219 | phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); | |
| 220 | } | |
| 221 | ||
| 222 | for (i = 0; phys_avail[i + 1]; i += 2) { | |
| 6ef943a3 | 223 | vm_paddr_t size = phys_avail[i + 1] - phys_avail[i]; |
| 984263bc MD |
224 | |
| 225 | if (size > biggestsize) { | |
| 226 | biggestone = i; | |
| 227 | biggestsize = size; | |
| 228 | } | |
| 229 | ++nblocks; | |
| 230 | total += size; | |
| 231 | } | |
| 232 | ||
| 233 | end = phys_avail[biggestone+1]; | |
| 1f804340 | 234 | end = trunc_page(end); |
| 984263bc MD |
235 | |
| 236 | /* | |
| 237 | * Initialize the queue headers for the free queue, the active queue | |
| 238 | * and the inactive queue. | |
| 239 | */ | |
| 240 | ||
| 241 | vm_page_queue_init(); | |
| 242 | ||
| 243 | /* | |
| 984263bc MD |
244 | * Compute the number of pages of memory that will be available for |
| 245 | * use (taking into account the overhead of a page structure per | |
| 246 | * page). | |
| 247 | */ | |
| 984263bc | 248 | first_page = phys_avail[0] / PAGE_SIZE; |
| 984263bc | 249 | page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page; |
| 1f804340 | 250 | npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE; |
| de71fd3f | 251 | |
| 984263bc MD |
252 | /* |
| 253 | * Initialize the mem entry structures now, and put them in the free | |
| 254 | * queue. | |
| 255 | */ | |
| 256 | vm_page_array = (vm_page_t) vaddr; | |
| 257 | mapped = vaddr; | |
| 258 | ||
| 259 | /* | |
| 260 | * Validate these addresses. | |
| 261 | */ | |
| 984263bc MD |
262 | new_end = trunc_page(end - page_range * sizeof(struct vm_page)); |
| 263 | mapped = pmap_map(mapped, new_end, end, | |
| 264 | VM_PROT_READ | VM_PROT_WRITE); | |
| 265 | ||
| 266 | /* | |
| 267 | * Clear all of the page structures | |
| 268 | */ | |
| 269 | bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); | |
| 270 | vm_page_array_size = page_range; | |
| 271 | ||
| 272 | /* | |
| 161399b3 | 273 | * Construct the free queue(s) in ascending order (by physical |
| 984263bc MD |
274 | * address) so that the first 16MB of physical memory is allocated |
| 275 | * last rather than first. On large-memory machines, this avoids | |
| 276 | * the exhaustion of low physical memory before isa_dmainit has run. | |
| 277 | */ | |
| 12e4aaff MD |
278 | vmstats.v_page_count = 0; |
| 279 | vmstats.v_free_count = 0; | |
| 984263bc MD |
280 | for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { |
| 281 | pa = phys_avail[i]; | |
| 282 | if (i == biggestone) | |
| 283 | last_pa = new_end; | |
| 284 | else | |
| 285 | last_pa = phys_avail[i + 1]; | |
| 286 | while (pa < last_pa && npages-- > 0) { | |
| 287 | vm_add_new_page(pa); | |
| 288 | pa += PAGE_SIZE; | |
| 289 | } | |
| 290 | } | |
| 291 | return (mapped); | |
| 292 | } | |
| 293 | ||
| 294 | /* | |
| 1f804340 MD |
295 | * Scan comparison function for Red-Black tree scans. An inclusive |
| 296 | * (start,end) is expected. Other fields are not used. | |
| 984263bc | 297 | */ |
| 1f804340 MD |
298 | int |
| 299 | rb_vm_page_scancmp(struct vm_page *p, void *data) | |
| 984263bc | 300 | { |
| 1f804340 | 301 | struct rb_vm_page_scan_info *info = data; |
| 984263bc | 302 | |
| 1f804340 MD |
303 | if (p->pindex < info->start_pindex) |
| 304 | return(-1); | |
| 305 | if (p->pindex > info->end_pindex) | |
| 306 | return(1); | |
| 307 | return(0); | |
| 308 | } | |
| 309 | ||
| 310 | int | |
| 311 | rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2) | |
| 312 | { | |
| 313 | if (p1->pindex < p2->pindex) | |
| 314 | return(-1); | |
| 315 | if (p1->pindex > p2->pindex) | |
| 316 | return(1); | |
| 317 | return(0); | |
| 984263bc MD |
318 | } |
| 319 | ||
| de71fd3f MD |
320 | /* |
| 321 | * The opposite of vm_page_hold(). A page can be freed while being held, | |
| 322 | * which places it on the PQ_HOLD queue. We must call vm_page_free_toq() | |
| 323 | * in this case to actually free it once the hold count drops to 0. | |
| 324 | * | |
| 325 | * This routine must be called at splvm(). | |
| 326 | */ | |
| 984263bc MD |
327 | void |
| 328 | vm_page_unhold(vm_page_t mem) | |
| 329 | { | |
| 330 | --mem->hold_count; | |
| 331 | KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); | |
| 97edb3b6 MD |
332 | if (mem->hold_count == 0 && mem->queue == PQ_HOLD) { |
| 333 | vm_page_busy(mem); | |
| 984263bc | 334 | vm_page_free_toq(mem); |
| 97edb3b6 | 335 | } |
| 984263bc MD |
336 | } |
| 337 | ||
| 338 | /* | |
| de71fd3f | 339 | * Inserts the given mem entry into the object and object list. |
| 984263bc | 340 | * |
| de71fd3f MD |
341 | * The pagetables are not updated but will presumably fault the page |
| 342 | * in if necessary, or if a kernel page the caller will at some point | |
| 343 | * enter the page into the kernel's pmap. We are not allowed to block | |
| 344 | * here so we *can't* do this anyway. | |
| 984263bc | 345 | * |
| de71fd3f | 346 | * This routine may not block. |
| 654a39f0 | 347 | * This routine must be called with a critical section held. |
| 984263bc | 348 | */ |
| 984263bc MD |
349 | void |
| 350 | vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) | |
| 351 | { | |
| 654a39f0 | 352 | ASSERT_IN_CRIT_SECTION(); |
| 984263bc MD |
353 | if (m->object != NULL) |
| 354 | panic("vm_page_insert: already inserted"); | |
| 355 | ||
| 356 | /* | |
| 357 | * Record the object/offset pair in this page | |
| 358 | */ | |
| 984263bc MD |
359 | m->object = object; |
| 360 | m->pindex = pindex; | |
| 361 | ||
| 362 | /* | |
| 1f804340 | 363 | * Insert it into the object. |
| 984263bc | 364 | */ |
| 1f804340 | 365 | vm_page_rb_tree_RB_INSERT(&object->rb_memq, m); |
| 984263bc MD |
366 | object->generation++; |
| 367 | ||
| 368 | /* | |
| 369 | * show that the object has one more resident page. | |
| 370 | */ | |
| 984263bc MD |
371 | object->resident_page_count++; |
| 372 | ||
| 373 | /* | |
| 374 | * Since we are inserting a new and possibly dirty page, | |
| 375 | * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. | |
| 376 | */ | |
| 17cde63e | 377 | if ((m->valid & m->dirty) || (m->flags & PG_WRITEABLE)) |
| 984263bc MD |
378 | vm_object_set_writeable_dirty(object); |
| 379 | } | |
| 380 | ||
| 381 | /* | |
| 9765affa MD |
382 | * Removes the given vm_page_t from the global (object,index) hash table |
| 383 | * and from the object's memq. | |
| 984263bc | 384 | * |
| de71fd3f MD |
385 | * The underlying pmap entry (if any) is NOT removed here. |
| 386 | * This routine may not block. | |
| 9765affa MD |
387 | * |
| 388 | * The page must be BUSY and will remain BUSY on return. No spl needs to be | |
| 389 | * held on call to this routine. | |
| 390 | * | |
| 391 | * note: FreeBSD side effect was to unbusy the page on return. We leave | |
| 392 | * it busy. | |
| 984263bc | 393 | */ |
| 984263bc MD |
394 | void |
| 395 | vm_page_remove(vm_page_t m) | |
| 396 | { | |
| 397 | vm_object_t object; | |
| 398 | ||
| 654a39f0 MD |
399 | crit_enter(); |
| 400 | if (m->object == NULL) { | |
| 401 | crit_exit(); | |
| 984263bc | 402 | return; |
| 654a39f0 | 403 | } |
| 984263bc | 404 | |
| de71fd3f | 405 | if ((m->flags & PG_BUSY) == 0) |
| 984263bc | 406 | panic("vm_page_remove: page not busy"); |
| 984263bc | 407 | |
| 984263bc MD |
408 | object = m->object; |
| 409 | ||
| 410 | /* | |
| 1f804340 | 411 | * Remove the page from the object and update the object. |
| 984263bc | 412 | */ |
| 1f804340 | 413 | vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m); |
| 984263bc MD |
414 | object->resident_page_count--; |
| 415 | object->generation++; | |
| 984263bc | 416 | m->object = NULL; |
| 1f804340 | 417 | |
| 9765affa | 418 | crit_exit(); |
| 984263bc MD |
419 | } |
| 420 | ||
| 421 | /* | |
| de71fd3f MD |
422 | * Locate and return the page at (object, pindex), or NULL if the |
| 423 | * page could not be found. | |
| 424 | * | |
| 425 | * This routine will operate properly without spl protection, but | |
| 426 | * the returned page could be in flux if it is busy. Because an | |
| 427 | * interrupt can race a caller's busy check (unbusying and freeing the | |
| 428 | * page we return before the caller is able to check the busy bit), | |
| 654a39f0 MD |
429 | * the caller should generally call this routine with a critical |
| 430 | * section held. | |
| de71fd3f MD |
431 | * |
| 432 | * Callers may call this routine without spl protection if they know | |
| 433 | * 'for sure' that the page will not be ripped out from under them | |
| 434 | * by an interrupt. | |
| 984263bc | 435 | */ |
| 984263bc MD |
436 | vm_page_t |
| 437 | vm_page_lookup(vm_object_t object, vm_pindex_t pindex) | |
| 438 | { | |
| 439 | vm_page_t m; | |
| 984263bc MD |
440 | |
| 441 | /* | |
| 442 | * Search the hash table for this object/offset pair | |
| 443 | */ | |
| 1f804340 MD |
444 | crit_enter(); |
| 445 | m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); | |
| 446 | crit_exit(); | |
| 447 | KKASSERT(m == NULL || (m->object == object && m->pindex == pindex)); | |
| 448 | return(m); | |
| 984263bc MD |
449 | } |
| 450 | ||
| 451 | /* | |
| de71fd3f | 452 | * vm_page_rename() |
| 984263bc | 453 | * |
| de71fd3f MD |
454 | * Move the given memory entry from its current object to the specified |
| 455 | * target object/offset. | |
| 984263bc | 456 | * |
| de71fd3f MD |
457 | * The object must be locked. |
| 458 | * This routine may not block. | |
| 984263bc | 459 | * |
| de71fd3f | 460 | * Note: This routine will raise itself to splvm(), the caller need not. |
| 984263bc | 461 | * |
| de71fd3f MD |
462 | * Note: Swap associated with the page must be invalidated by the move. We |
| 463 | * have to do this for several reasons: (1) we aren't freeing the | |
| 464 | * page, (2) we are dirtying the page, (3) the VM system is probably | |
| 465 | * moving the page from object A to B, and will then later move | |
| 466 | * the backing store from A to B and we can't have a conflict. | |
| 984263bc | 467 | * |
| de71fd3f MD |
468 | * Note: We *always* dirty the page. It is necessary both for the |
| 469 | * fact that we moved it, and because we may be invalidating | |
| 470 | * swap. If the page is on the cache, we have to deactivate it | |
| 471 | * or vm_page_dirty() will panic. Dirty pages are not allowed | |
| 472 | * on the cache. | |
| 984263bc | 473 | */ |
| 984263bc MD |
474 | void |
| 475 | vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) | |
| 476 | { | |
| 9765affa | 477 | crit_enter(); |
| 984263bc MD |
478 | vm_page_remove(m); |
| 479 | vm_page_insert(m, new_object, new_pindex); | |
| 480 | if (m->queue - m->pc == PQ_CACHE) | |
| 481 | vm_page_deactivate(m); | |
| 482 | vm_page_dirty(m); | |
| 9765affa MD |
483 | vm_page_wakeup(m); |
| 484 | crit_exit(); | |
| 984263bc MD |
485 | } |
| 486 | ||
| 487 | /* | |
| de71fd3f MD |
488 | * vm_page_unqueue() without any wakeup. This routine is used when a page |
| 489 | * is being moved between queues or otherwise is to remain BUSYied by the | |
| 490 | * caller. | |
| 984263bc | 491 | * |
| de71fd3f MD |
492 | * This routine must be called at splhigh(). |
| 493 | * This routine may not block. | |
| 984263bc | 494 | */ |
| 984263bc MD |
495 | void |
| 496 | vm_page_unqueue_nowakeup(vm_page_t m) | |
| 497 | { | |
| 498 | int queue = m->queue; | |
| 499 | struct vpgqueues *pq; | |
| de71fd3f | 500 | |
| 984263bc MD |
501 | if (queue != PQ_NONE) { |
| 502 | pq = &vm_page_queues[queue]; | |
| 503 | m->queue = PQ_NONE; | |
| 504 | TAILQ_REMOVE(&pq->pl, m, pageq); | |
| 505 | (*pq->cnt)--; | |
| 506 | pq->lcnt--; | |
| 507 | } | |
| 508 | } | |
| 509 | ||
| 510 | /* | |
| de71fd3f MD |
511 | * vm_page_unqueue() - Remove a page from its queue, wakeup the pagedemon |
| 512 | * if necessary. | |
| 984263bc | 513 | * |
| de71fd3f MD |
514 | * This routine must be called at splhigh(). |
| 515 | * This routine may not block. | |
| 984263bc | 516 | */ |
| 984263bc MD |
517 | void |
| 518 | vm_page_unqueue(vm_page_t m) | |
| 519 | { | |
| 520 | int queue = m->queue; | |
| 521 | struct vpgqueues *pq; | |
| de71fd3f | 522 | |
| 984263bc MD |
523 | if (queue != PQ_NONE) { |
| 524 | m->queue = PQ_NONE; | |
| 525 | pq = &vm_page_queues[queue]; | |
| 526 | TAILQ_REMOVE(&pq->pl, m, pageq); | |
| 527 | (*pq->cnt)--; | |
| 528 | pq->lcnt--; | |
| 20479584 MD |
529 | if ((queue - m->pc) == PQ_CACHE || (queue - m->pc) == PQ_FREE) |
| 530 | pagedaemon_wakeup(); | |
| 984263bc MD |
531 | } |
| 532 | } | |
| 533 | ||
| 984263bc | 534 | /* |
| de71fd3f | 535 | * vm_page_list_find() |
| 984263bc | 536 | * |
| de71fd3f | 537 | * Find a page on the specified queue with color optimization. |
| 984263bc | 538 | * |
| de71fd3f MD |
539 | * The page coloring optimization attempts to locate a page that does |
| 540 | * not overload other nearby pages in the object in the cpu's L1 or L2 | |
| 541 | * caches. We need this optimization because cpu caches tend to be | |
| 542 | * physical caches, while object spaces tend to be virtual. | |
| 984263bc | 543 | * |
| de71fd3f MD |
544 | * This routine must be called at splvm(). |
| 545 | * This routine may not block. | |
| 984263bc | 546 | * |
| de71fd3f MD |
547 | * Note that this routine is carefully inlined. A non-inlined version |
| 548 | * is available for outside callers but the only critical path is | |
| 549 | * from within this source file. | |
| 984263bc | 550 | */ |
| 74232d8e | 551 | static __inline |
| 984263bc | 552 | vm_page_t |
| 74232d8e MD |
553 | _vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) |
| 554 | { | |
| 555 | vm_page_t m; | |
| 556 | ||
| 557 | if (prefer_zero) | |
| 558 | m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, pglist); | |
| 559 | else | |
| 560 | m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl); | |
| 561 | if (m == NULL) | |
| 562 | m = _vm_page_list_find2(basequeue, index); | |
| 563 | return(m); | |
| 564 | } | |
| 565 | ||
| 566 | static vm_page_t | |
| 567 | _vm_page_list_find2(int basequeue, int index) | |
| 984263bc MD |
568 | { |
| 569 | int i; | |
| 570 | vm_page_t m = NULL; | |
| 571 | struct vpgqueues *pq; | |
| 572 | ||
| 573 | pq = &vm_page_queues[basequeue]; | |
| 574 | ||
| 575 | /* | |
| 576 | * Note that for the first loop, index+i and index-i wind up at the | |
| 577 | * same place. Even though this is not totally optimal, we've already | |
| 578 | * blown it by missing the cache case so we do not care. | |
| 579 | */ | |
| 580 | ||
| 581 | for(i = PQ_L2_SIZE / 2; i > 0; --i) { | |
| 582 | if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl)) != NULL) | |
| 583 | break; | |
| 584 | ||
| 585 | if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl)) != NULL) | |
| 586 | break; | |
| 587 | } | |
| 588 | return(m); | |
| 589 | } | |
| 590 | ||
| 74232d8e MD |
591 | vm_page_t |
| 592 | vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) | |
| 593 | { | |
| 594 | return(_vm_page_list_find(basequeue, index, prefer_zero)); | |
| 595 | } | |
| 596 | ||
| 984263bc | 597 | /* |
| de71fd3f MD |
598 | * Find a page on the cache queue with color optimization. As pages |
| 599 | * might be found, but not applicable, they are deactivated. This | |
| 600 | * keeps us from using potentially busy cached pages. | |
| 984263bc | 601 | * |
| 654a39f0 | 602 | * This routine must be called with a critical section held. |
| de71fd3f | 603 | * This routine may not block. |
| 984263bc MD |
604 | */ |
| 605 | vm_page_t | |
| 606 | vm_page_select_cache(vm_object_t object, vm_pindex_t pindex) | |
| 607 | { | |
| 608 | vm_page_t m; | |
| 609 | ||
| 610 | while (TRUE) { | |
| 659c6a07 | 611 | m = _vm_page_list_find( |
| 984263bc MD |
612 | PQ_CACHE, |
| 613 | (pindex + object->pg_color) & PQ_L2_MASK, | |
| 614 | FALSE | |
| 615 | ); | |
| 616 | if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || | |
| 617 | m->hold_count || m->wire_count)) { | |
| 618 | vm_page_deactivate(m); | |
| 619 | continue; | |
| 620 | } | |
| 621 | return m; | |
| 622 | } | |
| de71fd3f | 623 | /* not reached */ |
| 984263bc MD |
624 | } |
| 625 | ||
| 626 | /* | |
| de71fd3f MD |
627 | * Find a free or zero page, with specified preference. We attempt to |
| 628 | * inline the nominal case and fall back to _vm_page_select_free() | |
| 629 | * otherwise. | |
| 984263bc | 630 | * |
| 654a39f0 | 631 | * This routine must be called with a critical section held. |
| de71fd3f | 632 | * This routine may not block. |
| 984263bc | 633 | */ |
| 984263bc MD |
634 | static __inline vm_page_t |
| 635 | vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zero) | |
| 636 | { | |
| 637 | vm_page_t m; | |
| 638 | ||
| 659c6a07 | 639 | m = _vm_page_list_find( |
| 984263bc MD |
640 | PQ_FREE, |
| 641 | (pindex + object->pg_color) & PQ_L2_MASK, | |
| 642 | prefer_zero | |
| 643 | ); | |
| 644 | return(m); | |
| 645 | } | |
| 646 | ||
| 647 | /* | |
| de71fd3f | 648 | * vm_page_alloc() |
| 984263bc | 649 | * |
| de71fd3f MD |
650 | * Allocate and return a memory cell associated with this VM object/offset |
| 651 | * pair. | |
| 984263bc MD |
652 | * |
| 653 | * page_req classes: | |
| de71fd3f | 654 | * |
| dc1fd4b3 MD |
655 | * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain |
| 656 | * VM_ALLOC_SYSTEM greater free drain | |
| 657 | * VM_ALLOC_INTERRUPT allow free list to be completely drained | |
| 658 | * VM_ALLOC_ZERO advisory request for pre-zero'd page | |
| 984263bc | 659 | * |
| de71fd3f MD |
660 | * The object must be locked. |
| 661 | * This routine may not block. | |
| 9765affa | 662 | * The returned page will be marked PG_BUSY |
| 984263bc | 663 | * |
| de71fd3f MD |
664 | * Additional special handling is required when called from an interrupt |
| 665 | * (VM_ALLOC_INTERRUPT). We are not allowed to mess with the page cache | |
| 666 | * in this case. | |
| 984263bc | 667 | */ |
| 984263bc MD |
668 | vm_page_t |
| 669 | vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req) | |
| 670 | { | |
| 671 | vm_page_t m = NULL; | |
| 984263bc | 672 | |
| cfd17028 | 673 | KKASSERT(object != NULL); |
| 984263bc MD |
674 | KASSERT(!vm_page_lookup(object, pindex), |
| 675 | ("vm_page_alloc: page already allocated")); | |
| dc1fd4b3 MD |
676 | KKASSERT(page_req & |
| 677 | (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); | |
| 984263bc MD |
678 | |
| 679 | /* | |
| 4ecf7cc9 MD |
680 | * Certain system threads (pageout daemon, buf_daemon's) are |
| 681 | * allowed to eat deeper into the free page list. | |
| 984263bc | 682 | */ |
| 4ecf7cc9 | 683 | if (curthread->td_flags & TDF_SYSTHREAD) |
| dc1fd4b3 | 684 | page_req |= VM_ALLOC_SYSTEM; |
| 984263bc | 685 | |
| 9765affa | 686 | crit_enter(); |
| 984263bc | 687 | loop: |
| dc1fd4b3 MD |
688 | if (vmstats.v_free_count > vmstats.v_free_reserved || |
| 689 | ((page_req & VM_ALLOC_INTERRUPT) && vmstats.v_free_count > 0) || | |
| 690 | ((page_req & VM_ALLOC_SYSTEM) && vmstats.v_cache_count == 0 && | |
| 691 | vmstats.v_free_count > vmstats.v_interrupt_free_min) | |
| 692 | ) { | |
| 984263bc | 693 | /* |
| dc1fd4b3 | 694 | * The free queue has sufficient free pages to take one out. |
| 984263bc | 695 | */ |
| dc1fd4b3 | 696 | if (page_req & VM_ALLOC_ZERO) |
| 984263bc MD |
697 | m = vm_page_select_free(object, pindex, TRUE); |
| 698 | else | |
| 699 | m = vm_page_select_free(object, pindex, FALSE); | |
| dc1fd4b3 | 700 | } else if (page_req & VM_ALLOC_NORMAL) { |
| 984263bc | 701 | /* |
| dc1fd4b3 MD |
702 | * Allocatable from the cache (non-interrupt only). On |
| 703 | * success, we must free the page and try again, thus | |
| 704 | * ensuring that vmstats.v_*_free_min counters are replenished. | |
| 984263bc | 705 | */ |
| dc1fd4b3 MD |
706 | #ifdef INVARIANTS |
| 707 | if (curthread->td_preempted) { | |
| 086c1d7e | 708 | kprintf("vm_page_alloc(): warning, attempt to allocate" |
| dc1fd4b3 MD |
709 | " cache page from preempting interrupt\n"); |
| 710 | m = NULL; | |
| 711 | } else { | |
| 712 | m = vm_page_select_cache(object, pindex); | |
| 713 | } | |
| 714 | #else | |
| 715 | m = vm_page_select_cache(object, pindex); | |
| 716 | #endif | |
| 984263bc | 717 | /* |
| 9765affa | 718 | * On success move the page into the free queue and loop. |
| 984263bc | 719 | */ |
| dc1fd4b3 MD |
720 | if (m != NULL) { |
| 721 | KASSERT(m->dirty == 0, | |
| 722 | ("Found dirty cache page %p", m)); | |
| 723 | vm_page_busy(m); | |
| 724 | vm_page_protect(m, VM_PROT_NONE); | |
| 725 | vm_page_free(m); | |
| 726 | goto loop; | |
| 727 | } | |
| 728 | ||
| 729 | /* | |
| 730 | * On failure return NULL | |
| 731 | */ | |
| 9765affa | 732 | crit_exit(); |
| 984263bc | 733 | #if defined(DIAGNOSTIC) |
| dc1fd4b3 | 734 | if (vmstats.v_cache_count > 0) |
| 086c1d7e | 735 | kprintf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count); |
| 984263bc | 736 | #endif |
| dc1fd4b3 MD |
737 | vm_pageout_deficit++; |
| 738 | pagedaemon_wakeup(); | |
| 739 | return (NULL); | |
| 984263bc MD |
740 | } else { |
| 741 | /* | |
| dc1fd4b3 | 742 | * No pages available, wakeup the pageout daemon and give up. |
| 984263bc | 743 | */ |
| 9765affa | 744 | crit_exit(); |
| 984263bc MD |
745 | vm_pageout_deficit++; |
| 746 | pagedaemon_wakeup(); | |
| 747 | return (NULL); | |
| 748 | } | |
| 749 | ||
| 750 | /* | |
| 9765affa MD |
751 | * Good page found. The page has not yet been busied. We are in |
| 752 | * a critical section. | |
| 984263bc | 753 | */ |
| dc1fd4b3 | 754 | KASSERT(m != NULL, ("vm_page_alloc(): missing page on free queue\n")); |
| 26bcc0c0 MD |
755 | KASSERT(m->dirty == 0, |
| 756 | ("vm_page_alloc: free/cache page %p was dirty", m)); | |
| 984263bc MD |
757 | |
| 758 | /* | |
| 759 | * Remove from free queue | |
| 760 | */ | |
| 984263bc MD |
761 | vm_page_unqueue_nowakeup(m); |
| 762 | ||
| 763 | /* | |
| 9765affa MD |
764 | * Initialize structure. Only the PG_ZERO flag is inherited. Set |
| 765 | * the page PG_BUSY | |
| 984263bc | 766 | */ |
| 984263bc MD |
767 | if (m->flags & PG_ZERO) { |
| 768 | vm_page_zero_count--; | |
| 769 | m->flags = PG_ZERO | PG_BUSY; | |
| 770 | } else { | |
| 771 | m->flags = PG_BUSY; | |
| 772 | } | |
| 773 | m->wire_count = 0; | |
| 774 | m->hold_count = 0; | |
| 775 | m->act_count = 0; | |
| 776 | m->busy = 0; | |
| 777 | m->valid = 0; | |
| 984263bc MD |
778 | |
| 779 | /* | |
| 9765affa | 780 | * vm_page_insert() is safe prior to the crit_exit(). Note also that |
| 984263bc MD |
781 | * inserting a page here does not insert it into the pmap (which |
| 782 | * could cause us to block allocating memory). We cannot block | |
| 783 | * anywhere. | |
| 784 | */ | |
| 984263bc MD |
785 | vm_page_insert(m, object, pindex); |
| 786 | ||
| 787 | /* | |
| 788 | * Don't wakeup too often - wakeup the pageout daemon when | |
| 789 | * we would be nearly out of memory. | |
| 790 | */ | |
| 20479584 | 791 | pagedaemon_wakeup(); |
| 984263bc | 792 | |
| 9765affa MD |
793 | crit_exit(); |
| 794 | ||
| 795 | /* | |
| 796 | * A PG_BUSY page is returned. | |
| 797 | */ | |
| 984263bc MD |
798 | return (m); |
| 799 | } | |
| 800 | ||
| 801 | /* | |
| de71fd3f MD |
802 | * Block until free pages are available for allocation, called in various |
| 803 | * places before memory allocations. | |
| 984263bc | 804 | */ |
| 984263bc | 805 | void |
| 4ecf7cc9 | 806 | vm_wait(int timo) |
| 984263bc | 807 | { |
| cdd46d2e | 808 | crit_enter(); |
| bc6dffab | 809 | if (curthread == pagethread) { |
| 984263bc | 810 | vm_pageout_pages_needed = 1; |
| 4ecf7cc9 | 811 | tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo); |
| 984263bc | 812 | } else { |
| 20479584 | 813 | if (vm_pages_needed == 0) { |
| 984263bc MD |
814 | vm_pages_needed = 1; |
| 815 | wakeup(&vm_pages_needed); | |
| 816 | } | |
| 4ecf7cc9 | 817 | tsleep(&vmstats.v_free_count, 0, "vmwait", timo); |
| 984263bc | 818 | } |
| cdd46d2e | 819 | crit_exit(); |
| 984263bc MD |
820 | } |
| 821 | ||
| 822 | /* | |
| de71fd3f MD |
823 | * Block until free pages are available for allocation |
| 824 | * | |
| 825 | * Called only in vm_fault so that processes page faulting can be | |
| 826 | * easily tracked. | |
| 984263bc | 827 | */ |
| 984263bc MD |
828 | void |
| 829 | vm_waitpfault(void) | |
| 830 | { | |
| cdd46d2e | 831 | crit_enter(); |
| 20479584 | 832 | if (vm_pages_needed == 0) { |
| 984263bc MD |
833 | vm_pages_needed = 1; |
| 834 | wakeup(&vm_pages_needed); | |
| 835 | } | |
| 377d4740 | 836 | tsleep(&vmstats.v_free_count, 0, "pfault", 0); |
| cdd46d2e | 837 | crit_exit(); |
| 984263bc MD |
838 | } |
| 839 | ||
| 840 | /* | |
| de71fd3f MD |
841 | * Put the specified page on the active list (if appropriate). Ensure |
| 842 | * that act_count is at least ACT_INIT but do not otherwise mess with it. | |
| 984263bc | 843 | * |
| de71fd3f MD |
844 | * The page queues must be locked. |
| 845 | * This routine may not block. | |
| 984263bc MD |
846 | */ |
| 847 | void | |
| 848 | vm_page_activate(vm_page_t m) | |
| 849 | { | |
| 9765affa | 850 | crit_enter(); |
| 984263bc MD |
851 | if (m->queue != PQ_ACTIVE) { |
| 852 | if ((m->queue - m->pc) == PQ_CACHE) | |
| 12e4aaff | 853 | mycpu->gd_cnt.v_reactivated++; |
| 984263bc MD |
854 | |
| 855 | vm_page_unqueue(m); | |
| 856 | ||
| 857 | if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { | |
| 858 | m->queue = PQ_ACTIVE; | |
| 859 | vm_page_queues[PQ_ACTIVE].lcnt++; | |
| de71fd3f MD |
860 | TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, |
| 861 | m, pageq); | |
| 984263bc MD |
862 | if (m->act_count < ACT_INIT) |
| 863 | m->act_count = ACT_INIT; | |
| 12e4aaff | 864 | vmstats.v_active_count++; |
| 984263bc MD |
865 | } |
| 866 | } else { | |
| 867 | if (m->act_count < ACT_INIT) | |
| 868 | m->act_count = ACT_INIT; | |
| 869 | } | |
| 9765affa | 870 | crit_exit(); |
| 984263bc MD |
871 | } |
| 872 | ||
| 873 | /* | |
| de71fd3f MD |
874 | * Helper routine for vm_page_free_toq() and vm_page_cache(). This |
| 875 | * routine is called when a page has been added to the cache or free | |
| 876 | * queues. | |
| 984263bc | 877 | * |
| de71fd3f MD |
878 | * This routine may not block. |
| 879 | * This routine must be called at splvm() | |
| 984263bc MD |
880 | */ |
| 881 | static __inline void | |
| 882 | vm_page_free_wakeup(void) | |
| 883 | { | |
| 884 | /* | |
| 885 | * if pageout daemon needs pages, then tell it that there are | |
| 886 | * some free. | |
| 887 | */ | |
| 888 | if (vm_pageout_pages_needed && | |
| de71fd3f MD |
889 | vmstats.v_cache_count + vmstats.v_free_count >= |
| 890 | vmstats.v_pageout_free_min | |
| 891 | ) { | |
| 984263bc MD |
892 | wakeup(&vm_pageout_pages_needed); |
| 893 | vm_pageout_pages_needed = 0; | |
| 894 | } | |
| de71fd3f | 895 | |
| 984263bc MD |
896 | /* |
| 897 | * wakeup processes that are waiting on memory if we hit a | |
| 898 | * high water mark. And wakeup scheduler process if we have | |
| 899 | * lots of memory. this process will swapin processes. | |
| 900 | */ | |
| 20479584 | 901 | if (vm_pages_needed && !vm_page_count_min(0)) { |
| 984263bc | 902 | vm_pages_needed = 0; |
| 12e4aaff | 903 | wakeup(&vmstats.v_free_count); |
| 984263bc MD |
904 | } |
| 905 | } | |
| 906 | ||
| 907 | /* | |
| 908 | * vm_page_free_toq: | |
| 909 | * | |
| 9765affa MD |
910 | * Returns the given page to the PQ_FREE list, disassociating it with |
| 911 | * any VM object. | |
| 912 | * | |
| 913 | * The vm_page must be PG_BUSY on entry. PG_BUSY will be released on | |
| 914 | * return (the page will have been freed). No particular spl is required | |
| 915 | * on entry. | |
| 984263bc | 916 | * |
| 984263bc MD |
917 | * This routine may not block. |
| 918 | */ | |
| 984263bc MD |
919 | void |
| 920 | vm_page_free_toq(vm_page_t m) | |
| 921 | { | |
| 984263bc | 922 | struct vpgqueues *pq; |
| 984263bc | 923 | |
| 9765affa | 924 | crit_enter(); |
| 12e4aaff | 925 | mycpu->gd_cnt.v_tfree++; |
| 984263bc | 926 | |
| 17cde63e MD |
927 | KKASSERT((m->flags & PG_MAPPED) == 0); |
| 928 | ||
| 984263bc | 929 | if (m->busy || ((m->queue - m->pc) == PQ_FREE)) { |
| 086c1d7e | 930 | kprintf( |
| 984263bc MD |
931 | "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n", |
| 932 | (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0, | |
| 933 | m->hold_count); | |
| 934 | if ((m->queue - m->pc) == PQ_FREE) | |
| 935 | panic("vm_page_free: freeing free page"); | |
| 936 | else | |
| 937 | panic("vm_page_free: freeing busy page"); | |
| 938 | } | |
| 939 | ||
| 940 | /* | |
| 941 | * unqueue, then remove page. Note that we cannot destroy | |
| 942 | * the page here because we do not want to call the pager's | |
| 943 | * callback routine until after we've put the page on the | |
| 944 | * appropriate free queue. | |
| 945 | */ | |
| 984263bc MD |
946 | vm_page_unqueue_nowakeup(m); |
| 947 | vm_page_remove(m); | |
| 948 | ||
| 949 | /* | |
| f2d22ebf MD |
950 | * No further management of fictitious pages occurs beyond object |
| 951 | * and queue removal. | |
| 984263bc | 952 | */ |
| 984263bc | 953 | if ((m->flags & PG_FICTITIOUS) != 0) { |
| 9765affa MD |
954 | vm_page_wakeup(m); |
| 955 | crit_exit(); | |
| 984263bc MD |
956 | return; |
| 957 | } | |
| 958 | ||
| 959 | m->valid = 0; | |
| 960 | vm_page_undirty(m); | |
| 961 | ||
| 962 | if (m->wire_count != 0) { | |
| 963 | if (m->wire_count > 1) { | |
| de71fd3f MD |
964 | panic( |
| 965 | "vm_page_free: invalid wire count (%d), pindex: 0x%lx", | |
| 966 | m->wire_count, (long)m->pindex); | |
| 984263bc | 967 | } |
| 73c351d1 | 968 | panic("vm_page_free: freeing wired page"); |
| 984263bc MD |
969 | } |
| 970 | ||
| 971 | /* | |
| 984263bc MD |
972 | * Clear the UNMANAGED flag when freeing an unmanaged page. |
| 973 | */ | |
| 984263bc MD |
974 | if (m->flags & PG_UNMANAGED) { |
| 975 | m->flags &= ~PG_UNMANAGED; | |
| 984263bc MD |
976 | } |
| 977 | ||
| 978 | if (m->hold_count != 0) { | |
| 979 | m->flags &= ~PG_ZERO; | |
| 980 | m->queue = PQ_HOLD; | |
| de71fd3f | 981 | } else { |
| 984263bc | 982 | m->queue = PQ_FREE + m->pc; |
| de71fd3f | 983 | } |
| 984263bc MD |
984 | pq = &vm_page_queues[m->queue]; |
| 985 | pq->lcnt++; | |
| 986 | ++(*pq->cnt); | |
| 987 | ||
| 988 | /* | |
| 989 | * Put zero'd pages on the end ( where we look for zero'd pages | |
| 990 | * first ) and non-zerod pages at the head. | |
| 991 | */ | |
| 984263bc MD |
992 | if (m->flags & PG_ZERO) { |
| 993 | TAILQ_INSERT_TAIL(&pq->pl, m, pageq); | |
| 994 | ++vm_page_zero_count; | |
| 995 | } else { | |
| 996 | TAILQ_INSERT_HEAD(&pq->pl, m, pageq); | |
| 997 | } | |
| 9765affa | 998 | vm_page_wakeup(m); |
| 984263bc | 999 | vm_page_free_wakeup(); |
| 9765affa | 1000 | crit_exit(); |
| 984263bc MD |
1001 | } |
| 1002 | ||
| 1003 | /* | |
| de71fd3f MD |
1004 | * vm_page_unmanage() |
| 1005 | * | |
| 1006 | * Prevent PV management from being done on the page. The page is | |
| 1007 | * removed from the paging queues as if it were wired, and as a | |
| 1008 | * consequence of no longer being managed the pageout daemon will not | |
| 1009 | * touch it (since there is no way to locate the pte mappings for the | |
| 1010 | * page). madvise() calls that mess with the pmap will also no longer | |
| 1011 | * operate on the page. | |
| 1012 | * | |
| 1013 | * Beyond that the page is still reasonably 'normal'. Freeing the page | |
| 1014 | * will clear the flag. | |
| 1015 | * | |
| 1016 | * This routine is used by OBJT_PHYS objects - objects using unswappable | |
| 1017 | * physical memory as backing store rather then swap-backed memory and | |
| 1018 | * will eventually be extended to support 4MB unmanaged physical | |
| 1019 | * mappings. | |
| 654a39f0 MD |
1020 | * |
| 1021 | * Must be called with a critical section held. | |
| 984263bc | 1022 | */ |
| 984263bc MD |
1023 | void |
| 1024 | vm_page_unmanage(vm_page_t m) | |
| 1025 | { | |
| 654a39f0 | 1026 | ASSERT_IN_CRIT_SECTION(); |
| 984263bc MD |
1027 | if ((m->flags & PG_UNMANAGED) == 0) { |
| 1028 | if (m->wire_count == 0) | |
| 1029 | vm_page_unqueue(m); | |
| 1030 | } | |
| 1031 | vm_page_flag_set(m, PG_UNMANAGED); | |
| 984263bc MD |
1032 | } |
| 1033 | ||
| 1034 | /* | |
| de71fd3f MD |
1035 | * Mark this page as wired down by yet another map, removing it from |
| 1036 | * paging queues as necessary. | |
| 984263bc | 1037 | * |
| de71fd3f MD |
1038 | * The page queues must be locked. |
| 1039 | * This routine may not block. | |
| 984263bc MD |
1040 | */ |
| 1041 | void | |
| 1042 | vm_page_wire(vm_page_t m) | |
| 1043 | { | |
| 984263bc MD |
1044 | /* |
| 1045 | * Only bump the wire statistics if the page is not already wired, | |
| 1046 | * and only unqueue the page if it is on some queue (if it is unmanaged | |
| f2d22ebf MD |
1047 | * it is already off the queues). Don't do anything with fictitious |
| 1048 | * pages because they are always wired. | |
| 984263bc | 1049 | */ |
| 654a39f0 | 1050 | crit_enter(); |
| f2d22ebf MD |
1051 | if ((m->flags & PG_FICTITIOUS) == 0) { |
| 1052 | if (m->wire_count == 0) { | |
| 1053 | if ((m->flags & PG_UNMANAGED) == 0) | |
| 1054 | vm_page_unqueue(m); | |
| 1055 | vmstats.v_wire_count++; | |
| 1056 | } | |
| 1057 | m->wire_count++; | |
| 1058 | KASSERT(m->wire_count != 0, | |
| 17cde63e | 1059 | ("vm_page_wire: wire_count overflow m=%p", m)); |
| 984263bc | 1060 | } |
| 654a39f0 | 1061 | crit_exit(); |
| 984263bc MD |
1062 | } |
| 1063 | ||
| 1064 | /* | |
| de71fd3f MD |
1065 | * Release one wiring of this page, potentially enabling it to be paged again. |
| 1066 | * | |
| 1067 | * Many pages placed on the inactive queue should actually go | |
| 1068 | * into the cache, but it is difficult to figure out which. What | |
| 1069 | * we do instead, if the inactive target is well met, is to put | |
| 1070 | * clean pages at the head of the inactive queue instead of the tail. | |
| 1071 | * This will cause them to be moved to the cache more quickly and | |
| 1072 | * if not actively re-referenced, freed more quickly. If we just | |
| 1073 | * stick these pages at the end of the inactive queue, heavy filesystem | |
| 1074 | * meta-data accesses can cause an unnecessary paging load on memory bound | |
| 1075 | * processes. This optimization causes one-time-use metadata to be | |
| 1076 | * reused more quickly. | |
| 1077 | * | |
| 1078 | * BUT, if we are in a low-memory situation we have no choice but to | |
| 1079 | * put clean pages on the cache queue. | |
| 1080 | * | |
| 1081 | * A number of routines use vm_page_unwire() to guarantee that the page | |
| 1082 | * will go into either the inactive or active queues, and will NEVER | |
| 1083 | * be placed in the cache - for example, just after dirtying a page. | |
| 1084 | * dirty pages in the cache are not allowed. | |
| 1085 | * | |
| 1086 | * The page queues must be locked. | |
| 1087 | * This routine may not block. | |
| 984263bc MD |
1088 | */ |
| 1089 | void | |
| 1090 | vm_page_unwire(vm_page_t m, int activate) | |
| 1091 | { | |
| 654a39f0 | 1092 | crit_enter(); |
| f2d22ebf MD |
1093 | if (m->flags & PG_FICTITIOUS) { |
| 1094 | /* do nothing */ | |
| 1095 | } else if (m->wire_count <= 0) { | |
| 1096 | panic("vm_page_unwire: invalid wire count: %d", m->wire_count); | |
| 1097 | } else { | |
| 1098 | if (--m->wire_count == 0) { | |
| 1099 | --vmstats.v_wire_count; | |
| 984263bc MD |
1100 | if (m->flags & PG_UNMANAGED) { |
| 1101 | ; | |
| 1102 | } else if (activate) { | |
| f2d22ebf MD |
1103 | TAILQ_INSERT_TAIL( |
| 1104 | &vm_page_queues[PQ_ACTIVE].pl, m, pageq); | |
| 984263bc MD |
1105 | m->queue = PQ_ACTIVE; |
| 1106 | vm_page_queues[PQ_ACTIVE].lcnt++; | |
| 12e4aaff | 1107 | vmstats.v_active_count++; |
| 984263bc MD |
1108 | } else { |
| 1109 | vm_page_flag_clear(m, PG_WINATCFLS); | |
| f2d22ebf MD |
1110 | TAILQ_INSERT_TAIL( |
| 1111 | &vm_page_queues[PQ_INACTIVE].pl, m, pageq); | |
| 984263bc MD |
1112 | m->queue = PQ_INACTIVE; |
| 1113 | vm_page_queues[PQ_INACTIVE].lcnt++; | |
| 12e4aaff | 1114 | vmstats.v_inactive_count++; |
| 984263bc MD |
1115 | } |
| 1116 | } | |
| 984263bc | 1117 | } |
| 654a39f0 | 1118 | crit_exit(); |
| 984263bc MD |
1119 | } |
| 1120 | ||
| 1121 | ||
| 1122 | /* | |
| 1123 | * Move the specified page to the inactive queue. If the page has | |
| 1124 | * any associated swap, the swap is deallocated. | |
| 1125 | * | |
| 1126 | * Normally athead is 0 resulting in LRU operation. athead is set | |
| 1127 | * to 1 if we want this page to be 'as if it were placed in the cache', | |
| 1128 | * except without unmapping it from the process address space. | |
| 1129 | * | |
| 1130 | * This routine may not block. | |
| 1131 | */ | |
| 1132 | static __inline void | |
| 1133 | _vm_page_deactivate(vm_page_t m, int athead) | |
| 1134 | { | |
| 984263bc MD |
1135 | /* |
| 1136 | * Ignore if already inactive. | |
| 1137 | */ | |
| 1138 | if (m->queue == PQ_INACTIVE) | |
| 1139 | return; | |
| 1140 | ||
| 984263bc MD |
1141 | if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { |
| 1142 | if ((m->queue - m->pc) == PQ_CACHE) | |
| 12e4aaff | 1143 | mycpu->gd_cnt.v_reactivated++; |
| 984263bc MD |
1144 | vm_page_flag_clear(m, PG_WINATCFLS); |
| 1145 | vm_page_unqueue(m); | |
| 1146 | if (athead) | |
| 1147 | TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); | |
| 1148 | else | |
| 1149 | TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); | |
| 1150 | m->queue = PQ_INACTIVE; | |
| 1151 | vm_page_queues[PQ_INACTIVE].lcnt++; | |
| 12e4aaff | 1152 | vmstats.v_inactive_count++; |
| 984263bc | 1153 | } |
| 984263bc MD |
1154 | } |
| 1155 | ||
| 1156 | void | |
| 1157 | vm_page_deactivate(vm_page_t m) | |
| 1158 | { | |
| 654a39f0 | 1159 | crit_enter(); |
| 984263bc | 1160 | _vm_page_deactivate(m, 0); |
| 654a39f0 | 1161 | crit_exit(); |
| 984263bc MD |
1162 | } |
| 1163 | ||
| 1164 | /* | |
| 1165 | * vm_page_try_to_cache: | |
| 1166 | * | |
| 1167 | * Returns 0 on failure, 1 on success | |
| 1168 | */ | |
| 1169 | int | |
| 1170 | vm_page_try_to_cache(vm_page_t m) | |
| 1171 | { | |
| 654a39f0 | 1172 | crit_enter(); |
| 984263bc MD |
1173 | if (m->dirty || m->hold_count || m->busy || m->wire_count || |
| 1174 | (m->flags & (PG_BUSY|PG_UNMANAGED))) { | |
| 80137ef3 | 1175 | crit_exit(); |
| 984263bc MD |
1176 | return(0); |
| 1177 | } | |
| 1178 | vm_page_test_dirty(m); | |
| 654a39f0 MD |
1179 | if (m->dirty) { |
| 1180 | crit_exit(); | |
| 984263bc | 1181 | return(0); |
| 654a39f0 | 1182 | } |
| 984263bc | 1183 | vm_page_cache(m); |
| 654a39f0 | 1184 | crit_exit(); |
| 984263bc MD |
1185 | return(1); |
| 1186 | } | |
| 1187 | ||
| 1188 | /* | |
| de71fd3f MD |
1189 | * Attempt to free the page. If we cannot free it, we do nothing. |
| 1190 | * 1 is returned on success, 0 on failure. | |
| 984263bc | 1191 | */ |
| 984263bc MD |
1192 | int |
| 1193 | vm_page_try_to_free(vm_page_t m) | |
| 1194 | { | |
| 654a39f0 | 1195 | crit_enter(); |
| 984263bc MD |
1196 | if (m->dirty || m->hold_count || m->busy || m->wire_count || |
| 1197 | (m->flags & (PG_BUSY|PG_UNMANAGED))) { | |
| 654a39f0 | 1198 | crit_exit(); |
| 984263bc MD |
1199 | return(0); |
| 1200 | } | |
| 1201 | vm_page_test_dirty(m); | |
| 654a39f0 MD |
1202 | if (m->dirty) { |
| 1203 | crit_exit(); | |
| 984263bc | 1204 | return(0); |
| 654a39f0 | 1205 | } |
| 984263bc MD |
1206 | vm_page_busy(m); |
| 1207 | vm_page_protect(m, VM_PROT_NONE); | |
| 1208 | vm_page_free(m); | |
| 654a39f0 | 1209 | crit_exit(); |
| 984263bc MD |
1210 | return(1); |
| 1211 | } | |
| 1212 | ||
| 984263bc MD |
1213 | /* |
| 1214 | * vm_page_cache | |
| 1215 | * | |
| 1216 | * Put the specified page onto the page cache queue (if appropriate). | |
| 1217 | * | |
| 1218 | * This routine may not block. | |
| 1219 | */ | |
| 1220 | void | |
| 1221 | vm_page_cache(vm_page_t m) | |
| 1222 | { | |
| 654a39f0 | 1223 | ASSERT_IN_CRIT_SECTION(); |
| 984263bc | 1224 | |
| 2681a43c HP |
1225 | if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || |
| 1226 | m->wire_count || m->hold_count) { | |
| 086c1d7e | 1227 | kprintf("vm_page_cache: attempting to cache busy/held page\n"); |
| 984263bc MD |
1228 | return; |
| 1229 | } | |
| c9ec86b3 MD |
1230 | |
| 1231 | /* | |
| 1232 | * Already in the cache (and thus not mapped) | |
| 1233 | */ | |
| 17cde63e MD |
1234 | if ((m->queue - m->pc) == PQ_CACHE) { |
| 1235 | KKASSERT((m->flags & PG_MAPPED) == 0); | |
| 984263bc | 1236 | return; |
| 17cde63e | 1237 | } |
| 984263bc MD |
1238 | |
| 1239 | /* | |
| c9ec86b3 MD |
1240 | * Caller is required to test m->dirty, but note that the act of |
| 1241 | * removing the page from its maps can cause it to become dirty | |
| 1242 | * on an SMP system due to another cpu running in usermode. | |
| 984263bc | 1243 | */ |
| c9ec86b3 | 1244 | if (m->dirty) { |
| 984263bc MD |
1245 | panic("vm_page_cache: caching a dirty page, pindex: %ld", |
| 1246 | (long)m->pindex); | |
| 1247 | } | |
| c9ec86b3 MD |
1248 | |
| 1249 | /* | |
| 1250 | * Remove all pmaps and indicate that the page is not | |
| 17cde63e MD |
1251 | * writeable or mapped. Our vm_page_protect() call may |
| 1252 | * have blocked (especially w/ VM_PROT_NONE), so recheck | |
| 1253 | * everything. | |
| c9ec86b3 | 1254 | */ |
| 17cde63e | 1255 | vm_page_busy(m); |
| c9ec86b3 | 1256 | vm_page_protect(m, VM_PROT_NONE); |
| 17cde63e MD |
1257 | vm_page_wakeup(m); |
| 1258 | if ((m->flags & (PG_BUSY|PG_UNMANAGED|PG_MAPPED)) || m->busy || | |
| 1259 | m->wire_count || m->hold_count) { | |
| 1260 | /* do nothing */ | |
| 1261 | } else if (m->dirty) { | |
| c9ec86b3 MD |
1262 | vm_page_deactivate(m); |
| 1263 | } else { | |
| 1264 | vm_page_unqueue_nowakeup(m); | |
| 1265 | m->queue = PQ_CACHE + m->pc; | |
| 1266 | vm_page_queues[m->queue].lcnt++; | |
| 1267 | TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq); | |
| 1268 | vmstats.v_cache_count++; | |
| 1269 | vm_page_free_wakeup(); | |
| 1270 | } | |
| 984263bc MD |
1271 | } |
| 1272 | ||
| 1273 | /* | |
| de71fd3f MD |
1274 | * vm_page_dontneed() |
| 1275 | * | |
| 1276 | * Cache, deactivate, or do nothing as appropriate. This routine | |
| 1277 | * is typically used by madvise() MADV_DONTNEED. | |
| 1278 | * | |
| 1279 | * Generally speaking we want to move the page into the cache so | |
| 1280 | * it gets reused quickly. However, this can result in a silly syndrome | |
| 1281 | * due to the page recycling too quickly. Small objects will not be | |
| 1282 | * fully cached. On the otherhand, if we move the page to the inactive | |
| 1283 | * queue we wind up with a problem whereby very large objects | |
| 1284 | * unnecessarily blow away our inactive and cache queues. | |
| 1285 | * | |
| 1286 | * The solution is to move the pages based on a fixed weighting. We | |
| 1287 | * either leave them alone, deactivate them, or move them to the cache, | |
| 1288 | * where moving them to the cache has the highest weighting. | |
| 1289 | * By forcing some pages into other queues we eventually force the | |
| 1290 | * system to balance the queues, potentially recovering other unrelated | |
| 1291 | * space from active. The idea is to not force this to happen too | |
| 1292 | * often. | |
| 984263bc | 1293 | */ |
| 984263bc MD |
1294 | void |
| 1295 | vm_page_dontneed(vm_page_t m) | |
| 1296 | { | |
| 1297 | static int dnweight; | |
| 1298 | int dnw; | |
| 1299 | int head; | |
| 1300 | ||
| 1301 | dnw = ++dnweight; | |
| 1302 | ||
| 1303 | /* | |
| 1304 | * occassionally leave the page alone | |
| 1305 | */ | |
| 654a39f0 | 1306 | crit_enter(); |
| 984263bc MD |
1307 | if ((dnw & 0x01F0) == 0 || |
| 1308 | m->queue == PQ_INACTIVE || | |
| 1309 | m->queue - m->pc == PQ_CACHE | |
| 1310 | ) { | |
| 1311 | if (m->act_count >= ACT_INIT) | |
| 1312 | --m->act_count; | |
| 654a39f0 | 1313 | crit_exit(); |
| 984263bc MD |
1314 | return; |
| 1315 | } | |
| 1316 | ||
| 1317 | if (m->dirty == 0) | |
| 1318 | vm_page_test_dirty(m); | |
| 1319 | ||
| 1320 | if (m->dirty || (dnw & 0x0070) == 0) { | |
| 1321 | /* | |
| 1322 | * Deactivate the page 3 times out of 32. | |
| 1323 | */ | |
| 1324 | head = 0; | |
| 1325 | } else { | |
| 1326 | /* | |
| 1327 | * Cache the page 28 times out of every 32. Note that | |
| 1328 | * the page is deactivated instead of cached, but placed | |
| 1329 | * at the head of the queue instead of the tail. | |
| 1330 | */ | |
| 1331 | head = 1; | |
| 1332 | } | |
| 1333 | _vm_page_deactivate(m, head); | |
| 654a39f0 | 1334 | crit_exit(); |
| 984263bc MD |
1335 | } |
| 1336 | ||
| 1337 | /* | |
| 06ecca5a MD |
1338 | * Grab a page, blocking if it is busy and allocating a page if necessary. |
| 1339 | * A busy page is returned or NULL. | |
| 984263bc | 1340 | * |
| dc1fd4b3 | 1341 | * If VM_ALLOC_RETRY is specified VM_ALLOC_NORMAL must also be specified. |
| 06ecca5a | 1342 | * If VM_ALLOC_RETRY is not specified |
| dc1fd4b3 | 1343 | * |
| 06ecca5a MD |
1344 | * This routine may block, but if VM_ALLOC_RETRY is not set then NULL is |
| 1345 | * always returned if we had blocked. | |
| 1346 | * This routine will never return NULL if VM_ALLOC_RETRY is set. | |
| 1347 | * This routine may not be called from an interrupt. | |
| 1348 | * The returned page may not be entirely valid. | |
| 1349 | * | |
| 1350 | * This routine may be called from mainline code without spl protection and | |
| 1351 | * be guarenteed a busied page associated with the object at the specified | |
| 1352 | * index. | |
| 984263bc MD |
1353 | */ |
| 1354 | vm_page_t | |
| 1355 | vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) | |
| 1356 | { | |
| 984263bc | 1357 | vm_page_t m; |
| 654a39f0 | 1358 | int generation; |
| 984263bc | 1359 | |
| dc1fd4b3 MD |
1360 | KKASSERT(allocflags & |
| 1361 | (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); | |
| 654a39f0 | 1362 | crit_enter(); |
| 984263bc MD |
1363 | retrylookup: |
| 1364 | if ((m = vm_page_lookup(object, pindex)) != NULL) { | |
| 1365 | if (m->busy || (m->flags & PG_BUSY)) { | |
| 1366 | generation = object->generation; | |
| 1367 | ||
| 984263bc MD |
1368 | while ((object->generation == generation) && |
| 1369 | (m->busy || (m->flags & PG_BUSY))) { | |
| 1370 | vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); | |
| 377d4740 | 1371 | tsleep(m, 0, "pgrbwt", 0); |
| 984263bc | 1372 | if ((allocflags & VM_ALLOC_RETRY) == 0) { |
| 06ecca5a MD |
1373 | m = NULL; |
| 1374 | goto done; | |
| 984263bc MD |
1375 | } |
| 1376 | } | |
| 984263bc MD |
1377 | goto retrylookup; |
| 1378 | } else { | |
| 1379 | vm_page_busy(m); | |
| 06ecca5a | 1380 | goto done; |
| 984263bc MD |
1381 | } |
| 1382 | } | |
| 984263bc MD |
1383 | m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY); |
| 1384 | if (m == NULL) { | |
| 4ecf7cc9 | 1385 | vm_wait(0); |
| 984263bc | 1386 | if ((allocflags & VM_ALLOC_RETRY) == 0) |
| 06ecca5a | 1387 | goto done; |
| 984263bc MD |
1388 | goto retrylookup; |
| 1389 | } | |
| 06ecca5a | 1390 | done: |
| 654a39f0 | 1391 | crit_exit(); |
| 06ecca5a | 1392 | return(m); |
| 984263bc MD |
1393 | } |
| 1394 | ||
| 1395 | /* | |
| 1396 | * Mapping function for valid bits or for dirty bits in | |
| 1397 | * a page. May not block. | |
| 1398 | * | |
| 1399 | * Inputs are required to range within a page. | |
| 1400 | */ | |
| 984263bc MD |
1401 | __inline int |
| 1402 | vm_page_bits(int base, int size) | |
| 1403 | { | |
| 1404 | int first_bit; | |
| 1405 | int last_bit; | |
| 1406 | ||
| 1407 | KASSERT( | |
| 1408 | base + size <= PAGE_SIZE, | |
| 1409 | ("vm_page_bits: illegal base/size %d/%d", base, size) | |
| 1410 | ); | |
| 1411 | ||
| 1412 | if (size == 0) /* handle degenerate case */ | |
| 1413 | return(0); | |
| 1414 | ||
| 1415 | first_bit = base >> DEV_BSHIFT; | |
| 1416 | last_bit = (base + size - 1) >> DEV_BSHIFT; | |
| 1417 | ||
| 1418 | return ((2 << last_bit) - (1 << first_bit)); | |
| 1419 | } | |
| 1420 | ||
| 1421 | /* | |
| de71fd3f MD |
1422 | * Sets portions of a page valid and clean. The arguments are expected |
| 1423 | * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive | |
| 1424 | * of any partial chunks touched by the range. The invalid portion of | |
| 1425 | * such chunks will be zero'd. | |
| 984263bc | 1426 | * |
| de71fd3f | 1427 | * This routine may not block. |
| 984263bc | 1428 | * |
| de71fd3f | 1429 | * (base + size) must be less then or equal to PAGE_SIZE. |
| 984263bc MD |
1430 | */ |
| 1431 | void | |
| 1432 | vm_page_set_validclean(vm_page_t m, int base, int size) | |
| 1433 | { | |
| 1434 | int pagebits; | |
| 1435 | int frag; | |
| 1436 | int endoff; | |
| 1437 | ||
| 1438 | if (size == 0) /* handle degenerate case */ | |
| 1439 | return; | |
| 1440 | ||
| 1441 | /* | |
| 1442 | * If the base is not DEV_BSIZE aligned and the valid | |
| 1443 | * bit is clear, we have to zero out a portion of the | |
| 1444 | * first block. | |
| 1445 | */ | |
| 1446 | ||
| 1447 | if ((frag = base & ~(DEV_BSIZE - 1)) != base && | |
| 1448 | (m->valid & (1 << (base >> DEV_BSHIFT))) == 0 | |
| 1449 | ) { | |
| 1450 | pmap_zero_page_area( | |
| 1451 | VM_PAGE_TO_PHYS(m), | |
| 1452 | frag, | |
| 1453 | base - frag | |
| 1454 | ); | |
| 1455 | } | |
| 1456 | ||
| 1457 | /* | |
| 1458 | * If the ending offset is not DEV_BSIZE aligned and the | |
| 1459 | * valid bit is clear, we have to zero out a portion of | |
| 1460 | * the last block. | |
| 1461 | */ | |
| 1462 | ||
| 1463 | endoff = base + size; | |
| 1464 | ||
| 1465 | if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && | |
| 1466 | (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0 | |
| 1467 | ) { | |
| 1468 | pmap_zero_page_area( | |
| 1469 | VM_PAGE_TO_PHYS(m), | |
| 1470 | endoff, | |
| 1471 | DEV_BSIZE - (endoff & (DEV_BSIZE - 1)) | |
| 1472 | ); | |
| 1473 | } | |
| 1474 | ||
| 1475 | /* | |
| 1476 | * Set valid, clear dirty bits. If validating the entire | |
| 1477 | * page we can safely clear the pmap modify bit. We also | |
| 1478 | * use this opportunity to clear the PG_NOSYNC flag. If a process | |
| 1479 | * takes a write fault on a MAP_NOSYNC memory area the flag will | |
| 1480 | * be set again. | |
| 1481 | * | |
| 1482 | * We set valid bits inclusive of any overlap, but we can only | |
| 1483 | * clear dirty bits for DEV_BSIZE chunks that are fully within | |
| 1484 | * the range. | |
| 1485 | */ | |
| 1486 | ||
| 1487 | pagebits = vm_page_bits(base, size); | |
| 1488 | m->valid |= pagebits; | |
| 1489 | #if 0 /* NOT YET */ | |
| 1490 | if ((frag = base & (DEV_BSIZE - 1)) != 0) { | |
| 1491 | frag = DEV_BSIZE - frag; | |
| 1492 | base += frag; | |
| 1493 | size -= frag; | |
| 1494 | if (size < 0) | |
| 1495 | size = 0; | |
| 1496 | } | |
| 1497 | pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); | |
| 1498 | #endif | |
| 1499 | m->dirty &= ~pagebits; | |
| 1500 | if (base == 0 && size == PAGE_SIZE) { | |
| 1501 | pmap_clear_modify(m); | |
| 1502 | vm_page_flag_clear(m, PG_NOSYNC); | |
| 1503 | } | |
| 1504 | } | |
| 1505 | ||
| 984263bc MD |
1506 | void |
| 1507 | vm_page_clear_dirty(vm_page_t m, int base, int size) | |
| 1508 | { | |
| 1509 | m->dirty &= ~vm_page_bits(base, size); | |
| 1510 | } | |
| 1511 | ||
| 1512 | /* | |
| 17cde63e MD |
1513 | * Make the page all-dirty. |
| 1514 | * | |
| 1515 | * Also make sure the related object and vnode reflect the fact that the | |
| 1516 | * object may now contain a dirty page. | |
| 1517 | */ | |
| 1518 | void | |
| 1519 | vm_page_dirty(vm_page_t m) | |
| 1520 | { | |
| 1521 | #ifdef INVARIANTS | |
| 1522 | int pqtype = m->queue - m->pc; | |
| 1523 | #endif | |
| 1524 | KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE, | |
| 1525 | ("vm_page_dirty: page in free/cache queue!")); | |
| 1526 | if (m->dirty != VM_PAGE_BITS_ALL) { | |
| 1527 | m->dirty = VM_PAGE_BITS_ALL; | |
| 1528 | if (m->object) | |
| 1529 | vm_object_set_writeable_dirty(m->object); | |
| 1530 | } | |
| 1531 | } | |
| 1532 | ||
| 1533 | /* | |
| de71fd3f MD |
1534 | * Invalidates DEV_BSIZE'd chunks within a page. Both the |
| 1535 | * valid and dirty bits for the effected areas are cleared. | |
| 984263bc | 1536 | * |
| de71fd3f | 1537 | * May not block. |
| 984263bc MD |
1538 | */ |
| 1539 | void | |
| 1540 | vm_page_set_invalid(vm_page_t m, int base, int size) | |
| 1541 | { | |
| 1542 | int bits; | |
| 1543 | ||
| 1544 | bits = vm_page_bits(base, size); | |
| 1545 | m->valid &= ~bits; | |
| 1546 | m->dirty &= ~bits; | |
| 1547 | m->object->generation++; | |
| 1548 | } | |
| 1549 | ||
| 1550 | /* | |
| de71fd3f MD |
1551 | * The kernel assumes that the invalid portions of a page contain |
| 1552 | * garbage, but such pages can be mapped into memory by user code. | |
| 1553 | * When this occurs, we must zero out the non-valid portions of the | |
| 1554 | * page so user code sees what it expects. | |
| 984263bc | 1555 | * |
| de71fd3f MD |
1556 | * Pages are most often semi-valid when the end of a file is mapped |
| 1557 | * into memory and the file's size is not page aligned. | |
| 984263bc | 1558 | */ |
| 984263bc MD |
1559 | void |
| 1560 | vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) | |
| 1561 | { | |
| 1562 | int b; | |
| 1563 | int i; | |
| 1564 | ||
| 1565 | /* | |
| 1566 | * Scan the valid bits looking for invalid sections that | |
| 1567 | * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the | |
| 1568 | * valid bit may be set ) have already been zerod by | |
| 1569 | * vm_page_set_validclean(). | |
| 1570 | */ | |
| 984263bc MD |
1571 | for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { |
| 1572 | if (i == (PAGE_SIZE / DEV_BSIZE) || | |
| 1573 | (m->valid & (1 << i)) | |
| 1574 | ) { | |
| 1575 | if (i > b) { | |
| 1576 | pmap_zero_page_area( | |
| 1577 | VM_PAGE_TO_PHYS(m), | |
| 1578 | b << DEV_BSHIFT, | |
| 1579 | (i - b) << DEV_BSHIFT | |
| 1580 | ); | |
| 1581 | } | |
| 1582 | b = i + 1; | |
| 1583 | } | |
| 1584 | } | |
| 1585 | ||
| 1586 | /* | |
| 1587 | * setvalid is TRUE when we can safely set the zero'd areas | |
| 1588 | * as being valid. We can do this if there are no cache consistency | |
| 1589 | * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. | |
| 1590 | */ | |
| 984263bc MD |
1591 | if (setvalid) |
| 1592 | m->valid = VM_PAGE_BITS_ALL; | |
| 1593 | } | |
| 1594 | ||
| 1595 | /* | |
| de71fd3f MD |
1596 | * Is a (partial) page valid? Note that the case where size == 0 |
| 1597 | * will return FALSE in the degenerate case where the page is entirely | |
| 1598 | * invalid, and TRUE otherwise. | |
| 984263bc | 1599 | * |
| de71fd3f | 1600 | * May not block. |
| 984263bc | 1601 | */ |
| 984263bc MD |
1602 | int |
| 1603 | vm_page_is_valid(vm_page_t m, int base, int size) | |
| 1604 | { | |
| 1605 | int bits = vm_page_bits(base, size); | |
| 1606 | ||
| 1607 | if (m->valid && ((m->valid & bits) == bits)) | |
| 1608 | return 1; | |
| 1609 | else | |
| 1610 | return 0; | |
| 1611 | } | |
| 1612 | ||
| 1613 | /* | |
| 1614 | * update dirty bits from pmap/mmu. May not block. | |
| 1615 | */ | |
| 984263bc MD |
1616 | void |
| 1617 | vm_page_test_dirty(vm_page_t m) | |
| 1618 | { | |
| 1619 | if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) { | |
| 1620 | vm_page_dirty(m); | |
| 1621 | } | |
| 1622 | } | |
| 1623 | ||
| 10192bae MD |
1624 | /* |
| 1625 | * Issue an event on a VM page. Corresponding action structures are | |
| 1626 | * removed from the page's list and called. | |
| 1627 | */ | |
| 1628 | void | |
| 1629 | vm_page_event_internal(vm_page_t m, vm_page_event_t event) | |
| 1630 | { | |
| 1631 | struct vm_page_action *scan, *next; | |
| 1632 | ||
| 1633 | LIST_FOREACH_MUTABLE(scan, &m->action_list, entry, next) { | |
| 1634 | if (scan->event == event) { | |
| 1635 | scan->event = VMEVENT_NONE; | |
| 1636 | LIST_REMOVE(scan, entry); | |
| 1637 | scan->func(m, scan); | |
| 1638 | } | |
| 1639 | } | |
| 1640 | } | |
| 1641 | ||
| 984263bc MD |
1642 | #include "opt_ddb.h" |
| 1643 | #ifdef DDB | |
| 1644 | #include <sys/kernel.h> | |
| 1645 | ||
| 1646 | #include <ddb/ddb.h> | |
| 1647 | ||
| 1648 | DB_SHOW_COMMAND(page, vm_page_print_page_info) | |
| 1649 | { | |
| 12e4aaff MD |
1650 | db_printf("vmstats.v_free_count: %d\n", vmstats.v_free_count); |
| 1651 | db_printf("vmstats.v_cache_count: %d\n", vmstats.v_cache_count); | |
| 1652 | db_printf("vmstats.v_inactive_count: %d\n", vmstats.v_inactive_count); | |
| 1653 | db_printf("vmstats.v_active_count: %d\n", vmstats.v_active_count); | |
| 1654 | db_printf("vmstats.v_wire_count: %d\n", vmstats.v_wire_count); | |
| 1655 | db_printf("vmstats.v_free_reserved: %d\n", vmstats.v_free_reserved); | |
| 1656 | db_printf("vmstats.v_free_min: %d\n", vmstats.v_free_min); | |
| 1657 | db_printf("vmstats.v_free_target: %d\n", vmstats.v_free_target); | |
| 1658 | db_printf("vmstats.v_cache_min: %d\n", vmstats.v_cache_min); | |
| 1659 | db_printf("vmstats.v_inactive_target: %d\n", vmstats.v_inactive_target); | |
| 984263bc MD |
1660 | } |
| 1661 | ||
| 1662 | DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) | |
| 1663 | { | |
| 1664 | int i; | |
| 1665 | db_printf("PQ_FREE:"); | |
| 1666 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 1667 | db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt); | |
| 1668 | } | |
| 1669 | db_printf("\n"); | |
| 1670 | ||
| 1671 | db_printf("PQ_CACHE:"); | |
| 1672 | for(i=0;i<PQ_L2_SIZE;i++) { | |
| 1673 | db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt); | |
| 1674 | } | |
| 1675 | db_printf("\n"); | |
| 1676 | ||
| 1677 | db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", | |
| 1678 | vm_page_queues[PQ_ACTIVE].lcnt, | |
| 1679 | vm_page_queues[PQ_INACTIVE].lcnt); | |
| 1680 | } | |
| 1681 | #endif /* DDB */ |