X-Git-Url: https://gitweb.dragonflybsd.org/~nant/dragonfly.git/blobdiff_plain/6f83ced90ea75526068ff4fbb95caabc32a7a3d8..da23a592849ab6010314c28c571f343b43c9b6d5:/sys/vm/vm_page.c diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index a10a6ead34..37af8cc72e 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1,4 +1,6 @@ /* + * (MPSAFE) + * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * @@ -75,6 +77,7 @@ #include #include #include +#include #include #include @@ -87,7 +90,15 @@ #include #include #include +#include + +#include + #include +#include + +#define VMACTION_HSIZE 256 +#define VMACTION_HMASK (VMACTION_HSIZE - 1) static void vm_page_queue_init(void); static void vm_page_free_wakeup(void); @@ -96,7 +107,10 @@ static vm_page_t _vm_page_list_find2(int basequeue, int index); struct vpgqueues vm_page_queues[PQ_COUNT]; /* Array of tailq lists */ -#define ASSERT_IN_CRIT_SECTION() KKASSERT(crit_test(curthread)); +LIST_HEAD(vm_page_action_list, vm_page_action); +struct vm_page_action_list action_list[VMACTION_HSIZE]; +static volatile int vm_pages_waiting; + RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, vm_pindex_t, pindex); @@ -118,6 +132,9 @@ vm_page_queue_init(void) for (i = 0; i < PQ_COUNT; i++) TAILQ_INIT(&vm_page_queues[i].pl); + + for (i = 0; i < VMACTION_HSIZE; i++) + LIST_INIT(&action_list[i]); } /* @@ -184,18 +201,20 @@ vm_add_new_page(vm_paddr_t pa) * * Initializes the resident memory module. * - * Allocates memory for the page cells, and for the object/offset-to-page - * hash table headers. Each page cell is initialized and placed on the - * free list. + * Preallocates memory for critical VM structures and arrays prior to + * kernel_map becoming available. * - * starta/enda represents the range of physical memory addresses available - * for use (skipping memory already used by the kernel), subject to - * phys_avail[]. Note that phys_avail[] has already mapped out memory - * already in use by the kernel. + * Memory is allocated from (virtual2_start, virtual2_end) if available, + * otherwise memory is allocated from (virtual_start, virtual_end). + * + * On x86-64 (virtual_start, virtual_end) is only 2GB and may not be + * large enough to hold vm_page_array & other structures for machines with + * large amounts of ram, so we want to use virtual2* when available. */ -vm_offset_t -vm_page_startup(vm_offset_t vaddr) +void +vm_page_startup(void) { + vm_offset_t vaddr = virtual2_start ? virtual2_start : virtual_start; vm_offset_t mapped; vm_size_t npages; vm_paddr_t page_range; @@ -215,8 +234,8 @@ vm_page_startup(vm_offset_t vaddr) vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { - phys_avail[i] = round_page(phys_avail[i]); - phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); + phys_avail[i] = round_page64(phys_avail[i]); + phys_avail[i + 1] = trunc_page64(phys_avail[i + 1]); } for (i = 0; phys_avail[i + 1]; i += 2) { @@ -240,6 +259,27 @@ vm_page_startup(vm_offset_t vaddr) vm_page_queue_init(); + /* VKERNELs don't support minidumps and as such don't need vm_page_dump */ +#if !defined(_KERNEL_VIRTUAL) + /* + * Allocate a bitmap to indicate that a random physical page + * needs to be included in a minidump. + * + * The amd64 port needs this to indicate which direct map pages + * need to be dumped, via calls to dump_add_page()/dump_drop_page(). + * + * However, i386 still needs this workspace internally within the + * minidump code. 
In theory, they are not needed on i386, but are + * included should the sf_buf code decide to use them. + */ + page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE; + vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); + end -= vm_page_dump_size; + vm_page_dump = (void *)pmap_map(&vaddr, end, end + vm_page_dump_size, + VM_PROT_READ | VM_PROT_WRITE); + bzero((void *)vm_page_dump, vm_page_dump_size); +#endif + /* * Compute the number of pages of memory that will be available for * use (taking into account the overhead of a page structure per @@ -253,19 +293,19 @@ vm_page_startup(vm_offset_t vaddr) * Initialize the mem entry structures now, and put them in the free * queue. */ - vm_page_array = (vm_page_t) vaddr; - mapped = vaddr; + new_end = trunc_page(end - page_range * sizeof(struct vm_page)); + mapped = pmap_map(&vaddr, new_end, end, + VM_PROT_READ | VM_PROT_WRITE); + vm_page_array = (vm_page_t)mapped; +#if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL) /* - * Validate these addresses. + * since pmap_map on amd64 returns stuff out of a direct-map region, + * we have to manually add these pages to the minidump tracking so + * that they can be dumped, including the vm_page_array. */ - new_end = trunc_page(end - page_range * sizeof(struct vm_page)); - mapped = pmap_map(mapped, new_end, end, - VM_PROT_READ | VM_PROT_WRITE); -#ifdef __x86_64__ - /* pmap_map() returns an address in the DMAP region */ - vm_page_array = (vm_page_t) mapped; - mapped = vaddr; + for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE) + dump_add_page(pa); #endif /* @@ -293,7 +333,10 @@ vm_page_startup(vm_offset_t vaddr) pa += PAGE_SIZE; } } - return (mapped); + if (virtual2_start) + virtual2_start = vaddr; + else + virtual_start = vaddr; } /* @@ -322,26 +365,51 @@ rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2) return(0); } +/* + * Holding a page keeps it from being reused. Other parts of the system + * can still disassociate the page from its current object and free it, or + * perform read or write I/O on it and/or otherwise manipulate the page, + * but if the page is held the VM system will leave the page and its data + * intact and not reuse the page for other purposes until the last hold + * reference is released. (see vm_page_wire() if you want to prevent the + * page from being disassociated from its object too). + * + * The caller must hold vm_token. + * + * The caller must still validate the contents of the page and, if necessary, + * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete + * before manipulating the page. + */ +void +vm_page_hold(vm_page_t m) +{ + ASSERT_LWKT_TOKEN_HELD(&vm_token); + ++m->hold_count; +} + /* * The opposite of vm_page_hold(). A page can be freed while being held, * which places it on the PQ_HOLD queue. We must call vm_page_free_toq() * in this case to actually free it once the hold count drops to 0. * - * This routine must be called at splvm(). + * The caller must hold vm_token if non-blocking operation is desired, + * but otherwise does not need to. 
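
A usage sketch of the hold/unhold protocol described above (illustrative only, not part of the patch; the helper name is hypothetical). The caller takes vm_token just long enough to look the page up and bump hold_count; dropping the hold later needs no token of its own because vm_page_unhold() acquires it internally:

static vm_page_t
hold_page_at(vm_object_t object, vm_pindex_t pindex)
{
        vm_page_t m;

        lwkt_gettoken(&vm_token);
        m = vm_page_lookup(object, pindex);     /* requires vm_token */
        if (m)
                vm_page_hold(m);        /* page identity and data stay intact */
        lwkt_reltoken(&vm_token);
        return (m);
}

The caller may still find the page busy or with I/O in progress; it must validate the contents (waiting if necessary) before use, and finish with vm_page_unhold(m).
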
*/ void -vm_page_unhold(vm_page_t mem) +vm_page_unhold(vm_page_t m) { - --mem->hold_count; - KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); - if (mem->hold_count == 0 && mem->queue == PQ_HOLD) { - vm_page_busy(mem); - vm_page_free_toq(mem); + lwkt_gettoken(&vm_token); + --m->hold_count; + KASSERT(m->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); + if (m->hold_count == 0 && m->queue == PQ_HOLD) { + vm_page_busy(m); + vm_page_free_toq(m); } + lwkt_reltoken(&vm_token); } /* - * Inserts the given mem entry into the object and object list. + * Inserts the given vm_page into the object and object list. * * The pagetables are not updated but will presumably fault the page * in if necessary, or if a kernel page the caller will at some point @@ -349,12 +417,13 @@ vm_page_unhold(vm_page_t mem) * here so we *can't* do this anyway. * * This routine may not block. + * This routine must be called with the vm_token held. * This routine must be called with a critical section held. */ void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - ASSERT_IN_CRIT_SECTION(); + ASSERT_LWKT_TOKEN_HELD(&vm_token); if (m->object != NULL) panic("vm_page_insert: already inserted"); @@ -375,12 +444,23 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) */ object->resident_page_count++; + /* + * Add the pv_list_cout of the page when its inserted in + * the object + */ + object->agg_pv_list_count = object->agg_pv_list_count + m->md.pv_list_count; + /* * Since we are inserting a new and possibly dirty page, * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. */ if ((m->valid & m->dirty) || (m->flags & PG_WRITEABLE)) vm_object_set_writeable_dirty(object); + + /* + * Checks for a swap assignment and sets PG_SWAPPED if appropriate. + */ + swap_pager_page_inserted(m); } /* @@ -390,20 +470,20 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) * The underlying pmap entry (if any) is NOT removed here. * This routine may not block. * - * The page must be BUSY and will remain BUSY on return. No spl needs to be - * held on call to this routine. + * The page must be BUSY and will remain BUSY on return. + * No other requirements. * - * note: FreeBSD side effect was to unbusy the page on return. We leave - * it busy. + * NOTE: FreeBSD side effect was to unbusy the page on return. We leave + * it busy. */ void vm_page_remove(vm_page_t m) { vm_object_t object; - crit_enter(); + lwkt_gettoken(&vm_token); if (m->object == NULL) { - crit_exit(); + lwkt_reltoken(&vm_token); return; } @@ -417,26 +497,18 @@ vm_page_remove(vm_page_t m) */ vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m); object->resident_page_count--; + object->agg_pv_list_count = object->agg_pv_list_count - m->md.pv_list_count; object->generation++; m->object = NULL; - crit_exit(); + lwkt_reltoken(&vm_token); } /* * Locate and return the page at (object, pindex), or NULL if the * page could not be found. * - * This routine will operate properly without spl protection, but - * the returned page could be in flux if it is busy. Because an - * interrupt can race a caller's busy check (unbusying and freeing the - * page we return before the caller is able to check the busy bit), - * the caller should generally call this routine with a critical - * section held. - * - * Callers may call this routine without spl protection if they know - * 'for sure' that the page will not be ripped out from under them - * by an interrupt. + * The caller must hold vm_token. 
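
The lookup rule above boils down to the following non-blocking lookup-and-busy pattern, shown here as an illustrative sketch (not part of the patch) using only calls visible in this file:

static vm_page_t
lookup_and_busy(vm_object_t object, vm_pindex_t pindex)
{
        vm_page_t m;

        lwkt_gettoken(&vm_token);
        m = vm_page_lookup(object, pindex);
        if (m) {
                if (m->busy || (m->flags & PG_BUSY)) {
                        /*
                         * Owned by someone else; a blocking caller would
                         * sleep on the busy state and retry, the way
                         * vm_page_grab() does further down.
                         */
                        m = NULL;
                } else {
                        vm_page_busy(m);
                }
        }
        lwkt_reltoken(&vm_token);
        return (m);             /* if non-NULL, release with vm_page_wakeup() */
}
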
*/ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) @@ -446,9 +518,8 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex) /* * Search the hash table for this object/offset pair */ - crit_enter(); + ASSERT_LWKT_TOKEN_HELD(&vm_token); m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); - crit_exit(); KKASSERT(m == NULL || (m->object == object && m->pindex == pindex)); return(m); } @@ -479,14 +550,14 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex) void vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { - crit_enter(); + lwkt_gettoken(&vm_token); vm_page_remove(m); vm_page_insert(m, new_object, new_pindex); if (m->queue - m->pc == PQ_CACHE) vm_page_deactivate(m); vm_page_dirty(m); vm_page_wakeup(m); - crit_exit(); + lwkt_reltoken(&vm_token); } /* @@ -494,7 +565,7 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) * is being moved between queues or otherwise is to remain BUSYied by the * caller. * - * This routine must be called at splhigh(). + * The caller must hold vm_token * This routine may not block. */ void @@ -503,6 +574,7 @@ vm_page_unqueue_nowakeup(vm_page_t m) int queue = m->queue; struct vpgqueues *pq; + ASSERT_LWKT_TOKEN_HELD(&vm_token); if (queue != PQ_NONE) { pq = &vm_page_queues[queue]; m->queue = PQ_NONE; @@ -516,7 +588,7 @@ vm_page_unqueue_nowakeup(vm_page_t m) * vm_page_unqueue() - Remove a page from its queue, wakeup the pagedemon * if necessary. * - * This routine must be called at splhigh(). + * The caller must hold vm_token * This routine may not block. */ void @@ -525,6 +597,7 @@ vm_page_unqueue(vm_page_t m) int queue = m->queue; struct vpgqueues *pq; + ASSERT_LWKT_TOKEN_HELD(&vm_token); if (queue != PQ_NONE) { m->queue = PQ_NONE; pq = &vm_page_queues[queue]; @@ -546,7 +619,7 @@ vm_page_unqueue(vm_page_t m) * caches. We need this optimization because cpu caches tend to be * physical caches, while object spaces tend to be virtual. * - * This routine must be called at splvm(). + * Must be called with vm_token held. * This routine may not block. * * Note that this routine is carefully inlined. A non-inlined version @@ -593,6 +666,10 @@ _vm_page_list_find2(int basequeue, int index) return(m); } +/* + * Must be called with vm_token held if the caller desired non-blocking + * operation and a stable result. + */ vm_page_t vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) { @@ -604,14 +681,15 @@ vm_page_list_find(int basequeue, int index, boolean_t prefer_zero) * might be found, but not applicable, they are deactivated. This * keeps us from using potentially busy cached pages. * - * This routine must be called with a critical section held. * This routine may not block. + * Must be called with vm_token held. 
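
A rough sketch of the queue-coloring idea that vm_page_list_find() and the PQ_* subqueues implement (illustrative only, not part of the patch; real callers normally fold an object color into the index as well, and must still check the returned page for busy/held/dirty state the way vm_page_select_cache() does below):

static vm_page_t
take_colored_cache_page(vm_pindex_t pindex)
{
        /*
         * PQ_CACHE (like PQ_FREE) is really PQ_L2_SIZE separate lists.
         * Masking the pindex picks a preferred "color"; if that list is
         * empty, vm_page_list_find() falls back to the other colors itself.
         */
        ASSERT_LWKT_TOKEN_HELD(&vm_token);
        return (vm_page_list_find(PQ_CACHE, (int)(pindex & PQ_L2_MASK), FALSE));
}
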
*/ vm_page_t vm_page_select_cache(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; + ASSERT_LWKT_TOKEN_HELD(&vm_token); while (TRUE) { m = _vm_page_list_find( PQ_CACHE, @@ -658,6 +736,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer * page_req classes: * * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain + * VM_ALLOC_QUICK like normal but cannot use cache * VM_ALLOC_SYSTEM greater free drain * VM_ALLOC_INTERRUPT allow free list to be completely drained * VM_ALLOC_ZERO advisory request for pre-zero'd page @@ -675,11 +754,14 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req) { vm_page_t m = NULL; + lwkt_gettoken(&vm_token); + KKASSERT(object != NULL); KASSERT(!vm_page_lookup(object, pindex), ("vm_page_alloc: page already allocated")); KKASSERT(page_req & - (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); + (VM_ALLOC_NORMAL|VM_ALLOC_QUICK| + VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); /* * Certain system threads (pageout daemon, buf_daemon's) are @@ -688,7 +770,6 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req) if (curthread->td_flags & TDF_SYSTHREAD) page_req |= VM_ALLOC_SYSTEM; - crit_enter(); loop: if (vmstats.v_free_count > vmstats.v_free_reserved || ((page_req & VM_ALLOC_INTERRUPT) && vmstats.v_free_count > 0) || @@ -734,7 +815,7 @@ loop: /* * On failure return NULL */ - crit_exit(); + lwkt_reltoken(&vm_token); #if defined(DIAGNOSTIC) if (vmstats.v_cache_count > 0) kprintf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count); @@ -746,7 +827,7 @@ loop: /* * No pages available, wakeup the pageout daemon and give up. */ - crit_exit(); + lwkt_reltoken(&vm_token); vm_pageout_deficit++; pagedaemon_wakeup(); return (NULL); @@ -782,7 +863,7 @@ loop: m->valid = 0; /* - * vm_page_insert() is safe prior to the crit_exit(). Note also that + * vm_page_insert() is safe while holding vm_token. Note also that * inserting a page here does not insert it into the pmap (which * could cause us to block allocating memory). We cannot block * anywhere. @@ -795,7 +876,7 @@ loop: */ pagedaemon_wakeup(); - crit_exit(); + lwkt_reltoken(&vm_token); /* * A PG_BUSY page is returned. @@ -803,43 +884,93 @@ loop: return (m); } +/* + * Wait for sufficient free memory for nominal heavy memory use kernel + * operations. + */ +void +vm_wait_nominal(void) +{ + while (vm_page_count_min(0)) + vm_wait(0); +} + +/* + * Test if vm_wait_nominal() would block. + */ +int +vm_test_nominal(void) +{ + if (vm_page_count_min(0)) + return(1); + return(0); +} + /* * Block until free pages are available for allocation, called in various * places before memory allocations. + * + * The caller may loop if vm_page_count_min() == FALSE so we cannot be + * more generous then that. */ void vm_wait(int timo) { - crit_enter(); + /* + * never wait forever + */ + if (timo == 0) + timo = hz; + lwkt_gettoken(&vm_token); + if (curthread == pagethread) { - vm_pageout_pages_needed = 1; - tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo); + /* + * The pageout daemon itself needs pages, this is bad. + */ + if (vm_page_count_min(0)) { + vm_pageout_pages_needed = 1; + tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo); + } } else { - if (vm_pages_needed == 0) { - vm_pages_needed = 1; - wakeup(&vm_pages_needed); + /* + * Wakeup the pageout daemon if necessary and wait. 
+ */ + if (vm_page_count_target()) { + if (vm_pages_needed == 0) { + vm_pages_needed = 1; + wakeup(&vm_pages_needed); + } + ++vm_pages_waiting; /* SMP race ok */ + tsleep(&vmstats.v_free_count, 0, "vmwait", timo); } - tsleep(&vmstats.v_free_count, 0, "vmwait", timo); } - crit_exit(); + lwkt_reltoken(&vm_token); } /* * Block until free pages are available for allocation * - * Called only in vm_fault so that processes page faulting can be + * Called only from vm_fault so that processes page faulting can be * easily tracked. */ void vm_waitpfault(void) { - crit_enter(); - if (vm_pages_needed == 0) { - vm_pages_needed = 1; - wakeup(&vm_pages_needed); + /* + * Wakeup the pageout daemon if necessary and wait. + */ + if (vm_page_count_target()) { + lwkt_gettoken(&vm_token); + if (vm_page_count_target()) { + if (vm_pages_needed == 0) { + vm_pages_needed = 1; + wakeup(&vm_pages_needed); + } + ++vm_pages_waiting; /* SMP race ok */ + tsleep(&vmstats.v_free_count, 0, "pfault", hz); + } + lwkt_reltoken(&vm_token); } - tsleep(&vmstats.v_free_count, 0, "pfault", 0); - crit_exit(); } /* @@ -852,7 +983,7 @@ vm_waitpfault(void) void vm_page_activate(vm_page_t m) { - crit_enter(); + lwkt_gettoken(&vm_token); if (m->queue != PQ_ACTIVE) { if ((m->queue - m->pc) == PQ_CACHE) mycpu->gd_cnt.v_reactivated++; @@ -872,7 +1003,7 @@ vm_page_activate(vm_page_t m) if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } - crit_exit(); + lwkt_reltoken(&vm_token); } /* @@ -887,8 +1018,8 @@ static __inline void vm_page_free_wakeup(void) { /* - * if pageout daemon needs pages, then tell it that there are - * some free. + * If the pageout daemon itself needs pages, then tell it that + * there are some free. */ if (vm_pageout_pages_needed && vmstats.v_cache_count + vmstats.v_free_count >= @@ -899,13 +1030,32 @@ vm_page_free_wakeup(void) } /* - * wakeup processes that are waiting on memory if we hit a - * high water mark. And wakeup scheduler process if we have - * lots of memory. this process will swapin processes. + * Wakeup processes that are waiting on memory. + * + * NOTE: vm_paging_target() is the pageout daemon's target, while + * vm_page_count_target() is somewhere inbetween. We want + * to wake processes up prior to the pageout daemon reaching + * its target to provide some hysteresis. */ - if (vm_pages_needed && !vm_page_count_min(0)) { - vm_pages_needed = 0; - wakeup(&vmstats.v_free_count); + if (vm_pages_waiting) { + if (!vm_page_count_target()) { + /* + * Plenty of pages are free, wakeup everyone. + */ + vm_pages_waiting = 0; + wakeup(&vmstats.v_free_count); + ++mycpu->gd_cnt.v_ppwakeups; + } else if (!vm_page_count_min(0)) { + /* + * Some pages are free, wakeup someone. + */ + int wcount = vm_pages_waiting; + if (wcount > 0) + --wcount; + vm_pages_waiting = wcount; + wakeup_one(&vmstats.v_free_count); + ++mycpu->gd_cnt.v_ppwakeups; + } } } @@ -926,7 +1076,7 @@ vm_page_free_toq(vm_page_t m) { struct vpgqueues *pq; - crit_enter(); + lwkt_gettoken(&vm_token); mycpu->gd_cnt.v_tfree++; KKASSERT((m->flags & PG_MAPPED) == 0); @@ -957,7 +1107,7 @@ vm_page_free_toq(vm_page_t m) */ if ((m->flags & PG_FICTITIOUS) != 0) { vm_page_wakeup(m); - crit_exit(); + lwkt_reltoken(&vm_token); return; } @@ -1002,7 +1152,38 @@ vm_page_free_toq(vm_page_t m) } vm_page_wakeup(m); vm_page_free_wakeup(); - crit_exit(); + lwkt_reltoken(&vm_token); +} + +/* + * vm_page_free_fromq_fast() + * + * Remove a non-zero page from one of the free queues; the page is removed for + * zeroing, so do not issue a wakeup. 
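
For context, a heavily simplified consumer sketch (not part of the patch, and much simpler than the real idle-zeroing code): pull a not-yet-zeroed page off the free queues, zero it, and return it marked PG_ZERO. Treat pmap_zero_page() taking a physical address as an assumption about the surrounding pmap API:

static void
zero_one_free_page(void)
{
        vm_page_t m;

        m = vm_page_free_fromq_fast();  /* busied, unqueued, PG_ZERO clear */
        if (m == NULL)
                return;
        pmap_zero_page(VM_PAGE_TO_PHYS(m));     /* assumed pmap entry point */
        vm_page_flag_set(m, PG_ZERO);
        vm_page_free_toq(m);            /* back onto PQ_FREE, now pre-zeroed */
}
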
+ * + * MPUNSAFE + */ +vm_page_t +vm_page_free_fromq_fast(void) +{ + static int qi; + vm_page_t m; + int i; + + lwkt_gettoken(&vm_token); + for (i = 0; i < PQ_L2_SIZE; ++i) { + m = vm_page_list_find(PQ_FREE, qi, FALSE); + qi = (qi + PQ_PRIME2) & PQ_L2_MASK; + if (m && (m->flags & PG_ZERO) == 0) { + KKASSERT(m->busy == 0 && (m->flags & PG_BUSY) == 0); + vm_page_unqueue_nowakeup(m); + vm_page_busy(m); + break; + } + m = NULL; + } + lwkt_reltoken(&vm_token); + return (m); } /* @@ -1024,11 +1205,12 @@ vm_page_free_toq(vm_page_t m) * mappings. * * Must be called with a critical section held. + * Must be called with vm_token held. */ void vm_page_unmanage(vm_page_t m) { - ASSERT_IN_CRIT_SECTION(); + ASSERT_LWKT_TOKEN_HELD(&vm_token); if ((m->flags & PG_UNMANAGED) == 0) { if (m->wire_count == 0) vm_page_unqueue(m); @@ -1052,7 +1234,7 @@ vm_page_wire(vm_page_t m) * it is already off the queues). Don't do anything with fictitious * pages because they are always wired. */ - crit_enter(); + lwkt_gettoken(&vm_token); if ((m->flags & PG_FICTITIOUS) == 0) { if (m->wire_count == 0) { if ((m->flags & PG_UNMANAGED) == 0) @@ -1063,7 +1245,7 @@ vm_page_wire(vm_page_t m) KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } - crit_exit(); + lwkt_reltoken(&vm_token); } /* @@ -1094,7 +1276,7 @@ vm_page_wire(vm_page_t m) void vm_page_unwire(vm_page_t m, int activate) { - crit_enter(); + lwkt_gettoken(&vm_token); if (m->flags & PG_FICTITIOUS) { /* do nothing */ } else if (m->wire_count <= 0) { @@ -1117,10 +1299,11 @@ vm_page_unwire(vm_page_t m, int activate) m->queue = PQ_INACTIVE; vm_page_queues[PQ_INACTIVE].lcnt++; vmstats.v_inactive_count++; + ++vm_swapcache_inactive_heuristic; } } } - crit_exit(); + lwkt_reltoken(&vm_token); } @@ -1133,6 +1316,7 @@ vm_page_unwire(vm_page_t m, int activate) * except without unmapping it from the process address space. * * This routine may not block. + * The caller must hold vm_token. */ static __inline void _vm_page_deactivate(vm_page_t m, int athead) @@ -1148,70 +1332,82 @@ _vm_page_deactivate(vm_page_t m, int athead) mycpu->gd_cnt.v_reactivated++; vm_page_flag_clear(m, PG_WINATCFLS); vm_page_unqueue(m); - if (athead) - TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); - else - TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); + if (athead) { + TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, + m, pageq); + } else { + TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, + m, pageq); + ++vm_swapcache_inactive_heuristic; + } m->queue = PQ_INACTIVE; vm_page_queues[PQ_INACTIVE].lcnt++; vmstats.v_inactive_count++; } } +/* + * Attempt to deactivate a page. + * + * No requirements. + */ void vm_page_deactivate(vm_page_t m) { - crit_enter(); - _vm_page_deactivate(m, 0); - crit_exit(); + lwkt_gettoken(&vm_token); + _vm_page_deactivate(m, 0); + lwkt_reltoken(&vm_token); } /* - * vm_page_try_to_cache: - * + * Attempt to move a page to PQ_CACHE. * Returns 0 on failure, 1 on success + * + * No requirements. */ int vm_page_try_to_cache(vm_page_t m) { - crit_enter(); + lwkt_gettoken(&vm_token); if (m->dirty || m->hold_count || m->busy || m->wire_count || (m->flags & (PG_BUSY|PG_UNMANAGED))) { - crit_exit(); + lwkt_reltoken(&vm_token); return(0); } vm_page_test_dirty(m); if (m->dirty) { - crit_exit(); + lwkt_reltoken(&vm_token); return(0); } vm_page_cache(m); - crit_exit(); + lwkt_reltoken(&vm_token); return(1); } /* * Attempt to free the page. If we cannot free it, we do nothing. * 1 is returned on success, 0 on failure. 
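
A short usage sketch for the wiring interface above (illustrative only, not part of the patch): wiring pins a page in memory for the duration of an operation, and the second argument to vm_page_unwire() picks the queue it returns to:

static void
access_page_wired(vm_page_t m)
{
        vm_page_wire(m);        /* takes vm_token itself; page leaves the paging queues */
        /*
         * ... the page cannot be paged out or reclaimed while wired ...
         */
        vm_page_unwire(m, 1);   /* 1: requeue on PQ_ACTIVE, 0: PQ_INACTIVE */
}
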
+ * + * No requirements. */ int vm_page_try_to_free(vm_page_t m) { - crit_enter(); + lwkt_gettoken(&vm_token); if (m->dirty || m->hold_count || m->busy || m->wire_count || (m->flags & (PG_BUSY|PG_UNMANAGED))) { - crit_exit(); + lwkt_reltoken(&vm_token); return(0); } vm_page_test_dirty(m); if (m->dirty) { - crit_exit(); + lwkt_reltoken(&vm_token); return(0); } vm_page_busy(m); vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); - crit_exit(); + lwkt_reltoken(&vm_token); return(1); } @@ -1220,12 +1416,13 @@ vm_page_try_to_free(vm_page_t m) * * Put the specified page onto the page cache queue (if appropriate). * + * The caller must hold vm_token. * This routine may not block. */ void vm_page_cache(vm_page_t m) { - ASSERT_IN_CRIT_SECTION(); + ASSERT_LWKT_TOKEN_HELD(&vm_token); if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count || m->hold_count) { @@ -1295,6 +1492,8 @@ vm_page_cache(vm_page_t m) * system to balance the queues, potentially recovering other unrelated * space from active. The idea is to not force this to happen too * often. + * + * No requirements. */ void vm_page_dontneed(vm_page_t m) @@ -1308,14 +1507,14 @@ vm_page_dontneed(vm_page_t m) /* * occassionally leave the page alone */ - crit_enter(); + lwkt_gettoken(&vm_token); if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE || m->queue - m->pc == PQ_CACHE ) { if (m->act_count >= ACT_INIT) --m->act_count; - crit_exit(); + lwkt_reltoken(&vm_token); return; } @@ -1336,7 +1535,7 @@ vm_page_dontneed(vm_page_t m) head = 1; } _vm_page_deactivate(m, head); - crit_exit(); + lwkt_reltoken(&vm_token); } /* @@ -1355,6 +1554,8 @@ vm_page_dontneed(vm_page_t m) * This routine may be called from mainline code without spl protection and * be guarenteed a busied page associated with the object at the specified * index. + * + * No requirements. */ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) @@ -1364,7 +1565,7 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) KKASSERT(allocflags & (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); - crit_enter(); + lwkt_gettoken(&vm_token); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { if (m->busy || (m->flags & PG_BUSY)) { @@ -1393,7 +1594,7 @@ retrylookup: goto retrylookup; } done: - crit_exit(); + lwkt_reltoken(&vm_token); return(m); } @@ -1402,8 +1603,11 @@ done: * a page. May not block. * * Inputs are required to range within a page. + * + * No requirements. + * Non blocking. */ -__inline int +int vm_page_bits(int base, int size) { int first_bit; @@ -1492,6 +1696,9 @@ _vm_page_zero_valid(vm_page_t m, int base, int size) * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. + * + * Page must be busied? + * No other requirements. */ void vm_page_set_valid(vm_page_t m, int base, int size) @@ -1507,6 +1714,12 @@ vm_page_set_valid(vm_page_t m, int base, int size) * NOTE: This function does not clear the pmap modified bit. * Also note that e.g. NFS may use a byte-granular base * and size. + * + * WARNING: Page must be busied? But vfs_clean_one_page() will call + * this without necessarily busying the page (via bdwrite()). + * So for now vm_token must also be held. + * + * No other requirements. */ void vm_page_set_validclean(vm_page_t m, int base, int size) @@ -1523,12 +1736,36 @@ vm_page_set_validclean(vm_page_t m, int base, int size) } } +/* + * Set valid & dirty. Used by buwrite() + * + * WARNING: Page must be busied? 
But vfs_dirty_one_page() will + * call this function in buwrite() so for now vm_token must + * be held. + * + * No other requirements. + */ +void +vm_page_set_validdirty(vm_page_t m, int base, int size) +{ + int pagebits; + + pagebits = vm_page_bits(base, size); + m->valid |= pagebits; + m->dirty |= pagebits; + if (m->object) + vm_object_set_writeable_dirty(m->object); +} + /* * Clear dirty bits. * * NOTE: This function does not clear the pmap modified bit. * Also note that e.g. NFS may use a byte-granular base * and size. + * + * Page must be busied? + * No other requirements. */ void vm_page_clear_dirty(vm_page_t m, int base, int size) @@ -1545,6 +1782,9 @@ vm_page_clear_dirty(vm_page_t m, int base, int size) * * Also make sure the related object and vnode reflect the fact that the * object may now contain a dirty page. + * + * Page must be busied? + * No other requirements. */ void vm_page_dirty(vm_page_t m) @@ -1565,7 +1805,9 @@ vm_page_dirty(vm_page_t m) * Invalidates DEV_BSIZE'd chunks within a page. Both the * valid and dirty bits for the effected areas are cleared. * - * May not block. + * Page must be busied? + * Does not block. + * No other requirements. */ void vm_page_set_invalid(vm_page_t m, int base, int size) @@ -1586,6 +1828,9 @@ vm_page_set_invalid(vm_page_t m, int base, int size) * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. + * + * Page must be busied? + * No other requirements. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) @@ -1628,7 +1873,8 @@ vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) * will return FALSE in the degenerate case where the page is entirely * invalid, and TRUE otherwise. * - * May not block. + * Does not block. + * No other requirements. */ int vm_page_is_valid(vm_page_t m, int base, int size) @@ -1643,6 +1889,9 @@ vm_page_is_valid(vm_page_t m, int base, int size) /* * update dirty bits from pmap/mmu. May not block. + * + * Caller must hold vm_token if non-blocking operation desired. + * No other requirements. */ void vm_page_test_dirty(vm_page_t m) @@ -1652,24 +1901,86 @@ vm_page_test_dirty(vm_page_t m) } } +/* + * Register an action, associating it with its vm_page + */ +void +vm_page_register_action(vm_page_action_t action, vm_page_event_t event) +{ + struct vm_page_action_list *list; + int hv; + + hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK; + list = &action_list[hv]; + + lwkt_gettoken(&vm_token); + vm_page_flag_set(action->m, PG_ACTIONLIST); + action->event = event; + LIST_INSERT_HEAD(list, action, entry); + lwkt_reltoken(&vm_token); +} + +/* + * Unregister an action, disassociating it from its related vm_page + */ +void +vm_page_unregister_action(vm_page_action_t action) +{ + struct vm_page_action_list *list; + int hv; + + lwkt_gettoken(&vm_token); + if (action->event != VMEVENT_NONE) { + action->event = VMEVENT_NONE; + LIST_REMOVE(action, entry); + + hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK; + list = &action_list[hv]; + if (LIST_EMPTY(list)) + vm_page_flag_clear(action->m, PG_ACTIONLIST); + } + lwkt_reltoken(&vm_token); +} + /* * Issue an event on a VM page. Corresponding action structures are * removed from the page's list and called. + * + * If the vm_page has no more pending action events we clear its + * PG_ACTIONLIST flag. 
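
A minimal subscriber sketch for the action API above (illustrative only, not part of the patch; filling in the vm_page_action fields by hand, and the callback name, are assumptions based on how this file uses the structure):

static void
my_page_event_func(vm_page_t m, vm_page_action_t action)
{
        /* invoked from vm_page_event_internal() with vm_token held */
}

static void
watch_page(struct vm_page_action *action, vm_page_t m, vm_page_event_t event)
{
        action->m = m;                          /* page to watch */
        action->func = my_page_event_func;
        vm_page_register_action(action, event); /* hashes on the page address */
}

If the event never fires, the subscriber must call vm_page_unregister_action(action) itself; when vm_page_event_internal() fires the event it removes the action from its hash chain and runs the callback.
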
*/ void vm_page_event_internal(vm_page_t m, vm_page_event_t event) { - struct vm_page_action *scan, *next; - - LIST_FOREACH_MUTABLE(scan, &m->action_list, entry, next) { - if (scan->event == event) { - scan->event = VMEVENT_NONE; - LIST_REMOVE(scan, entry); - scan->func(m, scan); + struct vm_page_action_list *list; + struct vm_page_action *scan; + struct vm_page_action *next; + int hv; + int all; + + hv = (int)((intptr_t)m >> 8) & VMACTION_HMASK; + list = &action_list[hv]; + all = 1; + + lwkt_gettoken(&vm_token); + LIST_FOREACH_MUTABLE(scan, list, entry, next) { + if (scan->m == m) { + if (scan->event == event) { + scan->event = VMEVENT_NONE; + LIST_REMOVE(scan, entry); + scan->func(m, scan); + /* XXX */ + } else { + all = 0; + } } } + if (all) + vm_page_flag_clear(m, PG_ACTIONLIST); + lwkt_reltoken(&vm_token); } + #include "opt_ddb.h" #ifdef DDB #include