From a491077eece340924fa7f34a248de64be5888c1f Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sun, 19 Dec 2010 21:52:36 -0800
Subject: [PATCH] kernel - vm_page BUSY handling, change vm_page_cache() API,
 maybe fix seg-fault

* All vm_page_deactivate() calls now operate with the caller holding
  the page PG_BUSY, or with the page known not to be busy.  Reorder
  several cases where a vm_page was unbusied prior to calling
  deactivate.

* vm_page_cache() now expects the vm_page to be PG_BUSY and will cache
  the page and clear the bit.

* Fix a race in vm_pageout_page_free(), which called
  vm_object_reference() with an unbusied vm_page and then proceeded to
  busy and free the page.  The problem is that vm_object_reference()
  can block on vmobj_token.

  This may fix the x86-64 seg-fault issue.  Or it may not (throws up
  hands).

* Remove an incorrect KKASSERT which was causing bogus panics.
---
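[Note: a minimal sketch of the new vm_page_cache() calling convention,
modeled on the vm_contig_pg_clean() conversion below.  Illustrative
only, not part of the diff; assumes the caller holds vm_token and that
the page is neither held nor wired:

	if (m->dirty == 0 && m->hold_count == 0) {
		vm_page_busy(m);	/* caller acquires PG_BUSY */
		vm_page_cache(m);	/* releases the busy on every
					 * return path and may even
					 * free the page */
	}

Because vm_page_cache() now disposes of the busy itself, the caller
must not touch the page again after the call.]
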
 sys/kern/kern_slaballoc.c              |  2 +-
 sys/kern/uipc_syscalls.c               |  4 +++-
 sys/kern/vfs_bio.c                     |  2 +-
 sys/platform/pc32/i386/pmap.c          |  2 +-
 sys/platform/pc64/x86_64/pmap.c        |  2 +-
 sys/platform/vkernel/platform/pmap.c   |  2 +-
 sys/platform/vkernel64/platform/pmap.c |  2 +-
 sys/vm/swap_pager.c                    |  2 +-
 sys/vm/vm_contig.c                     |  5 ++++-
 sys/vm/vm_fault.c                      | 11 ++++++++---
 sys/vm/vm_object.c                     |  2 ++
 sys/vm/vm_page.c                       | 21 +++++++++++++++------
 sys/vm/vm_pageout.c                    | 15 +++++++++------
 13 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c
index e690d3c136..8ba0ef3a9b 100644
--- a/sys/kern/kern_slaballoc.c
+++ b/sys/kern/kern_slaballoc.c
@@ -1460,13 +1460,13 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
 		m->valid = VM_PAGE_BITS_ALL;
 		/* page should already be busy */
 		vm_page_wire(m);
-		vm_page_wakeup(m);
 		pmap_enter(&kernel_pmap, addr + i, m, VM_PROT_ALL, 1);
 		if ((m->flags & PG_ZERO) == 0 && (flags & M_ZERO))
 			bzero((char *)addr + i, PAGE_SIZE);
 		vm_page_flag_clear(m, PG_ZERO);
 		KKASSERT(m->flags & (PG_WRITEABLE | PG_MAPPED));
 		vm_page_flag_set(m, PG_REFERENCED);
+		vm_page_wakeup(m);
 	}
 	vm_map_unlock(&kernel_map);
 	vm_map_entry_release(count);
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index bed592499a..c2955a2db2 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1652,12 +1652,14 @@ retry_lookup:
 			lwkt_reltoken(&vm_token);
 			goto retry_lookup;
 		}
+		vm_page_wire(pg);
 		vm_page_wakeup(pg);
 	} else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
 		lwkt_reltoken(&vm_token);
 		goto retry_lookup;
+	} else {
+		vm_page_wire(pg);
 	}
-	vm_page_wire(pg);
 	lwkt_reltoken(&vm_token);
 
 	/*
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 0b514f354a..55b520740f 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3363,8 +3363,8 @@ allocbuf(struct buf *bp, int size)
 			m = bio_page_alloc(obj, pi, desiredpages - bp->b_xio.xio_npages);
 			if (m) {
 				vm_page_wire(m);
-				vm_page_wakeup(m);
 				vm_page_flag_clear(m, PG_ZERO);
+				vm_page_wakeup(m);
 				bp->b_flags &= ~B_CACHE;
 				bp->b_xio.xio_pages[bp->b_xio.xio_npages] = m;
 				++bp->b_xio.xio_npages;
diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c
index ac49d09f45..8a8667cf1d 100644
--- a/sys/platform/pc32/i386/pmap.c
+++ b/sys/platform/pc32/i386/pmap.c
@@ -2591,9 +2591,9 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
 	}
 	if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 	    (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
+		vm_page_busy(p);
 		if ((p->queue - p->pc) == PQ_CACHE)
 			vm_page_deactivate(p);
-		vm_page_busy(p);
 		rel_index = p->pindex - info->start_pindex;
 		pmap_enter_quick(info->pmap,
 				 info->addr + i386_ptob(rel_index), p);
diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c
index f58b1cbb5b..2f9866cc22 100644
--- a/sys/platform/pc64/x86_64/pmap.c
+++ b/sys/platform/pc64/x86_64/pmap.c
@@ -2941,9 +2941,9 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
 	}
 	if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 	    (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
+		vm_page_busy(p);
 		if ((p->queue - p->pc) == PQ_CACHE)
 			vm_page_deactivate(p);
-		vm_page_busy(p);
 		rel_index = p->pindex - info->start_pindex;
 		pmap_enter_quick(info->pmap,
 				 info->addr + x86_64_ptob(rel_index), p);
diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c
index 1447805cc4..edf5244236 100644
--- a/sys/platform/vkernel/platform/pmap.c
+++ b/sys/platform/vkernel/platform/pmap.c
@@ -2123,9 +2123,9 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
 	}
 	if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 	    (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
+		vm_page_busy(p);
 		if ((p->queue - p->pc) == PQ_CACHE)
 			vm_page_deactivate(p);
-		vm_page_busy(p);
 		rel_index = p->pindex - info->start_pindex;
 		pmap_enter_quick(info->pmap,
 				 info->addr + i386_ptob(rel_index), p);
diff --git a/sys/platform/vkernel64/platform/pmap.c b/sys/platform/vkernel64/platform/pmap.c
index 44ba742641..fe65c3a88e 100644
--- a/sys/platform/vkernel64/platform/pmap.c
+++ b/sys/platform/vkernel64/platform/pmap.c
@@ -2555,9 +2555,9 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
 	}
 	if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 	    (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
+		vm_page_busy(p);
 		if ((p->queue - p->pc) == PQ_CACHE)
 			vm_page_deactivate(p);
-		vm_page_busy(p);
 		rel_index = p->pindex - info->start_pindex;
 		pmap_enter_quick(info->pmap,
 				 info->addr + x86_64_ptob(rel_index), p);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index dceacb0871..93ad394ef9 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1864,13 +1864,13 @@ swp_pager_async_iodone(struct bio *bio)
 			vm_page_undirty(m);
 			vm_page_flag_clear(m, PG_SWAPINPROG);
 			vm_page_flag_set(m, PG_SWAPPED);
-			vm_page_io_finish(m);
 			if (vm_page_count_severe())
 				vm_page_deactivate(m);
 #if 0
 			if (!vm_page_count_severe() || !vm_page_try_to_cache(m))
 				vm_page_protect(m, VM_PROT_READ);
 #endif
+			vm_page_io_finish(m);
 		}
 	}
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index a65024d2d5..60c266aa9d 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -175,8 +175,11 @@ vm_contig_pg_clean(int queue)
 				return (TRUE);
 			}
 		}
-		if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
+		KKASSERT(m->busy == 0);
+		if (m->dirty == 0 && m->hold_count == 0) {
+			vm_page_busy(m);
 			vm_page_cache(m);
+		}
 	}
 	return (FALSE);
 }
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 15ac2c1199..b62fcc39f7 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -438,7 +438,7 @@ RetryFault:
 	lwkt_gettoken(&vm_token);
 	unlock_things(&fs);
 
-	KKASSERT(fs.m->queue == PQ_NONE);
+	/*KKASSERT(fs.m->queue == PQ_NONE); page-in op may deactivate page */
 	KKASSERT(fs.m->flags & PG_BUSY);
 
 	/*
@@ -475,6 +475,8 @@ RetryFault:
 	 */
 	vm_page_wakeup(fs.m);
 	vm_object_deallocate(fs.first_object);
+	/*fs.m = NULL; */
+	/*fs.first_object = NULL; */
 	/*lwkt_reltoken(&vm_token);*/
 
 	return (KERN_SUCCESS);
@@ -689,6 +691,7 @@ RetryFault:
 	 */
 	vm_page_wakeup(fs.m);
 	vm_object_deallocate(fs.first_object);
+	/*fs.first_object = NULL; */
 	lwkt_reltoken(&vm_token);
 
 	*errorp = 0;
@@ -838,6 +841,7 @@ RetryFault:
 	 */
 	vm_page_wakeup(fs.m);
 	vm_object_deallocate(fs.first_object);
+	/*fs.first_object = NULL; */
 	lwkt_reltoken(&vm_token);
 
 	*errorp = 0;
@@ -934,6 +938,7 @@ vm_fault_vpagetable(struct faultstate *fs, vm_pindex_t *pindex,
 		vm_page_flag_set(fs->m, PG_REFERENCED);
 		vm_page_activate(fs->m);
 		vm_page_wakeup(fs->m);
+		fs->m = NULL;
 		cleanup_successful_fault(fs);
 	}
 	/*
@@ -1202,10 +1207,10 @@ readrest:
 				    mt->wire_count) {
 					goto skip;
 				}
+				vm_page_busy(mt);
 				if (mt->dirty == 0)
 					vm_page_test_dirty(mt);
 				if (mt->dirty) {
-					vm_page_busy(mt);
 					vm_page_protect(mt, VM_PROT_NONE);
 					vm_page_deactivate(mt);
@@ -2119,10 +2124,10 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
 		    (m->busy == 0) &&
 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
+			vm_page_busy(m);
 			if ((m->queue - m->pc) == PQ_CACHE) {
 				vm_page_deactivate(m);
 			}
-			vm_page_busy(m);
 			if (pprot & VM_PROT_WRITE)
 				vm_set_nosync(m, entry);
 			pmap_enter(pmap, addr, m, pprot, 0);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 15a2011674..0ee4f12895 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1038,6 +1038,7 @@ shadowlookup:
 			crit_exit();
 			goto relookup;
 		}
+		vm_page_busy(m);
 		crit_exit();
 
 		/*
@@ -1072,6 +1073,7 @@ shadowlookup:
 			if (tobject->type == OBJT_SWAP)
 				swap_pager_freespace(tobject, tpindex, 1);
 		}
+		vm_page_wakeup(m);
 	}
 	lwkt_reltoken(&vm_token);
 }
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 37af8cc72e..c72c5cea6b 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -698,7 +698,11 @@ vm_page_select_cache(vm_object_t object, vm_pindex_t pindex)
 		);
 		if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
 			  m->busy || m->hold_count || m->wire_count)) {
+			/* cache page found busy */
 			vm_page_deactivate(m);
+#ifdef INVARIANTS
+			kprintf("Warning: busy page %p found in cache\n", m);
+#endif
 			continue;
 		}
 		return m;
@@ -1374,6 +1378,7 @@ vm_page_try_to_cache(vm_page_t m)
 		lwkt_reltoken(&vm_token);
 		return(0);
 	}
+	vm_page_busy(m);
 	vm_page_test_dirty(m);
 	if (m->dirty) {
 		lwkt_reltoken(&vm_token);
@@ -1418,15 +1423,18 @@ vm_page_try_to_free(vm_page_t m)
  *
 
 * The caller must hold vm_token.
 * This routine may not block.
+ * The page must be busy, and this routine will release the busy and
+ * possibly even free the page.
 */
 void
 vm_page_cache(vm_page_t m)
 {
 	ASSERT_LWKT_TOKEN_HELD(&vm_token);
 
-	if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
-	    m->wire_count || m->hold_count) {
+	if ((m->flags & PG_UNMANAGED) || m->busy ||
+	    m->wire_count || m->hold_count) {
 		kprintf("vm_page_cache: attempting to cache busy/held page\n");
+		vm_page_wakeup(m);
 		return;
 	}
 
@@ -1435,6 +1443,7 @@ vm_page_cache(vm_page_t m)
 	 */
 	if ((m->queue - m->pc) == PQ_CACHE) {
 		KKASSERT((m->flags & PG_MAPPED) == 0);
+		vm_page_wakeup(m);
 		return;
 	}
 
@@ -1454,20 +1463,20 @@ vm_page_cache(vm_page_t m)
 	 * have blocked (especially w/ VM_PROT_NONE), so recheck
 	 * everything.
 	 */
-	vm_page_busy(m);
 	vm_page_protect(m, VM_PROT_NONE);
-	vm_page_wakeup(m);
-	if ((m->flags & (PG_BUSY|PG_UNMANAGED|PG_MAPPED)) || m->busy ||
+	if ((m->flags & (PG_UNMANAGED|PG_MAPPED)) || m->busy ||
 	    m->wire_count || m->hold_count) {
-		/* do nothing */
+		vm_page_wakeup(m);
 	} else if (m->dirty) {
 		vm_page_deactivate(m);
+		vm_page_wakeup(m);
 	} else {
 		vm_page_unqueue_nowakeup(m);
 		m->queue = PQ_CACHE + m->pc;
 		vm_page_queues[m->queue].lcnt++;
 		TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
 		vmstats.v_cache_count++;
+		vm_page_wakeup(m);
 		vm_page_free_wakeup();
 	}
 }
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index b1b0b00b88..3dfd70895c 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -466,14 +466,14 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags)
 		 * might still be read-heavy.
 		 */
 		if (pageout_status[i] != VM_PAGER_PEND) {
-			vm_object_pip_wakeup(object);
-			vm_page_io_finish(mt);
 			if (vm_page_count_severe())
 				vm_page_deactivate(mt);
 #if 0
 			if (!vm_page_count_severe() || !vm_page_try_to_cache(mt))
 				vm_page_protect(mt, VM_PROT_READ);
 #endif
+			vm_page_io_finish(mt);
+			vm_object_pip_wakeup(object);
 		}
 	}
 	return numpagedout;
@@ -568,8 +568,8 @@ vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data)
 	if (!info->limit && (vm_pageout_algorithm || (p->act_count == 0))) {
 		vm_page_busy(p);
 		vm_page_protect(p, VM_PROT_NONE);
-		vm_page_wakeup(p);
 		vm_page_deactivate(p);
+		vm_page_wakeup(p);
 	} else {
 		TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
 		TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
@@ -675,6 +675,8 @@ vm_pageout_map_deactivate_pages(vm_map_t map, vm_pindex_t desired)
 * be trivially freed.
 *
 * The caller must hold vm_token.
+ *
+ * WARNING: vm_object_reference() can block.
 */
 static void
 vm_pageout_page_free(vm_page_t m)
@@ -682,9 +684,9 @@ vm_pageout_page_free(vm_page_t m)
 	vm_object_t object = m->object;
 	int type = object->type;
 
+	vm_page_busy(m);
 	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
 		vm_object_reference(object);
-	vm_page_busy(m);
 	vm_page_protect(m, VM_PROT_NONE);
 	vm_page_free(m);
 	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
@@ -892,6 +894,7 @@ rescan0:
 			 * Clean pages can be placed onto the cache queue.
 			 * This effectively frees them.
 			 */
+			vm_page_busy(m);
 			vm_page_cache(m);
 			--inactive_shortage;
 		} else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) {
@@ -1167,13 +1170,13 @@ rescan0:
 				++recycle_count;
 				vm_page_busy(m);
 				vm_page_protect(m, VM_PROT_NONE);
-				vm_page_wakeup(m);
 				if (m->dirty == 0 &&
 				    inactive_shortage > 0) {
 					--inactive_shortage;
 					vm_page_cache(m);
 				} else {
 					vm_page_deactivate(m);
+					vm_page_wakeup(m);
 				}
 			} else {
 				vm_page_deactivate(m);
@@ -1450,8 +1453,8 @@ vm_pageout_page_stats(void)
 			 */
 			vm_page_busy(m);
 			vm_page_protect(m, VM_PROT_NONE);
-			vm_page_wakeup(m);
 			vm_page_deactivate(m);
+			vm_page_wakeup(m);
 		} else {
 			m->act_count -= min(m->act_count, ACT_DECLINE);
 			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
-- 
2.41.0
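
[Note: since vm_object_reference() can block on vmobj_token, the fix
busies the page before taking the object reference.  For clarity, here
is the resulting vm_pageout_page_free() as reconstructed from the hunk
above; the closing vm_object_deallocate() call is assumed from the
usual reference/deallocate pairing and is not visible in the hunk's
trailing context:

	static void
	vm_pageout_page_free(vm_page_t m)
	{
		vm_object_t object = m->object;
		int type = object->type;

		/*
		 * Busy the page before referencing the object.  The
		 * reference can block on vmobj_token, and an unbusied
		 * page would not be protected across that sleep.
		 */
		vm_page_busy(m);
		if (type == OBJT_SWAP || type == OBJT_DEFAULT)
			vm_object_reference(object);
		vm_page_protect(m, VM_PROT_NONE);
		vm_page_free(m);
		if (type == OBJT_SWAP || type == OBJT_DEFAULT)
			vm_object_deallocate(object);	/* assumed pairing */
	}
]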