From: Matthew Dillon
Date: Wed, 18 Oct 2017 06:25:24 +0000 (-0700)
Subject: kernel - refactor vm_page busy
X-Git-Tag: v5.3.0~1004
X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/bc0aa189232f6ec6bb245aef2b91ed26a1a72459

kernel - refactor vm_page busy

* Move PG_BUSY, PG_WANTED, PG_SBUSY, and PG_SWAPINPROG out of m->flags.

* Add m->busy_count with PBUSY_LOCKED, PBUSY_WANTED, PBUSY_SWAPINPROG,
  and PBUSY_MASK (for the soft-busy count).

* Add support for acquiring a soft-busy count without a hard-busy.
  This requires that there not already be a hard-busy.  The purpose of
  this is to allow a vm_page to be 'locked' in a shared manner via the
  soft-busy for situations where we only intend to read from it.
---

diff --git a/sys/dev/drm/drm_vm.c b/sys/dev/drm/drm_vm.c
index 7550c54de0..d98cd2b261 100644
--- a/sys/dev/drm/drm_vm.c
+++ b/sys/dev/drm/drm_vm.c
@@ -189,7 +189,8 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
 
 	for (i = 0; i < page_count; i++) {
 		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
-		fp[i].flags &= ~(PG_BUSY | PG_UNMANAGED);
+		fp[i].flags &= ~PG_UNMANAGED;
+		atomic_clear_int(&fp[i].busy_count, PBUSY_LOCKED);
 	}
 	mtx_lock(&vm_phys_fictitious_reg_mtx);
 	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
diff --git a/sys/dev/drm/i915/i915_gem.c b/sys/dev/drm/i915/i915_gem.c
index 356a8361d6..8a0c34c133 100644
--- a/sys/dev/drm/i915/i915_gem.c
+++ b/sys/dev/drm/i915/i915_gem.c
@@ -1814,7 +1814,7 @@ int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t
 	if (*mres != NULL) {
 		m = *mres;
 		*mres = NULL;
-		if ((m->flags & PG_BUSY) == 0)
+		if ((m->busy_count & PBUSY_LOCKED) == 0)
 			kprintf("i915_gem_fault: Page was not busy\n");
 		else
 			vm_page_remove(m);
@@ -1897,7 +1897,7 @@ retry:
 	 * Try to busy the page, retry on failure (non-zero ret).
 	 */
 	if (vm_page_busy_try(m, false)) {
-		kprintf("i915_gem_fault: PG_BUSY\n");
+		kprintf("i915_gem_fault: BUSY\n");
 		ret = -EINTR;
 		goto unlock;
 	}
@@ -1923,7 +1923,7 @@ retry:
 	 * Try to busy the page.  Fails on non-zero return.
 	 */
 	if (vm_page_busy_try(m, false)) {
-		kprintf("i915_gem_fault: PG_BUSY(2)\n");
+		kprintf("i915_gem_fault: BUSY(2)\n");
 		ret = -EINTR;
 		goto unpin;
 	}
diff --git a/sys/dev/drm/ttm/ttm_bo_vm.c b/sys/dev/drm/ttm/ttm_bo_vm.c
index b0dcbef83b..685c09a9f1 100644
--- a/sys/dev/drm/ttm/ttm_bo_vm.c
+++ b/sys/dev/drm/ttm/ttm_bo_vm.c
@@ -230,7 +230,7 @@ reserve:
 	}
 
 	VM_OBJECT_LOCK(vm_obj);
-	if ((m->flags & PG_BUSY) != 0) {
+	if ((m->busy_count & PBUSY_LOCKED) != 0) {
 #if 0
 		vm_page_sleep(m, "ttmpbs");
 #endif
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c73fbe21c7..d807c63b0d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3148,7 +3148,7 @@ allocbuf(struct buf *bp, int size)
 				bp->b_xio.xio_npages;
 
 			/*
-			 * Blocking on m->busy might lead to a
+			 * Blocking on m->busy_count might lead to a
 			 * deadlock:
 			 *
 			 *	vm_fault->getpages->cluster_read->allocbuf
@@ -3687,7 +3687,7 @@ bpdone(struct buf *bp, int elseit)
		 * up.  if you see this, you have not set the page
		 * busy flag correctly!!!
		 */
-		if (m->busy == 0) {
+		if ((m->busy_count & PBUSY_MASK) == 0) {
			kprintf("bpdone: page busy < 0, "
			    "pindex: %d, foff: 0x(%x,%x), "
			    "resid: %d, index: %d\n",
@@ -3872,7 +3872,7 @@ vfs_unbusy_pages(struct buf *bp)
 * This routine is called before a device strategy routine.
 * It is used to tell the VM system that paging I/O is in
 * progress, and treat the pages associated with the buffer
- * almost as being PG_BUSY.  Also the object 'paging_in_progress'
+ * almost as being PBUSY_LOCKED.
Also the object 'paging_in_progress' * flag is handled to make sure that the object doesn't become * inconsistant. * diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index b1353e6e16..5a6f1e20f3 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -1629,7 +1629,8 @@ cluster_wbuild(struct vnode *vp, struct buf **bpp, j < tbp->b_xio.xio_npages; ++j) { m = tbp->b_xio.xio_pages[j]; - if (m->flags & PG_BUSY) { + if (m->busy_count & + PBUSY_LOCKED) { bqrelse(tbp); goto finishcluster; } diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index fccd324e00..5414ac3c61 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -281,13 +281,13 @@ dev_pager_getfake(vm_paddr_t paddr, int pat_mode) pmap_page_init(m); - m->flags = PG_BUSY | PG_FICTITIOUS; + m->flags = PG_FICTITIOUS; m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; - m->busy = 0; m->queue = PQ_NONE; m->object = NULL; + m->busy_count = PBUSY_LOCKED; m->wire_count = 1; m->hold_count = 0; m->phys_addr = paddr; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 23cd6d4cd9..740ea8b28c 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1267,7 +1267,7 @@ swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) int j; int raonly; int error; - u_int32_t flags; + u_int32_t busy_count; vm_page_t marray[XIO_INTERNAL_PAGES]; mreq = *mpp; @@ -1424,8 +1424,10 @@ swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) else bio->bio_driver_info = (void *)(intptr_t)0; - for (j = 0; j < i; ++j) - vm_page_flag_set(bp->b_xio.xio_pages[j], PG_SWAPINPROG); + for (j = 0; j < i; ++j) { + atomic_set_int(&bp->b_xio.xio_pages[j]->busy_count, + PBUSY_SWAPINPROG); + } mycpu->gd_cnt.v_swapin++; mycpu->gd_cnt.v_swappgsin += bp->b_xio.xio_npages; @@ -1450,7 +1452,7 @@ swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) vn_strategy(swapdev_vp, bio); /* - * Wait for the page we want to complete. PG_SWAPINPROG is always + * Wait for the page we want to complete. PBUSY_SWAPINPROG is always * cleared on completion. If an I/O error occurs, SWAPBLK_NONE * is set in the meta-data. * @@ -1466,15 +1468,17 @@ swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) * Read-ahead includes originally requested page case. */ for (;;) { - flags = mreq->flags; + busy_count = mreq->busy_count; cpu_ccfence(); - if ((flags & PG_SWAPINPROG) == 0) + if ((busy_count & PBUSY_SWAPINPROG) == 0) break; tsleep_interlock(mreq, 0); - if (!atomic_cmpset_int(&mreq->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (!atomic_cmpset_int(&mreq->busy_count, busy_count, + busy_count | + PBUSY_SWAPINPROG | PBUSY_WANTED)) { continue; } + atomic_set_int(&mreq->flags, PG_REFERENCED); mycpu->gd_cnt.v_intrans++; if (tsleep(mreq, PINTERLOCKED, "swread", hz*20)) { kprintf( @@ -1488,7 +1492,7 @@ swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) } /* - * Disallow speculative reads prior to the PG_SWAPINPROG test. + * Disallow speculative reads prior to the SWAPINPROG test. */ cpu_lfence(); @@ -1696,7 +1700,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, vm_page_dirty(mreq); rtvals[i+j] = VM_PAGER_OK; - vm_page_flag_set(mreq, PG_SWAPINPROG); + atomic_set_int(&mreq->busy_count, PBUSY_SWAPINPROG); bp->b_xio.xio_pages[j] = mreq; } bp->b_xio.xio_npages = n; @@ -1782,8 +1786,8 @@ swap_pager_newswap(void) * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. 
* - * For READ operations, the pages are PG_BUSY'd. For WRITE operations, - * the pages are vm_page_t->busy'd. For READ operations, we PG_BUSY + * For READ operations, the pages are BUSY'd. For WRITE operations, + * the pages are vm_page_t->busy'd. For READ operations, we BUSY * unbusy all pages except the 'main' request page. For WRITE * operations, we vm_page_t->busy'd unbusy all pages ( we can do this * because we marked them all VM_PAGER_PEND on return from putpages ). @@ -1873,7 +1877,7 @@ swp_pager_async_iodone(struct bio *bio) * not match anything ). * * We have to wake specifically requested pages - * up too because we cleared PG_SWAPINPROG and + * up too because we cleared SWAPINPROG and * someone may be waiting for that. * * NOTE: For reads, m->dirty will probably @@ -1887,14 +1891,15 @@ swp_pager_async_iodone(struct bio *bio) * object->memq from an interrupt. * Deactivate the page instead. * - * WARNING! The instant PG_SWAPINPROG is + * WARNING! The instant SWAPINPROG is * cleared another cpu may start * using the mreq page (it will * check m->valid immediately). */ m->valid = 0; - vm_page_flag_clear(m, PG_SWAPINPROG); + atomic_clear_int(&m->busy_count, + PBUSY_SWAPINPROG); /* * bio_driver_info holds the requested page @@ -1936,7 +1941,8 @@ swp_pager_async_iodone(struct bio *bio) vm_page_activate(m); } vm_page_io_finish(m); - vm_page_flag_clear(m, PG_SWAPINPROG); + atomic_clear_int(&m->busy_count, + PBUSY_SWAPINPROG); vm_page_wakeup(m); } } else if (bio->bio_caller_info1.index & SWBIO_READ) { @@ -1964,7 +1970,7 @@ swp_pager_async_iodone(struct bio *bio) * map non-kernel pmaps and currently asserts * the case. * - * WARNING! The instant PG_SWAPINPROG is + * WARNING! The instant SWAPINPROG is * cleared another cpu may start * using the mreq page (it will * check m->valid immediately). @@ -1973,11 +1979,11 @@ swp_pager_async_iodone(struct bio *bio) m->valid = VM_PAGE_BITS_ALL; vm_page_undirty(m); vm_page_flag_set(m, PG_SWAPPED); - vm_page_flag_clear(m, PG_SWAPINPROG); + atomic_clear_int(&m->busy_count, PBUSY_SWAPINPROG); /* * We have to wake specifically requested pages - * up too because we cleared PG_SWAPINPROG and + * up too because we cleared SWAPINPROG and * could be waiting for it in getpages. 
However, * be sure to not unbusy getpages specifically * requested page - getpages expects it to be @@ -2015,7 +2021,7 @@ swp_pager_async_iodone(struct bio *bio) if (m->object->type == OBJT_SWAP) vm_page_undirty(m); vm_page_flag_set(m, PG_SWAPPED); - vm_page_flag_clear(m, PG_SWAPINPROG); + atomic_clear_int(&m->busy_count, PBUSY_SWAPINPROG); if (vm_page_count_severe()) vm_page_deactivate(m); vm_page_io_finish(m); diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index bd1dd73eba..3cdd2d1621 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -151,7 +151,8 @@ vm_contig_pg_clean(int queue, int count) * Setup a local marker */ bzero(&marker, sizeof(marker)); - marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + marker.flags = PG_FICTITIOUS | PG_MARKER; + marker.busy_count = PBUSY_LOCKED; marker.queue = queue; marker.wire_count = 1; @@ -293,9 +294,10 @@ again: (phys >= low) && (phys < high) && ((phys & (alignment - 1)) == 0) && (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) && - m->busy == 0 && m->wire_count == 0 && - m->hold_count == 0 && - (m->flags & (PG_BUSY | PG_NEED_COMMIT)) == 0) + m->wire_count == 0 && m->hold_count == 0 && + (m->busy_count & + (PBUSY_LOCKED | PBUSY_MASK)) == 0 && + (m->flags & PG_NEED_COMMIT) == 0) { break; } @@ -359,9 +361,10 @@ again: if ((VM_PAGE_TO_PHYS(&m[0]) != (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) || ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) || - m->busy || m->wire_count || + m->wire_count || m->hold_count || - (m->flags & (PG_BUSY | PG_NEED_COMMIT))) + (m->busy_count & (PBUSY_LOCKED | PBUSY_MASK)) || + (m->flags & PG_NEED_COMMIT)) { start++; goto again; @@ -409,11 +412,11 @@ again: KASSERT(m->dirty == 0, ("vm_contig_pg_alloc: page %p was dirty", m)); KKASSERT(m->wire_count == 0); - KKASSERT(m->busy == 0); + KKASSERT((m->busy_count & PBUSY_MASK) == 0); /* - * Clear all flags except PG_[S]BUSY and PG_WANTED, - * then unbusy the now allocated page. + * Clear all flags. Then unbusy the now allocated + * page. */ vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK); vm_page_wire(m); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index fb0d3d07c4..462f4cd1eb 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -438,7 +438,8 @@ RetryFault: bzero(&fakem, sizeof(fakem)); fakem.pindex = first_pindex; - fakem.flags = PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED; + fakem.flags = PG_FICTITIOUS | PG_UNMANAGED; + fakem.busy_count = PBUSY_LOCKED; fakem.valid = VM_PAGE_BITS_ALL; fakem.pat_mode = VM_MEMATTR_DEFAULT; if (fs.entry->object.uksmap(fs.entry->aux.dev, &fakem)) { @@ -613,7 +614,7 @@ RetryFault: vm_map_deinterlock(fs.map, &ilock); /*KKASSERT(fs.m->queue == PQ_NONE); page-in op may deactivate page */ - KKASSERT(fs.m->flags & PG_BUSY); + KKASSERT(fs.m->busy_count & PBUSY_LOCKED); /* * If the page is not wired down, then put it where the pageout daemon @@ -910,7 +911,8 @@ RetryFault: bzero(&fakem, sizeof(fakem)); fakem.pindex = first_pindex; - fakem.flags = PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED; + fakem.flags = PG_FICTITIOUS | PG_UNMANAGED; + fakem.busy_count = PBUSY_LOCKED; fakem.valid = VM_PAGE_BITS_ALL; fakem.pat_mode = VM_MEMATTR_DEFAULT; if (fs.entry->object.uksmap(fs.entry->aux.dev, &fakem)) { @@ -1753,7 +1755,7 @@ readrest: /* * Avoid deadlocking against the map when doing I/O. - * fs.object and the page is PG_BUSY'd. + * fs.object and the page is BUSY'd. * * NOTE: Once unlocked, fs->entry can become stale * so this will NULL it out. @@ -1766,13 +1768,13 @@ readrest: /* * Acquire the page data. 
We still hold a ref on - * fs.object and the page has been PG_BUSY's. + * fs.object and the page has been BUSY's. * * The pager may replace the page (for example, in * order to enter a fictitious page into the * object). If it does so it is responsible for * cleaning up the passed page and properly setting - * the new page PG_BUSY. + * the new page BUSY. * * If we got here through a PG_RAM read-ahead * mark the page may be partially dirty and thus @@ -1980,7 +1982,7 @@ readrest: * top-level object, we have to copy it into a new page owned by the * top-level object. */ - KASSERT((fs->m->flags & PG_BUSY) != 0, + KASSERT((fs->m->busy_count & PBUSY_LOCKED) != 0, ("vm_fault: not busy after main loop")); if (fs->object != fs->first_object) { @@ -2075,11 +2077,20 @@ readrest: * * So we have to remove the page from at * least the current pmap if it is in it. - * Just remove it from all pmaps. + * + * We used to just remove it from all pmaps + * but that creates inefficiencies on SMP, + * particularly for COW program & library + * mappings that are concurrently exec'd. + * Only remove the page from the current + * pmap. */ KKASSERT(fs->first_shared == 0); vm_page_copy(fs->m, fs->first_m); - vm_page_protect(fs->m, VM_PROT_NONE); + /*vm_page_protect(fs->m, VM_PROT_NONE);*/ + pmap_remove_specific( + &curthread->td_lwp->lwp_vmspace->vm_pmap, + fs->m); } /* @@ -2213,7 +2224,7 @@ readrest: * fs->object will have another PIP reference if it is not equal * to fs->first_object. */ - KASSERT(fs->m->flags & PG_BUSY, + KASSERT(fs->m->busy_count & PBUSY_LOCKED, ("vm_fault: page %p not busy!", fs->m)); /* @@ -3026,17 +3037,12 @@ vm_prefault_quick(pmap_t pmap, vm_offset_t addra, */ pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; - m = vm_page_lookup_busy_try(object, pindex, TRUE, &error); - if (m == NULL || error) - break; - /* * Skip pages already mapped, and stop scanning in that * direction. When the scan terminates in both directions * we are done. */ if (pmap_prefault_ok(pmap, addr) == 0) { - vm_page_wakeup(m); if (i & 1) noneg = 1; else @@ -3046,6 +3052,38 @@ vm_prefault_quick(pmap_t pmap, vm_offset_t addra, continue; } + /* + * Shortcut the read-only mapping case using the far more + * efficient vm_page_lookup_sbusy_try() function. This + * allows us to acquire the page soft-busied only which + * is especially nice for concurrent execs of the same + * program. + * + * The lookup function also validates page suitability + * (all valid bits set, and not fictitious). + */ + if ((prot & (VM_PROT_WRITE|VM_PROT_OVERRIDE_WRITE)) == 0) { + m = vm_page_lookup_sbusy_try(object, pindex); + if (m == NULL) + break; + pmap_enter(pmap, addr, m, prot, 0, entry); + mycpu->gd_cnt.v_vm_faults++; + if (curthread->td_lwp) + ++curthread->td_lwp->lwp_ru.ru_minflt; + vm_page_sbusy_drop(m); + continue; + } + + /* + * Fallback to normal vm_page lookup code. This code + * hard-busies the page. Not only that, but the page + * can remain in that state for a significant period + * time due to pmap_enter()'s overhead. + */ + m = vm_page_lookup_busy_try(object, pindex, TRUE, &error); + if (m == NULL || error) + break; + /* * Stop if the page cannot be trivially entered into the * pmap. 
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index ad508ac391..822877954e 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -3374,7 +3374,7 @@ vm_map_split(vm_map_entry_t entry) for (idx = 0; idx < size; idx++) { m = vm_page_lookup(nobject, idx); if (m) { - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); vm_page_wakeup(m); } } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 60134aed09..7de4cccba1 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -875,8 +875,7 @@ RestartScan: pindex); if (m && m->valid) { mincoreinfo = MINCORE_INCORE; - if (m->dirty || - pmap_is_modified(m)) + if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index af55397ebf..995c16bab3 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -859,9 +859,11 @@ _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead) } /* - * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE) - * m->busy is zero. Returns TRUE if it had to sleep, FALSE if we - * did not. Only one sleep call will be made before returning. + * Wait until page is no longer BUSY. If also_m_busy is TRUE we wait + * until the page is no longer BUSY or SBUSY (busy_count field is 0). + * + * Returns TRUE if it had to sleep, FALSE if we did not. Only one sleep + * call will be made before returning. * * This function does NOT busy the page and on return the page is not * guaranteed to be available. @@ -869,19 +871,20 @@ _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead) void vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg) { - u_int32_t flags; + u_int32_t busy_count; for (;;) { - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if ((flags & PG_BUSY) == 0 && - (also_m_busy == 0 || (flags & PG_SBUSY) == 0)) { + if ((busy_count & PBUSY_LOCKED) == 0 && + (also_m_busy == 0 || (busy_count & PBUSY_MASK) == 0)) { break; } tsleep_interlock(m, 0); - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_WANTED)) { + atomic_set_int(&m->flags, PG_REFERENCED); tsleep(m, PINTERLOCKED, msg, 0); break; } @@ -953,34 +956,36 @@ vm_get_pg_color(int cpuid, vm_object_t object, vm_pindex_t pindex) } /* - * Wait until PG_BUSY can be set, then set it. If also_m_busy is TRUE we - * also wait for m->busy to become 0 before setting PG_BUSY. + * Wait until BUSY can be set, then set it. If also_m_busy is TRUE we + * also wait for m->busy_count to become 0 before setting PBUSY_LOCKED. 
*/ void VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m, int also_m_busy, const char *msg VM_PAGE_DEBUG_ARGS) { - u_int32_t flags; + u_int32_t busy_count; for (;;) { - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if (flags & PG_BUSY) { + if (busy_count & PBUSY_LOCKED) { tsleep_interlock(m, 0); - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_WANTED)) { + atomic_set_int(&m->flags, PG_REFERENCED); tsleep(m, PINTERLOCKED, msg, 0); } - } else if (also_m_busy && (flags & PG_SBUSY)) { + } else if (also_m_busy && busy_count) { tsleep_interlock(m, 0); - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_WANTED)) { + atomic_set_int(&m->flags, PG_REFERENCED); tsleep(m, PINTERLOCKED, msg, 0); } } else { - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_BUSY)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_LOCKED)) { #ifdef VM_PAGE_DEBUG m->busy_func = func; m->busy_line = lineno; @@ -992,8 +997,8 @@ VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m, } /* - * Attempt to set PG_BUSY. If also_m_busy is TRUE we only succeed if m->busy - * is also 0. + * Attempt to set BUSY. If also_m_busy is TRUE we only succeed if + * m->busy_count is also 0. * * Returns non-zero on failure. */ @@ -1001,16 +1006,17 @@ int VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, int also_m_busy VM_PAGE_DEBUG_ARGS) { - u_int32_t flags; + u_int32_t busy_count; for (;;) { - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if (flags & PG_BUSY) + if (busy_count & PBUSY_LOCKED) return TRUE; - if (also_m_busy && (flags & PG_SBUSY)) + if (also_m_busy && (busy_count & PBUSY_MASK) != 0) return TRUE; - if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_LOCKED)) { #ifdef VM_PAGE_DEBUG m->busy_func = func; m->busy_line = lineno; @@ -1021,7 +1027,7 @@ VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, int also_m_busy } /* - * Clear the PG_BUSY flag and return non-zero to indicate to the caller + * Clear the BUSY flag and return non-zero to indicate to the caller * that a wakeup() should be performed. * * The vm_page must be spinlocked and will remain spinlocked on return. @@ -1033,28 +1039,30 @@ static __inline int _vm_page_wakeup(vm_page_t m) { - u_int32_t flags; + u_int32_t busy_count; for (;;) { - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if (atomic_cmpset_int(&m->flags, flags, - flags & ~(PG_BUSY | PG_WANTED))) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count & + ~(PBUSY_LOCKED | PBUSY_WANTED))) { break; } } - return(flags & PG_WANTED); + return((int)(busy_count & PBUSY_WANTED)); } /* - * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This + * Clear the BUSY flag and wakeup anyone waiting for the page. This * is typically the last call you make on a page before moving onto * other things. */ void vm_page_wakeup(vm_page_t m) { - KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!")); + KASSERT(m->busy_count & PBUSY_LOCKED, + ("vm_page_wakeup: page not busy!!!")); vm_page_spin_lock(m); if (_vm_page_wakeup(m)) { vm_page_spin_unlock(m); @@ -1138,7 +1146,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) m->queue = PQ_NONE; /* Fictitious pages don't use "segind". 
*/ /* Fictitious pages don't use "order" or "pool". */ - m->flags = PG_FICTITIOUS | PG_UNMANAGED | PG_BUSY; + m->flags = PG_FICTITIOUS | PG_UNMANAGED; + m->busy_count = PBUSY_LOCKED; m->wire_count = 1; spin_init(&m->spin, "fake_page"); pmap_page_init(m); @@ -1225,7 +1234,7 @@ vm_page_remove(vm_page_t m) return; } - if ((m->flags & PG_BUSY) == 0) + if ((m->busy_count & PBUSY_LOCKED) == 0) panic("vm_page_remove: page not busy"); object = m->object; @@ -1274,33 +1283,35 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *object, int also_m_busy, const char *msg VM_PAGE_DEBUG_ARGS) { - u_int32_t flags; + u_int32_t busy_count; vm_page_t m; ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); while (m) { KKASSERT(m->object == object && m->pindex == pindex); - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if (flags & PG_BUSY) { + if (busy_count & PBUSY_LOCKED) { tsleep_interlock(m, 0); - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_WANTED)) { + atomic_set_int(&m->flags, PG_REFERENCED); tsleep(m, PINTERLOCKED, msg, 0); m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); } - } else if (also_m_busy && (flags & PG_SBUSY)) { + } else if (also_m_busy && busy_count) { tsleep_interlock(m, 0); - if (atomic_cmpset_int(&m->flags, flags, - flags | PG_WANTED | PG_REFERENCED)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_WANTED)) { + atomic_set_int(&m->flags, PG_REFERENCED); tsleep(m, PINTERLOCKED, msg, 0); m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); } - } else if (atomic_cmpset_int(&m->flags, flags, - flags | PG_BUSY)) { + } else if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_LOCKED)) { #ifdef VM_PAGE_DEBUG m->busy_func = func; m->busy_line = lineno; @@ -1327,7 +1338,7 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, int also_m_busy, int *errorp VM_PAGE_DEBUG_ARGS) { - u_int32_t flags; + u_int32_t busy_count; vm_page_t m; ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); @@ -1335,17 +1346,18 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, *errorp = FALSE; while (m) { KKASSERT(m->object == object && m->pindex == pindex); - flags = m->flags; + busy_count = m->busy_count; cpu_ccfence(); - if (flags & PG_BUSY) { + if (busy_count & PBUSY_LOCKED) { *errorp = TRUE; break; } - if (also_m_busy && (flags & PG_SBUSY)) { + if (also_m_busy && busy_count) { *errorp = TRUE; break; } - if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { + if (atomic_cmpset_int(&m->busy_count, busy_count, + busy_count | PBUSY_LOCKED)) { #ifdef VM_PAGE_DEBUG m->busy_func = func; m->busy_line = lineno; @@ -1356,6 +1368,33 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, return m; } +/* + * Returns a page that is only soft-busied for use by the caller in + * a read-only fashion. Returns NULL if the page could not be found, + * the soft busy could not be obtained, or the page data is invalid. 
+ */ +vm_page_t +vm_page_lookup_sbusy_try(struct vm_object *object, vm_pindex_t pindex) +{ + vm_page_t m; + + ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); + m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); + if (m) { + if (m->valid != VM_PAGE_BITS_ALL || + (m->flags & PG_FICTITIOUS)) { + m = NULL; + } else if (vm_page_sbusy_try(m)) { + m = NULL; + } else if (m->valid != VM_PAGE_BITS_ALL || + (m->flags & PG_FICTITIOUS)) { + vm_page_sbusy_drop(m); + m = NULL; + } + } + return m; +} + /* * Caller must hold the related vm_object */ @@ -1395,7 +1434,7 @@ vm_page_next(vm_page_t m) void vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(new_object)); if (m->object) { ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(m->object)); @@ -1463,7 +1502,7 @@ vm_page_unqueue(vm_page_t m) * This is done by 'twisting' the colors. * * The page is returned spinlocked and removed from its queue (it will - * be on PQ_NONE), or NULL. The page is not PG_BUSY'd. The caller + * be on PQ_NONE), or NULL. The page is not BUSY'd. The caller * is responsible for dealing with the busy-page case (usually by * deactivating the page and looping). * @@ -1857,7 +1896,7 @@ done: vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK); KKASSERT(m->wire_count == 0); - KKASSERT(m->busy == 0); + KKASSERT((m->busy_count & PBUSY_MASK) == 0); m->act_count = 0; m->valid = 0; @@ -1890,7 +1929,7 @@ done: pagedaemon_wakeup(); /* - * A PG_BUSY page is returned. + * A BUSY page is returned. */ return (m); } @@ -2221,7 +2260,7 @@ vm_page_free_wakeup(void) * Returns the given page to the PQ_FREE or PQ_HOLD list and disassociates * it from its VM object. * - * The vm_page must be PG_BUSY on entry. PG_BUSY will be released on + * The vm_page must be BUSY on entry. BUSY will be released on * return (the page will have been freed). */ void @@ -2229,13 +2268,12 @@ vm_page_free_toq(vm_page_t m) { mycpu->gd_cnt.v_tfree++; KKASSERT((m->flags & PG_MAPPED) == 0); - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); - if (m->busy || ((m->queue - m->pc) == PQ_FREE)) { - kprintf("vm_page_free: pindex(%lu), busy(%d), " - "PG_BUSY(%d), hold(%d)\n", - (u_long)m->pindex, m->busy, - ((m->flags & PG_BUSY) ? 1 : 0), m->hold_count); + if ((m->busy_count & PBUSY_MASK) || ((m->queue - m->pc) == PQ_FREE)) { + kprintf("vm_page_free: pindex(%lu), busy %08x, " + "hold(%d)\n", + (u_long)m->pindex, m->busy_count, m->hold_count); if ((m->queue - m->pc) == PQ_FREE) panic("vm_page_free: freeing free page"); else @@ -2290,7 +2328,7 @@ vm_page_free_toq(vm_page_t m) } /* - * This sequence allows us to clear PG_BUSY while still holding + * This sequence allows us to clear BUSY while still holding * its spin lock, which reduces contention vs allocators. We * must not leave the queue locked or _vm_page_wakeup() may * deadlock. @@ -2328,7 +2366,7 @@ vm_page_free_toq(vm_page_t m) void vm_page_unmanage(vm_page_t m) { - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); if ((m->flags & PG_UNMANAGED) == 0) { if (m->wire_count == 0) vm_page_unqueue(m); @@ -2351,7 +2389,7 @@ vm_page_wire(vm_page_t m) * it is already off the queues). Don't do anything with fictitious * pages because they are always wired. 
*/ - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); if ((m->flags & PG_FICTITIOUS) == 0) { if (atomic_fetchadd_int(&m->wire_count, 1) == 0) { if ((m->flags & PG_UNMANAGED) == 0) @@ -2394,7 +2432,7 @@ vm_page_wire(vm_page_t m) void vm_page_unwire(vm_page_t m, int activate) { - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); if (m->flags & PG_FICTITIOUS) { /* do nothing */ } else if (m->wire_count <= 0) { @@ -2586,7 +2624,8 @@ vm_page_cache(vm_page_t m) * Not suitable for the cache */ if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) || - m->busy || m->wire_count || m->hold_count) { + (m->busy_count & PBUSY_MASK) || + m->wire_count || m->hold_count) { vm_page_wakeup(m); return; } @@ -2618,7 +2657,8 @@ vm_page_cache(vm_page_t m) */ vm_page_protect(m, VM_PROT_NONE); if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) || - m->busy || m->wire_count || m->hold_count) { + (m->busy_count & PBUSY_MASK) || + m->wire_count || m->hold_count) { vm_page_wakeup(m); } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) { vm_page_deactivate(m); @@ -2714,31 +2754,59 @@ vm_page_dontneed(vm_page_t m) /* * These routines manipulate the 'soft busy' count for a page. A soft busy - * is almost like PG_BUSY except that it allows certain compatible operations - * to occur on the page while it is busy. For example, a page undergoing a - * write can still be mapped read-only. + * is almost like a hard BUSY except that it allows certain compatible + * operations to occur on the page while it is busy. For example, a page + * undergoing a write can still be mapped read-only. * - * Because vm_pages can overlap buffers m->busy can be > 1. m->busy is only - * adjusted while the vm_page is PG_BUSY so the flash will occur when the - * busy bit is cleared. + * We also use soft-busy to quickly pmap_enter shared read-only pages + * without having to hold the page locked. + * + * The soft-busy count can be > 1 in situations where multiple threads + * are pmap_enter()ing the same page simultaneously, or when two buffer + * cache buffers overlap the same page. * * The caller must hold the page BUSY when making these two calls. */ void vm_page_io_start(vm_page_t m) { - KASSERT(m->flags & PG_BUSY, ("vm_page_io_start: page not busy!!!")); - atomic_add_char(&m->busy, 1); - vm_page_flag_set(m, PG_SBUSY); + uint32_t ocount; + + ocount = atomic_fetchadd_int(&m->busy_count, 1); + KKASSERT(ocount & PBUSY_LOCKED); } void vm_page_io_finish(vm_page_t m) { - KASSERT(m->flags & PG_BUSY, ("vm_page_io_finish: page not busy!!!")); - atomic_subtract_char(&m->busy, 1); - if (m->busy == 0) - vm_page_flag_clear(m, PG_SBUSY); + uint32_t ocount; + + ocount = atomic_fetchadd_int(&m->busy_count, -1); + KKASSERT(ocount & PBUSY_MASK); +#if 0 + if (((ocount - 1) & (PBUSY_LOCKED | PBUSY_MASK)) == 0) + wakeup(m); +#endif +} + +/* + * Attempt to soft-busy a page. The page must not be PBUSY_LOCKED. + * + * Returns 0 on success, non-zero on failure. + */ +int +vm_page_sbusy_try(vm_page_t m) +{ + uint32_t ocount; + + if (m->busy_count & PBUSY_LOCKED) + return 1; + ocount = atomic_fetchadd_int(&m->busy_count, 1); + if (ocount & PBUSY_LOCKED) { + vm_page_sbusy_drop(m); + return 1; + } + return 0; } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 6b55d0b682..f6ed1fff3e 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -1,10 +1,14 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2003-2017 The DragonFly Project. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -56,12 +60,10 @@ * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. - * - * $FreeBSD: src/sys/vm/vm_page.h,v 1.75.2.8 2002/03/06 01:07:09 dillon Exp $ */ /* - * Resident memory system definitions. + * Resident memory system definitions. */ #ifndef _VM_VM_PAGE_H_ @@ -100,34 +102,8 @@ #endif /* - * Management of resident (logical) pages. - * - * A small structure is kept for each resident - * page, indexed by page number. Each structure - * is an element of several lists: - * - * A hash table bucket used to quickly - * perform object/offset lookups - * - * A list of all pages for a given object, - * so they can be quickly deactivated at - * time of deallocation. - * - * An ordered list of pages due for pageout. - * - * In addition, the structure contains the object - * and offset to which this page belongs (for pageout), - * and sundry status bits. - * - * Fields in this structure are locked either by the lock on the - * object that the page belongs to (O) or by the lock on the page - * queues (P). - * - * The 'valid' and 'dirty' fields are distinct. A page may have dirty - * bits set without having associated valid bits set. This is used by - * NFS to implement piecemeal writes. + * vm_page structure */ - TAILQ_HEAD(pglist, vm_page); struct vm_object; @@ -135,34 +111,27 @@ struct vm_object; int rb_vm_page_compare(struct vm_page *, struct vm_page *); struct vm_page_rb_tree; -RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, vm_pindex_t); +RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry, + rb_vm_page_compare, vm_pindex_t); struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* vm_page_queues[] list (P) */ RB_ENTRY(vm_page) rb_entry; /* Red-Black tree based at object */ struct spinlock spin; - struct vm_object *object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependant stuff */ - u_short queue; /* page queue index */ - u_short pc; /* page color */ - u_char act_count; /* page usage count */ - u_char busy; /* page busy count */ - u_char pat_mode; /* hardware page attribute */ - u_char unused02; - u_int32_t flags; /* see below */ - u_int wire_count; /* wired down maps refs (P) */ + uint16_t queue; /* page queue index */ + uint16_t pc; /* page color */ + uint8_t act_count; /* page usage count */ + uint8_t pat_mode; /* hardware page attribute */ + uint8_t valid; /* map of valid DEV_BSIZE chunks */ + uint8_t dirty; /* map of dirty DEV_BSIZE chunks */ + uint32_t flags; /* see below */ + uint32_t wire_count; /* wired down maps refs (P) */ + uint32_t busy_count; /* soft-busy and hard-busy */ int hold_count; /* page hold count */ - - /* - * NOTE that these must support one bit per DEV_BSIZE in a page!!! - * so, on normal X86 kernels, they must be at least 8 bits wide. 
- */ - u_char valid; /* map of valid DEV_BSIZE chunks */ - u_char dirty; /* map of dirty DEV_BSIZE chunks */ - int ku_pagecnt; /* kmalloc helper */ #ifdef VM_PAGE_DEBUG const char *busy_func; @@ -170,13 +139,10 @@ struct vm_page { #endif }; -#ifdef VM_PAGE_DEBUG -#define VM_PAGE_DEBUG_EXT(name) name ## _debug -#define VM_PAGE_DEBUG_ARGS , const char *func, int lineno -#else -#define VM_PAGE_DEBUG_EXT(name) name -#define VM_PAGE_DEBUG_ARGS -#endif +#define PBUSY_LOCKED 0x80000000U +#define PBUSY_WANTED 0x40000000U +#define PBUSY_SWAPINPROG 0x20000000U +#define PBUSY_MASK 0x1FFFFFFFU #ifndef __VM_PAGE_T_DEFINED__ #define __VM_PAGE_T_DEFINED__ @@ -192,7 +158,6 @@ typedef struct vm_page *vm_page_t; * * Page coloring cannot be disabled. */ - #define PQ_PRIME1 31 /* Prime number somewhat less than PQ_HASH_SIZE */ #define PQ_PRIME2 23 /* Prime number somewhat less than PQ_HASH_SIZE */ #define PQ_L2_SIZE 512 /* A number of colors opt for 1M cache */ @@ -268,12 +233,9 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; * * PG_SWAPPED indicates that the page is backed by a swap block. Any * VM object type other than OBJT_DEFAULT can have swap-backed pages now. - * - * PG_SBUSY is set when m->busy != 0. PG_SBUSY and m->busy are only modified - * when the page is PG_BUSY. */ -#define PG_BUSY 0x00000001 /* page is in transit (O) */ -#define PG_WANTED 0x00000002 /* someone is waiting for page (O) */ +#define PG_UNUSED0001 0x00000001 +#define PG_UNUSED0002 0x00000002 #define PG_WINATCFLS 0x00000004 /* flush dirty page on inactive q */ #define PG_FICTITIOUS 0x00000008 /* physical page doesn't exist (O) */ #define PG_WRITEABLE 0x00000010 /* page is writeable */ @@ -281,7 +243,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; #define PG_UNUSED0040 0x00000040 #define PG_REFERENCED 0x00000080 /* page has been referenced */ #define PG_CLEANCHK 0x00000100 /* page will be checked for cleaning */ -#define PG_SWAPINPROG 0x00000200 /* swap I/O in progress on page */ +#define PG_UNUSED0200 0x00000200 #define PG_NOSYNC 0x00000400 /* do not collect for syncer */ #define PG_UNMANAGED 0x00000800 /* No PV management for page */ #define PG_MARKER 0x00001000 /* special queue marker page */ @@ -289,11 +251,10 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; #define PG_SWAPPED 0x00004000 /* backed by swap */ #define PG_NOTMETA 0x00008000 /* do not back with swap */ #define PG_UNUSED10000 0x00010000 -#define PG_SBUSY 0x00020000 /* soft-busy also set */ +#define PG_UNUSED20000 0x00020000 #define PG_NEED_COMMIT 0x00040000 /* clean page requires commit */ -#define PG_KEEP_NEWPAGE_MASK (PG_BUSY | PG_SBUSY | PG_WANTED) - +#define PG_KEEP_NEWPAGE_MASK (0) /* * Misc constants. 
@@ -304,6 +265,14 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; #define ACT_INIT 5 #define ACT_MAX 64 +#ifdef VM_PAGE_DEBUG +#define VM_PAGE_DEBUG_EXT(name) name ## _debug +#define VM_PAGE_DEBUG_ARGS , const char *func, int lineno +#else +#define VM_PAGE_DEBUG_EXT(name) name +#define VM_PAGE_DEBUG_ARGS +#endif + #ifdef _KERNEL /* * Each pageable resident page falls into one of four lists: @@ -405,6 +374,8 @@ void vm_page_deactivate_locked (vm_page_t); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t); vm_page_t vm_page_lookup (struct vm_object *, vm_pindex_t); +vm_page_t vm_page_lookup_sbusy_try(struct vm_object *object, + vm_pindex_t pindex); vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)( struct vm_object *, vm_pindex_t, int, const char * VM_PAGE_DEBUG_ARGS); @@ -437,6 +408,7 @@ void vm_page_free_contig(vm_page_t m, unsigned long size); vm_page_t vm_page_free_fromq_fast(void); void vm_page_dirty(vm_page_t m); void vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg); +int vm_page_sbusy_try(vm_page_t m); void VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m, int also_m_busy, const char *wmsg VM_PAGE_DEBUG_ARGS); int VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, diff --git a/sys/vm/vm_page2.h b/sys/vm/vm_page2.h index c7b1930a20..557d8a7cef 100644 --- a/sys/vm/vm_page2.h +++ b/sys/vm/vm_page2.h @@ -273,16 +273,38 @@ vm_page_flag_clear(vm_page_t m, unsigned int bits) /* * Wakeup anyone waiting for the page after potentially unbusying * (hard or soft) or doing other work on a page that might make a - * waiter ready. The setting of PG_WANTED is integrated into the + * waiter ready. The setting of PBUSY_WANTED is integrated into the * related flags and it can't be set once the flags are already * clear, so there should be no races here. */ - static __inline void vm_page_flash(vm_page_t m) { - if (m->flags & PG_WANTED) { - vm_page_flag_clear(m, PG_WANTED); + if (m->busy_count & PBUSY_WANTED) { + atomic_clear_int(&m->busy_count, PBUSY_WANTED); + wakeup(m); + } +} + +/* + * Adjust the soft-busy count on a page. The drop code will issue an + * integrated wakeup if busy_count becomes 0. 
+ */ +static __inline void +vm_page_sbusy_hold(vm_page_t m) +{ + atomic_add_int(&m->busy_count, 1); +} + +static __inline void +vm_page_sbusy_drop(vm_page_t m) +{ + uint32_t ocount; + + ocount = atomic_fetchadd_int(&m->busy_count, -1); + if (ocount - 1 == PBUSY_WANTED) { + /* WANTED and no longer BUSY or SBUSY */ + atomic_clear_int(&m->busy_count, PBUSY_WANTED); wakeup(m); } } @@ -308,7 +330,7 @@ vm_page_flash(vm_page_t m) static __inline void vm_page_protect(vm_page_t m, int prot) { - KKASSERT(m->flags & PG_BUSY); + KKASSERT(m->busy_count & PBUSY_LOCKED); if (prot == VM_PROT_NONE) { if (m->flags & (PG_WRITEABLE|PG_MAPPED)) { pmap_page_protect(m, VM_PROT_NONE); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 7676c66a17..3a8a6fe545 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -778,7 +778,8 @@ vm_pageout_scan_inactive(int pass, int q, int avail_shortage, * Initialize our marker */ bzero(&marker, sizeof(marker)); - marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + marker.flags = PG_FICTITIOUS | PG_MARKER; + marker.busy_count = PBUSY_LOCKED; marker.queue = PQ_INACTIVE + q; marker.pc = q; marker.wire_count = 1; @@ -1301,7 +1302,8 @@ vm_pageout_scan_active(int pass, int q, */ bzero(&marker, sizeof(marker)); - marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + marker.flags = PG_FICTITIOUS | PG_MARKER; + marker.busy_count = PBUSY_LOCKED; marker.queue = PQ_ACTIVE + q; marker.pc = q; marker.wire_count = 1; @@ -1755,7 +1757,8 @@ vm_pageout_page_stats(int q) } bzero(&marker, sizeof(marker)); - marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + marker.flags = PG_FICTITIOUS | PG_MARKER; + marker.busy_count = PBUSY_LOCKED; marker.queue = PQ_ACTIVE + q; marker.pc = q; marker.wire_count = 1; diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index dd19df9620..19f82941a9 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -463,7 +463,8 @@ swapoff_one(int index) */ for (q = 0; q < PQ_L2_SIZE; ++q) { bzero(&marker, sizeof(marker)); - marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + marker.flags = PG_FICTITIOUS | PG_MARKER; + marker.busy_count = PBUSY_LOCKED; marker.queue = PQ_ACTIVE + q; marker.pc = q; marker.wire_count = 1; diff --git a/sys/vm/vm_swapcache.c b/sys/vm/vm_swapcache.c index 8fd3e6e584..3d2b1d49df 100644 --- a/sys/vm/vm_swapcache.c +++ b/sys/vm/vm_swapcache.c @@ -191,7 +191,8 @@ vm_swapcached_thread(void) */ bzero(&page_marker, sizeof(page_marker)); for (q = 0; q < PQ_L2_SIZE; ++q) { - page_marker[q].flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; + page_marker[q].flags = PG_FICTITIOUS | PG_MARKER; + page_marker[q].busy_count = PBUSY_LOCKED; page_marker[q].queue = PQ_INACTIVE + q; page_marker[q].pc = q; page_marker[q].wire_count = 1; diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index f0a13d8dcd..9d57b086dc 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -417,7 +417,9 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) void vnode_pager_freepage(vm_page_t m) { - if (m->busy || m->wire_count || (m->flags & PG_NEED_COMMIT)) { + if ((m->busy_count & PBUSY_MASK) || + m->wire_count || + (m->flags & PG_NEED_COMMIT)) { vm_page_activate(m); vm_page_wakeup(m); } else {