#define PV_HOLD_UNUSED2000 0x20000000U
#define PV_HOLD_MASK 0x1FFFFFFFU
-#define PV_FLAG_VMOBJECT 0x00000001U /* shared pt in VM obj */
+#define PV_FLAG_UNUSED01 0x00000001U
+#define PV_FLAG_PGTABLE 0x00000002U /* page table page */
#ifdef _KERNEL
while ((m = RB_ROOT(&obj->rb_memq)) != NULL) {
vm_page_busy_wait(m, FALSE, "efipg");
vm_page_unwire(m, 1);
- m->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
cdev_pager_free_page(obj, m);
kfree(m, M_EFI);
}
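A brief aside on this change (the reasoning is inferred, not stated in the diff): m->flags is shared state that other CPUs may update concurrently, so the vm_page flag helpers, which use atomic operations, are preferred over an open-coded read-modify-write:

	/* Non-atomic RMW: a concurrent flag update on another CPU can be lost. */
	m->flags &= ~(PG_MAPPED | PG_WRITEABLE);

	/* Atomic helper: safe against concurrent updates of m->flags. */
	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);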
&pmap_nx_enable, 0,
"no-execute support (0=disabled, 1=w/READ, 2=w/READ & WRITE)");
+static int pmap_pv_debug = 50;
+SYSCTL_INT(_machdep, OID_AUTO, pmap_pv_debug, CTLFLAG_RW,
+ &pmap_pv_debug, 0, "Countdown of pv_entry debug reports");
+
/* Standard user access functions */
extern int std_copyinstr (const void *udaddr, void *kaddr, size_t len,
size_t *lencopied);
vm_page_spin_lock(m);
pmap_page_stats_adding(m);
+
+ /*
+ * PGTABLE pv's only exist in the context of the pmap RB tree
+ * (pmap->pm_pvroot).
+ */
+#if 0
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+#endif
+ pv->pv_flags |= PV_FLAG_PGTABLE;
pv->pv_m = m;
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
vm_page_spin_unlock(m);
vm_page_spin_lock(m);
KKASSERT(m && m == pv->pv_m);
pv->pv_m = NULL;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- pmap_page_stats_deleting(m);
- if (TAILQ_EMPTY(&m->md.pv_list))
+ if (pv->pv_flags & PV_FLAG_PGTABLE) {
vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
+ KKASSERT(TAILQ_EMPTY(&m->md.pv_list));
+ } else {
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
+ }
+ pmap_page_stats_deleting(m);
vm_page_spin_unlock(m);
return(m);
pnew->pv_pmap = pmap;
pnew->pv_pindex = pindex;
pnew->pv_hold = PV_HOLD_LOCKED | 2;
+ pnew->pv_flags = 0;
#ifdef PMAP_DEBUG
pnew->pv_func = func;
pnew->pv_line = lineno;
vm_page_spin_lock(m);
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ if (pv->pv_m != m) {
+ kprintf("pmap_remove_all FAILURE\n");
+ kprintf("pv %p pv->pv_m %p m %p\n", pv, pv->pv_m, m);
+ kprintf("pvflags %08x\n", pv->pv_flags);
+ }
+
KKASSERT(pv->pv_m == m);
if (pv_hold_try(pv)) {
vm_page_spin_unlock(m);
*
* Enter on the PV list if part of our managed memory.
*/
+
+ if (m->object == NULL && pmap_pv_debug > 0) {
+ --pmap_pv_debug;
+ kprintf("pte_m %p pv_entry %p NOOBJ\n", m, pte_pv);
+ print_backtrace(16);
+ }
+
KKASSERT(pte_pv && (pte_pv->pv_m == NULL || pte_pv->pv_m == m));
vm_page_spin_lock(m);
pte_pv->pv_m = m;
pmap_page_stats_adding(m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pte_pv, pv_list);
- vm_page_flag_set(m, PG_MAPPED);
- if (newpte & pmap->pmap_bits[PG_RW_IDX])
+
+ /*
+ * Set vm_page flags. Avoid a cache mastership change if
+ * the bits are already set.
+ */
+ if ((m->flags & PG_MAPPED) == 0)
+ vm_page_flag_set(m, PG_MAPPED);
+ if ((newpte & pmap->pmap_bits[PG_RW_IDX]) &&
+ (m->flags & PG_WRITEABLE) == 0) {
vm_page_flag_set(m, PG_WRITEABLE);
+ }
vm_page_spin_unlock(m);
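The check-before-set idiom above could be captured in a small hypothetical helper (not part of this change) to make the intent explicit: the atomic op, and the cache-line mastership change it implies, is only paid when a requested bit is actually missing.

static __inline void
page_flag_set_cond(vm_page_t m, uint32_t bits)
{
	/* Skip the atomic op if all requested bits are already set. */
	if ((m->flags & bits) != bits)
		vm_page_flag_set(m, bits);
}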
if (pt_pv && opa &&
TUNABLE_INT("vm.fault_quick", &vm_fault_quick_enable);
SYSCTL_INT(_vm, OID_AUTO, fault_quick, CTLFLAG_RW,
&vm_fault_quick_enable, 0, "Allow fast vm_fault shortcut");
+#ifdef VM_FAULT_QUICK_DEBUG
static long vm_fault_quick_success_count = 0;
SYSCTL_LONG(_vm, OID_AUTO, fault_quick_success_count, CTLFLAG_RW,
&vm_fault_quick_success_count, 0, "");
static long vm_fault_quick_failure_count4 = 0;
SYSCTL_LONG(_vm, OID_AUTO, fault_quick_failure_count4, CTLFLAG_RW,
&vm_fault_quick_failure_count4, 0, "");
+#endif
static int vm_fault_quick(struct faultstate *fs, vm_pindex_t first_pindex,
vm_prot_t fault_type);
}
success:
-
/*
* On success vm_fault_object() does not unlock or deallocate, and fs.m
* will contain a busied page.
*
* Enter the page into the pmap and do pmap-related adjustments.
+ *
+ * WARNING! Soft-busied fs.m's can only be manipulated in limited
+ * ways.
*/
KKASSERT(fs.lookup_still_valid == TRUE);
vm_page_flag_set(fs.m, PG_REFERENCED);
/*
* If the page is not wired down, then put it where the pageout daemon
* can find it.
+ *
+ * NOTE: We cannot safely wire, unwire, or adjust queues for a
+ * soft-busied page.
*/
- if (fs.fault_flags & VM_FAULT_WIRE_MASK) {
- if (fs.wflags & FW_WIRED)
- vm_page_wire(fs.m);
- else
- vm_page_unwire(fs.m, 1);
- } else {
- vm_page_activate(fs.m);
- }
if (fs.msoftonly) {
KKASSERT(fs.m->busy_count & PBUSY_MASK);
+ KKASSERT((fs.fault_flags & VM_FAULT_WIRE_MASK) == 0);
vm_page_sbusy_drop(fs.m);
} else {
+ if (fs.fault_flags & VM_FAULT_WIRE_MASK) {
+ if (fs.wflags & FW_WIRED)
+ vm_page_wire(fs.m);
+ else
+ vm_page_unwire(fs.m, 1);
+ } else {
+ vm_page_activate(fs.m);
+ }
KKASSERT(fs.m->busy_count & PBUSY_LOCKED);
vm_page_wakeup(fs.m);
}
if (obj->flags & OBJ_ONEMAPPING)
return KERN_FAILURE;
+ /*
+ * This will try to wire/unwire a page, which can't be done with
+ * a soft-busied page.
+ */
+ if (fs->fault_flags & VM_FAULT_WIRE_MASK)
+ return KERN_FAILURE;
+
/*
* Ick, can't handle this
*/
if (fs->entry->maptype == VM_MAPTYPE_VPAGETABLE) {
+#ifdef VM_FAULT_QUICK_DEBUG
++vm_fault_quick_failure_count1;
+#endif
return KERN_FAILURE;
}
*/
m = vm_page_hash_get(obj, first_pindex);
if (m == NULL) {
+#ifdef VM_FAULT_QUICK_DEBUG
++vm_fault_quick_failure_count2;
+#endif
return KERN_FAILURE;
}
if ((obj->flags & OBJ_DEAD) ||
m->queue - m->pc == PQ_CACHE ||
(m->flags & PG_SWAPPED)) {
vm_page_sbusy_drop(m);
+#ifdef VM_FAULT_QUICK_DEBUG
++vm_fault_quick_failure_count3;
+#endif
return KERN_FAILURE;
}
}
/*
- * Check write permissions. We don't hold an object lock so the
- * object must already be flagged writable and dirty.
+ * If this is a write fault the object and the page must already
+ * be writable. Since we don't hold an object lock and only a
+ * soft-busy on the page, we cannot manipulate the object or
+ * the page state (other than the page queue).
*/
if (fs->prot & VM_PROT_WRITE) {
if ((obj->flags & (OBJ_WRITEABLE | OBJ_MIGHTBEDIRTY)) !=
(OBJ_WRITEABLE | OBJ_MIGHTBEDIRTY) ||
m->dirty != VM_PAGE_BITS_ALL) {
vm_page_sbusy_drop(m);
+#ifdef VM_FAULT_QUICK_DEBUG
++vm_fault_quick_failure_count4;
+#endif
return KERN_FAILURE;
}
vm_set_nosync(m, fs->entry);
}
+
+ /*
+ * Even though we are only soft-busied we can still move pages
+ * around in the normal queue(s). The soft-busy prevents the
+ * page from being removed from the object, etc (normal operation).
+ */
vm_page_activate(m);
fs->m = m;
fs->msoftonly = 1;
+#ifdef VM_FAULT_QUICK_DEBUG
++vm_fault_quick_success_count;
+#endif
return KERN_SUCCESS;
}
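For context, a caller-side sketch (paraphrased, not quoted from vm_fault() itself; the label is hypothetical): the quick path is attempted first, any KERN_FAILURE falls through to the normal object lookup, and on success fs.m is only soft-busied with fs.msoftonly set, which is why wiring and hard-busy cleanup are skipped for it later.

	if (vm_fault_quick_enable &&
	    vm_fault_quick(&fs, first_pindex, fault_type) == KERN_SUCCESS) {
		/* fs.m is soft-busied and fs.msoftonly == 1 */
		goto quick_done;	/* hypothetical label */
	}
	/* otherwise fall through to the full vm_fault_object() path */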
* The opposite of vm_page_hold(). If the page is on the HOLD queue
* it was freed while held and must be moved back to the FREE queue.
*
- * To avoid racing against vm_page_free*() we must test conditions
- * after obtaining the spin-lock.
+ * To avoid racing against vm_page_free*() we must re-test conditions
+ * after obtaining the spin-lock. The initial test can also race a
+ * vm_page_free*() that is in the middle of moving a page to PQ_HOLD,
+ * leaving the page on PQ_HOLD with hold_count == 0. Rather than
+ * throw a spin-lock in the critical path, we rely on the pageout
+ * daemon to clean up these loose ends.
+ *
+ * More critically, the 'easy movement' between queues without busying
+ * a vm_page is only allowed for PQ_FREE<->PQ_HOLD.
*/
void
vm_page_unhold(vm_page_t m)
"on FREE queue (%d)",
m, m->hold_count, m->queue - m->pc));
- if (atomic_fetchadd_int(&m->hold_count, -1) == 1) {
+ if (atomic_fetchadd_int(&m->hold_count, -1) == 1 &&
+ m->queue - m->pc == PQ_HOLD) {
vm_page_spin_lock(m);
if (m->hold_count == 0 && m->queue - m->pc == PQ_HOLD) {
_vm_page_queue_spin_lock(m);
/*
* Theoretically if we are able to busy the page
* atomically with the queue removal (using the vm_page
- * lock) nobody else should be able to mess with the
- * page before us.
+ * lock) nobody else should have been able to mess
+ * with the page before us.
+ *
+ * Assert the page state. Note that even though
+ * wiring doesn't adjust queues, a page on the free
+ * queue should never be wired at this point.
*/
KKASSERT((m->flags & (PG_UNMANAGED |
PG_NEED_COMMIT)) == 0);
- KASSERT(m->hold_count == 0, ("m->hold_count is not zero "
- "pg %p q=%d flags=%08x hold=%d wire=%d",
- m, m->queue, m->flags, m->hold_count, m->wire_count));
+ KASSERT(m->hold_count == 0,
+ ("m->hold_count is not zero "
+ "pg %p q=%d flags=%08x hold=%d wire=%d",
+ m, m->queue, m->flags,
+ m->hold_count, m->wire_count));
KKASSERT(m->wire_count == 0);
vm_page_spin_unlock(m);
pagedaemon_wakeup();
if (oqueue == PQ_CACHE)
mycpu->gd_cnt.v_reactivated++;
- if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
+ if ((m->flags & PG_UNMANAGED) == 0) {
if (m->act_count < ACT_INIT)
m->act_count = ACT_INIT;
_vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0);
* vm_page_unmanage()
*
* Prevent PV management from being done on the page. The page is
- * removed from the paging queues as if it were wired, and as a
- * consequence of no longer being managed the pageout daemon will not
- * touch it (since there is no way to locate the pte mappings for the
- * page). madvise() calls that mess with the pmap will also no longer
- * operate on the page.
+ * also removed from the paging queues, and as a consequence of no longer
+ * being managed the pageout daemon will not touch it (since there is no
+ * way to locate the pte mappings for the page). madvise() calls that
+ * mess with the pmap will also no longer operate on the page.
*
* Beyond that the page is still reasonably 'normal'. Freeing the page
* will clear the flag.
{
KKASSERT(m->busy_count & PBUSY_LOCKED);
if ((m->flags & PG_UNMANAGED) == 0) {
- if (m->wire_count == 0)
- vm_page_unqueue(m);
+ vm_page_unqueue(m);
}
vm_page_flag_set(m, PG_UNMANAGED);
}
/*
- * Mark this page as wired down by yet another map, removing it from
- * paging queues as necessary.
+ * Mark this page as wired down by yet another map. We do not adjust the
+ * queue the page is on, it will be checked for wiring as-needed.
*
* Caller must be holding the page busy.
*/
KKASSERT(m->busy_count & PBUSY_LOCKED);
if ((m->flags & PG_FICTITIOUS) == 0) {
if (atomic_fetchadd_int(&m->wire_count, 1) == 0) {
- if ((m->flags & PG_UNMANAGED) == 0)
- vm_page_unqueue(m);
atomic_add_long(&mycpu->gd_vmstats_adj.v_wire_count, 1);
}
KASSERT(m->wire_count != 0,
/*
* Release one wiring of this page, potentially enabling it to be paged again.
*
+ * Note that wired pages are no longer unconditionally removed from the
+ * paging queues, so the page may already be on a queue. Move the page
+ * to the desired queue if necessary.
+ *
* Many pages placed on the inactive queue should actually go
* into the cache, but it is difficult to figure out which. What
* we do instead, if the inactive target is well met, is to put
KKASSERT(m->busy_count & PBUSY_LOCKED);
if (m->flags & PG_FICTITIOUS) {
/* do nothing */
- } else if (m->wire_count <= 0) {
+ } else if ((int)m->wire_count <= 0) {
panic("vm_page_unwire: invalid wire count: %d", m->wire_count);
} else {
if (atomic_fetchadd_int(&m->wire_count, -1) == 1) {
if (m->flags & PG_UNMANAGED) {
;
} else if (activate || (m->flags & PG_NEED_COMMIT)) {
+ vm_page_activate(m);
+#if 0
vm_page_spin_lock(m);
_vm_page_add_queue_spinlocked(m,
PQ_ACTIVE + m->pc, 0);
_vm_page_and_queue_spin_unlock(m);
+#endif
} else {
+ vm_page_deactivate(m);
+#if 0
vm_page_spin_lock(m);
vm_page_flag_clear(m, PG_WINATCFLS);
_vm_page_add_queue_spinlocked(m,
PQ_INACTIVE + m->pc, 0);
- ++vm_swapcache_inactive_heuristic;
_vm_page_and_queue_spin_unlock(m);
+#endif
+ ++vm_swapcache_inactive_heuristic;
}
}
}
}
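A minimal usage sketch of the new wire/unwire semantics described above (not a specific call site; the wmesg strings are placeholders): the page stays on whatever paging queue it was on while wired, the pageout scans simply skip it, and the final unwire moves it to the active or inactive queue.

	vm_page_busy_wait(m, FALSE, "wirepg");
	vm_page_wire(m);		/* page remains on its current queue */
	vm_page_wakeup(m);

	/* ... use the page ... */

	vm_page_busy_wait(m, FALSE, "unwire");
	vm_page_unwire(m, 1);		/* last unwire -> vm_page_activate() */
	vm_page_wakeup(m);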
/*
- * Move the specified page to the inactive queue. If the page has
- * any associated swap, the swap is deallocated.
+ * Move the specified page to the inactive queue.
*
* Normally athead is 0 resulting in LRU operation. athead is set
* to 1 if we want this page to be 'as if it were placed in the cache',
* except without unmapping it from the process address space.
*
* vm_page's spinlock must be held on entry and will remain held on return.
- * This routine may not block.
+ * This routine may not block. The caller does not have to hold the page
+ * busied but should have some sort of interlock on its validity.
*/
static void
_vm_page_deactivate_locked(vm_page_t m, int athead)
_vm_page_queue_spin_lock(m);
oqueue = _vm_page_rem_queue_spinlocked(m);
- if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
+ if ((m->flags & PG_UNMANAGED) == 0) {
if (oqueue == PQ_CACHE)
mycpu->gd_cnt.v_reactivated++;
vm_page_flag_clear(m, PG_WINATCFLS);
/*
* vm_page structure
+ *
+ * hard-busy: (PBUSY_LOCKED)
+ *
+ * Hard-busying a page allows major manipulation of the page structure.
+ * No new soft-busies can accumulate while a page is hard-busied. The
+ * page busying code typically waits for all soft-busies to drop before
+ * allowing the hard-busy.
+ *
+ * soft-busy: (PBUSY_MASK)
+ *
+ * Soft-busying a page typically indicates I/O or read-only use of
+ * the content. A page can have multiple soft-busies on it. New
+ * soft-busies block on any hard-busied page (wait for the hard-busy
+ * to go away).
+ *
+ * hold_count
+ *
+ * This prevents a page from being freed. This does not prevent any
+ * other operation. The page may still be disassociated from its
+ * object and essentially scrapped. It just won't be reused while
+ * a non-zero hold_count is present.
+ *
+ * wire_count
+ *
+ * This indicates that the page has been wired into memory somewhere
+ * (typically a buffer cache buffer, or a user wire). The pageout
+ * daemon will skip wired pages.
*/
TAILQ_HEAD(pglist, vm_page);
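To make the reference forms above concrete, a minimal sketch (hypothetical helper; only calls that appear elsewhere in this diff are used, and soft-busy acquisition is omitted since the acquire-side API is not shown here):

static void
page_ref_forms_sketch(vm_page_t m)
{
	/* hard-busy: exclusive, allows major manipulation of the page */
	vm_page_busy_wait(m, FALSE, "pgbsy");
	/* ... change object association, flags, wiring, etc ... */
	vm_page_wakeup(m);

	/* hold: only guarantees the page is not freed and reused */
	vm_page_hold(m);
	/* ... the page may still be ripped out of its object ... */
	vm_page_unhold(m);
}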
* Remaining operations run with the page busy and neither
* the page or the queue will be spin-locked.
*/
- vm_page_queues_spin_unlock(PQ_INACTIVE + q);
KKASSERT(m->queue == PQ_INACTIVE + q);
+ vm_page_queues_spin_unlock(PQ_INACTIVE + q);
/*
* The emergency pager runs when the primary pager gets
int count = 0;
/*
- * It is possible for a page to be busied ad-hoc (e.g. the
- * pmap_collect() code) and wired and race against the
- * allocation of a new page. vm_page_alloc() may be forced
- * to deactivate the wired page in which case it winds up
- * on the inactive queue and must be handled here. We
- * correct the problem simply by unqueuing the page.
+ * Wiring no longer removes a page from its queue. The last unwiring
+ * will requeue the page. Obviously wired pages cannot be paged out,
+ * so unqueue the page and return.
*/
if (m->wire_count) {
vm_page_unqueue_nowakeup(m);
vm_page_wakeup(m);
- kprintf("WARNING: pagedaemon: wired page on "
- "inactive queue %p\n", m);
return 0;
}
}
vm_page_unhold(m);
+ /*
+ * The page may have been wired while we did not own it; if so, just
+ * unqueue it and return.
+ */
+ if (m->wire_count) {
+ vm_page_unqueue_nowakeup(m);
+ vput(vp);
+ vm_page_wakeup(m);
+ return 0;
+ }
+
/*
* (m) is busied again
*
* Remaining operations run with the page busy and neither
* the page or the queue will be spin-locked.
*/
- vm_page_queues_spin_unlock(PQ_ACTIVE + q);
KKASSERT(m->queue == PQ_ACTIVE + q);
+ vm_page_queues_spin_unlock(PQ_ACTIVE + q);
#if 0
/*
* Don't deactivate pages that are held, even if we can
* busy them. (XXX why not?)
*/
- if (m->hold_count != 0) {
+ if (m->hold_count) {
vm_page_and_queue_spin_lock(m);
if (m->queue - m->pc == PQ_ACTIVE) {
TAILQ_REMOVE(
goto next;
}
#endif
+ /*
+ * We can just remove wired pages from the queue
+ */
+ if (m->wire_count) {
+ vm_page_unqueue_nowakeup(m);
+ vm_page_wakeup(m);
+ goto next;
+ }
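The same wired-page check now appears in several of the pageout scan loops; a hypothetical helper (not part of this diff) capturing the pattern would return non-zero when the busied page turned out to be wired and has been unqueued, so the caller can simply skip it.

static __inline int
pageout_skip_if_wired(vm_page_t m)
{
	if (m->wire_count == 0)
		return 0;
	vm_page_unqueue_nowakeup(m);	/* wired pages are not paged out */
	vm_page_wakeup(m);		/* caller held the page hard-busied */
	return 1;
}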
/*
* The emergency pager ignores vnode-backed pages as these
m = vm_page_list_find(PQ_CACHE, cache_rover[isep] & PQ_L2_MASK);
if (m == NULL)
break;
+
+ /*
+ * If the busy attempt fails we can still deactivate the page.
+ */
/* page is returned removed from its queue and spinlocked */
if (vm_page_busy_try(m, TRUE)) {
vm_page_deactivate_locked(m);
return(0);
}
+/*
+ * This routine slowly walks PQ_HOLD looking for pages which need to be
+ * moved back to PQ_FREE. It is possible for pages to accumulate here
+ * when vm_page_free() races against vm_page_unhold(), resulting in a
+ * page being left on a PQ_HOLD queue with hold_count == 0.
+ *
+ * It is easier to handle this edge condition here, in non-critical code,
+ * than to enforce a spin-lock for every 1->0 transition in
+ * vm_page_unhold().
+ *
+ * NOTE: TAILQ_FOREACH becomes invalid the instant we unlock the queue.
+ */
+static void
+vm_pageout_scan_hold(int q)
+{
+ vm_page_t m;
+
+ vm_page_queues_spin_lock(PQ_HOLD + q);
+ TAILQ_FOREACH(m, &vm_page_queues[PQ_HOLD + q].pl, pageq) {
+ if (m->flags & PG_MARKER)
+ continue;
+
+ /*
+ * Stop at the first page that is still legitimately held;
+ * otherwise process one stuck page and return.
+ */
+ if (m->hold_count)
+ break;
+ kprintf("DEBUG: pageout HOLD->FREE %p\n", m);
+ vm_page_hold(m);
+ vm_page_queues_spin_unlock(PQ_HOLD + q);
+ vm_page_unhold(m); /* reprocess */
+ return;
+ }
+ vm_page_queues_spin_unlock(PQ_HOLD + q);
+}
+
/*
* This routine tries to maintain the pseudo LRU active queue,
* so that during long periods of time where there is no paging,
* Remaining operations run with the page busy and neither
* the page or the queue will be spin-locked.
*/
- vm_page_queues_spin_unlock(PQ_ACTIVE + q);
KKASSERT(m->queue == PQ_ACTIVE + q);
+ vm_page_queues_spin_unlock(PQ_ACTIVE + q);
+
+ /*
+ * We can just remove wired pages from the queue
+ */
+ if (m->wire_count) {
+ vm_page_unqueue_nowakeup(m);
+ vm_page_wakeup(m);
+ goto next;
+ }
+
/*
* We now have a safely busied page, the page and queue
* spinlocks have been released.
*
- * Ignore held pages
+ * Ignore held and wired pages
*/
- if (m->hold_count) {
+ if (m->hold_count || m->wire_count) {
vm_page_wakeup(m);
goto next;
}
int q;
int q1iterator = 0;
int q2iterator = 0;
+ int q3iterator = 0;
int isep;
curthread->td_flags |= TDF_SYSTHREAD;
} else {
/*
* Primary pagedaemon
+ *
+ * NOTE: We unconditionally clean up PQ_HOLD even
+ * when there is no work to do.
*/
+ vm_pageout_scan_hold(q3iterator & PQ_L2_MASK);
+ ++q3iterator;
+
if (vm_pages_needed == 0) {
error = tsleep(&vm_pages_needed,
0, "psleep",