From 831a850787c9f10e26859cda6911277adf3477f5 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 20 May 2019 09:29:43 -0700 Subject: [PATCH] kernel - VM rework part 15 - Core pmap work, refactor PG_* * Augment PG_FICTITIOUS. This takes over some of PG_UNMANAGED's previous capabilities. In addition, the pmap_*() API will work with fictitious pages, making mmap() operation (aka of the GPU) more consistent. * Add PG_UNQUEUED. This prevents a vm_page from being manipulated in the vm_page_queues[] in any way. This takes over another feature of the old PG_UNMANAGED flag. * Remove PG_UNMANAGED * Remove PG_DEVICE_IDX. This is no longer relevant. We use PG_FICTITIOUS for all device pages. * Refactor vm_contig_pg_alloc(), vm_contig_pg_free(), vm_page_alloc_contig(), and vm_page_free_contig(). These functions now set PG_FICTITIOUS | PG_UNQUEUED on the returned pages, and properly clear the bits upon free or if/when a regular (but special contig-managed) page is handed over to the normal paging system. This is combined with making the pmap*() functions work better with PG_FICTITIOUS is the primary 'fix' for some of DRMs hacks. --- sys/platform/pc64/include/pmap.h | 2 +- sys/platform/pc64/vmm/ept.c | 2 +- sys/platform/pc64/x86_64/pmap.c | 121 ++++++++++++-------------- sys/vm/device_pager.c | 8 +- sys/vm/phys_pager.c | 2 +- sys/vm/vm_contig.c | 14 ++- sys/vm/vm_fault.c | 4 +- sys/vm/vm_object.c | 3 +- sys/vm/vm_page.c | 143 +++++++++++++++---------------- sys/vm/vm_page.h | 81 ++++++++++------- sys/vm/vm_pageout.c | 10 +-- sys/vm/vm_swapcache.c | 2 +- 12 files changed, 205 insertions(+), 187 deletions(-) diff --git a/sys/platform/pc64/include/pmap.h b/sys/platform/pc64/include/pmap.h index 11535864b9..353f040914 100644 --- a/sys/platform/pc64/include/pmap.h +++ b/sys/platform/pc64/include/pmap.h @@ -273,7 +273,7 @@ RB_PROTOTYPE2(pv_entry_rb_tree, pv_entry, pv_entry, #define PG_G_IDX 7 #define PG_W_IDX 8 #define PG_MANAGED_IDX 9 -#define PG_DEVICE_IDX 10 +#define PG_UNUSED10_IDX 10 #define PG_N_IDX 11 #define PG_NX_IDX 12 #define PG_BITS_SIZE 13 diff --git a/sys/platform/pc64/vmm/ept.c b/sys/platform/pc64/vmm/ept.c index 514aeafbc2..3f58575248 100644 --- a/sys/platform/pc64/vmm/ept.c +++ b/sys/platform/pc64/vmm/ept.c @@ -98,7 +98,7 @@ vmx_ept_init(void) pmap_bits_ept[PG_M_IDX] = EPT_PG_M; pmap_bits_ept[PG_W_IDX] = EPT_PG_AVAIL1; pmap_bits_ept[PG_MANAGED_IDX] = EPT_PG_AVAIL2; - pmap_bits_ept[PG_DEVICE_IDX] = EPT_PG_AVAIL3; + pmap_bits_ept[PG_UNUSED10_IDX] = EPT_PG_AVAIL3; pmap_bits_ept[PG_N_IDX] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC; pmap_bits_ept[PG_NX_IDX] = 0; /* XXX inverted sense */ diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index 07e10a09d6..0a62863e33 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ b/sys/platform/pc64/x86_64/pmap.c @@ -2349,7 +2349,7 @@ pmap_puninit(pmap_t pmap) pv = NULL; /* safety */ pmap_kremove((vm_offset_t)pmap->pm_pml4); vm_page_busy_wait(p, FALSE, "pgpun"); - KKASSERT(p->flags & (PG_FICTITIOUS|PG_UNMANAGED)); + KKASSERT(p->flags & PG_UNQUEUED); vm_page_unwire(p, 0); vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); vm_page_free(p); @@ -2364,7 +2364,7 @@ pmap_puninit(pmap_t pmap) pv = NULL; /* safety */ pmap_kremove((vm_offset_t)pmap->pm_pml4_iso); vm_page_busy_wait(p, FALSE, "pgpun"); - KKASSERT(p->flags & (PG_FICTITIOUS|PG_UNMANAGED)); + KKASSERT(p->flags & PG_UNQUEUED); vm_page_unwire(p, 0); vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); vm_page_free(p); @@ -2544,21 +2544,13 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, 
pv_entry_t *pvpp) vm_wait(0); } vm_page_wire(m); /* wire for mapping in parent */ - vm_page_unmanage(m); /* m must be spinunlocked */ pmap_zero_page(VM_PAGE_TO_PHYS(m)); m->valid = VM_PAGE_BITS_ALL; + vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_UNQUEUED); + KKASSERT(m->queue == PQ_NONE); - vm_page_spin_lock(m); - /* pmap_page_stats_adding(m); */ - - /* - * PGTABLE pv's only exist in the context of the pmap RB tree - * (pmap->pm_pvroot). - */ pv->pv_flags |= PV_FLAG_PGTABLE; pv->pv_m = m; - vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); - vm_page_spin_unlock(m); /* * (isnew) is TRUE, pv is not terminal. @@ -2892,7 +2884,7 @@ pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, pmap_inval_bulk_t *bulk) tstr, pv->pv_pindex, p->wire_count); } KKASSERT(p->wire_count == 1); - KKASSERT(p->flags & PG_UNMANAGED); + KKASSERT(p->flags & PG_UNQUEUED); vm_page_unwire(p, 0); KKASSERT(p->wire_count == 0); @@ -3132,22 +3124,21 @@ pmap_remove_pv_page(pv_entry_t pv) vm_page_t m; m = pv->pv_m; - vm_page_spin_lock(m); - KKASSERT(m && m == pv->pv_m); pv->pv_m = NULL; + if (pv->pv_flags & PV_FLAG_PGTABLE) { vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); - KKASSERT(m->md.pmap_count == 0); } else { + KKASSERT(0); +#if 0 /* * Used only for page table pages, so safe to clear on * the 1->0 transition. */ if (atomic_fetchadd_long(&m->md.pmap_count, -1) == 1) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); +#endif } - /* pmap_page_stats_deleting(m); */ - vm_page_spin_unlock(m); return(m); } @@ -4591,10 +4582,19 @@ pmap_remove_all(vm_page_t m) { int retry; - if (!pmap_initialized /* || (m->flags & PG_FICTITIOUS)*/) + if (!pmap_initialized) return; + + /* + * pmap_count doesn't cover fictitious pages, but PG_MAPPED does + * (albeit without certain race protections). + */ +#if 0 if (m->md.pmap_count == 0) return; +#endif + if ((m->flags & PG_MAPPED) == 0) + return; retry = ticks + hz * 60; again: @@ -4670,8 +4670,13 @@ pmap_remove_specific(pmap_t pmap_match, vm_page_t m) { if (!pmap_initialized) return; - if (m->md.pmap_count == 0) + + /* + * PG_MAPPED test works for both non-fictitious and fictitious pages. + */ + if ((m->flags & PG_MAPPED) == 0) return; + PMAP_PAGE_BACKING_SCAN(m, pmap_match, ipmap, iptep, ipte, iva) { if (!pmap_inval_smp_cmpset(ipmap, iva, iptep, ipte, 0)) PMAP_PAGE_BACKING_RETRY; @@ -4790,7 +4795,6 @@ again: } } if (pbits & pmap->pmap_bits[PG_MANAGED_IDX]) { - KKASSERT((pbits & pmap->pmap_bits[PG_DEVICE_IDX]) == 0); m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); if (pbits & pmap->pmap_bits[PG_A_IDX]) vm_page_flag_set(m, PG_REFERENCED); @@ -4815,9 +4819,6 @@ again: * * NOTE: This routine MUST insert the page into the pmap now, it cannot * lazy-evaluate. - * - * NOTE: If (m) is PG_UNMANAGED it may also be a temporary fake vm_page_t. - * never record it. 
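The early-out change in pmap_remove_all() and pmap_remove_specific() above deserves a restatement: md.pmap_count is only maintained for reverse-mapped (non-fictitious) pages, so testing it would skip fictitious mappings entirely, while PG_MAPPED is maintained for both kinds of page. A condensed sketch of the new entry test follows (illustrative only; the helper name is hypothetical and not part of the patch):

static void
remove_all_earlyout_sketch(vm_page_t m)		/* hypothetical helper */
{
	/*
	 * Too early in boot, or provably not mapped by normal means:
	 * nothing to scan.  PG_MAPPED covers regular and fictitious
	 * pages alike; md.pmap_count would miss the latter.
	 */
	if (!pmap_initialized)
		return;
	if ((m->flags & PG_MAPPED) == 0)
		return;
	/* ... otherwise fall through to the PMAP_PAGE_BACKING_SCAN loop ... */
}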
*/ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, @@ -4909,13 +4910,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, newpte |= pmap->pmap_bits[PG_W_IDX]; if (va < VM_MAX_USER_ADDRESS) newpte |= pmap->pmap_bits[PG_U_IDX]; - if ((m->flags & (/*PG_FICTITIOUS |*/ PG_UNMANAGED)) == 0) + if ((m->flags & PG_FICTITIOUS) == 0) newpte |= pmap->pmap_bits[PG_MANAGED_IDX]; // if (pmap == &kernel_pmap) // newpte |= pgeflag; newpte |= pmap->pmap_cache_bits[m->pat_mode]; - if (m->flags & PG_FICTITIOUS) - newpte |= pmap->pmap_bits[PG_DEVICE_IDX]; /* * It is possible for multiple faults to occur in threaded @@ -4932,18 +4931,22 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * should be able to safely set PG_* flag bits even with the (shared) * soft-busy. * - * As a bit of a safety, bump pmap_count and set the PG_* bits - * before mapping the page. If another part of the system does - * not properly hard-busy the page (against our soft-busy) in - * order to remove mappings it might not see the pte that we are - * about to add and thus will not be able to drop pmap_count to 0. + * The pmap_count and writeable_count is only tracked for + * non-fictitious pages. As a bit of a safety, bump pmap_count + * and set the PG_* bits before mapping the page. If another part + * of the system does not properly hard-busy the page (against our + * soft-busy or hard-busy) in order to remove mappings it might not + * see the pte that we are about to add and thus will not be able to + * drop pmap_count to 0. + * + * The PG_MAPPED and PG_WRITEABLE flags are set for any type of page. * * NOTE! PG_MAPPED and PG_WRITEABLE can only be cleared when * the page is hard-busied AND pmap_count is 0. This * interlocks our setting of the flags here. */ /*vm_page_spin_lock(m);*/ - if ((m->flags & PG_UNMANAGED) == 0) { + if ((m->flags & PG_FICTITIOUS) == 0) { atomic_add_long(&m->md.pmap_count, 1); if (newpte & pmap->pmap_bits[PG_RW_IDX]) atomic_add_long(&m->md.writeable_count, 1); @@ -5038,7 +5041,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if (opa && (origpte & pmap->pmap_bits[PG_MANAGED_IDX])) { KKASSERT(oldm == PHYS_TO_VM_PAGE(opa)); - /* XXX PG_DEVICE_IDX pages */ if (origpte & pmap->pmap_bits[PG_M_IDX]) vm_page_dirty(oldm); if (origpte & pmap->pmap_bits[PG_A_IDX]) @@ -5064,13 +5066,6 @@ done: * is not managed but we have a pte_pv (which was locking our * operation), we can free it now. pte_pv->pv_m should be NULL. */ -#if 0 - if (pte_pv && (newpte & pmap->pmap_bits[PG_MANAGED_IDX]) == 0) { - pv_free(pte_pv, pt_pv); - } else if (pte_pv) { - pv_put(pte_pv); - } else -#endif if (pte_placemark) pv_placemarker_wakeup(pmap, pte_placemark); if (pt_pv) @@ -5475,7 +5470,8 @@ pmap_testbit(vm_page_t m, int bit) /* * This routine is used to modify bits in ptes. Only one bit should be - * specified. PG_RW requires special handling. + * specified. PG_RW requires special handling. This call works with + * any sort of mapped page. PG_FICTITIOUS pages might not be optimal. 
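The long comment above packs several rules together; the accounting in pmap_enter() roughly reduces to the fragment below (a paraphrased sketch, not the actual function body; the real code also deals with the soft-busy races described in the comment):

	/*
	 * Paraphrased excerpt (not the patch text).  Only non-fictitious
	 * pages are marked 'managed' in the pte and tracked via
	 * md.pmap_count and md.writeable_count.  PG_MAPPED and
	 * PG_WRITEABLE are set on the vm_page_t for every page type,
	 * fictitious or not.
	 */
	if ((m->flags & PG_FICTITIOUS) == 0) {
		newpte |= pmap->pmap_bits[PG_MANAGED_IDX];
		atomic_add_long(&m->md.pmap_count, 1);
		if (newpte & pmap->pmap_bits[PG_RW_IDX])
			atomic_add_long(&m->md.writeable_count, 1);
	}
	if (newpte & pmap->pmap_bits[PG_RW_IDX])
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
	else
		vm_page_flag_set(m, PG_MAPPED);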
* * Caller must NOT hold any spin locks * Caller must hold (m) hard-busied @@ -5504,11 +5500,9 @@ pmap_clearbit(vm_page_t m, int bit_index) int retry; /* - * XXX It might make sense to allow PG_FICTITIOUS + PG_DEVICE - * pages through to the backing scan, but atm devices do - * not care about PG_WRITEABLE; + * Too early in the boot */ - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { + if (!pmap_initialized) { if (bit_index == PG_RW_IDX) vm_page_flag_clear(m, PG_WRITEABLE); return; @@ -5549,8 +5543,10 @@ again: * PG_WRITEABLE, and callers expect us to for the PG_RW_IDX path. */ PMAP_PAGE_BACKING_SCAN(m, NULL, ipmap, iptep, ipte, iva) { +#if 0 if ((ipte & ipmap->pmap_bits[PG_MANAGED_IDX]) == 0) continue; +#endif if ((ipte & ipmap->pmap_bits[PG_RW_IDX]) == 0) continue; npte = ipte & ~(ipmap->pmap_bits[PG_RW_IDX] | @@ -5564,8 +5560,12 @@ again: * NOTE: m is not hard-busied so it is not safe to * clear PG_WRITEABLE on the 1->0 transition * against it being set in pmap_enter(). + * + * pmap_count and writeable_count are only applicable + * to non-fictitious pages (PG_MANAGED_IDX from pte) */ - atomic_add_long(&m->md.writeable_count, -1); + if (ipte & ipmap->pmap_bits[PG_MANAGED_IDX]) + atomic_add_long(&m->md.writeable_count, -1); } PMAP_PAGE_BACKING_DONE; /* @@ -5910,43 +5910,35 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) vm_offset_t pa; val = MINCORE_INCORE; - if ((pte & pmap->pmap_bits[PG_MANAGED_IDX]) == 0) - goto done; - pa = pte & PG_FRAME; - - if (pte & pmap->pmap_bits[PG_DEVICE_IDX]) - m = NULL; - else + if (pte & pmap->pmap_bits[PG_MANAGED_IDX]) m = PHYS_TO_VM_PAGE(pa); + else + m = NULL; /* * Modified by us */ if (pte & pmap->pmap_bits[PG_M_IDX]) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; + /* * Modified by someone */ else if (m && (m->dirty || pmap_is_modified(m))) val |= MINCORE_MODIFIED_OTHER; - /* - * Referenced by us - */ - if (pte & pmap->pmap_bits[PG_A_IDX]) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* - * Referenced by someone + * Referenced by us, or someone else. 
*/ - else if (m && ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m))) { + if (pte & pmap->pmap_bits[PG_A_IDX]) { + val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; + } else if (m && ((m->flags & PG_REFERENCED) || + pmap_ts_referenced(m))) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } } -done: - return val; } @@ -6107,7 +6099,6 @@ pmap_kvtom(vm_offset_t va) { pt_entry_t *ptep = vtopte(va); - KKASSERT((*ptep & kernel_pmap.pmap_bits[PG_DEVICE_IDX]) == 0); return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); } diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 94336b81f1..b0f8196d4d 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -280,7 +280,7 @@ dev_pager_getfake(vm_paddr_t paddr, int pat_mode) pmap_page_init(m); - m->flags = PG_FICTITIOUS | PG_UNMANAGED; + m->flags = PG_FICTITIOUS | PG_UNQUEUED; m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; m->queue = PQ_NONE; @@ -347,8 +347,9 @@ static void old_dev_pager_dtor(void *handle) } } -static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, - int prot, vm_page_t *mres) +static int +old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, + int prot, vm_page_t *mres) { vm_paddr_t paddr; vm_page_t page; @@ -387,7 +388,6 @@ static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, } vm_object_drop(object); } - return (VM_PAGER_OK); } diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 08817205be..9ed8449c15 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -83,7 +83,7 @@ phys_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) vm_page_zero_fill(m); /* Switch off pv_entries */ - vm_page_unmanage(m); + vm_page_flag_set(m, PG_UNQUEUED); m->valid = VM_PAGE_BITS_ALL; m->dirty = VM_PAGE_BITS_ALL; diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index fc40a272ce..ec7d6c9d96 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -404,9 +404,10 @@ again: if (pqtype == PQ_CACHE && m->hold_count == 0 && m->wire_count == 0 && - (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) { + (m->flags & PG_NEED_COMMIT) == 0) { vm_page_protect(m, VM_PROT_NONE); - KKASSERT((m->flags & PG_MAPPED) == 0); + KKASSERT((m->flags & + (PG_MAPPED | PG_UNQUEUED)) == 0); KKASSERT(m->dirty == 0); vm_page_free(m); --i; @@ -430,10 +431,15 @@ again: KKASSERT((m->busy_count & PBUSY_MASK) == 0); /* - * Clear all flags. Then unbusy the now allocated - * page. + * Clear all flags, set FICTITIOUS and UNQUEUED to + * indicate the the pages are special, then unbusy + * the now allocated page. + * + * XXX setting FICTITIOUS and UNQUEUED in the future. 
+ * (also pair up with vm_contig_pg_free) */ vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK); + /* vm_page_flag_set(m, PG_FICTITIOUS | PG_UNQUEUED);*/ vm_page_wire(m); vm_page_wakeup(m); } diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 73755e1b07..c498d8eb16 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -541,7 +541,7 @@ RetryFault: bzero(&fakem, sizeof(fakem)); fakem.pindex = first_pindex; - fakem.flags = PG_FICTITIOUS | PG_UNMANAGED; + fakem.flags = PG_FICTITIOUS | PG_UNQUEUED; fakem.busy_count = PBUSY_LOCKED; fakem.valid = VM_PAGE_BITS_ALL; fakem.pat_mode = VM_MEMATTR_DEFAULT; @@ -1167,7 +1167,7 @@ RetryFault: bzero(&fakem, sizeof(fakem)); fakem.pindex = first_pindex; - fakem.flags = PG_FICTITIOUS | PG_UNMANAGED; + fakem.flags = PG_FICTITIOUS | PG_UNQUEUED; fakem.busy_count = PBUSY_LOCKED; fakem.valid = VM_PAGE_BITS_ALL; fakem.pat_mode = VM_MEMATTR_DEFAULT; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 307042c0e4..27b9933021 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1288,7 +1288,8 @@ relookup: * any of the below states. */ if (m->wire_count || - (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) || + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED | + PG_NEED_COMMIT)) || m->valid != VM_PAGE_BITS_ALL ) { vm_page_wakeup(m); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 83a232522b..01723c8ddb 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -246,10 +246,18 @@ vm_add_new_page(vm_paddr_t pa) /* * Reserve a certain number of contiguous low memory pages for * contigmalloc() to use. + * + * Even though these pages represent real ram and can be + * reverse-mapped, we set PG_FICTITIOUS and PG_UNQUEUED + * because their use is special-cased. + * + * WARNING! Once PG_FICTITIOUS is set, vm_page_wire*() + * and vm_page_unwire*() calls have no effect. */ if (pa < vm_low_phys_reserved) { atomic_add_long(&vmstats.v_page_count, 1); atomic_add_long(&vmstats.v_dma_pages, 1); + m->flags |= PG_FICTITIOUS | PG_UNQUEUED; m->queue = PQ_NONE; m->wire_count = 1; atomic_add_long(&vmstats.v_wire_count, 1); @@ -785,6 +793,7 @@ vm_page_startup_finish(void *dummy __unused) m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT); vm_low_phys_reserved = VM_PAGE_TO_PHYS(m); while (count) { + vm_page_flag_clear(m, PG_FICTITIOUS | PG_UNQUEUED); vm_page_busy_wait(m, FALSE, "cpgfr"); vm_page_unwire(m, 0); vm_page_free(m); @@ -1034,7 +1043,8 @@ _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead) struct vpgqueues *pq; u_long *cnt; - KKASSERT(m->queue == PQ_NONE && (m->flags & PG_FICTITIOUS) == 0); + KKASSERT(m->queue == PQ_NONE && + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0); if (queue != PQ_NONE) { vm_page_queues_spin_lock(queue); @@ -1377,19 +1387,18 @@ vm_page_unhold(vm_page_t m) void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { - if ((m->flags & PG_FICTITIOUS) != 0) { - /* - * The page's memattr might have changed since the - * previous initialization. Update the pmap to the - * new memattr. - */ + /* + * The page's memattr might have changed since the + * previous initialization. Update the pmap to the + * new memattr. + */ + if ((m->flags & PG_FICTITIOUS) != 0) goto memattr; - } m->phys_addr = paddr; m->queue = PQ_NONE; /* Fictitious pages don't use "segind". */ /* Fictitious pages don't use "order" or "pool". 
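For reference, the throw-away pages built in vm_fault.c and by dev_pager_getfake() now all carry the same flag pair. A minimal fake page looks roughly like this (sketch assembled from the hunks above; paddr and pindex are stand-ins for whatever the caller supplies):

	struct vm_page fakem;

	bzero(&fakem, sizeof(fakem));
	fakem.pindex = pindex;			/* caller-supplied */
	fakem.flags = PG_FICTITIOUS | PG_UNQUEUED; /* was PG_FICTITIOUS | PG_UNMANAGED */
	fakem.busy_count = PBUSY_LOCKED;
	fakem.valid = VM_PAGE_BITS_ALL;
	fakem.pat_mode = VM_MEMATTR_DEFAULT;
	fakem.phys_addr = paddr;		/* caller-supplied */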
*/ - m->flags = PG_FICTITIOUS | PG_UNMANAGED; + m->flags = PG_FICTITIOUS | PG_UNQUEUED; m->busy_count = PBUSY_LOCKED; m->wire_count = 1; spin_init(&m->spin, "fake_page"); @@ -1735,6 +1744,9 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, * Returns a page that is only soft-busied for use by the caller in * a read-only fashion. Returns NULL if the page could not be found, * the soft busy could not be obtained, or the page data is invalid. + * + * XXX Doesn't handle PG_FICTITIOUS pages at the moment, but there is + * no reason why we couldn't. */ vm_page_t vm_page_lookup_sbusy_try(struct vm_object *object, vm_pindex_t pindex, @@ -2046,11 +2058,12 @@ vm_page_select_cache(u_short pg_color) /* * We successfully busied the page */ - if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 && + if ((m->flags & PG_NEED_COMMIT) == 0 && m->hold_count == 0 && m->wire_count == 0 && (m->dirty & m->valid) == 0) { vm_page_spin_unlock(m); + KKASSERT((m->flags & PG_UNQUEUED) == 0); pagedaemon_wakeup(); return(m); } @@ -2117,7 +2130,7 @@ vm_page_select_free(u_short pg_color) * wiring doesn't adjust queues, a page on the free * queue should never be wired at this point. */ - KKASSERT((m->flags & (PG_UNMANAGED | + KKASSERT((m->flags & (PG_UNQUEUED | PG_NEED_COMMIT)) == 0); KASSERT(m->hold_count == 0, ("m->hold_count is not zero " @@ -2454,6 +2467,10 @@ vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high, return(NULL); } spin_unlock(&vm_contig_spin); + + /* + * Base vm_page_t of range + */ m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT); } if (vm_contig_verbose) { @@ -2464,8 +2481,10 @@ vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high, low, high, alignment, boundary, size, memattr); } if (memattr != VM_MEMATTR_DEFAULT) { - for (i = 0;i < size; i++) + for (i = 0; i < size; ++i) { + KKASSERT(m[i].flags & PG_FICTITIOUS); pmap_page_set_memattr(&m[i], memattr); + } } return m; } @@ -2486,13 +2505,19 @@ vm_page_free_contig(vm_page_t m, unsigned long size) (intmax_t)pa, size / 1024); } if (pa < vm_low_phys_reserved) { + /* + * Just assert check the first page for convenience. + */ KKASSERT(m->wire_count == 1); + KKASSERT(m->flags & PG_FICTITIOUS); KKASSERT(pa + size <= vm_low_phys_reserved); spin_lock(&vm_contig_spin); alist_free(&vm_contig_alist, start, pages); spin_unlock(&vm_contig_spin); } else { while (pages) { + /* XXX FUTURE, maybe (pair with vm_pg_contig_alloc()) */ + /*vm_page_flag_clear(m, PG_FICTITIOUS | PG_UNQUEUED);*/ vm_page_busy_wait(m, FALSE, "cpgfr"); vm_page_unwire(m, 0); vm_page_free(m); @@ -2640,25 +2665,25 @@ vm_page_activate(vm_page_t m) * If already active or inappropriate, just set act_count and * return. We don't have to spin-lock the page. 
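Pages handed out from the low-memory contig reserve stay wired and flagged PG_FICTITIOUS | PG_UNQUEUED for their whole lifetime and must be returned through vm_page_free_contig(), which either gives the range back to the contig alist or pushes the pages into the normal free path. A hypothetical caller might look like the following; the argument order of vm_page_alloc_contig() is paraphrased from the debug printf above and should be treated as approximate:

	vm_page_t m;

	/* one physically contiguous 16KB block, default memory attribute */
	m = vm_page_alloc_contig(0, ~(vm_paddr_t)0, PAGE_SIZE, 0,
				 4 * PAGE_SIZE, VM_MEMATTR_DEFAULT);
	if (m != NULL) {
		/* reserve-backed ranges come back PG_FICTITIOUS | PG_UNQUEUED */
		/* ... use the physical range starting at VM_PAGE_TO_PHYS(m) ... */
		vm_page_free_contig(m, 4 * PAGE_SIZE);
	}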
*/ - if (m->queue - m->pc == PQ_ACTIVE || (m->flags & PG_FICTITIOUS)) { + if (m->queue - m->pc == PQ_ACTIVE || + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; return; } vm_page_spin_lock(m); - if (m->queue - m->pc != PQ_ACTIVE && (m->flags & PG_FICTITIOUS) == 0) { + if (m->queue - m->pc != PQ_ACTIVE && + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) { _vm_page_queue_spin_lock(m); oqueue = _vm_page_rem_queue_spinlocked(m); /* page is left spinlocked, queue is unlocked */ if (oqueue == PQ_CACHE) mycpu->gd_cnt.v_reactivated++; - if ((m->flags & PG_UNMANAGED) == 0) { - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0); - } + if (m->act_count < ACT_INIT) + m->act_count = ACT_INIT; + _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0); _vm_page_and_queue_spin_unlock(m); if (oqueue == PQ_CACHE || oqueue == PQ_FREE) pagedaemon_wakeup(); @@ -2672,7 +2697,8 @@ vm_page_activate(vm_page_t m) void vm_page_soft_activate(vm_page_t m) { - if (m->queue - m->pc == PQ_ACTIVE || (m->flags & PG_FICTITIOUS)) { + if (m->queue - m->pc == PQ_ACTIVE || + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } else { @@ -2803,13 +2829,11 @@ vm_page_free_toq(vm_page_t m) } /* - * Clear the UNMANAGED flag when freeing an unmanaged page. - * Clear the NEED_COMMIT flag + * Clear the PG_NEED_COMMIT and the PG_UNQUEUED flags. The + * page returns to normal operation and will be placed in + * the PQ_HOLD or PQ_FREE queue. */ - if (m->flags & PG_UNMANAGED) - vm_page_flag_clear(m, PG_UNMANAGED); - if (m->flags & PG_NEED_COMMIT) - vm_page_flag_clear(m, PG_NEED_COMMIT); + vm_page_flag_clear(m, PG_NEED_COMMIT | PG_UNQUEUED); if (m->hold_count != 0) { _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0); @@ -2833,50 +2857,17 @@ vm_page_free_toq(vm_page_t m) vm_page_free_wakeup(); } -/* - * vm_page_unmanage() - * - * Prevent PV management from being done on the page. The page is - * also removed from the paging queues, and as a consequence of no longer - * being managed the pageout daemon will not touch it (since there is no - * way to locate the pte mappings for the page). madvise() calls that - * mess with the pmap will also no longer operate on the page. - * - * Beyond that the page is still reasonably 'normal'. Freeing the page - * will clear the flag. - * - * This routine is used by OBJT_PHYS objects - objects using unswappable - * physical memory as backing store rather then swap-backed memory and - * will eventually be extended to support 4MB unmanaged physical - * mappings. - * - * Caller must be holding the page busy. - */ -void -vm_page_unmanage(vm_page_t m) -{ - KKASSERT(m->busy_count & PBUSY_LOCKED); - if ((m->flags & PG_UNMANAGED) == 0) { - vm_page_unqueue(m); - } - vm_page_flag_set(m, PG_UNMANAGED); -} - /* * Mark this page as wired down by yet another map. We do not adjust the * queue the page is on, it will be checked for wiring as-needed. * + * This function has no effect on fictitious pages. + * * Caller must be holding the page busy. */ void vm_page_wire(vm_page_t m) { - /* - * Only bump the wire statistics if the page is not already wired, - * and only unqueue the page if it is on some queue (if it is unmanaged - * it is already off the queues). Don't do anything with fictitious - * pages because they are always wired. 
- */ KKASSERT(m->busy_count & PBUSY_LOCKED); if ((m->flags & PG_FICTITIOUS) == 0) { if (atomic_fetchadd_int(&m->wire_count, 1) == 0) { @@ -2917,6 +2908,10 @@ vm_page_wire(vm_page_t m) * be placed in the cache - for example, just after dirtying a page. * dirty pages in the cache are not allowed. * + * PG_FICTITIOUS or PG_UNQUEUED pages are never moved to any queue, and + * the wire_count will not be adjusted in any way for a PG_FICTITIOUS + * page. + * * This routine may not block. */ void @@ -2930,7 +2925,7 @@ vm_page_unwire(vm_page_t m, int activate) } else { if (atomic_fetchadd_int(&m->wire_count, -1) == 1) { atomic_add_long(&mycpu->gd_vmstats_adj.v_wire_count,-1); - if (m->flags & PG_UNMANAGED) { + if (m->flags & PG_UNQUEUED) { ; } else if (activate || (m->flags & PG_NEED_COMMIT)) { vm_page_activate(m); @@ -2963,13 +2958,15 @@ _vm_page_deactivate_locked(vm_page_t m, int athead) /* * Ignore if already inactive. */ - if (m->queue - m->pc == PQ_INACTIVE || (m->flags & PG_FICTITIOUS)) + if (m->queue - m->pc == PQ_INACTIVE || + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) { return; + } _vm_page_queue_spin_lock(m); oqueue = _vm_page_rem_queue_spinlocked(m); - if ((m->flags & PG_UNMANAGED) == 0) { + if ((m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) { if (oqueue == PQ_CACHE) mycpu->gd_cnt.v_reactivated++; vm_page_flag_clear(m, PG_WINATCFLS); @@ -2996,7 +2993,7 @@ void vm_page_deactivate(vm_page_t m) { if (m->queue - m->pc != PQ_INACTIVE && - (m->flags & PG_FICTITIOUS) == 0) { + (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) { vm_page_spin_lock(m); _vm_page_deactivate_locked(m, 0); vm_page_spin_unlock(m); @@ -3028,7 +3025,7 @@ vm_page_try_to_cache(vm_page_t m) */ if (m->dirty || m->hold_count || m->wire_count || m->queue - m->pc == PQ_CACHE || - (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT | PG_FICTITIOUS))) { + (m->flags & (PG_UNQUEUED | PG_NEED_COMMIT | PG_FICTITIOUS))) { vm_page_wakeup(m); return(0); } @@ -3050,6 +3047,10 @@ vm_page_try_to_cache(vm_page_t m) * Attempt to free the page. If we cannot free it, we do nothing. * 1 is returned on success, 0 on failure. * + * The page can be in any state, including already being on the free + * queue. Check to see if it really can be freed. Note that we disallow + * this ad-hoc operation if the page is flagged PG_UNQUEUED. + * * Caller provides an unlocked/non-busied page. * No requirements. */ @@ -3059,14 +3060,10 @@ vm_page_try_to_free(vm_page_t m) if (vm_page_busy_try(m, TRUE)) return(0); - /* - * The page can be in any state, including already being on the free - * queue. Check to see if it really can be freed. 
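The interaction of the two flags with wiring is subtle enough to restate: a fictitious page's wire_count is never touched, while a merely unqueued page is still wire-counted but skips the queue transition on its final unwire. A condensed sketch of the unwire decision (paraphrased; the helper name is hypothetical, and the real function also handles the dirty/commit details):

static void
unwire_sketch(vm_page_t m, int activate)	/* hypothetical helper */
{
	if (m->flags & PG_FICTITIOUS)
		return;			/* wire_count not adjusted at all */
	if (atomic_fetchadd_int(&m->wire_count, -1) == 1) {
		atomic_add_long(&mycpu->gd_vmstats_adj.v_wire_count, -1);
		if (m->flags & PG_UNQUEUED)
			;		/* stays off the paging queues */
		else if (activate || (m->flags & PG_NEED_COMMIT))
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
	}
}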
- */ if (m->dirty || /* can't free if it is dirty */ m->hold_count || /* or held (XXX may be wrong) */ m->wire_count || /* or wired */ - (m->flags & (PG_UNMANAGED | /* or unmanaged */ + (m->flags & (PG_UNQUEUED | /* or unqueued */ PG_NEED_COMMIT | /* or needs a commit */ PG_FICTITIOUS)) || /* or is fictitious */ m->queue - m->pc == PQ_FREE || /* already on PQ_FREE */ @@ -3110,7 +3107,7 @@ vm_page_cache(vm_page_t m) /* * Not suitable for the cache */ - if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT | PG_FICTITIOUS)) || + if ((m->flags & (PG_UNQUEUED | PG_NEED_COMMIT | PG_FICTITIOUS)) || (m->busy_count & PBUSY_MASK) || m->wire_count || m->hold_count) { vm_page_wakeup(m); @@ -3151,7 +3148,7 @@ vm_page_cache(vm_page_t m) */ vm_page_protect(m, VM_PROT_NONE); pmap_mapped_sync(m); - if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) || + if ((m->flags & (PG_UNQUEUED | PG_MAPPED)) || (m->busy_count & PBUSY_MASK) || m->wire_count || m->hold_count) { vm_page_wakeup(m); diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 060b151ca2..d30751d841 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -245,37 +245,61 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; /* * These are the flags defined for vm_page. * - * PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is - * not under PV management but otherwise should be treated as a - * normal page. Pages not under PV management cannot be paged out - * via the object/vm_page_t because there is no knowledge of their - * pte mappings, nor can they be removed from their objects via - * the object, and such pages are also not on any PQ queue. The - * PG_MAPPED and PG_WRITEABLE flags are not applicable. - * - * PG_MAPPED only applies to managed pages, indicating whether the page - * MIGHT be mapped onto one or more pmaps. A page might still be mapped to - * special pmaps in an unmanaged fashion, for example when mapped into a - * buffer cache buffer, without setting PG_MAPPED. - * - * PG_MAPPED can only be tested for NOT being set after a pmap_mapped_sync() - * called made while the page is hard-busied - * - * PG_WRITEABLE indicates that there may be a writeable managed pmap entry - * somewhere, and that the page can be dirtied by hardware at any time - * and may have to be tested for that. The modified bit in unmanaged - * mappings or in the special clean map is not tested. - * - * PG_WRITEABLE can only be tested for NOT being set after a - * pmap_mapped_sync() called made while the page is hard-busied. - * - * PG_SWAPPED indicates that the page is backed by a swap block. Any - * VM object type other than OBJT_DEFAULT can have swap-backed pages now. + * PG_FICTITIOUS It is not possible to translate the pte's physical + * address back to a vm_page_t. The vm_page_t is fake + * or there isn't one at all. + * + * Fictitious vm_page_t's can be placed in objects and + * it is possible to perform pmap functions on them + * by virtual address range and by their vm_page_t. + * However, pmap_count and writeable_count cannot be + * tracked since there is no way to reverse-map the + * pte back to the vm_page. + * + * (pmap operations by-vm_page can still be used to + * adjust protections or remove the page from the pmap, + * and will go only by the PG_MAPPED flag). + * + * NOTE: The contiguous memory management will flag + * PG_FICTITIOUS on pages in the vm_page_array, + * even though the physical addrses can be + * translated back to a vm_page_t. + * + * NOTE: Implies PG_UNQUEUED. PG_UNQUEUED must also + * be set. 
No queue management may be performed + * on fictitious pages. + * + * PG_UNQUEUED The page is not to participate in any VM page queue + * manipulation (even if it is otherwise a normal page). + * + * PG_MAPPED Only applies to non-fictitious regular pages, this + * flag indicates that the page MIGHT be mapped into + * zero or more pmaps via normal managed operations.. + * + * The page might still be mapped in a specialized manner + * (i.e. pmap_kenter(), or mapped into the buffer cache, + * and so forth) without setting this flag. + * + * If this flag is clear it indicates that the page is + * absolutely not mapped into a regular pmap by normal + * means. If set, the status is unknown. + * + * PG_WRITEABLE Similar to PG_MAPPED, indicates that the page might + * be mapped RW into zero or more pmaps via normal + * managed operations. + * + * If this flag is clear it indicates that the page is + * absolutely not mapped RW into a regular pmap by normal + * means. If set, the status is unknown. + * + * PG_SWAPPED Indicates that the page is backed by a swap block. + * Any VM object type other than OBJT_DEFAULT can contain + * swap-backed pages now. */ #define PG_UNUSED0001 0x00000001 #define PG_UNUSED0002 0x00000002 #define PG_WINATCFLS 0x00000004 /* flush dirty page on inactive q */ -#define PG_FICTITIOUS 0x00000008 /* physical page doesn't exist (O) */ +#define PG_FICTITIOUS 0x00000008 /* No reverse-map or tracking */ #define PG_WRITEABLE 0x00000010 /* page may be writeable */ #define PG_MAPPED 0x00000020 /* page may be mapped (managed) */ #define PG_UNUSED0040 0x00000040 @@ -283,7 +307,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT]; #define PG_CLEANCHK 0x00000100 /* page will be checked for cleaning */ #define PG_UNUSED0200 0x00000200 #define PG_NOSYNC 0x00000400 /* do not collect for syncer */ -#define PG_UNMANAGED 0x00000800 /* No PV management for page */ +#define PG_UNQUEUED 0x00000800 /* No queue management for page */ #define PG_MARKER 0x00001000 /* special queue marker page */ #define PG_RAM 0x00002000 /* read ahead mark */ #define PG_SWAPPED 0x00004000 /* backed by swap */ @@ -429,7 +453,6 @@ void vm_page_rename (vm_page_t, struct vm_object *, vm_pindex_t); void vm_page_startup (void); void vm_numa_organize(vm_paddr_t ran_beg, vm_paddr_t bytes, int physid); void vm_numa_organize_finalize(void); -void vm_page_unmanage (vm_page_t); void vm_page_unwire (vm_page_t, int); void vm_page_wire (vm_page_t); void vm_page_unqueue (vm_page_t); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 7e605b1c6a..8c6de80aad 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -262,7 +262,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags) * we can pageout held pages but there is no real need to press our * luck, so don't. 
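Restating the flag documentation above in code form: the two flags are related but not interchangeable, and the relationship amounts to a couple of intended invariants (an explanatory sketch, not assertions the patch adds anywhere):

	/*
	 * Intended invariants after this change:
	 *  - PG_FICTITIOUS implies PG_UNQUEUED: no reverse map and no
	 *    queue management.
	 *  - A page that is only PG_UNQUEUED is otherwise normal and
	 *    reverse-mappable; it simply never sits on a paging queue.
	 */
	if (m->flags & PG_FICTITIOUS)
		KKASSERT(m->flags & PG_UNQUEUED);
	if (m->flags & PG_UNQUEUED)
		KKASSERT(m->queue == PQ_NONE);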
*/ - if (m->hold_count != 0 || (m->flags & PG_UNMANAGED)) { + if (m->hold_count != 0 || (m->flags & PG_UNQUEUED)) { vm_page_wakeup(m); return 0; } @@ -307,7 +307,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags) if (error || p == NULL) break; if ((p->queue - p->pc) == PQ_CACHE || - (p->flags & PG_UNMANAGED)) { + (p->flags & PG_UNQUEUED)) { vm_page_wakeup(p); break; } @@ -350,7 +350,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags) if (error || p == NULL) break; if (((p->queue - p->pc) == PQ_CACHE) || - (p->flags & PG_UNMANAGED)) { + (p->flags & PG_UNQUEUED)) { vm_page_wakeup(p); break; } @@ -540,7 +540,7 @@ vm_pageout_mdp_callback(struct pmap_pgscan_info *info, vm_offset_t va, mycpu->gd_cnt.v_pdpages++; - if (p->wire_count || p->hold_count || (p->flags & PG_UNMANAGED)) { + if (p->wire_count || p->hold_count || (p->flags & PG_UNQUEUED)) { vm_page_wakeup(p); goto done; } @@ -1587,7 +1587,7 @@ vm_pageout_scan_cache(long avail_shortage, int pass, * Remaining operations run with the page busy and neither * the page or the queue will be spin-locked. */ - if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) || + if ((m->flags & (PG_UNQUEUED | PG_NEED_COMMIT)) || m->hold_count || m->wire_count) { vm_page_deactivate(m); diff --git a/sys/vm/vm_swapcache.c b/sys/vm/vm_swapcache.c index 7498619fc4..cb20a209fe 100644 --- a/sys/vm/vm_swapcache.c +++ b/sys/vm/vm_swapcache.c @@ -674,7 +674,7 @@ vm_swapcache_test(vm_page_t m) { vm_object_t object; - if (m->flags & PG_UNMANAGED) + if (m->flags & (PG_UNQUEUED | PG_FICTITIOUS)) return(1); if (m->hold_count || m->wire_count) return(1); -- 2.41.0
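Code that used the removed vm_page_unmanage() interface needs only a small migration. For a freshly allocated page that is not yet on any queue (the phys_pager case above) it reduces to setting the new flag while the page is busied; a sketch with a hypothetical helper name follows (a page already sitting on a queue would additionally need vm_page_unqueue(), which the old function called internally):

static void
switch_off_queueing_sketch(vm_page_t m)		/* hypothetical helper */
{
	/* caller must hold the page busy, as vm_page_unmanage() required */
	KKASSERT(m->busy_count & PBUSY_LOCKED);
	if (m->queue != PQ_NONE)
		vm_page_unqueue(m);
	vm_page_flag_set(m, PG_UNQUEUED);
}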