kernel - VM rework part 15 - Core pmap work, refactor PG_*
author     Matthew Dillon <dillon@apollo.backplane.com>
Mon, 20 May 2019 16:29:43 +0000 (09:29 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
Mon, 20 May 2019 19:39:25 +0000 (12:39 -0700)
* Augment PG_FICTITIOUS.  This takes over some of PG_UNMANAGED's previous
  capabilities.  In addition, the pmap_*() API will work with fictitious
  pages, making mmap() operations (e.g. of the GPU) more consistent.

* Add PG_UNQUEUED.  This prevents a vm_page from being manipulated in
  the vm_page_queues[] in any way.  This takes over another feature
  of the old PG_UNMANAGED flag.

* Remove PG_UNMANAGED

* Remove PG_DEVICE_IDX.  This is no longer relevant.  We use PG_FICTITIOUS
  for all device pages.
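
  For illustration only, a rough sketch (not code from this diff) of how
  a fake device page looks under the new convention.  It is pieced
  together from the dev_pager_getfake() and vm_fault.c 'fakem' hunks
  below; the helper name is hypothetical.

	/*
	 * Hypothetical helper: set up a fake vm_page_t for device memory.
	 * PG_UNMANAGED is gone; PG_FICTITIOUS | PG_UNQUEUED now marks the
	 * page as having no reverse-map tracking and as exempt from any
	 * vm_page_queues[] handling.
	 */
	static void
	fake_devpage_init(vm_page_t m, vm_paddr_t paddr, int pat_mode)
	{
		pmap_page_init(m);
		m->phys_addr = paddr;
		m->flags = PG_FICTITIOUS | PG_UNQUEUED;	/* was | PG_UNMANAGED */
		m->valid = VM_PAGE_BITS_ALL;
		m->dirty = 0;
		m->queue = PQ_NONE;		/* never on a page queue */
		m->busy_count = PBUSY_LOCKED;
		m->wire_count = 1;		/* wiring is a no-op for fictitious pages */
		m->pat_mode = pat_mode;
	}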

* Refactor vm_contig_pg_alloc(), vm_contig_pg_free(),
  vm_page_alloc_contig(), and vm_page_free_contig().

  These functions now set PG_FICTITIOUS | PG_UNQUEUED on the returned
  pages, and properly clear the bits upon free, or when a regular
  (but contig-managed) page is handed over to the normal paging
  system.

  This, combined with making the pmap_*() functions work better with
  PG_FICTITIOUS, is the primary 'fix' for some of the DRM hacks.
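
  As a rough usage sketch (mirroring the vm_page_startup_finish() hunk
  in vm_page.c below, not a new API), handing a contig-reserved page
  back to the normal paging system now looks like this; note that
  vm_page_free_toq() also clears PG_UNQUEUED on its own:

	/*
	 * Strip the special flags so the page may participate in the
	 * normal page queues again, then unwire and free it.
	 */
	vm_page_flag_clear(m, PG_FICTITIOUS | PG_UNQUEUED);
	vm_page_busy_wait(m, FALSE, "cpgfr");
	vm_page_unwire(m, 0);
	vm_page_free(m);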

12 files changed:
sys/platform/pc64/include/pmap.h
sys/platform/pc64/vmm/ept.c
sys/platform/pc64/x86_64/pmap.c
sys/vm/device_pager.c
sys/vm/phys_pager.c
sys/vm/vm_contig.c
sys/vm/vm_fault.c
sys/vm/vm_object.c
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_pageout.c
sys/vm/vm_swapcache.c

sys/platform/pc64/include/pmap.h
index 1153586..353f040 100644
@@ -273,7 +273,7 @@ RB_PROTOTYPE2(pv_entry_rb_tree, pv_entry, pv_entry,
 #define        PG_G_IDX                7
 #define        PG_W_IDX                8
 #define        PG_MANAGED_IDX          9
-#define        PG_DEVICE_IDX           10
+#define        PG_UNUSED10_IDX         10
 #define        PG_N_IDX                11
 #define        PG_NX_IDX               12
 #define        PG_BITS_SIZE            13

sys/platform/pc64/vmm/ept.c
index 514aeaf..3f58575 100644
@@ -98,7 +98,7 @@ vmx_ept_init(void)
        pmap_bits_ept[PG_M_IDX] = EPT_PG_M;
        pmap_bits_ept[PG_W_IDX] = EPT_PG_AVAIL1;
        pmap_bits_ept[PG_MANAGED_IDX] = EPT_PG_AVAIL2;
-       pmap_bits_ept[PG_DEVICE_IDX] = EPT_PG_AVAIL3;
+       pmap_bits_ept[PG_UNUSED10_IDX] = EPT_PG_AVAIL3;
        pmap_bits_ept[PG_N_IDX] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;
        pmap_bits_ept[PG_NX_IDX] = 0;   /* XXX inverted sense */
 

sys/platform/pc64/x86_64/pmap.c
index 07e10a0..0a62863 100644
@@ -2349,7 +2349,7 @@ pmap_puninit(pmap_t pmap)
                pv = NULL;      /* safety */
                pmap_kremove((vm_offset_t)pmap->pm_pml4);
                vm_page_busy_wait(p, FALSE, "pgpun");
-               KKASSERT(p->flags & (PG_FICTITIOUS|PG_UNMANAGED));
+               KKASSERT(p->flags & PG_UNQUEUED);
                vm_page_unwire(p, 0);
                vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE);
                vm_page_free(p);
@@ -2364,7 +2364,7 @@ pmap_puninit(pmap_t pmap)
                pv = NULL;      /* safety */
                pmap_kremove((vm_offset_t)pmap->pm_pml4_iso);
                vm_page_busy_wait(p, FALSE, "pgpun");
-               KKASSERT(p->flags & (PG_FICTITIOUS|PG_UNMANAGED));
+               KKASSERT(p->flags & PG_UNQUEUED);
                vm_page_unwire(p, 0);
                vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE);
                vm_page_free(p);
@@ -2544,21 +2544,13 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp)
                vm_wait(0);
        }
        vm_page_wire(m);        /* wire for mapping in parent */
-       vm_page_unmanage(m);    /* m must be spinunlocked */
        pmap_zero_page(VM_PAGE_TO_PHYS(m));
        m->valid = VM_PAGE_BITS_ALL;
+       vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_UNQUEUED);
+       KKASSERT(m->queue == PQ_NONE);
 
-       vm_page_spin_lock(m);
-       /* pmap_page_stats_adding(m); */
-
-       /*
-        * PGTABLE pv's only exist in the context of the pmap RB tree
-        * (pmap->pm_pvroot).
-        */
        pv->pv_flags |= PV_FLAG_PGTABLE;
        pv->pv_m = m;
-       vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
-       vm_page_spin_unlock(m);
 
        /*
         * (isnew) is TRUE, pv is not terminal.
@@ -2892,7 +2884,7 @@ pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, pmap_inval_bulk_t *bulk)
                        tstr, pv->pv_pindex, p->wire_count);
        }
        KKASSERT(p->wire_count == 1);
-       KKASSERT(p->flags & PG_UNMANAGED);
+       KKASSERT(p->flags & PG_UNQUEUED);
 
        vm_page_unwire(p, 0);
        KKASSERT(p->wire_count == 0);
@@ -3132,22 +3124,21 @@ pmap_remove_pv_page(pv_entry_t pv)
        vm_page_t m;
 
        m = pv->pv_m;
-       vm_page_spin_lock(m);
-       KKASSERT(m && m == pv->pv_m);
        pv->pv_m = NULL;
+
        if (pv->pv_flags & PV_FLAG_PGTABLE) {
                vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-               KKASSERT(m->md.pmap_count == 0);
        } else {
+               KKASSERT(0);
+#if 0
                /*
                 * Used only for page table pages, so safe to clear on
                 * the 1->0 transition.
                 */
                if (atomic_fetchadd_long(&m->md.pmap_count, -1) == 1)
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
+#endif
        }
-       /* pmap_page_stats_deleting(m); */
-       vm_page_spin_unlock(m);
 
        return(m);
 }
@@ -4591,10 +4582,19 @@ pmap_remove_all(vm_page_t m)
 {
        int retry;
 
-       if (!pmap_initialized /* || (m->flags & PG_FICTITIOUS)*/)
+       if (!pmap_initialized)
                return;
+
+       /*
+        * pmap_count doesn't cover fictitious pages, but PG_MAPPED does
+        * (albeit without certain race protections).
+        */
+#if 0
        if (m->md.pmap_count == 0)
                return;
+#endif
+       if ((m->flags & PG_MAPPED) == 0)
+               return;
 
        retry = ticks + hz * 60;
 again:
@@ -4670,8 +4670,13 @@ pmap_remove_specific(pmap_t pmap_match, vm_page_t m)
 {
        if (!pmap_initialized)
                return;
-       if (m->md.pmap_count == 0)
+
+       /*
+        * PG_MAPPED test works for both non-fictitious and fictitious pages.
+        */
+       if ((m->flags & PG_MAPPED) == 0)
                return;
+
        PMAP_PAGE_BACKING_SCAN(m, pmap_match, ipmap, iptep, ipte, iva) {
                if (!pmap_inval_smp_cmpset(ipmap, iva, iptep, ipte, 0))
                        PMAP_PAGE_BACKING_RETRY;
@@ -4790,7 +4795,6 @@ again:
                        }
                }
                if (pbits & pmap->pmap_bits[PG_MANAGED_IDX]) {
-                       KKASSERT((pbits & pmap->pmap_bits[PG_DEVICE_IDX]) == 0);
                        m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
                        if (pbits & pmap->pmap_bits[PG_A_IDX])
                                vm_page_flag_set(m, PG_REFERENCED);
@@ -4815,9 +4819,6 @@ again:
  *
  * NOTE: This routine MUST insert the page into the pmap now, it cannot
  *      lazy-evaluate.
- *
- * NOTE: If (m) is PG_UNMANAGED it may also be a temporary fake vm_page_t.
- *      never record it.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4909,13 +4910,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                newpte |= pmap->pmap_bits[PG_W_IDX];
        if (va < VM_MAX_USER_ADDRESS)
                newpte |= pmap->pmap_bits[PG_U_IDX];
-       if ((m->flags & (/*PG_FICTITIOUS |*/ PG_UNMANAGED)) == 0)
+       if ((m->flags & PG_FICTITIOUS) == 0)
                newpte |= pmap->pmap_bits[PG_MANAGED_IDX];
 //     if (pmap == &kernel_pmap)
 //             newpte |= pgeflag;
        newpte |= pmap->pmap_cache_bits[m->pat_mode];
-       if (m->flags & PG_FICTITIOUS)
-               newpte |= pmap->pmap_bits[PG_DEVICE_IDX];
 
        /*
         * It is possible for multiple faults to occur in threaded
@@ -4932,18 +4931,22 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         * should be able to safely set PG_* flag bits even with the (shared)
         * soft-busy.
         *
-        * As a bit of a safety, bump pmap_count and set the PG_* bits
-        * before mapping the page.  If another part of the system does
-        * not properly hard-busy the page (against our soft-busy) in
-        * order to remove mappings it might not see the pte that we are
-        * about to add and thus will not be able to drop pmap_count to 0.
+        * The pmap_count and writeable_count are only tracked for
+        * non-fictitious pages.  As a bit of a safety, bump pmap_count
+        * and set the PG_* bits before mapping the page.  If another part
+        * of the system does not properly hard-busy the page (against our
+        * soft-busy or hard-busy) in order to remove mappings it might not
+        * see the pte that we are about to add and thus will not be able to
+        * drop pmap_count to 0.
+        *
+        * The PG_MAPPED and PG_WRITEABLE flags are set for any type of page.
         *
         * NOTE! PG_MAPPED and PG_WRITEABLE can only be cleared when
         *       the page is hard-busied AND pmap_count is 0.  This
         *       interlocks our setting of the flags here.
         */
        /*vm_page_spin_lock(m);*/
-       if ((m->flags & PG_UNMANAGED) == 0) {
+       if ((m->flags & PG_FICTITIOUS) == 0) {
                atomic_add_long(&m->md.pmap_count, 1);
                if (newpte & pmap->pmap_bits[PG_RW_IDX])
                        atomic_add_long(&m->md.writeable_count, 1);
@@ -5038,7 +5041,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         */
        if (opa && (origpte & pmap->pmap_bits[PG_MANAGED_IDX])) {
                KKASSERT(oldm == PHYS_TO_VM_PAGE(opa));
-               /* XXX PG_DEVICE_IDX pages */
                if (origpte & pmap->pmap_bits[PG_M_IDX])
                        vm_page_dirty(oldm);
                if (origpte & pmap->pmap_bits[PG_A_IDX])
@@ -5064,13 +5066,6 @@ done:
         * is not managed but we have a pte_pv (which was locking our
         * operation), we can free it now.  pte_pv->pv_m should be NULL.
         */
-#if 0
-       if (pte_pv && (newpte & pmap->pmap_bits[PG_MANAGED_IDX]) == 0) {
-               pv_free(pte_pv, pt_pv);
-       } else if (pte_pv) {
-               pv_put(pte_pv);
-       } else
-#endif
        if (pte_placemark)
                pv_placemarker_wakeup(pmap, pte_placemark);
        if (pt_pv)
@@ -5475,7 +5470,8 @@ pmap_testbit(vm_page_t m, int bit)
 
 /*
  * This routine is used to modify bits in ptes.  Only one bit should be
- * specified.  PG_RW requires special handling.
+ * specified.  PG_RW requires special handling.  This call works with
+ * any sort of mapped page.  PG_FICTITIOUS pages might not be optimal.
  *
  * Caller must NOT hold any spin locks
  * Caller must hold (m) hard-busied
@@ -5504,11 +5500,9 @@ pmap_clearbit(vm_page_t m, int bit_index)
        int retry;
 
        /*
-        * XXX It might make sense to allow PG_FICTITIOUS + PG_DEVICE
-        *     pages through to the backing scan, but atm devices do
-        *     not care about PG_WRITEABLE;
+        * Too early in the boot
         */
-       if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
+       if (!pmap_initialized) {
                if (bit_index == PG_RW_IDX)
                        vm_page_flag_clear(m, PG_WRITEABLE);
                return;
@@ -5549,8 +5543,10 @@ again:
         * PG_WRITEABLE, and callers expect us to for the PG_RW_IDX path.
         */
        PMAP_PAGE_BACKING_SCAN(m, NULL, ipmap, iptep, ipte, iva) {
+#if 0
                if ((ipte & ipmap->pmap_bits[PG_MANAGED_IDX]) == 0)
                        continue;
+#endif
                if ((ipte & ipmap->pmap_bits[PG_RW_IDX]) == 0)
                        continue;
                npte = ipte & ~(ipmap->pmap_bits[PG_RW_IDX] |
@@ -5564,8 +5560,12 @@ again:
                 * NOTE: m is not hard-busied so it is not safe to
                 *       clear PG_WRITEABLE on the 1->0 transition
                 *       against it being set in pmap_enter().
+                *
+                *       pmap_count and writeable_count are only applicable
+                *       to non-fictitious pages (PG_MANAGED_IDX from pte)
                 */
-               atomic_add_long(&m->md.writeable_count, -1);
+               if (ipte & ipmap->pmap_bits[PG_MANAGED_IDX])
+                       atomic_add_long(&m->md.writeable_count, -1);
        } PMAP_PAGE_BACKING_DONE;
 
        /*
@@ -5910,43 +5910,35 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr)
                vm_offset_t pa;
 
                val = MINCORE_INCORE;
-               if ((pte & pmap->pmap_bits[PG_MANAGED_IDX]) == 0)
-                       goto done;
-
                pa = pte & PG_FRAME;
-
-               if (pte & pmap->pmap_bits[PG_DEVICE_IDX])
-                       m = NULL;
-               else
+               if (pte & pmap->pmap_bits[PG_MANAGED_IDX])
                        m = PHYS_TO_VM_PAGE(pa);
+               else
+                       m = NULL;
 
                /*
                 * Modified by us
                 */
                if (pte & pmap->pmap_bits[PG_M_IDX])
                        val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
+
                /*
                 * Modified by someone
                 */
                else if (m && (m->dirty || pmap_is_modified(m)))
                        val |= MINCORE_MODIFIED_OTHER;
-               /*
-                * Referenced by us
-                */
-               if (pte & pmap->pmap_bits[PG_A_IDX])
-                       val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 
                /*
-                * Referenced by someone
+                * Referenced by us, or someone else.
                 */
-               else if (m && ((m->flags & PG_REFERENCED) ||
-                               pmap_ts_referenced(m))) {
+               if (pte & pmap->pmap_bits[PG_A_IDX]) {
+                       val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
+               } else if (m && ((m->flags & PG_REFERENCED) ||
+                                pmap_ts_referenced(m))) {
                        val |= MINCORE_REFERENCED_OTHER;
                        vm_page_flag_set(m, PG_REFERENCED);
                }
        } 
-done:
-
        return val;
 }
 
@@ -6107,7 +6099,6 @@ pmap_kvtom(vm_offset_t va)
 {
        pt_entry_t *ptep = vtopte(va);
 
-       KKASSERT((*ptep & kernel_pmap.pmap_bits[PG_DEVICE_IDX]) == 0);
        return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME));
 }
 

sys/vm/device_pager.c
index 94336b8..b0f8196 100644
@@ -280,7 +280,7 @@ dev_pager_getfake(vm_paddr_t paddr, int pat_mode)
 
        pmap_page_init(m);
 
-       m->flags = PG_FICTITIOUS | PG_UNMANAGED;
+       m->flags = PG_FICTITIOUS | PG_UNQUEUED;
        m->valid = VM_PAGE_BITS_ALL;
        m->dirty = 0;
        m->queue = PQ_NONE;
@@ -347,8 +347,9 @@ static void old_dev_pager_dtor(void *handle)
        }
 }
 
-static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
-    int prot, vm_page_t *mres)
+static int
+old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
+                   int prot, vm_page_t *mres)
 {
        vm_paddr_t paddr;
        vm_page_t page;
@@ -387,7 +388,6 @@ static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
                }
                vm_object_drop(object);
        }
-
        return (VM_PAGER_OK);
 }
 

sys/vm/phys_pager.c
index 0881720..9ed8449 100644
@@ -83,7 +83,7 @@ phys_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess)
 
        vm_page_zero_fill(m);
        /* Switch off pv_entries */
-       vm_page_unmanage(m);
+       vm_page_flag_set(m, PG_UNQUEUED);
        m->valid = VM_PAGE_BITS_ALL;
        m->dirty = VM_PAGE_BITS_ALL;
 

sys/vm/vm_contig.c
index fc40a27..ec7d6c9 100644
@@ -404,9 +404,10 @@ again:
                        if (pqtype == PQ_CACHE &&
                            m->hold_count == 0 &&
                            m->wire_count == 0 &&
-                           (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) {
+                           (m->flags & PG_NEED_COMMIT) == 0) {
                                vm_page_protect(m, VM_PROT_NONE);
-                               KKASSERT((m->flags & PG_MAPPED) == 0);
+                               KKASSERT((m->flags &
+                                        (PG_MAPPED | PG_UNQUEUED)) == 0);
                                KKASSERT(m->dirty == 0);
                                vm_page_free(m);
                                --i;
@@ -430,10 +431,15 @@ again:
                        KKASSERT((m->busy_count & PBUSY_MASK) == 0);
 
                        /*
-                        * Clear all flags.  Then unbusy the now allocated
-                        * page.
+                        * Clear all flags, set FICTITIOUS and UNQUEUED to
+                        * indicate that the pages are special, then unbusy
+                        * the now allocated page.
+                        *
+                        * XXX setting FICTITIOUS and UNQUEUED in the future.
+                        *     (also pair up with vm_contig_pg_free)
                         */
                        vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK);
+                       /* vm_page_flag_set(m, PG_FICTITIOUS | PG_UNQUEUED);*/
                        vm_page_wire(m);
                        vm_page_wakeup(m);
                }

sys/vm/vm_fault.c
index 73755e1..c498d8e 100644
@@ -541,7 +541,7 @@ RetryFault:
 
                bzero(&fakem, sizeof(fakem));
                fakem.pindex = first_pindex;
-               fakem.flags = PG_FICTITIOUS | PG_UNMANAGED;
+               fakem.flags = PG_FICTITIOUS | PG_UNQUEUED;
                fakem.busy_count = PBUSY_LOCKED;
                fakem.valid = VM_PAGE_BITS_ALL;
                fakem.pat_mode = VM_MEMATTR_DEFAULT;
@@ -1167,7 +1167,7 @@ RetryFault:
 
                bzero(&fakem, sizeof(fakem));
                fakem.pindex = first_pindex;
-               fakem.flags = PG_FICTITIOUS | PG_UNMANAGED;
+               fakem.flags = PG_FICTITIOUS | PG_UNQUEUED;
                fakem.busy_count = PBUSY_LOCKED;
                fakem.valid = VM_PAGE_BITS_ALL;
                fakem.pat_mode = VM_MEMATTR_DEFAULT;

sys/vm/vm_object.c
index 307042c..27b9933 100644
@@ -1288,7 +1288,8 @@ relookup:
                 * any of the below states.
                 */
                if (m->wire_count ||
-                   (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
+                   (m->flags & (PG_FICTITIOUS | PG_UNQUEUED |
+                                PG_NEED_COMMIT)) ||
                    m->valid != VM_PAGE_BITS_ALL
                ) {
                        vm_page_wakeup(m);

sys/vm/vm_page.c
index 83a2325..01723c8 100644
@@ -246,10 +246,18 @@ vm_add_new_page(vm_paddr_t pa)
        /*
         * Reserve a certain number of contiguous low memory pages for
         * contigmalloc() to use.
+        *
+        * Even though these pages represent real ram and can be
+        * reverse-mapped, we set PG_FICTITIOUS and PG_UNQUEUED
+        * because their use is special-cased.
+        *
+        * WARNING! Once PG_FICTITIOUS is set, vm_page_wire*()
+        *          and vm_page_unwire*() calls have no effect.
         */
        if (pa < vm_low_phys_reserved) {
                atomic_add_long(&vmstats.v_page_count, 1);
                atomic_add_long(&vmstats.v_dma_pages, 1);
+               m->flags |= PG_FICTITIOUS | PG_UNQUEUED;
                m->queue = PQ_NONE;
                m->wire_count = 1;
                atomic_add_long(&vmstats.v_wire_count, 1);
@@ -785,6 +793,7 @@ vm_page_startup_finish(void *dummy __unused)
                m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT);
                vm_low_phys_reserved = VM_PAGE_TO_PHYS(m);
                while (count) {
+                       vm_page_flag_clear(m, PG_FICTITIOUS | PG_UNQUEUED);
                        vm_page_busy_wait(m, FALSE, "cpgfr");
                        vm_page_unwire(m, 0);
                        vm_page_free(m);
@@ -1034,7 +1043,8 @@ _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead)
        struct vpgqueues *pq;
        u_long *cnt;
 
-       KKASSERT(m->queue == PQ_NONE && (m->flags & PG_FICTITIOUS) == 0);
+       KKASSERT(m->queue == PQ_NONE &&
+                (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0);
 
        if (queue != PQ_NONE) {
                vm_page_queues_spin_lock(queue);
@@ -1377,19 +1387,18 @@ vm_page_unhold(vm_page_t m)
 void
 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
 {
-       if ((m->flags & PG_FICTITIOUS) != 0) {
-               /*
-                * The page's memattr might have changed since the
-                * previous initialization.  Update the pmap to the
-                * new memattr.
-                */
+       /*
+        * The page's memattr might have changed since the
+        * previous initialization.  Update the pmap to the
+        * new memattr.
+        */
+       if ((m->flags & PG_FICTITIOUS) != 0)
                goto memattr;
-       }
        m->phys_addr = paddr;
        m->queue = PQ_NONE;
        /* Fictitious pages don't use "segind". */
        /* Fictitious pages don't use "order" or "pool". */
-       m->flags = PG_FICTITIOUS | PG_UNMANAGED;
+       m->flags = PG_FICTITIOUS | PG_UNQUEUED;
        m->busy_count = PBUSY_LOCKED;
        m->wire_count = 1;
        spin_init(&m->spin, "fake_page");
@@ -1735,6 +1744,9 @@ VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object,
  * Returns a page that is only soft-busied for use by the caller in
  * a read-only fashion.  Returns NULL if the page could not be found,
  * the soft busy could not be obtained, or the page data is invalid.
+ *
+ * XXX Doesn't handle PG_FICTITIOUS pages at the moment, but there is
+ *     no reason why we couldn't.
  */
 vm_page_t
 vm_page_lookup_sbusy_try(struct vm_object *object, vm_pindex_t pindex,
@@ -2046,11 +2058,12 @@ vm_page_select_cache(u_short pg_color)
                        /*
                         * We successfully busied the page
                         */
-                       if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 &&
+                       if ((m->flags & PG_NEED_COMMIT) == 0 &&
                            m->hold_count == 0 &&
                            m->wire_count == 0 &&
                            (m->dirty & m->valid) == 0) {
                                vm_page_spin_unlock(m);
+                               KKASSERT((m->flags & PG_UNQUEUED) == 0);
                                pagedaemon_wakeup();
                                return(m);
                        }
@@ -2117,7 +2130,7 @@ vm_page_select_free(u_short pg_color)
                         * wiring doesn't adjust queues, a page on the free
                         * queue should never be wired at this point.
                         */
-                       KKASSERT((m->flags & (PG_UNMANAGED |
+                       KKASSERT((m->flags & (PG_UNQUEUED |
                                              PG_NEED_COMMIT)) == 0);
                        KASSERT(m->hold_count == 0,
                                ("m->hold_count is not zero "
@@ -2454,6 +2467,10 @@ vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high,
                        return(NULL);
                }
                spin_unlock(&vm_contig_spin);
+
+               /*
+                * Base vm_page_t of range
+                */
                m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT);
        }
        if (vm_contig_verbose) {
@@ -2464,8 +2481,10 @@ vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high,
                        low, high, alignment, boundary, size, memattr);
        }
        if (memattr != VM_MEMATTR_DEFAULT) {
-               for (i = 0;i < size; i++)
+               for (i = 0; i < size; ++i) {
+                       KKASSERT(m[i].flags & PG_FICTITIOUS);
                        pmap_page_set_memattr(&m[i], memattr);
+               }
        }
        return m;
 }
@@ -2486,13 +2505,19 @@ vm_page_free_contig(vm_page_t m, unsigned long size)
                        (intmax_t)pa, size / 1024);
        }
        if (pa < vm_low_phys_reserved) {
+               /*
+                * Just assert check the first page for convenience.
+                */
                KKASSERT(m->wire_count == 1);
+               KKASSERT(m->flags & PG_FICTITIOUS);
                KKASSERT(pa + size <= vm_low_phys_reserved);
                spin_lock(&vm_contig_spin);
                alist_free(&vm_contig_alist, start, pages);
                spin_unlock(&vm_contig_spin);
        } else {
                while (pages) {
+                       /* XXX FUTURE, maybe (pair with vm_page_alloc_contig()) */
+                       /*vm_page_flag_clear(m, PG_FICTITIOUS | PG_UNQUEUED);*/
                        vm_page_busy_wait(m, FALSE, "cpgfr");
                        vm_page_unwire(m, 0);
                        vm_page_free(m);
@@ -2640,25 +2665,25 @@ vm_page_activate(vm_page_t m)
         * If already active or inappropriate, just set act_count and
         * return.  We don't have to spin-lock the page.
         */
-       if (m->queue - m->pc == PQ_ACTIVE || (m->flags & PG_FICTITIOUS)) {
+       if (m->queue - m->pc == PQ_ACTIVE ||
+           (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) {
                if (m->act_count < ACT_INIT)
                        m->act_count = ACT_INIT;
                return;
        }
 
        vm_page_spin_lock(m);
-       if (m->queue - m->pc != PQ_ACTIVE && (m->flags & PG_FICTITIOUS) == 0) {
+       if (m->queue - m->pc != PQ_ACTIVE &&
+           (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) {
                _vm_page_queue_spin_lock(m);
                oqueue = _vm_page_rem_queue_spinlocked(m);
                /* page is left spinlocked, queue is unlocked */
 
                if (oqueue == PQ_CACHE)
                        mycpu->gd_cnt.v_reactivated++;
-               if ((m->flags & PG_UNMANAGED) == 0) {
-                       if (m->act_count < ACT_INIT)
-                               m->act_count = ACT_INIT;
-                       _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0);
-               }
+               if (m->act_count < ACT_INIT)
+                       m->act_count = ACT_INIT;
+               _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0);
                _vm_page_and_queue_spin_unlock(m);
                if (oqueue == PQ_CACHE || oqueue == PQ_FREE)
                        pagedaemon_wakeup();
@@ -2672,7 +2697,8 @@ vm_page_activate(vm_page_t m)
 void
 vm_page_soft_activate(vm_page_t m)
 {
-       if (m->queue - m->pc == PQ_ACTIVE || (m->flags & PG_FICTITIOUS)) {
+       if (m->queue - m->pc == PQ_ACTIVE ||
+           (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) {
                if (m->act_count < ACT_INIT)
                        m->act_count = ACT_INIT;
        } else {
@@ -2803,13 +2829,11 @@ vm_page_free_toq(vm_page_t m)
        }
 
        /*
-        * Clear the UNMANAGED flag when freeing an unmanaged page.
-        * Clear the NEED_COMMIT flag
+        * Clear the PG_NEED_COMMIT and the PG_UNQUEUED flags.  The
+        * page returns to normal operation and will be placed in
+        * the PQ_HOLD or PQ_FREE queue.
         */
-       if (m->flags & PG_UNMANAGED)
-               vm_page_flag_clear(m, PG_UNMANAGED);
-       if (m->flags & PG_NEED_COMMIT)
-               vm_page_flag_clear(m, PG_NEED_COMMIT);
+       vm_page_flag_clear(m, PG_NEED_COMMIT | PG_UNQUEUED);
 
        if (m->hold_count != 0) {
                _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0);
@@ -2833,50 +2857,17 @@ vm_page_free_toq(vm_page_t m)
        vm_page_free_wakeup();
 }
 
-/*
- * vm_page_unmanage()
- *
- * Prevent PV management from being done on the page.  The page is
- * also removed from the paging queues, and as a consequence of no longer
- * being managed the pageout daemon will not touch it (since there is no
- * way to locate the pte mappings for the page).  madvise() calls that
- * mess with the pmap will also no longer operate on the page.
- *
- * Beyond that the page is still reasonably 'normal'.  Freeing the page
- * will clear the flag.
- *
- * This routine is used by OBJT_PHYS objects - objects using unswappable
- * physical memory as backing store rather then swap-backed memory and
- * will eventually be extended to support 4MB unmanaged physical 
- * mappings.
- *
- * Caller must be holding the page busy.
- */
-void
-vm_page_unmanage(vm_page_t m)
-{
-       KKASSERT(m->busy_count & PBUSY_LOCKED);
-       if ((m->flags & PG_UNMANAGED) == 0) {
-               vm_page_unqueue(m);
-       }
-       vm_page_flag_set(m, PG_UNMANAGED);
-}
-
 /*
  * Mark this page as wired down by yet another map.  We do not adjust the
  * queue the page is on, it will be checked for wiring as-needed.
  *
+ * This function has no effect on fictitious pages.
+ *
  * Caller must be holding the page busy.
  */
 void
 vm_page_wire(vm_page_t m)
 {
-       /*
-        * Only bump the wire statistics if the page is not already wired,
-        * and only unqueue the page if it is on some queue (if it is unmanaged
-        * it is already off the queues).  Don't do anything with fictitious
-        * pages because they are always wired.
-        */
        KKASSERT(m->busy_count & PBUSY_LOCKED);
        if ((m->flags & PG_FICTITIOUS) == 0) {
                if (atomic_fetchadd_int(&m->wire_count, 1) == 0) {
@@ -2917,6 +2908,10 @@ vm_page_wire(vm_page_t m)
  * be placed in the cache - for example, just after dirtying a page.
  * dirty pages in the cache are not allowed.
  *
+ * PG_FICTITIOUS or PG_UNQUEUED pages are never moved to any queue, and
+ * the wire_count will not be adjusted in any way for a PG_FICTITIOUS
+ * page.
+ *
  * This routine may not block.
  */
 void
@@ -2930,7 +2925,7 @@ vm_page_unwire(vm_page_t m, int activate)
        } else {
                if (atomic_fetchadd_int(&m->wire_count, -1) == 1) {
                        atomic_add_long(&mycpu->gd_vmstats_adj.v_wire_count,-1);
-                       if (m->flags & PG_UNMANAGED) {
+                       if (m->flags & PG_UNQUEUED) {
                                ;
                        } else if (activate || (m->flags & PG_NEED_COMMIT)) {
                                vm_page_activate(m);
@@ -2963,13 +2958,15 @@ _vm_page_deactivate_locked(vm_page_t m, int athead)
        /*
         * Ignore if already inactive.
         */
-       if (m->queue - m->pc == PQ_INACTIVE || (m->flags & PG_FICTITIOUS))
+       if (m->queue - m->pc == PQ_INACTIVE ||
+           (m->flags & (PG_FICTITIOUS | PG_UNQUEUED))) {
                return;
+       }
 
        _vm_page_queue_spin_lock(m);
        oqueue = _vm_page_rem_queue_spinlocked(m);
 
-       if ((m->flags & PG_UNMANAGED) == 0) {
+       if ((m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) {
                if (oqueue == PQ_CACHE)
                        mycpu->gd_cnt.v_reactivated++;
                vm_page_flag_clear(m, PG_WINATCFLS);
@@ -2996,7 +2993,7 @@ void
 vm_page_deactivate(vm_page_t m)
 {
        if (m->queue - m->pc != PQ_INACTIVE &&
-           (m->flags & PG_FICTITIOUS) == 0) {
+           (m->flags & (PG_FICTITIOUS | PG_UNQUEUED)) == 0) {
                vm_page_spin_lock(m);
                _vm_page_deactivate_locked(m, 0);
                vm_page_spin_unlock(m);
@@ -3028,7 +3025,7 @@ vm_page_try_to_cache(vm_page_t m)
         */
        if (m->dirty || m->hold_count || m->wire_count ||
            m->queue - m->pc == PQ_CACHE ||
-           (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT | PG_FICTITIOUS))) {
+           (m->flags & (PG_UNQUEUED | PG_NEED_COMMIT | PG_FICTITIOUS))) {
                vm_page_wakeup(m);
                return(0);
        }
@@ -3050,6 +3047,10 @@ vm_page_try_to_cache(vm_page_t m)
  * Attempt to free the page.  If we cannot free it, we do nothing.
  * 1 is returned on success, 0 on failure.
  *
+ * The page can be in any state, including already being on the free
+ * queue.  Check to see if it really can be freed.  Note that we disallow
+ * this ad-hoc operation if the page is flagged PG_UNQUEUED.
+ *
  * Caller provides an unlocked/non-busied page.
  * No requirements.
  */
@@ -3059,14 +3060,10 @@ vm_page_try_to_free(vm_page_t m)
        if (vm_page_busy_try(m, TRUE))
                return(0);
 
-       /*
-        * The page can be in any state, including already being on the free
-        * queue.  Check to see if it really can be freed.
-        */
        if (m->dirty ||                         /* can't free if it is dirty */
            m->hold_count ||                    /* or held (XXX may be wrong) */
            m->wire_count ||                    /* or wired */
-           (m->flags & (PG_UNMANAGED |         /* or unmanaged */
+           (m->flags & (PG_UNQUEUED |          /* or unqueued */
                         PG_NEED_COMMIT |       /* or needs a commit */
                         PG_FICTITIOUS)) ||     /* or is fictitious */
            m->queue - m->pc == PQ_FREE ||      /* already on PQ_FREE */
@@ -3110,7 +3107,7 @@ vm_page_cache(vm_page_t m)
        /*
         * Not suitable for the cache
         */
-       if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT | PG_FICTITIOUS)) ||
+       if ((m->flags & (PG_UNQUEUED | PG_NEED_COMMIT | PG_FICTITIOUS)) ||
            (m->busy_count & PBUSY_MASK) ||
            m->wire_count || m->hold_count) {
                vm_page_wakeup(m);
@@ -3151,7 +3148,7 @@ vm_page_cache(vm_page_t m)
         */
        vm_page_protect(m, VM_PROT_NONE);
        pmap_mapped_sync(m);
-       if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) ||
+       if ((m->flags & (PG_UNQUEUED | PG_MAPPED)) ||
            (m->busy_count & PBUSY_MASK) ||
            m->wire_count || m->hold_count) {
                vm_page_wakeup(m);

sys/vm/vm_page.h
index 060b151..d30751d 100644
@@ -245,37 +245,61 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 /*
  * These are the flags defined for vm_page.
  *
- *  PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
- *  not under PV management but otherwise should be treated as a
- *  normal page.  Pages not under PV management cannot be paged out
- *  via the object/vm_page_t because there is no knowledge of their
- *  pte mappings, nor can they be removed from their objects via 
- *  the object, and such pages are also not on any PQ queue.  The
- *  PG_MAPPED and PG_WRITEABLE flags are not applicable.
- *
- *  PG_MAPPED only applies to managed pages, indicating whether the page
- *  MIGHT be mapped onto one or more pmaps.  A page might still be mapped to
- *  special pmaps in an unmanaged fashion, for example when mapped into a
- *  buffer cache buffer, without setting PG_MAPPED.
- *
- *  PG_MAPPED can only be tested for NOT being set after a pmap_mapped_sync()
- *  called made while the page is hard-busied
- *
- *  PG_WRITEABLE indicates that there may be a writeable managed pmap entry
- *  somewhere, and that the page can be dirtied by hardware at any time
- *  and may have to be tested for that.  The modified bit in unmanaged
- *  mappings or in the special clean map is not tested.
- *
- *  PG_WRITEABLE can only be tested for NOT being set after a
- *  pmap_mapped_sync() called made while the page is hard-busied.
- *
- *  PG_SWAPPED indicates that the page is backed by a swap block.  Any
- *  VM object type other than OBJT_DEFAULT can have swap-backed pages now.
+ *  PG_FICTITIOUS      It is not possible to translate the pte's physical
+ *                     address back to a vm_page_t.  The vm_page_t is fake
+ *                     or there isn't one at all.
+ *
+ *                     Fictitious vm_page_t's can be placed in objects and
+ *                     it is possible to perform pmap functions on them
+ *                     by virtual address range and by their vm_page_t.
+ *                     However, pmap_count and writeable_count cannot be
+ *                     tracked since there is no way to reverse-map the
+ *                     pte back to the vm_page.
+ *
+ *                     (pmap operations by-vm_page can still be used to
+ *                     adjust protections or remove the page from the pmap,
+ *                     and will go only by the PG_MAPPED flag).
+ *
+ *                     NOTE: The contiguous memory management will flag
+ *                           PG_FICTITIOUS on pages in the vm_page_array,
+ *                           even though the physical addresses can be
+ *                           translated back to a vm_page_t.
+ *
+ *                     NOTE: Implies PG_UNQUEUED.  PG_UNQUEUED must also
+ *                           be set.  No queue management may be performed
+ *                           on fictitious pages.
+ *
+ *  PG_UNQUEUED                The page is not to participate in any VM page queue
+ *                     manipulation (even if it is otherwise a normal page).
+ *
+ *  PG_MAPPED          Only applies to non-fictitious regular pages.  This
+ *                     flag indicates that the page MIGHT be mapped into
+ *                     zero or more pmaps via normal managed operations.
+ *
+ *                     The page might still be mapped in a specialized manner
+ *                     (i.e. pmap_kenter(), or mapped into the buffer cache,
+ *                     and so forth) without setting this flag.
+ *
+ *                     If this flag is clear it indicates that the page is
+ *                     absolutely not mapped into a regular pmap by normal
+ *                     means.  If set, the status is unknown.
+ *
+ *  PG_WRITEABLE       Similar to PG_MAPPED, indicates that the page might
+ *                     be mapped RW into zero or more pmaps via normal
+ *                     managed operations.
+ *
+ *                     If this flag is clear it indicates that the page is
+ *                     absolutely not mapped RW into a regular pmap by normal
+ *                     means.  If set, the status is unknown.
+ *
+ *  PG_SWAPPED         Indicates that the page is backed by a swap block.
+ *                     Any VM object type other than OBJT_DEFAULT can contain
+ *                     swap-backed pages now.
  */
 #define        PG_UNUSED0001   0x00000001
 #define        PG_UNUSED0002   0x00000002
 #define PG_WINATCFLS   0x00000004      /* flush dirty page on inactive q */
-#define        PG_FICTITIOUS   0x00000008      /* physical page doesn't exist (O) */
+#define        PG_FICTITIOUS   0x00000008      /* No reverse-map or tracking */
 #define        PG_WRITEABLE    0x00000010      /* page may be writeable */
 #define PG_MAPPED      0x00000020      /* page may be mapped (managed) */
 #define        PG_UNUSED0040   0x00000040
@@ -283,7 +307,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 #define PG_CLEANCHK    0x00000100      /* page will be checked for cleaning */
 #define PG_UNUSED0200  0x00000200
 #define PG_NOSYNC      0x00000400      /* do not collect for syncer */
-#define PG_UNMANAGED   0x00000800      /* No PV management for page */
+#define PG_UNQUEUED    0x00000800      /* No queue management for page */
 #define PG_MARKER      0x00001000      /* special queue marker page */
 #define PG_RAM         0x00002000      /* read ahead mark */
 #define PG_SWAPPED     0x00004000      /* backed by swap */
@@ -429,7 +453,6 @@ void vm_page_rename (vm_page_t, struct vm_object *, vm_pindex_t);
 void vm_page_startup (void);
 void vm_numa_organize(vm_paddr_t ran_beg, vm_paddr_t bytes, int physid);
 void vm_numa_organize_finalize(void);
-void vm_page_unmanage (vm_page_t);
 void vm_page_unwire (vm_page_t, int);
 void vm_page_wire (vm_page_t);
 void vm_page_unqueue (vm_page_t);

sys/vm/vm_pageout.c
index 7e605b1..8c6de80 100644
@@ -262,7 +262,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags)
         * we can pageout held pages but there is no real need to press our
         * luck, so don't.
         */
-       if (m->hold_count != 0 || (m->flags & PG_UNMANAGED)) {
+       if (m->hold_count != 0 || (m->flags & PG_UNQUEUED)) {
                vm_page_wakeup(m);
                return 0;
        }
@@ -307,7 +307,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags)
                if (error || p == NULL)
                        break;
                if ((p->queue - p->pc) == PQ_CACHE ||
-                   (p->flags & PG_UNMANAGED)) {
+                   (p->flags & PG_UNQUEUED)) {
                        vm_page_wakeup(p);
                        break;
                }
@@ -350,7 +350,7 @@ vm_pageout_clean_helper(vm_page_t m, int vmflush_flags)
                if (error || p == NULL)
                        break;
                if (((p->queue - p->pc) == PQ_CACHE) ||
-                   (p->flags & PG_UNMANAGED)) {
+                   (p->flags & PG_UNQUEUED)) {
                        vm_page_wakeup(p);
                        break;
                }
@@ -540,7 +540,7 @@ vm_pageout_mdp_callback(struct pmap_pgscan_info *info, vm_offset_t va,
 
        mycpu->gd_cnt.v_pdpages++;
 
-       if (p->wire_count || p->hold_count || (p->flags & PG_UNMANAGED)) {
+       if (p->wire_count || p->hold_count || (p->flags & PG_UNQUEUED)) {
                vm_page_wakeup(p);
                goto done;
        }
@@ -1587,7 +1587,7 @@ vm_pageout_scan_cache(long avail_shortage, int pass,
                 * Remaining operations run with the page busy and neither
                 * the page or the queue will be spin-locked.
                 */
-               if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
+               if ((m->flags & (PG_UNQUEUED | PG_NEED_COMMIT)) ||
                    m->hold_count ||
                    m->wire_count) {
                        vm_page_deactivate(m);

sys/vm/vm_swapcache.c
index 7498619..cb20a20 100644
@@ -674,7 +674,7 @@ vm_swapcache_test(vm_page_t m)
 {
        vm_object_t object;
 
-       if (m->flags & PG_UNMANAGED)
+       if (m->flags & (PG_UNQUEUED | PG_FICTITIOUS))
                return(1);
        if (m->hold_count || m->wire_count)
                return(1);