kernel - VM rework part 17 - Cleanup
authorMatthew Dillon <dillon@apollo.backplane.com>
Tue, 21 May 2019 00:35:57 +0000 (17:35 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Tue, 21 May 2019 05:54:09 +0000 (22:54 -0700)
* Adjust kmapinfo and vmpageinfo in /usr/src/test/debug.
  Enhance the code to display more useful information.

* Get pmap_page_stats_*() working again.

* Change systat -vm's 'VM' reporting.  Replace VM-rss with PMAP and
  VMRSS.  Relabel VM-swp to SWAP and SWTOT.

  PMAP - Amount of real memory faulted into user pmaps.

  VMRSS - Sum of all process RSS's in the system.  This is
  the 'virtual' memory faulted into user pmaps and
  includes shared pages.

  SWAP - Amount of swap space currently in use.

  SWTOT - Total amount of swap installed.

* Redocument vm_page.h.

* Remove dead code from pmap.c (some left over cruft from the
  days when pv_entry's were used for PTEs).

sys/platform/pc64/include/pmap.h
sys/platform/pc64/x86_64/pmap.c
sys/vm/vm_page.c
sys/vm/vm_page.h
test/debug/kmapinfo.c
test/debug/vmpageinfo.c
usr.bin/systat/vmstat.c

index 353f040..d6e7b5c 100644 (file)
@@ -363,7 +363,7 @@ typedef struct pv_entry {
 #define PV_HOLD_MASK           0x1FFFFFFFU
 
 #define PV_FLAG_UNUSED01       0x00000001U
-#define PV_FLAG_PGTABLE                0x00000002U     /* page table page */
+#define PV_FLAG_UNUSED02       0x00000002U
 
 #ifdef _KERNEL
 
index 0a62863..f8cfdb0 100644 (file)
@@ -396,15 +396,28 @@ pv_entry_compare(pv_entry_t pv1, pv_entry_t pv2)
 RB_GENERATE2(pv_entry_rb_tree, pv_entry, pv_entry,
              pv_entry_compare, vm_pindex_t, pv_pindex);
 
+/*
+ * Keep track of pages in the pmap.  The procedure is handed
+ * the vm_page->md.pmap_count value prior to an increment or
+ * decrement.
+ *
+ *     t_arm           - Active real memory
+ *     t_avm           - Active virtual memory
+ *     t_armshr        - Active real memory that is also shared
+ *     t_avmshr        - Active virtual memory that is also shared
+ *
+ * NOTE: At the moment t_avm is effectively just the same as t_arm.
+ */
 static __inline
 void
-pmap_page_stats_adding(vm_page_t m)
+pmap_page_stats_adding(long prev_count)
 {
        globaldata_t gd = mycpu;
 
-       if (m->md.pmap_count == 0) {
+       if (prev_count == 0) {
                ++gd->gd_vmtotal.t_arm;
-       } else if (m->md.pmap_count == 1) {
+               ++gd->gd_vmtotal.t_avm;
+       } else if (prev_count == 1) {
                ++gd->gd_vmtotal.t_armshr;
                ++gd->gd_vmtotal.t_avmshr;
        } else {
@@ -414,13 +427,14 @@ pmap_page_stats_adding(vm_page_t m)
 
 static __inline
 void
-pmap_page_stats_deleting(vm_page_t m)
+pmap_page_stats_deleting(long prev_count)
 {
        globaldata_t gd = mycpu;
 
-       if (m->md.pmap_count == 0) {
+       if (prev_count == 1) {
                --gd->gd_vmtotal.t_arm;
-       } else if (m->md.pmap_count == 1) {
+               --gd->gd_vmtotal.t_avm;
+       } else if (prev_count == 2) {
                --gd->gd_vmtotal.t_armshr;
                --gd->gd_vmtotal.t_avmshr;
        } else {
@@ -2425,24 +2439,10 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp)
 
        /*
         * DragonFly doesn't use PV's to represent terminal PTEs any more.
+        * The index range is still used for placemarkers, but not for
+        * actual pv_entry's.
         */
        KKASSERT(ptepindex >= pmap_pt_pindex(0));
-#if 0
-       if (ptepindex < pmap_pt_pindex(0)) {
-               if (ptepindex >= NUPTE_USER && pmap != &iso_pmap) {
-                       /* kernel manages this manually for KVM */
-                       KKASSERT(pvpp == NULL);
-               } else {
-                       KKASSERT(pvpp != NULL);
-                       pt_pindex = NUPTE_TOTAL + (ptepindex >> NPTEPGSHIFT);
-                       pvp = pmap_allocpte(pmap, pt_pindex, NULL);
-                       if (isnew)
-                               vm_page_wire_quick(pvp->pv_m);
-                       *pvpp = pvp;
-               }
-               return(pv);
-       }
-#endif
 
        /*
         * Note that pt_pv's are only returned for user VAs. We assert that
@@ -2549,7 +2549,6 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp)
        vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_UNQUEUED);
        KKASSERT(m->queue == PQ_NONE);
 
-       pv->pv_flags |= PV_FLAG_PGTABLE;
        pv->pv_m = m;
 
        /*
@@ -2584,27 +2583,7 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp)
                else
                        ptep_iso  = NULL;
                if (*ptep & pmap->pmap_bits[PG_V_IDX]) {
-                       KKASSERT(0);
-#if 0
-                       /* REMOVED replaces shared page table page */
-                       pt_entry_t pte;
-
-                       if (ispt == 0) {
-                               panic("pmap_allocpte: unexpected pte %p/%d",
-                                     pvp, (int)ptepindex);
-                       }
-                       pte = pmap_inval_smp(pmap, (vm_offset_t)-1, 1,
-                                            ptep, v);
-                       if (ptep_iso) {
-                               pmap_inval_smp(pmap, (vm_offset_t)-1, 1,
-                                              ptep_iso, v);
-                       }
-                       if (vm_page_unwire_quick(
-                                       PHYS_TO_VM_PAGE(pte & PG_FRAME))) {
-                               panic("pmap_allocpte: shared pgtable "
-                                     "pg bad wirecount");
-                       }
-#endif
+                       panic("pmap_allocpte: ptpte present without pv_entry!");
                } else {
                        pt_entry_t pte;
 
@@ -3125,20 +3104,7 @@ pmap_remove_pv_page(pv_entry_t pv)
 
        m = pv->pv_m;
        pv->pv_m = NULL;
-
-       if (pv->pv_flags & PV_FLAG_PGTABLE) {
-               vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-       } else {
-               KKASSERT(0);
-#if 0
-               /*
-                * Used only for page table pages, so safe to clear on
-                * the 1->0 transition.
-                */
-               if (atomic_fetchadd_long(&m->md.pmap_count, -1) == 1)
-                       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-#endif
-       }
+       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 
        return(m);
 }
@@ -4552,7 +4518,8 @@ pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info,
                 */
                if (pte & pmap->pmap_bits[PG_RW_IDX])
                        atomic_add_long(&p->md.writeable_count, -1);
-               atomic_add_long(&p->md.pmap_count, -1);
+               pmap_page_stats_deleting(
+                       atomic_fetchadd_long(&p->md.pmap_count, -1));
        }
        if (pte & pmap->pmap_bits[PG_V_IDX]) {
                atomic_add_long(&pmap->pm_stats.resident_count, -1);
@@ -4615,7 +4582,8 @@ again:
                         */
                        if (ipte & ipmap->pmap_bits[PG_RW_IDX])
                                atomic_add_long(&m->md.writeable_count, -1);
-                       atomic_add_long(&m->md.pmap_count, -1);
+                       pmap_page_stats_deleting(
+                               atomic_fetchadd_long(&m->md.pmap_count, -1));
                }
 
                /*
@@ -4694,7 +4662,8 @@ pmap_remove_specific(pmap_t pmap_match, vm_page_t m)
                         */
                        if (ipte & ipmap->pmap_bits[PG_RW_IDX])
                                atomic_add_long(&m->md.writeable_count, -1);
-                       atomic_add_long(&m->md.pmap_count, -1);
+                       pmap_page_stats_deleting(
+                               atomic_fetchadd_long(&m->md.pmap_count, -1));
                }
 
                /*
@@ -4947,7 +4916,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
         */
        /*vm_page_spin_lock(m);*/
        if ((m->flags & PG_FICTITIOUS) == 0) {
-               atomic_add_long(&m->md.pmap_count, 1);
+               pmap_page_stats_adding(
+                       atomic_fetchadd_long(&m->md.pmap_count, 1));
                if (newpte & pmap->pmap_bits[PG_RW_IDX])
                        atomic_add_long(&m->md.writeable_count, 1);
        }
@@ -4959,7 +4929,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                        vm_page_flag_set(m, PG_MAPPED);
        }
        /*vm_page_spin_unlock(m);*/
-       /*pmap_page_stats_adding(m);*/
 
        /*
         * A race can develop when replacing an existing mapping.  The new
@@ -5054,7 +5023,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                 */
                if (origpte & pmap->pmap_bits[PG_RW_IDX])
                        atomic_add_long(&oldm->md.writeable_count, -1);
-               atomic_add_long(&oldm->md.pmap_count, -1);
+               pmap_page_stats_deleting(
+                       atomic_fetchadd_long(&oldm->md.pmap_count, -1));
        }
 
 done:
index d0921d9..c4d9b26 100644 (file)
@@ -1442,6 +1442,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
         * Associate the VM page with an (object, offset).
         *
         * The vm_page spin lock is required for interactions with the pmap.
+        * XXX vm_page_spin_lock() might not be needed for this any more.
         */
        vm_page_spin_lock(m);
        m->object = object;
@@ -1508,6 +1509,7 @@ vm_page_remove(vm_page_t m)
         * Remove the page from the object and update the object.
         *
         * The vm_page spin lock is required for interactions with the pmap.
+        * XXX vm_page_spin_lock() might not be needed for this any more.
         */
        vm_page_spin_lock(m);
        vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m);
index 8634dc8..3153f19 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 1991, 1993
  *     The Regents of the University of California.  All rights reserved.
- * Copyright (c) 2003-2017 The DragonFly Project.  All rights reserved.
+ * Copyright (c) 2003-2019 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
 #endif
 
 /*
- * vm_page structure
+ * The vm_page structure is the heart of the entire system.  It's fairly
+ * bulky, eating 3.125% of available memory (128 bytes vs 4K page size).
+ * Most normal uses of the structure, representing physical memory, uses
+ * the type-stable vm_page_array[].  Device mappings exposed to mmap()
+ * (such as GPUs) generally use temporary vm_page's outside of this array
+ * and will be flagged FICTITIOUS.  Devices which use the kernel's contig
+ * memory allocator get normal pages, but for convenience the pages will
+ * be temporarily flagged as FICTITIOUS.
  *
- * hard-busy: (PBUSY_LOCKED)
+ * Soft-busying or Hard-busying guarantees a stable m->object, m->pindex,
+ * and m->valid field.  A page cannot be validated or invalidated unless
+ * hard-busied.
  *
- *     Hard-busying a page allows major manipulation of the page structure.
- *     No new soft-busies can accumulate while a page is hard-busied.  The
- *     page busying code typically waits for all soft-busies to drop before
- *     allowing the hard-busy.
+ * The page must be hard-busied to make the following changes:
  *
- * soft-busy: (PBUSY_MASK)
+ *     (1) Any change to m->object or m->pindex (also requires the
+ *         related object to be exclusively locked).
  *
- *     Soft-busying a page typically indicates I/O or read-only use of
- *     the content.  A page can have multiple soft-busies on it.  New
- *     soft-busies block on any hard-busied page (wait for the hard-busy
- *     to go away).
+ *     (2) Any transition of m->wire_count to 0 or from 0.  Other
+ *         transitions (e.g. 2->1, 1->2, etc) are allowed without
+ *         locks.
  *
- * hold_count
+ *     (3) Any change to m->valid.
  *
- *     This prevents a page from being freed.  This does not prevent any
- *     other operation.  The page may still be disassociated from its
- *     object and essentially scrapped.  It just won't be reused while
- *     a non-zero hold_count is present.
+ *     (4) Clearing PG_MAPPED or PG_WRITEABLE (note that because of
+ *         this, these bits may be left lazily set until they can
+ *         be cleared later on).
  *
- * wire_count
+ * Most other fields of the vm_page can change at any time with certain
+ * restrictions.
  *
- *     This indicates that the page has been wired into memory somewhere
- *     (typically a buffer cache buffer, or a user wire).  The pageout
- *     daemon will skip wired pages.
+ *     (1) PG_WRITEABLE and PG_MAPPED may be set with the page soft-busied
+ *         or hard-busied.
+ *
+ *     (2) m->dirty may be set to VM_PAGE_BITS_ALL by a page fault at
+ *         any time if PG_WRITEABLE is flagged.  Tests of m->dirty are
+ *         only tentative until all writeable mappings of the page are
+ *         removed.  This may occur unlocked.  A hard-busy is required
+ *         if modifying m->dirty under other conditions.
+ *
+ *     (3) PG_REFERENCED may be set at any time by the pmap code to
+ *         synchronize the [A]ccessed bit, if PG_MAPPED is flagged,
+ *         unlocked.  A hard-busy is required for any other time.
+ *
+ *     (4) hold_count can be incremented or decremented at any time,
+ *         including transitions to or from 0.  Holding a page via
+ *         vm_page_hold() does NOT stop major changes from being made
+ *         to the page, but WILL prevent the page from being freed
+ *         or reallocated.  If the hold is emplaced with the page in
+ *         a known state it can prevent the underlying data from being
+ *         destroyed.
+ *
+ *     (5) Each individual flag may have a different behavior.  Some flags
+ *         can be set or cleared at any time, some require hard-busying,
+ *         etc.
+ *
+ * Moving the page between queues (aka m->pageq and m->queue) requires
+ * m->spin to be exclusively locked first, and then also the spinlock related
+ * to the queue.
+ *
+ *     (1) This is the only use that requires m->spin any more.
+ *
+ *     (2) There is one special case and that is the pageout daemon is
+ *         allowed to reorder the page within the same queue while just
+ *         holding the queue's spin-lock.
+ *
+ * Please see the flags section below for flag documentation.
  */
 TAILQ_HEAD(pglist, vm_page);
 
@@ -140,11 +179,11 @@ RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry,
 RB_HEAD(vm_page_rb_tree, vm_page);
 
 struct vm_page {
-       TAILQ_ENTRY(vm_page) pageq;     /* vm_page_queues[] list (P)    */
+       TAILQ_ENTRY(vm_page) pageq;     /* vm_page_queues[] list        */
        RB_ENTRY(vm_page) rb_entry;     /* Red-Black tree based at object */
        struct spinlock spin;
-       struct vm_object *object;       /* which object am I in (O,P)*/
-       vm_pindex_t pindex;             /* offset into object (O,P) */
+       struct vm_object *object;       /* which object am I in */
+       vm_pindex_t pindex;             /* offset into object */
        vm_paddr_t phys_addr;           /* physical address of page */
        struct md_page md;              /* machine dependant stuff */
        uint16_t queue;                 /* page queue index */
@@ -157,7 +196,9 @@ struct vm_page {
        uint32_t wire_count;            /* wired down maps refs (P) */
        uint32_t busy_count;            /* soft-busy and hard-busy */
        int     hold_count;             /* page hold count */
-       int     ku_pagecnt;             /* kmalloc helper */
+       int     ku_pagecnt;             /* help kmalloc() w/oversized allocs */
+       int     unused01;               /* available */
+       /* 128 bytes */
 #ifdef VM_PAGE_DEBUG
        const char *busy_func;
        int     busy_line;
@@ -244,58 +285,86 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 extern long vmmeter_neg_slop_cnt;
 
 /*
- * These are the flags defined for vm_page.
- *
- *  PG_FICTITIOUS      It is not possible to translate the pte's physical
- *                     address back to a vm_page_t.  The vm_page_t is fake
- *                     or there isn't one at all.
- *
- *                     Fictitious vm_page_t's can be placed in objects and
- *                     it is possible to perform pmap functions on them
- *                     by virtual address range and by their vm_page_t.
- *                     However, pmap_count and writeable_count cannot be
- *                     tracked since there is no way to reverse-map the
- *                     pte back to the vm_page.
- *
- *                     (pmap operations by-vm_page can still be used to
- *                     adjust protections or remove the page from the pmap,
- *                     and will go only by the PG_MAPPED flag).
- *
- *                     NOTE: The contiguous memory management will flag
- *                           PG_FICTITIOUS on pages in the vm_page_array,
- *                           even though the physical addrses can be
- *                           translated back to a vm_page_t.
- *
- *                     NOTE: Implies PG_UNQUEUED.  PG_UNQUEUED must also
- *                           be set.  No queue management may be performed
- *                           on fictitious pages.
- *
- *  PG_UNQUEUED                The page is not to participate in any VM page queue
- *                     manipulation (even if it is otherwise a normal page).
- *
- *  PG_MAPPED          Only applies to non-fictitious regular pages, this
- *                     flag indicates that the page MIGHT be mapped into
- *                     zero or more pmaps via normal managed operations..
- *
- *                     The page might still be mapped in a specialized manner
- *                     (i.e. pmap_kenter(), or mapped into the buffer cache,
- *                     and so forth) without setting this flag.
- *
- *                     If this flag is clear it indicates that the page is
- *                     absolutely not mapped into a regular pmap by normal
- *                     means.  If set, the status is unknown.
- *
- *  PG_WRITEABLE       Similar to PG_MAPPED, indicates that the page might
- *                     be mapped RW into zero or more pmaps via normal
- *                     managed operations.
- *
- *                     If this flag is clear it indicates that the page is
- *                     absolutely not mapped RW into a regular pmap by normal
- *                     means.  If set, the status is unknown.
- *
- *  PG_SWAPPED         Indicates that the page is backed by a swap block.
- *                     Any VM object type other than OBJT_DEFAULT can contain
- *                     swap-backed pages now.
+ * The m->flags field is generally categorized as follows.  Unless otherwise
+ * noted, a flag may only be updated while the page is hard-busied.
+ *
+ * PG_UNQUEUED    - This prevents the page from being placed on any queue.
+ *
+ * PG_FICTITIOUS   - This indicates to the pmap subsystem that the
+ *                  page might not be reverse-addressable via
+ *                  PHYS_TO_VM_PAGE().   The vm_page_t might be
+ *                  temporary and not exist in the vm_page_array[].
+ *
+ *                  This also generally means that the pmap subsystem
+ *                  cannot synchronize the [M]odified and [A]ccessed
+ *                  bits with the related vm_page_t, and in fact that
+ *                  there might not even BE a related vm_page_t.
+ *
+ *                  Unlike the old system, the new pmap subsystem is
+ *                  able to do bulk operations on virtual address ranges
+ *                  containing fictitious pages, and can also pick-out
+ *                  specific fictitious pages by matching m->phys_addr
+ *                  if you supply a fake vm_page to it.
+ *
+ *                  Fictitious pages can still be organized into vm_objects
+ *                  if desired.
+ *
+ * PG_MAPPED      - Indicates that the page MIGHT be mapped into a pmap.
+ *                  If not set, guarantees that the page is not mapped.
+ *
+ *                  This bit can be set unlocked but only cleared while
+ *                  vm_page is hard-busied.
+ *
+ *                  For FICTITIOUS pages, this bit will be set automatically
+ *                  via a page fault (aka pmap_enter()), but must be cleared
+ *                  manually.
+ *
+ * PG_WRITEABLE    - Indicates that the page MIGHT be writeable via a pte.
+ *                  If not set, guarantees that the page is not writeable.
+ *
+ *                  This bit can be set unlocked but only cleared while
+ *                  vm_page is hard-busied.
+ *
+ *                  For FICTITIOUS pages, this bit will be set automatically
+ *                  via a page fault (aka pmap_enter()), but must be cleared
+ *                  manually.
+ *
+ * PG_SWAPPED     - Indicates that the page is backed by a swap block.
+ *                  Any VM object type other than OBJT_DEFAULT can contain
+ *                  swap-backed pages now.  The bit may only be adjusted
+ *                  while the page is hard-busied.
+ *
+ * PG_RAM         - Heuristic read-ahead-marker.  When I/O brings pages in,
+ *                  this bit is set on one of them to force a page fault on
+ *                  it to proactively read-ahead additional pages.
+ *
+ *                  Can be set or cleared at any time unlocked.
+ *
+ * PG_WINATCFLS           - This is used to give dirty pages a second chance
+ *                  on the inactive queue before getting flushed by
+ *                  the pageout daemon.
+ *
+ * PG_REFERENCED   - Indicates that the page has been accessed.  If the
+ *                  page is PG_MAPPED, this bit might not reflect the
+ *                  actual state of the page.  The pmap code synchronizes
+ *                  the [A]ccessed bit to this flag and then clears the
+ *                  [A]ccessed bit.
+ *
+ * PG_MARKER      - Used by any queue-scanning code to recognize a fake
+ *                  vm_page being used only as a scan marker.
+ *
+ * PG_NOTMETA     - Distinguish pages representing content from pages
+ *                  representing meta-data.
+ *
+ * PG_NEED_COMMIT  - May only be modified while the page is hard-busied.
+ *                  Indicates that even if the page might not appear to
+ *                  be dirty, it must still be validated against some
+ *                  remote entity (e.g. NFS) before it can be thrown away.
+ *
+ * PG_CLEANCHK    - Used by the vm_object subsystem to detect pages that
+ *                  might have been inserted during a scan.  May be changed
+ *                  at any time by the VM system (usually while holding the
+ *                  related vm_object's lock).
  */
 #define        PG_UNUSED0001   0x00000001
 #define        PG_UNUSED0002   0x00000002
index bc59105..59abd5c 100644 (file)
@@ -73,10 +73,11 @@ int verboseopt;
 struct vm_map kmap;
 vm_offset_t total_empty;
 vm_offset_t total_used;
+vm_offset_t total_real;
 vm_offset_t total_used_byid[VM_SUBSYS_LIMIT];
 
 static const char *formatnum(int64_t value);
-static const char *entryid(vm_subsys_t id);
+static const char *entryid(vm_subsys_t id, int *realmemp);
 static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes);
 static void mapscan(kvm_t *kd, vm_map_entry_t kptr, vm_map_entry_t ken,
                    vm_offset_t *lastp);
@@ -140,12 +141,20 @@ main(int ac, char **av)
     total_empty += kmap.max_addr - last;
 
     printf("-----------------------------------------------\n");
-    for (i = 0; i < VM_SUBSYS_LIMIT; ++i)
-       printf("Total-id: %9s %s\n", entryid(i), formatnum(total_used_byid[i]));
+    for (i = 0; i < VM_SUBSYS_LIMIT; ++i) {
+       int realmem;
+       const char *id = entryid(i, &realmem);
+
+       printf("Total-id: %9s %s%s\n",
+               id,
+               formatnum(total_used_byid[i]),
+               (realmem ? " (real memory)" : ""));
+    }
 
     printf("-----------------------------------------------\n");
     printf("Total empty space: %s\n", formatnum(total_empty));
     printf("Total used  space: %s\n", formatnum(total_used));
+    printf("Total real  space: %s\n", formatnum(total_real));
 }
 
 static const char *
@@ -197,6 +206,8 @@ formatnum(int64_t value)
 static void
 mapscan(kvm_t *kd, vm_map_entry_t kptr, vm_map_entry_t ken, vm_offset_t *lastp)
 {
+    int realmem;
+
     if (*lastp != ken->ba.start) {
            printf("%4ldM %p %08lx-%08lx (%s) EMPTY\n",
                total_used / 1024 / 1024,
@@ -210,35 +221,47 @@ mapscan(kvm_t *kd, vm_map_entry_t kptr, vm_map_entry_t ken, vm_offset_t *lastp)
        kptr,
        ken->ba.start, ken->ba.end,
        (ken->ba.end - ken->ba.start) / 1024,
-       entryid(ken->id),
+       entryid(ken->id, &realmem),
        ken->ba.map_object);
     total_used += ken->ba.end - ken->ba.start;
+
     if (ken->id < VM_SUBSYS_LIMIT)
        total_used_byid[ken->id] += ken->ba.end - ken->ba.start;
     else
        total_used_byid[0] += ken->ba.end - ken->ba.start;
+
+    if (realmem)
+       total_real += ken->ba.end - ken->ba.start;
+
     *lastp = ken->ba.end;
 }
 
 static
 const char *
-entryid(vm_subsys_t id)
+entryid(vm_subsys_t id, int *realmemp)
 {
        static char buf[32];
+       int dummy = 0;
+       int *realmem = (realmemp ? realmemp : &dummy);
+
+       *realmem = 0;
 
        switch(id) {
        case VM_SUBSYS_UNKNOWN:
                return("UNKNOWN");
        case VM_SUBSYS_KMALLOC:
+               *realmem = 1;
                return("KMALLOC");
        case VM_SUBSYS_STACK:
+               *realmem = 1;
                return("STACK");
        case VM_SUBSYS_IMGACT:
                return("IMGACT");
        case VM_SUBSYS_EFI:
                return("EFI");
        case VM_SUBSYS_RESERVED:
-               return("RESERVED");
+               *realmem = 1;
+               return("BOOT+KERN");
        case VM_SUBSYS_INIT:
                return("INIT");
        case VM_SUBSYS_PIPE:
@@ -256,16 +279,21 @@ entryid(vm_subsys_t id)
        case VM_SUBSYS_BOGUS:
                return("BOGUS");
        case VM_SUBSYS_BUF:
+               *realmem = 1;
                return("BUF");
        case VM_SUBSYS_BUFDATA:
                return("BUFDATA");
        case VM_SUBSYS_GD:
+               *realmem = 1;
                return("GD");
        case VM_SUBSYS_IPIQ:
+               *realmem = 1;
                return("IPIQ");
        case VM_SUBSYS_PVENTRY:
+               *realmem = 1;
                return("PVENTRY");
        case VM_SUBSYS_PML4:
+               *realmem = 1;
                return("PML4");
        case VM_SUBSYS_MAPDEV:
                return("MAPDEV");
@@ -283,12 +311,14 @@ entryid(vm_subsys_t id)
        case VM_SUBSYS_DRM_SCAT:
                return("DRM_SCAT");
        case VM_SUBSYS_DRM_VMAP:
+               *realmem = 1;
                return("DRM_VMAP");
        case VM_SUBSYS_DRM_TTM:
                return("DRM_TTM");
        case VM_SUBSYS_HAMMER:
                return("HAMMER");
        case VM_SUBSYS_VMPGHASH:
+               *realmem = 1;
                return("VMPGHASH");
        default:
                break;
index c7d0a04..31cb3d5 100644 (file)
 #include <getopt.h>
 
 struct nlist Nl[] = {
-#if 0
-    { "_vm_page_buckets" },
-    { "_vm_page_hash_mask" },
-#endif
     { "_vm_page_array" },
     { "_vm_page_array_size" },
+    { "_kernel_object" },
     { NULL }
 };
 
@@ -85,6 +82,7 @@ struct vm_page **vm_page_buckets;
 int vm_page_hash_mask;
 #endif
 struct vm_page *vm_page_array;
+struct vm_object *kernel_object_ptr;
 int vm_page_array_size;
 
 void checkpage(kvm_t *kd, vm_page_t mptr, vm_page_t m, struct vm_object *obj);
@@ -94,8 +92,29 @@ static int kkread_err(kvm_t *kd, u_long addr, void *buf, size_t nbytes);
 
 #if 0
 static void addsltrack(vm_page_t m);
-#endif
 static void dumpsltrack(kvm_t *kd);
+#endif
+static int unique_object(void *ptr);
+
+long count_free;
+long count_wired;              /* total */
+long count_wired_vnode;
+long count_wired_anon;
+long count_wired_in_pmap;
+long count_wired_pgtable;
+long count_wired_other;
+long count_wired_kernel;
+long count_wired_obj_other;
+
+long count_anon;
+long count_anon_in_pmap;
+long count_vnode;
+long count_device;
+long count_phys;
+long count_kernel;
+long count_unknown;
+long count_noobj_offqueue;
+long count_noobj_onqueue;
 
 int
 main(int ac, char **av)
@@ -145,12 +164,9 @@ main(int ac, char **av)
        exit(1);
     }
 
-#if 0
-    kkread(kd, Nl[0].n_value, &vm_page_buckets, sizeof(vm_page_buckets));
-    kkread(kd, Nl[1].n_value, &vm_page_hash_mask, sizeof(vm_page_hash_mask));
-#endif
     kkread(kd, Nl[0].n_value, &vm_page_array, sizeof(vm_page_array));
     kkread(kd, Nl[1].n_value, &vm_page_array_size, sizeof(vm_page_array_size));
+    kernel_object_ptr = (void *)Nl[2].n_value;
 
     /*
      * Scan the vm_page_array validating all pages with associated objects
@@ -165,20 +181,63 @@ main(int ac, char **av)
            kkread(kd, (u_long)m.object, &obj, sizeof(obj));
            checkpage(kd, &vm_page_array[i], &m, &obj);
        }
+       if (m.queue >= PQ_HOLD) {
+           qstr = "HOLD";
+       } else if (m.queue >= PQ_CACHE) {
+           qstr = "CACHE";
+       } else if (m.queue >= PQ_ACTIVE) {
+           qstr = "ACTIVE";
+       } else if (m.queue >= PQ_INACTIVE) {
+           qstr = "INACTIVE";
+       } else if (m.queue >= PQ_FREE) {
+           qstr = "FREE";
+           ++count_free;
+       } else {
+           qstr = "NONE";
+       }
+       if (m.wire_count) {
+               ++count_wired;
+               if (m.object == NULL) {
+                       if ((m.flags & PG_MAPPED) &&
+                           (m.flags & PG_WRITEABLE) &&
+                           (m.flags & PG_UNQUEUED)) {
+                               ++count_wired_pgtable;
+                       } else {
+                               ++count_wired_other;
+                       }
+               } else if (m.object == kernel_object_ptr) {
+                       ++count_wired_kernel;
+               } else {
+                       switch(obj.type) {
+                       case OBJT_VNODE:
+                               ++count_wired_vnode;
+                               break;
+                       case OBJT_DEFAULT:
+                       case OBJT_SWAP:
+                               if (m.md.pmap_count)
+                                       ++count_wired_in_pmap;
+                               else
+                                       ++count_wired_anon;
+                               break;
+                       default:
+                               ++count_wired_obj_other;
+                               break;
+                       }
+               }
+       } else if (m.md.pmap_count) {
+               if (m.object && m.object != kernel_object_ptr) {
+                       switch(obj.type) {
+                       case OBJT_DEFAULT:
+                       case OBJT_SWAP:
+                               ++count_anon_in_pmap;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
        if (verboseopt) {
-           if (m.queue >= PQ_HOLD) {
-               qstr = "HOLD";
-           } else if (m.queue >= PQ_CACHE) {
-               qstr = "CACHE";
-           } else if (m.queue >= PQ_ACTIVE) {
-               qstr = "ACTIVE";
-           } else if (m.queue >= PQ_INACTIVE) {
-               qstr = "INACTIVE";
-           } else if (m.queue >= PQ_FREE) {
-               qstr = "FREE";
-           } else {
-               qstr = "NONE";
-           } 
            printf("page %p obj %p/%-8ju(%016jx) val=%02x dty=%02x hold=%d "
                   "wire=%-2d act=%-3d busy=%d %8s",
                &vm_page_array[i],
@@ -193,33 +252,59 @@ main(int ac, char **av)
                m.busy_count,
                qstr
            );
-           if (m.object) {
-               switch(obj.type) {
-               case OBJT_DEFAULT:
-                   ostr = "default";
-                   break;
-               case OBJT_SWAP:
-                   ostr = "swap";
-                   break;
-               case OBJT_VNODE:
-                   ostr = "vnode";
-                   break;
-               case OBJT_DEVICE:
-                   ostr = "device";
-                   break;
-               case OBJT_PHYS:
-                   ostr = "phys";
-                   break;
-               case OBJT_DEAD:
-                   ostr = "dead";
-                   break;
-               default:
-                   ostr = "unknown";
-                   break;
-               }
-           } else {
-               ostr = "-";
+       }
+
+       if (m.object == kernel_object_ptr) {
+               ostr = "kernel";
+               if (unique_object(m.object))
+                       count_kernel += obj.resident_page_count;
+       } else if (m.object) {
+           switch(obj.type) {
+           case OBJT_DEFAULT:
+               ostr = "default";
+               if (unique_object(m.object))
+                       count_anon += obj.resident_page_count;
+               break;
+           case OBJT_SWAP:
+               ostr = "swap";
+               if (unique_object(m.object))
+                       count_anon += obj.resident_page_count;
+               break;
+           case OBJT_VNODE:
+               ostr = "vnode";
+               if (unique_object(m.object))
+                       count_vnode += obj.resident_page_count;
+               break;
+           case OBJT_DEVICE:
+               ostr = "device";
+               if (unique_object(m.object))
+                       count_device += obj.resident_page_count;
+               break;
+           case OBJT_PHYS:
+               ostr = "phys";
+               if (unique_object(m.object))
+                       count_phys += obj.resident_page_count;
+               break;
+           case OBJT_DEAD:
+               ostr = "dead";
+               if (unique_object(m.object))
+                       count_unknown += obj.resident_page_count;
+               break;
+           default:
+               if (unique_object(m.object))
+                       count_unknown += obj.resident_page_count;
+               ostr = "unknown";
+               break;
            }
+       } else {
+           ostr = "-";
+           if (m.queue == PQ_NONE)
+                   ++count_noobj_offqueue;
+           else if (m.queue - m.pc != PQ_FREE)
+                   ++count_noobj_onqueue;
+       }
+
+       if (verboseopt) {
            printf(" %-7s", ostr);
            if (m.busy_count & PBUSY_LOCKED)
                printf(" BUSY");
@@ -243,8 +328,8 @@ main(int ac, char **av)
                printf(" SWAPINPROG");
            if (m.flags & PG_NOSYNC)
                printf(" NOSYNC");
-           if (m.flags & PG_UNMANAGED)
-               printf(" UNMANAGED");
+           if (m.flags & PG_UNQUEUED)
+               printf(" UNQUEUED");
            if (m.flags & PG_MARKER)
                printf(" MARKER");
            if (m.flags & PG_RAM)
@@ -264,6 +349,41 @@ main(int ac, char **av)
     }
     if (debugopt || verboseopt)
        printf("\n");
+    printf("%8.2fM free\n", count_free * 4096.0 / 1048576.0);
+
+    printf("%8.2fM wired vnode (in buffer cache)\n",
+       count_wired_vnode * 4096.0 / 1048576.0);
+    printf("%8.2fM wired in-pmap\n",
+       count_wired_in_pmap * 4096.0 / 1048576.0);
+    printf("%8.2fM wired pgtable\n",
+       count_wired_pgtable * 4096.0 / 1048576.0);
+    printf("%8.2fM wired anon\n",
+       count_wired_anon * 4096.0 / 1048576.0);
+    printf("%8.2fM wired kernel_object\n",
+       count_wired_kernel * 4096.0 / 1048576.0);
+    printf("%8.2fM wired other (unknown object)\n",
+       count_wired_obj_other * 4096.0 / 1048576.0);
+    printf("%8.2fM wired other (no object, probably kernel)\n",
+       count_wired_other * 4096.0 / 1048576.0);
+    printf("%8.2fM WIRED TOTAL\n",
+       count_wired * 4096.0 / 1048576.0);
+
+    printf("\n");
+    printf("%8.2fM anonymous (total, includes in-pmap)\n",
+       count_anon * 4096.0 / 1048576.0);
+    printf("%8.2fM anonymous memory in-pmap\n",
+       count_anon_in_pmap * 4096.0 / 1048576.0);
+    printf("%8.2fM vnode (includes wired)\n",
+       count_vnode * 4096.0 / 1048576.0);
+    printf("%8.2fM device\n", count_device * 4096.0 / 1048576.0);
+    printf("%8.2fM phys\n", count_phys * 4096.0 / 1048576.0);
+    printf("%8.2fM kernel (includes wired)\n",
+       count_kernel * 4096.0 / 1048576.0);
+    printf("%8.2fM unknown\n", count_unknown * 4096.0 / 1048576.0);
+    printf("%8.2fM no_object, off queue (includes wired w/o object)\n",
+       count_noobj_offqueue * 4096.0 / 1048576.0);
+    printf("%8.2fM no_object, on non-free queue (includes wired w/o object)\n",
+       count_noobj_onqueue * 4096.0 / 1048576.0);
 
 #if 0
     /*
@@ -295,7 +415,9 @@ main(int ac, char **av)
 #endif
     if (debugopt)
        printf("\n");
+#if 0
     dumpsltrack(kd);
+#endif
     return(0);
 }
 
@@ -440,3 +562,33 @@ dumpsltrack(kvm_t *kd)
        }
        printf("FullZones/TotalZones: %ld/%ld\n", full_zones, total_zones);
 }
+
+#define HASH_SIZE      (1024*1024)     /* buckets in the duplicate-object hash; power of 2 so HASH_MASK is valid */
+#define HASH_MASK      (HASH_SIZE - 1)
+
+struct dup_entry {
+       struct dup_entry *next;         /* collision chain link */
+       void    *ptr;                   /* kernel address of a vm_object already counted */
+};
+
+struct dup_entry *dup_hash[HASH_SIZE]; /* chained hash of vm_objects seen so far */
+
+static int
+unique_object(void *ptr)               /* return 1 the first time ptr is seen (and record it), 0 on repeats */
+{
+       struct dup_entry *hen;
+       int hv;
+
+       hv = (intptr_t)ptr ^ ((intptr_t)ptr >> 20);     /* fold high bits in; truncation to int is harmless after masking */
+       hv &= HASH_MASK;
+       for (hen = dup_hash[hv]; hen; hen = hen->next) {
+               if (hen->ptr == ptr)
+                       return 0;       /* already counted this object */
+       }
+       hen = malloc(sizeof(*hen));     /* NOTE(review): return unchecked — NULL deref on OOM; acceptable for a debug tool? */
+       hen->next = dup_hash[hv];
+       hen->ptr = ptr;
+       dup_hash[hv] = hen;
+
+       return 1;
+}
index b2ff084..de2b8d3 100644 (file)
@@ -164,7 +164,7 @@ static struct nlist namelist[] = {
 #define STATCOL                 2
 #define MEMROW          2      /* uses 4 rows and 31 cols */
 #define MEMCOLA                 0
-#define MEMCOLB                 20
+#define MEMCOLB                 17
 #define PAGEROW                 2      /* uses 4 rows and 26 cols */
 #define PAGECOL                45
 #define INTSROW                 6      /* uses all rows to bottom and 17 cols */
@@ -320,9 +320,10 @@ labelkre(void)
 
        mvprintw(MEMROW + 2, MEMCOLA + 14, "i+c+f");
 
-       mvprintw(MEMROW + 0, MEMCOLB, "VM-rss");
-       mvprintw(MEMROW + 1, MEMCOLB, "VM-swp");
-       mvprintw(MEMROW + 1, MEMCOLB + 15, "/");
+       mvprintw(MEMROW + 0, MEMCOLB, "PMAP");
+       mvprintw(MEMROW + 0, MEMCOLB + 13, "VMRSS");
+       mvprintw(MEMROW + 1, MEMCOLB, "SWAP");
+       mvprintw(MEMROW + 1, MEMCOLB + 13, "SWTOT");
 
        mvprintw(PAGEROW, PAGECOL,     "       VNODE PAGER    SWAP PAGER ");
        mvprintw(PAGEROW + 1, PAGECOL, "          in   out      in   out ");
@@ -547,9 +548,14 @@ showkre(void)
                    vms.v_cache_count +
                    vms.v_free_count), MEMROW + 2, MEMCOLA + 7, 6, 0);
        put64(s.physmem, MEMROW + 3, MEMCOLA + 7, 6, 0);
-       put64(pgtob(total.t_rm), MEMROW + 0, MEMCOLB + 7, 6, 0);
-       put64(pgtob(total.t_vm - total.t_rm), MEMROW + 1, MEMCOLB + 7, 6, 0);
-       put64(pgtob(s.kvmsw[kvnsw].ksw_total), MEMROW + 1, MEMCOLB + 17, 6, 0);
+       put64(pgtob(total.t_arm),
+                       MEMROW + 0, MEMCOLB + 5, 6, 0);
+       put64(pgtob(total.t_avm + total.t_avmshr),
+                       MEMROW + 0, MEMCOLB + 19, 6, 0);
+       put64(pgtob(total.t_vm - total.t_rm),
+                       MEMROW + 1, MEMCOLB + 5, 6, 0);
+       put64(pgtob(s.kvmsw[kvnsw].ksw_total),
+                       MEMROW + 1, MEMCOLB + 19, 6, 0);
 
 #if 0
        put64(pgtob(total.t_arm), MEMROW + 2, MEMCOL + 4, 6, 0);