kernel - VM rework part 5 - Cleanup
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 10 May 2019 18:37:00 +0000 (11:37 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sun, 12 May 2019 04:07:39 +0000 (21:07 -0700)
* Clean up vm_map_entry_shadow()

* Remove (unused) vmspace_president_count()

* Remove the (barely used) lwkt_token typedef; spell out struct lwkt_token
  instead.

* Clean up the vm_map_aux, vm_map_entry, vm_map, and vm_object
  structures

* Adjustments to in-code documentation
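
The typedef removal means token declarations now spell out the struct tag.
A minimal sketch of the resulting style, pieced together from the thread.h,
mpipe.h and nfs_node.c hunks below (the example_token name is illustrative
only):

    /* thread.h: the structure keeps its fields, only the typedef name goes away */
    struct lwkt_token {
            long                t_count;        /* shared/exclreq/exclusive access */
            struct lwkt_tokref  *t_ref;         /* exclusive ref */
            long                t_collisions;   /* collision counter */
            const char          *t_desc;        /* descriptive name */
    };

    /* users now name the struct tag explicitly, either as a member ... */
    struct lwkt_token token;

    /* ... or as a statically initialized token */
    static struct lwkt_token example_token = LWKT_TOKEN_INITIALIZER(example_token);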

sys/kern/kern_kinfo.c
sys/kern/sysv_shm.c
sys/sys/mpipe.h
sys/sys/thread.h
sys/vfs/nfs/nfs_node.c
sys/vm/vm_map.c
sys/vm/vm_map.h
sys/vm/vm_object.h
sys/vm/vm_page.h
sys/vm/vm_pageout.c

diff --git a/sys/kern/kern_kinfo.c b/sys/kern/kern_kinfo.c
index 047cf13..c81269d 100644
@@ -149,10 +149,6 @@ fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
        if ((vm = p->p_vmspace) != NULL) {
                kp->kp_vm_map_size = vm->vm_map.size;
                kp->kp_vm_rssize = vmspace_resident_count(vm);
-#ifdef _KERNEL
-               /*XXX MP RACES */
-               /*kp->kp_vm_prssize = vmspace_president_count(vm);*/
-#endif
                kp->kp_vm_swrss = vm->vm_swrss;
                kp->kp_vm_tsize = btoc(vm->vm_tsize);
                kp->kp_vm_dsize = btoc(vm->vm_dsize);
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 9c01b77..6fe91a8 100644
@@ -114,11 +114,9 @@ struct     shminfo shminfo = {
  *                 or seg-faults unexpectedly.
  *
  * use-phys        Shared memory segments are to use physical memory by
- *                 default, which allows the kernel to optimize (remove)
- *                 pv_entry management structures for the related PTEs and
- *                 prevents paging.  This has distinctly different and
- *                 usually desireable characteristics verses mmap()ing
- *                 anonymous memory.
+ *                 default, which may allow the kernel to better-optimize
+ *                 the pmap and reduce overhead.  The pages are effectively
+ *                 wired.
  */
 static int shm_allow_removed = 1;
 static int shm_use_phys = 1;
diff --git a/sys/sys/mpipe.h b/sys/sys/mpipe.h
index 5424f3a..32168d3 100644
@@ -90,7 +90,7 @@ struct malloc_pipe {
     int                total_count;    /* total outstanding allocations incl free */
     int                ary_count;      /* guarenteed allocation count */
     int                max_count;      /* maximum count (M_NOWAIT used beyond nom) */
-    lwkt_token token;
+    struct lwkt_token token;
     void       **array;        /* array[ary_count] */
     void       (*construct)(void *buf, void *priv);
     void       (*deconstruct)(void *buf, void *priv);
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index ec33f5a..e1eddda 100644
@@ -121,12 +121,12 @@ struct intrframe;
  * reduces the complexity of the token release code.
  */
 
-typedef struct lwkt_token {
+struct lwkt_token {
     long               t_count;        /* Shared/exclreq/exclusive access */
     struct lwkt_tokref *t_ref;         /* Exclusive ref */
     long               t_collisions;   /* Collision counter */
     const char         *t_desc;        /* Descriptive name */
-} lwkt_token;
+};
 
 #define TOK_EXCLUSIVE  0x00000001      /* Exclusive lock held */
 #define TOK_EXCLREQ    0x00000002      /* Exclusive request pending */
diff --git a/sys/vfs/nfs/nfs_node.c b/sys/vfs/nfs/nfs_node.c
index e86a78f..b714faf 100644
 
 static MALLOC_DEFINE(M_NFSNODE, "NFS node", "NFS node");
 
-static struct objcache *nfsnode_objcache;
-static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
-static u_long nfsnodehash;
-static lwkt_token nfsnhash_token = LWKT_TOKEN_INITIALIZER(nfsnhash_token);
+static struct lwkt_token nfsnhash_token =
+                       LWKT_TOKEN_INITIALIZER(nfsnhash_token);
 static struct lock nfsnhash_lock;
+__read_mostly static struct objcache *nfsnode_objcache;
+__read_mostly static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
+__read_mostly static u_long nfsnodehash;
 
 #define TRUE   1
 #define        FALSE   0
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 6174091..d71556d 100644
@@ -740,17 +740,16 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
                length = atop(entry->end - entry->start);
        ba = kmalloc(sizeof(*ba), M_MAP_BACKING, M_INTWAIT); /* copied later */
 
-       /*
-        * The ref on source is inherited when we move it into the ba.
-        */
-       source = entry->ba.object;
-
        /*
         * Don't create the new object if the old object isn't shared.
         *
+        * The ref on source is inherited when we move it into the ba.
         * If addref is non-zero additional ref(s) are being added (probably
         * for map entry fork purposes), so clear OBJ_ONEMAPPING.
         *
+        * Caller ensures source exists (all backing_ba's must have objects),
+        * typically indirectly by virtue of the NEEDS_COPY flag being set.
+        *
         * WARNING! Checking ref_count == 1 only works because we are testing
         *          the object embedded in the entry (entry->ba.object).
         *          This test DOES NOT WORK if checking an object hanging off
@@ -758,32 +757,35 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
         *          vm_map_backing might be shared, or part of a chain that
         *          is shared.  Checking ba->refs is worthless.
         */
-       drop_source = 0;
-       if (source) {
-               if (source->type != OBJT_VNODE) {
-                       vm_object_hold(source);
-                       if (source->ref_count == 1 &&
-                           source->handle == NULL &&
-                           (source->type == OBJT_DEFAULT ||
-                            source->type == OBJT_SWAP)) {
-                               if (addref) {
-                                       vm_object_reference_locked(source);
-                                       vm_object_clear_flag(source,
-                                                            OBJ_ONEMAPPING);
-                               }
-                               vm_object_drop(source);
-                               kfree(ba, M_MAP_BACKING);
-                               goto done;
+       source = entry->ba.object;
+       KKASSERT(source);
+
+       if (source->type != OBJT_VNODE) {
+               vm_object_hold(source);
+               if (source->ref_count == 1 &&
+                   source->handle == NULL &&
+                   (source->type == OBJT_DEFAULT ||
+                    source->type == OBJT_SWAP)) {
+                       if (addref) {
+                               vm_object_reference_locked(source);
+                               vm_object_clear_flag(source,
+                                                    OBJ_ONEMAPPING);
                        }
-                       /*vm_object_reference_locked(source);*/
-                       vm_object_clear_flag(source, OBJ_ONEMAPPING);
-                       drop_source = 1;        /* drop source at end */
-               } else {
-                       /*vm_object_reference_quick(source);*/
-                       vm_object_clear_flag(source, OBJ_ONEMAPPING);
+                       vm_object_drop(source);
+                       kfree(ba, M_MAP_BACKING);
+                       goto done;
                }
+               drop_source = 1;        /* drop source at end */
+       } else {
+               drop_source = 0;
        }
 
+       /*
+        * Once it becomes part of a backing_ba chain it can wind up anywhere,
+        * drop the ONEMAPPING flag now.
+        */
+       vm_object_clear_flag(source, OBJ_ONEMAPPING);
+
        /*
         * Allocate a new object with the given length.  The new object
         * is returned referenced but we may have to add another one.
@@ -826,32 +828,16 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
        entry->ba.offset = 0;
        entry->ba.refs = 0;
 
-       if (source) {
-#if 0
-               /* shadowing no longer messes with generation count */
-               if (drop_source) {
-                       atomic_add_int(&source->generation, 1);
-                       vm_object_set_flag(result, OBJ_ONSHADOW);
-               }
-#endif
-               /* cpu localization twist */
-               result->pg_color = vm_quickcolor();
-       }
+       /* cpu localization twist */
+       result->pg_color = vm_quickcolor();
 
        /*
         * Adjust the return storage.  Drop the ref on source before
         * returning.
         */
        vm_object_drop(result);
-       if (source) {
-               if (drop_source) {
-                       /*vm_object_deallocate_locked(source);*/
-                       vm_object_drop(source);
-               } else {
-                       /*vm_object_deallocate(source);*/
-               }
-       }
-
+       if (drop_source)
+               vm_object_drop(source);
 done:
        entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 }
@@ -3510,11 +3496,9 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
 }
 
 /*
- * vmspace_fork:
- * Create a new process vmspace structure and vm_map
- * based on those of an existing process.  The new map
- * is based on the old map, according to the inheritance
- * values on the regions in that map.
+ * Create a vmspace for a new process and its related vm_map based on an
+ * existing vmspace.  The new map inherits information from the old map
+ * according to inheritance settings.
  *
  * The source map must not be locked.
  * No requirements.
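
In outline, the cleaned-up path through vm_map_entry_shadow() above now reads
as follows (a condensed sketch using only names from the hunk; the addref
handling, the allocation of the replacement object, and the backing_ba
bookkeeping are elided):

    source = entry->ba.object;
    KKASSERT(source);                       /* all backing_ba's must have objects */

    if (source->type != OBJT_VNODE) {
            vm_object_hold(source);
            if (source->ref_count == 1 && source->handle == NULL &&
                (source->type == OBJT_DEFAULT || source->type == OBJT_SWAP)) {
                    /* not shared: keep the existing object, no shadow needed */
                    goto done;
            }
            drop_source = 1;                /* drop the hold at the end */
    } else {
            drop_source = 0;
    }

    /* entering a backing_ba chain, so the ONEMAPPING hint no longer holds */
    vm_object_clear_flag(source, OBJ_ONEMAPPING);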
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 898dbb2..a860c6f 100644
@@ -63,9 +63,9 @@
  */
 
 /*
- *     Virtual memory map module definitions.
+ * Virtual memory map module definitions.  The vm_map houses the pmap
+ * structure which controls the mmu context for a process.
  */
-
 #ifndef        _VM_VM_MAP_H_
 #define        _VM_VM_MAP_H_
 
 struct vm_map_rb_tree;
 RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry, rb_entry, rb_vm_map_compare);
 
-/*
- *     Types defined:
- *
- *     vm_map_t                the high-level address map data structure.
- *     vm_map_entry_t          an entry in an address map.
- */
-
 typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;
 
+/*
+ * Aux structure depends on map type and/or flags.
+ */
 union vm_map_aux {
        vm_offset_t avail_ssize;        /* amt can grow if this is a stack */
        vpte_t master_pde;              /* virtual page table root */
@@ -175,9 +171,9 @@ struct vm_map_backing {
         */
        union {
                struct vm_object *object;       /* vm_object */
-               struct vm_map *sub_map;         /* belongs to another map */
-               int     (*uksmap)(struct cdev *dev, vm_page_t fake);
-               void    *map_object;            /* generic */
+               struct vm_map   *sub_map;       /* belongs to another map */
+               int             (*uksmap)(struct cdev *dev, vm_page_t fake);
+               void            *map_object;    /* generic */
        };
 
        vm_ooffset_t            offset;         /* cumulative offset */
@@ -208,17 +204,17 @@ typedef struct vm_map_backing *vm_map_backing_t;
  */
 struct vm_map_entry {
        RB_ENTRY(vm_map_entry) rb_entry;
-       vm_offset_t start;              /* start address */
-       vm_offset_t end;                /* end address */
+       vm_offset_t     start;          /* start address */
+       vm_offset_t     end;            /* end address */
        union vm_map_aux aux;           /* auxillary data */
        struct vm_map_backing ba;       /* backing object chain */
-       vm_eflags_t eflags;             /* map entry flags */
-       vm_maptype_t maptype;           /* type of VM mapping */
-       vm_prot_t protection;           /* protection code */
-       vm_prot_t max_protection;       /* maximum protection */
-       vm_inherit_t inheritance;       /* inheritance */
-       int wired_count;                /* can be paged if = 0 */
-       vm_subsys_t id;                 /* subsystem id */
+       vm_eflags_t     eflags;         /* map entry flags */
+       vm_maptype_t    maptype;        /* type of VM mapping */
+       vm_prot_t       protection;     /* protection code */
+       vm_prot_t       max_protection; /* maximum protection */
+       vm_inherit_t    inheritance;    /* inheritance */
+       int             wired_count;    /* can be paged if = 0 */
+       vm_subsys_t     id;             /* subsystem id */
 };
 
 typedef struct vm_map_entry *vm_map_entry_t;
@@ -328,24 +324,22 @@ typedef struct vm_map_freehint vm_map_freehint_t;
 RB_HEAD(vm_map_rb_tree, vm_map_entry);
 
 struct vm_map {
-       struct lock lock;               /* Lock for map data */
+       struct          lock lock;      /* Lock for map data */
        struct vm_map_rb_tree rb_root;  /* Organize map entries */
-       vm_offset_t min_addr;           /* min address */
-       vm_offset_t max_addr;           /* max address */
-       int nentries;                   /* Number of entries */
-       unsigned int timestamp;         /* Version number */
-       vm_size_t size;                 /* virtual size */
-       u_char system_map;              /* Am I a system map? */
-       u_char freehint_newindex;
-       u_char unused02;
-       u_char unused03;
-       vm_flags_t flags;               /* flags for this vm_map */
+       vm_offset_t     min_addr;       /* min address */
+       vm_offset_t     max_addr;       /* max address */
+       int             nentries;       /* Number of entries */
+       unsigned int    timestamp;      /* Version number */
+       vm_size_t       size;           /* virtual size */
+       u_char          system_map;     /* Am I a system map? */
+       u_char          freehint_newindex;
+       u_char          unused02;
+       u_char          unused03;
+       vm_flags_t      flags;          /* flags for this vm_map */
        vm_map_freehint_t freehint[VM_MAP_FFCOUNT];
-       struct pmap *pmap;              /* Physical map */
-       u_int president_cache;          /* Remember president count */
-       u_int president_ticks;          /* Save ticks for cache */
+       struct pmap     *pmap;          /* Physical map */
        struct vm_map_ilock *ilock_base;/* interlocks */
-       struct spinlock ilock_spin;     /* interlocks (spinlock for) */
+       struct spinlock ilock_spin;     /* interlocks (spinlock for) */
        struct lwkt_token token;        /* Soft serializer */
        vm_offset_t pgout_offset;       /* for RLIMIT_RSS scans */
 };
@@ -545,64 +539,6 @@ vmspace_resident_count(struct vmspace *vmspace)
        return pmap_resident_count(vmspace_pmap(vmspace));
 }
 
-/*
- * Calculates the proportional RSS and returning the
- * accrued result.  This is a loose value for statistics/display
- * purposes only and will only be updated if we can acquire
- * a non-blocking map lock.
- *
- * (used by userland or the kernel)
- */
-static __inline u_int
-vmspace_president_count(struct vmspace *vmspace)
-{
-       vm_map_t map = &vmspace->vm_map;
-       vm_map_entry_t cur;
-       vm_object_t object;
-       u_int count = 0;
-
-#ifdef _KERNEL
-       if (map->president_ticks == ticks / hz || vm_map_lock_read_try(map))
-               return(map->president_cache);
-#endif
-
-       RB_FOREACH(cur, vm_map_rb_tree, &map->rb_root) {
-               switch(cur->maptype) {
-               case VM_MAPTYPE_NORMAL:
-               case VM_MAPTYPE_VPAGETABLE:
-                       if ((object = cur->ba.object) == NULL)
-                               break;
-                       if (object->type != OBJT_DEFAULT &&
-                           object->type != OBJT_SWAP) {
-                               break;
-                       }
-
-#if 0
-                       /*
-                        * synchronize non-zero case, contents of field
-                        * can change at any time due to pmap ops.
-                        */
-                       if ((n = object->agg_pv_list_count) != 0) {
-#ifdef _KERNEL
-                               cpu_ccfence();
-#endif
-                               count += object->resident_page_count / n;
-                       }
-#endif
-                       break;
-               default:
-                       break;
-               }
-       }
-#ifdef _KERNEL
-       map->president_cache = count;
-       map->president_ticks = ticks / hz;
-       vm_map_unlock_read(map);
-#endif
-
-       return(count);
-}
-
 /*
  * Number of kernel maps and entries to statically allocate, required
  * during boot to bootstrap the VM system.
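
The new header comment notes that the vm_map houses the pmap which controls
the mmu context.  A minimal sketch of how the pieces relate, using only
accessors visible elsewhere in this commit (p->p_vmspace and vm_map.size from
the kern_kinfo.c hunk, vmspace_pmap()/pmap_resident_count() from the
vmspace_resident_count() inline above):

    struct vmspace *vm = p->p_vmspace;      /* per-process address space */
    vm_map_t map = &vm->vm_map;             /* layout: rb-tree of vm_map_entry */
    vm_size_t vsize = map->size;            /* virtual size tracked by the map */
    u_int rss = pmap_resident_count(vmspace_pmap(vm)); /* resident pages per the pmap */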
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index b3bd831..624ec96 100644
@@ -113,6 +113,7 @@ int rb_swblock_compare(struct swblock *, struct swblock *);
 
 RB_PROTOTYPE2(swblock_rb_tree, swblock, swb_entry, rb_swblock_compare,
              vm_pindex_t);
+RB_HEAD(swblock_rb_tree, swblock);
 
 enum obj_type { 
        OBJT_DEFAULT,
@@ -129,27 +130,25 @@ typedef u_char objtype_t;
 /*
  * A VM object which represents an arbitrarily sized data store.
  *
- * LOCKING:
- *     vmobj_tokens[n] for object_list, hashed by address.
- *
- *     vm_object_hold/drop() for most vm_object related operations
- *     to avoid ref confusion in the deallocator.
+ * vm_objects are soft-locked with their token, meaning that any
+ * blocking can allow other threads to squeeze in some work.
  */
 struct vm_object {
-       TAILQ_ENTRY(vm_object) object_list; /* locked by vmobj_tokens[n] */
-       RB_HEAD(vm_page_rb_tree, vm_page) rb_memq;      /* resident pages */
-       int generation;                 /* generation ID */
-       vm_pindex_t size;               /* Object size */
-       int ref_count;
-       vm_memattr_t memattr;           /* default memory attribute for pages */
-       objtype_t type;                 /* type of pager */
-       u_short flags;                  /* see below */
-       u_short pg_color;               /* color of first page in obj */
-       u_int paging_in_progress;       /* Paging (in or out) so don't collapse or destroy */
-       long resident_page_count;       /* number of resident pages */
-       TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
-       void *handle;                   /* control handle: vp, etc */
-       int hold_count;                 /* count prevents destruction */
+       struct lwkt_token token;
+       TAILQ_ENTRY(vm_object) object_list;
+       struct vm_page_rb_tree rb_memq; /* resident pages */
+       int             generation;     /* generation ID */
+       vm_pindex_t     size;           /* Object size */
+       int             ref_count;
+       vm_memattr_t    memattr;        /* default memory attribute for pages */
+       objtype_t       type;           /* type of pager */
+       u_short         flags;          /* see below */
+       u_short         pg_color;       /* color of first page in obj */
+       u_int           paging_in_progress;     /* Activity in progress */
+       long            resident_page_count;    /* number of resident pages */
+       TAILQ_ENTRY(vm_object) pager_object_list; /* optional use by pager */
+       void            *handle;        /* control handle: vp, etc */
+       int             hold_count;     /* count prevents destruction */
        
 #if defined(DEBUG_LOCKS)
        /* 
@@ -157,10 +156,10 @@ struct vm_object {
         */
 
 #define VMOBJ_DEBUG_ARRAY_SIZE         (32)
-       char debug_hold_thrs[VMOBJ_DEBUG_ARRAY_SIZE][64];
-       const char *debug_hold_file[VMOBJ_DEBUG_ARRAY_SIZE];
-       int debug_hold_line[VMOBJ_DEBUG_ARRAY_SIZE];
-       int     debug_index;
+       char            debug_hold_thrs[VMOBJ_DEBUG_ARRAY_SIZE][64];
+       const char      *debug_hold_file[VMOBJ_DEBUG_ARRAY_SIZE];
+       int             debug_hold_line[VMOBJ_DEBUG_ARRAY_SIZE];
+       int             debug_index;
 #endif
 
        union {
@@ -181,10 +180,9 @@ struct vm_object {
         * store.  For vnodes the swap backing store acts as a fast
         * data cache but the vnode contains the official data.
         */
-       RB_HEAD(swblock_rb_tree, swblock) swblock_root;
-       long    swblock_count;
-       struct  lwkt_token      token;
-       struct md_object        md;     /* machine specific (typ pmap) */
+       struct swblock_rb_tree swblock_root;
+       long            swblock_count;
+       struct md_object md;            /* machine specific (typ pmap) */
 };
 
 /*
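
The "soft-locked with their token" note above corresponds to the
vm_object_hold()/vm_object_drop() pairing used throughout the vm_map.c hunk
earlier; a minimal sketch of the idiom, assuming the usual pairing, with
hold_count preventing destruction while the holder may block:

    vm_object_hold(object);         /* take the object's token and a hold ref */
    /* ... work on the object; if this blocks, other token users can interleave ... */
    vm_object_drop(object);         /* release the hold and the token */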
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 791c8b6..a82cc96 100644
@@ -137,6 +137,7 @@ int rb_vm_page_compare(struct vm_page *, struct vm_page *);
 struct vm_page_rb_tree;
 RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry,
              rb_vm_page_compare, vm_pindex_t);
+RB_HEAD(vm_page_rb_tree, vm_page);
 
 struct vm_page {
        TAILQ_ENTRY(vm_page) pageq;     /* vm_page_queues[] list (P)    */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 3bd6a60..8cff802 100644
@@ -1960,9 +1960,6 @@ vm_pageout_free_page_calc(vm_size_t count)
        if (count < vmstats.v_page_count)
                 return 0;
        /*
-        * free_reserved needs to include enough for the largest swap pager
-        * structures plus enough for any pv_entry structs when paging.
-        *
         * v_free_min           normal allocations
         * v_free_reserved      system allocations
         * v_pageout_free_min   allocations by pageout daemon