From 8492a2fe35953d0750be8475aaee5ef21485860c Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 10 May 2019 11:37:00 -0700
Subject: [PATCH] kernel - VM rework part 5 - Cleanup

* Cleanup vm_map_entry_shadow()

* Remove (unused) vmspace_president_count()

* Remove (barely used) struct lwkt_token typedef.

* Cleanup the vm_map_aux, vm_map_entry, vm_map, and vm_object structures

* Adjustments to in-code documentation
---
 sys/kern/kern_kinfo.c  |   4 --
 sys/kern/sysv_shm.c    |   8 +--
 sys/sys/mpipe.h        |   2 +-
 sys/sys/thread.h       |   4 +-
 sys/vfs/nfs/nfs_node.c |   9 +--
 sys/vm/vm_map.c        |  88 ++++++++++++-----------------
 sys/vm/vm_map.h        | 124 ++++++++++------------------------------
 sys/vm/vm_object.h     |  52 +++++++++--------
 sys/vm/vm_page.h       |   1 +
 sys/vm/vm_pageout.c    |   3 -
 10 files changed, 103 insertions(+), 192 deletions(-)

diff --git a/sys/kern/kern_kinfo.c b/sys/kern/kern_kinfo.c
index 047cf13110..c81269d6e3 100644
--- a/sys/kern/kern_kinfo.c
+++ b/sys/kern/kern_kinfo.c
@@ -149,10 +149,6 @@ fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
 	if ((vm = p->p_vmspace) != NULL) {
 		kp->kp_vm_map_size = vm->vm_map.size;
 		kp->kp_vm_rssize = vmspace_resident_count(vm);
-#ifdef _KERNEL
-		/*XXX MP RACES */
-		/*kp->kp_vm_prssize = vmspace_president_count(vm);*/
-#endif
 		kp->kp_vm_swrss = vm->vm_swrss;
 		kp->kp_vm_tsize = btoc(vm->vm_tsize);
 		kp->kp_vm_dsize = btoc(vm->vm_dsize);
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 9c01b77b14..6fe91a82fb 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -114,11 +114,9 @@ struct shminfo shminfo = {
  *		or seg-faults unexpectedly.
  *
  * use-phys	Shared memory segments are to use physical memory by
- *		default, which allows the kernel to optimize (remove)
- *		pv_entry management structures for the related PTEs and
- *		prevents paging.  This has distinctly different and
- *		usually desireable characteristics verses mmap()ing
- *		anonymous memory.
+ *		default, which may allow the kernel to better-optimize
+ *		the pmap and reduce overhead.  The pages are effectively
+ *		wired.
  */
 static int shm_allow_removed = 1;
 static int shm_use_phys = 1;
diff --git a/sys/sys/mpipe.h b/sys/sys/mpipe.h
index 5424f3ac14..32168d3d51 100644
--- a/sys/sys/mpipe.h
+++ b/sys/sys/mpipe.h
@@ -90,7 +90,7 @@ struct malloc_pipe {
     int		total_count;	/* total outstanding allocations incl free */
     int		ary_count;	/* guarenteed allocation count */
     int		max_count;	/* maximum count (M_NOWAIT used beyond nom) */
-    lwkt_token	token;
+    struct lwkt_token token;
     void	**array;	/* array[ary_count] */
     void	(*construct)(void *buf, void *priv);
     void	(*deconstruct)(void *buf, void *priv);
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index ec33f5a7b7..e1eddda269 100644
--- a/sys/sys/thread.h
+++ b/sys/sys/thread.h
@@ -121,12 +121,12 @@ struct intrframe;
 * reduces the complexity of the token release code.
 */
-typedef struct lwkt_token {
+struct lwkt_token {
    long		t_count;	/* Shared/exclreq/exclusive access */
    struct lwkt_tokref	*t_ref;		/* Exclusive ref */
    long		t_collisions;	/* Collision counter */
    const char		*t_desc;	/* Descriptive name */
-} lwkt_token;
+};
 
 #define TOK_EXCLUSIVE	0x00000001	/* Exclusive lock held */
 #define TOK_EXCLREQ	0x00000002	/* Exclusive request pending */
diff --git a/sys/vfs/nfs/nfs_node.c b/sys/vfs/nfs/nfs_node.c
index e86a78f7af..b714faf6b6 100644
--- a/sys/vfs/nfs/nfs_node.c
+++ b/sys/vfs/nfs/nfs_node.c
@@ -53,11 +53,12 @@
 
 static MALLOC_DEFINE(M_NFSNODE, "NFS node", "NFS node");
 
-static struct objcache *nfsnode_objcache;
-static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
-static u_long nfsnodehash;
-static lwkt_token nfsnhash_token = LWKT_TOKEN_INITIALIZER(nfsnhash_token);
+static struct lwkt_token nfsnhash_token =
+		LWKT_TOKEN_INITIALIZER(nfsnhash_token);
 static struct lock nfsnhash_lock;
+__read_mostly static struct objcache *nfsnode_objcache;
+__read_mostly static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl;
+__read_mostly static u_long nfsnodehash;
 
 #define TRUE	1
 #define FALSE	0
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 61740917e3..d71556d17c 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -740,17 +740,16 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
 	length = atop(entry->end - entry->start);
 	ba = kmalloc(sizeof(*ba), M_MAP_BACKING, M_INTWAIT); /* copied later */
 
-	/*
-	 * The ref on source is inherited when we move it into the ba.
-	 */
-	source = entry->ba.object;
-
 	/*
 	 * Don't create the new object if the old object isn't shared.
 	 *
+	 * The ref on source is inherited when we move it into the ba.
 	 * If addref is non-zero additional ref(s) are being added (probably
 	 * for map entry fork purposes), so clear OBJ_ONEMAPPING.
 	 *
+	 * Caller ensures source exists (all backing_ba's must have objects),
+	 * typically indirectly by virtue of the NEEDS_COPY flag being set.
+	 *
 	 * WARNING! Checking ref_count == 1 only works because we are testing
 	 *	    the object embedded in the entry (entry->ba.object).
 	 *	    This test DOES NOT WORK if checking an object hanging off
@@ -758,32 +757,35 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
 	 *	    vm_map_backing might be shared, or part of a chain that
 	 *	    is shared.  Checking ba->refs is worthless.
 	 */
-	drop_source = 0;
-	if (source) {
-		if (source->type != OBJT_VNODE) {
-			vm_object_hold(source);
-			if (source->ref_count == 1 &&
-			    source->handle == NULL &&
-			    (source->type == OBJT_DEFAULT ||
-			     source->type == OBJT_SWAP)) {
-				if (addref) {
-					vm_object_reference_locked(source);
-					vm_object_clear_flag(source,
-							     OBJ_ONEMAPPING);
-				}
-				vm_object_drop(source);
-				kfree(ba, M_MAP_BACKING);
-				goto done;
+	source = entry->ba.object;
+	KKASSERT(source);
+
+	if (source->type != OBJT_VNODE) {
+		vm_object_hold(source);
+		if (source->ref_count == 1 &&
+		    source->handle == NULL &&
+		    (source->type == OBJT_DEFAULT ||
+		     source->type == OBJT_SWAP)) {
+			if (addref) {
+				vm_object_reference_locked(source);
+				vm_object_clear_flag(source,
+						     OBJ_ONEMAPPING);
 			}
-			/*vm_object_reference_locked(source);*/
-			vm_object_clear_flag(source, OBJ_ONEMAPPING);
-			drop_source = 1;	/* drop source at end */
-		} else {
-			/*vm_object_reference_quick(source);*/
-			vm_object_clear_flag(source, OBJ_ONEMAPPING);
+			vm_object_drop(source);
+			kfree(ba, M_MAP_BACKING);
+			goto done;
 		}
+		drop_source = 1;	/* drop source at end */
+	} else {
+		drop_source = 0;
 	}
 
+	/*
+	 * Once it becomes part of a backing_ba chain it can wind up anywhere,
+	 * drop the ONEMAPPING flag now.
+	 */
+	vm_object_clear_flag(source, OBJ_ONEMAPPING);
+
 	/*
 	 * Allocate a new object with the given length.  The new object
 	 * is returned referenced but we may have to add another one.
@@ -826,32 +828,16 @@ vm_map_entry_shadow(vm_map_entry_t entry, int addref)
 	entry->ba.offset = 0;
 	entry->ba.refs = 0;
 
-	if (source) {
-#if 0
-		/* shadowing no longer messes with generation count */
-		if (drop_source) {
-			atomic_add_int(&source->generation, 1);
-			vm_object_set_flag(result, OBJ_ONSHADOW);
-		}
-#endif
-		/* cpu localization twist */
-		result->pg_color = vm_quickcolor();
-	}
+	/* cpu localization twist */
+	result->pg_color = vm_quickcolor();
 
 	/*
 	 * Adjust the return storage.  Drop the ref on source before
 	 * returning.
 	 */
 	vm_object_drop(result);
-	if (source) {
-		if (drop_source) {
-			/*vm_object_deallocate_locked(source);*/
-			vm_object_drop(source);
-		} else {
-			/*vm_object_deallocate(source);*/
-		}
-	}
-
+	if (drop_source)
+		vm_object_drop(source);
 done:
 	entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 }
@@ -3510,11 +3496,9 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
 }
 
 /*
- * vmspace_fork:
- * Create a new process vmspace structure and vm_map
- * based on those of an existing process.  The new map
- * is based on the old map, according to the inheritance
- * values on the regions in that map.
+ * Create a vmspace for a new process and its related vm_map based on an
+ * existing vmspace.  The new map inherits information from the old map
+ * according to inheritance settings.
  *
  * The source map must not be locked.
  * No requirements.
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 898dbb26db..a860c6f401 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -63,9 +63,9 @@
  */
 
 /*
- * Virtual memory map module definitions.
+ * Virtual memory map module definitions.  The vm_map houses the pmap
+ * structure which controls the mmu context for a process.
  */
-
 #ifndef _VM_VM_MAP_H_
 #define _VM_VM_MAP_H_
 
@@ -102,16 +102,12 @@
 struct vm_map_rb_tree;
 RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry, rb_entry, rb_vm_map_compare);
 
-/*
- * Types defined:
- *
- *	vm_map_t		the high-level address map data structure.
- *	vm_map_entry_t		an entry in an address map.
- */
-
 typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;
 
+/*
+ * Aux structure depends on map type and/or flags.
+ */
 union vm_map_aux {
 	vm_offset_t avail_ssize;	/* amt can grow if this is a stack */
 	vpte_t master_pde;		/* virtual page table root */
@@ -175,9 +171,9 @@ struct vm_map_backing {
 	 */
 	union {
 		struct vm_object *object;	/* vm_object */
-		struct vm_map *sub_map;	/* belongs to another map */
-		int (*uksmap)(struct cdev *dev, vm_page_t fake);
-		void *map_object;	/* generic */
+		struct vm_map	*sub_map;	/* belongs to another map */
+		int		(*uksmap)(struct cdev *dev, vm_page_t fake);
+		void		*map_object;	/* generic */
 	};
 	vm_ooffset_t offset;		/* cumulative offset */
 
@@ -208,17 +204,17 @@ typedef struct vm_map_backing *vm_map_backing_t;
  */
 struct vm_map_entry {
 	RB_ENTRY(vm_map_entry) rb_entry;
-	vm_offset_t start;	/* start address */
-	vm_offset_t end;	/* end address */
+	vm_offset_t	start;		/* start address */
+	vm_offset_t	end;		/* end address */
 	union vm_map_aux aux;		/* auxillary data */
 	struct vm_map_backing ba;	/* backing object chain */
-	vm_eflags_t eflags;	/* map entry flags */
-	vm_maptype_t maptype;	/* type of VM mapping */
-	vm_prot_t protection;	/* protection code */
-	vm_prot_t max_protection;	/* maximum protection */
-	vm_inherit_t inheritance;	/* inheritance */
-	int wired_count;	/* can be paged if = 0 */
-	vm_subsys_t id;		/* subsystem id */
+	vm_eflags_t	eflags;		/* map entry flags */
+	vm_maptype_t	maptype;	/* type of VM mapping */
+	vm_prot_t	protection;	/* protection code */
+	vm_prot_t	max_protection;	/* maximum protection */
+	vm_inherit_t	inheritance;	/* inheritance */
+	int		wired_count;	/* can be paged if = 0 */
+	vm_subsys_t	id;		/* subsystem id */
 };
 
 typedef struct vm_map_entry *vm_map_entry_t;
@@ -328,24 +324,22 @@ typedef struct vm_map_freehint vm_map_freehint_t;
 RB_HEAD(vm_map_rb_tree, vm_map_entry);
 
 struct vm_map {
-	struct lock lock;	/* Lock for map data */
+	struct lock	lock;		/* Lock for map data */
 	struct vm_map_rb_tree rb_root;	/* Organize map entries */
-	vm_offset_t min_addr;	/* min address */
-	vm_offset_t max_addr;	/* max address */
-	int nentries;		/* Number of entries */
-	unsigned int timestamp;	/* Version number */
-	vm_size_t size;		/* virtual size */
-	u_char system_map;	/* Am I a system map? */
-	u_char freehint_newindex;
-	u_char unused02;
-	u_char unused03;
-	vm_flags_t flags;	/* flags for this vm_map */
+	vm_offset_t	min_addr;	/* min address */
+	vm_offset_t	max_addr;	/* max address */
+	int		nentries;	/* Number of entries */
+	unsigned int	timestamp;	/* Version number */
+	vm_size_t	size;		/* virtual size */
+	u_char		system_map;	/* Am I a system map? */
+	u_char		freehint_newindex;
+	u_char		unused02;
+	u_char		unused03;
+	vm_flags_t	flags;		/* flags for this vm_map */
 	vm_map_freehint_t freehint[VM_MAP_FFCOUNT];
-	struct pmap *pmap;	/* Physical map */
-	u_int president_cache;	/* Remember president count */
-	u_int president_ticks;	/* Save ticks for cache */
+	struct pmap	*pmap;		/* Physical map */
 	struct vm_map_ilock *ilock_base;/* interlocks */
-	struct spinlock ilock_spin;	/* interlocks (spinlock for) */
+	struct spinlock	ilock_spin;	/* interlocks (spinlock for) */
 	struct lwkt_token token;	/* Soft serializer */
 	vm_offset_t pgout_offset;	/* for RLIMIT_RSS scans */
 };
@@ -545,64 +539,6 @@ vmspace_resident_count(struct vmspace *vmspace)
 	return pmap_resident_count(vmspace_pmap(vmspace));
 }
 
-/*
- * Calculates the proportional RSS and returning the
- * accrued result.  This is a loose value for statistics/display
- * purposes only and will only be updated if we can acquire
- * a non-blocking map lock.
- *
- * (used by userland or the kernel)
- */
-static __inline u_int
-vmspace_president_count(struct vmspace *vmspace)
-{
-	vm_map_t map = &vmspace->vm_map;
-	vm_map_entry_t cur;
-	vm_object_t object;
-	u_int count = 0;
-
-#ifdef _KERNEL
-	if (map->president_ticks == ticks / hz || vm_map_lock_read_try(map))
-		return(map->president_cache);
-#endif
-
-	RB_FOREACH(cur, vm_map_rb_tree, &map->rb_root) {
-		switch(cur->maptype) {
-		case VM_MAPTYPE_NORMAL:
-		case VM_MAPTYPE_VPAGETABLE:
-			if ((object = cur->ba.object) == NULL)
-				break;
-			if (object->type != OBJT_DEFAULT &&
-			    object->type != OBJT_SWAP) {
-				break;
-			}
-
-#if 0
-			/*
-			 * synchronize non-zero case, contents of field
-			 * can change at any time due to pmap ops.
-			 */
-			if ((n = object->agg_pv_list_count) != 0) {
-#ifdef _KERNEL
-				cpu_ccfence();
-#endif
-				count += object->resident_page_count / n;
-			}
-#endif
-			break;
-		default:
-			break;
-		}
-	}
-#ifdef _KERNEL
-	map->president_cache = count;
-	map->president_ticks = ticks / hz;
-	vm_map_unlock_read(map);
-#endif
-
-	return(count);
-}
-
 /*
  * Number of kernel maps and entries to statically allocate, required
  * during boot to bootstrap the VM system.
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index b3bd831180..624ec9619a 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -113,6 +113,7 @@ int rb_swblock_compare(struct swblock *, struct swblock *);
 
 RB_PROTOTYPE2(swblock_rb_tree, swblock, swb_entry, rb_swblock_compare,
 	      vm_pindex_t);
+RB_HEAD(swblock_rb_tree, swblock);
 
 enum obj_type {
 	OBJT_DEFAULT,
@@ -129,27 +130,25 @@ typedef u_char objtype_t;
 /*
  * A VM object which represents an arbitrarily sized data store.
  *
- * LOCKING:
- *	vmobj_tokens[n] for object_list, hashed by address.
- *
- *	vm_object_hold/drop() for most vm_object related operations
- *	to avoid ref confusion in the deallocator.
+ * vm_objects are soft-locked with their token, meaning that any
+ * blocking can allow other threads to squeeze in some work.
  */
 struct vm_object {
-	TAILQ_ENTRY(vm_object) object_list; /* locked by vmobj_tokens[n] */
-	RB_HEAD(vm_page_rb_tree, vm_page) rb_memq; /* resident pages */
-	int generation;		/* generation ID */
-	vm_pindex_t size;	/* Object size */
-	int ref_count;
-	vm_memattr_t memattr;	/* default memory attribute for pages */
-	objtype_t type;		/* type of pager */
-	u_short flags;		/* see below */
-	u_short pg_color;	/* color of first page in obj */
-	u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */
-	long resident_page_count; /* number of resident pages */
-	TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
-	void *handle;		/* control handle: vp, etc */
-	int hold_count;		/* count prevents destruction */
+	struct lwkt_token token;
+	TAILQ_ENTRY(vm_object) object_list;
+	struct vm_page_rb_tree rb_memq;	/* resident pages */
+	int		generation;	/* generation ID */
+	vm_pindex_t	size;		/* Object size */
+	int		ref_count;
+	vm_memattr_t	memattr;	/* default memory attribute for pages */
+	objtype_t	type;		/* type of pager */
+	u_short		flags;		/* see below */
+	u_short		pg_color;	/* color of first page in obj */
+	u_int		paging_in_progress; /* Activity in progress */
+	long		resident_page_count; /* number of resident pages */
+	TAILQ_ENTRY(vm_object) pager_object_list; /* optional use by pager */
+	void		*handle;	/* control handle: vp, etc */
+	int		hold_count;	/* count prevents destruction */
 
 #if defined(DEBUG_LOCKS)
 /*
@@ -157,10 +156,10 @@ struct vm_object {
  * Record threads holding a vm_object
  */
 #define VMOBJ_DEBUG_ARRAY_SIZE	(32)
-	char debug_hold_thrs[VMOBJ_DEBUG_ARRAY_SIZE][64];
-	const char *debug_hold_file[VMOBJ_DEBUG_ARRAY_SIZE];
-	int debug_hold_line[VMOBJ_DEBUG_ARRAY_SIZE];
-	int debug_index;
+	char		debug_hold_thrs[VMOBJ_DEBUG_ARRAY_SIZE][64];
+	const char	*debug_hold_file[VMOBJ_DEBUG_ARRAY_SIZE];
+	int		debug_hold_line[VMOBJ_DEBUG_ARRAY_SIZE];
+	int		debug_index;
 #endif
 
 	union {
@@ -181,10 +180,9 @@ struct vm_object {
 	 * store.  For vnodes the swap backing store acts as a fast
 	 * data cache but the vnode contains the official data.
 	 */
-	RB_HEAD(swblock_rb_tree, swblock) swblock_root;
-	long swblock_count;
-	struct lwkt_token token;
-	struct md_object md;	/* machine specific (typ pmap) */
+	struct swblock_rb_tree swblock_root;
+	long		swblock_count;
+	struct md_object md;		/* machine specific (typ pmap) */
 };
 
 /*
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 791c8b61a9..a82cc963e2 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -137,6 +137,7 @@ int rb_vm_page_compare(struct vm_page *, struct vm_page *);
 struct vm_page_rb_tree;
 RB_PROTOTYPE2(vm_page_rb_tree, vm_page, rb_entry,
 	      rb_vm_page_compare, vm_pindex_t);
+RB_HEAD(vm_page_rb_tree, vm_page);
 
 struct vm_page {
 	TAILQ_ENTRY(vm_page) pageq;	/* vm_page_queues[] list (P) */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 3bd6a607f2..8cff802040 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1960,9 +1960,6 @@ vm_pageout_free_page_calc(vm_size_t count)
 	if (count < vmstats.v_page_count)
 		return 0;
 	/*
-	 * free_reserved needs to include enough for the largest swap pager
-	 * structures plus enough for any pv_entry structs when paging.
-	 *
 	 * v_free_min		normal allocations
 	 * v_free_reserved	system allocations
 	 * v_pageout_free_min	allocations by pageout daemon
-- 
2.41.0
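
Illustrative sketch (not part of the commit): the decision vm_map_entry_shadow()
is left with after this cleanup, mirrored in stand-alone C.  The types and
helpers below are hypothetical stubs standing in for the kernel's vm_object
API; only the branch structure follows the patched function.

#include <stdio.h>

enum obj_type { OBJT_DEFAULT, OBJT_SWAP, OBJT_VNODE };

#define ONEMAPPING	0x0001		/* stand-in for OBJ_ONEMAPPING */

struct obj {				/* stub for struct vm_object */
	enum obj_type type;
	int ref_count;
	void *handle;
	unsigned flags;
};

/*
 * Returns 0 when the source object is not shared and can simply be
 * kept (the "kfree(ba); goto done;" fast path in the real function),
 * 1 when a shadow object must be allocated.  On the shadow path the
 * source always loses its ONEMAPPING hint, because membership in a
 * backing_ba chain means it "can wind up anywhere".
 */
static int
needs_shadow(struct obj *source, int addref)
{
	if (source->type != OBJT_VNODE &&
	    source->ref_count == 1 &&
	    source->handle == NULL &&
	    (source->type == OBJT_DEFAULT || source->type == OBJT_SWAP)) {
		if (addref) {
			++source->ref_count;	/* fork-time extra ref */
			source->flags &= ~ONEMAPPING;
		}
		return 0;
	}
	source->flags &= ~ONEMAPPING;
	return 1;
}

int
main(void)
{
	struct obj anon  = { OBJT_DEFAULT, 1, NULL, ONEMAPPING };
	struct obj vnode = { OBJT_VNODE, 2, &anon, 0 };

	printf("anonymous, unshared -> shadow? %d\n", needs_shadow(&anon, 0));
	printf("vnode-backed        -> shadow? %d\n", needs_shadow(&vnode, 0));
	return 0;
}

The simplification hinges on the new invariant asserted by KKASSERT(source):
entry->ba.object always exists, so the old "if (source)" wrapper and its
duplicated OBJ_ONEMAPPING handling collapse into one clear at the end of the
shadow path.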
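
A second sketch (also illustrative, hypothetical names): what removing the
lwkt_token typedef means for declarations.  With the typedef gone from
sys/sys/thread.h, the remaining users (sys/sys/mpipe.h, sys/vfs/nfs/nfs_node.c)
spell the struct tag explicitly, as in the hunks above.

/*
 * Before the patch the header provided both a tag and a same-named
 * typedef:
 *
 *	typedef struct lwkt_token { ... } lwkt_token;
 *
 * so users could declare "lwkt_token token;".  After the patch only
 * the tag remains.  Reduced field set below; sketch only.
 */
struct lwkt_token {
	long t_count;
	const char *t_desc;
};

struct malloc_pipe_like {		/* hypothetical mpipe stand-in */
	struct lwkt_token token;	/* was: lwkt_token token; */
};

int
main(void)
{
	struct malloc_pipe_like mp = { { 0, "mpipe" } };
	return (int)mp.token.t_count;
}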