From 7b00fbb4c0ce96d5b7130b22da7d0f9b06084e06 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 24 Oct 2013 17:01:28 -0700 Subject: [PATCH] kernel - Replace global vmobj_token with vmobj_tokens[] array * Remove one of the two remaining major bottlenecks in the system, the global vmobj_token which is used to manage access to the vm_object_list. All VM object creation and deletion would get thrown into this list. * Replace it with an array of 64 tokens and an array of 64 lists. vmobj_tokens[] and vm_object_lists[]. Use a simple right-shift hash code to index the array. * This reduces contention by a factor of 64 or so which makes a big difference on multi-chip cpu systems. It won't be as noticeable on single-chip (e.g. 4-core/8-thread) systems. * Rip-out some of the linux vmstats compat functions which were iterating the object list and replace with the pcpu accumulator scan that was recently implemented for dragonfly vmstats. * TODO: proc_token. --- .../linux/i386/linprocfs/linprocfs_misc.c | 15 +- sys/emulation/linux/linux_misc.c | 14 +- sys/gnu/vfs/ext2fs/ext2_vfsops.c | 6 +- sys/kern/lwkt_token.c | 2 - sys/kern/vfs_mount.c | 13 +- sys/kern/vfs_subr.c | 25 +-- sys/sys/thread.h | 1 - sys/vm/swap_pager.c | 59 ++--- sys/vm/vm_object.c | 211 ++++++++++-------- sys/vm/vm_object.h | 13 +- sys/vm/vm_swapcache.c | 79 ++++--- 11 files changed, 223 insertions(+), 215 deletions(-) diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c index da7ba8a95d..8c0b6be112 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c @@ -100,7 +100,7 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs, unsigned long long swaptotal; /* total swap space in bytes */ unsigned long long swapused; /* used swap space in bytes */ unsigned long long swapfree; /* free swap space in bytes */ - vm_object_t object; + int n; if (uio->uio_rw != 
UIO_READ) return (EOPNOTSUPP); @@ -128,16 +128,13 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs, swapused = swaptotal - swapfree; memshared = 0; - lwkt_gettoken(&vmobj_token); - for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = TAILQ_NEXT(object, object_list)) { - if (object->type == OBJT_MARKER) - continue; - if (object->shadow_count > 1) - memshared += object->resident_page_count; + for (n = 0; n < ncpus; ++n) { + globaldata_t gd = globaldata_find(n); + + memshared += gd->gd_vmtotal.t_arm; } - lwkt_reltoken(&vmobj_token); memshared *= PAGE_SIZE; + /* * We'd love to be able to write: * diff --git a/sys/emulation/linux/linux_misc.c b/sys/emulation/linux/linux_misc.c index 74dcd069a9..7fcc497091 100644 --- a/sys/emulation/linux/linux_misc.c +++ b/sys/emulation/linux/linux_misc.c @@ -131,6 +131,7 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) struct timespec ts; int error; int i; + int n; /* Uptime is copied out of print_uptime() in kern_shutdown.c */ getnanouptime(&ts); @@ -159,16 +160,11 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) sysinfo.freeram = sysinfo.totalram - vmstats.v_wire_count * PAGE_SIZE; sysinfo.sharedram = 0; - lwkt_gettoken(&vmobj_token); - for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = TAILQ_NEXT(object, object_list)) { - if (object->type == OBJT_MARKER) - continue; - if (object->shadow_count > 1) - sysinfo.sharedram += object->resident_page_count; - } - lwkt_reltoken(&vmobj_token); + for (n = 0; n < ncpus; ++n) { + globaldata_t gd = globaldata_find(n); + sysinfo.sharedram += gd->gd_vmtotal.t_avmshr; + } sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; diff --git a/sys/gnu/vfs/ext2fs/ext2_vfsops.c b/sys/gnu/vfs/ext2fs/ext2_vfsops.c index d8e1e0ad46..47618b3464 100644 --- a/sys/gnu/vfs/ext2fs/ext2_vfsops.c +++ b/sys/gnu/vfs/ext2fs/ext2_vfsops.c @@ -688,8 +688,8 @@ ext2_reload(struct mount *mountp, struct ucred *cred) scaninfo.fs = fs; while (error == 0 
&& scaninfo.rescan) { scaninfo.rescan = 0; - error = vmntvnodescan(mountp, VMSC_GETVX, ext2_reload_scan1, - ext2_reload_scan2, &scaninfo); + error = vmntvnodescan(mountp, VMSC_GETVX, + NULL, ext2_reload_scan2, &scaninfo); } return(error); } @@ -1043,7 +1043,7 @@ ext2_sync(struct mount *mp, int waitfor) while (scaninfo.rescan) { scaninfo.rescan = 0; vmntvnodescan(mp, VMSC_GETVP|VMSC_NOWAIT, - NULL, ext2_sync_scan, &scaninfo); + NULL, ext2_sync_scan, &scaninfo); } /* diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c index 76d4985451..907061b8b6 100644 --- a/sys/kern/lwkt_token.c +++ b/sys/kern/lwkt_token.c @@ -136,7 +136,6 @@ struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token); struct lwkt_token proc_token = LWKT_TOKEN_INITIALIZER(proc_token); struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token); struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token); -struct lwkt_token vmobj_token = LWKT_TOKEN_INITIALIZER(vmobj_token); struct lwkt_token ifnet_token = LWKT_TOKEN_INITIALIZER(ifnet_token); static int lwkt_token_spin = 5; @@ -952,7 +951,6 @@ DB_SHOW_COMMAND(tokens, db_tok_all) &proc_token, &tty_token, &vnode_token, - &vmobj_token, NULL }; diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index a067875be4..63aae96ab7 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -726,6 +726,9 @@ insmntque(struct vnode *vp, struct mount *mp) * token is held. A return value < 0 skips to the next vnode without calling * the slowfunc(), a return value > 0 terminates the loop. * + * WARNING! The fastfunc() should not indirect through vp->v_object, the vp + * data structure is unstable when called from fastfunc(). + * * The slowfunc() callback is called after the vnode has been successfully * locked based on passed flags. The vnode is skipped if it gets rearranged * or destroyed while blocking on the lock. A non-zero return value from @@ -733,12 +736,6 @@ insmntque(struct vnode *vp, struct mount *mp) * arbitrarily block. 
The scanning code guarentees consistency of operation * even if the slow function deletes or moves the node, or blocks and some * other thread deletes or moves the node. - * - * NOTE: We hold vmobj_token to prevent a VM object from being destroyed - * out from under the fastfunc()'s vnode test. It will not prevent - * v_object from getting NULL'd out but it will ensure that the - * pointer (if we race) will remain stable. Only needed when - * fastfunc is non-NULL. */ int vmntvnodescan( @@ -756,8 +753,6 @@ vmntvnodescan( int count = 0; lwkt_gettoken(&mp->mnt_token); - if (fastfunc) - lwkt_gettoken(&vmobj_token); /* * If asked to do one pass stop after iterating available vnodes. @@ -883,8 +878,6 @@ next: } TAILQ_REMOVE(&mp->mnt_vnodescan_list, &info, entry); - if (fastfunc) - lwkt_reltoken(&vmobj_token); lwkt_reltoken(&mp->mnt_token); return(r); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index b86dbea21c..9adaa59cae 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -139,27 +139,6 @@ rb_buf_compare(struct buf *b1, struct buf *b2) return(0); } -/* - * Returns non-zero if the vnode is a candidate for lazy msyncing. - * - * NOTE: v_object is not stable (this scan can race), however the - * mntvnodescan code holds vmobj_token so any VM object we - * do find will remain stable storage. - */ -static __inline int -vshouldmsync(struct vnode *vp) -{ - vm_object_t object; - - if (vp->v_auxrefs != 0 || VREFCNT(vp) > 0) - return (0); /* other holders */ - object = vp->v_object; - cpu_ccfence(); - if (object && (object->ref_count || object->resident_page_count)) - return(0); - return (1); -} - /* * Initialize the vnode management data structures. 
* @@ -2223,8 +2202,10 @@ vfs_msync_scan1(struct mount *mp, struct vnode *vp, void *data) int flags = (int)(intptr_t)data; if ((vp->v_flag & VRECLAIMED) == 0) { - if (vshouldmsync(vp)) + if (vp->v_auxrefs == 0 && VREFCNT(vp) <= 0 && + vp->v_object) { return(0); /* call scan2 */ + } if ((mp->mnt_flag & MNT_RDONLY) == 0 && (vp->v_flag & VOBJDIRTY) && (flags == MNT_WAIT || vn_islocked(vp) == 0)) { diff --git a/sys/sys/thread.h b/sys/sys/thread.h index 80a0cb32a4..d6e18f0cfc 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -427,7 +427,6 @@ extern struct lwkt_token kvm_token; extern struct lwkt_token proc_token; extern struct lwkt_token tty_token; extern struct lwkt_token vnode_token; -extern struct lwkt_token vmobj_token; extern struct lwkt_token ifnet_token; /* diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index c04b25c618..433a1b3977 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1981,43 +1981,48 @@ static int swp_pager_swapoff_callback(struct swblock *swap, void *data); int swap_pager_swapoff(int devidx) { + struct swswapoffinfo info; struct vm_object marker; vm_object_t object; - struct swswapoffinfo info; + int n; bzero(&marker, sizeof(marker)); marker.type = OBJT_MARKER; - lwkt_gettoken(&vmobj_token); - TAILQ_INSERT_HEAD(&vm_object_list, &marker, object_list); - - while ((object = TAILQ_NEXT(&marker, object_list)) != NULL) { - if (object->type == OBJT_MARKER) - goto skip; - if (object->type != OBJT_SWAP && object->type != OBJT_VNODE) - goto skip; - vm_object_hold(object); - if (object->type != OBJT_SWAP && object->type != OBJT_VNODE) { + for (n = 0; n < VMOBJ_HSIZE; ++n) { + lwkt_gettoken(&vmobj_tokens[n]); + TAILQ_INSERT_HEAD(&vm_object_lists[n], &marker, object_list); + + while ((object = TAILQ_NEXT(&marker, object_list)) != NULL) { + if (object->type == OBJT_MARKER) + goto skip; + if (object->type != OBJT_SWAP && + object->type != OBJT_VNODE) + goto skip; + vm_object_hold(object); + if (object->type != OBJT_SWAP && + object->type 
!= OBJT_VNODE) { + vm_object_drop(object); + goto skip; + } + info.object = object; + info.shared = 0; + info.devidx = devidx; + swblock_rb_tree_RB_SCAN(&object->swblock_root, + NULL, swp_pager_swapoff_callback, + &info); vm_object_drop(object); - goto skip; - } - info.object = object; - info.shared = 0; - info.devidx = devidx; - swblock_rb_tree_RB_SCAN(&object->swblock_root, - NULL, - swp_pager_swapoff_callback, - &info); - vm_object_drop(object); skip: - if (object == TAILQ_NEXT(&marker, object_list)) { - TAILQ_REMOVE(&vm_object_list, &marker, object_list); - TAILQ_INSERT_AFTER(&vm_object_list, object, - &marker, object_list); + if (object == TAILQ_NEXT(&marker, object_list)) { + TAILQ_REMOVE(&vm_object_lists[n], + &marker, object_list); + TAILQ_INSERT_AFTER(&vm_object_lists[n], object, + &marker, object_list); + } } + TAILQ_REMOVE(&vm_object_lists[n], &marker, object_list); + lwkt_reltoken(&vmobj_tokens[n]); } - TAILQ_REMOVE(&vm_object_list, &marker, object_list); - lwkt_reltoken(&vmobj_token); /* * If we fail to locate all swblocks we just fail gracefully and diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 74588e23a0..9c1b44475f 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -128,10 +128,9 @@ static void vm_object_lock_init(vm_object_t); * */ -struct object_q vm_object_list; /* locked by vmobj_token */ struct vm_object kernel_object; -static long vm_object_count; /* locked by vmobj_token */ +static long vm_object_count; static long object_collapses; static long object_bypasses; @@ -141,6 +140,9 @@ static struct vm_zone obj_zone_store; #define VM_OBJECTS_INIT 256 static struct vm_object vm_objects_init[VM_OBJECTS_INIT]; +struct object_q vm_object_lists[VMOBJ_HSIZE]; +struct lwkt_token vmobj_tokens[VMOBJ_HSIZE]; + /* * Misc low level routines */ @@ -408,6 +410,7 @@ void _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) { int incr; + int n; RB_INIT(&object->rb_memq); LIST_INIT(&object->shadow_head); @@ -443,10 
+446,12 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) pmap_object_init(object); vm_object_hold(object); - lwkt_gettoken(&vmobj_token); - TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); - vm_object_count++; - lwkt_reltoken(&vmobj_token); + + n = VMOBJ_HASH(object); + atomic_add_long(&vm_object_count, 1); + lwkt_gettoken(&vmobj_tokens[n]); + TAILQ_INSERT_TAIL(&vm_object_lists[n], object, object_list); + lwkt_reltoken(&vmobj_tokens[n]); } /* @@ -457,7 +462,12 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) void vm_object_init(void) { - TAILQ_INIT(&vm_object_list); + int i; + + for (i = 0; i < VMOBJ_HSIZE; ++i) { + TAILQ_INIT(&vm_object_lists[i]); + lwkt_token_init(&vmobj_tokens[i], "vmobjlst"); + } _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(KvaEnd), &kernel_object); @@ -1125,6 +1135,8 @@ static int vm_object_terminate_callback(vm_page_t p, void *data); void vm_object_terminate(vm_object_t object) { + int n; + /* * Make sure no one uses us. Once we set OBJ_DEAD we should be * able to safely block. @@ -1218,10 +1230,11 @@ vm_object_terminate(vm_object_t object) /* * Remove the object from the global object list. 
*/ - lwkt_gettoken(&vmobj_token); - TAILQ_REMOVE(&vm_object_list, object, object_list); - vm_object_count--; - lwkt_reltoken(&vmobj_token); + n = VMOBJ_HASH(object); + lwkt_gettoken(&vmobj_tokens[n]); + TAILQ_REMOVE(&vm_object_lists[n], object, object_list); + lwkt_reltoken(&vmobj_tokens[n]); + atomic_add_long(&vm_object_count, -1); if (object->ref_count != 0) { panic("vm_object_terminate2: object with references, " @@ -1900,6 +1913,7 @@ static __inline int vm_object_backing_scan(vm_object_t object, vm_object_t backing_object, int op) { struct rb_vm_page_scan_info info; + int n; vm_object_assert_held(object); vm_object_assert_held(backing_object); @@ -1926,10 +1940,12 @@ vm_object_backing_scan(vm_object_t object, vm_object_t backing_object, int op) if (op & OBSC_COLLAPSE_WAIT) { KKASSERT((backing_object->flags & OBJ_DEAD) == 0); vm_object_set_flag(backing_object, OBJ_DEAD); - lwkt_gettoken(&vmobj_token); - TAILQ_REMOVE(&vm_object_list, backing_object, object_list); - vm_object_count--; - lwkt_reltoken(&vmobj_token); + + n = VMOBJ_HASH(backing_object); + lwkt_gettoken(&vmobj_tokens[n]); + TAILQ_REMOVE(&vm_object_lists[n], backing_object, object_list); + lwkt_reltoken(&vmobj_tokens[n]); + atomic_add_long(&vm_object_count, -1); } /* @@ -2906,30 +2922,36 @@ vm_object_in_map_callback(struct proc *p, void *data) DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; + int n; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. 
*/ - for (object = TAILQ_FIRST(&vm_object_list); - object != NULL; - object = TAILQ_NEXT(object, object_list)) { - if (object->type == OBJT_MARKER) - continue; - if (object->handle == NULL && - (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { - if (object->ref_count == 0) { - db_printf("vmochk: internal obj has zero ref count: %ld\n", - (long)object->size); + for (n = 0; n < VMOBJ_HSIZE; ++n) { + for (object = TAILQ_FIRST(&vm_object_lists[n]); + object != NULL; + object = TAILQ_NEXT(object, object_list)) { + if (object->type == OBJT_MARKER) + continue; + if (object->handle != NULL || + (object->type != OBJT_DEFAULT && + object->type != OBJT_SWAP)) { + continue; } - if (!vm_object_in_map(object)) { - db_printf( - "vmochk: internal obj is not in a map: " - "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", - object->ref_count, (u_long)object->size, - (u_long)object->size, - (void *)object->backing_object); + if (object->ref_count == 0) { + db_printf("vmochk: internal obj has " + "zero ref count: %ld\n", + (long)object->size); } + if (vm_object_in_map(object)) + continue; + db_printf("vmochk: internal obj is not in a map: " + "ref: %d, size: %lu: 0x%lx, " + "backing_object: %p\n", + object->ref_count, (u_long)object->size, + (u_long)object->size, + (void *)object->backing_object); } } } @@ -3014,36 +3036,68 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages) vm_object_t object; int nl = 0; int c; - for (object = TAILQ_FIRST(&vm_object_list); - object != NULL; - object = TAILQ_NEXT(object, object_list)) { - vm_pindex_t idx, fidx; - vm_pindex_t osize; - vm_paddr_t pa = -1, padiff; - int rcount; - vm_page_t m; - - if (object->type == OBJT_MARKER) - continue; - db_printf("new object: %p\n", (void *)object); - if ( nl > 18) { - c = cngetc(); - if (c != ' ') - return; - nl = 0; - } - nl++; - rcount = 0; - fidx = 0; - osize = object->size; - if (osize > 128) - osize = 128; - for (idx = 0; idx < osize; idx++) { - m = vm_page_lookup(object, idx); - if (m == NULL) { 
+ int n; + + for (n = 0; n < VMOBJ_HSIZE; ++n) { + for (object = TAILQ_FIRST(&vm_object_lists[n]); + object != NULL; + object = TAILQ_NEXT(object, object_list)) { + vm_pindex_t idx, fidx; + vm_pindex_t osize; + vm_paddr_t pa = -1, padiff; + int rcount; + vm_page_t m; + + if (object->type == OBJT_MARKER) + continue; + db_printf("new object: %p\n", (void *)object); + if ( nl > 18) { + c = cngetc(); + if (c != ' ') + return; + nl = 0; + } + nl++; + rcount = 0; + fidx = 0; + osize = object->size; + if (osize > 128) + osize = 128; + for (idx = 0; idx < osize; idx++) { + m = vm_page_lookup(object, idx); + if (m == NULL) { + if (rcount) { + db_printf(" index(%ld)run(%d)pa(0x%lx)\n", + (long)fidx, rcount, (long)pa); + if ( nl > 18) { + c = cngetc(); + if (c != ' ') + return; + nl = 0; + } + nl++; + rcount = 0; + } + continue; + } + + if (rcount && + (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { + ++rcount; + continue; + } if (rcount) { - db_printf(" index(%ld)run(%d)pa(0x%lx)\n", + padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m); + padiff >>= PAGE_SHIFT; + padiff &= PQ_L2_MASK; + if (padiff == 0) { + pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE; + ++rcount; + continue; + } + db_printf(" index(%ld)run(%d)pa(0x%lx)", (long)fidx, rcount, (long)pa); + db_printf("pd(%ld)\n", (long)padiff); if ( nl > 18) { c = cngetc(); if (c != ' ') @@ -3051,29 +3105,14 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages) nl = 0; } nl++; - rcount = 0; } - continue; - } - - - if (rcount && - (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { - ++rcount; - continue; + fidx = idx; + pa = VM_PAGE_TO_PHYS(m); + rcount = 1; } if (rcount) { - padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m); - padiff >>= PAGE_SHIFT; - padiff &= PQ_L2_MASK; - if (padiff == 0) { - pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE; - ++rcount; - continue; - } - db_printf(" index(%ld)run(%d)pa(0x%lx)", + db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); - db_printf("pd(%ld)\n", 
(long)padiff); if ( nl > 18) { c = cngetc(); if (c != ' ') @@ -3082,20 +3121,6 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages) } nl++; } - fidx = idx; - pa = VM_PAGE_TO_PHYS(m); - rcount = 1; - } - if (rcount) { - db_printf(" index(%ld)run(%d)pa(0x%lx)\n", - (long)fidx, rcount, (long)pa); - if ( nl > 18) { - c = cngetc(); - if (c != ' ') - return; - nl = 0; - } - nl++; } } } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 2a0e22743f..1d15720836 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -135,12 +135,13 @@ typedef u_char objtype_t; * them. * * LOCKING: - * vmobj_token for object_list. + * vmobj_tokens[n] for object_list, hashed by address. + * * vm_object_hold/drop() for most vm_object related operations. * OBJ_CHAINLOCK to avoid chain/shadow object collisions. */ struct vm_object { - TAILQ_ENTRY(vm_object) object_list; /* vmobj_token */ + TAILQ_ENTRY(vm_object) object_list; /* locked by vmobj_tokens[n] */ LIST_HEAD(, vm_object) shadow_head; /* objects we are a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ RB_HEAD(vm_page_rb_tree, vm_page) rb_memq; /* resident pages */ @@ -244,7 +245,13 @@ struct vm_object_dealloc_list { TAILQ_HEAD(object_q, vm_object); -extern struct object_q vm_object_list; /* list of allocated objects */ +#define VMOBJ_HSIZE 64 +#define VMOBJ_HMASK (VMOBJ_HSIZE - 1) +#define VMOBJ_HASH(obj) (((intptr_t)(obj) >> 8) & VMOBJ_HMASK) + +extern struct object_q vm_object_lists[VMOBJ_HSIZE]; +extern struct lwkt_token vmobj_tokens[VMOBJ_HSIZE]; + /* lock for object list and count */ diff --git a/sys/vm/vm_swapcache.c b/sys/vm/vm_swapcache.c index c88d8e82f6..138ecaba89 100644 --- a/sys/vm/vm_swapcache.c +++ b/sys/vm/vm_swapcache.c @@ -84,8 +84,9 @@ static int vm_swapcached_flush (vm_page_t m, int isblkdev); static int vm_swapcache_test(vm_page_t m); static int vm_swapcache_writing_heuristic(void); static int vm_swapcache_writing(vm_page_t marker, int count, int scount); -static void 
vm_swapcache_cleaning(vm_object_t marker); -static void vm_swapcache_movemarker(vm_object_t marker, vm_object_t object); +static void vm_swapcache_cleaning(vm_object_t marker, int *swindexp); +static void vm_swapcache_movemarker(vm_object_t marker, int swindex, + vm_object_t object); struct thread *swapcached_thread; SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL); @@ -171,7 +172,8 @@ vm_swapcached_thread(void) enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING; enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING; static struct vm_page page_marker[PQ_L2_SIZE]; - static struct vm_object object_marker; + static struct vm_object swmarker; + static int swindex; int q; /* @@ -206,11 +208,13 @@ vm_swapcached_thread(void) /* * Initialize our marker for the vm_object scan (SWAPC_CLEANING) */ - bzero(&object_marker, sizeof(object_marker)); - object_marker.type = OBJT_MARKER; - lwkt_gettoken(&vmobj_token); - TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list); - lwkt_reltoken(&vmobj_token); + bzero(&swmarker, sizeof(swmarker)); + swmarker.type = OBJT_MARKER; + swindex = 0; + lwkt_gettoken(&vmobj_tokens[swindex]); + TAILQ_INSERT_HEAD(&vm_object_lists[swindex], + &swmarker, object_list); + lwkt_reltoken(&vmobj_tokens[swindex]); for (;;) { int reached_end; @@ -268,7 +272,7 @@ vm_swapcached_thread(void) * is one-seconds worth of accumulation. 
*/ if (state != SWAPC_WRITING) { - vm_swapcache_cleaning(&object_marker); + vm_swapcache_cleaning(&swmarker, &swindex); continue; } if (vm_swapcache_curburst < vm_swapcache_accrate) @@ -319,9 +323,9 @@ vm_swapcached_thread(void) vm_page_queues_spin_unlock(PQ_INACTIVE + q); } - lwkt_gettoken(&vmobj_token); - TAILQ_REMOVE(&vm_object_list, &object_marker, object_list); - lwkt_reltoken(&vmobj_token); + lwkt_gettoken(&vmobj_tokens[swindex]); + TAILQ_REMOVE(&vm_object_lists[swindex], &swmarker, object_list); + lwkt_reltoken(&vmobj_tokens[swindex]); } static struct kproc_desc swpc_kp = { @@ -673,7 +677,7 @@ vm_swapcache_test(vm_page_t m) */ static void -vm_swapcache_cleaning(vm_object_t marker) +vm_swapcache_cleaning(vm_object_t marker, int *swindexp) { vm_object_t object; struct vnode *vp; @@ -684,10 +688,11 @@ vm_swapcache_cleaning(vm_object_t marker) count = vm_swapcache_maxlaunder; scount = vm_swapcache_maxscan; +outerloop: /* * Look for vnode objects */ - lwkt_gettoken(&vmobj_token); + lwkt_gettoken(&vmobj_tokens[*swindexp]); while ((object = TAILQ_NEXT(marker, object_list)) != NULL) { /* @@ -695,7 +700,7 @@ vm_swapcache_cleaning(vm_object_t marker) * objects! */ if (object->type == OBJT_MARKER) { - vm_swapcache_movemarker(marker, object); + vm_swapcache_movemarker(marker, *swindexp, object); continue; } @@ -704,7 +709,7 @@ vm_swapcache_cleaning(vm_object_t marker) * without swapcache backing. */ if (--scount <= 0) - break; + goto breakout; /* * We must hold the object before potentially yielding. @@ -723,7 +728,7 @@ vm_swapcache_cleaning(vm_object_t marker) (vp->v_type != VREG && vp->v_type != VCHR)) { vm_object_drop(object); /* object may be invalid now */ - vm_swapcache_movemarker(marker, object); + vm_swapcache_movemarker(marker, *swindexp, object); continue; } @@ -755,13 +760,13 @@ vm_swapcache_cleaning(vm_object_t marker) * tree leafs. 
*/ lwkt_token_swap(); - lwkt_reltoken(&vmobj_token); + lwkt_reltoken(&vmobj_tokens[*swindexp]); n = swap_pager_condfree(object, &marker->size, (count + SWAP_META_MASK) & ~SWAP_META_MASK); vm_object_drop(object); /* object may be invalid now */ - lwkt_gettoken(&vmobj_token); + lwkt_gettoken(&vmobj_tokens[*swindexp]); /* * If we have exhausted the object or deleted our per-pass @@ -770,7 +775,7 @@ vm_swapcache_cleaning(vm_object_t marker) */ if (n <= 0 || marker->backing_object_offset > vm_swapcache_cleanperobj) { - vm_swapcache_movemarker(marker, object); + vm_swapcache_movemarker(marker, *swindexp, object); } /* @@ -779,17 +784,24 @@ vm_swapcache_cleaning(vm_object_t marker) count -= n; marker->backing_object_offset += n * PAGE_SIZE; if (count < 0) - break; + goto breakout; } /* - * If we wound up at the end of the list this will move the - * marker back to the beginning. + * Iterate vm_object_lists[] hash table */ - if (object == NULL) - vm_swapcache_movemarker(marker, NULL); - - lwkt_reltoken(&vmobj_token); + TAILQ_REMOVE(&vm_object_lists[*swindexp], marker, object_list); + lwkt_reltoken(&vmobj_tokens[*swindexp]); + if (++*swindexp >= VMOBJ_HSIZE) + *swindexp = 0; + lwkt_gettoken(&vmobj_tokens[*swindexp]); + TAILQ_INSERT_HEAD(&vm_object_lists[*swindexp], marker, object_list); + + if (*swindexp != 0) + goto outerloop; + +breakout: + lwkt_reltoken(&vmobj_tokens[*swindexp]); } /* @@ -799,16 +811,11 @@ vm_swapcache_cleaning(vm_object_t marker) * the marker past it. 
*/ static void -vm_swapcache_movemarker(vm_object_t marker, vm_object_t object) +vm_swapcache_movemarker(vm_object_t marker, int swindex, vm_object_t object) { if (TAILQ_NEXT(marker, object_list) == object) { - TAILQ_REMOVE(&vm_object_list, marker, object_list); - if (object) { - TAILQ_INSERT_AFTER(&vm_object_list, object, - marker, object_list); - } else { - TAILQ_INSERT_HEAD(&vm_object_list, - marker, object_list); - } + TAILQ_REMOVE(&vm_object_lists[swindex], marker, object_list); + TAILQ_INSERT_AFTER(&vm_object_lists[swindex], object, + marker, object_list); } } -- 2.41.0