From 2de4f77e2b4ab23695f2ed9e2917345bc821b229 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 28 Aug 2010 10:26:04 -0700 Subject: [PATCH] kernel - Add vmobj_token, misc vm-related tokenization * Add vmobj_token to protect vm_object_list and vm_object->ref_count and related functions. Note: coalesce and collapse require both vm_token and vmobj_token, and vmspace_fork() requires a bunch of tokens. * Remove miscellaneous mplocks and critical sections that are no longer needed. * Correct potential sysinfo kernel data visibility issue. * Optimize some potentially recursive vm_token/vmobj_token situations by adding *_locked() procedure variants, to avoid blowing out the token stack. * Remove unnecessary get_mplock() calls in vm_zone.c. * Bump gd_intr_nesting_level in the IPI processing core and assert that it is zero in the gettoken core. Hard interrupts (vs interrupt threads) are not allowed to acquire tokens for obvious reasons. --- sys/cpu/i386/misc/lwbuf.c | 3 - sys/dev/virtual/net/if_vke.c | 2 +- .../linux/i386/linprocfs/linprocfs_misc.c | 3 + sys/emulation/linux/linux_misc.c | 8 +- sys/kern/kern_slaballoc.c | 8 +- sys/kern/lwkt_ipiq.c | 3 + sys/kern/lwkt_token.c | 3 + sys/kern/vfs_subr.c | 12 +- sys/sys/thread.h | 1 + sys/vm/vm_kern.c | 6 +- sys/vm/vm_map.c | 69 +++++--- sys/vm/vm_meter.c | 9 +- sys/vm/vm_object.c | 158 +++++++++++------- sys/vm/vm_object.h | 12 +- sys/vm/vm_swapcache.c | 8 + sys/vm/vm_zeroidle.c | 1 - sys/vm/vm_zone.c | 2 - sys/vm/vnode_pager.c | 21 ++- 18 files changed, 216 insertions(+), 113 deletions(-) diff --git a/sys/cpu/i386/misc/lwbuf.c b/sys/cpu/i386/misc/lwbuf.c index 580a098ad8..502573061f 100644 --- a/sys/cpu/i386/misc/lwbuf.c +++ b/sys/cpu/i386/misc/lwbuf.c @@ -52,7 +52,6 @@ #include #include #include -#include static void lwbuf_init(void *); SYSINIT(sock_lwb, SI_BOOT2_MACHDEP, SI_ORDER_ANY, lwbuf_init, NULL); @@ -105,9 +104,7 @@ lwbuf_cache_dtor(void *obj, void *pdata) struct lwbuf *lwb = (struct lwbuf *)obj; KKASSERT(lwb->kva != 0); - get_mplock(); pmap_kremove_quick(lwb->kva); - rel_mplock(); kmem_free(&kernel_map, lwb->kva, PAGE_SIZE); lwb->kva = 0; atomic_add_int(&lwbuf_kva_bytes, -PAGE_SIZE); diff --git a/sys/dev/virtual/net/if_vke.c b/sys/dev/virtual/net/if_vke.c index 8d4336d51b..8c1d23d5fa 100644 --- a/sys/dev/virtual/net/if_vke.c +++ b/sys/dev/virtual/net/if_vke.c @@ -323,7 +323,7 @@ vke_init(void *xsc) * * NOTE: We can't make any kernel callbacks while holding cothread lock * because the cothread lock is not governed by the kernel scheduler - * (so mplock, tokens, etc will not bbe released). + * (so mplock, tokens, etc will not be released).
*/ static void vke_start(struct ifnet *ifp) diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c index 111b10a874..58f6a9cf48 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c @@ -127,6 +127,8 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs, } swapused = swaptotal - swapfree; memshared = 0; + + lwkt_gettoken(&vmobj_token); for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { if (object->type == OBJT_MARKER) @@ -134,6 +136,7 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs, if (object->shadow_count > 1) memshared += object->resident_page_count; } + lwkt_reltoken(&vmobj_token); memshared *= PAGE_SIZE; /* * We'd love to be able to write: diff --git a/sys/emulation/linux/linux_misc.c b/sys/emulation/linux/linux_misc.c index cb032fb5a8..40948298b6 100644 --- a/sys/emulation/linux/linux_misc.c +++ b/sys/emulation/linux/linux_misc.c @@ -147,6 +147,8 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) ts.tv_sec %= 60; i = 1; } + + bzero(&sysinfo, sizeof(sysinfo)); sysinfo.uptime=ts.tv_sec; /* Use the information from the mib to get our load averages */ @@ -155,9 +157,9 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) sysinfo.totalram = Maxmem * PAGE_SIZE; sysinfo.freeram = sysinfo.totalram - vmstats.v_wire_count * PAGE_SIZE; - - get_mplock(); sysinfo.sharedram = 0; + + lwkt_gettoken(&vmobj_token); for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { if (object->type == OBJT_MARKER) @@ -165,6 +167,7 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) if (object->shadow_count > 1) sysinfo.sharedram += object->resident_page_count; } + lwkt_reltoken(&vmobj_token); sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; @@ -176,7 +179,6 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args) sysinfo.totalswap = swapblist->bl_blocks * 1024; sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE; } - rel_mplock(); sysinfo.procs = nprocs; sysinfo.totalhigh = 0; diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c index 7a3f8cb6ee..e4246686d3 100644 --- a/sys/kern/kern_slaballoc.c +++ b/sys/kern/kern_slaballoc.c @@ -1151,8 +1151,8 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags) vm_map_unlock(&kernel_map); if ((flags & M_NULLOK) == 0) panic("kmem_slab_alloc(): kernel_map ran out of space!"); - crit_exit(); vm_map_entry_release(count); + crit_exit(); lwkt_reltoken(&vm_token); return(NULL); } @@ -1232,19 +1232,19 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags) /* * We were unable to recover, cleanup and return NULL + * + * (vm_token already held) */ while (i != 0) { i -= PAGE_SIZE; - lwkt_gettoken(&vm_token); m = vm_page_lookup(&kernel_object, OFF_TO_IDX(addr + i)); /* page should already be busy */ vm_page_free(m); - lwkt_reltoken(&vm_token); } vm_map_delete(&kernel_map, addr, addr + size, &count); vm_map_unlock(&kernel_map); - crit_exit(); vm_map_entry_release(count); + crit_exit(); lwkt_reltoken(&vm_token); return(NULL); } diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c index 9be618a85e..50a178aa79 100644 --- a/sys/kern/lwkt_ipiq.c +++ b/sys/kern/lwkt_ipiq.c @@ -519,6 +519,7 @@ static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip, struct intrframe *frame) { + globaldata_t mygd = mycpu; int ri; int wi; ipifunc3_t copy_func; @@ 
-533,6 +534,7 @@ lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip, KKASSERT(curthread->td_critcount); wi = ip->ip_windex; cpu_lfence(); + ++mygd->gd_intr_nesting_level; /* * Note: xindex is only updated after we are sure the function has @@ -571,6 +573,7 @@ lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip, } #endif } + --mygd->gd_intr_nesting_level; /* * Return non-zero if there are more IPI messages pending on this diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c index e0f015f11b..1ec62699c7 100644 --- a/sys/kern/lwkt_token.c +++ b/sys/kern/lwkt_token.c @@ -136,6 +136,7 @@ struct lwkt_token kvm_token = LWKT_TOKEN_UP_INITIALIZER(kvm_token); struct lwkt_token proc_token = LWKT_TOKEN_UP_INITIALIZER(proc_token); struct lwkt_token tty_token = LWKT_TOKEN_UP_INITIALIZER(tty_token); struct lwkt_token vnode_token = LWKT_TOKEN_UP_INITIALIZER(vnode_token); +struct lwkt_token vmobj_token = LWKT_TOKEN_UP_INITIALIZER(vmobj_token); SYSCTL_INT(_lwkt, OID_AUTO, pmap_mpsafe, CTLFLAG_RW, &pmap_token.t_flags, 0, ""); @@ -153,6 +154,8 @@ SYSCTL_INT(_lwkt, OID_AUTO, tty_mpsafe, CTLFLAG_RW, &tty_token.t_flags, 0, ""); SYSCTL_INT(_lwkt, OID_AUTO, vnode_mpsafe, CTLFLAG_RW, &vnode_token.t_flags, 0, ""); +SYSCTL_INT(_lwkt, OID_AUTO, vmobj_mpsafe, + CTLFLAG_RW, &vmobj_token.t_flags, 0, ""); /* * The collision count is bumped every time the LWKT scheduler fails diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index f619030129..ac67ee77f3 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1235,8 +1235,8 @@ vclean_vxlocked(struct vnode *vp, int flags) /* * If the vnode has an object, destroy it. */ + lwkt_gettoken(&vmobj_token); if ((object = vp->v_object) != NULL) { - lwkt_gettoken(&vm_token); KKASSERT(object == vp->v_object); if (object->ref_count == 0) { if ((object->flags & OBJ_DEAD) == 0) @@ -1245,8 +1245,8 @@ vclean_vxlocked(struct vnode *vp, int flags) vm_pager_deallocate(object); } vclrflags(vp, VOBJBUF); - lwkt_reltoken(&vm_token); } + lwkt_reltoken(&vmobj_token); KKASSERT((vp->v_flag & VOBJBUF) == 0); /* @@ -1490,9 +1490,9 @@ vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff) vm_object_t object; int error = 0; + lwkt_gettoken(&vmobj_token); retry: if ((object = vp->v_object) == NULL) { - lwkt_gettoken(&vm_token); object = vnode_pager_alloc(vp, filesize, 0, 0, blksize, boff); /* * Dereference the reference we just created. 
This assumes @@ -1500,17 +1500,19 @@ retry: */ object->ref_count--; vrele(vp); - lwkt_reltoken(&vm_token); } else { if (object->flags & OBJ_DEAD) { vn_unlock(vp); - vm_object_dead_sleep(object, "vodead"); + if (vp->v_object == object) + vm_object_dead_sleep(object, "vodead"); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); goto retry; } } KASSERT(vp->v_object != NULL, ("vinitvmio: NULL object")); vsetflags(vp, VOBJBUF); + lwkt_reltoken(&vmobj_token); + return (error); } diff --git a/sys/sys/thread.h b/sys/sys/thread.h index 2e375970fb..0559377256 100644 --- a/sys/sys/thread.h +++ b/sys/sys/thread.h @@ -373,6 +373,7 @@ extern struct lwkt_token kvm_token; extern struct lwkt_token proc_token; extern struct lwkt_token tty_token; extern struct lwkt_token vnode_token; +extern struct lwkt_token vmobj_token; /* * Procedures diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index ba3ff65e8d..5c50538877 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -112,9 +112,8 @@ kmem_alloc_pageable(vm_map_t map, vm_size_t size) TRUE, VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, 0); - if (result != KERN_SUCCESS) { + if (result != KERN_SUCCESS) return (0); - } return (addr); } @@ -136,9 +135,8 @@ kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align) TRUE, VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); - if (result != KERN_SUCCESS) { + if (result != KERN_SUCCESS) return (0); - } return (addr); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index c025fb2843..71a684bb16 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -909,6 +909,9 @@ vm_map_insert(vm_map_t map, int *countp, if (cow & MAP_IS_STACK) protoeflags |= MAP_ENTRY_STACK; + lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); + if (object) { /* * When object is non-NULL, it could be shared with another @@ -937,6 +940,8 @@ vm_map_insert(vm_map_t map, int *countp, if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == prot) && (prev_entry->max_protection == max)) { + lwkt_reltoken(&vmobj_token); + lwkt_reltoken(&vm_token); map->size += (end - prev_entry->end); prev_entry->end = end; vm_map_simplify_entry(map, prev_entry, countp); @@ -952,9 +957,12 @@ vm_map_insert(vm_map_t map, int *countp, object = prev_entry->object.vm_object; offset = prev_entry->offset + (prev_entry->end - prev_entry->start); - vm_object_reference(object); + vm_object_reference_locked(object); } + lwkt_reltoken(&vmobj_token); + lwkt_reltoken(&vm_token); + /* * NOTE: if conditionals fail, object can be NULL here. This occurs * in things like the buffer map where we manage kva but do not manage @@ -2437,6 +2445,8 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, * Hold vm_token to avoid blocking in vm_object_reference() */ lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); + for (current = entry; current->start < end; current = current->next) { offset = current->offset + (start - current->start); size = (end <= current->end ? end : current->end) - start; @@ -2490,7 +2500,7 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, */ int flags; - vm_object_reference(object); + vm_object_reference_locked(object); vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY); flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? 
OBJPC_INVAL : 0; @@ -2512,14 +2522,14 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, break; } vn_unlock(((struct vnode *)object->handle)); - vm_object_deallocate(object); + vm_object_deallocate_locked(object); } if (object && invalidate && ((object->type == OBJT_VNODE) || (object->type == OBJT_DEVICE))) { int clean_only = (object->type == OBJT_DEVICE) ? FALSE : TRUE; - vm_object_reference(object); + vm_object_reference_locked(object); switch(current->maptype) { case VM_MAPTYPE_NORMAL: vm_object_page_remove(object, @@ -2531,12 +2541,14 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_object_page_remove(object, 0, 0, clean_only); break; } - vm_object_deallocate(object); + vm_object_deallocate_locked(object); } start += size; } - vm_map_unlock_read(map); + + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vm_token); + vm_map_unlock_read(map); return (KERN_SUCCESS); } @@ -2667,14 +2679,20 @@ again: offidxend = offidxstart + count; /* - * Hold vm_token when manipulating vm_objects. + * Hold vm_token when manipulating vm_objects, + * + * Hold vmobj_token when potentially adding or removing + * objects (collapse requires both). */ lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); + if (object == &kernel_object) { vm_object_page_remove(object, offidxstart, offidxend, FALSE); } else { pmap_remove(map->pmap, s, e); + if (object != NULL && object->ref_count != 1 && (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == @@ -2695,6 +2713,7 @@ again: } } } + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vm_token); /* @@ -2839,15 +2858,18 @@ vm_map_split(vm_map_entry_t entry) * vm_token required when manipulating vm_objects. */ lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); source = orig_object->backing_object; if (source != NULL) { - vm_object_reference(source); /* Referenced by new_object */ + /* Referenced by new_object */ + vm_object_reference_locked(source); LIST_INSERT_HEAD(&source->shadow_head, - new_object, shadow_list); + new_object, shadow_list); vm_object_clear_flag(source, OBJ_ONEMAPPING); new_object->backing_object_offset = - orig_object->backing_object_offset + IDX_TO_OFF(offidxstart); + orig_object->backing_object_offset + + IDX_TO_OFF(offidxstart); new_object->backing_object = source; source->shadow_count++; source->generation++; @@ -2856,13 +2878,10 @@ vm_map_split(vm_map_entry_t entry) for (idx = 0; idx < size; idx++) { vm_page_t m; - crit_enter(); retry: m = vm_page_lookup(orig_object, offidxstart + idx); - if (m == NULL) { - crit_exit(); + if (m == NULL) continue; - } /* * We must wait for pending I/O to complete before we can @@ -2877,7 +2896,6 @@ vm_map_split(vm_map_entry_t entry) vm_page_rename(m, new_object, idx); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); - crit_exit(); } if (orig_object->type == OBJT_SWAP) { @@ -2903,7 +2921,8 @@ vm_map_split(vm_map_entry_t entry) entry->object.vm_object = new_object; entry->offset = 0LL; - vm_object_deallocate(orig_object); + vm_object_deallocate_locked(orig_object); + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vm_token); } @@ -2912,6 +2931,7 @@ vm_map_split(vm_map_entry_t entry) * entry. The entries *must* be aligned properly. * * The vm_map must be exclusively locked. 
+ * vm_token must be held */ static void vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, @@ -2924,7 +2944,9 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, if (src_entry->maptype == VM_MAPTYPE_SUBMAP) return; - lwkt_gettoken(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vm_token); + lwkt_gettoken(&vmobj_token); /* required for collapse */ + if (src_entry->wired_count == 0) { /* * If the source entry is marked needs_copy, it is already @@ -2951,7 +2973,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, } } - vm_object_reference(src_object); + vm_object_reference_locked(src_object); vm_object_clear_flag(src_object, OBJ_ONEMAPPING); dst_entry->object.vm_object = src_object; src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); @@ -2972,7 +2994,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, */ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); } - lwkt_reltoken(&vm_token); + lwkt_reltoken(&vmobj_token); } /* @@ -2998,6 +3020,7 @@ vmspace_fork(struct vmspace *vm1) lwkt_gettoken(&vm_token); lwkt_gettoken(&vmspace_token); + lwkt_gettoken(&vmobj_token); vm_map_lock(old_map); old_map->infork = 1; @@ -3044,13 +3067,13 @@ vmspace_fork(struct vmspace *vm1) * Add the reference before calling vm_map_entry_shadow * to insure that a shadow object is created. */ - vm_object_reference(object); + vm_object_reference_locked(object); if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { vm_map_entry_shadow(old_entry); /* Transfer the second reference too. */ - vm_object_reference( + vm_object_reference_locked( old_entry->object.vm_object); - vm_object_deallocate(object); + vm_object_deallocate_locked(object); object = old_entry->object.vm_object; } vm_object_clear_flag(object, OBJ_ONEMAPPING); @@ -3102,6 +3125,8 @@ vmspace_fork(struct vmspace *vm1) vm_map_unlock(old_map); vm_map_unlock(new_map); vm_map_entry_release(count); + + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vmspace_token); lwkt_reltoken(&vm_token); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 71eacf7fbe..657a32aea8 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -90,12 +90,13 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) struct vmtotal *totalp; vm_object_t object; + bzero(&total, sizeof(total)); totalp = &total; - bzero(totalp, sizeof *totalp); /* * Mark all objects as inactive. */ + lwkt_gettoken(&vmobj_token); for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object,object_list)) { @@ -103,6 +104,7 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) continue; vm_object_clear_flag(object, OBJ_ACTIVE); } + lwkt_reltoken(&vmobj_token); /* * Calculate process statistics. @@ -112,7 +114,7 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) /* * Calculate object memory usage statistics. 
*/ - lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { @@ -140,8 +142,9 @@ do_vmtotal(SYSCTL_HANDLER_ARGS) } } } + lwkt_reltoken(&vmobj_token); totalp->t_free = vmstats.v_free_count + vmstats.v_cache_count; - lwkt_reltoken(&vm_token); + return (sysctl_handle_opaque(oidp, totalp, sizeof total, req)); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index bb5a405791..d8a9272b7b 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -126,10 +126,10 @@ static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, * */ -struct object_q vm_object_list; +struct object_q vm_object_list; /* locked by vmobj_token */ struct vm_object kernel_object; -static long vm_object_count; /* count of all objects */ +static long vm_object_count; /* locked by vmobj_token */ extern int vm_pageout_page_count; static long object_collapses; @@ -186,13 +186,11 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) object->swblock_count = 0; RB_INIT(&object->swblock_root); - crit_enter(); - lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); vm_object_count++; object_hash_rand = object->hash_rand; - lwkt_reltoken(&vm_token); - crit_exit(); + lwkt_reltoken(&vmobj_token); } /* @@ -240,27 +238,39 @@ vm_object_allocate(objtype_t type, vm_pindex_t size) * Add an additional reference to a vm_object. * * Object passed by caller must be stable or caller must already - * hold vm_token to avoid races. + * hold vmobj_token to avoid races. */ void vm_object_reference(vm_object_t object) { - if (object == NULL) - return; + if (object) { + lwkt_gettoken(&vmobj_token); + object->ref_count++; + if (object->type == OBJT_VNODE) { + vref(object->handle); + /* XXX what if the vnode is being destroyed? */ + } + lwkt_reltoken(&vmobj_token); + } +} - lwkt_gettoken(&vm_token); - object->ref_count++; - if (object->type == OBJT_VNODE) { - vref(object->handle); - /* XXX what if the vnode is being destroyed? */ +void +vm_object_reference_locked(vm_object_t object) +{ + if (object) { + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); + object->ref_count++; + if (object->type == OBJT_VNODE) { + vref(object->handle); + /* XXX what if the vnode is being destroyed? */ + } } - lwkt_reltoken(&vm_token); } /* * Dereference an object and its underlying vnode. * - * The caller must hold vm_token. + * The caller must hold vmobj_token. */ static void vm_object_vndeallocate(vm_object_t object) @@ -270,6 +280,7 @@ vm_object_vndeallocate(vm_object_t object) KASSERT(object->type == OBJT_VNODE, ("vm_object_vndeallocate: not a vnode object")); KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); #ifdef INVARIANTS if (object->ref_count == 0) { vprint("vm_object_vndeallocate", vp); @@ -287,15 +298,21 @@ vm_object_vndeallocate(vm_object_t object) * Release a reference to the specified object, gained either through a * vm_object_allocate or a vm_object_reference call. When all references * are gone, storage associated with this object may be relinquished. - * - * The object must not be locked. 
*/ void vm_object_deallocate(vm_object_t object) +{ + lwkt_gettoken(&vmobj_token); + vm_object_deallocate_locked(object); + lwkt_reltoken(&vmobj_token); +} + +void +vm_object_deallocate_locked(vm_object_t object) { vm_object_t temp; - lwkt_gettoken(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); while (object != NULL) { if (object->type == OBJT_VNODE) { @@ -312,6 +329,18 @@ vm_object_deallocate(vm_object_t object) break; } + /* + * We currently need the vm_token from this point on, and + * we must recheck ref_count after acquiring it. + */ + lwkt_gettoken(&vm_token); + + if (object->ref_count > 2) { + object->ref_count--; + lwkt_reltoken(&vm_token); + break; + } + /* * Here on ref_count of one or two, which are special cases for * objects. @@ -319,6 +348,7 @@ vm_object_deallocate(vm_object_t object) if ((object->ref_count == 2) && (object->shadow_count == 0)) { vm_object_set_flag(object, OBJ_ONEMAPPING); object->ref_count--; + lwkt_reltoken(&vm_token); break; } if ((object->ref_count == 2) && (object->shadow_count == 1)) { @@ -357,9 +387,11 @@ vm_object_deallocate(vm_object_t object) object = robject; vm_object_collapse(object); + lwkt_reltoken(&vm_token); continue; } } + lwkt_reltoken(&vm_token); break; } @@ -367,14 +399,15 @@ vm_object_deallocate(vm_object_t object) * Normal dereferencing path */ object->ref_count--; - if (object->ref_count != 0) + if (object->ref_count != 0) { + lwkt_reltoken(&vm_token); break; + } /* * Termination path */ doterm: - temp = object->backing_object; if (temp) { LIST_REMOVE(object, shadow_list); @@ -382,6 +415,7 @@ doterm: temp->generation++; object->backing_object = NULL; } + lwkt_reltoken(&vm_token); /* * Don't double-terminate, we could be in a termination @@ -392,7 +426,6 @@ doterm: vm_object_terminate(object); object = temp; } - lwkt_reltoken(&vm_token); } /* @@ -400,7 +433,7 @@ doterm: * * The object must have zero references. * - * The caller must be holding vm_token and properly interlock with + * The caller must be holding vmobj_token and properly interlock with * OBJ_DEAD. */ static int vm_object_terminate_callback(vm_page_t p, void *data); @@ -409,13 +442,15 @@ void vm_object_terminate(vm_object_t object) { /* - * Make sure no one uses us. + * Make sure no one uses us. Once we set OBJ_DEAD we should be + * able to safely block. */ - ASSERT_LWKT_TOKEN_HELD(&vm_token); + KKASSERT((object->flags & OBJ_DEAD) == 0); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); vm_object_set_flag(object, OBJ_DEAD); /* - * wait for the pageout daemon to be done with the object + * Wait for the pageout daemon to be done with the object */ vm_object_pip_wait(object, "objtrm"); @@ -444,18 +479,20 @@ vm_object_terminate(vm_object_t object) */ vm_object_pip_wait(object, "objtrm"); - if (object->ref_count != 0) - panic("vm_object_terminate: object with references, ref_count=%d", object->ref_count); + if (object->ref_count != 0) { + panic("vm_object_terminate: object with references, " + "ref_count=%d", object->ref_count); + } /* * Now free any remaining pages. For internal objects, this also * removes them from paging queues. Don't free wired pages, just * remove them from the object. */ - crit_enter(); + lwkt_gettoken(&vm_token); vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, vm_object_terminate_callback, NULL); - crit_exit(); + lwkt_reltoken(&vm_token); /* * Let the pager know object is dead. @@ -464,15 +501,17 @@ vm_object_terminate(vm_object_t object) /* * Remove the object from the global object list. 
+ * + * (we are holding vmobj_token) */ - crit_enter(); TAILQ_REMOVE(&vm_object_list, object, object_list); vm_object_count--; - crit_exit(); - vm_object_dead_wakeup(object); - if (object->ref_count != 0) - panic("vm_object_terminate2: object with references, ref_count=%d", object->ref_count); + + if (object->ref_count != 0) { + panic("vm_object_terminate2: object with references, " + "ref_count=%d", object->ref_count); + } /* * Free the space for the object. @@ -506,36 +545,33 @@ vm_object_terminate_callback(vm_page_t p, void *data __unused) * The object is dead but still has an object<->pager association. Sleep * and return. The caller typically retests the association in a loop. * - * No requirement. + * Must be called with the vmobj_token held. */ void vm_object_dead_sleep(vm_object_t object, const char *wmesg) { - crit_enter(); - lwkt_gettoken(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); if (object->handle) { vm_object_set_flag(object, OBJ_DEADWNT); tsleep(object, 0, wmesg, 0); + /* object may be invalid after this point */ } - lwkt_reltoken(&vm_token); - crit_exit(); } /* * Wakeup anyone waiting for the object<->pager disassociation on * a dead object. * - * No requirement. + * Must be called with the vmobj_token held. */ void vm_object_dead_wakeup(vm_object_t object) { - lwkt_gettoken(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); if (object->flags & OBJ_DEADWNT) { vm_object_clear_flag(object, OBJ_DEADWNT); wakeup(object); } - lwkt_reltoken(&vm_token); } /* @@ -1323,7 +1359,8 @@ vm_object_backing_scan_callback(vm_page_t p, void *data) * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. * - * The caller must hold vm_token. + * The caller must hold vm_token and vmobj_token. + * (only called from vm_object_collapse) */ static void vm_object_qcollapse(vm_object_t object) @@ -1347,7 +1384,8 @@ vm_object_qcollapse(vm_object_t object) void vm_object_collapse(vm_object_t object) { - lwkt_gettoken(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); while (TRUE) { vm_object_t backing_object; @@ -1466,13 +1504,19 @@ vm_object_collapse(vm_object_t object) * necessary is to dispose of it. */ - KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object)); - KASSERT(RB_EMPTY(&backing_object->rb_memq), ("backing_object %p somehow has left over pages during collapse!", backing_object)); - crit_enter(); + KASSERT(backing_object->ref_count == 1, + ("backing_object %p was somehow " + "re-referenced during collapse!", + backing_object)); + KASSERT(RB_EMPTY(&backing_object->rb_memq), + ("backing_object %p somehow has left " + "over pages during collapse!", + backing_object)); + + /* (we are holding vmobj_token) */ TAILQ_REMOVE(&vm_object_list, backing_object, object_list); vm_object_count--; - crit_exit(); zfree(obj_zone, backing_object); @@ -1519,7 +1563,7 @@ vm_object_collapse(vm_object_t object) * so we don't need to call vm_object_deallocate, but * we do anyway. */ - vm_object_deallocate(backing_object); + vm_object_deallocate_locked(backing_object); object_bypasses++; } @@ -1527,7 +1571,6 @@ vm_object_collapse(vm_object_t object) * Try again with this object's new backing object. */ } - lwkt_reltoken(&vm_token); } /* @@ -1681,6 +1724,7 @@ vm_object_page_remove_callback(vm_page_t p, void *data) * next_size Size of reference to next_object * * The object must not be locked. 
+ * The caller must hold vm_token and vmobj_token. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, @@ -1688,6 +1732,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, { vm_pindex_t next_pindex; + ASSERT_LWKT_TOKEN_HELD(&vm_token); + ASSERT_LWKT_TOKEN_HELD(&vmobj_token); + if (prev_object == NULL) { return (TRUE); } @@ -1697,8 +1744,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, return (FALSE); } - lwkt_gettoken(&vm_token); - /* * Try to collapse the object first */ @@ -1710,10 +1755,8 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, * pages not mapped to prev_entry may be in use anyway) */ - if (prev_object->backing_object != NULL) { - lwkt_reltoken(&vm_token); + if (prev_object->backing_object != NULL) return (FALSE); - } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; @@ -1721,7 +1764,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, if ((prev_object->ref_count > 1) && (prev_object->size != next_pindex)) { - lwkt_reltoken(&vm_token); return (FALSE); } @@ -1743,8 +1785,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; - - lwkt_reltoken(&vm_token); return (TRUE); } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 002035c0db..d20b350c90 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -84,6 +84,9 @@ #ifndef _SYS_TREE_H_ #include #endif +#ifndef _SYS_THREAD_H_ +#include +#endif #ifndef _MACHINE_ATOMIC_H_ #include #endif @@ -138,15 +141,18 @@ struct vm_object_lock { /* * vm_object A VM object which represents an arbitrarily sized * data store. + * + * Locking requirements: vmobj_token for ref_count and object_list, and + * vm_token for everything else. */ struct vm_object { - TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ + TAILQ_ENTRY(vm_object) object_list; /* vmobj_token */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ RB_HEAD(vm_page_rb_tree, vm_page) rb_memq; /* resident pages */ int generation; /* generation ID */ vm_pindex_t size; /* Object size */ - int ref_count; /* How many refs?? 
*/ + int ref_count; /* vmobj_token */ int shadow_count; /* how many objects that this is a shadow for */ int hash_rand; /* vm hash table randomizer */ objtype_t type; /* type of pager */ @@ -285,6 +291,7 @@ void _vm_object_allocate (objtype_t, vm_pindex_t, vm_object_t); boolean_t vm_object_coalesce (vm_object_t, vm_pindex_t, vm_size_t, vm_size_t); void vm_object_collapse (vm_object_t); void vm_object_deallocate (vm_object_t); +void vm_object_deallocate_locked (vm_object_t); void vm_object_terminate (vm_object_t); void vm_object_set_writeable_dirty (vm_object_t); void vm_object_init (void); @@ -294,6 +301,7 @@ void vm_object_pmap_copy (vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_pmap_copy_1 (vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_pmap_remove (vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_reference (vm_object_t); +void vm_object_reference_locked (vm_object_t); void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t); void vm_object_madvise (vm_object_t, vm_pindex_t, int, int); void vm_object_init2 (void); diff --git a/sys/vm/vm_swapcache.c b/sys/vm/vm_swapcache.c index bb76a008d1..0b492538c5 100644 --- a/sys/vm/vm_swapcache.c +++ b/sys/vm/vm_swapcache.c @@ -180,7 +180,9 @@ vm_swapcached(void) */ bzero(&object_marker, sizeof(object_marker)); object_marker.type = OBJT_MARKER; + lwkt_gettoken(&vmobj_token); TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list); + lwkt_reltoken(&vmobj_token); for (;;) { /* @@ -245,7 +247,9 @@ vm_swapcached(void) } } TAILQ_REMOVE(INACTIVE_LIST, &page_marker, pageq); + lwkt_gettoken(&vmobj_token); TAILQ_REMOVE(&vm_object_list, &object_marker, object_list); + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vm_token); crit_exit(); } @@ -523,6 +527,8 @@ vm_swapcache_cleaning(vm_object_t marker) * Look for vnode objects */ lwkt_gettoken(&vm_token); + lwkt_gettoken(&vmobj_token); + while ((object = TAILQ_NEXT(object, object_list)) != NULL && count--) { if (object->type != OBJT_VNODE) continue; @@ -581,5 +587,7 @@ vm_swapcache_cleaning(vm_object_t marker) else TAILQ_INSERT_HEAD(&vm_object_list, marker, object_list); marker->backing_object = object; + + lwkt_reltoken(&vmobj_token); lwkt_reltoken(&vm_token); } diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c index 2d286e6e48..9fc926b301 100644 --- a/sys/vm/vm_zeroidle.c +++ b/sys/vm/vm_zeroidle.c @@ -161,7 +161,6 @@ vm_pagezero(void __unused *arg) * For now leave the MP lock held, the VM routines cannot be called * with it released until tokenization is finished. */ - /* rel_mplock(); */ lwkt_setpri_self(TDPRI_IDLE_WORK); lwkt_setcpu_self(globaldata_find(ncpus - 1)); sleep_time = DEFAULT_SLEEP_TIME; diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c index 1bc8905fec..e3dd2f1c31 100644 --- a/sys/vm/vm_zone.c +++ b/sys/vm/vm_zone.c @@ -407,7 +407,6 @@ zget(vm_zone_t z) * Interrupt zones do not mess with the kernel_map, they * simply populate an existing mapping. */ - get_mplock(); lwkt_gettoken(&vm_token); savezpc = z->zpagecount; nbytes = z->zpagecount * PAGE_SIZE; @@ -442,7 +441,6 @@ zget(vm_zone_t z) } nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize; lwkt_reltoken(&vm_token); - rel_mplock(); } else if (z->zflags & ZONE_SPECIAL) { /* * The special zone is the one used for vm_map_entry_t's. 
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index ee05321214..c5956588bb 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -123,7 +123,11 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset, vnode_pbuf_freecnt = nswbuf / 2 + 1; } - vp = (struct vnode *) handle; + /* + * Serialize potential vnode/object teardowns and interlocks + */ + vp = (struct vnode *)handle; + lwkt_gettoken(&vmobj_token); /* * Prevent race condition when allocating the object. This @@ -174,7 +178,7 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset, if (vp->v_mount && (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC)) object->flags |= OBJ_NOMSYNC; } else { - object->ref_count++; + object->ref_count++; /* protected by vmobj_token */ if (object->size != lsize) { kprintf("vnode_pager_alloc: Warning, objsize " "mismatch %jd/%jd vp=%p obj=%p\n", @@ -190,13 +194,15 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset, vp, object); } } - vref(vp); + vref(vp); vclrflags(vp, VOLOCK); if (vp->v_flag & VOWANT) { vclrflags(vp, VOWANT); wakeup(vp); } + lwkt_reltoken(&vmobj_token); + return (object); } @@ -211,6 +217,11 @@ vnode_pager_reference(struct vnode *vp) { vm_object_t object; + /* + * Serialize potential vnode/object teardowns and interlocks + */ + lwkt_gettoken(&vmobj_token); + /* * Prevent race condition when allocating the object. This * can happen with NFS vnodes since the nfsnode isn't locked. @@ -235,7 +246,7 @@ vnode_pager_reference(struct vnode *vp) * NULL returns if it does not. */ if (object) { - object->ref_count++; + object->ref_count++; /* protected by vmobj_token */ vref(vp); } @@ -244,6 +255,8 @@ vnode_pager_reference(struct vnode *vp) vclrflags(vp, VOWANT); wakeup(vp); } + + lwkt_reltoken(&vmobj_token); return (object); } -- 2.41.0
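The locking patterns introduced by this patch reduce to a few small shapes. First, the vm_object_list scans converted in linprocfs_domeminfo(), sys_linux_sysinfo() and do_vmtotal() all follow the pattern below: take vmobj_token around the list walk and skip OBJT_MARKER entries (such as vm_swapcached's object_marker). This is a minimal sketch; the helper name is illustrative and not part of the patch.

static long
count_shared_bytes(void)
{
        vm_object_t object;
        long shared = 0;

        /*
         * vmobj_token now protects vm_object_list.  OBJT_MARKER objects
         * are scan placeholders and carry no page counts.
         */
        lwkt_gettoken(&vmobj_token);
        TAILQ_FOREACH(object, &vm_object_list, object_list) {
                if (object->type == OBJT_MARKER)
                        continue;
                if (object->shadow_count > 1)
                        shared += object->resident_page_count;
        }
        lwkt_reltoken(&vmobj_token);

        return (shared * PAGE_SIZE);
}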
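Second, the *_locked() convention mentioned in the log: the unlocked entry point only acquires vmobj_token and forwards to the _locked body, while callers that already hold the token (vm_map_clean(), vm_map_split(), vmspace_fork(), vm_object_collapse()) call the _locked variant directly, so the same token is never pushed onto the thread's token stack twice. Condensed from the vm_object.c hunk above; the _locked body is elided here.

void
vm_object_deallocate(vm_object_t object)
{
        lwkt_gettoken(&vmobj_token);
        vm_object_deallocate_locked(object);
        lwkt_reltoken(&vmobj_token);
}

void
vm_object_deallocate_locked(vm_object_t object)
{
        ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
        /* dereference and possibly terminate -- see the full hunk above */
}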
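Third, call sites that can trigger a coalesce or collapse hold both tokens, acquired in the order used throughout the patch (vm_token, then vmobj_token) and released in reverse, with the callee asserting rather than re-acquiring. A caller-side sketch under those assumptions; the wrapper function itself is illustrative, not part of the patch.

static void
collapse_caller_example(vm_object_t object)
{
        lwkt_gettoken(&vm_token);
        lwkt_gettoken(&vmobj_token);

        /* vm_object_collapse() now asserts both tokens are held */
        vm_object_collapse(object);

        lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vm_token);
}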
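Finally, the gd_intr_nesting_level change: the lwkt_ipiq.c hunk brackets the IPI callback loop with ++/-- of gd_intr_nesting_level, and the log says the gettoken core asserts the count is zero. The lwkt_token.c side of that assert is not shown in the hunks above, so the sketch below is an assumption about its shape, not the committed code.

static __inline void
token_assert_not_hard_interrupt(thread_t td)
{
        /*
         * Assumed form of the check described in the log: a thread
         * running from a hard interrupt (gd_intr_nesting_level != 0)
         * must not acquire an LWKT token, because it cannot block or
         * yield to release it.
         */
        KKASSERT(td->td_gd->gd_intr_nesting_level == 0);
}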