kernel - Add vmobj_token, misc vm-related tokenization
Author:     Matthew Dillon <dillon@apollo.backplane.com>
AuthorDate: Sat, 28 Aug 2010 17:26:04 +0000 (10:26 -0700)
Committer:  Matthew Dillon <dillon@apollo.backplane.com>
CommitDate: Sat, 28 Aug 2010 17:26:04 +0000 (10:26 -0700)
* Add vmobj_token to protect vm_object_list and vm_object->ref_count and
  related functions.

  Note: coalesce and collapse require both vm_token and vmobj_token,
  and vmspace_fork() requires vm_token, vmspace_token, and vmobj_token.
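
  For illustration, the lock ordering used in the vm_map paths touched
  by this patch (e.g. vm_map_insert()) is vm_token first, then
  vmobj_token:

      lwkt_gettoken(&vm_token);
      lwkt_gettoken(&vmobj_token);
      /* ... manipulate map entries and their vm_objects ... */
      lwkt_reltoken(&vmobj_token);
      lwkt_reltoken(&vm_token);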

* Remove miscellaneous mplocks and critical sections that are no longer
  needed.

* Correct a potential kernel data visibility issue in sysinfo.
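
  The fix zeroes the structure before it is filled in and copied out
  to userland:

      bzero(&sysinfo, sizeof(sysinfo));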

* Optimize some potentially recursive vm_token/vmobj_token situations
  by adding *_locked() procedure variants, which avoids blowing out the
  token stack.
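
  The pattern is a thin wrapper that acquires vmobj_token and calls a
  _locked variant which asserts the token is held, e.g. the deallocate
  path in this patch:

      void
      vm_object_deallocate(vm_object_t object)
      {
              lwkt_gettoken(&vmobj_token);
              vm_object_deallocate_locked(object);
              lwkt_reltoken(&vmobj_token);
      }

      void
      vm_object_deallocate_locked(vm_object_t object)
      {
              ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
              /* ... actual deallocation work ... */
      }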

* Remove unnecessary get_mplock() calls in vm_zone.c

* Bump gd_intr_nesting_level in the IPI processing core and assert
  that it is zero in the gettoken core.  Hard interrupts (vs interrupt
  threads) are not allowed to acquire tokens for obvious reasons.
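
  A minimal sketch of the arrangement (the increment/decrement is from
  lwkt_ipiq.c in this patch; the exact assertion in the token
  acquisition core is assumed, it is not shown in this diff):

      /* lwkt_process_ipiq_core(): bracket the queued IPI callbacks */
      ++mygd->gd_intr_nesting_level;
      /* ... run queued IPI functions ... */
      --mygd->gd_intr_nesting_level;

      /* gettoken core (assumed form): hard interrupt context
       * must not acquire tokens */
      KKASSERT(mycpu->gd_intr_nesting_level == 0);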

18 files changed:
sys/cpu/i386/misc/lwbuf.c
sys/dev/virtual/net/if_vke.c
sys/emulation/linux/i386/linprocfs/linprocfs_misc.c
sys/emulation/linux/linux_misc.c
sys/kern/kern_slaballoc.c
sys/kern/lwkt_ipiq.c
sys/kern/lwkt_token.c
sys/kern/vfs_subr.c
sys/sys/thread.h
sys/vm/vm_kern.c
sys/vm/vm_map.c
sys/vm/vm_meter.c
sys/vm/vm_object.c
sys/vm/vm_object.h
sys/vm/vm_swapcache.c
sys/vm/vm_zeroidle.c
sys/vm/vm_zone.c
sys/vm/vnode_pager.c

diff --git a/sys/cpu/i386/misc/lwbuf.c b/sys/cpu/i386/misc/lwbuf.c
index 580a098..5025730 100644
@@ -52,7 +52,6 @@
 #include <machine/atomic.h>
 #include <machine/param.h>
 #include <sys/thread.h>
-#include <sys/mplock2.h>
 
 static void lwbuf_init(void *);
 SYSINIT(sock_lwb, SI_BOOT2_MACHDEP, SI_ORDER_ANY, lwbuf_init, NULL);
@@ -105,9 +104,7 @@ lwbuf_cache_dtor(void *obj, void *pdata)
     struct lwbuf *lwb = (struct lwbuf *)obj;
 
     KKASSERT(lwb->kva != 0);
-    get_mplock();
     pmap_kremove_quick(lwb->kva);
-    rel_mplock();
     kmem_free(&kernel_map, lwb->kva, PAGE_SIZE);
     lwb->kva = 0;
     atomic_add_int(&lwbuf_kva_bytes, -PAGE_SIZE);
diff --git a/sys/dev/virtual/net/if_vke.c b/sys/dev/virtual/net/if_vke.c
index 8d4336d..8c1d23d 100644
@@ -323,7 +323,7 @@ vke_init(void *xsc)
  *
  * NOTE: We can't make any kernel callbacks while holding cothread lock
  *      because the cothread lock is not governed by the kernel scheduler
- *      (so mplock, tokens, etc will not bbe released).
+ *      (so mplock, tokens, etc will not be released).
  */
 static void
 vke_start(struct ifnet *ifp)
diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c b/sys/emulation/linux/i386/linprocfs/linprocfs_misc.c
index 111b10a..58f6a9c 100644
@@ -127,6 +127,8 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs,
        }
        swapused = swaptotal - swapfree;
        memshared = 0;
+
+       lwkt_gettoken(&vmobj_token);
        for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
            object = TAILQ_NEXT(object, object_list)) {
                if (object->type == OBJT_MARKER)
@@ -134,6 +136,7 @@ linprocfs_domeminfo(struct proc *curp, struct proc *p, struct pfsnode *pfs,
                if (object->shadow_count > 1)
                        memshared += object->resident_page_count;
        }
+       lwkt_reltoken(&vmobj_token);
        memshared *= PAGE_SIZE;
        /*
         * We'd love to be able to write:
diff --git a/sys/emulation/linux/linux_misc.c b/sys/emulation/linux/linux_misc.c
index cb032fb..4094829 100644
@@ -147,6 +147,8 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args)
                ts.tv_sec %= 60;
                i = 1;
        }
+
+       bzero(&sysinfo, sizeof(sysinfo));
        sysinfo.uptime=ts.tv_sec;
 
        /* Use the information from the mib to get our load averages */
@@ -155,9 +157,9 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args)
 
        sysinfo.totalram = Maxmem * PAGE_SIZE;
        sysinfo.freeram = sysinfo.totalram - vmstats.v_wire_count * PAGE_SIZE;
-
-       get_mplock();
        sysinfo.sharedram = 0;
+
+       lwkt_gettoken(&vmobj_token);
        for (object = TAILQ_FIRST(&vm_object_list); object != NULL;
             object = TAILQ_NEXT(object, object_list)) {
                if (object->type == OBJT_MARKER)
@@ -165,6 +167,7 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args)
                if (object->shadow_count > 1)
                        sysinfo.sharedram += object->resident_page_count;
        }
+       lwkt_reltoken(&vmobj_token);
 
        sysinfo.sharedram *= PAGE_SIZE;
        sysinfo.bufferram = 0;
@@ -176,7 +179,6 @@ sys_linux_sysinfo(struct linux_sysinfo_args *args)
                sysinfo.totalswap = swapblist->bl_blocks * 1024;
                sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE;
        }
-       rel_mplock();
 
        sysinfo.procs = nprocs;
        sysinfo.totalhigh = 0;
diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c
index 7a3f8cb..e424668 100644
@@ -1151,8 +1151,8 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
        vm_map_unlock(&kernel_map);
        if ((flags & M_NULLOK) == 0)
            panic("kmem_slab_alloc(): kernel_map ran out of space!");
-       crit_exit();
        vm_map_entry_release(count);
+       crit_exit();
        lwkt_reltoken(&vm_token);
        return(NULL);
     }
@@ -1232,19 +1232,19 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
 
            /*
             * We were unable to recover, cleanup and return NULL
+            *
+            * (vm_token already held)
             */
            while (i != 0) {
                i -= PAGE_SIZE;
-               lwkt_gettoken(&vm_token);
                m = vm_page_lookup(&kernel_object, OFF_TO_IDX(addr + i));
                /* page should already be busy */
                vm_page_free(m);
-               lwkt_reltoken(&vm_token);
            }
            vm_map_delete(&kernel_map, addr, addr + size, &count);
            vm_map_unlock(&kernel_map);
-           crit_exit();
            vm_map_entry_release(count);
+           crit_exit();
            lwkt_reltoken(&vm_token);
            return(NULL);
        }
diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c
index 9be618a..50a178a 100644
@@ -519,6 +519,7 @@ static int
 lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip, 
                       struct intrframe *frame)
 {
+    globaldata_t mygd = mycpu;
     int ri;
     int wi;
     ipifunc3_t copy_func;
@@ -533,6 +534,7 @@ lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
     KKASSERT(curthread->td_critcount);
     wi = ip->ip_windex;
     cpu_lfence();
+    ++mygd->gd_intr_nesting_level;
 
     /*
      * Note: xindex is only updated after we are sure the function has
@@ -571,6 +573,7 @@ lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
        }
 #endif
     }
+    --mygd->gd_intr_nesting_level;
 
     /*
      * Return non-zero if there are more IPI messages pending on this
diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c
index e0f015f..1ec6269 100644
@@ -136,6 +136,7 @@ struct lwkt_token kvm_token = LWKT_TOKEN_UP_INITIALIZER(kvm_token);
 struct lwkt_token proc_token = LWKT_TOKEN_UP_INITIALIZER(proc_token);
 struct lwkt_token tty_token = LWKT_TOKEN_UP_INITIALIZER(tty_token);
 struct lwkt_token vnode_token = LWKT_TOKEN_UP_INITIALIZER(vnode_token);
+struct lwkt_token vmobj_token = LWKT_TOKEN_UP_INITIALIZER(vmobj_token);
 
 SYSCTL_INT(_lwkt, OID_AUTO, pmap_mpsafe,
           CTLFLAG_RW, &pmap_token.t_flags, 0, "");
@@ -153,6 +154,8 @@ SYSCTL_INT(_lwkt, OID_AUTO, tty_mpsafe,
           CTLFLAG_RW, &tty_token.t_flags, 0, "");
 SYSCTL_INT(_lwkt, OID_AUTO, vnode_mpsafe,
           CTLFLAG_RW, &vnode_token.t_flags, 0, "");
+SYSCTL_INT(_lwkt, OID_AUTO, vmobj_mpsafe,
+          CTLFLAG_RW, &vmobj_token.t_flags, 0, "");
 
 /*
  * The collision count is bumped every time the LWKT scheduler fails
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index f619030..ac67ee7 100644
@@ -1235,8 +1235,8 @@ vclean_vxlocked(struct vnode *vp, int flags)
        /*
         * If the vnode has an object, destroy it.
         */
+       lwkt_gettoken(&vmobj_token);
        if ((object = vp->v_object) != NULL) {
-               lwkt_gettoken(&vm_token);
                KKASSERT(object == vp->v_object);
                if (object->ref_count == 0) {
                        if ((object->flags & OBJ_DEAD) == 0)
@@ -1245,8 +1245,8 @@ vclean_vxlocked(struct vnode *vp, int flags)
                        vm_pager_deallocate(object);
                }
                vclrflags(vp, VOBJBUF);
-               lwkt_reltoken(&vm_token);
        }
+       lwkt_reltoken(&vmobj_token);
        KKASSERT((vp->v_flag & VOBJBUF) == 0);
 
        /*
@@ -1490,9 +1490,9 @@ vinitvmio(struct vnode *vp, off_t filesize, int blksize, int boff)
        vm_object_t object;
        int error = 0;
 
+       lwkt_gettoken(&vmobj_token);
 retry:
        if ((object = vp->v_object) == NULL) {
-               lwkt_gettoken(&vm_token);
                object = vnode_pager_alloc(vp, filesize, 0, 0, blksize, boff);
                /*
                 * Dereference the reference we just created.  This assumes
@@ -1500,17 +1500,19 @@ retry:
                 */
                object->ref_count--;
                vrele(vp);
-               lwkt_reltoken(&vm_token);
        } else {
                if (object->flags & OBJ_DEAD) {
                        vn_unlock(vp);
-                       vm_object_dead_sleep(object, "vodead");
+                       if (vp->v_object == object)
+                               vm_object_dead_sleep(object, "vodead");
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                        goto retry;
                }
        }
        KASSERT(vp->v_object != NULL, ("vinitvmio: NULL object"));
        vsetflags(vp, VOBJBUF);
+       lwkt_reltoken(&vmobj_token);
+
        return (error);
 }
 
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index 2e37597..0559377 100644
@@ -373,6 +373,7 @@ extern struct lwkt_token kvm_token;
 extern struct lwkt_token proc_token;
 extern struct lwkt_token tty_token;
 extern struct lwkt_token vnode_token;
+extern struct lwkt_token vmobj_token;
 
 /*
  * Procedures
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index ba3ff65..5c50538 100644
@@ -112,9 +112,8 @@ kmem_alloc_pageable(vm_map_t map, vm_size_t size)
                             TRUE, VM_MAPTYPE_NORMAL,
                             VM_PROT_ALL, VM_PROT_ALL,
                             0);
-       if (result != KERN_SUCCESS) {
+       if (result != KERN_SUCCESS)
                return (0);
-       }
        return (addr);
 }
 
@@ -136,9 +135,8 @@ kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align)
                             TRUE, VM_MAPTYPE_NORMAL,
                             VM_PROT_ALL, VM_PROT_ALL,
                             MAP_NOFAULT);
-       if (result != KERN_SUCCESS) {
+       if (result != KERN_SUCCESS)
                return (0);
-       }
        return (addr);
 }
 
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index c025fb2..71a684b 100644
@@ -909,6 +909,9 @@ vm_map_insert(vm_map_t map, int *countp,
        if (cow & MAP_IS_STACK)
                protoeflags |= MAP_ENTRY_STACK;
 
+       lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
+
        if (object) {
                /*
                 * When object is non-NULL, it could be shared with another
@@ -937,6 +940,8 @@ vm_map_insert(vm_map_t map, int *countp,
                if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
                    (prev_entry->protection == prot) &&
                    (prev_entry->max_protection == max)) {
+                       lwkt_reltoken(&vmobj_token);
+                       lwkt_reltoken(&vm_token);
                        map->size += (end - prev_entry->end);
                        prev_entry->end = end;
                        vm_map_simplify_entry(map, prev_entry, countp);
@@ -952,9 +957,12 @@ vm_map_insert(vm_map_t map, int *countp,
                object = prev_entry->object.vm_object;
                offset = prev_entry->offset +
                        (prev_entry->end - prev_entry->start);
-               vm_object_reference(object);
+               vm_object_reference_locked(object);
        }
 
+       lwkt_reltoken(&vmobj_token);
+       lwkt_reltoken(&vm_token);
+
        /*
         * NOTE: if conditionals fail, object can be NULL here.  This occurs
         * in things like the buffer map where we manage kva but do not manage
@@ -2437,6 +2445,8 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
         * Hold vm_token to avoid blocking in vm_object_reference()
         */
        lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
+
        for (current = entry; current->start < end; current = current->next) {
                offset = current->offset + (start - current->start);
                size = (end <= current->end ? end : current->end) - start;
@@ -2490,7 +2500,7 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
                         */
                        int flags;
 
-                       vm_object_reference(object);
+                       vm_object_reference_locked(object);
                        vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY);
                        flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
                        flags |= invalidate ? OBJPC_INVAL : 0;
@@ -2512,14 +2522,14 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
                                break;
                        }
                        vn_unlock(((struct vnode *)object->handle));
-                       vm_object_deallocate(object);
+                       vm_object_deallocate_locked(object);
                }
                if (object && invalidate &&
                   ((object->type == OBJT_VNODE) ||
                    (object->type == OBJT_DEVICE))) {
                        int clean_only = 
                                (object->type == OBJT_DEVICE) ? FALSE : TRUE;
-                       vm_object_reference(object);
+                       vm_object_reference_locked(object);
                        switch(current->maptype) {
                        case VM_MAPTYPE_NORMAL:
                                vm_object_page_remove(object,
@@ -2531,12 +2541,14 @@ vm_map_clean(vm_map_t map, vm_offset_t start, vm_offset_t end,
                                vm_object_page_remove(object, 0, 0, clean_only);
                                break;
                        }
-                       vm_object_deallocate(object);
+                       vm_object_deallocate_locked(object);
                }
                start += size;
        }
-       vm_map_unlock_read(map);
+
+       lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vm_token);
+       vm_map_unlock_read(map);
 
        return (KERN_SUCCESS);
 }
@@ -2667,14 +2679,20 @@ again:
                offidxend = offidxstart + count;
 
                /*
-                * Hold vm_token when manipulating vm_objects.
+                * Hold vm_token when manipulating vm_objects,
+                *
+                * Hold vmobj_token when potentially adding or removing
+                * objects (collapse requires both).
                 */
                lwkt_gettoken(&vm_token);
+               lwkt_gettoken(&vmobj_token);
+
                if (object == &kernel_object) {
                        vm_object_page_remove(object, offidxstart,
                                              offidxend, FALSE);
                } else {
                        pmap_remove(map->pmap, s, e);
+
                        if (object != NULL &&
                            object->ref_count != 1 &&
                            (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) ==
@@ -2695,6 +2713,7 @@ again:
                                }
                        }
                }
+               lwkt_reltoken(&vmobj_token);
                lwkt_reltoken(&vm_token);
 
                /*
@@ -2839,15 +2858,18 @@ vm_map_split(vm_map_entry_t entry)
         * vm_token required when manipulating vm_objects.
         */
        lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
 
        source = orig_object->backing_object;
        if (source != NULL) {
-               vm_object_reference(source);    /* Referenced by new_object */
+               /* Referenced by new_object */
+               vm_object_reference_locked(source);
                LIST_INSERT_HEAD(&source->shadow_head,
-                                 new_object, shadow_list);
+                                new_object, shadow_list);
                vm_object_clear_flag(source, OBJ_ONEMAPPING);
                new_object->backing_object_offset = 
-                       orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
+                       orig_object->backing_object_offset +
+                       IDX_TO_OFF(offidxstart);
                new_object->backing_object = source;
                source->shadow_count++;
                source->generation++;
@@ -2856,13 +2878,10 @@ vm_map_split(vm_map_entry_t entry)
        for (idx = 0; idx < size; idx++) {
                vm_page_t m;
 
-               crit_enter();
        retry:
                m = vm_page_lookup(orig_object, offidxstart + idx);
-               if (m == NULL) {
-                       crit_exit();
+               if (m == NULL)
                        continue;
-               }
 
                /*
                 * We must wait for pending I/O to complete before we can
@@ -2877,7 +2896,6 @@ vm_map_split(vm_map_entry_t entry)
                vm_page_rename(m, new_object, idx);
                /* page automatically made dirty by rename and cache handled */
                vm_page_busy(m);
-               crit_exit();
        }
 
        if (orig_object->type == OBJT_SWAP) {
@@ -2903,7 +2921,8 @@ vm_map_split(vm_map_entry_t entry)
 
        entry->object.vm_object = new_object;
        entry->offset = 0LL;
-       vm_object_deallocate(orig_object);
+       vm_object_deallocate_locked(orig_object);
+       lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vm_token);
 }
 
@@ -2912,6 +2931,7 @@ vm_map_split(vm_map_entry_t entry)
  * entry.  The entries *must* be aligned properly.
  *
  * The vm_map must be exclusively locked.
+ * vm_token must be held
  */
 static void
 vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
@@ -2924,7 +2944,9 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
        if (src_entry->maptype == VM_MAPTYPE_SUBMAP)
                return;
 
-       lwkt_gettoken(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vm_token);
+       lwkt_gettoken(&vmobj_token);            /* required for collapse */
+
        if (src_entry->wired_count == 0) {
                /*
                 * If the source entry is marked needs_copy, it is already
@@ -2951,7 +2973,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                                }
                        }
 
-                       vm_object_reference(src_object);
+                       vm_object_reference_locked(src_object);
                        vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
                        dst_entry->object.vm_object = src_object;
                        src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
@@ -2972,7 +2994,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                 */
                vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
        }
-       lwkt_reltoken(&vm_token);
+       lwkt_reltoken(&vmobj_token);
 }
 
 /*
@@ -2998,6 +3020,7 @@ vmspace_fork(struct vmspace *vm1)
 
        lwkt_gettoken(&vm_token);
        lwkt_gettoken(&vmspace_token);
+       lwkt_gettoken(&vmobj_token);
        vm_map_lock(old_map);
        old_map->infork = 1;
 
@@ -3044,13 +3067,13 @@ vmspace_fork(struct vmspace *vm1)
                         * Add the reference before calling vm_map_entry_shadow
                         * to insure that a shadow object is created.
                         */
-                       vm_object_reference(object);
+                       vm_object_reference_locked(object);
                        if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
                                vm_map_entry_shadow(old_entry);
                                /* Transfer the second reference too. */
-                               vm_object_reference(
+                               vm_object_reference_locked(
                                    old_entry->object.vm_object);
-                               vm_object_deallocate(object);
+                               vm_object_deallocate_locked(object);
                                object = old_entry->object.vm_object;
                        }
                        vm_object_clear_flag(object, OBJ_ONEMAPPING);
@@ -3102,6 +3125,8 @@ vmspace_fork(struct vmspace *vm1)
        vm_map_unlock(old_map);
        vm_map_unlock(new_map);
        vm_map_entry_release(count);
+
+       lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vmspace_token);
        lwkt_reltoken(&vm_token);
 
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 71eacf7..657a32a 100644
@@ -90,12 +90,13 @@ do_vmtotal(SYSCTL_HANDLER_ARGS)
        struct vmtotal *totalp;
        vm_object_t object;
 
+       bzero(&total, sizeof(total));
        totalp = &total;
-       bzero(totalp, sizeof *totalp);
 
        /*
         * Mark all objects as inactive.
         */
+       lwkt_gettoken(&vmobj_token);
        for (object = TAILQ_FIRST(&vm_object_list);
            object != NULL;
            object = TAILQ_NEXT(object,object_list)) {
@@ -103,6 +104,7 @@ do_vmtotal(SYSCTL_HANDLER_ARGS)
                        continue;
                vm_object_clear_flag(object, OBJ_ACTIVE);
        }
+       lwkt_reltoken(&vmobj_token);
 
        /*
         * Calculate process statistics.
@@ -112,7 +114,7 @@ do_vmtotal(SYSCTL_HANDLER_ARGS)
        /*
         * Calculate object memory usage statistics.
         */
-       lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
        for (object = TAILQ_FIRST(&vm_object_list);
            object != NULL;
            object = TAILQ_NEXT(object, object_list)) {
@@ -140,8 +142,9 @@ do_vmtotal(SYSCTL_HANDLER_ARGS)
                        }
                }
        }
+       lwkt_reltoken(&vmobj_token);
        totalp->t_free = vmstats.v_free_count + vmstats.v_cache_count;
-       lwkt_reltoken(&vm_token);
+
        return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
 }
 
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index bb5a405..d8a9272 100644
@@ -126,10 +126,10 @@ static int        vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
  *
  */
 
-struct object_q vm_object_list;
+struct object_q vm_object_list;                /* locked by vmobj_token */
 struct vm_object kernel_object;
 
-static long vm_object_count;           /* count of all objects */
+static long vm_object_count;           /* locked by vmobj_token */
 extern int vm_pageout_page_count;
 
 static long object_collapses;
@@ -186,13 +186,11 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
        object->swblock_count = 0;
        RB_INIT(&object->swblock_root);
 
-       crit_enter();
-       lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
        TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
        vm_object_count++;
        object_hash_rand = object->hash_rand;
-       lwkt_reltoken(&vm_token);
-       crit_exit();
+       lwkt_reltoken(&vmobj_token);
 }
 
 /*
@@ -240,27 +238,39 @@ vm_object_allocate(objtype_t type, vm_pindex_t size)
  * Add an additional reference to a vm_object.
  *
  * Object passed by caller must be stable or caller must already
- * hold vm_token to avoid races.
+ * hold vmobj_token to avoid races.
  */
 void
 vm_object_reference(vm_object_t object)
 {
-       if (object == NULL)
-               return;
+       if (object) {
+               lwkt_gettoken(&vmobj_token);
+               object->ref_count++;
+               if (object->type == OBJT_VNODE) {
+                       vref(object->handle);
+                       /* XXX what if the vnode is being destroyed? */
+               }
+               lwkt_reltoken(&vmobj_token);
+       }
+}
 
-       lwkt_gettoken(&vm_token);
-       object->ref_count++;
-       if (object->type == OBJT_VNODE) {
-               vref(object->handle);
-               /* XXX what if the vnode is being destroyed? */
+void
+vm_object_reference_locked(vm_object_t object)
+{
+       if (object) {
+               ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
+               object->ref_count++;
+               if (object->type == OBJT_VNODE) {
+                       vref(object->handle);
+                       /* XXX what if the vnode is being destroyed? */
+               }
        }
-       lwkt_reltoken(&vm_token);
 }
 
 /*
  * Dereference an object and its underlying vnode.
  *
- * The caller must hold vm_token.
+ * The caller must hold vmobj_token.
  */
 static void
 vm_object_vndeallocate(vm_object_t object)
@@ -270,6 +280,7 @@ vm_object_vndeallocate(vm_object_t object)
        KASSERT(object->type == OBJT_VNODE,
            ("vm_object_vndeallocate: not a vnode object"));
        KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
 #ifdef INVARIANTS
        if (object->ref_count == 0) {
                vprint("vm_object_vndeallocate", vp);
@@ -287,15 +298,21 @@ vm_object_vndeallocate(vm_object_t object)
  * Release a reference to the specified object, gained either through a
  * vm_object_allocate or a vm_object_reference call.  When all references
  * are gone, storage associated with this object may be relinquished.
- *
- * The object must not be locked.
  */
 void
 vm_object_deallocate(vm_object_t object)
+{
+       lwkt_gettoken(&vmobj_token);
+       vm_object_deallocate_locked(object);
+       lwkt_reltoken(&vmobj_token);
+}
+
+void
+vm_object_deallocate_locked(vm_object_t object)
 {
        vm_object_t temp;
 
-       lwkt_gettoken(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
 
        while (object != NULL) {
                if (object->type == OBJT_VNODE) {
@@ -312,6 +329,18 @@ vm_object_deallocate(vm_object_t object)
                        break;
                }
 
+               /*
+                * We currently need the vm_token from this point on, and
+                * we must recheck ref_count after acquiring it.
+                */
+               lwkt_gettoken(&vm_token);
+
+               if (object->ref_count > 2) {
+                       object->ref_count--;
+                       lwkt_reltoken(&vm_token);
+                       break;
+               }
+
                /*
                 * Here on ref_count of one or two, which are special cases for
                 * objects.
@@ -319,6 +348,7 @@ vm_object_deallocate(vm_object_t object)
                if ((object->ref_count == 2) && (object->shadow_count == 0)) {
                        vm_object_set_flag(object, OBJ_ONEMAPPING);
                        object->ref_count--;
+                       lwkt_reltoken(&vm_token);
                        break;
                }
                if ((object->ref_count == 2) && (object->shadow_count == 1)) {
@@ -357,9 +387,11 @@ vm_object_deallocate(vm_object_t object)
 
                                        object = robject;
                                        vm_object_collapse(object);
+                                       lwkt_reltoken(&vm_token);
                                        continue;
                                }
                        }
+                       lwkt_reltoken(&vm_token);
                        break;
                }
 
@@ -367,14 +399,15 @@ vm_object_deallocate(vm_object_t object)
                 * Normal dereferencing path
                 */
                object->ref_count--;
-               if (object->ref_count != 0)
+               if (object->ref_count != 0) {
+                       lwkt_reltoken(&vm_token);
                        break;
+               }
 
                /*
                 * Termination path
                 */
 doterm:
-
                temp = object->backing_object;
                if (temp) {
                        LIST_REMOVE(object, shadow_list);
@@ -382,6 +415,7 @@ doterm:
                        temp->generation++;
                        object->backing_object = NULL;
                }
+               lwkt_reltoken(&vm_token);
 
                /*
                 * Don't double-terminate, we could be in a termination
@@ -392,7 +426,6 @@ doterm:
                        vm_object_terminate(object);
                object = temp;
        }
-       lwkt_reltoken(&vm_token);
 }
 
 /*
@@ -400,7 +433,7 @@ doterm:
  *
  * The object must have zero references.
  *
- * The caller must be holding vm_token and properly interlock with
+ * The caller must be holding vmobj_token and properly interlock with
  * OBJ_DEAD.
  */
 static int vm_object_terminate_callback(vm_page_t p, void *data);
@@ -409,13 +442,15 @@ void
 vm_object_terminate(vm_object_t object)
 {
        /*
-        * Make sure no one uses us.
+        * Make sure no one uses us.  Once we set OBJ_DEAD we should be
+        * able to safely block.
         */
-       ASSERT_LWKT_TOKEN_HELD(&vm_token);
+       KKASSERT((object->flags & OBJ_DEAD) == 0);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
        vm_object_set_flag(object, OBJ_DEAD);
 
        /*
-        * wait for the pageout daemon to be done with the object
+        * Wait for the pageout daemon to be done with the object
         */
        vm_object_pip_wait(object, "objtrm");
 
@@ -444,18 +479,20 @@ vm_object_terminate(vm_object_t object)
         */
        vm_object_pip_wait(object, "objtrm");
 
-       if (object->ref_count != 0)
-               panic("vm_object_terminate: object with references, ref_count=%d", object->ref_count);
+       if (object->ref_count != 0) {
+               panic("vm_object_terminate: object with references, "
+                     "ref_count=%d", object->ref_count);
+       }
 
        /*
         * Now free any remaining pages. For internal objects, this also
         * removes them from paging queues. Don't free wired pages, just
         * remove them from the object. 
         */
-       crit_enter();
+       lwkt_gettoken(&vm_token);
        vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
                                vm_object_terminate_callback, NULL);
-       crit_exit();
+       lwkt_reltoken(&vm_token);
 
        /*
         * Let the pager know object is dead.
@@ -464,15 +501,17 @@ vm_object_terminate(vm_object_t object)
 
        /*
         * Remove the object from the global object list.
+        *
+        * (we are holding vmobj_token)
         */
-       crit_enter();
        TAILQ_REMOVE(&vm_object_list, object, object_list);
        vm_object_count--;
-       crit_exit();
-
        vm_object_dead_wakeup(object);
-       if (object->ref_count != 0)
-               panic("vm_object_terminate2: object with references, ref_count=%d", object->ref_count);
+
+       if (object->ref_count != 0) {
+               panic("vm_object_terminate2: object with references, "
+                     "ref_count=%d", object->ref_count);
+       }
 
        /*
         * Free the space for the object.
@@ -506,36 +545,33 @@ vm_object_terminate_callback(vm_page_t p, void *data __unused)
  * The object is dead but still has an object<->pager association.  Sleep
  * and return.  The caller typically retests the association in a loop.
  *
- * No requirement.
+ * Must be called with the vmobj_token held.
  */
 void
 vm_object_dead_sleep(vm_object_t object, const char *wmesg)
 {
-       crit_enter();
-       lwkt_gettoken(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
        if (object->handle) {
                vm_object_set_flag(object, OBJ_DEADWNT);
                tsleep(object, 0, wmesg, 0);
+               /* object may be invalid after this point */
        }
-       lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
  * Wakeup anyone waiting for the object<->pager disassociation on
  * a dead object.
  *
- * No requirement.
+ * Must be called with the vmobj_token held.
  */
 void
 vm_object_dead_wakeup(vm_object_t object)
 {
-       lwkt_gettoken(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
        if (object->flags & OBJ_DEADWNT) {
                vm_object_clear_flag(object, OBJ_DEADWNT);
                wakeup(object);
        }
-       lwkt_reltoken(&vm_token);
 }
 
 /*
@@ -1323,7 +1359,8 @@ vm_object_backing_scan_callback(vm_page_t p, void *data)
  * when paging_in_progress is true for an object...  This is not a complete
  * operation, but should plug 99.9% of the rest of the leaks.
  *
- * The caller must hold vm_token.
+ * The caller must hold vm_token and vmobj_token.
+ * (only called from vm_object_collapse)
  */
 static void
 vm_object_qcollapse(vm_object_t object)
@@ -1347,7 +1384,8 @@ vm_object_qcollapse(vm_object_t object)
 void
 vm_object_collapse(vm_object_t object)
 {
-       lwkt_gettoken(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
 
        while (TRUE) {
                vm_object_t backing_object;
@@ -1466,13 +1504,19 @@ vm_object_collapse(vm_object_t object)
                         * necessary is to dispose of it.
                         */
 
-                       KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
-                       KASSERT(RB_EMPTY(&backing_object->rb_memq), ("backing_object %p somehow has left over pages during collapse!", backing_object));
-                       crit_enter();
+                       KASSERT(backing_object->ref_count == 1,
+                               ("backing_object %p was somehow "
+                                "re-referenced during collapse!",
+                                backing_object));
+                       KASSERT(RB_EMPTY(&backing_object->rb_memq),
+                               ("backing_object %p somehow has left "
+                                "over pages during collapse!",
+                                backing_object));
+
+                       /* (we are holding vmobj_token) */
                        TAILQ_REMOVE(&vm_object_list, backing_object,
                                     object_list);
                        vm_object_count--;
-                       crit_exit();
 
                        zfree(obj_zone, backing_object);
 
@@ -1519,7 +1563,7 @@ vm_object_collapse(vm_object_t object)
                         * so we don't need to call vm_object_deallocate, but
                         * we do anyway.
                         */
-                       vm_object_deallocate(backing_object);
+                       vm_object_deallocate_locked(backing_object);
                        object_bypasses++;
                }
 
@@ -1527,7 +1571,6 @@ vm_object_collapse(vm_object_t object)
                 * Try again with this object's new backing object.
                 */
        }
-       lwkt_reltoken(&vm_token);
 }
 
 /*
@@ -1681,6 +1724,7 @@ vm_object_page_remove_callback(vm_page_t p, void *data)
  *     next_size       Size of reference to next_object
  *
  * The object must not be locked.
+ * The caller must hold vm_token and vmobj_token.
  */
 boolean_t
 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
@@ -1688,6 +1732,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
 {
        vm_pindex_t next_pindex;
 
+       ASSERT_LWKT_TOKEN_HELD(&vm_token);
+       ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
+
        if (prev_object == NULL) {
                return (TRUE);
        }
@@ -1697,8 +1744,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
                return (FALSE);
        }
 
-       lwkt_gettoken(&vm_token);
-
        /*
         * Try to collapse the object first
         */
@@ -1710,10 +1755,8 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
         * pages not mapped to prev_entry may be in use anyway)
         */
 
-       if (prev_object->backing_object != NULL) {
-               lwkt_reltoken(&vm_token);
+       if (prev_object->backing_object != NULL)
                return (FALSE);
-       }
 
        prev_size >>= PAGE_SHIFT;
        next_size >>= PAGE_SHIFT;
@@ -1721,7 +1764,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
 
        if ((prev_object->ref_count > 1) &&
            (prev_object->size != next_pindex)) {
-               lwkt_reltoken(&vm_token);
                return (FALSE);
        }
 
@@ -1743,8 +1785,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
         */
        if (next_pindex + next_size > prev_object->size)
                prev_object->size = next_pindex + next_size;
-
-       lwkt_reltoken(&vm_token);
        return (TRUE);
 }
 
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 002035c..d20b350 100644
@@ -84,6 +84,9 @@
 #ifndef _SYS_TREE_H_
 #include <sys/tree.h>
 #endif
+#ifndef _SYS_THREAD_H_
+#include <sys/thread.h>
+#endif
 #ifndef _MACHINE_ATOMIC_H_
 #include <machine/atomic.h>
 #endif
@@ -138,15 +141,18 @@ struct vm_object_lock {
 /*
  * vm_object           A VM object which represents an arbitrarily sized
  *                     data store.
+ *
+ * Locking requirements: vmobj_token for ref_count and object_list, and
+ * vm_token for everything else.
  */
 struct vm_object {
-       TAILQ_ENTRY(vm_object) object_list; /* list of all objects */
+       TAILQ_ENTRY(vm_object) object_list; /* vmobj_token */
        LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */
        LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */
        RB_HEAD(vm_page_rb_tree, vm_page) rb_memq;      /* resident pages */
        int generation;                 /* generation ID */
        vm_pindex_t size;               /* Object size */
-       int ref_count;                  /* How many refs?? */
+       int ref_count;                  /* vmobj_token */
        int shadow_count;               /* how many objects that this is a shadow for */
        int hash_rand;                  /* vm hash table randomizer     */
        objtype_t type;                 /* type of pager */
@@ -285,6 +291,7 @@ void _vm_object_allocate (objtype_t, vm_pindex_t, vm_object_t);
 boolean_t vm_object_coalesce (vm_object_t, vm_pindex_t, vm_size_t, vm_size_t);
 void vm_object_collapse (vm_object_t);
 void vm_object_deallocate (vm_object_t);
+void vm_object_deallocate_locked (vm_object_t);
 void vm_object_terminate (vm_object_t);
 void vm_object_set_writeable_dirty (vm_object_t);
 void vm_object_init (void);
@@ -294,6 +301,7 @@ void vm_object_pmap_copy (vm_object_t, vm_pindex_t, vm_pindex_t);
 void vm_object_pmap_copy_1 (vm_object_t, vm_pindex_t, vm_pindex_t);
 void vm_object_pmap_remove (vm_object_t, vm_pindex_t, vm_pindex_t);
 void vm_object_reference (vm_object_t);
+void vm_object_reference_locked (vm_object_t);
 void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);
 void vm_object_madvise (vm_object_t, vm_pindex_t, int, int);
 void vm_object_init2 (void);
diff --git a/sys/vm/vm_swapcache.c b/sys/vm/vm_swapcache.c
index bb76a00..0b49253 100644
@@ -180,7 +180,9 @@ vm_swapcached(void)
         */
        bzero(&object_marker, sizeof(object_marker));
        object_marker.type = OBJT_MARKER;
+       lwkt_gettoken(&vmobj_token);
        TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list);
+       lwkt_reltoken(&vmobj_token);
 
        for (;;) {
                /*
@@ -245,7 +247,9 @@ vm_swapcached(void)
                }
        }
        TAILQ_REMOVE(INACTIVE_LIST, &page_marker, pageq);
+       lwkt_gettoken(&vmobj_token);
        TAILQ_REMOVE(&vm_object_list, &object_marker, object_list);
+       lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vm_token);
        crit_exit();
 }
@@ -523,6 +527,8 @@ vm_swapcache_cleaning(vm_object_t marker)
         * Look for vnode objects
         */
        lwkt_gettoken(&vm_token);
+       lwkt_gettoken(&vmobj_token);
+
        while ((object = TAILQ_NEXT(object, object_list)) != NULL && count--) {
                if (object->type != OBJT_VNODE)
                        continue;
@@ -581,5 +587,7 @@ vm_swapcache_cleaning(vm_object_t marker)
        else
                TAILQ_INSERT_HEAD(&vm_object_list, marker, object_list);
        marker->backing_object = object;
+
+       lwkt_reltoken(&vmobj_token);
        lwkt_reltoken(&vm_token);
 }
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 2d286e6..9fc926b 100644
@@ -161,7 +161,6 @@ vm_pagezero(void __unused *arg)
         * For now leave the MP lock held, the VM routines cannot be called
         * with it released until tokenization is finished.
         */
-       /* rel_mplock(); */
        lwkt_setpri_self(TDPRI_IDLE_WORK);
        lwkt_setcpu_self(globaldata_find(ncpus - 1));
        sleep_time = DEFAULT_SLEEP_TIME;
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 1bc8905..e3dd2f1 100644
@@ -407,7 +407,6 @@ zget(vm_zone_t z)
                 * Interrupt zones do not mess with the kernel_map, they
                 * simply populate an existing mapping.
                 */
-               get_mplock();
                lwkt_gettoken(&vm_token);
                savezpc = z->zpagecount;
                nbytes = z->zpagecount * PAGE_SIZE;
@@ -442,7 +441,6 @@ zget(vm_zone_t z)
                }
                nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize;
                lwkt_reltoken(&vm_token);
-               rel_mplock();
        } else if (z->zflags & ZONE_SPECIAL) {
                /*
                 * The special zone is the one used for vm_map_entry_t's.
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index ee05321..c595658 100644
@@ -123,7 +123,11 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset,
            vnode_pbuf_freecnt = nswbuf / 2 + 1;
        }
 
-       vp = (struct vnode *) handle;
+       /*
+        * Serialize potential vnode/object teardowns and interlocks
+        */
+       vp = (struct vnode *)handle;
+       lwkt_gettoken(&vmobj_token);
 
        /*
         * Prevent race condition when allocating the object. This
@@ -174,7 +178,7 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset,
                if (vp->v_mount && (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC))
                        object->flags |= OBJ_NOMSYNC;
        } else {
-               object->ref_count++;
+               object->ref_count++;    /* protected  by vmobj_token */
                if (object->size != lsize) {
                        kprintf("vnode_pager_alloc: Warning, objsize "
                                "mismatch %jd/%jd vp=%p obj=%p\n",
@@ -190,13 +194,15 @@ vnode_pager_alloc(void *handle, off_t length, vm_prot_t prot, off_t offset,
                                vp, object);
                }
        }
-       vref(vp);
 
+       vref(vp);
        vclrflags(vp, VOLOCK);
        if (vp->v_flag & VOWANT) {
                vclrflags(vp, VOWANT);
                wakeup(vp);
        }
+       lwkt_reltoken(&vmobj_token);
+
        return (object);
 }
 
@@ -211,6 +217,11 @@ vnode_pager_reference(struct vnode *vp)
 {
        vm_object_t object;
 
+       /*
+        * Serialize potential vnode/object teardowns and interlocks
+        */
+       lwkt_gettoken(&vmobj_token);
+
        /*
         * Prevent race condition when allocating the object. This
         * can happen with NFS vnodes since the nfsnode isn't locked.
@@ -235,7 +246,7 @@ vnode_pager_reference(struct vnode *vp)
         * NULL returns if it does not.
         */
        if (object) {
-               object->ref_count++;
+               object->ref_count++;    /* protected by vmobj_token */
                vref(vp);
        }
 
@@ -244,6 +255,8 @@ vnode_pager_reference(struct vnode *vp)
                vclrflags(vp, VOWANT);
                wakeup(vp);
        }
+
+       lwkt_reltoken(&vmobj_token);
        return (object);
 }