kernel - Refactor vm_fault and vm_map a bit.
author    Matthew Dillon <dillon@apollo.backplane.com>
Thu, 2 Nov 2017 00:36:14 +0000 (17:36 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 2 Nov 2017 00:50:06 +0000 (17:50 -0700)
* Allow the virtual copy feature to be disabled via a sysctl.
  Default enabled.
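
  For example, at runtime (sysctl name as added in vm_fault.c below):

	sysctl vm.virtual_copy_enable=0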

* Fix a bug in the virtual copy test.  Multiple elements were
  not being retested after reacquiring the map lock.
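
  The retest is now centralized in virtual_copy_ok() (condensed from
  the new vm_fault.c code below):

	if (lockmgr(&fs->map->lock, LK_EXCLUSIVE|LK_NOWAIT))
		return 0;
	fs->lookup_still_valid = TRUE;
	if (virtual_copy_test(fs)) {	/* retest all elements */
		fs->map_generation = ++fs->map->timestamp;
		return 1;
	}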

* Change the auto-partitioning of vm_map_entry structures from
  16MB to 32MB.  Add a sysctl to allow the feature to be disabled.
  Default enabled.
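
  As with the virtual copy knob:

	sysctl vm.map_partition_enable=0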

* Clean up map->timestamp bumps.  We now bump the timestamp in
  vm_map_lock(), and also fix a bug where it was not being bumped
  after relocking the map in the virtual copy feature.
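
  A sketch of the new convention (not the verbatim vm_map.h code):

	static __inline void
	vm_map_lock(vm_map_t map)
	{
		lockmgr(&map->lock, LK_EXCLUSIVE);
		++map->timestamp;	/* invalidate concurrent lookups */
	}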

* Fix an incorrect assertion in vm_map_split().  Refactor tests
  in vm_map_split().  Also, acquire the chain lock for the VM
  object in the caller of vm_map_split() instead of in vm_map_split()
  itself, allowing us to include the pmap adjustment within the
  locked area.
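
  The caller-side pattern, condensed from the vm_map_copy_entry()
  hunks below:

	oobject = src_entry->object.vm_object;
	if (oobject) {
		vm_object_hold(oobject);
		vm_object_chain_acquire(oobject, 0);
	}
	/* ...write-protect pages, then split while chain-locked... */
	if (oobject)
		vm_map_split(src_entry, oobject);
	pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
		  dst_entry->end - dst_entry->start, src_entry->start);
	if (oobject) {
		vm_object_chain_release(oobject);
		vm_object_drop(oobject);
	}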

* Make sure OBJ_ONEMAPPING is cleared for nobject in vm_map_split().

* Fix a bug in a call to vm_map_transition_wait() that
  double-locked the vm_map in the partitioning code.
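
  The lookup path now passes relock=0 and retries, instead of
  returning with the map double-locked (condensed from the diff
  below):

	if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
		entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
		vm_map_transition_wait(map, 0);	/* do not relock */
		goto RetryLookup;
	}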

* General cleanups in vm/vm_object.c

sys/vm/vm_fault.c
sys/vm/vm_map.c
sys/vm/vm_map.h
sys/vm/vm_mmap.c
sys/vm/vm_object.c
sys/vm/vm_object.h

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 81b6fe4..d9cbf8d 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -152,10 +152,13 @@ static int debug_fault = 0;
 SYSCTL_INT(_vm, OID_AUTO, debug_fault, CTLFLAG_RW, &debug_fault, 0, "");
 static int debug_cluster = 0;
 SYSCTL_INT(_vm, OID_AUTO, debug_cluster, CTLFLAG_RW, &debug_cluster, 0, "");
+static int virtual_copy_enable = 1;
+SYSCTL_INT(_vm, OID_AUTO, virtual_copy_enable, CTLFLAG_RW,
+               &virtual_copy_enable, 0, "");
 int vm_shared_fault = 1;
 TUNABLE_INT("vm.shared_fault", &vm_shared_fault);
-SYSCTL_INT(_vm, OID_AUTO, shared_fault, CTLFLAG_RW, &vm_shared_fault, 0,
-          "Allow shared token on vm_object");
+SYSCTL_INT(_vm, OID_AUTO, shared_fault, CTLFLAG_RW,
+               &vm_shared_fault, 0, "Allow shared token on vm_object");
 
 static int vm_fault_object(struct faultstate *, vm_pindex_t, vm_prot_t, int);
 static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *,
@@ -259,6 +262,82 @@ _unlock_things(struct faultstate *fs, int dealloc)
 #define unlock_and_deallocate(fs) _unlock_things(fs, 1)
 #define cleanup_successful_fault(fs) _cleanup_successful_fault(fs, 1)
 
+/*
+ * Virtual copy tests.  Used by the fault code to determine if a
+ * page can be moved from an orphan vm_object into its shadow
+ * instead of copying its contents.
+ */
+static __inline int
+virtual_copy_test(struct faultstate *fs)
+{
+       /*
+        * Must be holding exclusive locks
+        */
+       if (fs->first_shared || fs->shared || virtual_copy_enable == 0)
+               return 0;
+
+       /*
+        * Map, if present, has not changed
+        */
+       if (fs->map && fs->map_generation != fs->map->timestamp)
+               return 0;
+
+       /*
+        * Only one shadow object
+        */
+       if (fs->object->shadow_count != 1)
+               return 0;
+
+       /*
+        * No COW refs, except us
+        */
+       if (fs->object->ref_count != 1)
+               return 0;
+
+       /*
+        * No one else can look this object up
+        */
+       if (fs->object->handle != NULL)
+               return 0;
+
+       /*
+        * No other ways to look the object up
+        */
+       if (fs->object->type != OBJT_DEFAULT &&
+           fs->object->type != OBJT_SWAP)
+               return 0;
+
+       /*
+        * We don't chase down the shadow chain
+        */
+       if (fs->object != fs->first_object->backing_object)
+               return 0;
+
+       return 1;
+}
+
+static __inline int
+virtual_copy_ok(struct faultstate *fs)
+{
+       if (virtual_copy_test(fs)) {
+               /*
+                * Grab the lock and re-test changeable items.
+                */
+               if (fs->lookup_still_valid == FALSE && fs->map) {
+                       if (lockmgr(&fs->map->lock, LK_EXCLUSIVE|LK_NOWAIT))
+                               return 0;
+                       fs->lookup_still_valid = TRUE;
+                       if (virtual_copy_test(fs)) {
+                               fs->map_generation = ++fs->map->timestamp;
+                               return 1;
+                       }
+                       fs->lookup_still_valid = FALSE;
+                       lockmgr(&fs->map->lock, LK_RELEASE);
+               }
+       }
+       return 0;
+}
+
 /*
  * TRYPAGER 
  *
@@ -1975,6 +2054,7 @@ readrest:
         * is held.]
         *
         * object still held.
+        * vm_map may not be locked (determined by fs->lookup_still_valid)
         *
         * local shared variable may be different from fs->shared.
         *
@@ -2000,46 +2080,7 @@ readrest:
                         * dirty in the first object so that it will go out 
                         * to swap when needed.
                         */
-                       if (
-                               /*
-                                * Must be holding exclusive locks
-                                */
-                               fs->first_shared == 0 &&
-                               fs->shared == 0 &&
-                               /*
-                                * Map, if present, has not changed
-                                */
-                               (fs->map == NULL ||
-                               fs->map_generation == fs->map->timestamp) &&
-                               /*
-                                * Only one shadow object
-                                */
-                               (fs->object->shadow_count == 1) &&
-                               /*
-                                * No COW refs, except us
-                                */
-                               (fs->object->ref_count == 1) &&
-                               /*
-                                * No one else can look this object up
-                                */
-                               (fs->object->handle == NULL) &&
-                               /*
-                                * No other ways to look the object up
-                                */
-                               ((fs->object->type == OBJT_DEFAULT) ||
-                                (fs->object->type == OBJT_SWAP)) &&
-                               /*
-                                * We don't chase down the shadow chain
-                                */
-                               (fs->object == fs->first_object->backing_object) &&
-
-                               /*
-                                * grab the lock if we need to
-                                */
-                               (fs->lookup_still_valid ||
-                                fs->map == NULL ||
-                                lockmgr(&fs->map->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0)
-                           ) {
+                       if (virtual_copy_ok(fs)) {
                                /*
                                 * (first_m) and (m) are both busied.  We have
                                 * move (m) into (first_m)'s object/pindex
@@ -2051,7 +2092,6 @@ readrest:
                                 * block so we don't do it until after the
                                 * rename.
                                 */
-                               fs->lookup_still_valid = 1;
                                vm_page_protect(fs->first_m, VM_PROT_NONE);
                                vm_page_remove(fs->first_m);
                                vm_page_rename(fs->m, fs->first_object,
@@ -2403,6 +2443,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
         */
        vm_object_hold(src_object);
        vm_object_hold(dst_object);
+
        for (vaddr = dst_entry->start, dst_offset = 0;
             vaddr < dst_entry->end;
             vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 8228779..843eed6 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1,12 +1,14 @@
 /*
- * (MPSAFE)
- *
  * Copyright (c) 1991, 1993
  *     The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2003-2017 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -131,7 +133,11 @@ static struct objcache *vmspace_cache;
 #define MAPENTRYBSP_CACHE      (MAXCPU+1)
 #define MAPENTRYAP_CACHE       8
 
-#define MAP_ENTRY_PARTITION_SIZE       ((vm_offset_t)(16 * 1024 * 1024))
+/*
+ * Partitioning threaded programs with large anonymous memory areas can
+ * improve concurrent fault performance.
+ */
+#define MAP_ENTRY_PARTITION_SIZE       ((vm_offset_t)(32 * 1024 * 1024))
 #define MAP_ENTRY_PARTITION_MASK       (MAP_ENTRY_PARTITION_SIZE - 1)
 
 #define VM_MAP_ENTRY_WITHIN_PARTITION(entry)   \
@@ -149,7 +155,10 @@ SYSCTL_INT(_vm, OID_AUTO, randomize_mmap, CTLFLAG_RW, &randomize_mmap, 0,
     "Randomize mmap offsets");
 static int vm_map_relock_enable = 1;
 SYSCTL_INT(_vm, OID_AUTO, map_relock_enable, CTLFLAG_RW,
-          &vm_map_relock_enable, 0, "Randomize mmap offsets");
+          &vm_map_relock_enable, 0, "insert pop pgtable optimization");
+static int vm_map_partition_enable = 1;
+SYSCTL_INT(_vm, OID_AUTO, map_partition_enable, CTLFLAG_RW,
+          &vm_map_partition_enable, 0, "Break up larger vm_map_entry's");
 
 static void vmspace_drop_notoken(struct vmspace *vm);
 static void vm_map_entry_shadow(vm_map_entry_t entry, int addref);
@@ -1651,12 +1660,13 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end,
  * is unlocked for the sleep and relocked before the return.
  */
 void
-vm_map_transition_wait(vm_map_t map)
+vm_map_transition_wait(vm_map_t map, int relock)
 {
        tsleep_interlock(map, 0);
        vm_map_unlock(map);
        tsleep(map, PINTERLOCKED, "vment", 0);
-       vm_map_lock(map);
+       if (relock)
+               vm_map_lock(map);
 }
 
 /*
@@ -1722,7 +1732,7 @@ again:
                entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
                ++mycpu->gd_cnt.v_intrans_coll;
                ++mycpu->gd_cnt.v_intrans_wait;
-               vm_map_transition_wait(map);
+               vm_map_transition_wait(map, 1);
                /*
                 * entry and/or start_entry may have been clipped while
                 * we slept, or may have gone away entirely.  We have
@@ -1762,7 +1772,7 @@ again:
                        next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
                        ++mycpu->gd_cnt.v_intrans_coll;
                        ++mycpu->gd_cnt.v_intrans_wait;
-                       vm_map_transition_wait(map);
+                       vm_map_transition_wait(map, 1);
 
                        /*
                         * clips might have occurred while we blocked.
@@ -1771,6 +1781,7 @@ again:
                        CLIP_CHECK_BACK(start_entry, start);
                        continue;
                }
+
                /*
                 * No restart necessary even though clip_end may block, we
                 * are holding the map lock.
@@ -1958,7 +1969,6 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
                 * Update physical map if necessary. Worry about copy-on-write
                 * here -- CHECK THIS XXX
                 */
-
                if (current->protection != old_prot) {
 #define MASK(entry)    (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
                                                        VM_PROT_ALL)
@@ -1973,7 +1983,6 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
 
                current = current->next;
        }
-
        vm_map_unlock(map);
        vm_map_entry_release(count);
        return (KERN_SUCCESS);
@@ -2432,9 +2441,9 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end,
 done:
        vm_map_unclip_range(map, start_entry, start, real_end, &count,
                MAP_CLIP_NO_HOLES);
-       map->timestamp++;
        vm_map_unlock(map);
        vm_map_entry_release(count);
+
        return (rv);
 }
 
@@ -2636,7 +2645,6 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags)
 done:
        vm_map_unclip_range(map, start_entry, start, real_end,
                            &count, MAP_CLIP_NO_HOLES);
-       map->timestamp++;
        vm_map_unlock(map);
 failure:
        if (kmflags & KM_KRESERVE)
@@ -2960,7 +2968,7 @@ again:
                        start = entry->start;
                        ++mycpu->gd_cnt.v_intrans_coll;
                        ++mycpu->gd_cnt.v_intrans_wait;
-                       vm_map_transition_wait(map);
+                       vm_map_transition_wait(map, 1);
                        goto again;
                }
                vm_map_clip_end(map, entry, end, countp);
@@ -3025,6 +3033,10 @@ again:
                             OBJ_ONEMAPPING &&
                            (object->type == OBJT_DEFAULT ||
                             object->type == OBJT_SWAP)) {
+                               /*
+                                * When ONEMAPPING is set we can destroy the
+                                * pages underlying the entry's range.
+                                */
                                vm_object_collapse(object, NULL);
                                vm_object_page_remove(object, offidxstart,
                                                      offidxend, FALSE);
@@ -3147,6 +3159,9 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
  * and moves the VM pages from the original object to the new object.
  * The original object will also be collapsed, if possible.
  *
+ * Caller must supply entry->object.vm_object held and chain_acquired, and
+ * should chain_release and drop the object upon return.
+ *
  * We can only do this for normal memory objects with a single mapping, and
  * it only makes sense to do it if there are 2 or more refs on the original
  * object.  i.e. typically a memory object that has been extended into
@@ -3164,10 +3179,10 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
  * The vm_map must be locked and its token held.
  */
 static void
-vm_map_split(vm_map_entry_t entry)
+vm_map_split(vm_map_entry_t entry, vm_object_t oobject)
 {
        /* OPTIMIZED */
-       vm_object_t oobject, nobject, bobject;
+       vm_object_t nobject, bobject;
        vm_offset_t s, e;
        vm_page_t m;
        vm_pindex_t offidxstart, offidxend, idx;
@@ -3182,33 +3197,22 @@ vm_map_split(vm_map_entry_t entry)
         * OBJ_ONEMAPPING doesn't apply to vnode objects but clear the flag
         * anyway.
         */
-       oobject = entry->object.vm_object;
        if (oobject->type != OBJT_DEFAULT && oobject->type != OBJT_SWAP) {
                vm_object_reference_quick(oobject);
                vm_object_clear_flag(oobject, OBJ_ONEMAPPING);
                return;
        }
 
+#if 0
        /*
-        * Setup.  Chain lock the original object throughout the entire
-        * routine to prevent new page faults from occuring.
-        *
-        * XXX can madvise WILLNEED interfere with us too?
-        */
-       vm_object_hold(oobject);
-       vm_object_chain_acquire(oobject, 0);
-
-       /*
-        * Original object cannot be split?  Might have also changed state.
+        * Original object cannot be split?
         */
-       if (oobject->handle == NULL || (oobject->type != OBJT_DEFAULT &&
-                                       oobject->type != OBJT_SWAP)) {
-               vm_object_chain_release(oobject);
-               vm_object_reference_locked(oobject);
+       if (oobject->handle == NULL) {
+               vm_object_reference_locked_chain_held(oobject);
                vm_object_clear_flag(oobject, OBJ_ONEMAPPING);
-               vm_object_drop(oobject);
                return;
        }
+#endif
 
        /*
         * Collapse original object with its backing store as an
@@ -3224,10 +3228,8 @@ vm_map_split(vm_map_entry_t entry)
        if (oobject->ref_count <= 1 ||
            (oobject->type != OBJT_DEFAULT && oobject->type != OBJT_SWAP) ||
            (oobject->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) != OBJ_ONEMAPPING) {
-               vm_object_chain_release(oobject);
-               vm_object_reference_locked(oobject);
+               vm_object_reference_locked_chain_held(oobject);
                vm_object_clear_flag(oobject, OBJ_ONEMAPPING);
-               vm_object_drop(oobject);
                return;
        }
 
@@ -3246,7 +3248,7 @@ vm_map_split(vm_map_entry_t entry)
                        /* ref for shadowing below */
                        vm_object_reference_locked(bobject);
                        vm_object_chain_acquire(bobject, 0);
-                       KKASSERT(bobject->backing_object == bobject);
+                       KKASSERT(oobject->backing_object == bobject);
                        KKASSERT((bobject->flags & OBJ_DEAD) == 0);
                } else {
                        /*
@@ -3284,6 +3286,10 @@ vm_map_split(vm_map_entry_t entry)
                KKASSERT(0);
        }
 
+       /*
+        * If we could not allocate nobject just clear ONEMAPPING on
+        * oobject and return.
+        */
        if (nobject == NULL) {
                if (bobject) {
                        if (useshadowlist) {
@@ -3294,10 +3300,8 @@ vm_map_split(vm_map_entry_t entry)
                                vm_object_deallocate(bobject);
                        }
                }
-               vm_object_chain_release(oobject);
-               vm_object_reference_locked(oobject);
+               vm_object_reference_locked_chain_held(oobject);
                vm_object_clear_flag(oobject, OBJ_ONEMAPPING);
-               vm_object_drop(oobject);
                return;
        }
 
@@ -3314,6 +3318,9 @@ vm_map_split(vm_map_entry_t entry)
         *
         * Adding an object to bobject's shadow list requires refing bobject
         * which we did above in the useshadowlist case.
+        *
+        * XXX it is unclear if we need to clear ONEMAPPING on bobject here
+        *     or not.
         */
        if (bobject) {
                nobject->backing_object_offset =
@@ -3381,6 +3388,13 @@ vm_map_split(vm_map_entry_t entry)
        entry->object.vm_object = nobject;
        entry->offset = 0LL;
 
+       /*
+        * The map is being split and nobject is going to wind up on both
+        * vm_map_entry's, so make sure OBJ_ONEMAPPING is cleared on
+        * nobject.
+        */
+       vm_object_clear_flag(nobject, OBJ_ONEMAPPING);
+
        /*
         * Cleanup
         *
@@ -3397,10 +3411,27 @@ vm_map_split(vm_map_entry_t entry)
                vm_object_chain_release(bobject);
                vm_object_drop(bobject);
        }
-       vm_object_chain_release(oobject);
+
+#if 0
+       if (oobject->resident_page_count) {
+               kprintf("oobject %p still contains %jd pages!\n",
+                       oobject, (intmax_t)oobject->resident_page_count);
+               for (idx = 0; idx < size; idx++) {
+                       vm_page_t m;
+
+                       m = vm_page_lookup_busy_wait(oobject, offidxstart + idx,
+                                                    TRUE, "vmpg");
+                       if (m) {
+                               kprintf("oobject %p idx %jd\n",
+                                       oobject,
+                                       offidxstart + idx);
+                               vm_page_wakeup(m);
+                       }
+               }
+       }
+#endif
        /*vm_object_clear_flag(oobject, OBJ_ONEMAPPING);*/
        vm_object_deallocate_locked(oobject);
-       vm_object_drop(oobject);
 }
 
 /*
@@ -3418,6 +3449,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                  vm_map_entry_t src_entry, vm_map_entry_t dst_entry)
 {
        vm_object_t src_object;
+       vm_object_t oobject;
 
        if (dst_entry->maptype == VM_MAPTYPE_SUBMAP ||
            dst_entry->maptype == VM_MAPTYPE_UKSMAP)
@@ -3430,13 +3462,29 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                /*
                 * If the source entry is marked needs_copy, it is already
                 * write-protected.
+                *
+                * To avoid interacting with a vm_fault that might have
+                * released its vm_map, we must acquire the fronting
+                * object.
                 */
+               oobject = src_entry->object.vm_object;
+               if (oobject) {
+                       vm_object_hold(oobject);
+                       vm_object_chain_acquire(oobject, 0);
+               }
+
+#if 0
+               pmap_protect(src_map->pmap,
+                            src_entry->start, src_entry->end,
+                            VM_PROT_NONE);
+#else
                if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
                        pmap_protect(src_map->pmap,
                            src_entry->start,
                            src_entry->end,
                            src_entry->protection & ~VM_PROT_WRITE);
                }
+#endif
 
                /*
                 * Make a copy of the object.
@@ -3452,10 +3500,12 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                 * to retry, otherwise the concurrent fault might improperly
                * install a RW pte when it's supposed to be a RO(COW) pte.
                 * This race can occur because a vnode-backed fault may have
-                * to temporarily release the map lock.
+                * to temporarily release the map lock.  This was handled
+                * when the caller locked the map exclusively.
                 */
-               if (src_entry->object.vm_object != NULL) {
-                       vm_map_split(src_entry);
+               if (oobject) {
+                       vm_map_split(src_entry, oobject);
+
                        src_object = src_entry->object.vm_object;
                        dst_entry->object.vm_object = src_object;
                        src_entry->eflags |= (MAP_ENTRY_COW |
@@ -3463,14 +3513,17 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
                        dst_entry->eflags |= (MAP_ENTRY_COW |
                                              MAP_ENTRY_NEEDS_COPY);
                        dst_entry->offset = src_entry->offset;
-                       ++src_map->timestamp;
                } else {
                        dst_entry->object.vm_object = NULL;
                        dst_entry->offset = 0;
                }
-
                pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
-                   dst_entry->end - dst_entry->start, src_entry->start);
+                         dst_entry->end - dst_entry->start,
+                         src_entry->start);
+               if (oobject) {
+                       vm_object_chain_release(oobject);
+                       vm_object_drop(oobject);
+               }
        } else {
                /*
                 * Of course, wired down pages can't be set copy-on-write.
@@ -3510,8 +3563,13 @@ vmspace_fork(struct vmspace *vm1)
 
        vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
        lwkt_gettoken(&vm2->vm_map.token);
+
+       /*
+        * We must bump the timestamp to force any concurrent fault
+        * to retry.
+        */
        bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
-           (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy);
+             (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy);
        new_map = &vm2->vm_map; /* XXX */
        new_map->timestamp = 1;
 
@@ -3594,7 +3652,7 @@ vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map,
                         *
                         * Optimize vnode objects.  OBJ_ONEMAPPING
                         * is non-applicable but clear it anyway,
-                        * and its terminal so we don'tave to deal
+                        * and it's terminal so we don't have to deal
                         * with chains.  Reduces SMP conflicts.
                         *
                         * XXX assert that object.vm_object != NULL
@@ -3628,7 +3686,6 @@ vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map,
                 * Insert the entry into the new map -- we know we're
                 * inserting at the end of the new map.
                 */
-
                vm_map_entry_link(new_map, new_map->header.prev,
                                  new_entry);
 
@@ -4211,7 +4268,6 @@ RetryLookup:
                 * If we don't need to write the page, we just demote the
                 * permissions allowed.
                 */
-
                if (fault_type & VM_PROT_WRITE) {
                        /*
                         * Not allowed if TDF_NOFAULT is set as the shadowing
@@ -4235,7 +4291,6 @@ RetryLookup:
                                goto RetryLookup;
                        }
                        use_read_lock = 0;
-
                        vm_map_entry_shadow(entry, 0);
                } else {
                        /*
@@ -4265,12 +4320,13 @@ RetryLookup:
                 */
                if (vaddr < VM_MAX_USER_ADDRESS &&
                    entry->maptype == VM_MAPTYPE_NORMAL &&
-                   ((entry->start ^ entry->end) & ~MAP_ENTRY_PARTITION_MASK)) {
+                   ((entry->start ^ entry->end) & ~MAP_ENTRY_PARTITION_MASK) &&
+                   vm_map_partition_enable) {
                        if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
                                entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
                                ++mycpu->gd_cnt.v_intrans_coll;
                                ++mycpu->gd_cnt.v_intrans_wait;
-                               vm_map_transition_wait(map);
+                               vm_map_transition_wait(map, 0);
                                goto RetryLookup;
                        }
                        vm_map_entry_partition(map, entry, vaddr, &count);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index fff3964..2b82914 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -1,10 +1,14 @@
 /*
  * Copyright (c) 1991, 1993
  *     The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2003-2017 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -649,7 +653,7 @@ int vm_map_growstack (vm_map_t map, vm_offset_t addr);
 vm_offset_t vmspace_swap_count (struct vmspace *vmspace);
 vm_offset_t vmspace_anonymous_count (struct vmspace *vmspace);
 void vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *);
-void vm_map_transition_wait(vm_map_t map);
+void vm_map_transition_wait(vm_map_t map, int relock);
 
 void vm_map_interlock(vm_map_t map, struct vm_map_ilock *ilock,
                        vm_offset_t ran_beg, vm_offset_t ran_end);
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 46e2183..ae432ca 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -893,15 +893,15 @@ RestartScan:
                        /*
                         * Pass the page information to the user
                         */
-                       error = subyte( vec + vecindex, mincoreinfo);
+                       error = subyte(vec + vecindex, mincoreinfo);
                        if (error) {
                                error = EFAULT;
                                goto done;
                        }
 
                        /*
-                        * If the map has changed, due to the subyte, the previous
-                        * output may be invalid.
+                        * If the map has changed, due to the subyte,
+                        * the previous output may be invalid.
                         */
                        vm_map_lock_read(map);
                        if (timestamp != map->timestamp)
@@ -1084,7 +1084,7 @@ retry:
                        entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
                        ++mycpu->gd_cnt.v_intrans_coll;
                        ++mycpu->gd_cnt.v_intrans_wait;
-                       vm_map_transition_wait(map);
+                       vm_map_transition_wait(map, 1);
                        goto retry;
                }
 
@@ -1098,7 +1098,6 @@ retry:
                        vm_fault_unwire(map, entry);
        }
 
-       map->timestamp++;
        vm_map_unlock(map);
 
        return (rc);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 843a555..77d2f6b 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -480,7 +480,8 @@ vm_object_allocate_hold(objtype_t type, vm_pindex_t size)
  * Referencing a chain-locked object can blow up the fairly sensitive
  * ref_count and shadow_count tests in the deallocator.  Most callers
  * will call vm_object_chain_wait() prior to calling
- * vm_object_reference_locked() to avoid the case.
+ * vm_object_reference_locked() to avoid the case.  The held token
+ * allows the caller to pair the wait and ref.
  *
  * The object must be held, but may be held shared if desired (hence why
  * we use an atomic op).
@@ -501,6 +502,26 @@ VMOBJDEBUG(vm_object_reference_locked)(vm_object_t object VMOBJDBARGS)
 #endif
 }
 
+/*
+ * This version explicitly allows the chain to be held (i.e. by the
+ * caller).  The token must also be held.
+ */
+void
+VMOBJDEBUG(vm_object_reference_locked_chain_held)(vm_object_t object
+          VMOBJDBARGS)
+{
+       KKASSERT(object != NULL);
+       ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
+       atomic_add_int(&object->ref_count, 1);
+       if (object->type == OBJT_VNODE) {
+               vref(object->handle);
+               /* XXX what if the vnode is being destroyed? */
+       }
+#if defined(DEBUG_LOCKS)
+       debugvm_object_add(object, file, line, 1);
+#endif
+}
+
 /*
  * This version is only allowed for vnode objects.
  */
@@ -2278,7 +2299,7 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp)
                }
 
                /*
-                * Hold the backing_object and check for races
+                * Hold (token lock) the backing_object and retest conditions.
                 */
                vm_object_hold(backing_object);
                if (backing_object != object->backing_object ||
@@ -2303,8 +2324,8 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp)
                }
 
                /*
-                * we check the backing object first, because it is most likely
-                * not collapsable.
+                * We check the backing object first, because it is most
+                * likely not collapsable.
                 */
                if (backing_object->handle != NULL ||
                    (backing_object->type != OBJT_DEFAULT &&
@@ -2320,8 +2341,7 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp)
                /*
                 * If paging is in progress we can't do a normal collapse.
                 */
-               if (
-                   object->paging_in_progress != 0 ||
+               if (object->paging_in_progress != 0 ||
                    backing_object->paging_in_progress != 0
                ) {
                        vm_object_qcollapse(object, backing_object);
@@ -2795,25 +2815,16 @@ done:
 }
 
 /*
- * Coalesces two objects backing up adjoining regions of memory into a
- * single object.
- *
- * returns TRUE if objects were combined.
- *
- * NOTE: Only works at the moment if the second object is NULL -
- *      if it's not, which object do we lock first?
- *
- * Parameters:
- *     prev_object     First object to coalesce
- *     prev_offset     Offset into prev_object
- *     next_object     Second object into coalesce
- *     next_offset     Offset into next_object
- *
- *     prev_size       Size of reference to prev_object
- *     next_size       Size of reference to next_object
+ * Try to extend prev_object into an adjoining region of virtual
+ * memory, return TRUE on success.
  *
  * The caller does not need to hold (prev_object) but must have a stable
  * pointer to it (typically by holding the vm_map locked).
+ *
+ * This function only works for anonymous memory objects which either
+ * have (a) one reference or (b) we are extending the object's size.
+ * Otherwise the related VM pages we want to use for the object might
+ * be in use by another mapping.
  */
 boolean_t
 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
@@ -2839,11 +2850,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
        vm_object_collapse(prev_object, NULL);
 
        /*
-        * Can't coalesce if: . more than one reference . paged out . shadows
-        * another object . has a copy elsewhere (any of which mean that the
-        * pages not mapped to prev_entry may be in use anyway)
+        * We can't coalesce if we shadow another object (figuring out the
+        * relationships becomes too complex).
         */
-
        if (prev_object->backing_object != NULL) {
                vm_object_chain_release(prev_object);
                vm_object_drop(prev_object);
@@ -2854,8 +2863,12 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
        next_size >>= PAGE_SHIFT;
        next_pindex = prev_pindex + prev_size;
 
-       if ((prev_object->ref_count > 1) &&
-           (prev_object->size != next_pindex)) {
+       /*
+        * We can't coalesce if the object has more than one reference
+        * unless we are extending it into newly minted space.
+        */
+       if (prev_object->ref_count > 1 &&
+           prev_object->size != next_pindex) {
                vm_object_chain_release(prev_object);
                vm_object_drop(prev_object);
                return (FALSE);
@@ -2879,9 +2892,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
         */
        if (next_pindex + next_size > prev_object->size)
                prev_object->size = next_pindex + next_size;
-
        vm_object_chain_release(prev_object);
        vm_object_drop(prev_object);
+
        return (TRUE);
 }
 
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index a609a17..3c02a72 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -207,6 +207,12 @@ struct vm_object {
  *                 be clear in those cases.  It might not be set on other
  *                 object types (particularly OBJT_VNODE).
  *
+ *                 This flag indicates that any given page index within the
+ *                 object is only mapped to a single vm_map_entry.  Split
+ *                 vm_map_entry's (denoting distinct non-overlapping page
+ *                 ranges) do not clear this flag.  This flag is typically
+ *                 cleared on fork().
+ *
  * OBJ_NOPAGEIN   - vn and tmpfs set this flag, indicating to swapoff
  *                 that the objects aren't intended to have any vm_page's,
  *                 only swap blocks.  vn and tmpfs don't know how to deal
@@ -222,7 +228,7 @@ struct vm_object {
 #define OBJ_MIGHTBEDIRTY 0x0100                /* object might be dirty */
 #define OBJ_CLEANING   0x0200
 #define OBJ_DEADWNT    0x1000          /* waiting because object is dead */
-#define        OBJ_ONEMAPPING  0x2000          /* flag single vm_map_entry mapping */
+#define        OBJ_ONEMAPPING  0x2000
 #define OBJ_NOMSYNC    0x4000          /* disable msync() system call */
 
 #define CHAINLK_EXCL   0x80000000
@@ -361,6 +367,9 @@ void vm_object_unlock(vm_object_t);
                debugvm_object_reference_quick(obj, __FILE__, __LINE__)
 #define vm_object_reference_locked(obj)                \
                debugvm_object_reference_locked(obj, __FILE__, __LINE__)
+#define vm_object_reference_locked_chain_held(obj)             \
+               debugvm_object_reference_locked_chain_held(     \
+                                       obj, __FILE__, __LINE__)
 #define vm_object_deallocate(obj)              \
                debugvm_object_deallocate(obj, __FILE__, __LINE__)
 #define vm_object_deallocate_locked(obj)       \
@@ -380,6 +389,8 @@ void VMOBJDEBUG(vm_object_hold_shared)(vm_object_t object VMOBJDBARGS);
 void VMOBJDEBUG(vm_object_drop)(vm_object_t object VMOBJDBARGS);
 void VMOBJDEBUG(vm_object_reference_quick)(vm_object_t object VMOBJDBARGS);
 void VMOBJDEBUG(vm_object_reference_locked)(vm_object_t object VMOBJDBARGS);
+void VMOBJDEBUG(vm_object_reference_locked_chain_held)(
+                       vm_object_t object VMOBJDBARGS);
 void VMOBJDEBUG(vm_object_deallocate)(vm_object_t object VMOBJDBARGS);
 void VMOBJDEBUG(vm_object_deallocate_locked)(vm_object_t object VMOBJDBARGS);