From 641f3b0ac8f54c6715024ca7b8dd7eb3ebbf651d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 1 Nov 2017 17:36:14 -0700 Subject: [PATCH] kernel - Refactor vm_fault and vm_map a bit. * Allow the virtual copy feature to be disabled via a sysctl. Default enabled. * Fix a bug in the virtual copy test. Multiple elements were not being retested after reacquiring the map lock. * Change the auto-partitioning of vm_map_entry structures from 16MB to 32MB. Add a sysctl to allow the feature to be disabled. Default enabled. * Cleanup map->timestamp bumps. Basically we bump it in vm_map_lock(), and also fix a bug where it was not being bumped after relocking the map in the virtual copy feature. * Fix an incorrect assertion in vm_map_split(). Refactor tests in vm_map_split(). Also, acquire the chain lock for the VM object in the caller to vm_map_split() instead of in vm_map_split() itself, allowing us to include the pmap adjustment within the locked area. * Make sure OBJ_ONEMAPPING is cleared for nobject in vm_map_split(). * Fix a bug in a call to vm_map_transition_wait() that double-locked the vm_map in the partitioning code. * General cleanups in vm/vm_object.c --- sys/vm/vm_fault.c | 127 +++++++++++++++++++++++------------ sys/vm/vm_map.c | 162 ++++++++++++++++++++++++++++++--------------- sys/vm/vm_map.h | 6 +- sys/vm/vm_mmap.c | 9 ++- sys/vm/vm_object.c | 71 ++++++++++++-------- sys/vm/vm_object.h | 13 +++- 6 files changed, 256 insertions(+), 132 deletions(-) diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 81b6fe4378..d9cbf8dc97 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -152,10 +152,13 @@ static int debug_fault = 0; SYSCTL_INT(_vm, OID_AUTO, debug_fault, CTLFLAG_RW, &debug_fault, 0, ""); static int debug_cluster = 0; SYSCTL_INT(_vm, OID_AUTO, debug_cluster, CTLFLAG_RW, &debug_cluster, 0, ""); +static int virtual_copy_enable = 1; +SYSCTL_INT(_vm, OID_AUTO, virtual_copy_enable, CTLFLAG_RW, + &virtual_copy_enable, 0, ""); int vm_shared_fault = 1; TUNABLE_INT("vm.shared_fault", &vm_shared_fault); -SYSCTL_INT(_vm, OID_AUTO, shared_fault, CTLFLAG_RW, &vm_shared_fault, 0, - "Allow shared token on vm_object"); +SYSCTL_INT(_vm, OID_AUTO, shared_fault, CTLFLAG_RW, + &vm_shared_fault, 0, "Allow shared token on vm_object"); static int vm_fault_object(struct faultstate *, vm_pindex_t, vm_prot_t, int); static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *, @@ -259,6 +262,82 @@ _unlock_things(struct faultstate *fs, int dealloc) #define unlock_and_deallocate(fs) _unlock_things(fs, 1) #define cleanup_successful_fault(fs) _cleanup_successful_fault(fs, 1) +/* + * Virtual copy tests. Used by the fault code to determine if a + * page can be moved from an orphan vm_object into its shadow + * instead of copying its contents. 
+ */ +static __inline int +virtual_copy_test(struct faultstate *fs) +{ + /* + * Must be holding exclusive locks + */ + if (fs->first_shared || fs->shared || virtual_copy_enable == 0) + return 0; + + /* + * Map, if present, has not changed + */ + if (fs->map && fs->map_generation != fs->map->timestamp) + return 0; + + /* + * Only one shadow object + */ + if (fs->object->shadow_count != 1) + return 0; + + /* + * No COW refs, except us + */ + if (fs->object->ref_count != 1) + return 0; + + /* + * No one else can look this object up + */ + if (fs->object->handle != NULL) + return 0; + + /* + * No other ways to look the object up + */ + if (fs->object->type != OBJT_DEFAULT && + fs->object->type != OBJT_SWAP) + return 0; + + /* + * We don't chase down the shadow chain + */ + if (fs->object != fs->first_object->backing_object) + return 0; + + return 1; +} + +static __inline int +virtual_copy_ok(struct faultstate *fs) +{ + if (virtual_copy_test(fs)) { + /* + * Grab the lock and re-test changeable items. + */ + if (fs->lookup_still_valid == FALSE && fs->map) { + if (lockmgr(&fs->map->lock, LK_EXCLUSIVE|LK_NOWAIT)) + return 0; + fs->lookup_still_valid = TRUE; + if (virtual_copy_test(fs)) { + fs->map_generation = ++fs->map->timestamp; + return 1; + } + fs->lookup_still_valid = FALSE; + lockmgr(&fs->map->lock, LK_RELEASE); + } + } + return 0; +} + /* * TRYPAGER * @@ -1975,6 +2054,7 @@ readrest: * is held.] * * object still held. + * vm_map may not be locked (determined by fs->lookup_still_valid) * * local shared variable may be different from fs->shared. * @@ -2000,46 +2080,7 @@ readrest: * dirty in the first object so that it will go out * to swap when needed. */ - if ( - /* - * Must be holding exclusive locks - */ - fs->first_shared == 0 && - fs->shared == 0 && - /* - * Map, if present, has not changed - */ - (fs->map == NULL || - fs->map_generation == fs->map->timestamp) && - /* - * Only one shadow object - */ - (fs->object->shadow_count == 1) && - /* - * No COW refs, except us - */ - (fs->object->ref_count == 1) && - /* - * No one else can look this object up - */ - (fs->object->handle == NULL) && - /* - * No other ways to look the object up - */ - ((fs->object->type == OBJT_DEFAULT) || - (fs->object->type == OBJT_SWAP)) && - /* - * We don't chase down the shadow chain - */ - (fs->object == fs->first_object->backing_object) && - - /* - * grab the lock if we need to - */ - (fs->lookup_still_valid || - fs->map == NULL || - lockmgr(&fs->map->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) - ) { + if (virtual_copy_ok(fs)) { /* * (first_m) and (m) are both busied. We have * move (m) into (first_m)'s object/pindex @@ -2051,7 +2092,6 @@ readrest: * block so we don't do it until after the * rename. */ - fs->lookup_still_valid = 1; vm_page_protect(fs->first_m, VM_PROT_NONE); vm_page_remove(fs->first_m); vm_page_rename(fs->m, fs->first_object, @@ -2403,6 +2443,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, */ vm_object_hold(src_object); vm_object_hold(dst_object); + for (vaddr = dst_entry->start, dst_offset = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 822877954e..843eed6959 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1,12 +1,14 @@ /* - * (MPSAFE) - * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2003-2017 The DragonFly Project. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -131,7 +133,11 @@ static struct objcache *vmspace_cache; #define MAPENTRYBSP_CACHE (MAXCPU+1) #define MAPENTRYAP_CACHE 8 -#define MAP_ENTRY_PARTITION_SIZE ((vm_offset_t)(16 * 1024 * 1024)) +/* + * Partioning threaded programs with large anonymous memory areas can + * improve concurrent fault performance. + */ +#define MAP_ENTRY_PARTITION_SIZE ((vm_offset_t)(32 * 1024 * 1024)) #define MAP_ENTRY_PARTITION_MASK (MAP_ENTRY_PARTITION_SIZE - 1) #define VM_MAP_ENTRY_WITHIN_PARTITION(entry) \ @@ -149,7 +155,10 @@ SYSCTL_INT(_vm, OID_AUTO, randomize_mmap, CTLFLAG_RW, &randomize_mmap, 0, "Randomize mmap offsets"); static int vm_map_relock_enable = 1; SYSCTL_INT(_vm, OID_AUTO, map_relock_enable, CTLFLAG_RW, - &vm_map_relock_enable, 0, "Randomize mmap offsets"); + &vm_map_relock_enable, 0, "insert pop pgtable optimization"); +static int vm_map_partition_enable = 1; +SYSCTL_INT(_vm, OID_AUTO, map_partition_enable, CTLFLAG_RW, + &vm_map_partition_enable, 0, "Break up larger vm_map_entry's"); static void vmspace_drop_notoken(struct vmspace *vm); static void vm_map_entry_shadow(vm_map_entry_t entry, int addref); @@ -1651,12 +1660,13 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end, * is unlocked for the sleep and relocked before the return. */ void -vm_map_transition_wait(vm_map_t map) +vm_map_transition_wait(vm_map_t map, int relock) { tsleep_interlock(map, 0); vm_map_unlock(map); tsleep(map, PINTERLOCKED, "vment", 0); - vm_map_lock(map); + if (relock) + vm_map_lock(map); } /* @@ -1722,7 +1732,7 @@ again: entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; ++mycpu->gd_cnt.v_intrans_coll; ++mycpu->gd_cnt.v_intrans_wait; - vm_map_transition_wait(map); + vm_map_transition_wait(map, 1); /* * entry and/or start_entry may have been clipped while * we slept, or may have gone away entirely. We have @@ -1762,7 +1772,7 @@ again: next->eflags |= MAP_ENTRY_NEEDS_WAKEUP; ++mycpu->gd_cnt.v_intrans_coll; ++mycpu->gd_cnt.v_intrans_wait; - vm_map_transition_wait(map); + vm_map_transition_wait(map, 1); /* * clips might have occured while we blocked. @@ -1771,6 +1781,7 @@ again: CLIP_CHECK_BACK(start_entry, start); continue; } + /* * No restart necessary even though clip_end may block, we * are holding the map lock. @@ -1958,7 +1969,6 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, * Update physical map if necessary. Worry about copy-on-write * here -- CHECK THIS XXX */ - if (current->protection != old_prot) { #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? 
~VM_PROT_WRITE : \ VM_PROT_ALL) @@ -1973,7 +1983,6 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, current = current->next; } - vm_map_unlock(map); vm_map_entry_release(count); return (KERN_SUCCESS); @@ -2432,9 +2441,9 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, done: vm_map_unclip_range(map, start_entry, start, real_end, &count, MAP_CLIP_NO_HOLES); - map->timestamp++; vm_map_unlock(map); vm_map_entry_release(count); + return (rv); } @@ -2636,7 +2645,6 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t real_end, int kmflags) done: vm_map_unclip_range(map, start_entry, start, real_end, &count, MAP_CLIP_NO_HOLES); - map->timestamp++; vm_map_unlock(map); failure: if (kmflags & KM_KRESERVE) @@ -2960,7 +2968,7 @@ again: start = entry->start; ++mycpu->gd_cnt.v_intrans_coll; ++mycpu->gd_cnt.v_intrans_wait; - vm_map_transition_wait(map); + vm_map_transition_wait(map, 1); goto again; } vm_map_clip_end(map, entry, end, countp); @@ -3025,6 +3033,10 @@ again: OBJ_ONEMAPPING && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { + /* + * When ONEMAPPING is set we can destroy the + * pages underlying the entry's range. + */ vm_object_collapse(object, NULL); vm_object_page_remove(object, offidxstart, offidxend, FALSE); @@ -3147,6 +3159,9 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, * and moves the VM pages from the original object to the new object. * The original object will also be collapsed, if possible. * + * Caller must supply entry->object.vm_object held and chain_acquired, and + * should chain_release and drop the object upon return. + * * We can only do this for normal memory objects with a single mapping, and * it only makes sense to do it if there are 2 or more refs on the original * object. i.e. typically a memory object that has been extended into @@ -3164,10 +3179,10 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, * The vm_map must be locked and its token held. */ static void -vm_map_split(vm_map_entry_t entry) +vm_map_split(vm_map_entry_t entry, vm_object_t oobject) { /* OPTIMIZED */ - vm_object_t oobject, nobject, bobject; + vm_object_t nobject, bobject; vm_offset_t s, e; vm_page_t m; vm_pindex_t offidxstart, offidxend, idx; @@ -3182,33 +3197,22 @@ vm_map_split(vm_map_entry_t entry) * OBJ_ONEMAPPING doesn't apply to vnode objects but clear the flag * anyway. */ - oobject = entry->object.vm_object; if (oobject->type != OBJT_DEFAULT && oobject->type != OBJT_SWAP) { vm_object_reference_quick(oobject); vm_object_clear_flag(oobject, OBJ_ONEMAPPING); return; } +#if 0 /* - * Setup. Chain lock the original object throughout the entire - * routine to prevent new page faults from occuring. - * - * XXX can madvise WILLNEED interfere with us too? - */ - vm_object_hold(oobject); - vm_object_chain_acquire(oobject, 0); - - /* - * Original object cannot be split? Might have also changed state. + * Original object cannot be split? 
*/ - if (oobject->handle == NULL || (oobject->type != OBJT_DEFAULT && - oobject->type != OBJT_SWAP)) { - vm_object_chain_release(oobject); - vm_object_reference_locked(oobject); + if (oobject->handle == NULL) { + vm_object_reference_locked_chain_held(oobject); vm_object_clear_flag(oobject, OBJ_ONEMAPPING); - vm_object_drop(oobject); return; } +#endif /* * Collapse original object with its backing store as an @@ -3224,10 +3228,8 @@ vm_map_split(vm_map_entry_t entry) if (oobject->ref_count <= 1 || (oobject->type != OBJT_DEFAULT && oobject->type != OBJT_SWAP) || (oobject->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) != OBJ_ONEMAPPING) { - vm_object_chain_release(oobject); - vm_object_reference_locked(oobject); + vm_object_reference_locked_chain_held(oobject); vm_object_clear_flag(oobject, OBJ_ONEMAPPING); - vm_object_drop(oobject); return; } @@ -3246,7 +3248,7 @@ vm_map_split(vm_map_entry_t entry) /* ref for shadowing below */ vm_object_reference_locked(bobject); vm_object_chain_acquire(bobject, 0); - KKASSERT(bobject->backing_object == bobject); + KKASSERT(oobject->backing_object == bobject); KKASSERT((bobject->flags & OBJ_DEAD) == 0); } else { /* @@ -3284,6 +3286,10 @@ vm_map_split(vm_map_entry_t entry) KKASSERT(0); } + /* + * If we could not allocate nobject just clear ONEMAPPING on + * oobject and return. + */ if (nobject == NULL) { if (bobject) { if (useshadowlist) { @@ -3294,10 +3300,8 @@ vm_map_split(vm_map_entry_t entry) vm_object_deallocate(bobject); } } - vm_object_chain_release(oobject); - vm_object_reference_locked(oobject); + vm_object_reference_locked_chain_held(oobject); vm_object_clear_flag(oobject, OBJ_ONEMAPPING); - vm_object_drop(oobject); return; } @@ -3314,6 +3318,9 @@ vm_map_split(vm_map_entry_t entry) * * Adding an object to bobject's shadow list requires refing bobject * which we did above in the useshadowlist case. + * + * XXX it is unclear if we need to clear ONEMAPPING on bobject here + * or not. */ if (bobject) { nobject->backing_object_offset = @@ -3381,6 +3388,13 @@ vm_map_split(vm_map_entry_t entry) entry->object.vm_object = nobject; entry->offset = 0LL; + /* + * The map is being split and nobject is going to wind up on both + * vm_map_entry's, so make sure OBJ_ONEMAPPING is cleared on + * nobject. + */ + vm_object_clear_flag(nobject, OBJ_ONEMAPPING); + /* * Cleanup * @@ -3397,10 +3411,27 @@ vm_map_split(vm_map_entry_t entry) vm_object_chain_release(bobject); vm_object_drop(bobject); } - vm_object_chain_release(oobject); + +#if 0 + if (oobject->resident_page_count) { + kprintf("oobject %p still contains %jd pages!\n", + oobject, (intmax_t)oobject->resident_page_count); + for (idx = 0; idx < size; idx++) { + vm_page_t m; + + m = vm_page_lookup_busy_wait(oobject, offidxstart + idx, + TRUE, "vmpg"); + if (m) { + kprintf("oobject %p idx %jd\n", + oobject, + offidxstart + idx); + vm_page_wakeup(m); + } + } + } +#endif /*vm_object_clear_flag(oobject, OBJ_ONEMAPPING);*/ vm_object_deallocate_locked(oobject); - vm_object_drop(oobject); } /* @@ -3418,6 +3449,7 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, vm_map_entry_t src_entry, vm_map_entry_t dst_entry) { vm_object_t src_object; + vm_object_t oobject; if (dst_entry->maptype == VM_MAPTYPE_SUBMAP || dst_entry->maptype == VM_MAPTYPE_UKSMAP) @@ -3430,13 +3462,29 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, /* * If the source entry is marked needs_copy, it is already * write-protected. 
+ * + * To avoid interacting with a vm_fault that might have + * released its vm_map, we must acquire the fronting + * object. */ + oobject = src_entry->object.vm_object; + if (oobject) { + vm_object_hold(oobject); + vm_object_chain_acquire(oobject, 0); + } + +#if 0 + pmap_protect(src_map->pmap, + src_entry->start, src_entry->end, + VM_PROT_NONE); +#else if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { pmap_protect(src_map->pmap, src_entry->start, src_entry->end, src_entry->protection & ~VM_PROT_WRITE); } +#endif /* * Make a copy of the object. @@ -3452,10 +3500,12 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, * to retry, otherwise the concurrent fault might improperly * install a RW pte when its supposed to be a RO(COW) pte. * This race can occur because a vnode-backed fault may have - * to temporarily release the map lock. + * to temporarily release the map lock. This was handled + * when the caller locked the map exclusively. */ - if (src_entry->object.vm_object != NULL) { - vm_map_split(src_entry); + if (oobject) { + vm_map_split(src_entry, oobject); + src_object = src_entry->object.vm_object; dst_entry->object.vm_object = src_object; src_entry->eflags |= (MAP_ENTRY_COW | @@ -3463,14 +3513,17 @@ vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map, dst_entry->eflags |= (MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); dst_entry->offset = src_entry->offset; - ++src_map->timestamp; } else { dst_entry->object.vm_object = NULL; dst_entry->offset = 0; } - pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, - dst_entry->end - dst_entry->start, src_entry->start); + dst_entry->end - dst_entry->start, + src_entry->start); + if (oobject) { + vm_object_chain_release(oobject); + vm_object_drop(oobject); + } } else { /* * Of course, wired down pages can't be set copy-on-write. @@ -3510,8 +3563,13 @@ vmspace_fork(struct vmspace *vm1) vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); lwkt_gettoken(&vm2->vm_map.token); + + /* + * We must bump the timestamp to force any concurrent fault + * to retry. + */ bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, - (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy); + (caddr_t)&vm1->vm_endcopy - (caddr_t)&vm1->vm_startcopy); new_map = &vm2->vm_map; /* XXX */ new_map->timestamp = 1; @@ -3594,7 +3652,7 @@ vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map, * * Optimize vnode objects. OBJ_ONEMAPPING * is non-applicable but clear it anyway, - * and its terminal so we don'th ave to deal + * and its terminal so we don't have to deal * with chains. Reduces SMP conflicts. * * XXX assert that object.vm_object != NULL @@ -3628,7 +3686,6 @@ vmspace_fork_normal_entry(vm_map_t old_map, vm_map_t new_map, * Insert the entry into the new map -- we know we're * inserting at the end of the new map. */ - vm_map_entry_link(new_map, new_map->header.prev, new_entry); @@ -4211,7 +4268,6 @@ RetryLookup: * If we don't need to write the page, we just demote the * permissions allowed. 
*/ - if (fault_type & VM_PROT_WRITE) { /* * Not allowed if TDF_NOFAULT is set as the shadowing @@ -4235,7 +4291,6 @@ RetryLookup: goto RetryLookup; } use_read_lock = 0; - vm_map_entry_shadow(entry, 0); } else { /* @@ -4265,12 +4320,13 @@ RetryLookup: */ if (vaddr < VM_MAX_USER_ADDRESS && entry->maptype == VM_MAPTYPE_NORMAL && - ((entry->start ^ entry->end) & ~MAP_ENTRY_PARTITION_MASK)) { + ((entry->start ^ entry->end) & ~MAP_ENTRY_PARTITION_MASK) && + vm_map_partition_enable) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; ++mycpu->gd_cnt.v_intrans_coll; ++mycpu->gd_cnt.v_intrans_wait; - vm_map_transition_wait(map); + vm_map_transition_wait(map, 0); goto RetryLookup; } vm_map_entry_partition(map, entry, vaddr, &count); diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index fff3964a7a..2b82914fb2 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -1,10 +1,14 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2003-2017 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -649,7 +653,7 @@ int vm_map_growstack (vm_map_t map, vm_offset_t addr); vm_offset_t vmspace_swap_count (struct vmspace *vmspace); vm_offset_t vmspace_anonymous_count (struct vmspace *vmspace); void vm_map_set_wired_quick(vm_map_t map, vm_offset_t addr, vm_size_t size, int *); -void vm_map_transition_wait(vm_map_t map); +void vm_map_transition_wait(vm_map_t map, int relock); void vm_map_interlock(vm_map_t map, struct vm_map_ilock *ilock, vm_offset_t ran_beg, vm_offset_t ran_end); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 46e2183883..ae432cadd9 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -893,15 +893,15 @@ RestartScan: /* * Pass the page information to the user */ - error = subyte( vec + vecindex, mincoreinfo); + error = subyte(vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done; } /* - * If the map has changed, due to the subyte, the previous - * output may be invalid. + * If the map has changed, due to the subyte, + * the previous output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) @@ -1084,7 +1084,7 @@ retry: entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; ++mycpu->gd_cnt.v_intrans_coll; ++mycpu->gd_cnt.v_intrans_wait; - vm_map_transition_wait(map); + vm_map_transition_wait(map, 1); goto retry; } @@ -1098,7 +1098,6 @@ retry: vm_fault_unwire(map, entry); } - map->timestamp++; vm_map_unlock(map); return (rc); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 843a55579d..77d2f6b3b4 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -480,7 +480,8 @@ vm_object_allocate_hold(objtype_t type, vm_pindex_t size) * Referencing a chain-locked object can blow up the fairly sensitive * ref_count and shadow_count tests in the deallocator. Most callers * will call vm_object_chain_wait() prior to calling - * vm_object_reference_locked() to avoid the case. + * vm_object_reference_locked() to avoid the case. The held token + * allows the caller to pair the wait and ref. * * The object must be held, but may be held shared if desired (hence why * we use an atomic op). 
@@ -501,6 +502,26 @@ VMOBJDEBUG(vm_object_reference_locked)(vm_object_t object VMOBJDBARGS) #endif } +/* + * This version explicitly allows the chain to be held (i.e. by the + * caller). The token must also be held. + */ +void +VMOBJDEBUG(vm_object_reference_locked_chain_held)(vm_object_t object + VMOBJDBARGS) +{ + KKASSERT(object != NULL); + ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); + atomic_add_int(&object->ref_count, 1); + if (object->type == OBJT_VNODE) { + vref(object->handle); + /* XXX what if the vnode is being destroyed? */ + } +#if defined(DEBUG_LOCKS) + debugvm_object_add(object, file, line, 1); +#endif +} + /* * This version is only allowed for vnode objects. */ @@ -2278,7 +2299,7 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp) } /* - * Hold the backing_object and check for races + * Hold (token lock) the backing_object and retest conditions. */ vm_object_hold(backing_object); if (backing_object != object->backing_object || @@ -2303,8 +2324,8 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp) } /* - * we check the backing object first, because it is most likely - * not collapsable. + * We check the backing object first, because it is most + * likely not collapsable. */ if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && @@ -2320,8 +2341,7 @@ vm_object_collapse(vm_object_t object, struct vm_object_dealloc_list **dlistp) /* * If paging is in progress we can't do a normal collapse. */ - if ( - object->paging_in_progress != 0 || + if (object->paging_in_progress != 0 || backing_object->paging_in_progress != 0 ) { vm_object_qcollapse(object, backing_object); @@ -2795,25 +2815,16 @@ done: } /* - * Coalesces two objects backing up adjoining regions of memory into a - * single object. - * - * returns TRUE if objects were combined. - * - * NOTE: Only works at the moment if the second object is NULL - - * if it's not, which object do we lock first? - * - * Parameters: - * prev_object First object to coalesce - * prev_offset Offset into prev_object - * next_object Second object into coalesce - * next_offset Offset into next_object - * - * prev_size Size of reference to prev_object - * next_size Size of reference to next_object + * Try to extend prev_object into an adjoining region of virtual + * memory, return TRUE on success. * * The caller does not need to hold (prev_object) but must have a stable * pointer to it (typically by holding the vm_map locked). + * + * This function only works for anonymous memory objects which either + * have (a) one reference or (b) we are extending the object's size. + * Otherwise the related VM pages we want to use for the object might + * be in use by another mapping. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, @@ -2839,11 +2850,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, vm_object_collapse(prev_object, NULL); /* - * Can't coalesce if: . more than one reference . paged out . shadows - * another object . has a copy elsewhere (any of which mean that the - * pages not mapped to prev_entry may be in use anyway) + * We can't coalesce if we shadow another object (figuring out the + * relationships become too complex). 
*/ - if (prev_object->backing_object != NULL) { vm_object_chain_release(prev_object); vm_object_drop(prev_object); @@ -2854,8 +2863,12 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, next_size >>= PAGE_SHIFT; next_pindex = prev_pindex + prev_size; - if ((prev_object->ref_count > 1) && - (prev_object->size != next_pindex)) { + /* + * We can't if the object has more than one ref count unless we + * are extending it into newly minted space. + */ + if (prev_object->ref_count > 1 && + prev_object->size != next_pindex) { vm_object_chain_release(prev_object); vm_object_drop(prev_object); return (FALSE); @@ -2879,9 +2892,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex, */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; - vm_object_chain_release(prev_object); vm_object_drop(prev_object); + return (TRUE); } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index a609a17205..3c02a72ebf 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -207,6 +207,12 @@ struct vm_object { * be clear in those cases. It might not be set on other * object types (particularly OBJT_VNODE). * + * This flag indicates that any given page index within the + * object is only mapped to a single vm_map_entry. Split + * vm_map_entry's (denoting distinct non-overlapping page + * ranges) do not clear this flag. This flag is typically + * cleared on fork(). + * * OBJ_NOPAGEIN - vn and tmpfs set this flag, indicating to swapoff * that the objects aren't intended to have any vm_page's, * only swap blocks. vn and tmpfs don't know how to deal @@ -222,7 +228,7 @@ struct vm_object { #define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */ #define OBJ_CLEANING 0x0200 #define OBJ_DEADWNT 0x1000 /* waiting because object is dead */ -#define OBJ_ONEMAPPING 0x2000 /* flag single vm_map_entry mapping */ +#define OBJ_ONEMAPPING 0x2000 #define OBJ_NOMSYNC 0x4000 /* disable msync() system call */ #define CHAINLK_EXCL 0x80000000 @@ -361,6 +367,9 @@ void vm_object_unlock(vm_object_t); debugvm_object_reference_quick(obj, __FILE__, __LINE__) #define vm_object_reference_locked(obj) \ debugvm_object_reference_locked(obj, __FILE__, __LINE__) +#define vm_object_reference_locked_chain_held(obj) \ + debugvm_object_reference_locked_chain_held( \ + obj, __FILE__, __LINE__) #define vm_object_deallocate(obj) \ debugvm_object_deallocate(obj, __FILE__, __LINE__) #define vm_object_deallocate_locked(obj) \ @@ -380,6 +389,8 @@ void VMOBJDEBUG(vm_object_hold_shared)(vm_object_t object VMOBJDBARGS); void VMOBJDEBUG(vm_object_drop)(vm_object_t object VMOBJDBARGS); void VMOBJDEBUG(vm_object_reference_quick)(vm_object_t object VMOBJDBARGS); void VMOBJDEBUG(vm_object_reference_locked)(vm_object_t object VMOBJDBARGS); +void VMOBJDEBUG(vm_object_reference_locked_chain_held)( + vm_object_t object VMOBJDBARGS); void VMOBJDEBUG(vm_object_deallocate)(vm_object_t object VMOBJDBARGS); void VMOBJDEBUG(vm_object_deallocate_locked)(vm_object_t object VMOBJDBARGS); -- 2.41.0
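The two knobs added above, vm.virtual_copy_enable and vm.map_partition_enable, are plain integer sysctls that default to 1 and can be flipped at runtime, e.g. "sysctl vm.map_partition_enable=0" to turn off the automatic 32MB partitioning of large anonymous vm_map_entry structures. Below is a minimal userland sketch for inspecting or disabling both knobs from C, assuming only the standard sysctlbyname(3) interface; the program name and helper are illustrative and not part of the patch.

/*
 * vmknobs.c - read vm.virtual_copy_enable and vm.map_partition_enable,
 * and optionally clear both when invoked as "vmknobs off" (needs root).
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
get_knob(const char *name)
{
	int val;
	size_t len = sizeof(val);

	if (sysctlbyname(name, &val, &len, NULL, 0) < 0) {
		perror(name);
		exit(1);
	}
	return val;
}

int
main(int argc, char **argv)
{
	const char *knobs[] = {
		"vm.virtual_copy_enable",
		"vm.map_partition_enable",
	};
	int zero = 0;
	int i;

	for (i = 0; i < 2; ++i) {
		/* Print the current value of each knob. */
		printf("%s = %d\n", knobs[i], get_knob(knobs[i]));

		/* "vmknobs off" writes 0 to both knobs (root only). */
		if (argc > 1 && strcmp(argv[1], "off") == 0 &&
		    sysctlbyname(knobs[i], NULL, NULL,
				 &zero, sizeof(zero)) < 0)
			perror(knobs[i]);
	}
	return 0;
}

The same effect is available from the shell via sysctl(8), which is the more usual way to benchmark the features on and off.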