virtual_offset, text_end,
VM_MAPTYPE_NORMAL,
VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
- MAP_COPY_ON_WRITE | MAP_PREFAULT);
+ MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_PREFAULT_RELOCK);
if (error) {
vm_object_drop(object);
vm_map_entry_release(count);
return (error);
}
+
data_end = text_end + a_out->a_data;
if (a_out->a_data) {
vm_object_reference_locked(object);
text_end, data_end,
VM_MAPTYPE_NORMAL,
VM_PROT_ALL, VM_PROT_ALL,
- MAP_COPY_ON_WRITE | MAP_PREFAULT);
+ MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_PREFAULT_RELOCK);
if (error) {
vm_object_drop(object);
vm_map_unlock(map);
vm_offset_t map_addr;
int error, rv, cow;
int count;
+ int shared;
size_t copy_len;
vm_object_t object;
vm_offset_t file_addr;
object = vp->v_object;
error = 0;
- vm_object_hold(object);
+ /*
+ * In most cases we will be able to use a shared lock on the
+ * object we are inserting into the map. The lock will be
+ * upgraded in situations where new VM pages must be allocated.
+ */
+ shared = vm_object_hold_maybe_shared(object);
/*
* It's necessary to fail if the filsz + offset taken from the
vm_object_reference_locked(object);
/* cow flags: don't dump readonly sections in core */
- cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
- (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
+ cow = MAP_COPY_ON_WRITE | MAP_PREFAULT;
+ if ((prot & VM_PROT_WRITE) == 0)
+ cow |= MAP_DISABLE_COREDUMP;
+ if (shared == 0)
+ cow |= MAP_PREFAULT_RELOCK;
count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
vm_map_lock(&vmspace->vm_map);
cow);
vm_map_unlock(&vmspace->vm_map);
vm_map_entry_release(count);
+
+ /*
+ * NOTE: Object must have a hold ref when calling
+ * vm_object_deallocate().
+ */
if (rv != KERN_SUCCESS) {
- vm_object_deallocate(object);
vm_object_drop(object);
+ vm_object_deallocate(object);
return (EINVAL);
}
}
}
-
/*
* We have to get the remaining bit of the file into the first part
* of the oversized map segment. This is normally because the .data
}
if (copy_len != 0) {
- vm_page_t m;
struct lwbuf *lwb;
struct lwbuf lwb_cache;
+ vm_page_t m;
m = vm_fault_object_page(object, trunc_page(offset + filsz),
- VM_PROT_READ, 0, &error);
+ VM_PROT_READ, 0, shared, &error);
if (m) {
lwb = lwbuf_alloc(m, &lwb_cache);
error = copyout((caddr_t)lwbuf_kva(lwb),
}
vm_object_drop(object);
+
/*
* set it to the specified protection
*/
#endif
static lwkt_token pool_tokens[LWKT_NUM_POOL_TOKENS];
+/* Serializes the token debug output (kprintf + backtrace) between threads. */
+struct spinlock tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin);
#define TOKEN_STRING "REF=%p TOK=%p TD=%p"
#define TOKEN_ARGS lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
&vnode_token.t_collisions, 0, "Collision counter of vnode_token");
+/*
+ * Number of token debug reports still to be emitted.  While positive,
+ * each pass through a token debug hook decrements it and prints the
+ * thread, the token description and a short backtrace.
+ */
+int tokens_debug_output;
+SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
+ &tokens_debug_output, 0, "Generate stack trace N times");
+
+
#ifdef DEBUG_LOCKS_LATENCY
static long tokens_add_latency;
++tok->t_collisions;
logtoken(fail, ref);
td->td_toks_have = td->td_toks_stop - 1;
+
+ if (tokens_debug_output > 0) {
+ --tokens_debug_output;
+ spin_lock(&tok_debug_spin);
+ kprintf("Excl Token thread %p %s %s\n",
+ td, tok->t_desc, td->td_comm);
+ print_backtrace(6);
+ kprintf("\n");
+ spin_unlock(&tok_debug_spin);
+ }
+
lwkt_switch();
logtoken(succ, ref);
KKASSERT(tok->t_ref == ref);
++tok->t_collisions;
logtoken(fail, ref);
td->td_toks_have = td->td_toks_stop - 1;
+
+ if (tokens_debug_output > 0) {
+ --tokens_debug_output;
+ spin_lock(&tok_debug_spin);
+ kprintf("Shar Token thread %p %s %s\n",
+ td, tok->t_desc, td->td_comm);
+ print_backtrace(6);
+ kprintf("\n");
+ spin_unlock(&tok_debug_spin);
+ }
+
lwkt_switch();
logtoken(succ, ref);
}
m = vm_fault_object_page(object, IDX_TO_OFF(pindex),
VM_PROT_NONE,
VM_FAULT_DIRTY | VM_FAULT_UNSWAP,
- &error);
+ 0, &error);
if (m)
vm_page_unhold(m);
}
int hardfault;
int fault_flags;
int map_generation;
+ int shared;
boolean_t wired;
struct vnode *vp;
};
static int debug_cluster = 0;
SYSCTL_INT(_vm, OID_AUTO, debug_cluster, CTLFLAG_RW, &debug_cluster, 0, "");
-static int vm_shared_fault = 1;
+int vm_shared_fault = 1;
SYSCTL_INT(_vm, OID_AUTO, shared_fault, CTLFLAG_RW, &vm_shared_fault, 0,
"Allow shared token on vm_object");
static long vm_shared_hit = 0;
"Successful shared faults");
static long vm_shared_miss = 0;
SYSCTL_LONG(_vm, OID_AUTO, shared_miss, CTLFLAG_RW, &vm_shared_miss, 0,
- "Successful shared faults");
+ "Unsuccessful shared faults");
static int vm_fault_object(struct faultstate *, vm_pindex_t, vm_prot_t);
static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *, vpte_t, int);
fs.lookup_still_valid = TRUE;
fs.first_m = NULL;
fs.object = fs.first_object; /* so unlock_and_deallocate works */
+ fs.shared = 0;
/*
* If the entry is wired we cannot change the page protection.
* page can be safely written. However, it will force a read-only
* mapping for a read fault if the memory is managed by a virtual
* page table.
+ *
+ * If the fault code uses the shared object lock shortcut
+ * we must not try to burst (we can't allocate VM pages).
*/
- /* BEFORE */
result = vm_fault_object(&fs, first_pindex, fault_type);
+ if (fs.shared)
+ fault_flags &= ~VM_FAULT_BURST;
if (result == KERN_TRY_AGAIN) {
vm_object_drop(fs.first_object);
fs.lookup_still_valid = TRUE;
fs.first_m = NULL;
fs.object = fs.first_object; /* so unlock_and_deallocate works */
+ fs.shared = 0;
/*
* If the entry is wired we cannot change the page protection.
*/
vm_page_t
vm_fault_object_page(vm_object_t object, vm_ooffset_t offset,
- vm_prot_t fault_type, int fault_flags, int *errorp)
+ vm_prot_t fault_type, int fault_flags,
+ int shared, int *errorp)
{
int result;
vm_pindex_t first_pindex;
fs.entry = &entry;
fs.first_prot = fault_type;
fs.wired = 0;
+ fs.shared = shared;
/*fs.map_generation = 0; unused */
/*
* inclusive is chainlocked.
*
* If the object is dead, we stop here
+ *
+ * vm_shared_fault (fs->shared != 0) case: nothing special.
*/
if (fs->object->flags & OBJ_DEAD) {
vm_object_pip_wakeup(fs->first_object);
* worth. We cannot under any circumstances mess
* around with a vm_page_t->busy page except, perhaps,
* to pmap it.
+ *
+ * vm_shared_fault (fs->shared != 0) case:
+ * error nothing special
+ * fs->m relock excl if I/O needed
+ * NULL relock excl
*/
fs->m = vm_page_lookup_busy_try(fs->object, pindex,
TRUE, &error);
if (fs->m->object != &kernel_object) {
if ((fs->m->valid & VM_PAGE_BITS_ALL) !=
VM_PAGE_BITS_ALL) {
+ if (fs->shared) {
+ vm_object_drop(fs->object);
+ vm_object_hold(fs->object);
+ fs->shared = 0;
+ }
goto readrest;
}
if (fs->m->flags & PG_RAM) {
if (debug_cluster)
kprintf("R");
vm_page_flag_clear(fs->m, PG_RAM);
+ if (fs->shared) {
+ vm_object_drop(fs->object);
+ vm_object_hold(fs->object);
+ fs->shared = 0;
+ }
goto readrest;
}
}
break; /* break to PAGE HAS BEEN FOUND */
}
+ if (fs->shared) {
+ vm_object_drop(fs->object);
+ vm_object_hold(fs->object);
+ fs->shared = 0;
+ }
+
/*
* Page is not resident. If this is the search termination
* or the pager might contain the page, allocate a new page.
/*
* Move on to the next object. The chain lock should prevent
* the backing_object from getting ripped out from under us.
+ *
+ * vm_shared_fault case:
+ *
+ * If the next object is the last object and
+ * vnode-backed (thus possibly shared), we can try a
+ * shared object lock. There is no 'chain' for this
+ * last object if vnode-backed (otherwise we would
+ * need an exclusive lock).
+ *
+ * fs->shared mode is very fragile and only works
+ * under certain specific conditions, and is only
+ * handled for those conditions in our loop. Essentially
+ * it is designed only to be able to 'dip into' the
+ * vnode's object and extract an already-cached page.
*/
+ fs->shared = 0;
if ((next_object = fs->object->backing_object) != NULL) {
- vm_object_hold(next_object);
+ fs->shared = vm_object_hold_maybe_shared(next_object);
vm_object_chain_acquire(next_object);
KKASSERT(next_object == fs->object->backing_object);
pindex += OFF_TO_IDX(fs->object->backing_object_offset);
static int randomize_mmap;
SYSCTL_INT(_vm, OID_AUTO, randomize_mmap, CTLFLAG_RW, &randomize_mmap, 0,
"Randomize mmap offsets");
+/*
+ * When non-zero, a mapping inserted with MAP_PREFAULT_RELOCK drops its
+ * hold on the object around the prefault and re-acquires it afterwards.
+ */
+static int vm_map_relock_enable = 1;
+SYSCTL_INT(_vm, OID_AUTO, map_relock_enable, CTLFLAG_RW,
+ &vm_map_relock_enable, 0, "Drop and re-hold object around map prefault");
static void vm_map_entry_shadow(vm_map_entry_t entry, int addref);
static vm_map_entry_t vm_map_entry_create(vm_map_t map, int *);
* When object is non-NULL, it could be shared with another
* process. We have to set or clear OBJ_ONEMAPPING
* appropriately.
+ *
+ * NOTE: This flag is only applicable to DEFAULT and SWAP
+ * objects and will already be clear in other types
+ * of objects, so a shared object lock is ok for
+ * VNODE objects.
*/
if ((object->ref_count > 1) || (object->shadow_count != 0)) {
vm_object_clear_flag(object, OBJ_ONEMAPPING);
*/
if ((cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) &&
maptype != VM_MAPTYPE_VPAGETABLE) {
+ int dorelock = 0;
+ if (vm_map_relock_enable && (cow & MAP_PREFAULT_RELOCK)) {
+ dorelock = 1;
+ vm_object_lock_swap();
+ vm_object_drop(object);
+ }
pmap_object_init_pt(map->pmap, start, prot,
object, OFF_TO_IDX(offset), end - start,
cow & MAP_PREFAULT_PARTIAL);
+ if (dorelock) {
+ vm_object_hold(object);
+ vm_object_lock_swap();
+ }
}
if (must_drop)
vm_object_drop(object);
} else if (object && object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP) {
/*
- * vnode object routines cannot be chain-locked
+ * vnode object routines cannot be chain-locked,
+ * but since we aren't removing pages from the
+ * object here we can use a shared hold.
*/
- vm_object_hold(object);
+ vm_object_hold_shared(object);
pmap_remove(map->pmap, s, e);
vm_object_drop(object);
} else if (object) {
#define MAP_IS_KSTACK 0x0080
#define MAP_DISABLE_COREDUMP 0x0100
#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */
+/*
+ * NOTE(review): MAP_PREFAULT_RELOCK has the same value as
+ * MAP_PREFAULT_MADVISE.  Confirm the two flags can never be passed
+ * through the same cow/limit argument, or give this one its own bit.
+ */
+#define MAP_PREFAULT_RELOCK 0x0200
/*
* vm_fault option flags
/*
- * (MPSAFE)
- *
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
}
/*
+ * Hold a VM object, selecting the lock mode from the object itself.
+ *
+ * A shared hold is taken when vm_shared_fault is enabled and the object
+ * is a vnode object with no backing object.  Any other object is held
+ * exclusively.
+ *
+ * Returns 1 if the hold obtained is shared and 0 if it is exclusive.
+ */
+int
+#ifndef DEBUG_LOCKS
+vm_object_hold_maybe_shared(vm_object_t obj)
+#else
+debugvm_object_hold_maybe_shared(vm_object_t obj, char *file, int line)
+#endif
+{
+	int use_shared;
+
+	use_shared = (vm_shared_fault &&
+		      obj->type == OBJT_VNODE &&
+		      obj->backing_object == NULL);
+	if (use_shared)
+		vm_object_hold_shared(obj);
+	else
+		vm_object_hold(obj);
+	return(use_shared);
+}
+
+/*
* Drop the token and hold_count on the object.
*/
void
* will call vm_object_chain_wait() prior to calling
* vm_object_reference_locked() to avoid the case.
*
- * The object must be held.
+ * The object must be held, but may be held shared if desired (hence why
+ * we use an atomic op).
*/
void
vm_object_reference_locked(vm_object_t object)
KKASSERT(object != NULL);
ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
KKASSERT((object->flags & OBJ_CHAINLOCK) == 0);
- object->ref_count++;
+ atomic_add_int(&object->ref_count, 1);
if (object->type == OBJT_VNODE) {
vref(object->handle);
/* XXX what if the vnode is being destroyed? */
/*
* Dereference an object and its underlying vnode.
*
- * The object must be held and will be held on return.
+ * The object must be held exclusively and will remain held on return.
+ * (We don't need an atomic op due to the exclusivity).
*/
static void
vm_object_vndeallocate(vm_object_t object)
* The caller does not have to hold the object locked but must have control
* over the reference in question in order to guarantee that the object
* does not get ripped out from under us.
+ *
+ * XXX Currently all deallocations require an exclusive lock.
*/
void
vm_object_deallocate(vm_object_t object)
*/
again:
while (object != NULL) {
+ ASSERT_LWKT_TOKEN_HELD_EXCL(&object->token);
#if 0
/*
* Don't rip a ref_count out from under an object undergoing
*
* Nominal ref_count > 1 case if the second ref is not from
* a shadow.
+ *
+ * (ONEMAPPING only applies to DEFAULT AND SWAP objects)
*/
if (object->ref_count == 2 && object->shadow_count == 0) {
- vm_object_set_flag(object, OBJ_ONEMAPPING);
+ if (object->type == OBJT_DEFAULT ||
+ object->type == OBJT_SWAP) {
+ vm_object_set_flag(object, OBJ_ONEMAPPING);
+ }
object->ref_count--;
break;
}
/*
* Flags
+ *
+ * NOTE: OBJ_ONEMAPPING only applies to DEFAULT and SWAP objects. It
+ * may be gratuitously re-cleared in other cases but will already be
+ * clear in those cases.
*/
#define OBJ_CHAINLOCK 0x0001 /* backing_object/shadow changing */
#define OBJ_CHAINWANT 0x0002
/* lock for object list and count */
extern struct vm_object kernel_object; /* the single kernel object */
+extern int vm_shared_fault;
#endif /* _KERNEL */
void vm_object_madvise (vm_object_t, vm_pindex_t, int, int);
void vm_object_init2 (void);
vm_page_t vm_fault_object_page(vm_object_t, vm_ooffset_t,
- vm_prot_t, int, int *);
+ vm_prot_t, int, int, int *);
void vm_object_dead_sleep(vm_object_t, const char *);
void vm_object_dead_wakeup(vm_object_t);
void vm_object_lock_swap(void);
#ifndef DEBUG_LOCKS
void vm_object_hold(vm_object_t);
+int vm_object_hold_maybe_shared(vm_object_t);
int vm_object_hold_try(vm_object_t);
void vm_object_hold_shared(vm_object_t);
#else
+/*
+ * DEBUG_LOCKS build: callers go through the debug variant, which is
+ * passed the caller's file and line.  The prototype is declared here so
+ * that expanding the macro does not produce an implicit declaration.
+ */
+#define vm_object_hold_maybe_shared(obj) \
+ debugvm_object_hold_maybe_shared(obj, __FILE__, __LINE__)
+int debugvm_object_hold_maybe_shared(vm_object_t, char *, int);
#define vm_object_hold(obj) \
debugvm_object_hold(obj, __FILE__, __LINE__)
void debugvm_object_hold(vm_object_t, char *, int);
int
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
- ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
+ ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(object));
if (m->object != NULL)
panic("vm_page_insert: already inserted");
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{
KKASSERT(m->flags & PG_BUSY);
- ASSERT_LWKT_TOKEN_HELD(vm_object_token(new_object));
+ ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(new_object));
if (m->object) {
- ASSERT_LWKT_TOKEN_HELD(vm_object_token(m->object));
+ ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(m->object));
vm_page_remove(m);
}
if (vm_page_insert(m, new_object, new_pindex) == FALSE) {
tobject = lobject->backing_object;
if (tobject == NULL)
break;
- vm_object_hold(tobject);
+ vm_object_hold_shared(tobject);
if (tobject == lobject->backing_object) {
if (lobject != object) {
vm_object_lock_swap();