From 625a293762660fd6f8474c36683a1aeed85b71ba Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 11 Aug 2011 14:38:09 -0700 Subject: [PATCH] kernel - Fix deadlock w/recent vm_map work * Fix a deadlock where a vm_page is being held PG_BUSY through a vm_map relocking operation. This can deadlock against the attempt to busy a vm_page while holding a vm_map lock. * The solution is a bit of a hack. Currently allow the relock attempt to timeout and force a retry of the VM fault, which unlocks and relocks everything. This is not the best solution but the problem occurs fairly rarely so for now it should be an acceptable workaround. Reported-by: ftigeot, Studbolt --- sys/vm/vm_fault.c | 22 +++++++++++++++++----- sys/vm/vm_map.c | 2 +- sys/vm/vm_map.h | 12 ++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 09f0f4db8e..5092e2dd5f 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -158,14 +158,23 @@ release_page(struct faultstate *fs) * * NOTE: vm_map_lock_read() does not bump fs->map->timestamp so we do * not have to update fs->map_generation here. + * + * NOTE: This function can fail due to a deadlock against the caller's + * holding of a vm_page BUSY. */ -static __inline void +static __inline int relock_map(struct faultstate *fs) { + int error; + if (fs->lookup_still_valid == FALSE && fs->map) { - vm_map_lock_read(fs->map); - fs->lookup_still_valid = TRUE; + error = vm_map_lock_read_to(fs->map); + if (error == 0) + fs->lookup_still_valid = TRUE; + } else { + error = 0; } + return error; } static __inline void @@ -1535,10 +1544,13 @@ skip: * * If the count has changed after relocking then all sorts of * crap may have happened and we have to retry. + * + * NOTE: The relock_map() can fail due to a deadlock against + * the vm_page we are holding BUSY. 
*/ if (fs->lookup_still_valid == FALSE && fs->map) { - relock_map(fs); - if (fs->map->timestamp != fs->map_generation) { + if (relock_map(fs) || + fs->map->timestamp != fs->map_generation) { release_page(fs); lwkt_reltoken(&vm_token); unlock_and_deallocate(fs); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 5c7df73b2e..3dd41c1b54 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -497,7 +497,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max, pmap_t pmap) map->hint = &map->header; map->timestamp = 0; map->flags = 0; - lockinit(&map->lock, "thrd_sleep", 0, 0); + lockinit(&map->lock, "thrd_sleep", (hz + 9) / 10, 0); TUNABLE_INT("vm.cache_vmspaces", &vmspace_sysref_class.nom_cache); } diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index c2e12b25d9..15cee91627 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -376,6 +376,18 @@ struct vmresident { #define vm_map_lock_read_try(map) \ lockmgr(&(map)->lock, LK_SHARED | LK_NOWAIT) +static __inline__ int +vm_map_lock_read_to(vm_map_t map) +{ + int error; + +#if defined(MAP_LOCK_DIAGNOSTIC) + kprintf ("locking map LK_SHARED: 0x%x\n", map); +#endif + error = lockmgr(&(map)->lock, LK_SHARED | LK_TIMELOCK); + return error; +} + static __inline__ int vm_map_lock_upgrade(vm_map_t map) { int error; -- 2.41.0