From 87f32d7cb3198c80dd8299ecdd884e86d5ac69f9 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Thu, 16 Aug 2012 17:40:38 -0700
Subject: [PATCH] kernel - Change lockmgr LK_SHARED behavior to fix improper
 recursion return

* When obtaining a LK_SHARED lock in a situation where you already own the
  lock LK_EXCLUSIVE, lockmgr would downgrade the lock to shared.  This
  creates a very serious problem when a large procedural recursion requests
  a shared lock on a lock that is already held exclusively.  When the
  recursion returns, the original top level finds that its lock is no
  longer exclusive.

* This problem occurred with vnode locks when a VOP_WRITE operation on an
  mmap'd space caused a VM fault which then turned around and issued a
  read().  When the fault returned, the vnode wound up locked shared
  instead of exclusive.

* Fix the problem by NOT downgrading an exclusive lock to shared when
  recursing on LK_SHARED.  Simply add another count to the exclusive lock.
---
 sys/kern/kern_lock.c      | 29 +++++++++++++++++++++++++++--
 sys/vfs/ufs/ffs_rawread.c | 14 --------------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 9453c0c02a..2e6700775e 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -234,13 +234,38 @@ debuglockmgr(struct lock *lkp, u_int flags,
 			COUNT(td, 1);
 			break;
 		}
+
 		/*
-		 * We hold an exclusive lock, so downgrade it to shared.
-		 * An alternative would be to fail with EDEADLK.
+		 * If we already hold an exclusive lock we bump the
+		 * exclusive count instead of downgrading to a shared
+		 * lock.
+		 *
+		 * WARNING! The old FreeBSD behavior was to downgrade,
+		 *	    but this creates a problem when recursions
+		 *	    return to the caller and the caller expects
+		 *	    its original exclusive lock to remain exclusively
+		 *	    locked.
 		 */
+		if (extflags & LK_CANRECURSE) {
+			lkp->lk_exclusivecount++;
+			COUNT(td, 1);
+			break;
+		}
+		if (extflags & LK_NOWAIT) {
+			error = EBUSY;
+			break;
+		}
+		spin_unlock(&lkp->lk_spinlock);
+		panic("lockmgr: locking against myself");
+#if 0
+		/*
+		 * The old code queued a shared lock request and fell
+		 * into a downgrade.
 		 */
 		sharelock(lkp, 1);
 		COUNT(td, 1);
 		/* fall into downgrade */
+#endif
 
 	case LK_DOWNGRADE:
 		if (lkp->lk_lockholder != td || lkp->lk_exclusivecount == 0) {

diff --git a/sys/vfs/ufs/ffs_rawread.c b/sys/vfs/ufs/ffs_rawread.c
index 13550dc1bc..e5fcdfa6ce 100644
--- a/sys/vfs/ufs/ffs_rawread.c
+++ b/sys/vfs/ufs/ffs_rawread.c
@@ -88,7 +88,6 @@ static int
 ffs_rawread_sync(struct vnode *vp)
 {
 	int error;
-	int upgraded;
 
 	/*
 	 * Check for dirty mmap, pending writes and dirty buffers
@@ -97,13 +96,6 @@ ffs_rawread_sync(struct vnode *vp)
 	if (bio_track_active(&vp->v_track_write) ||
 	    !RB_EMPTY(&vp->v_rbdirty_tree) ||
 	    (vp->v_flag & VOBJDIRTY) != 0) {
-		if (vn_islocked(vp) != LK_EXCLUSIVE) {
-			upgraded = 1;
-			/* Upgrade to exclusive lock, this might block */
-			vn_lock(vp, LK_UPGRADE);
-		} else
-			upgraded = 0;
-
 		/* Attempt to msync mmap() regions to clean dirty mmap */
 		if ((vp->v_flag & VOBJDIRTY) != 0) {
 			struct vm_object *obj;
@@ -114,23 +106,17 @@ ffs_rawread_sync(struct vnode *vp)
 		/* Wait for pending writes to complete */
 		error = bio_track_wait(&vp->v_track_write, 0, 0);
 		if (error != 0) {
-			if (upgraded != 0)
-				vn_lock(vp, LK_DOWNGRADE);
 			goto done;
 		}
 		/* Flush dirty buffers */
 		if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
 			if ((error = VOP_FSYNC(vp, MNT_WAIT, 0)) != 0) {
-				if (upgraded != 0)
-					vn_lock(vp, LK_DOWNGRADE);
 				goto done;
 			}
 			if (bio_track_active(&vp->v_track_write) ||
 			    !RB_EMPTY(&vp->v_rbdirty_tree))
 				panic("ffs_rawread_sync: dirty bufs");
 		}
-		if (upgraded != 0)
-			vn_lock(vp, LK_DOWNGRADE);
 	} else {
 		error = 0;
 	}
-- 
2.41.0
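
For reference, below is a minimal single-threaded sketch of the recursion
rule the patch adopts.  The toy_lock type and the toy_* helpers are invented
for illustration only; they are not the lockmgr or vn_lock API and model no
spin lock, waiter queue, or LK_CANRECURSE/LK_NOWAIT handling.  The sketch
only shows why bumping the exclusive count on a recursive shared request
leaves the outer caller's lock exclusive, where a downgrade would not.

/*
 * Toy model of the new LK_SHARED-while-held-exclusive rule.
 * Names here are hypothetical; this is not kernel code.
 */
#include <assert.h>
#include <stdio.h>

struct toy_lock {
	int	owner_excl;	/* nonzero while held exclusively by us */
	int	excl_count;	/* recursion count for the exclusive hold */
	int	shared_count;	/* shared holders (unused in this sketch) */
};

/* Exclusive acquire/release, recursion allowed. */
static void
toy_lock_ex(struct toy_lock *lk)
{
	lk->owner_excl = 1;
	lk->excl_count++;
}

static void
toy_unlock_ex(struct toy_lock *lk)
{
	assert(lk->owner_excl && lk->excl_count > 0);
	if (--lk->excl_count == 0)
		lk->owner_excl = 0;
}

/*
 * Shared acquire while we already hold the lock exclusively: instead of
 * downgrading (the old behavior the patch removes), just bump the
 * exclusive recursion count so the outer holder keeps its exclusive lock.
 */
static void
toy_lock_sh(struct toy_lock *lk)
{
	if (lk->owner_excl)
		lk->excl_count++;	/* recurse on the exclusive hold */
	else
		lk->shared_count++;	/* normal shared acquisition */
}

static void
toy_unlock_sh(struct toy_lock *lk)
{
	if (lk->owner_excl)
		toy_unlock_ex(lk);	/* undo the recursion bump */
	else
		lk->shared_count--;
}

/* Stand-in for the read() issued from the VM fault inside VOP_WRITE. */
static void
nested_read(struct toy_lock *lk)
{
	toy_lock_sh(lk);
	/* ... read path runs with the lock held ... */
	toy_unlock_sh(lk);
}

int
main(void)
{
	struct toy_lock lk = { 0, 0, 0 };

	toy_lock_ex(&lk);	/* outer "VOP_WRITE" holds exclusive */
	nested_read(&lk);	/* recursion requests shared */

	/* The outer caller still owns an exclusive lock. */
	assert(lk.owner_excl && lk.excl_count == 1);
	printf("outer lock still exclusive, count=%d\n", lk.excl_count);

	toy_unlock_ex(&lk);
	return 0;
}

With the old downgrade behavior, the check at the end of main() corresponds
to the failing case described in the commit message: the outer caller would
find its lock shared, not exclusive, after the nested read returned.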