From c0c70b27fa5d25d0b596dbf158062c286d149dba Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 8 May 2008 01:41:07 +0000 Subject: [PATCH] Fix a race between the namecache and the vnode recycler. A vnode cannot be recycled if it's namecache entry represents a directory with locked children. The various VOP_N*() functions require the parent dvp to be stable. The main fix is in vrecycle() (kern/vfs_subr.c). Do not vgone() the vnode if we can't clean out the children. Also create an API to assert that the parent dvp is stable, and make it vhold/vdrop the dvp. The race primarily effected HAMMER which uses the VOP_N*() API. --- sys/kern/uipc_usrreq.c | 7 ++-- sys/kern/vfs_cache.c | 71 +++++++++++++++++++++++++++++++++-------- sys/kern/vfs_subr.c | 7 +++- sys/kern/vfs_syscalls.c | 63 ++++++++++++++++-------------------- sys/kern/vfs_vnops.c | 7 ++-- sys/sys/namecache.h | 4 ++- 6 files changed, 99 insertions(+), 60 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index c2df6b4a2c..3f5bc1a25c 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -32,7 +32,7 @@ * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ - * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.37 2008/01/06 16:55:51 swildner Exp $ + * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.38 2008/05/08 01:41:05 dillon Exp $ */ #include @@ -628,17 +628,16 @@ unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) error = nlookup(&nd); if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL) error = EADDRINUSE; - if (error == 0 && (dvp = nd.nl_nch.ncp->nc_parent->nc_vp) == NULL) + if (error == 0 && (dvp = cache_dvpref(nd.nl_nch.ncp)) == NULL) error = EPERM; if (error) goto done; - /* vhold(dvp); - DVP can't go away */ VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, &vattr); - /* vdrop(dvp); */ + cache_dvprel(dvp); if (error == 0) { vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 3b3104fdb9..33240e3886 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -67,7 +67,7 @@ * * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $ - * $DragonFly: src/sys/kern/vfs_cache.c,v 1.88 2008/02/06 08:53:15 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_cache.c,v 1.89 2008/05/08 01:41:05 dillon Exp $ */ #include @@ -1124,6 +1124,46 @@ again: return(error); } +/* + * Return a referenced vnode representing the parent directory of + * ncp. Because the caller has locked the ncp it should not be possible for + * the parent to go away. + */ +struct vnode * +cache_dvpref(struct namecache *ncp) +{ + struct vnode *dvp; +#if 0 + int error; +#endif + + if (ncp->nc_parent == NULL) + return(NULL); + if ((dvp = ncp->nc_parent->nc_vp) == NULL) + return(NULL); + KKASSERT((dvp->v_flag & VRECLAIMED) == 0); + vhold(dvp); + return(dvp); +#if 0 + if (vget(dvp, LK_SHARED) == 0) { + vn_unlock(dvp); + return(dvp); + } else { + panic("cache_dvpref: vget(%p) failed", dvp); + return(NULL); /* NOT REACHED */ + } +#endif +} + +void +cache_dvprel(struct vnode *dvp) +{ + vdrop(dvp); +#if 0 + vrele(dvp); +#endif +} + /* * Recursively set the FSMID update flag for namecache nodes leading * to root. This will cause the next getattr or reclaim to increment the @@ -2024,16 +2064,17 @@ restart: _cache_get(par); if (par == nch->mount->mnt_ncmountpt.ncp) { cache_resolve_mp(nch->mount); - } else if ((dvp = par->nc_parent->nc_vp) == NULL) { + } else if ((dvp = cache_dvpref(par)) == NULL) { kprintf("[diagnostic] cache_resolve: raced on %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name); _cache_put(par); continue; - } else if (par->nc_flag & NCF_UNRESOLVED) { - /* vhold(dvp); - DVP can't go away */ - nctmp.mount = mp; - nctmp.ncp = par; - par->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); - /* vdrop(dvp); */ + } else { + if (par->nc_flag & NCF_UNRESOLVED) { + nctmp.mount = mp; + nctmp.ncp = par; + par->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); + } + cache_dvprel(dvp); } if ((error = par->nc_error) != 0) { if (par->nc_error != EAGAIN) { @@ -2058,12 +2099,14 @@ restart: * NOTE: in order to call VOP_NRESOLVE(), the parent of the passed * ncp must already be resolved. */ - dvp = ncp->nc_parent->nc_vp; - /* vhold(dvp); - dvp can't go away */ - nctmp.mount = mp; - nctmp.ncp = ncp; - ncp->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); - /* vdrop(dvp); */ + if ((dvp = cache_dvpref(ncp)) != NULL) { + nctmp.mount = mp; + nctmp.ncp = ncp; + ncp->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); + cache_dvprel(dvp); + } else { + ncp->nc_error = EPERM; + } if (ncp->nc_error == EAGAIN) { kprintf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n", ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 904ddb786b..b4a3f1e656 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -37,7 +37,7 @@ * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_subr.c,v 1.112 2008/04/30 17:34:11 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_subr.c,v 1.113 2008/05/08 01:41:05 dillon Exp $ */ /* @@ -1230,11 +1230,16 @@ vop_stdrevoke(struct vop_revoke_args *ap) * This is called when the object underlying a vnode is being destroyed, * such as in a remove(). Try to recycle the vnode immediately if the * only active reference is our reference. + * + * Directory vnodes in the namecache with children cannot be immediately + * recycled because numerous VOP_N*() ops require them to be stable. */ int vrecycle(struct vnode *vp) { if (vp->v_sysref.refcnt <= 1) { + if (cache_inval_vp_nonblock(vp)) + return(0); vgone_vxlocked(vp); return (1); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b6ea0af364..98583c785e 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -37,7 +37,7 @@ * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.124 2008/01/04 12:16:19 matthias Exp $ + * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.125 2008/05/08 01:41:05 dillon Exp $ */ #include @@ -1653,9 +1653,8 @@ kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) return (EEXIST); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - DVP can't go away */ VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; @@ -1690,7 +1689,7 @@ kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) vput(vp); } } - /* vdrop(dvp); */ + cache_dvprel(dvp); return (error); } @@ -1733,16 +1732,15 @@ kern_mkfifo(struct nlookupdata *nd, int mode) return (EEXIST); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - DVP can't go away */ VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; vp = NULL; error = VOP_NMKNOD(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr); - /* vdrop(dvp); */ + cache_dvprel(dvp); if (error == 0) vput(vp); return (error); @@ -1860,11 +1858,10 @@ kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) vput(vp); return (EEXIST); } - if ((dvp = linknd->nl_nch.ncp->nc_parent->nc_vp) == NULL) { + if ((dvp = cache_dvpref(linknd->nl_nch.ncp)) == NULL) { vput(vp); return (EPERM); } - /* vhold(dvp); - dvp can't go away */ /* * Finally run the new API VOP. @@ -1872,7 +1869,7 @@ kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) error = can_hardlink(vp, td, td->td_proc->p_ucred); if (error == 0) error = VOP_NLINK(&linknd->nl_nch, dvp, vp, linknd->nl_cred); - /* vdrop(dvp); */ + cache_dvprel(dvp); vput(vp); return (error); } @@ -1915,13 +1912,12 @@ kern_symlink(struct nlookupdata *nd, char *path, int mode) return (EEXIST); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - dvp can't go away */ VATTR_NULL(&vattr); vattr.va_mode = mode; error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); - /* vdrop(dvp); */ + cache_dvprel(dvp); if (error == 0) vput(vp); return (error); @@ -1977,13 +1973,13 @@ sys_undelete(struct undelete_args *uap) error = ncp_writechk(&nd.nl_nch); dvp = NULL; if (error == 0) { - if ((dvp = nd.nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd.nl_nch.ncp)) != NULL) { + error = VOP_NWHITEOUT(&nd.nl_nch, dvp, + nd.nl_cred, NAMEI_DELETE); + cache_dvprel(dvp); + } else { error = EPERM; - } - if (error == 0) { - /* vhold(dvp); - dvp can't go away */ - error = VOP_NWHITEOUT(&nd.nl_nch, dvp, nd.nl_cred, NAMEI_DELETE); - /* vdrop(dvp); */ + } } nlookup_done(&nd); return (error); @@ -2001,11 +1997,10 @@ kern_unlink(struct nlookupdata *nd) return (error); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - dvp can't go away */ error = VOP_NREMOVE(&nd->nl_nch, dvp, nd->nl_cred); - /* vdrop(dvp); */ + cache_dvprel(dvp); return (error); } @@ -3039,23 +3034,21 @@ kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) * when we detect the situation. */ if (error == 0) { - fdvp = fromnd->nl_nch.ncp->nc_parent->nc_vp; - tdvp = tond->nl_nch.ncp->nc_parent->nc_vp; + fdvp = cache_dvpref(fromnd->nl_nch.ncp); + tdvp = cache_dvpref(tond->nl_nch.ncp); if (fdvp == NULL || tdvp == NULL) { error = EPERM; } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { - /* vhold(fdvp); - dvp can't go away */ error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, fromnd->nl_cred); - /* vdrop(fdvp); */ } else { - /* vhold(fdvp); - dvp can't go away */ - /* vhold(tdvp); - dvp can't go away */ error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, fdvp, tdvp, tond->nl_cred); - /* vdrop(fdvp); */ - /* vdrop(tdvp); */ } + if (fdvp) + cache_dvprel(fdvp); + if (tdvp) + cache_dvprel(tdvp); } return (error); } @@ -3102,16 +3095,15 @@ kern_mkdir(struct nlookupdata *nd, int mode) return (EEXIST); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - dvp can't go away */ VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; vp = NULL; error = VOP_NMKDIR(&nd->nl_nch, dvp, &vp, p->p_ucred, &vattr); - /* vdrop(dvp); */ + cache_dvprel(dvp); if (error == 0) vput(vp); return (error); @@ -3156,11 +3148,10 @@ kern_rmdir(struct nlookupdata *nd) return (EINVAL); if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - dvp can't go away */ error = VOP_NRMDIR(&nd->nl_nch, dvp, nd->nl_cred); - /* vdrop(dvp); */ + cache_dvprel(dvp); return (error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 10587dfe3c..10f511edba 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -37,7 +37,7 @@ * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $ - * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.55 2008/05/05 22:09:44 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.56 2008/05/08 01:41:06 dillon Exp $ */ #include @@ -177,9 +177,8 @@ again: if (nd->nl_nch.ncp->nc_vp == NULL) { if ((error = ncp_writechk(&nd->nl_nch)) != 0) return (error); - if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) + if ((dvp = cache_dvpref(nd->nl_nch.ncp)) == NULL) return (EPERM); - /* vhold(dvp); - dvp can't go away */ VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; @@ -187,7 +186,7 @@ again: vap->va_vaflags |= VA_EXCLUSIVE; error = VOP_NCREATE(&nd->nl_nch, dvp, &vp, nd->nl_cred, vap); - /* vdrop(dvp); */ + cache_dvprel(dvp); if (error) return (error); fmode &= ~O_TRUNC; diff --git a/sys/sys/namecache.h b/sys/sys/namecache.h index 781ab7b7fc..e5f4119d57 100644 --- a/sys/sys/namecache.h +++ b/sys/sys/namecache.h @@ -62,7 +62,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/namecache.h,v 1.29 2007/05/13 02:34:22 dillon Exp $ + * $DragonFly: src/sys/sys/namecache.h,v 1.30 2008/05/08 01:41:07 dillon Exp $ */ #ifndef _SYS_NAMECACHE_H_ @@ -194,6 +194,8 @@ int cache_vget(struct nchandle *, struct ucred *, int, struct vnode **); int cache_vref(struct nchandle *, struct ucred *, struct vnode **); int cache_fromdvp(struct vnode *, struct ucred *, int, struct nchandle *); int cache_fullpath(struct proc *, struct nchandle *, char **, char **); +struct vnode *cache_dvpref(struct namecache *); +void cache_dvprel(struct vnode *); void cache_update_fsmid(struct nchandle *); void cache_update_fsmid_vp(struct vnode *); int64_t cache_sync_fsmid_vp(struct vnode *); -- 2.41.0