From b2eb81cde0846a1f939d982317b0e2a624b766e1 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 7 Apr 2004 05:15:48 +0000 Subject: [PATCH] Protect nfs socket locks with a critical section. Recheck rep->r_mrep just prior to calling tsleep() in case another thread got in and handled the request being waited for. Rewrite the vnode scanning code in nfs_sync() to use vmntvnodescan(), fixing a number of potential races. Protect the commit phase 2 scan in nfs_subs.c with the appropriate token (note: still needs some work). --- sys/vfs/nfs/nfs_socket.c | 69 +++++++++++++++++++++++++--------- sys/vfs/nfs/nfs_subs.c | 11 +++--- sys/vfs/nfs/nfs_vfsops.c | 80 +++++++++++++++++++++++++++------------- sys/vfs/nfs/nfsmount.h | 3 +- 4 files changed, 115 insertions(+), 48 deletions(-) diff --git a/sys/vfs/nfs/nfs_socket.c b/sys/vfs/nfs/nfs_socket.c index ecc404cc20..9af3e7952f 100644 --- a/sys/vfs/nfs/nfs_socket.c +++ b/sys/vfs/nfs/nfs_socket.c @@ -35,7 +35,7 @@ * * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 * $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.60.2.6 2003/03/26 01:44:46 alfred Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.14 2004/03/13 03:13:53 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.15 2004/04/07 05:15:48 dillon Exp $ */ /* @@ -1613,17 +1613,25 @@ nfs_sndlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; struct thread *td; - int slpflag = 0, slptimeo = 0; + int slptimeo; + int slpflag; + int error; + slpflag = 0; + slptimeo = 0; td = rep->r_td; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; + + error = 0; + crit_enter(); while (*statep & NFSSTA_SNDLOCK) { *statep |= NFSSTA_WANTSND; - if (nfs_sigintr(rep->r_nmp, rep, td)) - return (EINTR); - (void) tsleep((caddr_t)statep, slpflag, - "nfsndlck", slptimeo); + if (nfs_sigintr(rep->r_nmp, rep, td)) { + error = EINTR; + break; + } + tsleep((caddr_t)statep, slpflag, "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; @@ -1631,9 +1639,11 @@ nfs_sndlock(struct nfsreq *rep) } /* Always fail if our request has been cancelled. */ if ((rep->r_flags & R_SOFTTERM)) - return (EINTR); - *statep |= NFSSTA_SNDLOCK; - return (0); + error = EINTR; + if (error == 0) + *statep |= NFSSTA_SNDLOCK; + crit_exit(); + return (error); } /* @@ -1647,11 +1657,13 @@ nfs_sndunlock(rep) if ((*statep & NFSSTA_SNDLOCK) == 0) panic("nfs sndunlock"); + crit_enter(); *statep &= ~NFSSTA_SNDLOCK; if (*statep & NFSSTA_WANTSND) { *statep &= ~NFSSTA_WANTSND; wakeup((caddr_t)statep); } + crit_exit(); } static int @@ -1659,13 +1671,18 @@ nfs_rcvlock(rep) struct nfsreq *rep; { int *statep = &rep->r_nmp->nm_state; - int slpflag, slptimeo = 0; + int slpflag; + int slptimeo; + int error; /* * Unconditionally check for completion in case another nfsiod * get the packet while the caller was blocked, before the caller * called us. Packet reception is handled by mainline code which * is protected by the BGL at the moment. + * + * We do not strictly need the second check just before the + * tsleep(), but it's good defensive programming. */ if (rep->r_mrep != NULL) return (EALREADY); @@ -1674,26 +1691,41 @@ nfs_rcvlock(rep) slpflag = PCATCH; else slpflag = 0; + slptimeo = 0; + error = 0; + crit_enter(); while (*statep & NFSSTA_RCVLOCK) { - if (nfs_sigintr(rep->r_nmp, rep, rep->r_td)) - return (EINTR); + if (nfs_sigintr(rep->r_nmp, rep, rep->r_td)) { + error = EINTR; + break; + } + if (rep->r_mrep != NULL) { + error = EALREADY; + break; + } *statep |= NFSSTA_WANTRCV; - (void) tsleep((caddr_t)statep, slpflag, "nfsrcvlk", slptimeo); + tsleep((caddr_t)statep, slpflag, "nfsrcvlk", slptimeo); /* * If our reply was recieved while we were sleeping, * then just return without taking the lock to avoid a * situation where a single iod could 'capture' the * recieve lock. */ - if (rep->r_mrep != NULL) - return (EALREADY); + if (rep->r_mrep != NULL) { + error = EALREADY; + break; + } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } - *statep |= NFSSTA_RCVLOCK; - return (0); + if (error == 0) { + *statep |= NFSSTA_RCVLOCK; + rep->r_nmp->nm_rcvlock_td = curthread; /* DEBUGGING */ + } + crit_exit(); + return (error); } /* @@ -1707,11 +1739,14 @@ nfs_rcvunlock(rep) if ((*statep & NFSSTA_RCVLOCK) == 0) panic("nfs rcvunlock"); + crit_enter(); + rep->r_nmp->nm_rcvlock_td = (void *)-1; /* DEBUGGING */ *statep &= ~NFSSTA_RCVLOCK; if (*statep & NFSSTA_WANTRCV) { *statep &= ~NFSSTA_WANTRCV; wakeup((caddr_t)statep); } + crit_exit(); } /* diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index 86eb4d05d5..ec77ba9ca4 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -35,7 +35,7 @@ * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 * $FreeBSD: src/sys/nfs/nfs_subs.c,v 1.90.2.2 2001/10/25 19:18:53 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.12 2004/03/01 06:33:21 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.13 2004/04/07 05:15:48 dillon Exp $ */ /* @@ -2138,25 +2138,26 @@ nfs_clearcommit(mp) { struct vnode *vp, *nvp; struct buf *bp, *nbp; + lwkt_tokref ilock; int s; + lwkt_gettoken(&ilock, &mntvnode_token); s = splbio(); -loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_nmntvnodes); /* ZZZ */ if (vp->v_flag & VPLACEMARKER) continue; - if (vp->v_mount != mp) /* Paranoia */ - goto loop; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) - == (B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) { bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + } } } splx(s); + lwkt_reltoken(&ilock); } #ifndef NFS_NOSERVER diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c index 37af623998..55a1ce7ed0 100644 --- a/sys/vfs/nfs/nfs_vfsops.c +++ b/sys/vfs/nfs/nfs_vfsops.c @@ -35,7 +35,7 @@ * * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.14 2004/03/10 02:07:52 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.15 2004/04/07 05:15:48 dillon Exp $ */ #include "opt_bootp.h" @@ -1082,6 +1082,16 @@ nfs_root(mp, vpp) extern int syncprt; +struct scaninfo { + int rescan; + thread_t td; + int waitfor; + int allerror; +}; + +static int nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data); +static int nfs_sync_scan2(struct mount *mp, struct vnode *vp, lwkt_tokref_t vlock, void *data); + /* * Flush out the buffer cache */ @@ -1089,34 +1099,54 @@ extern int syncprt; static int nfs_sync(struct mount *mp, int waitfor, struct thread *td) { - struct vnode *vp; - int error, allerror = 0; + struct scaninfo scaninfo; + int error; + + scaninfo.rescan = 0; + scaninfo.td = td; + scaninfo.waitfor = waitfor; + scaninfo.allerror = 0; /* * Force stale buffer cache information to be flushed. */ -loop: - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); - vp != NULL; - vp = TAILQ_NEXT(vp, v_nmntvnodes)) { - if (vp->v_flag & VPLACEMARKER) /* ZZZ */ - continue; - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) || - waitfor == MNT_LAZY) - continue; - if (vget(vp, NULL, LK_EXCLUSIVE, td)) - goto loop; - error = VOP_FSYNC(vp, waitfor, td); - if (error) - allerror = error; - vput(vp); + error = 0; + while (error == 0 && scaninfo.rescan) { + scaninfo.rescan = 0; + error = vmntvnodescan(mp, nfs_sync_scan1, + nfs_sync_scan2, &scaninfo); } - return (allerror); + return(error); +} + +static +int +nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data) +{ + struct scaninfo *info = data; + + if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd)) + return(-1); + if (info->waitfor == MNT_LAZY) + return(-1); + return(0); +} + +static +int +nfs_sync_scan2(struct mount *mp, struct vnode *vp, lwkt_tokref_t vlock, void *data) +{ + struct scaninfo *info = data; + int error; + + if (vget(vp, vlock, LK_EXCLUSIVE | LK_INTERLOCK, info->td)) { + info->rescan = 1; + return(0); + } + error = VOP_FSYNC(vp, info->waitfor, info->td); + if (error) + info->allerror = error; + vput(vp); + return(0); } diff --git a/sys/vfs/nfs/nfsmount.h b/sys/vfs/nfs/nfsmount.h index 071f818ee0..242a3dfcec 100644 --- a/sys/vfs/nfs/nfsmount.h +++ b/sys/vfs/nfs/nfsmount.h @@ -35,7 +35,7 @@ * * @(#)nfsmount.h 8.3 (Berkeley) 3/30/95 * $FreeBSD: src/sys/nfs/nfsmount.h,v 1.17 1999/12/29 04:54:54 peter Exp $ - * $DragonFly: src/sys/vfs/nfs/nfsmount.h,v 1.4 2003/10/10 22:01:13 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfsmount.h,v 1.5 2004/04/07 05:15:48 dillon Exp $ */ @@ -95,6 +95,7 @@ struct nfsmount { int nm_bufqiods; /* number of iods processing queue */ u_int64_t nm_maxfilesize; /* maximum file size */ struct ucred *nm_cred; /* 'root' credential */ + struct thread *nm_rcvlock_td; /* debugging */ }; -- 2.41.0