From 5a9187cbcb515049916aabc8f3cd880e0eb9b239 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 17 Mar 2005 17:28:46 +0000 Subject: [PATCH] Clean up a number of caching edge cases in NFS, rework the code to be a bit more readable, document some bits, and fix some cache coherency detection issues. The caching cleanups should allow the NFS client to retain more of the NFS cache when doing complex operations on a file. * Properly check and update the mtime using WCC records in the NFS response. This record gives us the 'before' and 'after' mtime. The 'before' mtime must match our existing idea of the mtime; if it doesn't, we flag the nfsnode as having been modified by the server. Our notion of the mtime is then set to the 'after' time. This was not being done properly for several edge cases. This required extending the nfsm macros a bit in order to be able to tell loadattrcache how to handle the mtime data. This also required rearranging (really fixing) the sequence in nfs_open(), nfs_write(), etc. * Rearrange the flags a bit. NSIZECHANGED -> NRMODIFIED (nfsnode modified by server), NMODIFIED -> NLMODIFIED (nfsnode modified by client). Do not clear NRMODIFIED until we have actually invalidated the cache (this fixes a problem where programs using mmap() were not properly clearing the cache after a file was modified on the server). * Don't code NRMODIFIED as an exception to NLMODIFIED. Recode the flags so they (mostly) operate in tandem. * When appending to a file, use nfs_flush() instead of nfs_vinvalbuf(). There is no need to destroy our data cache for the file. This makes appends considerably more efficient. * Hopefully fix the last problem associated with attribute timeouts. * Clear the attribute cache when a file is opened for write in nfs_open() BEFORE doing other checks rather than after. * Document some of the nastier cache coherency hacks. 
--- sys/vfs/nfs/nfs.h | 6 +- sys/vfs/nfs/nfs_bio.c | 85 +++++++++---------- sys/vfs/nfs/nfs_node.c | 7 +- sys/vfs/nfs/nfs_nqlease.c | 6 +- sys/vfs/nfs/nfs_subs.c | 98 +++++++++++++++++----- sys/vfs/nfs/nfs_vfsops.c | 6 +- sys/vfs/nfs/nfs_vnops.c | 168 ++++++++++++++++++++++---------------- sys/vfs/nfs/nfsm_subs.h | 27 ++++-- sys/vfs/nfs/nfsnode.h | 9 +- usr.sbin/pstat/pstat.c | 6 +- 10 files changed, 259 insertions(+), 159 deletions(-) diff --git a/sys/vfs/nfs/nfs.h b/sys/vfs/nfs/nfs.h index 379bb89a84..173c4c23e1 100644 --- a/sys/vfs/nfs/nfs.h +++ b/sys/vfs/nfs/nfs.h @@ -35,7 +35,7 @@ * * @(#)nfs.h 8.4 (Berkeley) 5/1/95 * $FreeBSD: src/sys/nfs/nfs.h,v 1.53.2.5 2002/02/20 01:35:34 iedowse Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs.h,v 1.8 2004/11/12 00:09:37 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs.h,v 1.9 2005/03/17 17:28:46 dillon Exp $ */ #ifndef _NFS_NFS_H_ @@ -483,6 +483,10 @@ struct nfsd { #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 +/* Bits for loadattrcache */ +#define NFS_LATTR_NOSHRINK 0x01 +#define NFS_LATTR_NOMTIMECHECK 0x02 + /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. diff --git a/sys/vfs/nfs/nfs_bio.c b/sys/vfs/nfs/nfs_bio.c index 6680a79e5b..4ec3987205 100644 --- a/sys/vfs/nfs/nfs_bio.c +++ b/sys/vfs/nfs/nfs_bio.c @@ -35,7 +35,7 @@ * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_bio.c,v 1.130 2004/04/14 23:23:55 peadar Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_bio.c,v 1.20 2005/03/04 05:21:17 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_bio.c,v 1.21 2005/03/17 17:28:46 dillon Exp $ */ @@ -359,52 +359,46 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag) return (EFBIG); biosize = vp->v_mount->mnt_stat.f_iosize; seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE); + /* * For nfs, cache consistency can only be maintained approximately. 
* Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. - * For nqnfs, full cache consistency is maintained within the loop. - * For nfs: - * If the file's modify time on the server has changed since the - * last read rpc or you have written to the file, - * you may have lost data cache consistency with the - * server, so flush all of the file's data out of the cache. - * Then force a getattr rpc to ensure that you have up to date - * attributes. - * NB: This implies that cache data can be read when up to - * NFS_ATTRTIMEO seconds out of date. If you find that you need current - * attributes this could be forced by setting n_attrstamp to 0 before - * the VOP_GETATTR() call. + * + * NQNFS: Full cache coherency is maintained within the loop. + * + * NFS: If local changes have been made and this is a + * directory, the directory must be invalidated and + * the attribute cache must be cleared. + * + * GETATTR is called to synchronize the file size. + * + * If remote changes are detected local data is flushed + * and the cache is invalidated. + * + * + * NOTE: In the normal case the attribute cache is not + * cleared which means GETATTR may use cached data and + * not immediately detect changes made on the server. 
*/ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { - if (np->n_flag & NMODIFIED) { - if (vp->v_type != VREG) { - if (vp->v_type != VDIR) - panic("nfs: bioread, not dir"); - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, V_SAVE, td, 1); - if (error) - return (error); - } - np->n_attrstamp = 0; - error = VOP_GETATTR(vp, &vattr, td); + if ((np->n_flag & NLMODIFIED) && vp->v_type == VDIR) { + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) return (error); - np->n_mtime = vattr.va_mtime.tv_sec; - } else { - error = VOP_GETATTR(vp, &vattr, td); + np->n_attrstamp = 0; + } + error = VOP_GETATTR(vp, &vattr, td); + if (error) + return (error); + if (np->n_flag & NRMODIFIED) { + if (vp->v_type == VDIR) + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) return (error); - if ((np->n_flag & NSIZECHANGED) - || np->n_mtime != vattr.va_mtime.tv_sec) { - if (vp->v_type == VDIR) - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, V_SAVE, td, 1); - if (error) - return (error); - np->n_mtime = vattr.va_mtime.tv_sec; - np->n_flag &= ~NSIZECHANGED; - } + np->n_flag &= ~NRMODIFIED; } } do { @@ -421,7 +415,7 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE) || - ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { + ((np->n_flag & NLMODIFIED) && vp->v_type == VDIR)) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, td, 1); @@ -429,7 +423,7 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag) return (error); np->n_brev = np->n_lrev; } - } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { + } else if (vp->v_type == VDIR && (np->n_flag & NLMODIFIED)) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) @@ -752,9 +746,10 @@ nfs_write(struct vop_write_args *ap) * mode or if we are appending. 
*/ if (ioflag & (IO_APPEND | IO_SYNC)) { - if (np->n_flag & NMODIFIED) { + if (np->n_flag & NLMODIFIED) { np->n_attrstamp = 0; - error = nfs_vinvalbuf(vp, V_SAVE, td, 1); + error = nfs_flush(vp, MNT_WAIT, td, 0); + /* error = nfs_vinvalbuf(vp, V_SAVE, td, 1); */ if (error) return (error); } @@ -874,7 +869,7 @@ again: long save; np->n_size = uio->uio_offset + n; - np->n_flag |= NMODIFIED; + np->n_flag |= NLMODIFIED; vnode_pager_setsize(vp, np->n_size); save = bp->b_flags & B_CACHE; @@ -897,7 +892,7 @@ again: bp = nfs_getcacheblk(vp, lbn, bcount, td); if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; - np->n_flag |= NMODIFIED; + np->n_flag |= NLMODIFIED; vnode_pager_setsize(vp, np->n_size); } } @@ -944,7 +939,7 @@ again: error = EINTR; break; } - np->n_flag |= NMODIFIED; + np->n_flag |= NLMODIFIED; /* * If dirtyend exceeds file size, chop it down. This should @@ -1179,7 +1174,7 @@ nfs_vinvalbuf(struct vnode *vp, int flags, } error = vinvalbuf(vp, flags, td, 0, slptimeo); } - np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); + np->n_flag &= ~(NLMODIFIED | NFLUSHINPROG); if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); diff --git a/sys/vfs/nfs/nfs_node.c b/sys/vfs/nfs/nfs_node.c index 8e5d272e77..8720f0861a 100644 --- a/sys/vfs/nfs/nfs_node.c +++ b/sys/vfs/nfs/nfs_node.c @@ -35,7 +35,7 @@ * * @(#)nfs_node.c 8.6 (Berkeley) 5/22/95 * $FreeBSD: src/sys/nfs/nfs_node.c,v 1.36.2.3 2002/01/05 22:25:04 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_node.c,v 1.18 2004/12/17 00:18:28 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_node.c,v 1.19 2005/03/17 17:28:46 dillon Exp $ */ @@ -243,8 +243,9 @@ nfs_inactive(struct vop_inactive_args *ap) vrele(sp->s_dvp); FREE((caddr_t)sp, M_NFSREQ); } - np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED | - NQNFSNONCACHE | NQNFSWRITE); + + np->n_flag &= ~(NWRITEERR | NACC | NUPD | NCHG | NLOCKED | NWANTED); + return (0); } diff --git a/sys/vfs/nfs/nfs_nqlease.c 
b/sys/vfs/nfs/nfs_nqlease.c index 24dd7f684d..a324dea0ef 100644 --- a/sys/vfs/nfs/nfs_nqlease.c +++ b/sys/vfs/nfs/nfs_nqlease.c @@ -35,7 +35,7 @@ * * @(#)nfs_nqlease.c 8.9 (Berkeley) 5/20/95 * $FreeBSD: src/sys/nfs/nfs_nqlease.c,v 1.50 2000/02/13 03:32:05 peter Exp $ - * $DragonFly: src/sys/vfs/nfs/Attic/nfs_nqlease.c,v 1.21 2004/11/18 20:04:28 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/Attic/nfs_nqlease.c,v 1.22 2005/03/17 17:28:46 dillon Exp $ */ @@ -1058,7 +1058,7 @@ nqnfs_clientd(struct nfsmount *nmp, struct ucred *cred, struct nfsd_cargs *ncd, if (vpid == vp->v_id) { CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); np->n_timer.cqe_next = 0; - if (np->n_flag & (NMODIFIED | NQNFSEVICTED)) { + if (np->n_flag & (NLMODIFIED | NQNFSEVICTED)) { if (np->n_flag & NQNFSEVICTED) { if (vp->v_type == VDIR) nfs_invaldir(vp); @@ -1069,7 +1069,7 @@ nqnfs_clientd(struct nfsmount *nmp, struct ucred *cred, struct nfsd_cargs *ncd, (void) nqnfs_vacated(vp, cred); } else if (vp->v_type == VREG) { (void) VOP_FSYNC(vp, MNT_WAIT, td); - np->n_flag &= ~NMODIFIED; + np->n_flag &= ~NLMODIFIED; } } } diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index 5824edadd0..15a7b1ce3f 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -35,7 +35,7 @@ * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.26 2005/03/16 22:17:59 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.27 2005/03/17 17:28:46 dillon Exp $ */ /* @@ -1163,13 +1163,19 @@ nfs_uninit(struct vfsconf *vfsp) /* * Load the attribute cache (that lives in the nfsnode entry) with - * the values on the mbuf list and - * Iff vap not NULL - * copy the attributes to *vaper + * the values on the mbuf list. Load *vaper with the attributes. vaper + * may be NULL. 
+ * + * As a side effect n_mtime, which we use to determine if the file was + * modified by some other host, is set to the attribute timestamp and + * NRMODIFIED is set if the two values differ. + * + * WARNING: the mtime loaded into vaper does not necessarily represent + * n_mtime or n_attr.mtime due to NACC and NUPD. */ int nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, - struct vattr *vaper, int dontshrink) + struct vattr *vaper, int lattr_flags) { struct vnode *vp = *vpp; struct vattr *vap; @@ -1249,18 +1255,24 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, vp->v_ops = &vp->v_mount->mnt_vn_use_ops; } np->n_mtime = mtime.tv_sec; - } else if ((np->n_flag & NMODIFIED) == 0 && - np->n_mtime != mtime.tv_sec) { + } else if (np->n_mtime != mtime.tv_sec) { /* - * If we haven't modified the file locally update our notion - * of the last-modified time based on the server's - * information, otherwise certain cache timeout calculations - * will break. If it has changed, set the NSIZECHANGED flag - * to ensure that the buffer cache is flushed. Do NOT flush - * the buffer cache in this routine. + * If we haven't modified the file locally and the server + * timestamp does not match, then the server probably + * modified the file. We must flag this condition so + * the proper syncnronization can be done. We do not + * try to synchronize the state here because that + * could lead to an endless recursion. + * + * XXX loadattrcache can be set during the reply to a write, + * before the write timestamp is properly processed. To + * avoid unconditionally setting the rmodified bit (which + * has the effect of flushing the cache), we only do this + * check if the lmodified bit is not set. 
*/ np->n_mtime = mtime.tv_sec; - np->n_flag |= NSIZECHANGED; + if ((lattr_flags & NFS_LATTR_NOMTIMECHECK) == 0) + np->n_flag |= NRMODIFIED; } vap = &np->n_vattr; vap->va_type = vtyp; @@ -1301,28 +1313,58 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { - if (dontshrink && vap->va_size < np->n_size) { + if ((lattr_flags & NFS_LATTR_NOSHRINK) && + vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. + * + * This occurs primarily due to recursive + * NFS ops that are executed during periods + * where we cannot safely reduce the size of + * the file. + * + * Additionally, write rpcs are broken down + * into buffers and np->n_size is + * pre-extended. Setting NRMODIFIED here + * can result in n_size getting reset to a + * lower value, which is NOT what we want. + * XXX this needs to be cleaned up a lot + * more. */ vap->va_size = np->n_size; np->n_attrstamp = 0; - } else if (np->n_flag & NMODIFIED) { + if ((np->n_flag & NLMODIFIED) == 0) + np->n_flag |= NRMODIFIED; + } else if (np->n_flag & NLMODIFIED) { /* * We've modified the file: Use the larger - * of our size, and the server's size. + * of our size, and the server's size. At + * this point the cache coherency is all + * shot to hell. To try to handle multiple + * clients appending to the file at the same + * time mark that the server has changed + * the file if the server's notion of the + * file size is larger then our notion. + * + * XXX this needs work. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; - np->n_flag |= NSIZECHANGED; + np->n_flag |= NRMODIFIED; } } else { + /* + * Someone changed the file's size on the + * server and there are no local changes + * to get in the way, set the size and mark + * it. 
+ */ np->n_size = vap->va_size; - np->n_flag |= NSIZECHANGED; + np->n_flag |= NRMODIFIED; } vnode_pager_setsize(vp, np->n_size); } else { @@ -1367,6 +1409,7 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper) /* * Dynamic timeout based on how recently the file was modified. + * n_mtime is always valid. */ timeo = (get_approximate_time_t() - np->n_mtime) / 10; @@ -1376,12 +1419,12 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper) #endif if (vap->va_type == VDIR) { - if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) + if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { - if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) + if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; @@ -1403,9 +1446,17 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper) return (ENOENT); } nfsstats.attrcache_hits++; + + /* + * Our attribute cache can be stale due to modifications made on + * this host. XXX this is a bad hack. We need a more deterministic + * means of finding out which np fields are valid verses attr cache + * fields. We really should update the vattr info on the fly when + * making local changes. + */ if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { - if (np->n_flag & NMODIFIED) { + if (np->n_flag & NLMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else @@ -1719,6 +1770,9 @@ nfsm_srvwcc(struct nfsrv_descript *nfsd, int before_ret, char *bpos = *bposp; u_int32_t *tl; + /* + * before_ret is 0 if before_vap is valid, non-zero if it isn't. 
+ */ if (before_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c index aee0be7c89..3f4703175a 100644 --- a/sys/vfs/nfs/nfs_vfsops.c +++ b/sys/vfs/nfs/nfs_vfsops.c @@ -35,7 +35,7 @@ * * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.24 2005/02/02 21:34:18 joerg Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.25 2005/03/17 17:28:46 dillon Exp $ */ #include "opt_bootp.h" @@ -313,7 +313,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_FSSTAT, td, cred); if (v3) - nfsm_postop_attr(vp, retattr); + nfsm_postop_attr(vp, retattr, NFS_LATTR_NOSHRINK); if (error) { if (mrep != NULL) m_freem(mrep); @@ -373,7 +373,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct thread *td) nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_request(vp, NFSPROC_FSINFO, td, nfs_vpcred(vp, ND_READ)); - nfsm_postop_attr(vp, retattr); + nfsm_postop_attr(vp, retattr, NFS_LATTR_NOSHRINK); if (!error) { nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); diff --git a/sys/vfs/nfs/nfs_vnops.c b/sys/vfs/nfs/nfs_vnops.c index 3f9a22c3a8..fff3de3350 100644 --- a/sys/vfs/nfs/nfs_vnops.c +++ b/sys/vfs/nfs/nfs_vnops.c @@ -35,7 +35,7 @@ * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 * $FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150.2.5 2001/12/20 19:56:28 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_vnops.c,v 1.37 2005/02/15 08:32:18 joerg Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_vnops.c,v 1.38 2005/03/17 17:28:46 dillon Exp $ */ @@ -106,7 +106,6 @@ static int nfsfifo_write (struct vop_write_args *); static int nfsspec_close (struct vop_close_args *); static int nfsfifo_close (struct vop_close_args *); #define nfs_poll vop_nopoll -static int nfs_flush 
(struct vnode *,int,struct thread *,int); static int nfs_setattrrpc (struct vnode *,struct vattr *,struct ucred *,struct thread *); static int nfs_lookup (struct vop_lookup_args *); static int nfs_create (struct vop_create_args *); @@ -292,7 +291,7 @@ nfs3_access_otw(struct vnode *vp, int wmode, nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, td, cred); - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); @@ -484,10 +483,20 @@ nfs_open(struct vop_open_args *ap) #endif return (EOPNOTSUPP); } + /* - * Get a valid lease. If cached data is stale, flush it. + * Clear the attribute cache only if opening with write access. It + * is unclear if we should do this at all here, but we certainly + * should not clear the cache unconditionally simply because a file + * is being opened. */ + if (ap->a_mode & FWRITE) + np->n_attrstamp = 0; + if (nmp->nm_flag & NFSMNT_NQNFS) { + /* + * If NQNFS is active, get a valid lease + */ if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_td); @@ -504,41 +513,44 @@ nfs_open(struct vop_open_args *ap) } } } else { - if (np->n_flag & NMODIFIED) { - if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) - == EINTR) { - return (error); - } + /* + * For normal NFS, reconcile changes made locally verses + * changes made remotely. Note that VOP_GETATTR only goes + * to the wire if the cached attribute has timed out or been + * cleared. + * + * If local modifications have been made clear the attribute + * cache to force an attribute and modified time check. If + * GETATTR detects that the file has been changed by someone + * other then us it will set NRMODIFIED. 
+ * + * If we are opening a directory and local changes have been + * made we have to invalidate the cache in order to ensure + * that we get the most up-to-date information from the + * server. XXX + */ + if (np->n_flag & NLMODIFIED) { np->n_attrstamp = 0; - if (vp->v_type == VDIR) - np->n_direofoffset = 0; - error = VOP_GETATTR(vp, &vattr, ap->a_td); - if (error) - return (error); - np->n_mtime = vattr.va_mtime.tv_sec; - } else { - error = VOP_GETATTR(vp, &vattr, ap->a_td); - if (error) - return (error); - if (np->n_mtime != vattr.va_mtime.tv_sec) { - if (vp->v_type == VDIR) - np->n_direofoffset = 0; - if ((error = nfs_vinvalbuf(vp, V_SAVE, - ap->a_td, 1)) == EINTR) { + if (vp->v_type == VDIR) { + error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == EINTR) return (error); - } - np->n_mtime = vattr.va_mtime.tv_sec; + nfs_invaldir(vp); } } + error = VOP_GETATTR(vp, &vattr, ap->a_td); + if (error) + return (error); + if (np->n_flag & NRMODIFIED) { + if (vp->v_type == VDIR) + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == EINTR) + return (error); + np->n_flag &= ~NRMODIFIED; + } } - /* - * Clear attrstamp only if opening with write access. It is unclear - * whether we should do this at all here, but we certainly should not - * clear attrstamp unconditionally. - */ - if (ap->a_mode & FWRITE) - np->n_attrstamp = 0; return (0); } @@ -585,7 +597,7 @@ nfs_close(struct vop_close_args *ap) if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && - (np->n_flag & NMODIFIED)) { + (np->n_flag & NLMODIFIED)) { if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We @@ -596,13 +608,13 @@ nfs_close(struct vop_close_args *ap) * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). 
* - * We cannot clear the NMODIFIED bit in np->n_flag due to + * We cannot clear the NLMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. */ int cm = nfsv3_commit_on_close ? 1 : 0; error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm); - /* np->n_flag &= ~NMODIFIED; */ + /* np->n_flag &= ~NLMODIFIED; */ } else { error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); } @@ -733,7 +745,7 @@ nfs_setattr(struct vop_setattr_args *ap) tsize = np->n_size; error = nfs_meta_setsize(vp, ap->a_td, vap->va_size); - if (np->n_flag & NMODIFIED) { + if (np->n_flag & NLMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_td, 1); else @@ -754,9 +766,10 @@ nfs_setattr(struct vop_setattr_args *ap) * vnode_pager_setsize() for us in that case). */ np->n_vattr.va_size = np->n_size = vap->va_size; - }; + break; + } } else if ((vap->va_mtime.tv_sec != VNOVAL || - vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && + vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NLMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) == EINTR) return (error); @@ -886,7 +899,7 @@ nfs_nresolve(struct vop_nresolve_args *ap) cache_setvp(ncp, NULL); cache_settimeout(ncp, nticks); } - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(dvp, attrflag, NFS_LATTR_NOSHRINK); m_freem(mrep); goto nfsmout; } @@ -921,8 +934,8 @@ nfs_nresolve(struct vop_nresolve_args *ap) nvp = NFSTOV(np); } if (v3) { - nfsm_postop_attr(nvp, attrflag); - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(nvp, attrflag, NFS_LATTR_NOSHRINK); + nfsm_postop_attr(dvp, attrflag, NFS_LATTR_NOSHRINK); } else { nfsm_loadattr(nvp, NULL); } @@ -1004,7 +1017,7 @@ nfs_lookup(struct vop_lookup_args *ap) nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_td, cnp->cn_cred); if (error) { - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(dvp, attrflag, NFS_LATTR_NOSHRINK); m_freem(mrep); goto 
nfsmout; } @@ -1025,8 +1038,8 @@ nfs_lookup(struct vop_lookup_args *ap) } newvp = NFSTOV(np); if (v3) { - nfsm_postop_attr(newvp, attrflag); - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(newvp, attrflag, NFS_LATTR_NOSHRINK); + nfsm_postop_attr(dvp, attrflag, NFS_LATTR_NOSHRINK); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; @@ -1072,8 +1085,8 @@ nfs_lookup(struct vop_lookup_args *ap) newvp = NFSTOV(np); } if (v3) { - nfsm_postop_attr(newvp, attrflag); - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(newvp, attrflag, NFS_LATTR_NOSHRINK); + nfsm_postop_attr(dvp, attrflag, NFS_LATTR_NOSHRINK); } else nfsm_loadattr(newvp, (struct vattr *)0); #if 0 @@ -1165,7 +1178,7 @@ nfs_readlinkrpc(struct vnode *vp, struct uio *uiop) nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, nfs_vpcred(vp, ND_CHECK)); if (v3) - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { @@ -1219,7 +1232,7 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop) } nfsm_request(vp, NFSPROC_READ, uiop->uio_td, nfs_vpcred(vp, ND_READ)); if (v3) { - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); if (error) { m_freem(mrep); goto nfsmout; @@ -1295,6 +1308,14 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, int *iomode, int *must_commit) nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, nfs_vpcred(vp, ND_WRITE)); if (v3) { + /* + * The write RPC returns a before and after mtime. The + * nfsm_wcc_data() macro checks the before n_mtime + * against the before time and stores the after time + * in the nfsnode's cached vattr and n_mtime field. + * The NRMODIFIED bit will be set if the before + * time did not match the original mtime. 
+ */ wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { @@ -1335,10 +1356,9 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, int *iomode, int *must_commit) NFSX_V3WRITEVERF); } } - } else - nfsm_loadattr(vp, (struct vattr *)0); - if (wccflag) - VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; + } else { + nfsm_loadattr(vp, (struct vattr *)0); + } m_freem(mrep); if (error) break; @@ -1433,7 +1453,7 @@ nfsmout: } else { *vpp = newvp; } - VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); @@ -1573,7 +1593,7 @@ nfsmout: np->n_wucred = crhold(cnp->cn_cred); *ap->a_vpp = newvp; } - VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); @@ -1671,7 +1691,7 @@ nfs_removerpc(struct vnode *dvp, const char *name, int namelen, nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: - VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); @@ -1796,8 +1816,8 @@ nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, } m_freem(mrep); nfsmout: - VTONFS(fdvp)->n_flag |= NMODIFIED; - VTONFS(tdvp)->n_flag |= NMODIFIED; + VTONFS(fdvp)->n_flag |= NLMODIFIED; + VTONFS(tdvp)->n_flag |= NLMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) @@ -1845,12 +1865,12 @@ nfs_link(struct vop_link_args *ap) nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_td, cnp->cn_cred); if (v3) { - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); nfsm_wcc_data(tdvp, wccflag); } m_freem(mrep); nfsmout: - VTONFS(tdvp)->n_flag |= NMODIFIED; + VTONFS(tdvp)->n_flag |= NLMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) @@ -1954,7 +1974,7 @@ nfsmout: } else { *ap->a_vpp = newvp; } - VTONFS(dvp)->n_flag |= NMODIFIED; + 
VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); @@ -2013,7 +2033,7 @@ nfs_mkdir(struct vop_mkdir_args *ap) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: - VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* @@ -2073,7 +2093,7 @@ nfs_rmdir(struct vop_rmdir_args *ap) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: - VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_flag |= NLMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* @@ -2100,25 +2120,31 @@ nfs_readdir(struct vop_readdir_args *ap) if (vp->v_type != VDIR) return (EPERM); + /* - * First, check for hit on the EOF offset cache + * If we have a valid EOF offset cache we must call VOP_GETATTR() + * and then check that is still valid, or if this is an NQNFS mount + * we call NQNFS_CKCACHEABLE() instead of VOP_GETATTR(). Note that + * VOP_GETATTR() does not necessarily go to the wire. */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && - (np->n_flag & NMODIFIED) == 0) { + (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, uio->uio_td) == 0 && - np->n_mtime == vattr.va_mtime.tv_sec) { + (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0 + ) { nfsstats.direofcache_hits++; return (0); } } /* - * Call nfs_bioread() to do the real work. + * Call nfs_bioread() to do the real work. nfs_bioread() does its + * own cache coherency checks so we do not have to. 
*/ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0); @@ -2188,7 +2214,7 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop) *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, nfs_vpcred(vp, ND_READ)); if (v3) { - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); if (!error) { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); @@ -2384,7 +2410,7 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop) *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, nfs_vpcred(vp, ND_READ)); - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, NFS_LATTR_NOSHRINK); if (error) { m_freem(mrep); goto nfsmout; @@ -2676,7 +2702,7 @@ nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, newvp = NFSTOV(np); } if (v3) { - nfsm_postop_attr(newvp, attrflag); + nfsm_postop_attr(newvp, attrflag, NFS_LATTR_NOSHRINK); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) @@ -2839,7 +2865,7 @@ nfs_fsync(struct vop_fsync_args *ap) * Walk through the buffer pool and push any dirty pages * associated with the vnode. 
*/ -static int +int nfs_flush(struct vnode *vp, int waitfor, struct thread *td, int commit) { struct nfsnode *np = VTONFS(vp); diff --git a/sys/vfs/nfs/nfsm_subs.h b/sys/vfs/nfs/nfsm_subs.h index 7483e70eba..be394ebd19 100644 --- a/sys/vfs/nfs/nfsm_subs.h +++ b/sys/vfs/nfs/nfsm_subs.h @@ -35,7 +35,7 @@ * * @(#)nfsm_subs.h 8.2 (Berkeley) 3/30/95 * $FreeBSD: src/sys/nfs/nfsm_subs.h,v 1.27.2.1 2000/10/28 16:27:27 dwmalone Exp $ - * $DragonFly: src/sys/vfs/nfs/nfsm_subs.h,v 1.6 2004/06/02 14:43:04 eirikn Exp $ + * $DragonFly: src/sys/vfs/nfs/nfsm_subs.h,v 1.7 2005/03/17 17:28:46 dillon Exp $ */ @@ -221,13 +221,13 @@ struct mbuf *nfsm_rpchead (struct ucred *cr, int nmflag, int procid, (v) = ttvp; \ } while (0) -#define nfsm_postop_attr(v, f) \ +#define nfsm_postop_attr(v, f, lflags) \ do { \ struct vnode *ttvp = (v); \ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ if (((f) = fxdr_unsigned(int, *tl)) != 0) { \ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \ - (struct vattr *)0, 1)) != 0) { \ + (struct vattr *)0, lflags)) != 0) { \ error = t1; \ (f) = 0; \ m_freem(mrep); \ @@ -237,6 +237,18 @@ struct mbuf *nfsm_rpchead (struct ucred *cr, int nmflag, int procid, } \ } while (0) +/* + * This function updates the attribute cache based on data returned in the + * NFS reply for NFS RPCs that modify the target file. If the RPC succeeds + * a 'before' and 'after' mtime is returned that allows us to determine if + * the new mtime attribute represents our modification or someone else's + * modification. + * + * The flag argument returns non-0 if the original times matched, zero if + * they did not match. NRMODIFIED is automatically set if the before time + * does not match the original n_mtime, and n_mtime is automatically updated + * to the new after time (by nfsm_postop_attr()). 
+ */ /* Used as (f) for nfsm_wcc_data() */ #define NFSV3_WCCRATTR 0 #define NFSV3_WCCCHK 1 @@ -247,11 +259,16 @@ struct mbuf *nfsm_rpchead (struct ucred *cr, int nmflag, int procid, nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED); \ - if (f) \ + if (f) { \ ttretf = (VTONFS(v)->n_mtime == \ fxdr_unsigned(u_int32_t, *(tl + 2))); \ + if (!ttretf) \ + VTONFS(v)->n_flag |= NRMODIFIED; \ + } \ + nfsm_postop_attr((v), ttattrf, NFS_LATTR_NOSHRINK|NFS_LATTR_NOMTIMECHECK); \ + } else { \ + nfsm_postop_attr((v), ttattrf, NFS_LATTR_NOSHRINK); \ } \ - nfsm_postop_attr((v), ttattrf); \ if (f) { \ (f) = ttretf; \ } else { \ diff --git a/sys/vfs/nfs/nfsnode.h b/sys/vfs/nfs/nfsnode.h index d056ec3c40..51aa7d89b4 100644 --- a/sys/vfs/nfs/nfsnode.h +++ b/sys/vfs/nfs/nfsnode.h @@ -35,7 +35,7 @@ * * @(#)nfsnode.h 8.9 (Berkeley) 5/14/95 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfsnode.h,v 1.43 2004/04/14 23:23:55 peadar Exp $ - * $DragonFly: src/sys/vfs/nfs/nfsnode.h,v 1.12 2004/08/28 19:02:20 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfsnode.h,v 1.13 2005/03/17 17:28:46 dillon Exp $ */ @@ -102,7 +102,7 @@ struct nfsnode { u_int32_t n_mode; /* ACCESS mode cache */ uid_t n_modeuid; /* credentials having mode */ time_t n_modestamp; /* mode cache timestamp */ - time_t n_mtime; /* Prev modify time. */ + time_t n_mtime; /* Last known modified time */ time_t n_ctime; /* Prev create time. */ time_t n_expiry; /* Lease expiry time */ nfsfh_t *n_fhp; /* NFS File Handle */ @@ -141,7 +141,7 @@ struct nfsnode { */ #define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. 
*/ #define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */ -#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ +#define NLMODIFIED 0x0004 /* Client has pending modifications */ #define NWRITEERR 0x0008 /* Flag write errors so close will know */ #define NQNFSNONCACHE 0x0020 /* Non-cachable lease */ #define NQNFSWRITE 0x0040 /* Write lease */ @@ -151,7 +151,7 @@ struct nfsnode { #define NCHG 0x0400 /* Special file times changed */ #define NLOCKED 0x0800 /* node is locked */ #define NWANTED 0x0100 /* someone wants to lock */ -#define NSIZECHANGED 0x2000 /* File size has changed: need cache inval */ +#define NRMODIFIED 0x2000 /* Server has unsynchronized modifications */ /* * Convert between nfsnode pointers and vnode pointers @@ -215,6 +215,7 @@ int nfs_write (struct vop_write_args *); int nqnfs_vop_lease_check (struct vop_lease_args *); int nfs_inactive (struct vop_inactive_args *); int nfs_reclaim (struct vop_reclaim_args *); +int nfs_flush (struct vnode *, int, struct thread *, int); /* other stuff */ int nfs_removeit (struct sillyrename *); diff --git a/usr.sbin/pstat/pstat.c b/usr.sbin/pstat/pstat.c index a9a34ad72b..884152eefc 100644 --- a/usr.sbin/pstat/pstat.c +++ b/usr.sbin/pstat/pstat.c @@ -33,7 +33,7 @@ * @(#) Copyright (c) 1980, 1991, 1993, 1994 The Regents of the University of California. All rights reserved. * @(#)pstat.c 8.16 (Berkeley) 5/9/95 * $FreeBSD: src/usr.sbin/pstat/pstat.c,v 1.49.2.5 2002/07/12 09:12:49 des Exp $ - * $DragonFly: src/usr.sbin/pstat/pstat.c,v 1.14 2005/02/03 17:28:40 joerg Exp $ + * $DragonFly: src/usr.sbin/pstat/pstat.c,v 1.15 2005/03/17 17:28:44 dillon Exp $ */ #define _KERNEL_STRUCTURES @@ -528,8 +528,10 @@ nfs_print(struct vnode *vp) *flags++ = 'W'; if (flag & NFLUSHINPROG) *flags++ = 'P'; - if (flag & NMODIFIED) + if (flag & NLMODIFIED) *flags++ = 'M'; + if (flag & NRMODIFIED) + *flags++ = 'R'; if (flag & NWRITEERR) *flags++ = 'E'; if (flag & NQNFSNONCACHE) -- 2.41.0