From 28271622aa9fc15ee04275cd2fbca1f3dccba1e3 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Mon, 10 Jan 2011 15:31:29 -0800
Subject: [PATCH] kernel - Do a better job with the filesystem background sync

* Adjust code for MNT_LAZY, MNT_NOWAIT, and MNT_WAIT to reflect the
  fact that they are three different flags and not enumeration
  constants.

* HAMMER now sets VMSC_ONEPASS for MNT_LAZY syncs (the background
  filesystem sync).  This generally reduces instances where the
  background sync winds up running continuously when heavy filesystem
  ops saturate the disk.  Fewer vnodes dirtied after the sync is
  initiated will get caught up in the sync.
---
 sys/kern/vfs_subr.c              | 13 ++++++++++++-
 sys/kern/vfs_sync.c              |  4 ++--
 sys/kern/vfs_syscalls.c          |  2 +-
 sys/sys/mount.h                  |  6 +++---
 sys/vfs/gnu/ext2fs/ext2_vfsops.c |  4 ++--
 sys/vfs/gnu/ext2fs/ext2_vnops.c  |  2 +-
 sys/vfs/hammer/hammer_ondisk.c   | 15 ++++++++++++---
 sys/vfs/msdosfs/msdosfs_vfsops.c |  4 ++--
 sys/vfs/nfs/nfs_vfsops.c         |  2 +-
 sys/vfs/nwfs/nwfs_vfsops.c       |  2 +-
 sys/vfs/smbfs/smbfs_vfsops.c     |  2 +-
 sys/vfs/ufs/ffs_softdep.c        | 18 +++++++++---------
 sys/vfs/ufs/ffs_vfsops.c         |  2 +-
 13 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index cdad58b9cb..575e870ed8 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -93,6 +93,9 @@ static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 int numvnodes;
 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0,
 	   "Number of vnodes allocated");
+int verbose_reclaims;
+SYSCTL_INT(_debug, OID_AUTO, verbose_reclaims, CTLFLAG_RD, &verbose_reclaims, 0,
+	   "Output filename of reclaimed vnode(s)");
 
 enum vtype iftovt_tab[16] = {
 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
@@ -667,6 +670,7 @@ vfsync(struct vnode *vp, int waitfor, int passes,
 
 	lwkt_gettoken(&vp->v_token);
 	switch(waitfor) {
+	case MNT_LAZY | MNT_NOWAIT:
 	case MNT_LAZY:
 		/*
 		 * Lazy (filesystem syncer typ) Asynchronous plus limit the
@@ -1161,6 +1165,7 @@ vclean_vxlocked(struct vnode *vp, int flags)
 	int active;
 	int n;
 	vm_object_t object;
+	struct namecache *ncp;
 
 	/*
 	 * If the vnode has already been reclaimed we have nothing to do.
@@ -1169,11 +1174,17 @@ vclean_vxlocked(struct vnode *vp, int flags)
 		return;
 	vsetflags(vp, VRECLAIMED);
 
+	if (verbose_reclaims) {
+		if ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL)
+			kprintf("Debug: reclaim %p %s\n", vp, ncp->nc_name);
+	}
+
 	/*
 	 * Scrap the vfs cache
 	 */
 	while (cache_inval_vp(vp, 0) != 0) {
-		kprintf("Warning: vnode %p clean/cache_resolution race detected\n", vp);
+		kprintf("Warning: vnode %p clean/cache_resolution "
+			"race detected\n", vp);
 		tsleep(vp, 0, "vclninv", 2);
 	}
 
diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c
index e185ccd73b..892faac8ee 100644
--- a/sys/kern/vfs_sync.c
+++ b/sys/kern/vfs_sync.c
@@ -413,7 +413,7 @@ sync_fsync(struct vop_fsync_args *ap)
 	/*
 	 * We only need to do something if this is a lazy evaluation.
 	 */
-	if (ap->a_waitfor != MNT_LAZY)
+	if ((ap->a_waitfor & MNT_LAZY) == 0)
 		return (0);
 
 	/*
@@ -436,7 +436,7 @@ sync_fsync(struct vop_fsync_args *ap)
 		asyncflag = mp->mnt_flag & MNT_ASYNC;
 		mp->mnt_flag &= ~MNT_ASYNC;	/* ZZZ hack */
 		vfs_msync(mp, MNT_NOWAIT);
-		VFS_SYNC(mp, MNT_LAZY);
+		VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY);
 		if (asyncflag)
 			mp->mnt_flag |= MNT_ASYNC;
 	}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index bf9e1dbf2e..ff68175f6b 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -895,7 +895,7 @@ sync_callback(struct mount *mp, void *data __unused)
 		asyncflag = mp->mnt_flag & MNT_ASYNC;
 		mp->mnt_flag &= ~MNT_ASYNC;
 		vfs_msync(mp, MNT_NOWAIT);
-		VFS_SYNC(mp, MNT_NOWAIT);
+		VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY);
 		mp->mnt_flag |= asyncflag;
 	}
 	return(0);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 5ac7f0ab9f..e27baca23d 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -373,9 +373,9 @@ struct mount {
  *
  * waitfor flags to vfs_sync() and getfsstat()
  */
-#define MNT_WAIT	1	/* synchronously wait for I/O to complete */
-#define MNT_NOWAIT	2	/* start all I/O, but do not wait for it */
-#define MNT_LAZY	4	/* be lazy and do not necessarily push it all */
+#define MNT_WAIT	0x0001	/* synchronously wait for I/O to complete */
+#define MNT_NOWAIT	0x0002	/* start all I/O, but do not wait for it */
+#define MNT_LAZY	0x0004	/* be lazy and do not necessarily push it all */
 
 #define VOP_FSYNC_SYSCALL	0x0001	/* from system call */
 
diff --git a/sys/vfs/gnu/ext2fs/ext2_vfsops.c b/sys/vfs/gnu/ext2fs/ext2_vfsops.c
index 09b68c61ca..6cdd777eff 100644
--- a/sys/vfs/gnu/ext2fs/ext2_vfsops.c
+++ b/sys/vfs/gnu/ext2fs/ext2_vfsops.c
@@ -1053,7 +1053,7 @@ ext2_sync(struct mount *mp, int waitfor)
 	/*
 	 * Force stale file system control information to be flushed.
 	 */
-	if (waitfor != MNT_LAZY) {
+	if ((waitfor & MNT_LAZY) == 0) {
 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0)
 			scaninfo.allerror = error;
@@ -1085,7 +1085,7 @@ ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data)
 	if (vp->v_type == VNON ||
 	    ((ip->i_flag &
 	      (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
-	     (RB_EMPTY(&vp->v_rbdirty_tree) || info->waitfor == MNT_LAZY))) {
+	     (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) {
 		return(0);
 	}
 	if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0)
diff --git a/sys/vfs/gnu/ext2fs/ext2_vnops.c b/sys/vfs/gnu/ext2fs/ext2_vnops.c
index bf0ec6e84c..c04dbaaad7 100644
--- a/sys/vfs/gnu/ext2fs/ext2_vnops.c
+++ b/sys/vfs/gnu/ext2fs/ext2_vnops.c
@@ -254,7 +254,7 @@ ext2_fsync_bp(struct buf *bp, void *data)
 		 * Wait for I/O associated with indirect blocks to complete,
 		 * since there is no way to quickly wait for them below.
 		 */
-		if (bp->b_vp == info->vp || info->waitfor == MNT_NOWAIT)
+		if (bp->b_vp == info->vp || (info->waitfor & MNT_NOWAIT))
 			bawrite(bp);
 		else
 			bwrite(bp);
diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c
index 9740bc1014..ce3bdae565 100644
--- a/sys/vfs/hammer/hammer_ondisk.c
+++ b/sys/vfs/hammer/hammer_ondisk.c
@@ -1713,19 +1713,28 @@ hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor)
  * the vnodes in case any were already flushing during the first pass,
  * and activate the flusher twice (the second time brings the UNDO FIFO's
  * start position up to the end position after the first call).
+ *
+ * If doing a lazy sync make just one pass on the vnode list, ignoring
+ * any new vnodes added to the list while the sync is in progress.
  */
 int
 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
 {
 	struct hammer_sync_info info;
+	int flags;
+
+	flags = VMSC_GETVP;
+	if (waitfor & MNT_LAZY)
+		flags |= VMSC_ONEPASS;
 
 	info.error = 0;
 	info.waitfor = MNT_NOWAIT;
-	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
+	vmntvnodescan(hmp->mp, flags | VMSC_NOWAIT,
 		      hammer_sync_scan1, hammer_sync_scan2, &info);
-	if (info.error == 0 && waitfor == MNT_WAIT) {
+
+	if (info.error == 0 && (waitfor & MNT_WAIT)) {
 		info.waitfor = waitfor;
-		vmntvnodescan(hmp->mp, VMSC_GETVP,
+		vmntvnodescan(hmp->mp, flags,
 			      hammer_sync_scan1, hammer_sync_scan2, &info);
 	}
 	if (waitfor == MNT_WAIT) {
diff --git a/sys/vfs/msdosfs/msdosfs_vfsops.c b/sys/vfs/msdosfs/msdosfs_vfsops.c
index 743279d23e..a7353d1c0d 100644
--- a/sys/vfs/msdosfs/msdosfs_vfsops.c
+++ b/sys/vfs/msdosfs/msdosfs_vfsops.c
@@ -756,7 +756,7 @@ msdosfs_sync(struct mount *mp, int waitfor)
 	/*
 	 * Flush filesystem control info.
 	 */
-	if (waitfor != MNT_LAZY) {
+	if ((waitfor & MNT_LAZY) == 0) {
 		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(pmp->pm_devvp, waitfor, 0)) != 0)
 			scaninfo.allerror = error;
@@ -776,7 +776,7 @@ msdosfs_sync_scan(struct mount *mp, struct vnode *vp, void *data)
 	if (vp->v_type == VNON || vp->v_type == VBAD ||
 	    ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
-	     (RB_EMPTY(&vp->v_rbdirty_tree) || info->waitfor == MNT_LAZY))) {
+	     (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) {
 		return(0);
 	}
 	if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0)
diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c
index f3299ed5bf..23998de23e 100644
--- a/sys/vfs/nfs/nfs_vfsops.c
+++ b/sys/vfs/nfs/nfs_vfsops.c
@@ -1374,7 +1374,7 @@ nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
 
 	if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree))
 		return(-1);
-	if (info->waitfor == MNT_LAZY)
+	if (info->waitfor & MNT_LAZY)
 		return(-1);
 	return(0);
 }
diff --git a/sys/vfs/nwfs/nwfs_vfsops.c b/sys/vfs/nwfs/nwfs_vfsops.c
index 66a05a3c10..ce5e83ddc1 100644
--- a/sys/vfs/nwfs/nwfs_vfsops.c
+++ b/sys/vfs/nwfs/nwfs_vfsops.c
@@ -442,7 +442,7 @@ loop:
 		if (vp->v_mount != mp)
 			goto loop;
 		if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) ||
-		    waitfor == MNT_LAZY)
+		    (waitfor & MNT_LAZY))
 			continue;
 		if (vget(vp, LK_EXCLUSIVE))
 			goto loop;
diff --git a/sys/vfs/smbfs/smbfs_vfsops.c b/sys/vfs/smbfs/smbfs_vfsops.c
index ddc6f53305..b5d0c158de 100644
--- a/sys/vfs/smbfs/smbfs_vfsops.c
+++ b/sys/vfs/smbfs/smbfs_vfsops.c
@@ -395,7 +395,7 @@ loop:
 		if (vp->v_mount != mp)
 			goto loop;
 		if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) ||
-		    waitfor == MNT_LAZY)
+		    (waitfor & MNT_LAZY))
 			continue;
 		if (vget(vp, LK_EXCLUSIVE))
 			goto loop;
diff --git a/sys/vfs/ufs/ffs_softdep.c b/sys/vfs/ufs/ffs_softdep.c
index 919db55ea6..eeaad4b442 100644
--- a/sys/vfs/ufs/ffs_softdep.c
+++ b/sys/vfs/ufs/ffs_softdep.c
@@ -4377,7 +4377,7 @@ top:
 	 * The brief unlock is to allow any pent up dependency
 	 * processing to be done. Then proceed with the second pass.
 	 */
-	if (waitfor == MNT_NOWAIT) {
+	if (waitfor & MNT_NOWAIT) {
 		waitfor = MNT_WAIT;
 		FREE_LOCK(&lk);
 		ACQUIRE_LOCK(&lk);
@@ -4451,7 +4451,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data)
 			if (getdirtybuf(&nbp, info->waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
-			if (info->waitfor == MNT_NOWAIT) {
+			if (info->waitfor & MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = bwrite(nbp)) != 0) {
 				bawrite(bp);
@@ -4469,7 +4469,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data)
 			if (getdirtybuf(&nbp, info->waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
-			if (info->waitfor == MNT_NOWAIT) {
+			if (info->waitfor & MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = bwrite(nbp)) != 0) {
 				bawrite(bp);
@@ -4545,7 +4545,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data)
 			if (getdirtybuf(&nbp, info->waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
-			if (info->waitfor == MNT_NOWAIT) {
+			if (info->waitfor & MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = bwrite(nbp)) != 0) {
 				bawrite(bp);
@@ -4573,7 +4573,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data)
 			if (getdirtybuf(&nbp, info->waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
-			if (info->waitfor == MNT_NOWAIT) {
+			if (info->waitfor & MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = bwrite(nbp)) != 0) {
 				bawrite(bp);
@@ -4631,12 +4631,12 @@ flush_inodedep_deps(struct fs *fs, ino_t ino)
 			continue;
 		bp = adp->ad_buf;
 		if (getdirtybuf(&bp, waitfor) == 0) {
-			if (waitfor == MNT_NOWAIT)
+			if (waitfor & MNT_NOWAIT)
 				continue;
 			break;
 		}
 		FREE_LOCK(&lk);
-		if (waitfor == MNT_NOWAIT) {
+		if (waitfor & MNT_NOWAIT) {
 			bawrite(bp);
 		} else if ((error = bwrite(bp)) != 0) {
 			ACQUIRE_LOCK(&lk);
@@ -4652,12 +4652,12 @@ flush_inodedep_deps(struct fs *fs, ino_t ino)
 			continue;
 		bp = adp->ad_buf;
 		if (getdirtybuf(&bp, waitfor) == 0) {
-			if (waitfor == MNT_NOWAIT)
+			if (waitfor & MNT_NOWAIT)
 				continue;
 			break;
 		}
 		FREE_LOCK(&lk);
-		if (waitfor == MNT_NOWAIT) {
+		if (waitfor & MNT_NOWAIT) {
 			bawrite(bp);
 		} else if ((error = bwrite(bp)) != 0) {
 			ACQUIRE_LOCK(&lk);
diff --git a/sys/vfs/ufs/ffs_vfsops.c b/sys/vfs/ufs/ffs_vfsops.c
index a21edbd0bd..a47758a22c 100644
--- a/sys/vfs/ufs/ffs_vfsops.c
+++ b/sys/vfs/ufs/ffs_vfsops.c
@@ -992,7 +992,7 @@ ffs_sync(struct mount *mp, int waitfor)
 	/*
	 * Force stale filesystem control information to be flushed.
 	 */
-	if (waitfor != MNT_LAZY) {
+	if ((waitfor & MNT_LAZY) == 0) {
 		if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
 			waitfor = MNT_NOWAIT;
 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
-- 
2.41.0
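
A note on the flag-test change made throughout this patch: MNT_WAIT, MNT_NOWAIT,
and MNT_LAZY are independent bits that callers may now combine (e.g. the
VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY) calls above), so the correct membership test
is a mask with &, never a comparison with ==.  Below is a minimal userland
sketch of why the == tests had to go; the check() helper is hypothetical and
only the three flag values come from this patch.

#include <stdio.h>

/* Flag values as defined in sys/sys/mount.h by this patch. */
#define MNT_WAIT	0x0001	/* synchronously wait for I/O to complete */
#define MNT_NOWAIT	0x0002	/* start all I/O, but do not wait for it */
#define MNT_LAZY	0x0004	/* be lazy and do not necessarily push it all */

/*
 * Hypothetical helper, for illustration only.  With waitfor set to
 * MNT_NOWAIT | MNT_LAZY (as sync_fsync() now passes to VFS_SYNC()),
 * "waitfor == MNT_LAZY" is false even though the lazy bit is set,
 * while "waitfor & MNT_LAZY" still detects it.
 */
static void
check(int waitfor)
{
	printf("waitfor=0x%04x: ==MNT_LAZY -> %d, &MNT_LAZY -> %d\n",
	       waitfor, waitfor == MNT_LAZY, (waitfor & MNT_LAZY) != 0);
}

int
main(void)
{
	check(MNT_LAZY);		/* both tests succeed */
	check(MNT_NOWAIT | MNT_LAZY);	/* only the mask test succeeds */
	return 0;
}

Running this prints 1/1 for the first call and 0/1 for the second, which is
exactly the combination the old == comparisons mishandled in the lazy
background-sync path.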