From: Matthew Dillon Date: Mon, 10 Jan 2011 23:31:29 +0000 (-0800) Subject: kernel - Do a better job with the filesystem background sync X-Git-Tag: v2.10.0~356 X-Git-Url: http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/28271622aa9fc15ee04275cd2fbca1f3dccba1e3 kernel - Do a better job with the filesystem background sync * Adjust code for MNT_LAZY, MNT_NOWAIT, and MNT_WAITOK to reflect the fact that they are three different flags and not enumeration constants. * HAMMER now sets VMSC_ONEPASS for MNT_LAZY syncs (background filesystem sync). This generally reduces instances where the background sync winds up running continuously when heavy filesystem ops saturate the disk. Fewer vnodes dirtied after the sync is initiated will get caught up in the sync. --- diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index cdad58b..575e870 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -93,6 +93,9 @@ static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); int numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes allocated"); +int verbose_reclaims; +SYSCTL_INT(_debug, OID_AUTO, verbose_reclaims, CTLFLAG_RD, &verbose_reclaims, 0, + "Output filename of reclaimed vnode(s)"); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, @@ -667,6 +670,7 @@ vfsync(struct vnode *vp, int waitfor, int passes, lwkt_gettoken(&vp->v_token); switch(waitfor) { + case MNT_LAZY | MNT_NOWAIT: case MNT_LAZY: /* * Lazy (filesystem syncer typ) Asynchronous plus limit the @@ -1161,6 +1165,7 @@ vclean_vxlocked(struct vnode *vp, int flags) int active; int n; vm_object_t object; + struct namecache *ncp; /* * If the vnode has already been reclaimed we have nothing to do. @@ -1169,11 +1174,17 @@ vclean_vxlocked(struct vnode *vp, int flags) return; vsetflags(vp, VRECLAIMED); + if (verbose_reclaims) { + if ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) + kprintf("Debug: reclaim %p %s\n", vp, ncp->nc_name); + } + /* * Scrap the vfs cache */ while (cache_inval_vp(vp, 0) != 0) { - kprintf("Warning: vnode %p clean/cache_resolution race detected\n", vp); + kprintf("Warning: vnode %p clean/cache_resolution " + "race detected\n", vp); tsleep(vp, 0, "vclninv", 2); } diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index e185ccd..892faac 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -413,7 +413,7 @@ sync_fsync(struct vop_fsync_args *ap) /* * We only need to do something if this is a lazy evaluation. */ - if (ap->a_waitfor != MNT_LAZY) + if ((ap->a_waitfor & MNT_LAZY) == 0) return (0); /* @@ -436,7 +436,7 @@ sync_fsync(struct vop_fsync_args *ap) asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; /* ZZZ hack */ vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_LAZY); + VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bf9e1db..ff68175 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -895,7 +895,7 @@ sync_callback(struct mount *mp, void *data __unused) asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT); + VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); mp->mnt_flag |= asyncflag; } return(0); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 5ac7f0a..e27baca 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -373,9 +373,9 @@ struct mount { * * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ -#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ -#define MNT_LAZY 4 /* be lazy and do not necessarily push it all */ +#define MNT_WAIT 0x0001 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 0x0002 /* start all I/O, but do not wait for it */ +#define MNT_LAZY 0x0004 /* be lazy and do not necessarily push it all */ #define VOP_FSYNC_SYSCALL 0x0001 /* from system call */ diff --git a/sys/vfs/gnu/ext2fs/ext2_vfsops.c b/sys/vfs/gnu/ext2fs/ext2_vfsops.c index 09b68c6..6cdd777 100644 --- a/sys/vfs/gnu/ext2fs/ext2_vfsops.c +++ b/sys/vfs/gnu/ext2fs/ext2_vfsops.c @@ -1053,7 +1053,7 @@ ext2_sync(struct mount *mp, int waitfor) /* * Force stale file system control information to be flushed. */ - if (waitfor != MNT_LAZY) { + if ((waitfor & MNT_LAZY) == 0) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) scaninfo.allerror = error; @@ -1085,7 +1085,7 @@ ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - (RB_EMPTY(&vp->v_rbdirty_tree) || info->waitfor == MNT_LAZY))) { + (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { return(0); } if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) diff --git a/sys/vfs/gnu/ext2fs/ext2_vnops.c b/sys/vfs/gnu/ext2fs/ext2_vnops.c index bf0ec6e..c04dbaa 100644 --- a/sys/vfs/gnu/ext2fs/ext2_vnops.c +++ b/sys/vfs/gnu/ext2fs/ext2_vnops.c @@ -254,7 +254,7 @@ ext2_fsync_bp(struct buf *bp, void *data) * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. */ - if (bp->b_vp == info->vp || info->waitfor == MNT_NOWAIT) + if (bp->b_vp == info->vp || (info->waitfor & MNT_NOWAIT)) bawrite(bp); else bwrite(bp); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 9740bc1..ce3bdae 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -1713,19 +1713,28 @@ hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor) * the vnodes in case any were already flushing during the first pass, * and activate the flusher twice (the second time brings the UNDO FIFO's * start position up to the end position after the first call). + * + * If doing a lazy sync make just one pass on the vnode list, ignoring + * any new vnodes added to the list while the sync is in progress. */ int hammer_sync_hmp(hammer_mount_t hmp, int waitfor) { struct hammer_sync_info info; + int flags; + + flags = VMSC_GETVP; + if (waitfor & MNT_LAZY) + flags |= VMSC_ONEPASS; info.error = 0; info.waitfor = MNT_NOWAIT; - vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT, + vmntvnodescan(hmp->mp, flags | VMSC_NOWAIT, hammer_sync_scan1, hammer_sync_scan2, &info); - if (info.error == 0 && waitfor == MNT_WAIT) { + + if (info.error == 0 && (waitfor & MNT_WAIT)) { info.waitfor = waitfor; - vmntvnodescan(hmp->mp, VMSC_GETVP, + vmntvnodescan(hmp->mp, flags, hammer_sync_scan1, hammer_sync_scan2, &info); } if (waitfor == MNT_WAIT) { diff --git a/sys/vfs/msdosfs/msdosfs_vfsops.c b/sys/vfs/msdosfs/msdosfs_vfsops.c index 743279d..a7353d1 100644 --- a/sys/vfs/msdosfs/msdosfs_vfsops.c +++ b/sys/vfs/msdosfs/msdosfs_vfsops.c @@ -756,7 +756,7 @@ msdosfs_sync(struct mount *mp, int waitfor) /* * Flush filesystem control info. */ - if (waitfor != MNT_LAZY) { + if ((waitfor & MNT_LAZY) == 0) { vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(pmp->pm_devvp, waitfor, 0)) != 0) scaninfo.allerror = error; @@ -776,7 +776,7 @@ msdosfs_sync_scan(struct mount *mp, struct vnode *vp, void *data) if (vp->v_type == VNON || vp->v_type == VBAD || ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 && - (RB_EMPTY(&vp->v_rbdirty_tree) || info->waitfor == MNT_LAZY))) { + (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { return(0); } if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c index f3299ed..23998de 100644 --- a/sys/vfs/nfs/nfs_vfsops.c +++ b/sys/vfs/nfs/nfs_vfsops.c @@ -1374,7 +1374,7 @@ nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data) if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree)) return(-1); - if (info->waitfor == MNT_LAZY) + if (info->waitfor & MNT_LAZY) return(-1); return(0); } diff --git a/sys/vfs/nwfs/nwfs_vfsops.c b/sys/vfs/nwfs/nwfs_vfsops.c index 66a05a3..ce5e83d 100644 --- a/sys/vfs/nwfs/nwfs_vfsops.c +++ b/sys/vfs/nwfs/nwfs_vfsops.c @@ -442,7 +442,7 @@ loop: if (vp->v_mount != mp) goto loop; if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) || - waitfor == MNT_LAZY) + (waitfor & MNT_LAZY)) continue; if (vget(vp, LK_EXCLUSIVE)) goto loop; diff --git a/sys/vfs/smbfs/smbfs_vfsops.c b/sys/vfs/smbfs/smbfs_vfsops.c index ddc6f53..b5d0c15 100644 --- a/sys/vfs/smbfs/smbfs_vfsops.c +++ b/sys/vfs/smbfs/smbfs_vfsops.c @@ -395,7 +395,7 @@ loop: if (vp->v_mount != mp) goto loop; if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) || - waitfor == MNT_LAZY) + (waitfor & MNT_LAZY)) continue; if (vget(vp, LK_EXCLUSIVE)) goto loop; diff --git a/sys/vfs/ufs/ffs_softdep.c b/sys/vfs/ufs/ffs_softdep.c index 919db55..eeaad4b 100644 --- a/sys/vfs/ufs/ffs_softdep.c +++ b/sys/vfs/ufs/ffs_softdep.c @@ -4377,7 +4377,7 @@ top: * The brief unlock is to allow any pent up dependency * processing to be done. Then proceed with the second pass. */ - if (waitfor == MNT_NOWAIT) { + if (waitfor & MNT_NOWAIT) { waitfor = MNT_WAIT; FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); @@ -4451,7 +4451,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data) if (getdirtybuf(&nbp, info->waitfor) == 0) break; FREE_LOCK(&lk); - if (info->waitfor == MNT_NOWAIT) { + if (info->waitfor & MNT_NOWAIT) { bawrite(nbp); } else if ((error = bwrite(nbp)) != 0) { bawrite(bp); @@ -4469,7 +4469,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data) if (getdirtybuf(&nbp, info->waitfor) == 0) break; FREE_LOCK(&lk); - if (info->waitfor == MNT_NOWAIT) { + if (info->waitfor & MNT_NOWAIT) { bawrite(nbp); } else if ((error = bwrite(nbp)) != 0) { bawrite(bp); @@ -4545,7 +4545,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data) if (getdirtybuf(&nbp, info->waitfor) == 0) break; FREE_LOCK(&lk); - if (info->waitfor == MNT_NOWAIT) { + if (info->waitfor & MNT_NOWAIT) { bawrite(nbp); } else if ((error = bwrite(nbp)) != 0) { bawrite(bp); @@ -4573,7 +4573,7 @@ softdep_sync_metadata_bp(struct buf *bp, void *data) if (getdirtybuf(&nbp, info->waitfor) == 0) break; FREE_LOCK(&lk); - if (info->waitfor == MNT_NOWAIT) { + if (info->waitfor & MNT_NOWAIT) { bawrite(nbp); } else if ((error = bwrite(nbp)) != 0) { bawrite(bp); @@ -4631,12 +4631,12 @@ flush_inodedep_deps(struct fs *fs, ino_t ino) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) + if (waitfor & MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { + if (waitfor & MNT_NOWAIT) { bawrite(bp); } else if ((error = bwrite(bp)) != 0) { ACQUIRE_LOCK(&lk); @@ -4652,12 +4652,12 @@ flush_inodedep_deps(struct fs *fs, ino_t ino) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) + if (waitfor & MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { + if (waitfor & MNT_NOWAIT) { bawrite(bp); } else if ((error = bwrite(bp)) != 0) { ACQUIRE_LOCK(&lk); diff --git a/sys/vfs/ufs/ffs_vfsops.c b/sys/vfs/ufs/ffs_vfsops.c index a21edbd..a47758a 100644 --- a/sys/vfs/ufs/ffs_vfsops.c +++ b/sys/vfs/ufs/ffs_vfsops.c @@ -992,7 +992,7 @@ ffs_sync(struct mount *mp, int waitfor) /* * Force stale filesystem control information to be flushed. */ - if (waitfor != MNT_LAZY) { + if ((waitfor & MNT_LAZY) == 0) { if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) waitfor = MNT_NOWAIT; vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);