From 1c222faf36ea48e314d77b9f242ed1258f7717d5 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Tue, 4 Dec 2018 21:49:35 -0800
Subject: [PATCH] kernel - Add trigger_syncer(), VFS_MODIFYING()

* Add trigger_syncer().  This function asynchronously triggers the
  syncer vnode in the syncer thread for the mount.  It is a NOP if
  there is no syncer thread or syncer vnode.  It will be used by
  HAMMER2 to pipeline syncs when heavy filesystem activity over-extends
  internal memory structures.

* Add VFS_MODIFYING().  This is a hook into the filesystem that
  filesystem-modifying ops in the kernel will call prior to locking
  any vnodes.  It allows the filesystem to moderate the over-allocation
  of internal structures.  Waiting until after the VOP is called is too
  late, so we need kernel support for this.  Numerous attempts to hack
  moderation code into the H2 VOPs have all failed spectacularly.  In
  H2, over-allocation can occur because H2 must retain disconnected
  inodes related to file creation and deletion until the next sync
  cycle.
--- sys/kern/vfs_default.c | 6 ++++++ sys/kern/vfs_init.c | 4 ++++ sys/kern/vfs_sync.c | 44 +++++++++++++++++++++++++++++++++++++---- sys/kern/vfs_syscalls.c | 1 + sys/kern/vfs_vnops.c | 12 +++++++++-- sys/sys/mount.h | 7 ++++++- sys/sys/vnode.h | 1 + 7 files changed, 68 insertions(+), 7 deletions(-) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 5a468b489f..c05b3a6901 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1487,4 +1487,10 @@ vfs_stdncpgen_test(struct mount *mp, struct namecache *ncp) { return 0; } + +void +vfs_stdmodifying(struct mount *mp) +{ + /* do nothing */ +} /* end of vfs default ops */ diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index 7fa28070cd..ff22024cce 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -435,6 +435,10 @@ vfs_register(struct vfsconf *vfc) vfsops->vfs_acdone = vfs_stdac_done; } + if (vfsops->vfs_modifying == NULL) { + vfsops->vfs_modifying = vfs_stdmodifying; + } + /* * Call init function for this VFS... */ diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index 1446855f0f..cacbea5d2d 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -116,8 +116,8 @@ struct syncer_ctx { long syncer_mask; int syncer_delayno; int syncer_forced; - int syncer_rushjob; - int syncer_unused01; + int syncer_rushjob; /* sequence vnodes faster */ + int syncer_trigger; /* trigger full sync */ long syncer_count; }; @@ -190,7 +190,8 @@ vn_syncer_count(struct mount *mp) * depend on the syncer_token (which might already be held by * the caller) to protect v_synclist and VONWORKLST. * - * MPSAFE + * WARNING: The syncer depends on this function not blocking if the caller + * already holds the syncer token. */ void vn_syncer_add(struct vnode *vp, int delay) @@ -387,6 +388,25 @@ syncer_thread(void *_ctx) */ slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno]; + /* + * If syncer_trigger is set (from trigger_syncer(mp)), + * Immediately do a full filesystem sync. 
+ */ + if (ctx->syncer_trigger) { + ctx->syncer_trigger = 0; + if (ctx->sc_mp && ctx->sc_mp->mnt_syncer) { + vp = ctx->sc_mp->mnt_syncer; + if (vp->v_flag & VONWORKLST) { + vn_syncer_add(vp, retrydelay); + if (vget(vp, LK_EXCLUSIVE) == 0) { + VOP_FSYNC(vp, MNT_LAZY, 0); + vput(vp); + vnodes_synced++; + } + } + } + } + while ((vp = LIST_FIRST(slp)) != NULL) { vn_syncer_add(vp, retrydelay); if (ctx->syncer_forced) { @@ -531,10 +551,26 @@ speedup_syncer(struct mount *mp) */ atomic_add_int(&rushjob, 1); ++stat_rush_requests; - if (mp) + if (mp && mp->mnt_syncer_ctx) wakeup(mp->mnt_syncer_ctx); } +/* + * trigger a full sync + */ +void +trigger_syncer(struct mount *mp) +{ + struct syncer_ctx *ctx; + + if (mp && (ctx = mp->mnt_syncer_ctx) != NULL) { + if (ctx->syncer_trigger == 0) { + ctx->syncer_trigger = 1; + wakeup(ctx); + } + } +} + /* * Routine to create and manage a filesystem syncer vnode. */ diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 3e50a86ff1..6f4cec2b49 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2437,6 +2437,7 @@ kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) vrele(vp); return (EEXIST); } + VFS_MODIFYING(vp->v_mount); error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); if (error) { vrele(vp); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 137f856ec8..d8493a7fe4 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -401,8 +401,14 @@ vn_writechk(struct vnode *vp, struct nchandle *nch) int ncp_writechk(struct nchandle *nch) { - if (nch->mount && (nch->mount->mnt_flag & MNT_RDONLY)) - return (EROFS); + struct mount *mp; + + if ((mp = nch->mount) != NULL) { + if (mp->mnt_flag & MNT_RDONLY) + return (EROFS); + if (mp->mnt_op->vfs_modifying != vfs_stdmodifying) + VFS_MODIFYING(mp); + } return(0); } @@ -722,6 +728,8 @@ vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags) ioflag |= IO_SYNC; if ((flags & O_FOFFSET) == 0) uio->uio_offset = 
vn_get_fpf_offset(fp); + if (vp->v_mount) + VFS_MODIFYING(vp->v_mount); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); ioflag |= sequential_heuristic(uio, fp); error = VOP_WRITE(vp, uio, ioflag, cred); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 8ad6e9199e..1a4b81bbe7 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -567,6 +567,7 @@ typedef void vfs_account_t(struct mount *mp, uid_t uid, gid_t gid, int64_t delta); typedef void vfs_ncpgen_set_t(struct mount *mp, struct namecache *ncp); typedef int vfs_ncpgen_test_t(struct mount *mp, struct namecache *ncp); +typedef void vfs_modifying_t(struct mount *mp); int vfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred); int vfs_start(struct mount *mp, int flags); @@ -589,7 +590,7 @@ int vfs_uninit(struct vfsconf *vfc, struct vfsconf *vfsp); int vfs_extattrctl(struct mount *mp, int cmd, struct vnode *vp, int attrnamespace, const char *attrname, struct ucred *cred); - +int vfs_modifying(struct mount *mp); struct vfsops { vfs_mount_t *vfs_mount; @@ -612,6 +613,7 @@ struct vfsops { vfs_account_t *vfs_account; vfs_ncpgen_set_t *vfs_ncpgen_set; vfs_ncpgen_test_t *vfs_ncpgen_test; + vfs_modifying_t *vfs_modifying; }; #define VFS_MOUNT(MP, PATH, DATA, CRED) \ @@ -653,6 +655,8 @@ struct vfsops { MP->mnt_op->vfs_ncpgen_set(MP, NCP) #define VFS_NCPGEN_TEST(MP, NCP) \ MP->mnt_op->vfs_ncpgen_test(MP, NCP) +#define VFS_MODIFYING(MP) \ + MP->mnt_op->vfs_modifying(MP) #endif @@ -765,6 +769,7 @@ vfs_account_t vfs_stdaccount; vfs_account_t vfs_noaccount; vfs_ncpgen_set_t vfs_stdncpgen_set; vfs_ncpgen_test_t vfs_stdncpgen_test; +vfs_modifying_t vfs_stdmodifying; struct vop_access_args; int vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid, diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index af464634ed..b6f5f806c3 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -431,6 +431,7 @@ int getspecialvnode (enum vtagtype tag, struct mount *mp, struct vop_ops **ops, struct vnode **vpp, int timo, 
int lkflags); void speedup_syncer (struct mount *mp); +void trigger_syncer (struct mount *mp); int vaccess(enum vtype, mode_t, uid_t, gid_t, mode_t, struct ucred *); void vattr_null (struct vattr *vap); int vcount (struct vnode *vp); -- 2.41.0