From: Venkatesh Srinivas Date: Mon, 26 Dec 2011 08:12:14 +0000 (-0800) Subject: kernel -- Per-mount syncer thread infrastructure. X-Git-Tag: v3.0.0~272 X-Git-Url: http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/50e4012a4b55e1efc595db0db397b4365f08b640 kernel -- Per-mount syncer thread infrastructure. Introduce infrastructure to create per-mount periodic filesystem syncer threads. For a filesystem flagged MNTK_THR_SYNC, create a thread to periodically synchronize dirty vnodes and the entire filesystem via VOP_FSYNC and VFS_SYNC. This thread subsumes that responsibility from the global syncer. The thread will be created at mount time and torn down at unmount. For filesystems not flagged, the global syncer thread (syncer0) is still used. The global syncer thread is also used for the bio_ops_sync(NULL) call, to invoke every filesystem's bio_ops_sync path. This is primarily a concern for UFS/softdep. The global syncer is also the only syncer thread to respond to speedup_syncer / rushjob requests; this is again a consideration for softdep. Future work will consider the relative phase of the syncer thread clocks, so that they may wake together; this may result in more work done per wakeup. Currently no filesystems are flagged MNTK_THR_SYNC, so there should be no functional change associated with this commit. --- diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 3513c64..959a514 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -74,7 +74,7 @@ SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) int hogticks; int lbolt; -int lbolt_syncer; +void *lbolt_syncer; int sched_quantum; /* Roundrobin scheduling quantum in ticks. 
*/ int ncpus; int ncpus2, ncpus2_shift, ncpus2_mask; /* note: mask not cpumask_t */ @@ -190,7 +190,7 @@ schedcpu(void *arg) allproc_scan(schedcpu_stats, NULL); allproc_scan(schedcpu_resource, NULL); wakeup((caddr_t)&lbolt); - wakeup((caddr_t)&lbolt_syncer); + wakeup(lbolt_syncer); callout_reset(&schedcpu_callout, hz, schedcpu, NULL); } diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index 3573b98..4591043 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -1,6 +1,4 @@ /* - * (MPSAFE) - * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -39,7 +37,6 @@ * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $ - * $DragonFly: src/sys/kern/vfs_sync.c,v 1.18 2008/05/18 05:54:25 dillon Exp $ */ /* @@ -84,7 +81,6 @@ #include #include -#include /* * The workitem queue. @@ -108,11 +104,38 @@ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); -static int syncer_delayno = 0; -static long syncer_mask; -static struct lwkt_token syncer_token; LIST_HEAD(synclist, vnode); -static struct synclist *syncer_workitem_pending; + +#define SC_FLAG_EXIT (0x1) /* request syncer exit */ +#define SC_FLAG_DONE (0x2) /* syncer confirm exit */ +#define SC_FLAG_BIOOPS_ALL (0x4) /* do bufops_sync(NULL) */ + +struct syncer_ctx { + struct mount *sc_mp; + struct lwkt_token sc_token; + struct thread *sc_thread; + int sc_flags; + + struct synclist *syncer_workitem_pending; + long syncer_mask; + int syncer_delayno; +}; + +static struct syncer_ctx syncer_ctx0; + +static void syncer_thread(void *); + +static void +syncer_ctx_init(struct syncer_ctx *ctx, struct mount *mp) +{ + ctx->sc_mp = mp; + lwkt_token_init(&ctx->sc_token, "syncer"); + ctx->sc_flags = 0; + + ctx->syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF, + 
&ctx->syncer_mask); + ctx->syncer_delayno = 0; +} /* * Called from vfsinit() @@ -120,10 +143,27 @@ static struct synclist *syncer_workitem_pending; void vfs_sync_init(void) { - syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF, - &syncer_mask); - syncer_maxdelay = syncer_mask + 1; - lwkt_token_init(&syncer_token, "syncer"); + syncer_ctx_init(&syncer_ctx0, NULL); + syncer_maxdelay = syncer_ctx0.syncer_mask + 1; + syncer_ctx0.sc_flags |= SC_FLAG_BIOOPS_ALL; + + /* Support schedcpu wakeup of syncer0 */ + lbolt_syncer = &syncer_ctx0; +} + +static struct syncer_ctx * +vn_get_syncer(struct vnode *vp) { + struct mount *mp; + struct syncer_ctx *ctx; + + ctx = NULL; + mp = vp->v_mount; + if (mp) + ctx = mp->mnt_syncer_ctx; + if (ctx == NULL) + ctx = &syncer_ctx0; + + return (ctx); } /* @@ -164,20 +204,23 @@ vfs_sync_init(void) void vn_syncer_add(struct vnode *vp, int delay) { + struct syncer_ctx *ctx; int slot; - lwkt_gettoken(&syncer_token); + ctx = vn_get_syncer(vp); + + lwkt_gettoken(&ctx->sc_token); if (vp->v_flag & VONWORKLST) LIST_REMOVE(vp, v_synclist); if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; - slot = (syncer_delayno + delay) & syncer_mask; + slot = (ctx->syncer_delayno + delay) & ctx->syncer_mask; - LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + LIST_INSERT_HEAD(&ctx->syncer_workitem_pending[slot], vp, v_synclist); vsetflags(vp, VONWORKLST); - lwkt_reltoken(&syncer_token); + lwkt_reltoken(&ctx->sc_token); } /* @@ -189,14 +232,64 @@ vn_syncer_add(struct vnode *vp, int delay) void vn_syncer_remove(struct vnode *vp) { - lwkt_gettoken(&syncer_token); + struct syncer_ctx *ctx; + + ctx = vn_get_syncer(vp); + + lwkt_gettoken(&ctx->sc_token); if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) { vclrflags(vp, VONWORKLST); LIST_REMOVE(vp, v_synclist); } - lwkt_reltoken(&syncer_token); + lwkt_reltoken(&ctx->sc_token); +} + +/* + * Create per-filesystem syncer process + */ +void +vn_syncer_thr_create(struct 
mount *mp) +{ + struct syncer_ctx *ctx; + static int syncalloc = 0; + int rc; + + ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK); + + syncer_ctx_init(ctx, mp); + mp->mnt_syncer_ctx = ctx; + + rc = kthread_create(syncer_thread, ctx, &ctx->sc_thread, + "syncer%d", ++syncalloc); +} + +/* + * Stop per-filesystem syncer process + */ +void +vn_syncer_thr_stop(struct mount *mp) +{ + struct syncer_ctx *ctx; + + ctx = mp->mnt_syncer_ctx; + + lwkt_gettoken(&ctx->sc_token); + + /* Signal the syncer process to exit */ + ctx->sc_flags |= SC_FLAG_EXIT; + wakeup(ctx); + + /* Wait till syncer process exits */ + while ((ctx->sc_flags & SC_FLAG_DONE) == 0) + tsleep(&ctx->sc_flags, 0, "syncexit", hz); + + mp->mnt_syncer_ctx = NULL; + lwkt_reltoken(&ctx->sc_token); + + kfree(ctx->syncer_workitem_pending, M_DEVBUF); + kfree(ctx, M_TEMP); } struct thread *updatethread; @@ -205,34 +298,44 @@ struct thread *updatethread; * System filesystem synchronizer daemon. */ static void -syncer_thread(void) +syncer_thread(void *_ctx) { struct thread *td = curthread; + struct syncer_ctx *ctx = _ctx; struct synclist *slp; struct vnode *vp; long starttime; + int *sc_flagsp; + int sc_flags; + int vnodes_synced = 0; - EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td, - SHUTDOWN_PRI_LAST); + /* + * syncer0 runs till system shutdown; per-filesystem syncers are + * terminated on filesystem unmount + */ + if (ctx == &syncer_ctx0) + EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td, + SHUTDOWN_PRI_LAST); for (;;) { kproc_suspend_loop(); starttime = time_second; - lwkt_gettoken(&syncer_token); + lwkt_gettoken(&ctx->sc_token); /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. 
*/ - slp = &syncer_workitem_pending[syncer_delayno]; - syncer_delayno += 1; - if (syncer_delayno == syncer_maxdelay) - syncer_delayno = 0; + slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno]; + ctx->syncer_delayno += 1; + if (ctx->syncer_delayno == syncer_maxdelay) + ctx->syncer_delayno = 0; while ((vp = LIST_FIRST(slp)) != NULL) { if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { VOP_FSYNC(vp, MNT_LAZY, 0); vput(vp); + vnodes_synced++; } /* @@ -259,12 +362,20 @@ syncer_thread(void) if (LIST_FIRST(slp) == vp) vn_syncer_add(vp, syncdelay); } - lwkt_reltoken(&syncer_token); + + sc_flags = ctx->sc_flags; + + /* Exit on unmount */ + if (sc_flags & SC_FLAG_EXIT) + break; + + lwkt_reltoken(&ctx->sc_token); /* * Do sync processing for each mount. */ - bio_ops_sync(NULL); + if (ctx->sc_mp || sc_flags & SC_FLAG_BIOOPS_ALL) + bio_ops_sync(ctx->sc_mp); /* * The variable rushjob allows the kernel to speed up the @@ -276,7 +387,7 @@ syncer_thread(void) * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ - if (rushjob > 0) { + if (ctx == &syncer_ctx0 && rushjob > 0) { atomic_subtract_int(&rushjob, 1); continue; } @@ -289,13 +400,28 @@ syncer_thread(void) * filesystem activity. */ if (time_second == starttime) - tsleep(&lbolt_syncer, 0, "syncer", 0); + tsleep(ctx, 0, "syncer", hz); } + + /* + * Unmount/exit path for per-filesystem syncers; sc_token held + */ + ctx->sc_flags |= SC_FLAG_DONE; + sc_flagsp = &ctx->sc_flags; + lwkt_reltoken(&ctx->sc_token); + wakeup(sc_flagsp); + + kthread_exit(); +} + +static void +syncer_thread_start(void) { + syncer_thread(&syncer_ctx0); } static struct kproc_desc up_kp = { - "syncer", - syncer_thread, + "syncer0", + syncer_thread_start, &updatethread }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) @@ -312,7 +438,7 @@ speedup_syncer(void) * Don't bother protecting the test. unsleep_and_wakeup_thread() * will only do something real if the thread is in the right state. 
*/ - wakeup(&lbolt_syncer); + wakeup(lbolt_syncer); if (rushjob < syncdelay / 2) { atomic_add_int(&rushjob, 1); stat_rush_requests += 1; @@ -470,14 +596,17 @@ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; + struct syncer_ctx *ctx; + + ctx = vn_get_syncer(vp); - lwkt_gettoken(&syncer_token); + lwkt_gettoken(&ctx->sc_token); KKASSERT(vp->v_mount->mnt_syncer != vp); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); vclrflags(vp, VONWORKLST); } - lwkt_reltoken(&syncer_token); + lwkt_reltoken(&ctx->sc_token); return (0); } diff --git a/sys/kern/vfs_vfsops.c b/sys/kern/vfs_vfsops.c index bc5bc8f..c21f333 100644 --- a/sys/kern/vfs_vfsops.c +++ b/sys/kern/vfs_vfsops.c @@ -91,6 +91,12 @@ vfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_mount)(mp, path, data, cred); + if (error == 0) { + /* Create per-filesystem syncer threads if requested */ + if ((mp->mnt_flag & MNT_UPDATE) == 0 && + (mp->mnt_kern_flag & MNTK_THR_SYNC)) + vn_syncer_thr_create(mp); + } VFS_MPUNLOCK(mp); return (error); } @@ -124,6 +130,8 @@ vfs_unmount(struct mount *mp, int mntflags) int error; VFS_MPLOCK1(mp); + if (mp->mnt_kern_flag & MNTK_THR_SYNC) + vn_syncer_thr_stop(mp); error = (mp->mnt_op->vfs_acdone)(mp); if (error == 0) error = (mp->mnt_op->vfs_unmount)(mp, mntflags); diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 73f3124..ea39332 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -85,7 +85,7 @@ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int ticks; extern int lbolt; /* once a second sleep address */ -extern int lbolt_syncer; /* approx 1 hz but may be sped up */ +extern void *lbolt_syncer; /* approx 1 hz but may be sped up */ /* * Enumerated types for known system startup interfaces. 
diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 4221133..f4bd4c1 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -208,6 +208,7 @@ struct mount { struct vfsconf *mnt_vfc; /* configuration info */ long mnt_namecache_gen; /* ++ to clear negative hits */ struct vnode *mnt_syncer; /* syncer vnode */ + struct syncer_ctx *mnt_syncer_ctx; /* syncer process context */ struct vnodelst mnt_nvnodelist; /* list of vnodes this mount */ struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags shared with user */ @@ -341,6 +342,7 @@ struct mount { #define MNTK_FSMID 0x08000000 /* getattr supports FSMIDs */ #define MNTK_NOSTKMNT 0x10000000 /* no stacked mount point allowed */ #define MNTK_NOMSYNC 0x20000000 /* used by tmpfs */ +#define MNTK_THR_SYNC 0x40000000 /* fs sync thread requested */ #define MNTK_ALL_MPSAFE (MNTK_MPSAFE | MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | \ MNTK_GA_MPSAFE | MNTK_IN_MPSAFE | MNTK_SG_MPSAFE) diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 98a6f79..cadce2b 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -397,6 +397,7 @@ struct ucred; struct uio; struct vattr; struct vnode; +struct syncer_ctx; struct vnode *getsynthvnode(const char *devname); void addaliasu (struct vnode *vp, int x, int y); @@ -548,6 +549,9 @@ void mount_init(struct mount *mp); void vn_syncer_add(struct vnode *, int); void vn_syncer_remove(struct vnode *); +void vn_syncer_thr_create(struct mount *); +void vn_syncer_thr_stop(struct mount *); + void vnlru_proc_wait(void); extern struct vop_ops default_vnode_vops;