X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/blobdiff_plain/ef77015ecbc711f60630bc90c67da65b7480100a..50e4012a4b55e1efc595db0db397b4365f08b640:/sys/kern/vfs_sync.c

diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c
index 0567e0d713..4591043c0f 100644
--- a/sys/kern/vfs_sync.c
+++ b/sys/kern/vfs_sync.c
@@ -37,7 +37,6 @@
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
- * $DragonFly: src/sys/kern/vfs_sync.c,v 1.18 2008/05/18 05:54:25 dillon Exp $
  */
 
 /*
@@ -105,11 +104,38 @@ static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW,
         &stat_rush_requests, 0, "");
 
-static int syncer_delayno = 0;
-static long syncer_mask;
-static struct lwkt_token syncer_token;
 LIST_HEAD(synclist, vnode);
-static struct synclist *syncer_workitem_pending;
+
+#define SC_FLAG_EXIT        (0x1)       /* request syncer exit */
+#define SC_FLAG_DONE        (0x2)       /* syncer confirm exit */
+#define SC_FLAG_BIOOPS_ALL  (0x4)       /* do bufops_sync(NULL) */
+
+struct syncer_ctx {
+        struct mount *sc_mp;
+        struct lwkt_token sc_token;
+        struct thread *sc_thread;
+        int sc_flags;
+
+        struct synclist *syncer_workitem_pending;
+        long syncer_mask;
+        int syncer_delayno;
+};
+
+static struct syncer_ctx syncer_ctx0;
+
+static void syncer_thread(void *);
+
+static void
+syncer_ctx_init(struct syncer_ctx *ctx, struct mount *mp)
+{
+        ctx->sc_mp = mp;
+        lwkt_token_init(&ctx->sc_token, "syncer");
+        ctx->sc_flags = 0;
+
+        ctx->syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF,
+                                                &ctx->syncer_mask);
+        ctx->syncer_delayno = 0;
+}
 
 /*
  * Called from vfsinit()
@@ -117,10 +143,27 @@ static struct synclist *syncer_workitem_pending;
 void
 vfs_sync_init(void)
 {
-        syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF,
-                                           &syncer_mask);
-        syncer_maxdelay = syncer_mask + 1;
-        lwkt_token_init(&syncer_token);
+        syncer_ctx_init(&syncer_ctx0, NULL);
+        syncer_maxdelay = syncer_ctx0.syncer_mask + 1;
+        syncer_ctx0.sc_flags |= SC_FLAG_BIOOPS_ALL;
+
+        /* Support schedcpu wakeup of syncer0 */
+        lbolt_syncer = &syncer_ctx0;
+}
+
+static struct syncer_ctx *
+vn_get_syncer(struct vnode *vp) {
+        struct mount *mp;
+        struct syncer_ctx *ctx;
+
+        ctx = NULL;
+        mp = vp->v_mount;
+        if (mp)
+                ctx = mp->mnt_syncer_ctx;
+        if (ctx == NULL)
+                ctx = &syncer_ctx0;
+
+        return (ctx);
 }
 
 /*
@@ -152,105 +195,187 @@ vfs_sync_init(void)
 /*
  * Add an item to the syncer work queue.
  *
+ * WARNING: Cannot get vp->v_token here if not already held, we must
+ *          depend on the syncer_token (which might already be held by
+ *          the caller) to protect v_synclist and VONWORKLST.
+ *
  * MPSAFE
  */
 void
-vn_syncer_add_to_worklist(struct vnode *vp, int delay)
+vn_syncer_add(struct vnode *vp, int delay)
 {
-        lwkt_tokref ilock;
+        struct syncer_ctx *ctx;
         int slot;
 
-        lwkt_gettoken(&ilock, &syncer_token);
+        ctx = vn_get_syncer(vp);
+
+        lwkt_gettoken(&ctx->sc_token);
 
         if (vp->v_flag & VONWORKLST)
                 LIST_REMOVE(vp, v_synclist);
         if (delay > syncer_maxdelay - 2)
                 delay = syncer_maxdelay - 2;
-        slot = (syncer_delayno + delay) & syncer_mask;
+        slot = (ctx->syncer_delayno + delay) & ctx->syncer_mask;
 
-        LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
+        LIST_INSERT_HEAD(&ctx->syncer_workitem_pending[slot], vp, v_synclist);
         vsetflags(vp, VONWORKLST);
 
-        lwkt_reltoken(&ilock);
+        lwkt_reltoken(&ctx->sc_token);
+}
+
+/*
+ * Removes the vnode from the syncer list.  Since we might block while
+ * acquiring the syncer_token we have to recheck conditions.
+ *
+ * vp->v_token held on call
+ */
+void
+vn_syncer_remove(struct vnode *vp)
+{
+        struct syncer_ctx *ctx;
+
+        ctx = vn_get_syncer(vp);
+
+        lwkt_gettoken(&ctx->sc_token);
+
+        if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) {
+                vclrflags(vp, VONWORKLST);
+                LIST_REMOVE(vp, v_synclist);
+        }
+
+        lwkt_reltoken(&ctx->sc_token);
+}
+
+/*
+ * Create per-filesystem syncer process
+ */
+void
+vn_syncer_thr_create(struct mount *mp)
+{
+        struct syncer_ctx *ctx;
+        static int syncalloc = 0;
+        int rc;
+
+        ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK);
+
+        syncer_ctx_init(ctx, mp);
+        mp->mnt_syncer_ctx = ctx;
+
+        rc = kthread_create(syncer_thread, ctx, &ctx->sc_thread,
+                            "syncer%d", ++syncalloc);
+}
+
+/*
+ * Stop per-filesystem syncer process
+ */
+void
+vn_syncer_thr_stop(struct mount *mp)
+{
+        struct syncer_ctx *ctx;
+
+        ctx = mp->mnt_syncer_ctx;
+
+        lwkt_gettoken(&ctx->sc_token);
+
+        /* Signal the syncer process to exit */
+        ctx->sc_flags |= SC_FLAG_EXIT;
+        wakeup(ctx);
+
+        /* Wait till syncer process exits */
+        while ((ctx->sc_flags & SC_FLAG_DONE) == 0)
+                tsleep(&ctx->sc_flags, 0, "syncexit", hz);
+
+        mp->mnt_syncer_ctx = NULL;
+        lwkt_reltoken(&ctx->sc_token);
+
+        kfree(ctx->syncer_workitem_pending, M_DEVBUF);
+        kfree(ctx, M_TEMP);
 }
 
 struct thread *updatethread;
-static void sched_sync (void);
-static struct kproc_desc up_kp = {
-        "syncer",
-        sched_sync,
-        &updatethread
-};
-SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
 
 /*
  * System filesystem synchronizer daemon.
  */
-void
-sched_sync(void)
+static void
+syncer_thread(void *_ctx)
 {
         struct thread *td = curthread;
+        struct syncer_ctx *ctx = _ctx;
         struct synclist *slp;
         struct vnode *vp;
-        lwkt_tokref ilock;
-        lwkt_tokref vlock;
         long starttime;
+        int *sc_flagsp;
+        int sc_flags;
+        int vnodes_synced = 0;
 
-        EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td,
-            SHUTDOWN_PRI_LAST);
-
+        /*
+         * syncer0 runs till system shutdown; per-filesystem syncers are
+         * terminated on filesystem unmount
+         */
+        if (ctx == &syncer_ctx0)
+                EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td,
+                                      SHUTDOWN_PRI_LAST);
         for (;;) {
                 kproc_suspend_loop();
 
                 starttime = time_second;
-                lwkt_gettoken(&ilock, &syncer_token);
+                lwkt_gettoken(&ctx->sc_token);
 
                 /*
                  * Push files whose dirty time has expired.  Be careful
                  * of interrupt race on slp queue.
                  */
-                slp = &syncer_workitem_pending[syncer_delayno];
-                syncer_delayno += 1;
-                if (syncer_delayno == syncer_maxdelay)
-                        syncer_delayno = 0;
+                slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno];
+                ctx->syncer_delayno += 1;
+                if (ctx->syncer_delayno == syncer_maxdelay)
+                        ctx->syncer_delayno = 0;
 
                 while ((vp = LIST_FIRST(slp)) != NULL) {
                         if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
                                 VOP_FSYNC(vp, MNT_LAZY, 0);
                                 vput(vp);
+                                vnodes_synced++;
                         }
 
                         /*
-                         * If the vnode is still at the head of the list
-                         * we were not able to completely flush it.  To
-                         * give other vnodes a fair shake we move it to
-                         * a later slot.
+                         * vp is stale but can still be used if we can
+                         * verify that it remains at the head of the list.
+                         * Be careful not to try to get vp->v_token as
+                         * vp can become stale if this blocks.
+                         *
+                         * If the vp is still at the head of the list we were
+                         * unable to completely flush it, so move it to a
+                         * later slot to give other vnodes a fair shot.
                          *
                          * Note that v_tag VT_VFS vnodes can remain on the
                          * worklist with no dirty blocks, but sync_fsync()
                          * moves it to a later slot so we will never see it
                         * here.
+                         *
+                         * It is possible to race a vnode with no dirty
+                         * buffers being removed from the list.  If this
+                         * occurs we will move the vnode in the synclist
+                         * and then the other thread will remove it.  Do
+                         * not try to remove it here.
                          */
-                        if (LIST_FIRST(slp) == vp) {
-                                lwkt_gettoken(&vlock, &vp->v_token);
-                                if (LIST_FIRST(slp) == vp) {
-                                        if (RB_EMPTY(&vp->v_rbdirty_tree) &&
-                                            !vn_isdisk(vp, NULL)) {
-                                                panic("sched_sync: fsync "
-                                                      "failed vp %p tag %d",
-                                                      vp, vp->v_tag);
-                                        }
-                                        vn_syncer_add_to_worklist(vp, syncdelay);
-                                }
-                                lwkt_reltoken(&vlock);
-                        }
+                        if (LIST_FIRST(slp) == vp)
+                                vn_syncer_add(vp, syncdelay);
                 }
-                lwkt_reltoken(&ilock);
+
+                sc_flags = ctx->sc_flags;
+
+                /* Exit on unmount */
+                if (sc_flags & SC_FLAG_EXIT)
+                        break;
+
+                lwkt_reltoken(&ctx->sc_token);
 
                 /*
                  * Do sync processing for each mount.
                  */
-                bio_ops_sync(NULL);
+                if (ctx->sc_mp || sc_flags & SC_FLAG_BIOOPS_ALL)
+                        bio_ops_sync(ctx->sc_mp);
 
                 /*
                  * The variable rushjob allows the kernel to speed up the
@@ -262,8 +387,8 @@ sched_sync(void)
                  * ahead of the disk that the kernel memory pool is being
                  * threatened with exhaustion.
                  */
-                if (rushjob > 0) {
-                        rushjob -= 1;
+                if (ctx == &syncer_ctx0 && rushjob > 0) {
+                        atomic_subtract_int(&rushjob, 1);
                         continue;
                 }
                 /*
@@ -275,16 +400,36 @@ sched_sync(void)
                  * filesystem activity.
                  */
                 if (time_second == starttime)
-                        tsleep(&lbolt_syncer, 0, "syncer", 0);
+                        tsleep(ctx, 0, "syncer", hz);
         }
+
+        /*
+         * Unmount/exit path for per-filesystem syncers; sc_token held
+         */
+        ctx->sc_flags |= SC_FLAG_DONE;
+        sc_flagsp = &ctx->sc_flags;
+        lwkt_reltoken(&ctx->sc_token);
+        wakeup(sc_flagsp);
+
+        kthread_exit();
 }
 
+static void
+syncer_thread_start(void) {
+        syncer_thread(&syncer_ctx0);
+}
+
+static struct kproc_desc up_kp = {
+        "syncer0",
+        syncer_thread_start,
+        &updatethread
+};
+SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
- *
- * YYY wchan field protected by the BGL.
  */
 int
 speedup_syncer(void)
@@ -293,9 +438,9 @@ speedup_syncer(void)
         * Don't bother protecting the test.  unsleep_and_wakeup_thread()
         * will only do something real if the thread is in the right state.
         */
-        wakeup(&lbolt_syncer);
+        wakeup(lbolt_syncer);
         if (rushjob < syncdelay / 2) {
-                rushjob += 1;
+                atomic_add_int(&rushjob, 1);
                 stat_rush_requests += 1;
                 return (1);
         }
@@ -362,7 +507,7 @@ vfs_allocate_syncvnode(struct mount *mp)
                 }
                 next = start;
         }
-        vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
+        vn_syncer_add(vp, syncdelay > 0 ? next % syncdelay : 0);
 
         /*
         * The mnt_syncer field inherits the vnode reference, which is
@@ -394,13 +539,13 @@ sync_fsync(struct vop_fsync_args *ap)
         /*
         * We only need to do something if this is a lazy evaluation.
         */
-        if (ap->a_waitfor != MNT_LAZY)
+        if ((ap->a_waitfor & MNT_LAZY) == 0)
                 return (0);
 
         /*
        * Move ourselves to the back of the sync list.
         */
-        vn_syncer_add_to_worklist(syncvp, syncdelay);
+        vn_syncer_add(syncvp, syncdelay);
 
         /*
         * Walk the list of vnodes pushing all that are dirty and
@@ -417,7 +562,7 @@ sync_fsync(struct vop_fsync_args *ap)
                 asyncflag = mp->mnt_flag & MNT_ASYNC;
                 mp->mnt_flag &= ~MNT_ASYNC;     /* ZZZ hack */
                 vfs_msync(mp, MNT_NOWAIT);
-                VFS_SYNC(mp, MNT_LAZY);
+                VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY);
                 if (asyncflag)
                         mp->mnt_flag |= MNT_ASYNC;
         }
@@ -451,15 +596,17 @@ static int
 sync_reclaim(struct vop_reclaim_args *ap)
 {
         struct vnode *vp = ap->a_vp;
-        lwkt_tokref ilock;
+        struct syncer_ctx *ctx;
+
+        ctx = vn_get_syncer(vp);
 
-        lwkt_gettoken(&ilock, &syncer_token);
+        lwkt_gettoken(&ctx->sc_token);
         KKASSERT(vp->v_mount->mnt_syncer != vp);
         if (vp->v_flag & VONWORKLST) {
                 LIST_REMOVE(vp, v_synclist);
                 vclrflags(vp, VONWORKLST);
         }
-        lwkt_reltoken(&ilock);
+        lwkt_reltoken(&ctx->sc_token);
 
         return (0);
 }
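
The change above splits the single global syncer into per-mount syncer threads: vn_syncer_thr_create() allocates a syncer_ctx, hangs it off mp->mnt_syncer_ctx and starts a "syncerN" kernel thread, while vn_syncer_thr_stop() raises SC_FLAG_EXIT, waits for SC_FLAG_DONE and frees the context; vnodes whose mount has no private context fall back to syncer_ctx0 via vn_get_syncer(). The sketch below shows how a filesystem's mount and unmount paths might use this API. It is only an illustration of the new entry points, not code from the diff; the myfs_* names and helper functions are invented.

/*
 * Hypothetical usage sketch: only vn_syncer_thr_create() and
 * vn_syncer_thr_stop() come from the change above, the myfs_* names
 * are made up for illustration.
 */
static int
myfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
{
        int error;

        error = myfs_mount_fs(mp, path, data, cred);    /* fs-specific setup */
        if (error == 0) {
                /*
                 * Give this mount its own syncer; dirty vnodes on the
                 * mount are then flushed by "syncerN" instead of syncer0.
                 */
                vn_syncer_thr_create(mp);
        }
        return (error);
}

static int
myfs_unmount(struct mount *mp, int mntflags)
{
        /*
         * Stop the per-mount syncer before tearing down the mount.
         * vn_syncer_thr_stop() signals SC_FLAG_EXIT, sleeps until the
         * thread acknowledges with SC_FLAG_DONE, and frees the context.
         */
        vn_syncer_thr_stop(mp);
        return (myfs_unmount_fs(mp, mntflags));
}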