| Commit | Line | Data |
|---|---|---|
| 5fd012e0 MD |
1 | /* |
| 2 | * Copyright (c) 1989, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * (c) UNIX System Laboratories, Inc. | |
| 5 | * All or some portions of this file are derived from material licensed | |
| 6 | * to the University of California by American Telephone and Telegraph | |
| 7 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
| 8 | * the permission of UNIX System Laboratories, Inc. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 | |
| 39 | * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $ | |
| 5fd012e0 MD |
40 | */ |
| 41 | ||
| 42 | /* | |
| 43 | * External virtual filesystem routines | |
| 44 | */ | |
| 45 | #include "opt_ddb.h" | |
| 46 | ||
| 47 | #include <sys/param.h> | |
| 48 | #include <sys/systm.h> | |
| 49 | #include <sys/buf.h> | |
| 50 | #include <sys/conf.h> | |
| 51 | #include <sys/dirent.h> | |
| 52 | #include <sys/domain.h> | |
| 53 | #include <sys/eventhandler.h> | |
| 54 | #include <sys/fcntl.h> | |
| 55 | #include <sys/kernel.h> | |
| 56 | #include <sys/kthread.h> | |
| 57 | #include <sys/malloc.h> | |
| 58 | #include <sys/mbuf.h> | |
| 59 | #include <sys/mount.h> | |
| 60 | #include <sys/proc.h> | |
| 61 | #include <sys/namei.h> | |
| 62 | #include <sys/reboot.h> | |
| 63 | #include <sys/socket.h> | |
| 64 | #include <sys/stat.h> | |
| 65 | #include <sys/sysctl.h> | |
| 66 | #include <sys/syslog.h> | |
| 67 | #include <sys/vmmeter.h> | |
| 68 | #include <sys/vnode.h> | |
| 69 | ||
| 70 | #include <machine/limits.h> | |
| 71 | ||
| 72 | #include <vm/vm.h> | |
| 73 | #include <vm/vm_object.h> | |
| 74 | #include <vm/vm_extern.h> | |
| 75 | #include <vm/vm_kern.h> | |
| 76 | #include <vm/pmap.h> | |
| 77 | #include <vm/vm_map.h> | |
| 78 | #include <vm/vm_page.h> | |
| 79 | #include <vm/vm_pager.h> | |
| 80 | #include <vm/vnode_pager.h> | |
| 81 | ||
| 82 | #include <sys/buf2.h> | |
| 83 | #include <sys/thread2.h> | |
| 84 | ||
| 85 | /* | |
| 86 | * The workitem queue. | |
| 87 | */ | |
| 88 | #define SYNCER_MAXDELAY 32 | |
| 89 | static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ | |
| 90 | time_t syncdelay = 30; /* max time to delay syncing data */ | |
| 91 | SYSCTL_INT(_kern, OID_AUTO, syncdelay, CTLFLAG_RW, | |
| 92 | &syncdelay, 0, "VFS data synchronization delay"); | |
| 93 | time_t filedelay = 30; /* time to delay syncing files */ | |
| 94 | SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, | |
| 95 | &filedelay, 0, "File synchronization delay"); | |
| 96 | time_t dirdelay = 29; /* time to delay syncing directories */ | |
| 97 | SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, | |
| 98 | &dirdelay, 0, "Directory synchronization delay"); | |
| 99 | time_t metadelay = 28; /* time to delay syncing metadata */ | |
| 100 | SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, | |
| 101 | &metadelay, 0, "VFS metadata synchronization delay"); | |
| 102 | static int rushjob; /* number of slots to run ASAP */ | |
| 103 | static int stat_rush_requests; /* number of times I/O speeded up */ | |
| 104 | SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, | |
| 105 | &stat_rush_requests, 0, ""); | |
| 106 | ||
| 5fd012e0 | 107 | LIST_HEAD(synclist, vnode); |
| 50e4012a VS |
108 | |
| 109 | #define SC_FLAG_EXIT (0x1) /* request syncer exit */ | |
| 110 | #define SC_FLAG_DONE (0x2) /* syncer confirm exit */ | |
| 111 | #define SC_FLAG_BIOOPS_ALL (0x4) /* do bufops_sync(NULL) */ | |
| 112 | ||
| 113 | struct syncer_ctx { | |
| 114 | struct mount *sc_mp; | |
| 115 | struct lwkt_token sc_token; | |
| 116 | struct thread *sc_thread; | |
| 117 | int sc_flags; | |
| 118 | ||
| 119 | struct synclist *syncer_workitem_pending; | |
| 120 | long syncer_mask; | |
| 121 | int syncer_delayno; | |
| 122 | }; | |
| 123 | ||
| 124 | static struct syncer_ctx syncer_ctx0; | |
| 125 | ||
| 126 | static void syncer_thread(void *); | |
| 127 | ||
| 128 | static void | |
| 129 | syncer_ctx_init(struct syncer_ctx *ctx, struct mount *mp) | |
| 130 | { | |
| 131 | ctx->sc_mp = mp; | |
| 132 | lwkt_token_init(&ctx->sc_token, "syncer"); | |
| 133 | ctx->sc_flags = 0; | |
| 134 | ||
| 135 | ctx->syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF, | |
| 136 | &ctx->syncer_mask); | |
| 137 | ctx->syncer_delayno = 0; | |
| 138 | } | |
| 5fd012e0 MD |
139 | |
| 140 | /* | |
| 141 | * Called from vfsinit() | |
| 142 | */ | |
| 143 | void | |
| 144 | vfs_sync_init(void) | |
| 145 | { | |
| 50e4012a VS |
146 | syncer_ctx_init(&syncer_ctx0, NULL); |
| 147 | syncer_maxdelay = syncer_ctx0.syncer_mask + 1; | |
| 148 | syncer_ctx0.sc_flags |= SC_FLAG_BIOOPS_ALL; | |
| 149 | ||
| 150 | /* Support schedcpu wakeup of syncer0 */ | |
| 151 | lbolt_syncer = &syncer_ctx0; | |
| 152 | } | |
| 153 | ||
| 154 | static struct syncer_ctx * | |
| 155 | vn_get_syncer(struct vnode *vp) { | |
| 156 | struct mount *mp; | |
| 157 | struct syncer_ctx *ctx; | |
| 158 | ||
| 159 | ctx = NULL; | |
| 160 | mp = vp->v_mount; | |
| 161 | if (mp) | |
| 162 | ctx = mp->mnt_syncer_ctx; | |
| 163 | if (ctx == NULL) | |
| 164 | ctx = &syncer_ctx0; | |
| 165 | ||
| 166 | return (ctx); | |
| 5fd012e0 MD |
167 | } |
| 168 | ||
| 169 | /* | |
| 170 | * The workitem queue. | |
| 171 | * | |
| 172 | * It is useful to delay writes of file data and filesystem metadata | |
| 173 | * for tens of seconds so that quickly created and deleted files need | |
| 174 | * not waste disk bandwidth being created and removed. To realize this, | |
| 175 | * we append vnodes to a "workitem" queue. When running with a soft | |
| 176 | * updates implementation, most pending metadata dependencies should | |
| 177 | * not wait for more than a few seconds. Thus, writes to mounted block | |
| 178 | * devices are delayed only about half the time that file data is delayed. | |
| 179 | * Similarly, directory updates are more critical, so are only delayed | |
| 180 | * about a third the time that file data is delayed. Thus, there are | |
| 181 | * SYNCER_MAXDELAY queues that are processed round-robin at a rate of | |
| 182 | * one each second (driven off the filesystem syncer process). The | |
| 183 | * syncer_delayno variable indicates the next queue that is to be processed. | |
| 184 | * Items that need to be processed soon are placed in this queue: | |
| 185 | * | |
| 186 | * syncer_workitem_pending[syncer_delayno] | |
| 187 | * | |
| 188 | * A delay of fifteen seconds is done by placing the request fifteen | |
| 189 | * entries later in the queue: | |
| 190 | * | |
| 191 | * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] | |
| 192 | * | |
| 193 | */ | |
| 194 | ||
| 195 | /* | |
| 196 | * Add an item to the syncer work queue. | |
| b1c20cfa | 197 | * |
| 77912481 MD |
198 | * WARNING: Cannot get vp->v_token here if not already held, we must |
| 199 | * depend on the syncer_token (which might already be held by | |
| 200 | * the caller) to protect v_synclist and VONWORKLST. | |
| 201 | * | |
| b1c20cfa | 202 | * MPSAFE |
| 5fd012e0 MD |
203 | */ |
| 204 | void | |
| 77912481 | 205 | vn_syncer_add(struct vnode *vp, int delay) |
| 5fd012e0 | 206 | { |
| 50e4012a | 207 | struct syncer_ctx *ctx; |
| 5fd012e0 MD |
208 | int slot; |
| 209 | ||
| 50e4012a VS |
210 | ctx = vn_get_syncer(vp); |
| 211 | ||
| 212 | lwkt_gettoken(&ctx->sc_token); | |
| 5fd012e0 | 213 | |
| 0202303b | 214 | if (vp->v_flag & VONWORKLST) |
| 5fd012e0 | 215 | LIST_REMOVE(vp, v_synclist); |
| 5fd012e0 MD |
216 | if (delay > syncer_maxdelay - 2) |
| 217 | delay = syncer_maxdelay - 2; | |
| 50e4012a | 218 | slot = (ctx->syncer_delayno + delay) & ctx->syncer_mask; |
| 5fd012e0 | 219 | |
| 50e4012a | 220 | LIST_INSERT_HEAD(&ctx->syncer_workitem_pending[slot], vp, v_synclist); |
| 2247fe02 | 221 | vsetflags(vp, VONWORKLST); |
| 0202303b | 222 | |
| 50e4012a | 223 | lwkt_reltoken(&ctx->sc_token); |
| 5fd012e0 MD |
224 | } |
| 225 | ||
| 77912481 MD |
226 | /* |
| 227 | * Removes the vnode from the syncer list. Since we might block while | |
| 228 | * acquiring the syncer_token we have to recheck conditions. | |
| 229 | * | |
| 230 | * vp->v_token held on call | |
| 231 | */ | |
| 232 | void | |
| 233 | vn_syncer_remove(struct vnode *vp) | |
| 234 | { | |
| 50e4012a VS |
235 | struct syncer_ctx *ctx; |
| 236 | ||
| 237 | ctx = vn_get_syncer(vp); | |
| 238 | ||
| 239 | lwkt_gettoken(&ctx->sc_token); | |
| 77912481 MD |
240 | |
| 241 | if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) { | |
| 242 | vclrflags(vp, VONWORKLST); | |
| 243 | LIST_REMOVE(vp, v_synclist); | |
| 244 | } | |
| 245 | ||
| 50e4012a VS |
246 | lwkt_reltoken(&ctx->sc_token); |
| 247 | } | |
| 248 | ||
| 249 | /* | |
| 250 | * Create per-filesystem syncer process | |
| 251 | */ | |
| 252 | void | |
| 253 | vn_syncer_thr_create(struct mount *mp) | |
| 254 | { | |
| 255 | struct syncer_ctx *ctx; | |
| 256 | static int syncalloc = 0; | |
| 257 | int rc; | |
| 258 | ||
| 259 | ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK); | |
| 260 | ||
| 261 | syncer_ctx_init(ctx, mp); | |
| 262 | mp->mnt_syncer_ctx = ctx; | |
| 263 | ||
| 264 | rc = kthread_create(syncer_thread, ctx, &ctx->sc_thread, | |
| 265 | "syncer%d", ++syncalloc); | |
| 266 | } | |
| 267 | ||
| 268 | /* | |
| 269 | * Stop per-filesystem syncer process | |
| 270 | */ | |
| 271 | void | |
| 272 | vn_syncer_thr_stop(struct mount *mp) | |
| 273 | { | |
| 274 | struct syncer_ctx *ctx; | |
| 275 | ||
| 276 | ctx = mp->mnt_syncer_ctx; | |
| 277 | ||
| 278 | lwkt_gettoken(&ctx->sc_token); | |
| 279 | ||
| 280 | /* Signal the syncer process to exit */ | |
| 281 | ctx->sc_flags |= SC_FLAG_EXIT; | |
| 282 | wakeup(ctx); | |
| 283 | ||
| 284 | /* Wait till syncer process exits */ | |
| 285 | while ((ctx->sc_flags & SC_FLAG_DONE) == 0) | |
| 286 | tsleep(&ctx->sc_flags, 0, "syncexit", hz); | |
| 287 | ||
| 288 | mp->mnt_syncer_ctx = NULL; | |
| 289 | lwkt_reltoken(&ctx->sc_token); | |
| 290 | ||
| 291 | kfree(ctx->syncer_workitem_pending, M_DEVBUF); | |
| 292 | kfree(ctx, M_TEMP); | |
| 77912481 MD |
293 | } |
| 294 | ||
| 5fd012e0 | 295 | struct thread *updatethread; |
| 5fd012e0 MD |
296 | |
| 297 | /* | |
| 298 | * System filesystem synchronizer daemon. | |
| 299 | */ | |
| cd8ab232 | 300 | static void |
| 50e4012a | 301 | syncer_thread(void *_ctx) |
| 5fd012e0 | 302 | { |
| 0202303b | 303 | struct thread *td = curthread; |
| 50e4012a | 304 | struct syncer_ctx *ctx = _ctx; |
| 5fd012e0 MD |
305 | struct synclist *slp; |
| 306 | struct vnode *vp; | |
| 307 | long starttime; | |
| 50e4012a VS |
308 | int *sc_flagsp; |
| 309 | int sc_flags; | |
| 310 | int vnodes_synced = 0; | |
| 5fd012e0 | 311 | |
| 50e4012a VS |
312 | /* |
| 313 | * syncer0 runs till system shutdown; per-filesystem syncers are | |
| 314 | * terminated on filesystem unmount | |
| 315 | */ | |
| 316 | if (ctx == &syncer_ctx0) | |
| 317 | EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td, | |
| 318 | SHUTDOWN_PRI_LAST); | |
| 5fd012e0 MD |
319 | for (;;) { |
| 320 | kproc_suspend_loop(); | |
| 321 | ||
| 322 | starttime = time_second; | |
| 50e4012a | 323 | lwkt_gettoken(&ctx->sc_token); |
| 5fd012e0 MD |
324 | |
| 325 | /* | |
| 326 | * Push files whose dirty time has expired. Be careful | |
| 327 | * of interrupt race on slp queue. | |
| 328 | */ | |
| 50e4012a VS |
329 | slp = &ctx->syncer_workitem_pending[ctx->syncer_delayno]; |
| 330 | ctx->syncer_delayno += 1; | |
| 331 | if (ctx->syncer_delayno == syncer_maxdelay) | |
| 332 | ctx->syncer_delayno = 0; | |
| 5fd012e0 MD |
333 | |
| 334 | while ((vp = LIST_FIRST(slp)) != NULL) { | |
| 87de5057 | 335 | if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { |
| 52174f71 | 336 | VOP_FSYNC(vp, MNT_LAZY, 0); |
| 0e0b6202 | 337 | vput(vp); |
| 50e4012a | 338 | vnodes_synced++; |
| 5fd012e0 | 339 | } |
| 6bae6177 MD |
340 | |
| 341 | /* | |
| 77912481 MD |
342 | * vp is stale but can still be used if we can |
| 343 | * verify that it remains at the head of the list. | |
| 344 | * Be careful not to try to get vp->v_token as | |
| 345 | * vp can become stale if this blocks. | |
| 346 | * | |
| 347 | * If the vp is still at the head of the list were | |
| 348 | * unable to completely flush it and move it to | |
| 349 | * a later slot to give other vnodes a fair shot. | |
| 6bae6177 MD |
350 | * |
| 351 | * Note that v_tag VT_VFS vnodes can remain on the | |
| 352 | * worklist with no dirty blocks, but sync_fsync() | |
| 353 | * moves it to a later slot so we will never see it | |
| 354 | * here. | |
| 77912481 MD |
355 | * |
| 356 | * It is possible to race a vnode with no dirty | |
| 357 | * buffers being removed from the list. If this | |
| 358 | * occurs we will move the vnode in the synclist | |
| 359 | * and then the other thread will remove it. Do | |
| 360 | * not try to remove it here. | |
| 6bae6177 | 361 | */ |
| 77912481 MD |
362 | if (LIST_FIRST(slp) == vp) |
| 363 | vn_syncer_add(vp, syncdelay); | |
| 5fd012e0 | 364 | } |
| 50e4012a VS |
365 | |
| 366 | sc_flags = ctx->sc_flags; | |
| 367 | ||
| 368 | /* Exit on unmount */ | |
| 369 | if (sc_flags & SC_FLAG_EXIT) | |
| 370 | break; | |
| 371 | ||
| 372 | lwkt_reltoken(&ctx->sc_token); | |
| 5fd012e0 MD |
373 | |
| 374 | /* | |
| 408357d8 | 375 | * Do sync processing for each mount. |
| 5fd012e0 | 376 | */ |
| 50e4012a VS |
377 | if (ctx->sc_mp || sc_flags & SC_FLAG_BIOOPS_ALL) |
| 378 | bio_ops_sync(ctx->sc_mp); | |
| 5fd012e0 MD |
379 | |
| 380 | /* | |
| 381 | * The variable rushjob allows the kernel to speed up the | |
| 382 | * processing of the filesystem syncer process. A rushjob | |
| 383 | * value of N tells the filesystem syncer to process the next | |
| 384 | * N seconds worth of work on its queue ASAP. Currently rushjob | |
| 385 | * is used by the soft update code to speed up the filesystem | |
| 386 | * syncer process when the incore state is getting so far | |
| 387 | * ahead of the disk that the kernel memory pool is being | |
| 388 | * threatened with exhaustion. | |
| 389 | */ | |
| 50e4012a | 390 | if (ctx == &syncer_ctx0 && rushjob > 0) { |
| 145eb524 | 391 | atomic_subtract_int(&rushjob, 1); |
| 5fd012e0 MD |
392 | continue; |
| 393 | } | |
| 394 | /* | |
| 395 | * If it has taken us less than a second to process the | |
| 396 | * current work, then wait. Otherwise start right over | |
| 397 | * again. We can still lose time if any single round | |
| 398 | * takes more than two seconds, but it does not really | |
| 399 | * matter as we are just trying to generally pace the | |
| 400 | * filesystem activity. | |
| 401 | */ | |
| 402 | if (time_second == starttime) | |
| 50e4012a | 403 | tsleep(ctx, 0, "syncer", hz); |
| 5fd012e0 | 404 | } |
| 50e4012a VS |
405 | |
| 406 | /* | |
| 407 | * Unmount/exit path for per-filesystem syncers; sc_token held | |
| 408 | */ | |
| 409 | ctx->sc_flags |= SC_FLAG_DONE; | |
| 410 | sc_flagsp = &ctx->sc_flags; | |
| 411 | lwkt_reltoken(&ctx->sc_token); | |
| 412 | wakeup(sc_flagsp); | |
| 413 | ||
| 414 | kthread_exit(); | |
| 415 | } | |
| 416 | ||
| 417 | static void | |
| 418 | syncer_thread_start(void) { | |
| 419 | syncer_thread(&syncer_ctx0); | |
| 5fd012e0 MD |
420 | } |
| 421 | ||
| cd8ab232 | 422 | static struct kproc_desc up_kp = { |
| 50e4012a VS |
423 | "syncer0", |
| 424 | syncer_thread_start, | |
| cd8ab232 MD |
425 | &updatethread |
| 426 | }; | |
| 427 | SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) | |
| 428 | ||
| 5fd012e0 MD |
429 | /* |
| 430 | * Request the syncer daemon to speed up its work. | |
| 431 | * We never push it to speed up more than half of its | |
| 432 | * normal turn time, otherwise it could take over the cpu. | |
| 5fd012e0 MD |
433 | */ |
| 434 | int | |
| 435 | speedup_syncer(void) | |
| 436 | { | |
| 344ad853 MD |
437 | /* |
| 438 | * Don't bother protecting the test. unsleep_and_wakeup_thread() | |
| 439 | * will only do something real if the thread is in the right state. | |
| 440 | */ | |
| 50e4012a | 441 | wakeup(lbolt_syncer); |
| 5fd012e0 | 442 | if (rushjob < syncdelay / 2) { |
| 145eb524 | 443 | atomic_add_int(&rushjob, 1); |
| 5fd012e0 MD |
444 | stat_rush_requests += 1; |
| 445 | return (1); | |
| 446 | } | |
| 447 | return(0); | |
| 448 | } | |
| 449 | ||
| 450 | /* | |
| 451 | * Routine to create and manage a filesystem syncer vnode. | |
| 452 | */ | |
| fef8985e MD |
453 | static int sync_close(struct vop_close_args *); |
| 454 | static int sync_fsync(struct vop_fsync_args *); | |
| 455 | static int sync_inactive(struct vop_inactive_args *); | |
| 456 | static int sync_reclaim (struct vop_reclaim_args *); | |
| 457 | static int sync_print(struct vop_print_args *); | |
| 5fd012e0 | 458 | |
| 66a1ddf5 MD |
459 | static struct vop_ops sync_vnode_vops = { |
| 460 | .vop_default = vop_eopnotsupp, | |
| 461 | .vop_close = sync_close, | |
| 462 | .vop_fsync = sync_fsync, | |
| 463 | .vop_inactive = sync_inactive, | |
| 464 | .vop_reclaim = sync_reclaim, | |
| 66a1ddf5 | 465 | .vop_print = sync_print, |
| 5fd012e0 MD |
466 | }; |
| 467 | ||
| 66a1ddf5 | 468 | static struct vop_ops *sync_vnode_vops_p = &sync_vnode_vops; |
| 5fd012e0 | 469 | |
| 66a1ddf5 | 470 | VNODEOP_SET(sync_vnode_vops); |
| 5fd012e0 MD |
471 | |
| 472 | /* | |
| 473 | * Create a new filesystem syncer vnode for the specified mount point. | |
| 474 | * This vnode is placed on the worklist and is responsible for sync'ing | |
| 475 | * the filesystem. | |
| 476 | * | |
| 477 | * NOTE: read-only mounts are also placed on the worklist. The filesystem | |
| 478 | * sync code is also responsible for cleaning up vnodes. | |
| 479 | */ | |
| 480 | int | |
| 481 | vfs_allocate_syncvnode(struct mount *mp) | |
| 482 | { | |
| 483 | struct vnode *vp; | |
| 484 | static long start, incr, next; | |
| 485 | int error; | |
| 486 | ||
| 487 | /* Allocate a new vnode */ | |
| 66a1ddf5 | 488 | error = getspecialvnode(VT_VFS, mp, &sync_vnode_vops_p, &vp, 0, 0); |
| 5fd012e0 MD |
489 | if (error) { |
| 490 | mp->mnt_syncer = NULL; | |
| 491 | return (error); | |
| 492 | } | |
| 493 | vp->v_type = VNON; | |
| 494 | /* | |
| 495 | * Place the vnode onto the syncer worklist. We attempt to | |
| 496 | * scatter them about on the list so that they will go off | |
| 497 | * at evenly distributed times even if all the filesystems | |
| 498 | * are mounted at once. | |
| 499 | */ | |
| 500 | next += incr; | |
| 501 | if (next == 0 || next > syncer_maxdelay) { | |
| 502 | start /= 2; | |
| 503 | incr /= 2; | |
| 504 | if (start == 0) { | |
| 505 | start = syncer_maxdelay / 2; | |
| 506 | incr = syncer_maxdelay; | |
| 507 | } | |
| 508 | next = start; | |
| 509 | } | |
| 77912481 | 510 | vn_syncer_add(vp, syncdelay > 0 ? next % syncdelay : 0); |
| 2ec4b00d MD |
511 | |
| 512 | /* | |
| 513 | * The mnt_syncer field inherits the vnode reference, which is | |
| 514 | * held until later decomissioning. | |
| 515 | */ | |
| 5fd012e0 MD |
516 | mp->mnt_syncer = vp; |
| 517 | vx_unlock(vp); | |
| 518 | return (0); | |
| fef8985e MD |
519 | } |
| 520 | ||
/*
 * Close operation for the syncer vnode: nothing to do, always
 * reports success.
 */
static int
sync_close(struct vop_close_args *ap)
{
	return (0);
}
| 526 | ||
| 527 | /* | |
| 528 | * Do a lazy sync of the filesystem. | |
| 529 | * | |
| b478fdce | 530 | * sync_fsync { struct vnode *a_vp, int a_waitfor } |
| 5fd012e0 MD |
531 | */ |
| 532 | static int | |
| 533 | sync_fsync(struct vop_fsync_args *ap) | |
| 534 | { | |
| 535 | struct vnode *syncvp = ap->a_vp; | |
| 536 | struct mount *mp = syncvp->v_mount; | |
| 5fd012e0 MD |
537 | int asyncflag; |
| 538 | ||
| 539 | /* | |
| 540 | * We only need to do something if this is a lazy evaluation. | |
| 541 | */ | |
| 28271622 | 542 | if ((ap->a_waitfor & MNT_LAZY) == 0) |
| 5fd012e0 MD |
543 | return (0); |
| 544 | ||
| 545 | /* | |
| 546 | * Move ourselves to the back of the sync list. | |
| 547 | */ | |
| 77912481 | 548 | vn_syncer_add(syncvp, syncdelay); |
| 5fd012e0 MD |
549 | |
| 550 | /* | |
| 551 | * Walk the list of vnodes pushing all that are dirty and | |
| 552 | * not already on the sync list, and freeing vnodes which have | |
| 553 | * no refs and whos VM objects are empty. vfs_msync() handles | |
| 554 | * the VM issues and must be called whether the mount is readonly | |
| 555 | * or not. | |
| 556 | */ | |
| f9642f56 | 557 | if (vfs_busy(mp, LK_NOWAIT) != 0) |
| 5fd012e0 | 558 | return (0); |
| 5fd012e0 MD |
559 | if (mp->mnt_flag & MNT_RDONLY) { |
| 560 | vfs_msync(mp, MNT_NOWAIT); | |
| 561 | } else { | |
| 562 | asyncflag = mp->mnt_flag & MNT_ASYNC; | |
| 563 | mp->mnt_flag &= ~MNT_ASYNC; /* ZZZ hack */ | |
| 564 | vfs_msync(mp, MNT_NOWAIT); | |
| 28271622 | 565 | VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); |
| 5fd012e0 MD |
566 | if (asyncflag) |
| 567 | mp->mnt_flag |= MNT_ASYNC; | |
| 568 | } | |
| f9642f56 | 569 | vfs_unbusy(mp); |
| 5fd012e0 MD |
570 | return (0); |
| 571 | } | |
| 572 | ||
| 573 | /* | |
| 3c37c940 | 574 | * The syncer vnode is no longer referenced. |
| 5fd012e0 MD |
575 | * |
| 576 | * sync_inactive { struct vnode *a_vp, struct proc *a_p } | |
| 577 | */ | |
| 578 | static int | |
| 579 | sync_inactive(struct vop_inactive_args *ap) | |
| 580 | { | |
| 3c37c940 | 581 | vgone_vxlocked(ap->a_vp); |
| 5fd012e0 MD |
582 | return (0); |
| 583 | } | |
| 584 | ||
| 585 | /* | |
| 586 | * The syncer vnode is no longer needed and is being decommissioned. | |
| 2ec4b00d MD |
587 | * This can only occur when the last reference has been released on |
| 588 | * mp->mnt_syncer, so mp->mnt_syncer had better be NULL. | |
| 5fd012e0 | 589 | * |
| e43a034f MD |
590 | * Modifications to the worklist must be protected with a critical |
| 591 | * section. | |
| 5fd012e0 MD |
592 | * |
| 593 | * sync_reclaim { struct vnode *a_vp } | |
| 594 | */ | |
| 595 | static int | |
| 596 | sync_reclaim(struct vop_reclaim_args *ap) | |
| 597 | { | |
| 598 | struct vnode *vp = ap->a_vp; | |
| 50e4012a VS |
599 | struct syncer_ctx *ctx; |
| 600 | ||
| 601 | ctx = vn_get_syncer(vp); | |
| 5fd012e0 | 602 | |
| 50e4012a | 603 | lwkt_gettoken(&ctx->sc_token); |
| 2ec4b00d | 604 | KKASSERT(vp->v_mount->mnt_syncer != vp); |
| 5fd012e0 MD |
605 | if (vp->v_flag & VONWORKLST) { |
| 606 | LIST_REMOVE(vp, v_synclist); | |
| 2247fe02 | 607 | vclrflags(vp, VONWORKLST); |
| 5fd012e0 | 608 | } |
| 50e4012a | 609 | lwkt_reltoken(&ctx->sc_token); |
| 5fd012e0 MD |
610 | |
| 611 | return (0); | |
| 612 | } | |
| 613 | ||
| 614 | /* | |
| 615 | * Print out a syncer vnode. | |
| 616 | * | |
| 617 | * sync_print { struct vnode *a_vp } | |
| 618 | */ | |
| 619 | static int | |
| 620 | sync_print(struct vop_print_args *ap) | |
| 621 | { | |
| 622 | struct vnode *vp = ap->a_vp; | |
| 623 | ||
| 6ea70f76 | 624 | kprintf("syncer vnode"); |
| 5fd012e0 | 625 | lockmgr_printinfo(&vp->v_lock); |
| 6ea70f76 | 626 | kprintf("\n"); |
| 5fd012e0 MD |
627 | return (0); |
| 628 | } | |
| 629 |