2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.6 2007/12/14 08:05:39 dillon Exp $
37 * IO Primitives and buffer cache management
39 * All major data-tracking structures in HAMMER contain a struct hammer_io
40 * which is used to manage their backing store. We use filesystem buffers
41 * for backing store and we leave them passively associated with their
44 * If the kernel tries to release a passively associated buf which we cannot
45 * yet let go we set B_LOCKED in the buffer and then actively released it
50 #include <sys/fcntl.h>
51 #include <sys/nlookup.h>
56 * Helper routine to disassociate a buffer cache buffer from an I/O
/*
 * Severs the two-way link between a buf and its owning HAMMER structure:
 * clears the b_dep list (so our bioops are no longer invoked for this bp)
 * and NULLs out every cached ondisk/a-list pointer that referenced the
 * buffer's data, dispatched on the structure type.
 *
 * NOTE(review): this excerpt elides interior lines (the switch() header,
 * break statements, and braces) — comments describe only what is visible.
 */
60 hammer_io_disassociate(union hammer_io_structure *io)
62 struct buf *bp = io->io.bp;
64 LIST_INIT(&bp->b_dep); /* clear the association */
/* Per-type cleanup: invalidate all pointers into bp's data */
69 case HAMMER_STRUCTURE_VOLUME:
70 io->volume.ondisk = NULL;
71 io->volume.alist.meta = NULL;
73 case HAMMER_STRUCTURE_SUPERCL:
74 io->supercl.ondisk = NULL;
75 io->supercl.alist.meta = NULL;
77 case HAMMER_STRUCTURE_CLUSTER:
78 io->cluster.ondisk = NULL;
/* a cluster caches four distinct a-list metadata views */
79 io->cluster.alist_master.meta = NULL;
80 io->cluster.alist_btree.meta = NULL;
81 io->cluster.alist_record.meta = NULL;
82 io->cluster.alist_mdata.meta = NULL;
84 case HAMMER_STRUCTURE_BUFFER:
85 io->buffer.ondisk = NULL;
86 io->buffer.alist.meta = NULL;
94 * Mark a cluster as being closed. This is done as late as possible,
95 * only when we are asked to flush the cluster
/*
 * Waits out any in-flight asynchronous cluster-header write, then, if the
 * cluster is still marked open, clears the on-disk HAMMER_CLUF_OPEN flag,
 * drops the in-memory state back to IDLE, and dirties the cluster header
 * (hammer_modify_cluster) so the cleared flag reaches disk.
 */
98 hammer_close_cluster(hammer_cluster_t cluster)
/* block until the async header write (if any) completes; the write
 * completion handler wakes us via wakeup(cluster) */
100 while (cluster->state == HAMMER_CLUSTER_ASYNC)
101 tsleep(cluster, 0, "hmrdep", 0);
102 if (cluster->state == HAMMER_CLUSTER_OPEN) {
103 cluster->state = HAMMER_CLUSTER_IDLE;
104 cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN;
105 kprintf("CLOSE CLUSTER\n"); /* debug trace */
106 hammer_modify_cluster(cluster);
112 * Load bp for a HAMMER structure.
/*
 * Reads the backing buffer for 'io' from the device vnode if none is
 * associated yet.  On a fresh read the buffer is hooked to hammer_bioops
 * and linked onto b_dep so the kernel notifies us of buffer events.
 *
 * NOTE(review): the bread() error check and the function's return path
 * are elided from this excerpt — confirm against the full source.
 */
115 hammer_io_read(struct vnode *devvp, struct hammer_io *io)
120 if ((bp = io->bp) == NULL) {
121 error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
124 bp->b_ops = &hammer_bioops;
125 LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
128 io->modified = 0; /* no new modifications yet */
129 io->released = 0; /* we hold an active lock on bp */
137 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
138 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
139 * I/O so we can call it.
/*
 * Obtains (getblk) rather than reads the backing buffer, for structures
 * being created fresh whose prior disk contents are irrelevant.
 *
 * NOTE(review): the vfs_bio_clrbuf() call referenced by the comment above
 * and the return path are elided from this excerpt.
 */
142 hammer_io_new(struct vnode *devvp, struct hammer_io *io)
146 if ((bp = io->bp) == NULL) {
147 io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
149 bp->b_ops = &hammer_bioops;
150 LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
151 io->released = 0; /* we hold an active lock on bp */
166 * This routine is called when a buffer within a cluster is modified. We
167 * mark the cluster open and immediately initiate asynchronous I/O. Any
168 * related hammer_buffer write I/O blocks until our async write completes.
169 * This guarentees (inasmuch as the OS can) that the cluster recovery code
170 * will see a cluster marked open if a crash occured while the filesystem
171 * still had dirty buffers associated with that cluster.
174 hammer_io_notify_cluster(hammer_cluster_t cluster)
176 struct hammer_io *io = &cluster->io;
/* double-checked test: cheap unlocked check, then re-test under the
 * exclusive io lock so only one thread marks the cluster open */
178 if (cluster->state == HAMMER_CLUSTER_IDLE) {
179 hammer_lock_ex(&cluster->io.lock);
180 if (cluster->state == HAMMER_CLUSTER_IDLE) {
183 kprintf("MARK CLUSTER OPEN\n"); /* debug trace */
184 cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
/* ASYNC state makes dependent buffer writes stall until the
 * header write completes (see hammer_io_checkwrite/start) */
185 cluster->state = HAMMER_CLUSTER_ASYNC;
186 hammer_modify_cluster(cluster);
/* NOTE(review): the lines that actually initiate the async header
 * write (original lines 187-188) are elided from this excerpt. */
189 /* leave cluster marked as modified */
191 hammer_unlock(&cluster->io.lock);
196 * This routine is called on the last reference to a hammer structure. If
197 * flush is non-zero we have to completely disassociate the bp from the
198 * structure (which may involve blocking). Otherwise we can leave the bp
199 * passively associated with the structure.
201 * The caller is holding io->lock exclusively.
/*
 * NOTE(review): this excerpt elides several branch bodies (bdirty/bqrelse
 * calls, the bp re-acquisition in the complex case, function braces and
 * returns).  Comments below annotate only the visible control flow.
 */
204 hammer_io_release(struct hammer_io *io, int flush)
206 union hammer_io_structure *iou = (void *)io;
207 hammer_cluster_t cluster;
210 if ((bp = io->bp) != NULL) {
212 * If neither we nor the kernel want to flush the bp, we can
213 * stop here. Make sure the bp is passively released
214 * before returning. Even though we are still holding it,
215 * we want to be notified when the kernel wishes to flush
216 * it out so make sure B_DELWRI is properly set if we had
217 * made modifications.
219 if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) {
220 if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
225 } else if (io->released == 0) {
233 * We've been asked to flush the buffer.
235 * If this is a hammer_buffer we may have to wait for the
236 * cluster header write to complete.
238 if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
239 (io->modified || (bp->b_flags & B_DELWRI))) {
240 cluster = iou->buffer.cluster;
/* stall until the async cluster-header write finishes so the
 * open flag hits disk before any dependent buffer data */
241 while (cluster->state == HAMMER_CLUSTER_ASYNC)
242 tsleep(iou->buffer.cluster, 0, "hmrdep", 0);
246 * If we have an open cluster header, close it
248 if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) {
249 hammer_close_cluster(&iou->cluster);
254 * Ok the dependancies are all gone. Check for the simple
255 * disassociation case.
257 if (io->released && (bp->b_flags & B_LOCKED) == 0 &&
258 (io->modified == 0 || (bp->b_flags & B_DELWRI))) {
259 hammer_io_disassociate(iou);
264 * Handle the more complex disassociation case. Acquire the
265 * buffer, clean up B_LOCKED, and deal with the modified
270 bp->b_flags &= ~B_LOCKED;
271 if (io->modified || (bp->b_flags & B_DELWRI))
276 hammer_io_disassociate(iou);
281 * Flush dirty data, if any.
/*
 * Sync-path flush of a single hammer_io.  Early-outs (elided returns)
 * apply when there is no bp, the kernel already queued it (B_DELWRI),
 * or we have no pending modifications.  The real work happens under an
 * exclusive io lock; a structure with extra refs is skipped rather than
 * raced (original lines 299-319 are partially elided from this excerpt).
 */
284 hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info)
289 if ((bp = io->bp) == NULL)
291 if (bp->b_flags & B_DELWRI)
293 if (io->modified == 0)
295 kprintf("IO FLUSH BP %p TYPE %d REFS %d\n", bp, io->type, io->lock.refs);
296 hammer_lock_ex(&io->lock);
/* re-test under the lock: bp/modified may have changed while blocked */
298 if ((bp = io->bp) != NULL && io->modified) {
306 if (info->waitfor & MNT_WAIT) {
307 /* NOTE(review): synchronous vs async write bodies elided */
311 } else if (io->lock.refs == 1) {
315 kprintf("can't flush, %d refs\n", io->lock.refs);
316 /* structure is in-use, don't race the write */
320 hammer_unlock(&io->lock);
329 * Pre and post I/O callbacks.
331 static void hammer_io_deallocate(struct buf *bp);
/*
 * bioops io_start callback: invoked by the kernel before it begins I/O on
 * a buf associated with us (via b_dep).  For hammer_buffers, stall until
 * the owning cluster's in-progress (header) I/O completes so the write
 * ordering cluster-header-before-data is preserved.
 */
334 hammer_io_start(struct buf *bp)
/* b_dep holds exactly our worklist node; recover the owning structure */
337 union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
339 if (io->io.type == HAMMER_STRUCTURE_BUFFER) {
340 while (io->buffer.cluster->io_in_progress) {
341 kprintf("hammer_io_start: wait for cluster\n");
342 tsleep(io->buffer.cluster, 0, "hmrdep", 0);
343 kprintf("hammer_io_start: wait for cluster done\n");
/*
 * bioops io_complete callback: invoked by the kernel when I/O on an
 * associated buf finishes.  When an async cluster-header write completes,
 * transition the cluster ASYNC -> OPEN and wake any threads sleeping on
 * the cluster (see the "hmrdep" tsleeps elsewhere in this file).
 */
350 hammer_io_complete(struct buf *bp)
352 union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
354 if (io->io.type == HAMMER_STRUCTURE_CLUSTER) {
355 if (io->cluster.state == HAMMER_CLUSTER_ASYNC) {
356 kprintf("cluster write complete flags %08x\n",
357 io->cluster.ondisk->clu_flags);
358 io->cluster.state = HAMMER_CLUSTER_OPEN;
359 wakeup(&io->cluster);
365 * Callback from kernel when it wishes to deallocate a passively
366 * associated structure. This can only occur if the buffer is
367 * passively associated with the structure. The kernel has locked
370 * If we cannot disassociate we set B_LOCKED to prevent the buffer
371 * from getting reused.
/*
 * NOTE(review): this excerpt elides the else-branch structure and the
 * closing braces between the two KKASSERT/disassociate paths; comments
 * annotate only the visible statements.
 */
374 hammer_io_deallocate(struct buf *bp)
376 union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);
378 /* XXX memory interlock, spinlock to sync cpus */
381 * Since the kernel is passing us a locked buffer, the HAMMER
382 * structure had better not believe it has a lock on the buffer.
384 KKASSERT(io->io.released);
388 * First, ref the structure to prevent either the buffer or the
389 * structure from going away or being unexpectedly flushed.
391 hammer_ref(&io->io.lock);
394 * Buffers can have active references from cached hammer_node's,
395 * even if those nodes are themselves passively cached. Attempt
396 * to clean them out. This may not succeed.
/* try-lock: if someone else holds the lock we simply skip the sweep */
398 if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
399 hammer_lock_ex_try(&io->io.lock) == 0) {
400 hammer_flush_buffer_nodes(&io->buffer);
401 hammer_unlock(&io->io.lock);
404 if (hammer_islastref(&io->io.lock)) {
406 * If we are the only ref left we can disassociate the I/O.
407 * It had better still be in a released state because the
408 * kernel is holding a lock on the buffer. Any passive
409 * modifications should have already been synchronized with
412 KKASSERT(io->io.released);
413 hammer_io_disassociate(io);
414 bp->b_flags &= ~B_LOCKED;
415 KKASSERT (io->io.modified == 0 || (bp->b_flags & B_DELWRI));
418 * Perform final rights on the structure. This can cause
419 * a chain reaction - e.g. last buffer -> last cluster ->
420 * last supercluster -> last volume.
/* each hammer_rel_* drops the ref taken above and may cascade */
422 switch(io->io.type) {
423 case HAMMER_STRUCTURE_VOLUME:
424 hammer_rel_volume(&io->volume, 1);
426 case HAMMER_STRUCTURE_SUPERCL:
427 hammer_rel_supercl(&io->supercl, 1);
429 case HAMMER_STRUCTURE_CLUSTER:
430 hammer_rel_cluster(&io->cluster, 1);
432 case HAMMER_STRUCTURE_BUFFER:
433 hammer_rel_buffer(&io->buffer, 1);
438 * Otherwise tell the kernel not to destroy the buffer.
440 * We have to unref the structure without performing any
441 * final rights to it to avoid a deadlock.
443 bp->b_flags |= B_LOCKED;
444 hammer_unref(&io->io.lock);
/*
 * bioops io_fsync callback stub.
 * NOTE(review): the body is entirely elided from this excerpt; per the
 * comment on hammer_io_sync below, these bioops stubs appear unused.
 */
451 hammer_io_fsync(struct vnode *vp)
457 * NOTE: will not be called unless we tell the kernel about the
458 * bioops. Unused... we use the mount's VFS_SYNC instead.
461 hammer_io_sync(struct mount *mp)
/* io_movedeps callback stub — body elided from this excerpt; per the
 * note above these bioops entries are effectively unused */
467 hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
472 * I/O pre-check for reading and writing. HAMMER only uses this for
473 * B_CACHE buffers so checkread just shouldn't happen, but if it does
476 * Writing is a different case. We don't want the kernel to try to write
477 * out a buffer that HAMMER may be modifying passively or which has a
480 * This code enforces the following write ordering: buffers, then cluster
481 * headers, then volume headers.
/* read pre-check callback — body elided from this excerpt; per the
 * comment above this path shouldn't normally be reached */
484 hammer_io_checkread(struct buf *bp)
/*
 * Write pre-check callback: decide whether the kernel may write this buf
 * now.  Delays (via B_LOCKED) buffers whose cluster header write is still
 * in flight, and buffers whose owning structure has active references;
 * otherwise closes an open cluster header and disassociates the bp so the
 * write can proceed.  Return statements are elided from this excerpt.
 */
490 hammer_io_checkwrite(struct buf *bp)
492 union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);
494 if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
495 iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) {
497 * Cannot write out a cluster buffer if the cluster header
498 * I/O opening the cluster has not completed.
500 kprintf("hammer_io_checkwrite: w/ depend - delayed\n");
/* B_LOCKED tells the kernel to hold onto the bp instead of writing */
501 bp->b_flags |= B_LOCKED;
503 } else if (iou->io.lock.refs) {
505 * Cannot write out a bp if its associated buffer has active
508 kprintf("hammer_io_checkwrite: w/ refs - delayed\n");
509 bp->b_flags |= B_LOCKED;
513 * We're good, but before we can let the kernel proceed we
514 * may have to make some adjustments.
516 if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
517 hammer_close_cluster(&iou->cluster);
518 kprintf("hammer_io_checkwrite: ok\n");
519 KKASSERT(iou->io.released);
520 hammer_io_disassociate(iou);
526 * Return non-zero if the caller should flush the structure associated
527 * with this io sub-structure.
530 hammer_io_checkflush(struct hammer_io *io)
/* flush when there is no bp at all, or the kernel wants the bp back
 * (B_LOCKED); the return statements are elided from this excerpt */
532 if (io->bp == NULL || (io->bp->b_flags & B_LOCKED))
538 * Return non-zero if we wish to delay the kernel's attempt to flush
539 * this buffer to disk.
/* io_countdeps bioops callback — body elided from this excerpt */
542 hammer_io_countdeps(struct buf *bp, int n)
/*
 * Dispatch table installed on each associated buf (bp->b_ops); the kernel
 * buffer cache invokes these hooks around I/O and buffer lifecycle events.
 * NOTE(review): the closing "};" is elided from this excerpt.
 */
547 struct bio_ops hammer_bioops = {
548 .io_start = hammer_io_start,
549 .io_complete = hammer_io_complete,
550 .io_deallocate = hammer_io_deallocate,
551 .io_fsync = hammer_io_fsync,
552 .io_sync = hammer_io_sync,
553 .io_movedeps = hammer_io_movedeps,
554 .io_countdeps = hammer_io_countdeps,
555 .io_checkread = hammer_io_checkread,
556 .io_checkwrite = hammer_io_checkwrite,