HAMMER: Mass storage flush command support
author Matthew Dillon <dillon@dragonflybsd.org>
Fri, 29 Aug 2008 20:19:08 +0000 (20:19 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Fri, 29 Aug 2008 20:19:08 +0000 (20:19 +0000)
Add mass storage flush command support to HAMMER.  The HAMMER flush cycle
issues async I/O in parallel and waits for it to complete before moving
on to the next stage.  This occurs in two places.  The new flush command
is issued after the async I/O, and then we wait on everything.

Due to HAMMER's ability to gang writes asynchronously between stages, the
addition of the flush should not significantly impact performance.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ondisk.c

index 6785345..bb1155f 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.126 2008/08/02 21:21:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.127 2008/08/29 20:19:08 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -484,6 +484,7 @@ struct hammer_io {
        struct hammer_lock      lock;
        enum hammer_io_type     type;
        struct hammer_mount     *hmp;
+       struct hammer_volume    *volume;
        TAILQ_ENTRY(hammer_io)  mod_entry; /* list entry if modified */
        hammer_io_list_t        mod_list;
        struct buf              *bp;
@@ -541,7 +542,6 @@ struct hammer_buffer {
        struct hammer_io io;
        RB_ENTRY(hammer_buffer) rb_node;
        void *ondisk;
-       struct hammer_volume *volume;
        hammer_off_t zoneX_offset;
        hammer_off_t zone2_offset;
        struct hammer_reserve *resv;
@@ -1099,7 +1099,7 @@ void hammer_rel_pseudofs(hammer_mount_t hmp, hammer_pseudofs_inmem_t pfsm);
 int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                        struct ucred *cred);
 
-void hammer_io_init(hammer_io_t io, hammer_mount_t hmp,
+void hammer_io_init(hammer_io_t io, hammer_volume_t volume,
                        enum hammer_io_type type);
 int hammer_io_read(struct vnode *devvp, struct hammer_io *io,
                        hammer_off_t limit);
@@ -1120,6 +1120,8 @@ void hammer_io_write_interlock(hammer_io_t io);
 void hammer_io_done_interlock(hammer_io_t io);
 void hammer_io_clear_modify(struct hammer_io *io, int inval);
 void hammer_io_clear_modlist(struct hammer_io *io);
+void hammer_io_flush_sync(hammer_mount_t hmp);
+
 void hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
                        void *base, int len);
 void hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
index 43e9996..8c47bc9 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.52 2008/07/31 04:42:04 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.53 2008/08/29 20:19:08 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -518,6 +518,7 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t;
 
 #define HAMMER_VOLF_VALID              0x0001  /* valid entry */
 #define HAMMER_VOLF_OPEN               0x0002  /* volume is open */
+#define HAMMER_VOLF_NEEDFLUSH          0x0004  /* volume needs flush */
 
 #define HAMMER_VOL_CRCSIZE1    \
        offsetof(struct hammer_volume_ondisk, vol_crc)
index 53e404a..2bc55b4 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.53 2008/08/06 15:38:58 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.54 2008/08/29 20:19:08 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -60,15 +60,19 @@ static void hammer_io_direct_read_complete(struct bio *nbio);
 static void hammer_io_direct_write_complete(struct bio *nbio);
 static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data);
 static void hammer_io_set_modlist(struct hammer_io *io);
+static void hammer_io_flush_mark(hammer_volume_t volume);
+static void hammer_io_flush_sync_done(struct bio *bio);
+
 
 /*
  * Initialize a new, already-zero'd hammer_io structure, or reinitialize
  * an existing hammer_io structure which may have switched to another type.
  */
 void
-hammer_io_init(hammer_io_t io, hammer_mount_t hmp, enum hammer_io_type type)
+hammer_io_init(hammer_io_t io, hammer_volume_t volume, enum hammer_io_type type)
 {
-       io->hmp = hmp;
+       io->volume = volume;
+       io->hmp = volume->io.hmp;
        io->type = type;
 }
 
@@ -150,6 +154,7 @@ hammer_io_wait(hammer_io_t io)
 void
 hammer_io_wait_all(hammer_mount_t hmp, const char *ident)
 {
+       hammer_io_flush_sync(hmp);
        crit_enter();
        while (hmp->io_running_space)
                tsleep(&hmp->io_running_space, 0, ident, 0);
@@ -507,6 +512,7 @@ hammer_io_flush(struct hammer_io *io)
        io->hmp->io_running_space += io->bytes;
        hammer_count_io_running_write += io->bytes;
        bawrite(bp);
+       hammer_io_flush_mark(io->volume);
 }
 
 /************************************************************************
@@ -1201,6 +1207,7 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                                           zone2_offset;
                        hammer_stats_disk_write += bp->b_bufsize;
                        vn_strategy(volume->devvp, nbio);
+                       hammer_io_flush_mark(volume);
                }
                hammer_rel_volume(volume, 0);
        } else {
@@ -1390,3 +1397,66 @@ hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data)
        return(0);
 }
 
+
+/*
+ * This function is called when writes may have occurred on the volume,
+ * indicating that the device may be holding cached writes.
+ *
+ * It only sets HAMMER_VOLF_NEEDFLUSH in the volume's vol_flags; no I/O is
+ * issued here.  hammer_io_flush_sync() later tests and clears the flag and
+ * issues the actual flush command for each marked volume.
+ */
+static void
+hammer_io_flush_mark(hammer_volume_t volume)
+{
+       volume->vol_flags |= HAMMER_VOLF_NEEDFLUSH;
+}
+
+/*
+ * This function ensures that the device has flushed any cached writes out.
+ *
+ * Every volume on the mount whose HAMMER_VOLF_NEEDFLUSH flag is set has
+ * the flag cleared and a zero-length BUF_CMD_FLUSH request issued to its
+ * device vnode.  The flag is cleared before the request is issued.
+ *
+ * All requests are issued first and linked into a singly-linked list
+ * through bio_caller_info1.cluster_head; only afterwards is each one
+ * waited on in turn, so several flushes may be outstanding at once.
+ *
+ * Completion is signalled by hammer_io_flush_sync_done(), which sets
+ * b_cmd to BUF_CMD_DONE and issues a wakeup on &bp->b_cmd.  The wait
+ * loop re-tests b_cmd after tsleep_interlock() and before tsleep().
+ *
+ * NOTE(review): the result of the flush command is not examined here;
+ * the buf is simply released once it is marked done.
+ */
+void
+hammer_io_flush_sync(hammer_mount_t hmp)
+{
+       hammer_volume_t volume;
+       struct buf *bp_base = NULL;     /* head of the issued-flush list */
+       struct buf *bp;
+
+       RB_FOREACH(volume, hammer_vol_rb_tree, &hmp->rb_vols_root) {
+               if (volume->vol_flags & HAMMER_VOLF_NEEDFLUSH) {
+                       volume->vol_flags &= ~HAMMER_VOLF_NEEDFLUSH;
+                       bp = getpbuf(NULL);
+                       bp->b_bio1.bio_offset = 0;
+                       bp->b_bufsize = 0;
+                       bp->b_bcount = 0;
+                       bp->b_cmd = BUF_CMD_FLUSH;
+                       bp->b_bio1.bio_caller_info1.cluster_head = bp_base; /* link to prior bp */
+                       bp->b_bio1.bio_done = hammer_io_flush_sync_done;
+                       bp->b_flags |= B_ASYNC;
+                       bp_base = bp;   /* becomes the new list head */
+                       vn_strategy(volume->devvp, &bp->b_bio1);
+               }
+       }
+       while ((bp = bp_base) != NULL) {        /* reap each issued flush */
+               bp_base = bp->b_bio1.bio_caller_info1.cluster_head;
+               while (bp->b_cmd != BUF_CMD_DONE) {
+                       crit_enter();
+                       tsleep_interlock(&bp->b_cmd);
+                       if (bp->b_cmd != BUF_CMD_DONE)  /* re-test after interlock */
+                               tsleep(&bp->b_cmd, 0, "hmrFLS", 0);
+                       crit_exit();
+               }
+               bp->b_flags &= ~B_ASYNC;
+               relpbuf(bp, NULL);
+       }
+}
+
+/*
+ * Callback to deal with completed flush commands to the device.
+ *
+ * Installed as bio_done by hammer_io_flush_sync().  It marks the buf
+ * owning the bio as BUF_CMD_DONE and wakes up anything sleeping on
+ * &bp->b_cmd.  The buf is not released here; hammer_io_flush_sync()
+ * calls relpbuf() once it has observed the completion.
+ */
+static void
+hammer_io_flush_sync_done(struct bio *bio)
+{
+       struct buf *bp;
+
+       bp = bio->bio_buf;
+       bp->b_cmd = BUF_CMD_DONE;
+       wakeup(&bp->b_cmd);
+}
+
index f6a0936..b6873d2 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.75 2008/08/06 15:38:58 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.76 2008/08/29 20:19:08 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -119,7 +119,8 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname,
        ++hammer_count_volumes;
        volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
        volume->vol_name = kstrdup(volname, M_HAMMER);
-       hammer_io_init(&volume->io, hmp, HAMMER_STRUCTURE_VOLUME);
+       volume->io.hmp = hmp;   /* bootstrap */
+       hammer_io_init(&volume->io, volume, HAMMER_STRUCTURE_VOLUME);
        volume->io.offset = 0LL;
        volume->io.bytes = HAMMER_BUFSIZE;
 
@@ -614,9 +615,8 @@ again:
                         M_WAITOK|M_ZERO|M_USE_RESERVE);
        buffer->zone2_offset = zone2_offset;
        buffer->zoneX_offset = buf_offset;
-       buffer->volume = volume;
 
-       hammer_io_init(&buffer->io, hmp, iotype);
+       hammer_io_init(&buffer->io, volume, iotype);
        buffer->io.offset = volume->ondisk->vol_buf_beg +
                            (zone2_offset & HAMMER_OFF_SHORT_MASK);
        buffer->io.bytes = bytes;
@@ -723,7 +723,7 @@ hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset,
                                hammer_io_clear_modify(&buffer->io, 1);
                                buffer->io.reclaim = 1;
                                buffer->io.waitdep = 1;
-                               KKASSERT(buffer->volume == volume);
+                               KKASSERT(buffer->io.volume == volume);
                                hammer_rel_buffer(buffer, 0);
                        }
                } else {
@@ -745,7 +745,7 @@ hammer_load_buffer(hammer_buffer_t buffer, int isnew)
        /*
         * Load the buffer's on-disk info
         */
-       volume = buffer->volume;
+       volume = buffer->io.volume;
        ++buffer->io.loading;
        hammer_lock_ex(&buffer->io.lock);
 
@@ -885,8 +885,8 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush)
                                RB_REMOVE(hammer_buf_rb_tree,
                                          &buffer->io.hmp->rb_bufs_root,
                                          buffer);
-                               volume = buffer->volume;
-                               buffer->volume = NULL; /* sanity */
+                               volume = buffer->io.volume;
+                               buffer->io.volume = NULL; /* sanity */
                                hammer_rel_volume(volume, 0);
                                hammer_io_clear_modlist(&buffer->io);
                                hammer_flush_buffer_nodes(buffer);