HAMMER VFS - Improve saturated write performance (2).
authorMatthew Dillon <dillon@apollo.backplane.com>
Tue, 11 Jan 2011 07:17:34 +0000 (23:17 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Tue, 11 Jan 2011 07:17:34 +0000 (23:17 -0800)
* Change the dirty io buffer lists from TAILQs to Red-Black trees.

* The dirty io buffers are sorted by disk address on a flush-group by
  flush-group basis and I/O writes are initiated in sorted order.

  This significantly improves write I/O throughput to normal HDs.
  Essentially what is happening here is that the sheer number of
  unsorted buffers is overwhelming the HD's own caches.  Having HAMMER
  pre-sort the buffers, of which there can be upwards of 100MBs worth,
  allows the HD to write more optimally.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_vfsops.c

index c76de4b..09f338b 100644 (file)
@@ -584,6 +584,7 @@ RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
 RB_HEAD(hammer_nod_rb_tree, hammer_node);
 RB_HEAD(hammer_und_rb_tree, hammer_undo);
 RB_HEAD(hammer_res_rb_tree, hammer_reserve);
+RB_HEAD(hammer_mod_rb_tree, hammer_io);
 
 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
              hammer_vol_rb_compare, int32_t);
@@ -595,6 +596,8 @@ RB_PROTOTYPE2(hammer_und_rb_tree, hammer_undo, rb_node,
              hammer_und_rb_compare, hammer_off_t);
 RB_PROTOTYPE2(hammer_res_rb_tree, hammer_reserve, rb_node,
              hammer_res_rb_compare, hammer_off_t);
+RB_PROTOTYPE2(hammer_mod_rb_tree, hammer_io, rb_node,
+             hammer_mod_rb_compare, hammer_off_t);
 
 /*
  * IO management - embedded at the head of various in-memory structures
@@ -631,9 +634,9 @@ struct hammer_io {
        enum hammer_io_type     type;
        struct hammer_mount     *hmp;
        struct hammer_volume    *volume;
-       TAILQ_ENTRY(hammer_io)  mod_entry; /* list entry if modified */
+       RB_ENTRY(hammer_io)     rb_node;     /* if modified */
        TAILQ_ENTRY(hammer_io)  iorun_entry; /* iorun_list */
-       hammer_io_list_t        mod_list;
+       struct hammer_mod_rb_tree *mod_root;
        struct buf              *bp;
        int64_t                 offset;    /* zone-2 offset */
        int                     bytes;     /* buffer cache buffer size */
@@ -904,12 +907,11 @@ struct hammer_mount {
        u_int   check_interrupt;
        u_int   check_yield;
        uuid_t  fsid;
-       struct hammer_io_list volu_list;        /* dirty undo buffers */
-       struct hammer_io_list undo_list;        /* dirty undo buffers */
-       struct hammer_io_list data_list;        /* dirty data buffers */
-       struct hammer_io_list alt_data_list;    /* dirty data buffers */
-       struct hammer_io_list meta_list;        /* dirty meta bufs    */
-       struct hammer_io_list lose_list;        /* loose buffers      */
+       struct hammer_mod_rb_tree volu_root;    /* dirty volume buffers */
+       struct hammer_mod_rb_tree undo_root;    /* dirty undo buffers */
+       struct hammer_mod_rb_tree data_root;    /* dirty data buffers */
+       struct hammer_mod_rb_tree meta_root;    /* dirty meta bufs    */
+       struct hammer_mod_rb_tree lose_root;    /* loose buffers      */
        int     locked_dirty_space;             /* meta/volu count    */
        int     io_running_space;               /* io_token */
        int     io_running_wakeup;              /* io_token */
index 5e1b0b3..998da92 100644 (file)
@@ -526,12 +526,12 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
         *
         * The io_token is needed to protect the list.
         */
-       if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
+       if ((io = RB_ROOT(&hmp->lose_root)) != NULL) {
                lwkt_gettoken(&hmp->io_token);
-               while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
-                       KKASSERT(io->mod_list == &hmp->lose_list);
-                       TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
-                       io->mod_list = NULL;
+               while ((io = RB_ROOT(&hmp->lose_root)) != NULL) {
+                       KKASSERT(io->mod_root == &hmp->lose_root);
+                       RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+                       io->mod_root = NULL;
                        hammer_ref(&io->lock);
                        buffer = (void *)io;
                        hammer_rel_buffer(buffer, 0);
@@ -658,7 +658,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
         * related inode(s) getting queued to the flush group.
         */
        count = 0;
-       while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
+       while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->data_root)) != NULL) {
                if (io->ioerror)
                        break;
                hammer_ref(&io->lock);
@@ -802,7 +802,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
         * meta data buffers.
         */
        count = 0;
-       while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
+       while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->meta_root)) != NULL) {
                if (io->ioerror)
                        break;
                KKASSERT(io->modify_refs == 0);
@@ -894,7 +894,7 @@ hammer_flusher_flush_undos(hammer_mount_t hmp, int mode)
        int count;
 
        count = 0;
-       while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
+       while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->undo_root)) != NULL) {
                if (io->ioerror)
                        break;
                hammer_ref(&io->lock);
@@ -958,10 +958,10 @@ hammer_flusher_haswork(hammer_mount_t hmp)
        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                return(0);
        if (TAILQ_FIRST(&hmp->flush_group_list) ||      /* dirty inodes */
-           TAILQ_FIRST(&hmp->volu_list) ||             /* dirty buffers */
-           TAILQ_FIRST(&hmp->undo_list) ||
-           TAILQ_FIRST(&hmp->data_list) ||
-           TAILQ_FIRST(&hmp->meta_list) ||
+           RB_ROOT(&hmp->volu_root) ||                 /* dirty buffers */
+           RB_ROOT(&hmp->undo_root) ||
+           RB_ROOT(&hmp->data_root) ||
+           RB_ROOT(&hmp->meta_root) ||
            (hmp->hflags & HMNT_UNDO_DIRTY)             /* UNDO FIFO sync */
        ) {
                return(1);
index ac892f4..38f689d 100644 (file)
@@ -30,8 +30,6 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.55 2008/09/15 17:02:49 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -66,6 +64,26 @@ static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data);
 static void hammer_io_set_modlist(struct hammer_io *io);
 static void hammer_io_flush_mark(hammer_volume_t volume);
 
+static int
+hammer_mod_rb_compare(hammer_io_t io1, hammer_io_t io2)
+{
+       hammer_off_t io1_offset;
+       hammer_off_t io2_offset;
+
+       io1_offset = ((io1->offset & HAMMER_OFF_SHORT_MASK) << 8) |
+                    HAMMER_VOL_DECODE(io1->offset);
+       io2_offset = ((io2->offset & HAMMER_OFF_SHORT_MASK) << 8) |
+                    HAMMER_VOL_DECODE(io2->offset);
+
+       if (io1_offset < io2_offset)
+               return(-1);
+       if (io1_offset > io2_offset)
+               return(1);
+       return(0);
+}
+
+RB_GENERATE(hammer_mod_rb_tree, hammer_io, rb_node, hammer_mod_rb_compare);
+
 /*
  * Initialize a new, already-zero'd hammer_io structure, or reinitialize
  * an existing hammer_io structure which may have switched to another type.
@@ -906,7 +924,7 @@ hammer_io_clear_modify(struct hammer_io *io, int inval)
        hammer_mount_t hmp;
 
        /*
-        * io_token is needed to avoid races on mod_list
+        * io_token is needed to avoid races on mod_root
         */
        if (io->modified == 0)
                return;
@@ -920,14 +938,14 @@ hammer_io_clear_modify(struct hammer_io *io, int inval)
        /*
         * Take us off the mod-list and clear the modified bit.
         */
-       KKASSERT(io->mod_list != NULL);
-       if (io->mod_list == &io->hmp->volu_list ||
-           io->mod_list == &io->hmp->meta_list) {
+       KKASSERT(io->mod_root != NULL);
+       if (io->mod_root == &io->hmp->volu_root ||
+           io->mod_root == &io->hmp->meta_root) {
                io->hmp->locked_dirty_space -= io->bytes;
                atomic_add_int(&hammer_count_dirtybufspace, -io->bytes);
        }
-       TAILQ_REMOVE(io->mod_list, io, mod_entry);
-       io->mod_list = NULL;
+       RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+       io->mod_root = NULL;
        io->modified = 0;
 
        lwkt_reltoken(&hmp->io_token);
@@ -966,10 +984,10 @@ restart:
 
 /*
  * Clear the IO's modify list.  Even though the IO is no longer modified
- * it may still be on the lose_list.  This routine is called just before
+ * it may still be on the lose_root.  This routine is called just before
  * the governing hammer_buffer is destroyed.
  *
- * mod_list requires io_token protection.
+ * mod_root requires io_token protection.
  */
 void
 hammer_io_clear_modlist(struct hammer_io *io)
@@ -977,12 +995,12 @@ hammer_io_clear_modlist(struct hammer_io *io)
        hammer_mount_t hmp = io->hmp;
 
        KKASSERT(io->modified == 0);
-       if (io->mod_list) {
+       if (io->mod_root) {
                lwkt_gettoken(&hmp->io_token);
-               if (io->mod_list) {
-                       KKASSERT(io->mod_list == &io->hmp->lose_list);
-                       TAILQ_REMOVE(io->mod_list, io, mod_entry);
-                       io->mod_list = NULL;
+               if (io->mod_root) {
+                       KKASSERT(io->mod_root == &io->hmp->lose_root);
+                       RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+                       io->mod_root = NULL;
                }
                lwkt_reltoken(&hmp->io_token);
        }
@@ -994,30 +1012,33 @@ hammer_io_set_modlist(struct hammer_io *io)
        struct hammer_mount *hmp = io->hmp;
 
        lwkt_gettoken(&hmp->io_token);
-       KKASSERT(io->mod_list == NULL);
+       KKASSERT(io->mod_root == NULL);
 
        switch(io->type) {
        case HAMMER_STRUCTURE_VOLUME:
-               io->mod_list = &hmp->volu_list;
+               io->mod_root = &hmp->volu_root;
                hmp->locked_dirty_space += io->bytes;
                atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
                break;
        case HAMMER_STRUCTURE_META_BUFFER:
-               io->mod_list = &hmp->meta_list;
+               io->mod_root = &hmp->meta_root;
                hmp->locked_dirty_space += io->bytes;
                atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
                break;
        case HAMMER_STRUCTURE_UNDO_BUFFER:
-               io->mod_list = &hmp->undo_list;
+               io->mod_root = &hmp->undo_root;
                break;
        case HAMMER_STRUCTURE_DATA_BUFFER:
-               io->mod_list = &hmp->data_list;
+               io->mod_root = &hmp->data_root;
                break;
        case HAMMER_STRUCTURE_DUMMY:
-               panic("hammer_io_disassociate: bad io type");
-               break;
+               panic("hammer_io_set_modlist: bad io type");
+               break; /* NOT REACHED */
+       }
+       if (RB_INSERT(hammer_mod_rb_tree, io->mod_root, io)) {
+               panic("hammer_io_set_modlist: duplicate entry");
+               /* NOT REACHED */
        }
-       TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry);
        lwkt_reltoken(&hmp->io_token);
 }
 
@@ -1196,9 +1217,12 @@ hammer_io_deallocate(struct buf *bp)
                hammer_io_disassociate(iou);
                if (iou->io.type != HAMMER_STRUCTURE_VOLUME) {
                        KKASSERT(iou->io.bp == NULL);
-                       KKASSERT(iou->io.mod_list == NULL);
-                       iou->io.mod_list = &hmp->lose_list;
-                       TAILQ_INSERT_TAIL(iou->io.mod_list, &iou->io, mod_entry);
+                       KKASSERT(iou->io.mod_root == NULL);
+                       iou->io.mod_root = &hmp->lose_root;
+                       if (RB_INSERT(hammer_mod_rb_tree, iou->io.mod_root,
+                                     &iou->io)) {
+                               panic("hammer_io_deallocate: duplicate entry");
+                       }
                }
                hammer_put_interlock(&iou->io.lock, 1);
        }
index ce3bdae..de931f3 100644 (file)
@@ -583,12 +583,12 @@ found_aliased:
                 * lose_list can be modified via a biodone() interrupt
                 * so the io_token must be held.
                 */
-               if (buffer->io.mod_list == &hmp->lose_list) {
+               if (buffer->io.mod_root == &hmp->lose_root) {
                        lwkt_gettoken(&hmp->io_token);
-                       if (buffer->io.mod_list == &hmp->lose_list) {
-                               TAILQ_REMOVE(buffer->io.mod_list, &buffer->io,
-                                            mod_entry);
-                               buffer->io.mod_list = NULL;
+                       if (buffer->io.mod_root == &hmp->lose_root) {
+                               RB_REMOVE(hammer_mod_rb_tree,
+                                         buffer->io.mod_root, &buffer->io);
+                               buffer->io.mod_root = NULL;
                                KKASSERT(buffer->io.modified == 0);
                        }
                        lwkt_reltoken(&hmp->io_token);
@@ -967,12 +967,12 @@ hammer_ref_buffer(hammer_buffer_t buffer)
         *
         * No longer loose.  lose_list requires the io_token.
         */
-       if (buffer->io.mod_list == &hmp->lose_list) {
+       if (buffer->io.mod_root == &hmp->lose_root) {
                lwkt_gettoken(&hmp->io_token);
-               if (buffer->io.mod_list == &hmp->lose_list) {
-                       TAILQ_REMOVE(buffer->io.mod_list, &buffer->io,
-                                    mod_entry);
-                       buffer->io.mod_list = NULL;
+               if (buffer->io.mod_root == &hmp->lose_root) {
+                       RB_REMOVE(hammer_mod_rb_tree,
+                                 buffer->io.mod_root, &buffer->io);
+                       buffer->io.mod_root = NULL;
                }
                lwkt_reltoken(&hmp->io_token);
        }
index 328a9d9..6f483b2 100644 (file)
@@ -563,11 +563,11 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
 
        hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
 
-       TAILQ_INIT(&hmp->volu_list);
-       TAILQ_INIT(&hmp->undo_list);
-       TAILQ_INIT(&hmp->data_list);
-       TAILQ_INIT(&hmp->meta_list);
-       TAILQ_INIT(&hmp->lose_list);
+       RB_INIT(&hmp->volu_root);
+       RB_INIT(&hmp->undo_root);
+       RB_INIT(&hmp->data_root);
+       RB_INIT(&hmp->meta_root);
+       RB_INIT(&hmp->lose_root);
        TAILQ_INIT(&hmp->iorun_list);
 
        lwkt_token_init(&hmp->fs_token, 1, "hammerfs");