From 1afb73cf098b11a9457b91c7e5f165f66efeceb6 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 10 Jan 2011 23:17:34 -0800 Subject: [PATCH] HAMMER VFS - Improve saturated write performance (2). * Change the dirty io buffer lists from TAILQs to Red-Black trees. * The dirty io buffers are sorted by disk address on a flush-group by flush-group basis and I/O writes are initiated in sorted order. This significantly improves write I/O throughput to normal HDs. Essentially what is happening here is that the sheer number of unsorted buffers are overwhelming the HDs own caches. Having HAMMER pre-sort the buffers, of which there can be upwards of 100MBs worth, allow the HD to write more optimally. --- sys/vfs/hammer/hammer.h | 18 +++++---- sys/vfs/hammer/hammer_flusher.c | 24 ++++++------ sys/vfs/hammer/hammer_io.c | 76 +++++++++++++++++++++++++------------- sys/vfs/hammer/hammer_ondisk.c | 20 +++++----- sys/vfs/hammer/hammer_vfsops.c | 10 +++--- 5 files changed, 87 insertions(+), 61 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index c76de4b..09f338b 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -584,6 +584,7 @@ RB_HEAD(hammer_buf_rb_tree, hammer_buffer); RB_HEAD(hammer_nod_rb_tree, hammer_node); RB_HEAD(hammer_und_rb_tree, hammer_undo); RB_HEAD(hammer_res_rb_tree, hammer_reserve); +RB_HEAD(hammer_mod_rb_tree, hammer_io); RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node, hammer_vol_rb_compare, int32_t); @@ -595,6 +596,8 @@ RB_PROTOTYPE2(hammer_und_rb_tree, hammer_undo, rb_node, hammer_und_rb_compare, hammer_off_t); RB_PROTOTYPE2(hammer_res_rb_tree, hammer_reserve, rb_node, hammer_res_rb_compare, hammer_off_t); +RB_PROTOTYPE2(hammer_mod_rb_tree, hammer_io, rb_node, + hammer_mod_rb_compare, hammer_off_t); /* * IO management - embedded at the head of various in-memory structures @@ -631,9 +634,9 @@ struct hammer_io { enum hammer_io_type type; struct hammer_mount *hmp; struct hammer_volume *volume; - TAILQ_ENTRY(hammer_io) mod_entry; /* list entry if modified */ + RB_ENTRY(hammer_io) rb_node; /* if modified */ TAILQ_ENTRY(hammer_io) iorun_entry; /* iorun_list */ - hammer_io_list_t mod_list; + struct hammer_mod_rb_tree *mod_root; struct buf *bp; int64_t offset; /* zone-2 offset */ int bytes; /* buffer cache buffer size */ @@ -904,12 +907,11 @@ struct hammer_mount { u_int check_interrupt; u_int check_yield; uuid_t fsid; - struct hammer_io_list volu_list; /* dirty undo buffers */ - struct hammer_io_list undo_list; /* dirty undo buffers */ - struct hammer_io_list data_list; /* dirty data buffers */ - struct hammer_io_list alt_data_list; /* dirty data buffers */ - struct hammer_io_list meta_list; /* dirty meta bufs */ - struct hammer_io_list lose_list; /* loose buffers */ + struct hammer_mod_rb_tree volu_root; /* dirty undo buffers */ + struct hammer_mod_rb_tree undo_root; /* dirty undo buffers */ + struct hammer_mod_rb_tree data_root; /* dirty data buffers */ + struct hammer_mod_rb_tree meta_root; /* dirty meta bufs */ + struct hammer_mod_rb_tree lose_root; /* loose buffers */ int locked_dirty_space; /* meta/volu count */ int io_running_space; /* io_token */ int io_running_wakeup; /* io_token */ diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index 5e1b0b3..998da92 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -526,12 +526,12 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp) * * The io_token is needed to protect the list. */ - if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) { + if ((io = RB_ROOT(&hmp->lose_root)) != NULL) { lwkt_gettoken(&hmp->io_token); - while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) { - KKASSERT(io->mod_list == &hmp->lose_list); - TAILQ_REMOVE(&hmp->lose_list, io, mod_entry); - io->mod_list = NULL; + while ((io = RB_ROOT(&hmp->lose_root)) != NULL) { + KKASSERT(io->mod_root == &hmp->lose_root); + RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); + io->mod_root = NULL; hammer_ref(&io->lock); buffer = (void *)io; hammer_rel_buffer(buffer, 0); @@ -658,7 +658,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) * related inode(s) getting queued to the flush group. */ count = 0; - while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) { + while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->data_root)) != NULL) { if (io->ioerror) break; hammer_ref(&io->lock); @@ -802,7 +802,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) * meta data buffers. */ count = 0; - while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) { + while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->meta_root)) != NULL) { if (io->ioerror) break; KKASSERT(io->modify_refs == 0); @@ -894,7 +894,7 @@ hammer_flusher_flush_undos(hammer_mount_t hmp, int mode) int count; count = 0; - while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) { + while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->undo_root)) != NULL) { if (io->ioerror) break; hammer_ref(&io->lock); @@ -958,10 +958,10 @@ hammer_flusher_haswork(hammer_mount_t hmp) if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) return(0); if (TAILQ_FIRST(&hmp->flush_group_list) || /* dirty inodes */ - TAILQ_FIRST(&hmp->volu_list) || /* dirty buffers */ - TAILQ_FIRST(&hmp->undo_list) || - TAILQ_FIRST(&hmp->data_list) || - TAILQ_FIRST(&hmp->meta_list) || + RB_ROOT(&hmp->volu_root) || /* dirty buffers */ + RB_ROOT(&hmp->undo_root) || + RB_ROOT(&hmp->data_root) || + RB_ROOT(&hmp->meta_root) || (hmp->hflags & HMNT_UNDO_DIRTY) /* UNDO FIFO sync */ ) { return(1); diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index ac892f4..38f689d 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -30,8 +30,6 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.55 2008/09/15 17:02:49 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -66,6 +64,26 @@ static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data); static void hammer_io_set_modlist(struct hammer_io *io); static void hammer_io_flush_mark(hammer_volume_t volume); +static int +hammer_mod_rb_compare(hammer_io_t io1, hammer_io_t io2) +{ + hammer_off_t io1_offset; + hammer_off_t io2_offset; + + io1_offset = ((io1->offset & HAMMER_OFF_SHORT_MASK) << 8) | + HAMMER_VOL_DECODE(io1->offset); + io2_offset = ((io2->offset & HAMMER_OFF_SHORT_MASK) << 8) | + HAMMER_VOL_DECODE(io2->offset); + + if (io1_offset < io2_offset) + return(-1); + if (io1_offset > io2_offset) + return(1); + return(0); +} + +RB_GENERATE(hammer_mod_rb_tree, hammer_io, rb_node, hammer_mod_rb_compare); + /* * Initialize a new, already-zero'd hammer_io structure, or reinitialize * an existing hammer_io structure which may have switched to another type. @@ -906,7 +924,7 @@ hammer_io_clear_modify(struct hammer_io *io, int inval) hammer_mount_t hmp; /* - * io_token is needed to avoid races on mod_list + * io_token is needed to avoid races on mod_root */ if (io->modified == 0) return; @@ -920,14 +938,14 @@ hammer_io_clear_modify(struct hammer_io *io, int inval) /* * Take us off the mod-list and clear the modified bit. */ - KKASSERT(io->mod_list != NULL); - if (io->mod_list == &io->hmp->volu_list || - io->mod_list == &io->hmp->meta_list) { + KKASSERT(io->mod_root != NULL); + if (io->mod_root == &io->hmp->volu_root || + io->mod_root == &io->hmp->meta_root) { io->hmp->locked_dirty_space -= io->bytes; atomic_add_int(&hammer_count_dirtybufspace, -io->bytes); } - TAILQ_REMOVE(io->mod_list, io, mod_entry); - io->mod_list = NULL; + RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); + io->mod_root = NULL; io->modified = 0; lwkt_reltoken(&hmp->io_token); @@ -966,10 +984,10 @@ restart: /* * Clear the IO's modify list. Even though the IO is no longer modified - * it may still be on the lose_list. This routine is called just before + * it may still be on the lose_root. This routine is called just before * the governing hammer_buffer is destroyed. * - * mod_list requires io_token protection. + * mod_root requires io_token protection. */ void hammer_io_clear_modlist(struct hammer_io *io) @@ -977,12 +995,12 @@ hammer_io_clear_modlist(struct hammer_io *io) hammer_mount_t hmp = io->hmp; KKASSERT(io->modified == 0); - if (io->mod_list) { + if (io->mod_root) { lwkt_gettoken(&hmp->io_token); - if (io->mod_list) { - KKASSERT(io->mod_list == &io->hmp->lose_list); - TAILQ_REMOVE(io->mod_list, io, mod_entry); - io->mod_list = NULL; + if (io->mod_root) { + KKASSERT(io->mod_root == &io->hmp->lose_root); + RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); + io->mod_root = NULL; } lwkt_reltoken(&hmp->io_token); } @@ -994,30 +1012,33 @@ hammer_io_set_modlist(struct hammer_io *io) struct hammer_mount *hmp = io->hmp; lwkt_gettoken(&hmp->io_token); - KKASSERT(io->mod_list == NULL); + KKASSERT(io->mod_root == NULL); switch(io->type) { case HAMMER_STRUCTURE_VOLUME: - io->mod_list = &hmp->volu_list; + io->mod_root = &hmp->volu_root; hmp->locked_dirty_space += io->bytes; atomic_add_int(&hammer_count_dirtybufspace, io->bytes); break; case HAMMER_STRUCTURE_META_BUFFER: - io->mod_list = &hmp->meta_list; + io->mod_root = &hmp->meta_root; hmp->locked_dirty_space += io->bytes; atomic_add_int(&hammer_count_dirtybufspace, io->bytes); break; case HAMMER_STRUCTURE_UNDO_BUFFER: - io->mod_list = &hmp->undo_list; + io->mod_root = &hmp->undo_root; break; case HAMMER_STRUCTURE_DATA_BUFFER: - io->mod_list = &hmp->data_list; + io->mod_root = &hmp->data_root; break; case HAMMER_STRUCTURE_DUMMY: - panic("hammer_io_disassociate: bad io type"); - break; + panic("hammer_io_set_modlist: bad io type"); + break; /* NOT REACHED */ + } + if (RB_INSERT(hammer_mod_rb_tree, io->mod_root, io)) { + panic("hammer_io_set_modlist: duplicate entry"); + /* NOT REACHED */ } - TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry); lwkt_reltoken(&hmp->io_token); } @@ -1196,9 +1217,12 @@ hammer_io_deallocate(struct buf *bp) hammer_io_disassociate(iou); if (iou->io.type != HAMMER_STRUCTURE_VOLUME) { KKASSERT(iou->io.bp == NULL); - KKASSERT(iou->io.mod_list == NULL); - iou->io.mod_list = &hmp->lose_list; - TAILQ_INSERT_TAIL(iou->io.mod_list, &iou->io, mod_entry); + KKASSERT(iou->io.mod_root == NULL); + iou->io.mod_root = &hmp->lose_root; + if (RB_INSERT(hammer_mod_rb_tree, iou->io.mod_root, + &iou->io)) { + panic("hammer_io_deallocate: duplicate entry"); + } } hammer_put_interlock(&iou->io.lock, 1); } diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index ce3bdae..de931f3 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -583,12 +583,12 @@ found_aliased: * lose_list can be modified via a biodone() interrupt * so the io_token must be held. */ - if (buffer->io.mod_list == &hmp->lose_list) { + if (buffer->io.mod_root == &hmp->lose_root) { lwkt_gettoken(&hmp->io_token); - if (buffer->io.mod_list == &hmp->lose_list) { - TAILQ_REMOVE(buffer->io.mod_list, &buffer->io, - mod_entry); - buffer->io.mod_list = NULL; + if (buffer->io.mod_root == &hmp->lose_root) { + RB_REMOVE(hammer_mod_rb_tree, + buffer->io.mod_root, &buffer->io); + buffer->io.mod_root = NULL; KKASSERT(buffer->io.modified == 0); } lwkt_reltoken(&hmp->io_token); @@ -967,12 +967,12 @@ hammer_ref_buffer(hammer_buffer_t buffer) * * No longer loose. lose_list requires the io_token. */ - if (buffer->io.mod_list == &hmp->lose_list) { + if (buffer->io.mod_root == &hmp->lose_root) { lwkt_gettoken(&hmp->io_token); - if (buffer->io.mod_list == &hmp->lose_list) { - TAILQ_REMOVE(buffer->io.mod_list, &buffer->io, - mod_entry); - buffer->io.mod_list = NULL; + if (buffer->io.mod_root == &hmp->lose_root) { + RB_REMOVE(hammer_mod_rb_tree, + buffer->io.mod_root, &buffer->io); + buffer->io.mod_root = NULL; } lwkt_reltoken(&hmp->io_token); } diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 328a9d9..6f483b2 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -563,11 +563,11 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); - TAILQ_INIT(&hmp->volu_list); - TAILQ_INIT(&hmp->undo_list); - TAILQ_INIT(&hmp->data_list); - TAILQ_INIT(&hmp->meta_list); - TAILQ_INIT(&hmp->lose_list); + RB_INIT(&hmp->volu_root); + RB_INIT(&hmp->undo_root); + RB_INIT(&hmp->data_root); + RB_INIT(&hmp->meta_root); + RB_INIT(&hmp->lose_root); TAILQ_INIT(&hmp->iorun_list); lwkt_token_init(&hmp->fs_token, 1, "hammerfs"); -- 1.7.7.2