* Change the dirty io buffer lists from TAILQs to Red-Black trees.
* The dirty io buffers are sorted by disk address on a flush-group by
flush-group basis and I/O writes are initiated in sorted order.
This significantly improves write I/O throughput to normal HDs.
Essentially what is happening here is that the sheer number of
unsorted buffers are overwhelming the HDs own caches. Having HAMMER
pre-sort the buffers, of which there can be upwards of 100MBs worth,
allow the HD to write more optimally.
RB_HEAD(hammer_nod_rb_tree, hammer_node);
RB_HEAD(hammer_und_rb_tree, hammer_undo);
RB_HEAD(hammer_res_rb_tree, hammer_reserve);
+RB_HEAD(hammer_mod_rb_tree, hammer_io);
RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
hammer_vol_rb_compare, int32_t);
hammer_und_rb_compare, hammer_off_t);
RB_PROTOTYPE2(hammer_res_rb_tree, hammer_reserve, rb_node,
hammer_res_rb_compare, hammer_off_t);
+RB_PROTOTYPE2(hammer_mod_rb_tree, hammer_io, rb_node,
+ hammer_mod_rb_compare, hammer_off_t);
/*
* IO management - embedded at the head of various in-memory structures
enum hammer_io_type type;
struct hammer_mount *hmp;
struct hammer_volume *volume;
- TAILQ_ENTRY(hammer_io) mod_entry; /* list entry if modified */
+ RB_ENTRY(hammer_io) rb_node; /* if modified */
TAILQ_ENTRY(hammer_io) iorun_entry; /* iorun_list */
- hammer_io_list_t mod_list;
+ struct hammer_mod_rb_tree *mod_root;
struct buf *bp;
int64_t offset; /* zone-2 offset */
int bytes; /* buffer cache buffer size */
u_int check_interrupt;
u_int check_yield;
uuid_t fsid;
- struct hammer_io_list volu_list; /* dirty undo buffers */
- struct hammer_io_list undo_list; /* dirty undo buffers */
- struct hammer_io_list data_list; /* dirty data buffers */
- struct hammer_io_list alt_data_list; /* dirty data buffers */
- struct hammer_io_list meta_list; /* dirty meta bufs */
- struct hammer_io_list lose_list; /* loose buffers */
+ struct hammer_mod_rb_tree volu_root; /* dirty undo buffers */
+ struct hammer_mod_rb_tree undo_root; /* dirty undo buffers */
+ struct hammer_mod_rb_tree data_root; /* dirty data buffers */
+ struct hammer_mod_rb_tree meta_root; /* dirty meta bufs */
+ struct hammer_mod_rb_tree lose_root; /* loose buffers */
int locked_dirty_space; /* meta/volu count */
int io_running_space; /* io_token */
int io_running_wakeup; /* io_token */
*
* The io_token is needed to protect the list.
*/
- if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
+ if ((io = RB_ROOT(&hmp->lose_root)) != NULL) {
lwkt_gettoken(&hmp->io_token);
- while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
- KKASSERT(io->mod_list == &hmp->lose_list);
- TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
- io->mod_list = NULL;
+ while ((io = RB_ROOT(&hmp->lose_root)) != NULL) {
+ KKASSERT(io->mod_root == &hmp->lose_root);
+ RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+ io->mod_root = NULL;
hammer_ref(&io->lock);
buffer = (void *)io;
hammer_rel_buffer(buffer, 0);
* related inode(s) getting queued to the flush group.
*/
count = 0;
- while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
+ while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->data_root)) != NULL) {
if (io->ioerror)
break;
hammer_ref(&io->lock);
* meta data buffers.
*/
count = 0;
- while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
+ while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->meta_root)) != NULL) {
if (io->ioerror)
break;
KKASSERT(io->modify_refs == 0);
int count;
count = 0;
- while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
+ while ((io = RB_FIRST(hammer_mod_rb_tree, &hmp->undo_root)) != NULL) {
if (io->ioerror)
break;
hammer_ref(&io->lock);
if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
return(0);
if (TAILQ_FIRST(&hmp->flush_group_list) || /* dirty inodes */
- TAILQ_FIRST(&hmp->volu_list) || /* dirty buffers */
- TAILQ_FIRST(&hmp->undo_list) ||
- TAILQ_FIRST(&hmp->data_list) ||
- TAILQ_FIRST(&hmp->meta_list) ||
+ RB_ROOT(&hmp->volu_root) || /* dirty buffers */
+ RB_ROOT(&hmp->undo_root) ||
+ RB_ROOT(&hmp->data_root) ||
+ RB_ROOT(&hmp->meta_root) ||
(hmp->hflags & HMNT_UNDO_DIRTY) /* UNDO FIFO sync */
) {
return(1);
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.55 2008/09/15 17:02:49 dillon Exp $
*/
/*
* IO Primitives and buffer cache management
static void hammer_io_set_modlist(struct hammer_io *io);
static void hammer_io_flush_mark(hammer_volume_t volume);
+static int
+hammer_mod_rb_compare(hammer_io_t io1, hammer_io_t io2)
+{
+ hammer_off_t io1_offset;
+ hammer_off_t io2_offset;
+
+ io1_offset = ((io1->offset & HAMMER_OFF_SHORT_MASK) << 8) |
+ HAMMER_VOL_DECODE(io1->offset);
+ io2_offset = ((io2->offset & HAMMER_OFF_SHORT_MASK) << 8) |
+ HAMMER_VOL_DECODE(io2->offset);
+
+ if (io1_offset < io2_offset)
+ return(-1);
+ if (io1_offset > io2_offset)
+ return(1);
+ return(0);
+}
+
+RB_GENERATE(hammer_mod_rb_tree, hammer_io, rb_node, hammer_mod_rb_compare);
+
/*
* Initialize a new, already-zero'd hammer_io structure, or reinitialize
* an existing hammer_io structure which may have switched to another type.
hammer_mount_t hmp;
/*
- * io_token is needed to avoid races on mod_list
+ * io_token is needed to avoid races on mod_root
*/
if (io->modified == 0)
return;
/*
* Take us off the mod-list and clear the modified bit.
*/
- KKASSERT(io->mod_list != NULL);
- if (io->mod_list == &io->hmp->volu_list ||
- io->mod_list == &io->hmp->meta_list) {
+ KKASSERT(io->mod_root != NULL);
+ if (io->mod_root == &io->hmp->volu_root ||
+ io->mod_root == &io->hmp->meta_root) {
io->hmp->locked_dirty_space -= io->bytes;
atomic_add_int(&hammer_count_dirtybufspace, -io->bytes);
}
- TAILQ_REMOVE(io->mod_list, io, mod_entry);
- io->mod_list = NULL;
+ RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+ io->mod_root = NULL;
io->modified = 0;
lwkt_reltoken(&hmp->io_token);
/*
* Clear the IO's modify list. Even though the IO is no longer modified
- * it may still be on the lose_list. This routine is called just before
+ * it may still be on the lose_root. This routine is called just before
* the governing hammer_buffer is destroyed.
*
- * mod_list requires io_token protection.
+ * mod_root requires io_token protection.
*/
void
hammer_io_clear_modlist(struct hammer_io *io)
hammer_mount_t hmp = io->hmp;
KKASSERT(io->modified == 0);
- if (io->mod_list) {
+ if (io->mod_root) {
lwkt_gettoken(&hmp->io_token);
- if (io->mod_list) {
- KKASSERT(io->mod_list == &io->hmp->lose_list);
- TAILQ_REMOVE(io->mod_list, io, mod_entry);
- io->mod_list = NULL;
+ if (io->mod_root) {
+ KKASSERT(io->mod_root == &io->hmp->lose_root);
+ RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
+ io->mod_root = NULL;
}
lwkt_reltoken(&hmp->io_token);
}
struct hammer_mount *hmp = io->hmp;
lwkt_gettoken(&hmp->io_token);
- KKASSERT(io->mod_list == NULL);
+ KKASSERT(io->mod_root == NULL);
switch(io->type) {
case HAMMER_STRUCTURE_VOLUME:
- io->mod_list = &hmp->volu_list;
+ io->mod_root = &hmp->volu_root;
hmp->locked_dirty_space += io->bytes;
atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
break;
case HAMMER_STRUCTURE_META_BUFFER:
- io->mod_list = &hmp->meta_list;
+ io->mod_root = &hmp->meta_root;
hmp->locked_dirty_space += io->bytes;
atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
break;
case HAMMER_STRUCTURE_UNDO_BUFFER:
- io->mod_list = &hmp->undo_list;
+ io->mod_root = &hmp->undo_root;
break;
case HAMMER_STRUCTURE_DATA_BUFFER:
- io->mod_list = &hmp->data_list;
+ io->mod_root = &hmp->data_root;
break;
case HAMMER_STRUCTURE_DUMMY:
- panic("hammer_io_disassociate: bad io type");
- break;
+ panic("hammer_io_set_modlist: bad io type");
+ break; /* NOT REACHED */
+ }
+ if (RB_INSERT(hammer_mod_rb_tree, io->mod_root, io)) {
+ panic("hammer_io_set_modlist: duplicate entry");
+ /* NOT REACHED */
}
- TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry);
lwkt_reltoken(&hmp->io_token);
}
hammer_io_disassociate(iou);
if (iou->io.type != HAMMER_STRUCTURE_VOLUME) {
KKASSERT(iou->io.bp == NULL);
- KKASSERT(iou->io.mod_list == NULL);
- iou->io.mod_list = &hmp->lose_list;
- TAILQ_INSERT_TAIL(iou->io.mod_list, &iou->io, mod_entry);
+ KKASSERT(iou->io.mod_root == NULL);
+ iou->io.mod_root = &hmp->lose_root;
+ if (RB_INSERT(hammer_mod_rb_tree, iou->io.mod_root,
+ &iou->io)) {
+ panic("hammer_io_deallocate: duplicate entry");
+ }
}
hammer_put_interlock(&iou->io.lock, 1);
}
* lose_list can be modified via a biodone() interrupt
* so the io_token must be held.
*/
- if (buffer->io.mod_list == &hmp->lose_list) {
+ if (buffer->io.mod_root == &hmp->lose_root) {
lwkt_gettoken(&hmp->io_token);
- if (buffer->io.mod_list == &hmp->lose_list) {
- TAILQ_REMOVE(buffer->io.mod_list, &buffer->io,
- mod_entry);
- buffer->io.mod_list = NULL;
+ if (buffer->io.mod_root == &hmp->lose_root) {
+ RB_REMOVE(hammer_mod_rb_tree,
+ buffer->io.mod_root, &buffer->io);
+ buffer->io.mod_root = NULL;
KKASSERT(buffer->io.modified == 0);
}
lwkt_reltoken(&hmp->io_token);
*
* No longer loose. lose_list requires the io_token.
*/
- if (buffer->io.mod_list == &hmp->lose_list) {
+ if (buffer->io.mod_root == &hmp->lose_root) {
lwkt_gettoken(&hmp->io_token);
- if (buffer->io.mod_list == &hmp->lose_list) {
- TAILQ_REMOVE(buffer->io.mod_list, &buffer->io,
- mod_entry);
- buffer->io.mod_list = NULL;
+ if (buffer->io.mod_root == &hmp->lose_root) {
+ RB_REMOVE(hammer_mod_rb_tree,
+ buffer->io.mod_root, &buffer->io);
+ buffer->io.mod_root = NULL;
}
lwkt_reltoken(&hmp->io_token);
}
hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
- TAILQ_INIT(&hmp->volu_list);
- TAILQ_INIT(&hmp->undo_list);
- TAILQ_INIT(&hmp->data_list);
- TAILQ_INIT(&hmp->meta_list);
- TAILQ_INIT(&hmp->lose_list);
+ RB_INIT(&hmp->volu_root);
+ RB_INIT(&hmp->undo_root);
+ RB_INIT(&hmp->data_root);
+ RB_INIT(&hmp->meta_root);
+ RB_INIT(&hmp->lose_root);
TAILQ_INIT(&hmp->iorun_list);
lwkt_token_init(&hmp->fs_token, 1, "hammerfs");