HAMMER_STRUCTURE_VOLUME,
HAMMER_STRUCTURE_META_BUFFER,
HAMMER_STRUCTURE_UNDO_BUFFER,
- HAMMER_STRUCTURE_DATA_BUFFER
+ HAMMER_STRUCTURE_DATA_BUFFER,
+ HAMMER_STRUCTURE_DUMMY
} hammer_io_type_t;
union hammer_io_structure;
struct hammer_mount *hmp;
struct hammer_volume *volume;
TAILQ_ENTRY(hammer_io) mod_entry; /* list entry if modified */
+ TAILQ_ENTRY(hammer_io) iorun_entry; /* iorun_list */
hammer_io_list_t mod_list;
struct buf *bp;
int64_t offset; /* zone-2 offset */
hammer_flush_group_t next_flush_group;
TAILQ_HEAD(, hammer_objid_cache) objid_cache_list;
TAILQ_HEAD(, hammer_reclaim) reclaim_list;
+ TAILQ_HEAD(, hammer_io) iorun_list;
};
typedef struct hammer_mount *hammer_mount_t;
void hammer_io_flush(struct hammer_io *io, int reclaim);
void hammer_io_wait(struct hammer_io *io);
void hammer_io_waitdep(struct hammer_io *io);
-void hammer_io_wait_all(hammer_mount_t hmp, const char *ident);
+void hammer_io_wait_all(hammer_mount_t hmp, const char *ident, int doflush);
int hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio,
hammer_btree_leaf_elm_t leaf);
int hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
}
/*
- * Flush UNDOs. This also waits for I/Os to complete and flushes
- * the cache on the target disk.
+ * Flush UNDOs. This can occur concurrently with the data flush
+ * because data writes never overwrite.
+ *
+ * This also waits for I/Os to complete and flushes the cache on
+ * the target disk.
*
* Record the UNDO append point as this can continue to change
* after we have flushed the UNDOs.
*/
hammer_flusher_clean_loose_ios(hmp);
if (hmp->version < HAMMER_VOL_VERSION_FOUR)
- hammer_io_wait_all(hmp, "hmrfl2");
+ hammer_io_wait_all(hmp, "hmrfl3", 1);
if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
goto failed;
hammer_flusher_clean_loose_ios(hmp);
if (mode == HAMMER_FLUSH_UNDOS_FORCED ||
(mode == HAMMER_FLUSH_UNDOS_AUTO && count)) {
- hammer_io_wait_all(hmp, "hmrfl1");
+ hammer_io_wait_all(hmp, "hmrfl1", 1);
+ } else {
+ hammer_io_wait_all(hmp, "hmrfl2", 0);
}
}
case HAMMER_STRUCTURE_UNDO_BUFFER:
iou->buffer.ondisk = NULL;
break;
+ case HAMMER_STRUCTURE_DUMMY:
+ panic("hammer_io_disassociate: bad io type");
+ break;
}
}
}
/*
- * Wait for all hammer_io-initated write I/O's to complete. This is not
- * supposed to count direct I/O's but some can leak through (for
- * non-full-sized direct I/Os).
+ * Wait for all currently queued HAMMER-initiated I/Os to complete.
+ *
+ * This is not supposed to count direct I/O's but some can leak
+ * through (for non-full-sized direct I/Os).
+ *
+ * Only entries already on hmp->iorun_list when this function is
+ * called are waited for; a placemarker is appended to the list and
+ * the caller sleeps until that placemarker reaches the head.
+ *
+ * hmp     - mount whose iorun_list is waited on.
+ * ident   - wait message handed to tsleep().
+ * doflush - when non-zero, hammer_io_flush_sync(hmp) is called
+ *           before returning, on both the empty-list path and the
+ *           path that had to wait.
*/
void
-hammer_io_wait_all(hammer_mount_t hmp, const char *ident)
+hammer_io_wait_all(hammer_mount_t hmp, const char *ident, int doflush)
{
- hammer_io_flush_sync(hmp);
+ struct hammer_io iodummy;
+ hammer_io_t io;
+
+ /*
+ * Degenerate case, no I/O is running
+ *
+ * Nothing is on iorun_list, so there is nothing to wait for;
+ * only the optional flush remains.
+ */
crit_enter();
- while (hmp->io_running_space)
- tsleep(&hmp->io_running_space, 0, ident, 0);
+ if (TAILQ_EMPTY(&hmp->iorun_list)) {
+ crit_exit();
+ if (doflush)
+ hammer_io_flush_sync(hmp);
+ return;
+ }
+ bzero(&iodummy, sizeof(iodummy));
+ iodummy.type = HAMMER_STRUCTURE_DUMMY;
+
+ /*
+ * Add placemarker and then wait until it becomes the head of
+ * the list.
+ *
+ * The placemarker is a local variable of this function and is
+ * queued at the tail, behind every entry already on the list.
+ * Once it is the head, all of those entries have been removed.
+ * The head is re-checked after every wakeup, and the sleep
+ * address is the placemarker itself.
+ */
+ TAILQ_INSERT_TAIL(&hmp->iorun_list, &iodummy, iorun_entry);
+ while (TAILQ_FIRST(&hmp->iorun_list) != &iodummy) {
+ tsleep(&iodummy, 0, ident, 0);
+ }
+
+ /*
+ * Chain in case several placemarkers are present.
+ *
+ * Our placemarker is unlinked before returning (it is a local
+ * variable).  If the new head is another HAMMER_STRUCTURE_DUMMY
+ * entry, wake the thread sleeping on it so that it can observe
+ * it has reached the head.
+ */
+ TAILQ_REMOVE(&hmp->iorun_list, &iodummy, iorun_entry);
+ io = TAILQ_FIRST(&hmp->iorun_list);
+ if (io && io->type == HAMMER_STRUCTURE_DUMMY)
+ wakeup(io);
crit_exit();
+
+ if (doflush)
+ hammer_io_flush_sync(hmp);
}
/*
*/
io->running = 1;
io->hmp->io_running_space += io->bytes;
+ TAILQ_INSERT_TAIL(&io->hmp->iorun_list, io, iorun_entry);
hammer_count_io_running_write += io->bytes;
bawrite(bp);
hammer_io_flush_mark(io->volume);
case HAMMER_STRUCTURE_DATA_BUFFER:
io->mod_list = &hmp->data_list;
break;
+ case HAMMER_STRUCTURE_DUMMY:
+ panic("hammer_io_disassociate: bad io type");
+ break;
}
TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry);
}
hammer_io_complete(struct buf *bp)
{
union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);
+ struct hammer_io *ionext;
KKASSERT(iou->io.released == 1);
hammer_stats_disk_write += iou->io.bytes;
hammer_count_io_running_write -= iou->io.bytes;
iou->io.hmp->io_running_space -= iou->io.bytes;
- if (iou->io.hmp->io_running_space == 0)
- wakeup(&iou->io.hmp->io_running_space);
KKASSERT(iou->io.hmp->io_running_space >= 0);
iou->io.running = 0;
+
+ /*
+ * Remove from iorun list and wakeup any multi-io waiter(s).
+ */
+ if (TAILQ_FIRST(&iou->io.hmp->iorun_list) == &iou->io) {
+ ionext = TAILQ_NEXT(&iou->io, iorun_entry);
+ if (ionext && ionext->type == HAMMER_STRUCTURE_DUMMY)
+ wakeup(ionext);
+ }
+ TAILQ_REMOVE(&iou->io.hmp->iorun_list, &iou->io, iorun_entry);
} else {
hammer_stats_disk_read += iou->io.bytes;
}
KKASSERT(io->running == 0);
io->running = 1;
io->hmp->io_running_space += io->bytes;
+ TAILQ_INSERT_TAIL(&io->hmp->iorun_list, io, iorun_entry);
hammer_count_io_running_write += io->bytes;
return(0);
}
*/
RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
hammer_recover_flush_buffer_callback, &final);
- hammer_io_wait_all(hmp, "hmrrcw");
+ hammer_io_wait_all(hmp, "hmrrcw", 1);
RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
hammer_recover_flush_buffer_callback, &final);
* Finalize the root volume header.
*/
if (root_volume && root_volume->io.recovered && final > 0) {
- crit_enter();
- while (hmp->io_running_space > 0)
- tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
- crit_exit();
+ hammer_io_wait_all(hmp, "hmrflx", 1);
root_volume->io.recovered = 0;
hammer_io_flush(&root_volume->io, 0);
hammer_rel_volume(root_volume, 0);
+ hammer_io_wait_all(hmp, "hmrfly", 1);
}
}
TAILQ_INIT(&hmp->data_list);
TAILQ_INIT(&hmp->meta_list);
TAILQ_INIT(&hmp->lose_list);
+ TAILQ_INIT(&hmp->iorun_list);
/*
* Load volumes