From 5177e93677f90befd71aafdd62b00f680c28d7ef Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 2 Aug 2008 21:24:28 +0000 Subject: [PATCH] HAMMER: MFC to 2.0 * Bug fix: fsync indefinite blocking * Bug fix: missed invalidation which can cause an assertion * Bug fix: kmalloc exhaustion panic on machines with > 2G of ram * Feature: Streaming mirroring --- sys/vfs/hammer/hammer.h | 14 +++- sys/vfs/hammer/hammer_blockmap.c | 8 +- sys/vfs/hammer/hammer_btree.c | 7 +- sys/vfs/hammer/hammer_btree.h | 2 +- sys/vfs/hammer/hammer_cursor.c | 2 +- sys/vfs/hammer/hammer_cursor.h | 2 +- sys/vfs/hammer/hammer_disk.h | 4 +- sys/vfs/hammer/hammer_flusher.c | 30 +++++++- sys/vfs/hammer/hammer_freemap.c | 2 +- sys/vfs/hammer/hammer_inode.c | 48 ++++++++---- sys/vfs/hammer/hammer_io.c | 115 +++++++++++++++++++--------- sys/vfs/hammer/hammer_ioctl.c | 8 +- sys/vfs/hammer/hammer_ioctl.h | 4 +- sys/vfs/hammer/hammer_mirror.c | 64 +++++++++------- sys/vfs/hammer/hammer_mount.h | 2 +- sys/vfs/hammer/hammer_object.c | 66 +++++++--------- sys/vfs/hammer/hammer_ondisk.c | 5 +- sys/vfs/hammer/hammer_pfs.c | 55 ++++++++++++- sys/vfs/hammer/hammer_prune.c | 2 +- sys/vfs/hammer/hammer_reblock.c | 2 +- sys/vfs/hammer/hammer_recover.c | 2 +- sys/vfs/hammer/hammer_signal.c | 2 +- sys/vfs/hammer/hammer_subs.c | 2 +- sys/vfs/hammer/hammer_transaction.c | 2 +- sys/vfs/hammer/hammer_undo.c | 2 +- sys/vfs/hammer/hammer_vfsops.c | 9 ++- sys/vfs/hammer/hammer_vnops.c | 3 +- 27 files changed, 310 insertions(+), 154 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 88299b33c1..4b18b6359a 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.117.2.5 2008/07/30 07:53:01 mneumann Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.117.2.6 2008/08/02 21:24:27 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -66,6 +66,7 @@ #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) MALLOC_DECLARE(M_HAMMER); +MALLOC_DECLARE(M_HAMMER_INO); /* * Kernel trace @@ -322,7 +323,7 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_INODE_VHELD 0x0400 /* vnode held on sync */ #define HAMMER_INODE_DONDISK 0x0800 /* data records may be on disk */ #define HAMMER_INODE_BUFS 0x1000 /* dirty high level bps present */ -#define HAMMER_INODE_REFLUSH 0x2000 /* pipelined flush during flush */ +#define HAMMER_INODE_REFLUSH 0x2000 /* flush on dependancy / reflush */ #define HAMMER_INODE_RECLAIM 0x4000 /* trying to reclaim */ #define HAMMER_INODE_FLUSHW 0x8000 /* Someone waiting for flush */ @@ -397,6 +398,7 @@ struct hammer_record { struct hammer_btree_leaf_elm leaf; union hammer_data_ondisk *data; int flags; + hammer_off_t zone2_offset; /* direct-write only */ }; typedef struct hammer_record *hammer_record_t; @@ -415,6 +417,7 @@ typedef struct hammer_record *hammer_record_t; #define HAMMER_RECF_CONVERT_DELETE 0x0100 /* special case */ #define HAMMER_RECF_DIRECT_IO 0x0200 /* related direct I/O running*/ #define HAMMER_RECF_DIRECT_WAIT 0x0400 /* related direct I/O running*/ +#define HAMMER_RECF_DIRECT_INVAL 0x0800 /* buffer alias invalidation */ /* * hammer_delete_at_cursor() flags @@ -719,7 +722,9 @@ struct hammer_mount { int error; /* critical I/O error */ struct krate krate; /* rate limited kprintf */ hammer_tid_t asof; /* snapshot mount */ - hammer_off_t next_tid; + hammer_tid_t next_tid; + hammer_tid_t flush_tid1; /* flusher tid sequencing */ + hammer_tid_t flush_tid2; /* flusher tid sequencing */ int64_t copy_stat_freebigblocks; /* number of free bigblocks */ u_int32_t namekey_iterator; @@ -843,6 +848,7 @@ int 
hammer_install_volume(hammer_mount_t hmp, const char *volname, struct vnode *devvp); int hammer_mountcheck_volumes(hammer_mount_t hmp); +int hammer_mem_add(hammer_record_t record); int hammer_ip_lookup(hammer_cursor_t cursor); int hammer_ip_first(hammer_cursor_t cursor); int hammer_ip_next(hammer_cursor_t cursor); @@ -1139,6 +1145,8 @@ int hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_pseudofs_rw *pfs); int hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_pseudofs_rw *pfs); +int hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_pseudofs_rw *pfs); int hammer_signal_check(hammer_mount_t hmp); diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c index 06ffd5d018..e047081d13 100644 --- a/sys/vfs/hammer/hammer_blockmap.c +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.24.2.2 2008/07/18 00:21:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.24.2.3 2008/08/02 21:24:27 dillon Exp $ */ /* @@ -573,7 +573,11 @@ hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv) /* * If we are releasing a zone and all of its reservations * were undone we have to clean out all hammer and device - * buffers associated with the big block. + * buffers associated with the big block. We do this + * primarily because the large-block may be reallocated + * from non-large-data to large-data or vice versa, resulting + * in a different mix of 16K and 64K buffer cache buffers. + * XXX - this isn't fun and needs to be redone. 
* * Any direct allocations will cause this test to fail * (bytes_freed will never reach append_off), which is diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 2b847111df..98fe2544c0 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.3 2008/07/19 18:46:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71.2.4 2008/08/02 21:24:27 dillon Exp $ */ /* @@ -704,8 +704,11 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) KKASSERT(data_len >= 0 && data_len <= HAMMER_XBUFSIZE); cursor->data = hammer_bread_ext(hmp, data_off, data_len, &error, &cursor->data_buffer); - if (hammer_crc_test_leaf(cursor->data, &elm->leaf) == 0) + if (hammer_crc_test_leaf(cursor->data, &elm->leaf) == 0) { + kprintf("CRC DATA @ %016llx/%d FAILED\n", + elm->leaf.data_offset, elm->leaf.data_len); Debugger("CRC FAILED: DATA"); + } return(error); } diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h index 43fccb998c..80bb84d786 100644 --- a/sys/vfs/hammer/hammer_btree.h +++ b/sys/vfs/hammer/hammer_btree.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24.2.1 2008/08/02 21:24:27 dillon Exp $ */ /* diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 5458bf914f..180460772f 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41 2008/07/11 01:22:29 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.41.2.1 2008/08/02 21:24:27 dillon Exp $ */ /* diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 66f7e3d3e1..4629401e07 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25 2008/07/10 04:44:33 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.25.2.1 2008/08/02 21:24:27 dillon Exp $ */ struct hammer_cmirror; diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 62a1b9cf44..9a5e4f5270 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.50.2.1 2008/07/19 18:46:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.50.2.2 2008/08/02 21:24:28 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -498,7 +498,7 @@ struct hammer_volume_ondisk { int64_t vol0_stat_inodes; /* for statfs only */ int64_t vol0_stat_records; /* total records in filesystem */ hammer_off_t vol0_btree_root; /* B-Tree root */ - hammer_tid_t vol0_next_tid; /* highest synchronized TID */ + hammer_tid_t vol0_next_tid; /* highest partially synchronized TID */ hammer_off_t vol0_unused03; /* diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index 83632266c0..633ee7e316 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.40.2.4 2008/07/19 04:51:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.40.2.5 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -231,7 +231,13 @@ hammer_flusher_master_thread(void *arg) break; while (hmp->flusher.signal == 0) tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0); - hmp->flusher.signal = 0; + + /* + * Flush for each count on signal but only allow one extra + * flush request to build up. + */ + if (--hmp->flusher.signal != 0) + hmp->flusher.signal = 1; } /* @@ -665,6 +671,13 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) hammer_modify_volume_done(root_volume); } + /* + * vol0_next_tid is used for TID selection and is updated without + * an UNDO so we do not reuse a TID that may have been rolled-back. + * + * vol0_last_tid is the highest fully-synchronized TID. It is + * set-up when the UNDO fifo is fully synced, later on (not here). + */ if (root_volume->io.modified) { hammer_modify_volume(NULL, root_volume, NULL, 0); if (root_volume->ondisk->vol0_next_tid < trans->tid) @@ -722,6 +735,18 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) hmp->hflags |= HMNT_UNDO_DIRTY; } hammer_clear_undo_history(hmp); + + /* + * Flush tid sequencing. flush_tid1 is fully synchronized, + * meaning a crash will not roll it back. flush_tid2 has + * been written out asynchronously and a crash will roll + * it back. flush_tid1 is used for all mirroring masters. 
+ */ + if (hmp->flush_tid1 != hmp->flush_tid2) { + hmp->flush_tid1 = hmp->flush_tid2; + wakeup(&hmp->flush_tid1); + } + hmp->flush_tid2 = trans->tid; } /* @@ -738,6 +763,7 @@ failed: done: hammer_unlock(&hmp->flusher.finalize_lock); + if (--hmp->flusher.finalize_want == 0) wakeup(&hmp->flusher.finalize_want); hammer_stats_commits += final; diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c index a971e470c0..ad40687346 100644 --- a/sys/vfs/hammer/hammer_freemap.c +++ b/sys/vfs/hammer/hammer_freemap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.18 2008/06/20 05:38:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.18.2.1 2008/08/02 21:24:28 dillon Exp $ */ /* diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 8fa958b1a0..69b2efcf52 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.2 2008/07/18 00:21:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.103.2.3 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" @@ -378,7 +378,7 @@ loop: /* * Allocate a new inode structure and deal with races later. 
*/ - ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); + ip = kmalloc(sizeof(*ip), M_HAMMER_INO, M_WAITOK|M_ZERO); ++hammer_count_inodes; ++hmp->count_inodes; ip->obj_id = obj_id; @@ -511,7 +511,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, hmp = trans->hmp; - ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); + ip = kmalloc(sizeof(*ip), M_HAMMER_INO, M_WAITOK|M_ZERO); ++hammer_count_inodes; ++hmp->count_inodes; @@ -672,7 +672,7 @@ hammer_free_inode(hammer_inode_t ip) hammer_rel_pseudofs(ip->hmp, ip->pfsm); ip->pfsm = NULL; } - kfree(ip, M_HAMMER); + kfree(ip, M_HAMMER_INO); ip = NULL; } @@ -1360,8 +1360,9 @@ hammer_modify_inode(hammer_inode_t ip, int flags) * place the inode in a flushing state if it is currently idle and flag it * to reflush if it is currently flushing. * - * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to - * flush the indoe synchronously using the caller's context. + * Upon return if the inode could not be flushed due to a setup + * dependancy, then it will be automatically flushed when the dependancy + * is satisfied. */ void hammer_flush_inode(hammer_inode_t ip, int flags) @@ -1440,10 +1441,14 @@ hammer_flush_inode(hammer_inode_t ip, int flags) hammer_flush_inode_core(ip, flg, flags); } else { /* - * parent has no connectivity, tell it to flush + * Parent has no connectivity, tell it to flush * us as soon as it does. + * + * The REFLUSH flag is also needed to trigger + * dependancy wakeups. */ - ip->flags |= HAMMER_INODE_CONN_DOWN; + ip->flags |= HAMMER_INODE_CONN_DOWN | + HAMMER_INODE_REFLUSH; if (flags & HAMMER_FLUSH_SIGNAL) { ip->flags |= HAMMER_INODE_RESIGNAL; hammer_flusher_async(ip->hmp, flg); @@ -1454,6 +1459,9 @@ hammer_flush_inode(hammer_inode_t ip, int flags) /* * We are already flushing, flag the inode to reflush * if needed after it completes its current flush. + * + * The REFLUSH flag is also needed to trigger + * dependancy wakeups. 
*/ if ((ip->flags & HAMMER_INODE_REFLUSH) == 0) ip->flags |= HAMMER_INODE_REFLUSH; @@ -1706,17 +1714,22 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags) */ if (go_count == 0) { if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) { - ip->flags |= HAMMER_INODE_REFLUSH; - --ip->hmp->count_iqueued; --hammer_count_iqueued; + --flg->total_count; ip->flush_state = HAMMER_FST_SETUP; ip->flush_group = NULL; if (ip->flags & HAMMER_INODE_VHELD) { ip->flags &= ~HAMMER_INODE_VHELD; vrele(ip->vp); } + + /* + * REFLUSH is needed to trigger dependancy wakeups + * when an inode is in SETUP. + */ + ip->flags |= HAMMER_INODE_REFLUSH; if (flags & HAMMER_FLUSH_SIGNAL) { ip->flags |= HAMMER_INODE_RESIGNAL; hammer_flusher_async(ip->hmp, flg); @@ -1909,8 +1922,8 @@ hammer_setup_child_callback(hammer_record_t rec, void *data) * flush groups before it can be completely * flushed. */ - ip->flags |= HAMMER_INODE_REFLUSH; - ip->flags |= HAMMER_INODE_RESIGNAL; + ip->flags |= HAMMER_INODE_RESIGNAL | + HAMMER_INODE_REFLUSH; r = -1; } else if (rec->type == HAMMER_MEM_RECORD_ADD) { /* @@ -1994,13 +2007,14 @@ hammer_wait_inode(hammer_inode_t ip) flg = NULL; if ((ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) { - if (ip->flush_state == HAMMER_FST_SETUP) { - hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); - } while (ip->flush_state != HAMMER_FST_IDLE && (ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) { - ip->flags |= HAMMER_INODE_FLUSHW; - tsleep(&ip->flags, 0, "hmrwin", 0); + if (ip->flush_state == HAMMER_FST_SETUP) + hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); + if (ip->flush_state != HAMMER_FST_IDLE) { + ip->flags |= HAMMER_INODE_FLUSHW; + tsleep(&ip->flags, 0, "hmrwin", 0); + } } } } diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 04ac62475b..6ca02b9440 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.2 2008/07/18 00:21:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49.2.3 2008/08/02 21:24:28 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -248,7 +248,11 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io) /* * Remove potential device level aliases against buffers managed by high level - * vnodes. + * vnodes. Aliases can also be created due to mixed buffer sizes. + * + * This is nasty because the buffers are also VMIO-backed. Even if a buffer + * does not exist its backing VM pages might, and we have to invalidate + * those as well or a getblk() will reinstate them. */ void hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) @@ -260,20 +264,21 @@ hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) phys_offset = volume->ondisk->vol_buf_beg + (zone2_offset & HAMMER_OFF_SHORT_MASK); crit_enter(); - if ((bp = findblk(volume->devvp, phys_offset)) != NULL) { + if ((bp = findblk(volume->devvp, phys_offset)) != NULL) bp = getblk(volume->devvp, phys_offset, bp->b_bufsize, 0, 0); - if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) { - hammer_io_clear_modify(&iou->io, 1); - bundirty(bp); - iou->io.reclaim = 1; - hammer_io_deallocate(bp); - } else { - KKASSERT((bp->b_flags & B_LOCKED) == 0); - bundirty(bp); - bp->b_flags |= B_NOCACHE|B_RELBUF; - } - brelse(bp); + else + bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0); + if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) { + hammer_io_clear_modify(&iou->io, 1); + bundirty(bp); + iou->io.reclaim = 1; + hammer_io_deallocate(bp); + } else { + KKASSERT((bp->b_flags & B_LOCKED) == 0); + bundirty(bp); + bp->b_flags |= B_NOCACHE|B_RELBUF; } + brelse(bp); crit_exit(); } @@ -995,9 +1000,6 @@ struct bio_ops hammer_bioops = { * disk media. The bio may be issued asynchronously. If leaf is non-NULL * we validate the CRC. 
* - * A second-level bio already resolved to a zone-2 offset (typically by - * the BMAP code, or by a previous hammer_io_direct_write()), is passed. - * * We must check for the presence of a HAMMER buffer to handle the case * where the reblocker has rewritten the data (which it does via the HAMMER * buffer system, not via the high-level vnode buffer cache), but not yet @@ -1048,11 +1050,12 @@ hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio, error = EIO; if (error == 0) { - zone2_offset &= HAMMER_OFF_SHORT_MASK; - + /* + * 3rd level bio + */ nbio = push_bio(bio); nbio->bio_offset = volume->ondisk->vol_buf_beg + - zone2_offset; + (zone2_offset & HAMMER_OFF_SHORT_MASK); #if 0 /* * XXX disabled - our CRC check doesn't work if the OS @@ -1110,7 +1113,7 @@ hammer_io_direct_read_complete(struct bio *nbio) * disk media. The bio may be issued asynchronously. * * The BIO is associated with the specified record and RECF_DIRECT_IO - * is set. + * is set. The record is added to its object. */ int hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record, @@ -1148,8 +1151,10 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record, if (error == 0) { bp = bio->bio_buf; KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0); + /* hammer_del_buffers(hmp, buf_offset, zone2_offset, bp->b_bufsize); + */ /* * Second level bio - cached zone2 offset. @@ -1161,7 +1166,9 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record, nbio->bio_offset = zone2_offset; nbio->bio_done = hammer_io_direct_write_complete; nbio->bio_caller_info1.ptr = record; - record->flags |= HAMMER_RECF_DIRECT_IO; + record->zone2_offset = zone2_offset; + record->flags |= HAMMER_RECF_DIRECT_IO | + HAMMER_RECF_DIRECT_INVAL; /* * Third level bio - raw offset specific to the @@ -1195,7 +1202,17 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record, biodone(bio); } } - if (error) { + if (error == 0) { + /* + * The record is all setup now, add it. 
Potential conflicts + * have already been dealt with. + */ + error = hammer_mem_add(record); + KKASSERT(error == 0); + } else { + /* + * Major suckage occurred. + */ kprintf("hammer_direct_write: failed @ %016llx\n", leaf->data_offset); bp = bio->bio_buf; @@ -1203,6 +1220,8 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record, bp->b_error = EIO; bp->b_flags |= B_ERROR; biodone(bio); + record->flags |= HAMMER_RECF_DELETED_FE; + hammer_rel_mem_record(record); } return(error); } @@ -1220,17 +1239,21 @@ void hammer_io_direct_write_complete(struct bio *nbio) { struct bio *obio; + struct buf *bp; hammer_record_t record = nbio->bio_caller_info1.ptr; + bp = nbio->bio_buf; obio = pop_bio(nbio); - if (obio->bio_buf->b_flags & B_ERROR) { + if (bp->b_flags & B_ERROR) { hammer_critical_error(record->ip->hmp, record->ip, - obio->bio_buf->b_error, + bp->b_error, "while writing bulk data"); - obio->bio_buf->b_flags |= B_INVAL; + bp->b_flags |= B_INVAL; } biodone(obio); - KKASSERT(record != NULL && (record->flags & HAMMER_RECF_DIRECT_IO)); + + KKASSERT(record != NULL); + KKASSERT(record->flags & HAMMER_RECF_DIRECT_IO); record->flags &= ~HAMMER_RECF_DIRECT_IO; if (record->flags & HAMMER_RECF_DIRECT_WAIT) { record->flags &= ~HAMMER_RECF_DIRECT_WAIT; @@ -1241,22 +1264,40 @@ hammer_io_direct_write_complete(struct bio *nbio) /* * This is called before a record is either committed to the B-Tree - * or destroyed, to resolve any associated direct-IO. We must - * ensure that the data is available on-media to other consumers - * such as the reblocker or mirroring code. + * or destroyed, to resolve any associated direct-IO. * - * Note that other consumers might access the data via the block - * device's buffer cache and not the high level vnode's buffer cache. + * (1) We must wait for any direct-IO related to the record to complete. 
+ * + * (2) We must remove any buffer cache aliases for data accessed via + * leaf->data_offset or zone2_offset so non-direct-IO consumers + * (the mirroring and reblocking code) do not see stale data. */ void hammer_io_direct_wait(hammer_record_t record) { - crit_enter(); - while (record->flags & HAMMER_RECF_DIRECT_IO) { - record->flags |= HAMMER_RECF_DIRECT_WAIT; - tsleep(&record->flags, 0, "hmdiow", 0); + /* + * Wait for I/O to complete + */ + if (record->flags & HAMMER_RECF_DIRECT_IO) { + crit_enter(); + while (record->flags & HAMMER_RECF_DIRECT_IO) { + record->flags |= HAMMER_RECF_DIRECT_WAIT; + tsleep(&record->flags, 0, "hmdiow", 0); + } + crit_exit(); + } + + /* + * Invalidate any related buffer cache aliases. + */ + if (record->flags & HAMMER_RECF_DIRECT_INVAL) { + KKASSERT(record->leaf.data_offset); + hammer_del_buffers(record->ip->hmp, + record->leaf.data_offset, + record->zone2_offset, + record->leaf.data_len); + record->flags &= ~HAMMER_RECF_DIRECT_INVAL; } - crit_exit(); } /* diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c index 17da8d183b..f8ed40c021 100644 --- a/sys/vfs/hammer/hammer_ioctl.c +++ b/sys/vfs/hammer/hammer_ioctl.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.28.2.1 2008/07/16 18:39:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.28.2.2 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" @@ -101,6 +101,12 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, (struct hammer_ioc_pseudofs_rw *)data); } break; + case HAMMERIOC_WAI_PSEUDOFS: + if (error == 0) { + error = hammer_ioc_wait_pseudofs(&trans, ip, + (struct hammer_ioc_pseudofs_rw *)data); + } + break; case HAMMERIOC_MIRROR_READ: if (error == 0) { error = hammer_ioc_mirror_read(&trans, ip, diff --git a/sys/vfs/hammer/hammer_ioctl.h b/sys/vfs/hammer/hammer_ioctl.h index 2c31b8ece4..29330dfd6f 100644 --- a/sys/vfs/hammer/hammer_ioctl.h +++ b/sys/vfs/hammer/hammer_ioctl.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.21 2008/07/12 23:04:50 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.21.2.1 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER ioctl's. 
This file can be #included from userland @@ -300,6 +300,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t; #define HAMMER_MREC_TYPE_SKIP 5 /* skip-range */ #define HAMMER_MREC_TYPE_PASS 6 /* record for cmp only (pass) */ #define HAMMER_MREC_TYPE_TERM 7 /* (userland only) */ +#define HAMMER_MREC_TYPE_IDLE 8 /* (userland only) */ #define HAMMER_MREC_CRCOFF (offsetof(struct hammer_ioc_mrecord_head, rec_size)) #define HAMMER_MREC_HEADSIZE sizeof(struct hammer_ioc_mrecord_head) @@ -322,6 +323,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t; #define HAMMERIOC_UPG_PSEUDOFS _IOWR('h',9,struct hammer_ioc_pseudofs_rw) #define HAMMERIOC_DGD_PSEUDOFS _IOWR('h',10,struct hammer_ioc_pseudofs_rw) #define HAMMERIOC_RMR_PSEUDOFS _IOWR('h',11,struct hammer_ioc_pseudofs_rw) +#define HAMMERIOC_WAI_PSEUDOFS _IOWR('h',12,struct hammer_ioc_pseudofs_rw) #endif diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index 34c63c317e..6da1c01434 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.15 2008/07/13 01:12:41 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.15.2.1 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER mirroring ioctls - serialize and deserialize modifications made @@ -186,10 +186,21 @@ retry: elm = &cursor.node->ondisk->elms[cursor.index].leaf; mirror->key_cur = elm->base; - if ((elm->base.create_tid < mirror->tid_beg || - elm->base.create_tid > mirror->tid_end) && - (elm->base.delete_tid < mirror->tid_beg || - elm->base.delete_tid > mirror->tid_end)) { + /* + * Determine if we should generate a PASS or a REC. PASS + * records are records without any data payload. Such + * records will be generated if the target is already expected + * to have the record, allowing it to delete the gaps. 
+ * + * A PASS record is also used to perform deletions on the + * target. + * + * Such deletions are needed if the master or files on the + * master are no-history, or if the slave is so far behind + * the master has already been pruned. + */ + if (elm->base.create_tid < mirror->tid_beg || + elm->base.create_tid > mirror->tid_end) { bytes = sizeof(mrec.rec); if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size) { @@ -197,15 +208,7 @@ retry: } /* - * Fill mrec. PASS records are records which are - * outside the TID range needed for the mirror - * update. They are sent without any data payload - * because the mirroring target must still compare - * records that fall outside the SKIP ranges to - * determine what might need to be deleted. Such - * deletions are needed if the master or files on - * the master are no-history, or if the slave is - * so far behind the master has already been pruned. + * Fill mrec. */ mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; mrec.head.type = HAMMER_MREC_TYPE_PASS; @@ -246,7 +249,7 @@ retry: mrec.head.type = HAMMER_MREC_TYPE_REC; mrec.head.rec_size = bytes; mrec.rec.leaf = *elm; - if (elm->base.delete_tid >= mirror->tid_end) + if (elm->base.delete_tid > mirror->tid_end) mrec.rec.leaf.base.delete_tid = 0; rec_crc = crc32(&mrec.head.rec_size, sizeof(mrec.rec) - crc_start); @@ -561,10 +564,12 @@ hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, * * If the record exists only the delete_tid may be updated. * - * If the record does not exist we create it. For now we - * ignore records with a non-zero delete_tid. Note that - * mirror operations are effective an as-of operation and - * delete_tid can be 0 for mirroring purposes even if it is + * If the record does not exist we can create it only if the + * create_tid is not too old. If the create_tid is too old + * it may have already been destroyed on the slave from pruning. 
+ * + * Note that mirror operations are effectively as-of operations + * and delete_tid can be 0 for mirroring purposes even if it is * not actually 0 at the originator. * * These functions can return EDEADLK @@ -576,10 +581,11 @@ hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, if (error == 0 && hammer_mirror_check(cursor, mrec)) { error = hammer_mirror_update(cursor, mrec); - } else if (error == ENOENT && mrec->leaf.base.delete_tid == 0) { - error = hammer_mirror_write(cursor, mrec, uptr); } else if (error == ENOENT) { - error = 0; + if (mrec->leaf.base.create_tid >= mirror->tid_beg) + error = hammer_mirror_write(cursor, mrec, uptr); + else + error = 0; } if (error == 0 || error == EALREADY) mirror->key_cur = mrec->leaf.base; @@ -630,7 +636,9 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, error = hammer_mirror_delete_to(cursor, mirror); /* - * Locate the record and get past it by setting ATEDISK. + * Locate the record and get past it by setting ATEDISK. Perform + * any necessary deletions. We have no data payload and cannot + * create a new record. 
*/ if (error == 0) { mirror->key_cur = mrec->leaf.base; @@ -638,10 +646,13 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, cursor->flags |= HAMMER_CURSOR_BACKEND; cursor->flags &= ~HAMMER_CURSOR_INSERT; error = hammer_btree_lookup(cursor); - if (error == 0) + if (error == 0) { + if (hammer_mirror_check(cursor, mrec)) + error = hammer_mirror_update(cursor, mrec); cursor->flags |= HAMMER_CURSOR_ATEDISK; - else + } else { cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + } if (error == ENOENT) error = 0; } @@ -668,14 +679,13 @@ hammer_mirror_delete_to(hammer_cursor_t cursor, while (error == 0) { elm = &cursor->node->ondisk->elms[cursor->index].leaf; KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD); + cursor->flags |= HAMMER_CURSOR_ATEDISK; if (elm->base.delete_tid == 0) { error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST, mirror->tid_end, time_second, 1, NULL); - if (error == 0) - cursor->flags |= HAMMER_CURSOR_ATEDISK; } if (error == 0) error = hammer_btree_iterate(cursor); diff --git a/sys/vfs/hammer/hammer_mount.h b/sys/vfs/hammer/hammer_mount.h index 24732e1238..1d03143303 100644 --- a/sys/vfs/hammer/hammer_mount.h +++ b/sys/vfs/hammer/hammer_mount.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.9.2.1 2008/07/19 18:46:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.9.2.2 2008/08/02 21:24:28 dillon Exp $ */ #ifndef _SYS_TYPES_H_ diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 26b43e72da..144bb29ff3 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,12 +31,11 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.2 2008/07/19 04:51:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.90.2.3 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" -static int hammer_mem_add(hammer_record_t record); static int hammer_mem_lookup(hammer_cursor_t cursor); static int hammer_mem_first(hammer_cursor_t cursor); static int hammer_frontend_trunc_callback(hammer_record_t record, @@ -399,10 +398,13 @@ hammer_rel_mem_record(struct hammer_record *record) /* * We must wait for any direct-IO to complete before - * we can destroy the record. + * we can destroy the record because the bio may + * have a reference to it. */ - if (record->flags & HAMMER_RECF_DIRECT_IO) + if (record->flags & + (HAMMER_RECF_DIRECT_IO | HAMMER_RECF_DIRECT_INVAL)) { hammer_io_direct_wait(record); + } /* @@ -668,10 +670,16 @@ hammer_ip_add_directory(struct hammer_transaction *trans, /* * The inode now has a dependancy and must be taken out of the idle * state. An inode not in an idle state is given an extra reference. + * + * When transitioning to a SETUP state flag for an automatic reflush + * when the dependancies are disposed of if someone is waiting on + * the inode. */ if (ip->flush_state == HAMMER_FST_IDLE) { hammer_ref(&ip->lock); ip->flush_state = HAMMER_FST_SETUP; + if (ip->flags & HAMMER_INODE_FLUSHW) + ip->flags |= HAMMER_INODE_REFLUSH; } error = hammer_mem_add(record); if (error == 0) { @@ -742,10 +750,16 @@ hammer_ip_del_directory(struct hammer_transaction *trans, * The inode now has a dependancy and must be taken out of * the idle state. An inode not in an idle state is given * an extra reference. + * + * When transitioning to a SETUP state flag for an automatic + * reflush when the dependancies are disposed of if someone + * is waiting on the inode. 
*/ if (ip->flush_state == HAMMER_FST_IDLE) { hammer_ref(&ip->lock); ip->flush_state = HAMMER_FST_SETUP; + if (ip->flags & HAMMER_INODE_FLUSHW) + ip->flags |= HAMMER_INODE_REFLUSH; } error = hammer_mem_add(record); @@ -843,6 +857,8 @@ hammer_ip_get_bulk(hammer_inode_t ip, off_t file_offset, int bytes) * flush a buffer cache buffer. The frontend has locked the related buffer * cache buffers and we should be able to manipulate any overlapping * in-memory records. + * + * The caller is responsible for adding the returned record. */ hammer_record_t hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes, @@ -851,7 +867,6 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes, hammer_record_t record; hammer_record_t conflict; int zone; - int flags; /* * Deal with conflicting in-memory records. We cannot have multiple @@ -903,30 +918,8 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes, HAMMER_LOCALIZE_MISC; record->leaf.data_len = bytes; hammer_crc_set_leaf(data, &record->leaf); - flags = record->flags; - - hammer_ref(&record->lock); /* mem_add eats a reference */ - *errorp = hammer_mem_add(record); - if (*errorp) { - conflict = hammer_ip_get_bulk(ip, file_offset, bytes); - kprintf("hammer_ip_add_bulk: error %d conflict %p file_offset %lld bytes %d\n", - *errorp, conflict, file_offset, bytes); - if (conflict) - kprintf("conflict %lld %d\n", conflict->leaf.base.key, conflict->leaf.data_len); - if (conflict) - hammer_rel_mem_record(conflict); - } KKASSERT(*errorp == 0); - conflict = hammer_ip_get_bulk(ip, file_offset, bytes); - if (conflict != record) { - kprintf("conflict mismatch %p %p %08x\n", conflict, record, record->flags); - if (conflict) - kprintf("conflict mismatch %lld/%d %lld/%d\n", conflict->leaf.base.key, conflict->leaf.data_len, record->leaf.base.key, record->leaf.data_len); - } - KKASSERT(conflict == record); - hammer_rel_mem_record(conflict); - - return (record); + 
return(record); } /* @@ -1017,6 +1010,13 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) KKASSERT(record->flags & HAMMER_RECF_INTERLOCK_BE); KKASSERT(record->leaf.base.localization != 0); + /* + * Any direct-write related to the record must complete before we + * can sync the record to the on-disk media. + */ + if (record->flags & (HAMMER_RECF_DIRECT_IO | HAMMER_RECF_DIRECT_INVAL)) + hammer_io_direct_wait(record); + /* * If this is a bulk-data record placemarker there may be an existing * record on-disk, indicating a data overwrite. If there is the @@ -1164,13 +1164,6 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) record->leaf.data_crc = 0; } - /* - * If the record's data was direct-written we cannot insert - * it until the direct-IO has completed. - */ - if (record->flags & HAMMER_RECF_DIRECT_IO) - hammer_io_direct_wait(record); - error = hammer_btree_insert(cursor, &record->leaf, &doprop); if (hammer_debug_inode && error) kprintf("BTREE INSERT error %d @ %016llx:%d key %016llx\n", error, cursor->node->node_offset, cursor->index, record->leaf.base.key); @@ -1224,7 +1217,6 @@ done: * A copy of the temporary record->data pointer provided by the caller * will be made. */ -static int hammer_mem_add(hammer_record_t record) { @@ -1925,7 +1917,6 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip, hammer_tid_t tid) { hammer_record_t iprec; - hammer_btree_elm_t elm; hammer_mount_t hmp; int error; @@ -1960,7 +1951,6 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip, * hammer_delete_at_cursor() not to. 
*/ error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF); - elm = NULL; if (error == 0) { error = hammer_delete_at_cursor( diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index b1a1fc9a03..de20a32401 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.3 2008/07/30 07:53:01 mneumann Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.69.2.4 2008/08/02 21:24:28 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -526,7 +526,7 @@ again: hammer_ref(&buffer->io.lock); /* - * Onced refed the ondisk field will not be cleared by + * Once refed the ondisk field will not be cleared by * any other action. */ if (buffer->ondisk && buffer->io.loading == 0) { @@ -1478,6 +1478,7 @@ hammer_sync_hmp(hammer_mount_t hmp, int waitfor) hammer_flusher_sync(hmp); } else { hammer_flusher_async(hmp, NULL); + hammer_flusher_async(hmp, NULL); } return(info.error); } diff --git a/sys/vfs/hammer/hammer_pfs.c b/sys/vfs/hammer/hammer_pfs.c index 920c507d67..e5ba329d77 100644 --- a/sys/vfs/hammer/hammer_pfs.c +++ b/sys/vfs/hammer/hammer_pfs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.3 2008/07/19 18:46:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.4 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER PFS ioctls - Manage pseudo-fs configurations @@ -76,9 +76,13 @@ hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, * If the PFS is a master the sync tid is set by normal operation * rather then the mirroring code, and will always track the * real HAMMER filesystem. + * + * We use flush_tid1, which is the highest fully committed TID. 
+ * flush_tid2 is the TID most recently flushed, but the UNDO hasn't + * caught up to it yet so a crash will roll us back to flush_tid1. */ if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) - pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid; + pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; /* * Copy out to userland. @@ -126,6 +130,11 @@ hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, error = hammer_mkroot_pseudofs(trans, cred, pfsm); if (error == 0) error = hammer_save_pseudofs(trans, pfsm); + + /* + * Wakeup anyone waiting for a TID update for this PFS + */ + wakeup(&pfsm->pfsd.sync_end_tid); hammer_rel_pseudofs(trans->hmp, pfsm); } return(error); @@ -255,6 +264,48 @@ hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, return(error); } +/* + * Wait for the PFS to sync past the specified TID + */ +int +hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_pseudofs_rw *pfs) +{ + hammer_pseudofs_inmem_t pfsm; + struct hammer_pseudofs_data pfsd; + u_int32_t localization; + hammer_tid_t tid; + void *waitp; + int error; + + if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) + return(error); + localization = (u_int32_t)pfs->pfs_id << 16; + + if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) + return(error); + + pfsm = hammer_load_pseudofs(trans, localization, &error); + if (error == 0) { + if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { + tid = pfsm->pfsd.sync_end_tid; + waitp = &pfsm->pfsd.sync_end_tid; + } else { + tid = trans->hmp->flush_tid1; + waitp = &trans->hmp->flush_tid1; + } + if (tid <= pfsd.sync_end_tid) + tsleep(waitp, PCATCH, "hmrmwt", 0); + } + hammer_rel_pseudofs(trans->hmp, pfsm); + if (error == EINTR) { + pfs->head.flags |= HAMMER_IOC_HEAD_INTR; + error = 0; + } + return(error); +} + + /* * Auto-detect the pseudofs and do basic bounds checking. 
*/ diff --git a/sys/vfs/hammer/hammer_prune.c b/sys/vfs/hammer/hammer_prune.c index 0c807a5ba7..ec56d5d920 100644 --- a/sys/vfs/hammer/hammer_prune.c +++ b/sys/vfs/hammer/hammer_prune.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.18 2008/07/14 03:20:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.18.2.1 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index ff677a4f40..94e80c0079 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.32.2.1 2008/07/16 18:39:32 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.32.2.2 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER reblocker - This code frees up fragmented physical space diff --git a/sys/vfs/hammer/hammer_recover.c b/sys/vfs/hammer/hammer_recover.c index d1d99809a3..6a1cffe364 100644 --- a/sys/vfs/hammer/hammer_recover.c +++ b/sys/vfs/hammer/hammer_recover.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.28.2.1 2008/07/26 05:37:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.28.2.2 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" diff --git a/sys/vfs/hammer/hammer_signal.c b/sys/vfs/hammer/hammer_signal.c index c0ad1bf91b..95a5dcf04a 100644 --- a/sys/vfs/hammer/hammer_signal.c +++ b/sys/vfs/hammer/hammer_signal.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_signal.c,v 1.1 2008/03/20 06:08:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_signal.c,v 1.1.2.1 2008/08/02 21:24:28 dillon Exp $ */ /* * Check for interruption when doing a long ioctl operation. diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index 573c8e7fa1..dd6c4abe8f 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.34 2008/07/11 01:22:29 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.34.2.1 2008/08/02 21:24:28 dillon Exp $ */ /* * HAMMER structural locking diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index 3a8d2c722e..5c722af2be 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.22.2.2 2008/07/19 18:46:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.22.2.3 2008/08/02 21:24:28 dillon Exp $ */ #include "hammer.h" diff --git a/sys/vfs/hammer/hammer_undo.c b/sys/vfs/hammer/hammer_undo.c index 6dcc704321..6514696f4b 100644 --- a/sys/vfs/hammer/hammer_undo.c +++ b/sys/vfs/hammer/hammer_undo.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.18.2.2 2008/07/18 00:21:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.18.2.3 2008/08/02 21:24:28 dillon Exp $ */ /* diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 6ed236ca03..406e4b0785 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.63.2.5 2008/07/30 07:53:01 mneumann Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.63.2.6 2008/08/02 21:24:28 dillon Exp $ */ #include @@ -248,7 +248,8 @@ static struct vfsops hammer_vfsops = { .vfs_checkexp = hammer_vfs_checkexp }; -MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount"); +MALLOC_DEFINE(M_HAMMER, "hammer-general", "hammer general"); +MALLOC_DEFINE(M_HAMMER_INO, "hammer-inodes", "hammer inodes"); VFS_SET(hammer_vfsops, hammer, 0); MODULE_VERSION(hammer, 1); @@ -582,6 +583,8 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, * on-disk first_offset represents the LAST flush cycle. */ hmp->next_tid = rootvol->ondisk->vol0_next_tid; + hmp->flush_tid1 = hmp->next_tid; + hmp->flush_tid2 = hmp->next_tid; bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap, sizeof(hmp->blockmap)); hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks; @@ -871,8 +874,6 @@ hammer_vfs_sync(struct mount *mp, int waitfor) if (panicstr == NULL) { error = hammer_sync_hmp(hmp, waitfor); - if (error == 0) - error = hammer_sync_hmp(hmp, waitfor); } else { error = EIO; } diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index b669d892bf..9c2dfdaf6f 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.2 2008/07/19 04:51:09 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.91.2.3 2008/08/02 21:24:28 dillon Exp $ */ #include @@ -2610,7 +2610,6 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) bytes, &error); if (record) { hammer_io_direct_write(hmp, record, bio); - hammer_rel_mem_record(record); if (ip->rsv_recs > 1 && hmp->rsv_recs > hammer_limit_recs) hammer_flush_inode(ip, 0); } else { -- 2.41.0