From 43c665ae988b8aad9ffde49db18622101a33d25c Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 21 Jun 2008 20:21:58 +0000 Subject: [PATCH] HAMMER 56F/Many: Stabilization pass * When data is reblocked any related direct-io offsets cached in front-end buffer cache buffers must be cleaned out. This also requires running through any snapshotted inodes referencing the same object. * The flusher must check that the cached B-Tree node has not been flagged as deleted (HAMMER_NODE_DELETED) before seeking to it. * hammer_io_direct_read() now requires and asserts that the second-level cached offset in the BIO is a zone-2 offset. * hammer_io_direct_write() no longer overwrites the second-level cached offset with the third level raw disk offset. It pushes a third level to set the raw disk offset. * When creating a directory entry, set the localization field for pseudo-fs support (which isn't quite working yet anyway so no biggy). * Move the Red-Black tree generator for inodes from hammer_ondisk.c to hammer_inode.c. --- sys/vfs/hammer/hammer.h | 10 ++- sys/vfs/hammer/hammer_inode.c | 92 +++++++++++++++++++++- sys/vfs/hammer/hammer_io.c | 132 +++++++++++++++++++++++++------- sys/vfs/hammer/hammer_object.c | 3 +- sys/vfs/hammer/hammer_ondisk.c | 49 +----------- sys/vfs/hammer/hammer_reblock.c | 11 +-- sys/vfs/hammer/hammer_vnops.c | 4 +- 7 files changed, 213 insertions(+), 88 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 4746a9a60e..e204cbfdc3 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.88 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.89 2008/06/21 20:21:58 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -88,6 +88,9 @@ typedef struct hammer_inode_info { int64_t obj_id; /* (key) object identifier */ hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */ u_int32_t obj_localization; /* (key) pseudo-fs */ + union { + struct hammer_btree_leaf_elm *leaf; + } u; } *hammer_inode_info_t; typedef enum hammer_transaction_type { @@ -737,6 +740,10 @@ struct hammer_inode *hammer_get_inode(hammer_transaction_t trans, hammer_inode_t dip, u_int64_t obj_id, hammer_tid_t asof, u_int32_t localization, int flags, int *errorp); +void hammer_scan_inode_snapshots(hammer_mount_t hmp, + hammer_inode_info_t iinfo, + int (*callback)(hammer_inode_t ip, void *data), + void *data); void hammer_put_inode(struct hammer_inode *ip); void hammer_put_inode_ref(struct hammer_inode *ip); void hammer_inode_waitreclaims(hammer_mount_t hmp); @@ -981,6 +988,7 @@ void hammer_io_wait_all(hammer_mount_t hmp, const char *ident); int hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio); int hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, struct bio *bio); +void hammer_io_direct_uncache(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf); void hammer_io_write_interlock(hammer_io_t io); void hammer_io_done_interlock(hammer_io_t io); void hammer_io_clear_modify(struct hammer_io *io, int inval); diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index dd6efaf8cd..e0490b86b5 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.80 2008/06/21 01:24:12 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.81 2008/06/21 20:21:58 dillon Exp $ */ #include "hammer.h" @@ -50,6 +50,75 @@ static void hammer_inode_wakereclaims(hammer_inode_t ip); extern struct hammer_inode *HammerTruncIp; #endif +/* + * Red-Black tree support for inode structures. + * + * Insertions + */ +int +hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2) +{ + if (ip1->obj_localization < ip2->obj_localization) + return(-1); + if (ip1->obj_localization > ip2->obj_localization) + return(1); + if (ip1->obj_id < ip2->obj_id) + return(-1); + if (ip1->obj_id > ip2->obj_id) + return(1); + if (ip1->obj_asof < ip2->obj_asof) + return(-1); + if (ip1->obj_asof > ip2->obj_asof) + return(1); + return(0); +} + +/* + * LOOKUP_INFO + */ +static int +hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip) +{ + if (info->obj_localization < ip->obj_localization) + return(-1); + if (info->obj_localization > ip->obj_localization) + return(1); + if (info->obj_id < ip->obj_id) + return(-1); + if (info->obj_id > ip->obj_id) + return(1); + if (info->obj_asof < ip->obj_asof) + return(-1); + if (info->obj_asof > ip->obj_asof) + return(1); + return(0); +} + +/* + * Used by hammer_scan_inode_snapshots() to locate all of an object's + * snapshots. Note that the asof field is not tested, which we can get + * away with because it is the lowest-priority field. 
+ */ +static int +hammer_inode_info_cmp_all_history(hammer_inode_t ip, void *data) +{ + hammer_inode_info_t info = data; + + if (ip->obj_localization > info->obj_localization) + return(1); + if (ip->obj_localization < info->obj_localization) + return(-1); + if (ip->obj_id > info->obj_id) + return(1); + if (ip->obj_id < info->obj_id) + return(-1); + return(0); +} + +RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare); +RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node, + hammer_inode_info_cmp, hammer_inode_info_t); + /* * The kernel is not actively referencing this vnode but is still holding * it cached. @@ -212,6 +281,21 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp) return(error); } +/* + * Locate all copies of the inode for obj_id compatible with the specified + * asof, reference, and issue the related call-back. This routine is used + * for direct-io invalidation and does not create any new inodes. + */ +void +hammer_scan_inode_snapshots(hammer_mount_t hmp, hammer_inode_info_t iinfo, + int (*callback)(hammer_inode_t ip, void *data), + void *data) +{ + hammer_ino_rb_tree_RB_SCAN(&hmp->rb_inos_root, + hammer_inode_info_cmp_all_history, + callback, iinfo); +} + /* * Acquire a HAMMER inode. The returned inode is not locked. These functions * do not attach or detach the related vnode (use hammer_get_vnode() for @@ -1772,12 +1856,14 @@ hammer_sync_inode(hammer_inode_t ip) hammer_cache_node(&ip->cache[1], cursor.node); /* - * Re-seek for inode update. + * Re-seek for inode update, assuming our cache hasn't been ripped + * out from under us. 
*/ if (error == 0) { tmp_node = hammer_ref_node_safe(ip->hmp, &ip->cache[0], &error); if (tmp_node) { - hammer_cursor_seek(&cursor, tmp_node, 0); + if ((tmp_node->flags & HAMMER_NODE_DELETED) == 0) + hammer_cursor_seek(&cursor, tmp_node, 0); hammer_rel_node(tmp_node); } error = 0; diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 1226db9654..e23996cf55 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.44 2008/06/20 05:38:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.45 2008/06/21 20:21:58 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -54,6 +54,7 @@ static void hammer_io_modify(hammer_io_t io, int count); static void hammer_io_deallocate(struct buf *bp); +static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data); /* * Initialize a new, already-zero'd hammer_io structure, or reinitialize @@ -922,12 +923,12 @@ struct bio_ops hammer_bioops = { * Read a buffer associated with a front-end vnode directly from the * disk media. The bio may be issued asynchronously. * - * This function can takes a zone-2 or zone-X blockmap offset. + * A second-level bio already resolved to a zone-2 offset (typically by + * the BMAP code, or by a previous hammer_io_direct_write()), is passed. 
*/ int hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio) { - hammer_off_t data_offset; hammer_off_t zone2_offset; hammer_volume_t volume; struct buf *bp; @@ -935,35 +936,33 @@ hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio) int vol_no; int error; - data_offset = bio->bio_offset; + zone2_offset = bio->bio_offset; - if ((data_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER) { - zone2_offset = data_offset; - error = 0; - } else { - KKASSERT(data_offset >= HAMMER_ZONE_BTREE); - KKASSERT((data_offset & HAMMER_BUFMASK) == 0); - zone2_offset = hammer_blockmap_lookup(hmp, data_offset, &error); - } + KKASSERT((zone2_offset & HAMMER_OFF_ZONE_MASK) == + HAMMER_ZONE_RAW_BUFFER); + + vol_no = HAMMER_VOL_DECODE(zone2_offset); + volume = hammer_get_volume(hmp, vol_no, &error); + if (error == 0 && zone2_offset >= volume->maxbuf_off) + error = EIO; + + /* + * Third level bio - raw offset specific to the + * correct volume. + */ if (error == 0) { - vol_no = HAMMER_VOL_DECODE(zone2_offset); - volume = hammer_get_volume(hmp, vol_no, &error); - if (error == 0 && zone2_offset >= volume->maxbuf_off) - error = EIO; - if (error == 0) { - zone2_offset &= HAMMER_OFF_SHORT_MASK; + zone2_offset &= HAMMER_OFF_SHORT_MASK; - /* NOTE: third-level push */ - nbio = push_bio(bio); - nbio->bio_offset = volume->ondisk->vol_buf_beg + - zone2_offset; - vn_strategy(volume->devvp, nbio); - } - hammer_rel_volume(volume, 0); + nbio = push_bio(bio); + nbio->bio_offset = volume->ondisk->vol_buf_beg + + zone2_offset; + vn_strategy(volume->devvp, nbio); } + hammer_rel_volume(volume, 0); + if (error) { kprintf("hammer_direct_read: failed @ %016llx\n", - data_offset); + zone2_offset); bp = bio->bio_buf; bp->b_error = error; bp->b_flags |= B_ERROR; @@ -1013,9 +1012,18 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0); hammer_del_buffers(hmp, buf_offset, zone2_offset, bp->b_bufsize); - zone2_offset &= 
HAMMER_OFF_SHORT_MASK; - + /* + * Second level bio - cached zone2 offset. + */ nbio = push_bio(bio); + nbio->bio_offset = zone2_offset; + + /* + * Third level bio - raw offset specific to the + * correct volume. + */ + zone2_offset &= HAMMER_OFF_SHORT_MASK; + nbio = push_bio(nbio); nbio->bio_offset = volume->ondisk->vol_buf_beg + zone2_offset; vn_strategy(volume->devvp, nbio); @@ -1049,4 +1057,72 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, return(error); } +/* + * This is called to remove the second-level cached zone-2 offset from + * frontend buffer cache buffers, now stale due to a data relocation. + * These offsets are generated by cluster_read() via VOP_BMAP, or directly + * by hammer_vop_strategy_read(). + * + * This is rather nasty because here we have something like the reblocker + * scanning the raw B-Tree with no held references on anything, really, + * other than a shared lock on the B-Tree node, and we have to access the + * frontend's buffer cache to check for and clean out the association. + * Specifically, if the reblocker is moving data on the disk, these cached + * offsets will become invalid. + * + * Only data record types associated with the large-data zone are subject + * to direct-io and need to be checked. 
+ * + */ +void +hammer_io_direct_uncache(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf) +{ + struct hammer_inode_info iinfo; + int zone; + + if (leaf->base.rec_type != HAMMER_RECTYPE_DATA) + return; + zone = HAMMER_ZONE_DECODE(leaf->data_offset); + if (zone != HAMMER_ZONE_LARGE_DATA_INDEX) + return; + iinfo.obj_id = leaf->base.obj_id; + iinfo.obj_asof = 0; /* unused */ + iinfo.obj_localization = leaf->base.localization & + HAMMER_LOCALIZE_PSEUDOFS; + iinfo.u.leaf = leaf; + hammer_scan_inode_snapshots(hmp, &iinfo, + hammer_io_direct_uncache_callback, + leaf); +} + +static int +hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data) +{ + hammer_inode_info_t iinfo = data; + hammer_off_t data_offset; + hammer_off_t file_offset; + struct vnode *vp; + struct buf *bp; + int blksize; + + if (ip->vp == NULL) + return(0); + data_offset = iinfo->u.leaf->data_offset; + file_offset = iinfo->u.leaf->base.key - iinfo->u.leaf->data_len; + blksize = iinfo->u.leaf->data_len; + KKASSERT((blksize & HAMMER_BUFMASK) == 0); + + hammer_ref(&ip->lock); + if (hammer_get_vnode(ip, &vp) == 0) { + if ((bp = findblk(ip->vp, file_offset)) != NULL && + bp->b_bio2.bio_offset != NOOFFSET) { + bp = getblk(ip->vp, file_offset, blksize, 0, 0); + bp->b_bio2.bio_offset = NOOFFSET; + brelse(bp); + } + vput(vp); + } + hammer_rel_inode(ip, 0); + return(0); +} diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 024f9acbd5..a6eac13fef 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.72 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.73 2008/06/21 20:21:58 dillon Exp $ */ #include "hammer.h" @@ -604,6 +604,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans, record->leaf.base.rec_type = HAMMER_RECTYPE_DIRENTRY; record->leaf.base.obj_type = ip->ino_leaf.base.obj_type; record->data->entry.obj_id = ip->obj_id; + record->data->entry.localization = ip->obj_localization; bcopy(ncp->nc_name, record->data->entry.name, bytes); ++ip->ino_data.nlinks; diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 57d1f64369..1bd8558e4b 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.61 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.62 2008/06/21 20:21:58 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. 
These routines are primarily @@ -50,45 +50,6 @@ static int hammer_load_volume(hammer_volume_t volume); static int hammer_load_buffer(hammer_buffer_t buffer, int isnew); static int hammer_load_node(hammer_node_t node, int isnew); -/* - * Red-Black tree support for various structures - */ -int -hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2) -{ - if (ip1->obj_localization < ip2->obj_localization) - return(-1); - if (ip1->obj_localization > ip2->obj_localization) - return(1); - if (ip1->obj_id < ip2->obj_id) - return(-1); - if (ip1->obj_id > ip2->obj_id) - return(1); - if (ip1->obj_asof < ip2->obj_asof) - return(-1); - if (ip1->obj_asof > ip2->obj_asof) - return(1); - return(0); -} - -static int -hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip) -{ - if (info->obj_localization < ip->obj_localization) - return(-1); - if (info->obj_localization > ip->obj_localization) - return(1); - if (info->obj_id < ip->obj_id) - return(-1); - if (info->obj_id > ip->obj_id) - return(1); - if (info->obj_asof < ip->obj_asof) - return(-1); - if (info->obj_asof > ip->obj_asof) - return(1); - return(0); -} - static int hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2) { @@ -119,14 +80,6 @@ hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2) return(0); } -/* - * Note: The lookup function for hammer_ino_rb_tree winds up being named - * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info). The other lookup - * functions are normal, e.g. hammer_buf_rb_tree_RB_LOOKUP(root, zone2_offset). 
- */ -RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare); -RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node, - hammer_inode_info_cmp, hammer_inode_info_t); RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node, hammer_vol_rb_compare, int32_t, vol_no); RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node, diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index a295a7a592..617bfdba86 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.19 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.20 2008/06/21 20:21:58 dillon Exp $ */ /* * HAMMER reblocker - This code frees up fragmented physical space @@ -163,6 +163,7 @@ static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, hammer_cursor_t cursor, hammer_btree_elm_t elm) { + hammer_mount_t hmp; hammer_off_t tmp_offset; int error; int bytes; @@ -170,6 +171,7 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock, int iocflags; error = 0; + hmp = cursor->trans->hmp; /* * Reblock data. 
Note that data embedded in a record is reblocked @@ -210,12 +212,12 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock, if (reblock->head.flags & iocflags) { ++reblock->data_count; reblock->data_byte_count += elm->leaf.data_len; - bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset, - &cur, &error); + bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); if (hammer_debug_general & 0x4000) kprintf("D %6d/%d\n", bytes, reblock->free_level); if (error == 0 && (cur == 0 || reblock->free_level == 0) && bytes >= reblock->free_level) { + hammer_io_direct_uncache(hmp, &elm->leaf); error = hammer_cursor_upgrade(cursor); if (error == 0) { error = hammer_reblock_data(reblock, @@ -236,8 +238,7 @@ skip: if (cursor->index == 0 && error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { ++reblock->btree_count; - bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset, - &cur, &error); + bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); if (hammer_debug_general & 0x4000) kprintf("B %6d/%d\n", bytes, reblock->free_level); if (error == 0 && (cur == 0 || reblock->free_level == 0) && diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 337b7a7804..4cacb1efe3 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.74 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.75 2008/06/21 20:21:58 dillon Exp $ */ #include @@ -852,7 +852,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) * records for the purposes of the search. */ obj_id = 0; - localization = 0; + localization = HAMMER_DEF_LOCALIZATION; if (error == 0) { error = hammer_ip_first(&cursor); -- 2.41.0