From b3bad96f64d725641314f1aa1ff8a1d9c9860040 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 5 Jul 2008 18:59:28 +0000 Subject: [PATCH] HAMMER 60D/Many: Mirroring, bug fixes * Add more mirroring infrastructure. * Fix support for unix domain sockets. Reported-by: Gergo Szakal , Rumko --- sys/vfs/hammer/hammer.h | 18 ++- sys/vfs/hammer/hammer_btree.c | 12 +- sys/vfs/hammer/hammer_cursor.c | 199 +++++++++++++++++++++++++++++++- sys/vfs/hammer/hammer_cursor.h | 8 +- sys/vfs/hammer/hammer_disk.h | 3 +- sys/vfs/hammer/hammer_ondisk.c | 3 +- sys/vfs/hammer/hammer_reblock.c | 4 +- sys/vfs/hammer/hammer_subs.c | 22 +++- 8 files changed, 250 insertions(+), 19 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index de04094d49..f38936eb66 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.102 2008/07/04 07:25:36 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.103 2008/07/05 18:59:27 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -540,6 +540,7 @@ struct hammer_node { struct hammer_mount *hmp; struct hammer_buffer *buffer; /* backing buffer */ hammer_node_ondisk_t ondisk; /* ptr to on-disk structure */ + TAILQ_HEAD(, hammer_cursor) cursor_list; /* deadlock recovery */ struct hammer_node_cache_list cache_list; /* passive caches */ int flags; int loading; /* load interlock */ @@ -828,6 +829,7 @@ void hammer_lock_sh_lowpri(struct hammer_lock *lock); int hammer_lock_sh_try(struct hammer_lock *lock); int hammer_lock_upgrade(struct hammer_lock *lock); void hammer_lock_downgrade(struct hammer_lock *lock); +int hammer_lock_status(struct hammer_lock *lock); void hammer_unlock(struct hammer_lock *lock); void hammer_ref(struct hammer_lock *lock); void hammer_unref(struct hammer_lock *lock); @@ -847,7 +849,7 @@ void hammer_clear_objid(hammer_inode_t dip); void hammer_destroy_objid_cache(hammer_mount_t hmp); int hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, - int bytes); + int bytes); void hammer_clear_undo_history(hammer_mount_t hmp); enum vtype hammer_get_vnode_type(u_int8_t obj_type); int hammer_get_dtype(u_int8_t obj_type); @@ -856,10 +858,18 @@ int64_t hammer_directory_namekey(const void *name, int len); int hammer_nohistory(hammer_inode_t ip); int hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, - hammer_node_cache_t cache, hammer_inode_t ip); -int hammer_reinit_cursor(hammer_cursor_t cursor); + hammer_node_cache_t cache, hammer_inode_t ip); void hammer_normalize_cursor(hammer_cursor_t cursor); void hammer_done_cursor(hammer_cursor_t cursor); +int hammer_recover_cursor(hammer_cursor_t cursor); + +void hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode); +void hammer_cursor_removed_node(hammer_node_t onode, hammer_node_t parent, + int index); +void hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode, + int index); +void hammer_cursor_inserted_element(hammer_node_t node, int index); +void hammer_cursor_deleted_element(hammer_node_t node, int index); int hammer_btree_lookup(hammer_cursor_t cursor); int hammer_btree_first(hammer_cursor_t cursor); diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index b71c63a1ae..06896c2b8b 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.62 2008/07/04 07:25:36 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.63 2008/07/05 18:59:27 dillon Exp $ */ /* @@ -781,6 +781,7 @@ hammer_btree_delete(hammer_cursor_t cursor) } --ondisk->count; hammer_modify_node_done(node); + hammer_cursor_deleted_element(node, i); /* * Validate local parent @@ -1443,6 +1444,7 @@ btree_split_internal(hammer_cursor_t cursor) new_node->ondisk->parent = parent->node_offset; new_node->ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; KKASSERT(ondisk->type == new_node->ondisk->type); + hammer_cursor_split_node(node, new_node, split); /* * Cleanup the original node. Elm (P) becomes the new boundary, @@ -1470,6 +1472,7 @@ btree_split_internal(hammer_cursor_t cursor) parent_elm->internal.subtree_offset = new_node->node_offset; ++ondisk->count; hammer_modify_node_done(parent); + hammer_cursor_inserted_element(parent, parent_index + 1); /* * The children of new_node need their parent pointer set to new_node. @@ -1508,7 +1511,6 @@ btree_split_internal(hammer_cursor_t cursor) } hammer_modify_node_done(node); - /* * Ok, now adjust the cursor depending on which element the original * index was pointing at. If we are >= the split point the push node @@ -1672,6 +1674,7 @@ btree_split_leaf(hammer_cursor_t cursor) new_leaf->ondisk->type = HAMMER_BTREE_TYPE_LEAF; KKASSERT(ondisk->type == new_leaf->ondisk->type); hammer_modify_node_done(new_leaf); + hammer_cursor_split_node(leaf, new_leaf, split); /* * Cleanup the original node. Because this is a leaf node and @@ -1703,6 +1706,7 @@ btree_split_leaf(hammer_cursor_t cursor) mid_boundary = &parent_elm->base; ++ondisk->count; hammer_modify_node_done(parent); + hammer_cursor_inserted_element(parent, parent_index + 1); /* * The filesystem's root B-Tree pointer may have to be updated. @@ -2038,6 +2042,7 @@ btree_remove(hammer_cursor_t cursor) } parent = cursor->parent; + hammer_cursor_removed_node(node, parent, cursor->parent_index); /* * Attempt to remove the parent's reference to the child. If the @@ -2129,9 +2134,6 @@ hammer_btree_do_propagation(hammer_cursor_t cursor, hammer_inode_t ip, return; } - /* - * Get as far as we can without deadlocking. - */ error = hammer_btree_mirror_propagate(cursor->trans, cursor->parent, cursor->parent_index, cursor->node->ondisk->mirror_tid); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 549bf1e973..b7f795d57f 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.36 2008/07/04 07:25:36 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.37 2008/07/05 18:59:27 dillon Exp $ */ /* @@ -182,7 +182,6 @@ hammer_done_cursor(hammer_cursor_t cursor) cursor->ip = NULL; } - /* * If we deadlocked this node will be referenced. Do a quick * lock/unlock to wait for the deadlock condition to clear. @@ -370,8 +369,9 @@ hammer_cursor_up(hammer_cursor_t cursor) /* * Special cursor up given a locked cursor. The orignal node is not - * unlocked and released and the cursor is not downgraded. If we are - * unable to acquire and lock the parent, EDEADLK is returned. + * unlocked or released and the cursor is not downgraded. + * + * This function will recover from deadlocks. EDEADLK cannot be returned. */ int hammer_cursor_up_locked(hammer_cursor_t cursor) @@ -480,3 +480,194 @@ hammer_cursor_down(hammer_cursor_t cursor) return(error); } +/************************************************************************ + * DEADLOCK RECOVERY * + ************************************************************************ + * + * These are the new deadlock recovery functions. Currently they are only + * used for the mirror propagation and physical node removal cases but + * ultimately the intention is to use them for all deadlock recovery + * operations. + */ + +/* + * Recover from a deadlocked cursor, tracking any node removals or + * replacements. If the cursor's current node is removed by another + * thread (via btree_remove()) the cursor will be seeked upwards. + */ +int +hammer_recover_cursor(hammer_cursor_t cursor) +{ + hammer_inode_t ip; + hammer_node_t node; + int status; + int error; + +again: + KKASSERT((cursor->flags & HAMMER_CURSOR_DEADLK_RECOVER) == 0); + KKASSERT(cursor->node); + /* + * Release the cursor's locks and track B-Tree operations on node. + * While being tracked our cursor can be modified by other threads + * and node may be replaced. + */ + if (cursor->parent) { + hammer_unlock(&cursor->parent->lock); + hammer_rel_node(cursor->parent); + cursor->parent = NULL; + } + node = cursor->node; + cursor->flags |= HAMMER_CURSOR_DEADLK_RECOVER; + TAILQ_INSERT_TAIL(&node->cursor_list, cursor, deadlk_entry); + status = hammer_lock_status(&node->lock); + hammer_unlock(&node->lock); + + if ((ip = cursor->ip) != NULL) + hammer_unlock(&ip->lock); + + /* + * Wait for the deadlock to clear + */ + if (cursor->deadlk_node) { + hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk"); + hammer_unlock(&cursor->deadlk_node->lock); + hammer_rel_node(cursor->deadlk_node); + cursor->deadlk_node = NULL; + } + if (cursor->deadlk_rec) { + hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr"); + hammer_rel_mem_record(cursor->deadlk_rec); + cursor->deadlk_rec = NULL; + } + + /* + * Get the cursor heated up again. The cursor's node may have + * changed and we might have to locate the new parent. + * + * If the exact element we were on got deleted RIPOUT will be + * set and we must clear ATEDISK so an iteration does not skip + * the element after it. + */ + KKASSERT(cursor->flags & HAMMER_CURSOR_DEADLK_RECOVER); + node = cursor->node; + TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry); + cursor->flags &= ~HAMMER_CURSOR_DEADLK_RECOVER; + if (cursor->flags & HAMMER_CURSOR_DEADLK_RIPOUT) { + cursor->flags &= ~HAMMER_CURSOR_DEADLK_RIPOUT; + cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + } + hammer_lock_sh(&node->lock); + error = hammer_load_cursor_parent(cursor, 0); + if (error == 0) { + if (status > 0) { + error = hammer_cursor_upgrade(cursor); + if (error == EDEADLK) { + kprintf("r"); + goto again; + } + } + } + return(error); +} + +/* + * onode is being replaced by nnode by the reblocking code. + */ +void +hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode) +{ + hammer_cursor_t cursor; + + while ((cursor = TAILQ_FIRST(&onode->cursor_list)) != NULL) { + TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry); + TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry); + KKASSERT(cursor->node == onode); + cursor->node = nnode; + hammer_ref_node(nnode); + hammer_rel_node(onode); + } +} + +/* + * node is being removed, cursors in deadlock recovery are seeked upward + * to the parent. + */ +void +hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index) +{ + hammer_cursor_t cursor; + + KKASSERT(parent != NULL); + while ((cursor = TAILQ_FIRST(&node->cursor_list)) != NULL) { + KKASSERT(cursor->node == node); + KKASSERT(cursor->index == 0); + TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry); + TAILQ_INSERT_TAIL(&parent->cursor_list, cursor, deadlk_entry); + cursor->flags |= HAMMER_CURSOR_DEADLK_RIPOUT; + cursor->node = parent; + cursor->index = index; + hammer_ref_node(parent); + hammer_rel_node(node); + } +} + +/* + * node was split at (onode, index) with elements >= index moved to nnode. + */ +void +hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode, int index) +{ + hammer_cursor_t cursor; + +again: + TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) { + KKASSERT(cursor->node == onode); + if (cursor->index < index) + continue; + TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry); + TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry); + cursor->node = nnode; + cursor->index -= index; + hammer_ref_node(nnode); + hammer_rel_node(onode); + goto again; + } +} + +/* + * Inserted element at (node, index) + * + * Shift indexes >= index + */ +void +hammer_cursor_deleted_element(hammer_node_t node, int index) +{ + hammer_cursor_t cursor; + + TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) { + KKASSERT(cursor->node == node); + if (cursor->index == index) { + cursor->flags |= HAMMER_CURSOR_DEADLK_RIPOUT; + } else if (cursor->index > index) { + --cursor->index; + } + } +} + +/* + * Deleted element at (node, index) + * + * Shift indexes >= index + */ +void +hammer_cursor_inserted_element(hammer_node_t node, int index) +{ + hammer_cursor_t cursor; + + TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) { + KKASSERT(cursor->node == node); + if (cursor->index >= index) + ++cursor->index; + } +} + diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 3bc2d7e2be..e7b815a3e4 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.22 2008/06/26 04:06:23 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.23 2008/07/05 18:59:27 dillon Exp $ */ /* @@ -61,8 +61,10 @@ struct hammer_cursor { /* * Set if a deadlock occurs. hammer_done_cursor() will block on - * this after releasing parent and node, before returning. + * this after releasing parent and node, before returning. See + * also hammer_recover_cursor(). */ + TAILQ_ENTRY(hammer_cursor) deadlk_entry; hammer_node_t deadlk_node; hammer_record_t deadlk_rec; @@ -133,6 +135,8 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_PRUNING 0x00010000 #define HAMMER_CURSOR_REBLOCKING 0x00020000 +#define HAMMER_CURSOR_DEADLK_RECOVER 0x00040000 +#define HAMMER_CURSOR_DEADLK_RIPOUT 0x00080000 /* * Flags we can clear when reusing a cursor (we can clear all of them) diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 4fbf683c63..86dd1a1acb 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.44 2008/07/02 21:57:54 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.45 2008/07/05 18:59:27 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -554,6 +554,7 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t; #define HAMMER_OBJTYPE_BDEV 6 #define HAMMER_OBJTYPE_SOFTLINK 7 #define HAMMER_OBJTYPE_PSEUDOFS 8 /* pseudo filesystem obj */ +#define HAMMER_OBJTYPE_SOCKET 9 /* * HAMMER inode attribute data diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index a117b4b86e..1ce3aa7fb4 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.64 2008/07/01 02:08:58 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.65 2008/07/05 18:59:27 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -957,6 +957,7 @@ again: node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO|M_USE_RESERVE); node->node_offset = node_offset; node->hmp = hmp; + TAILQ_INIT(&node->cursor_list); TAILQ_INIT(&node->cache_list); if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) { --hammer_count_nodes; diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index 8cb11b7e6e..7a21bb1130 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.23 2008/07/03 04:24:51 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.24 2008/07/05 18:59:28 dillon Exp $ */ /* * HAMMER reblocker - This code frees up fragmented physical space @@ -400,6 +400,7 @@ hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, hammer_rel_volume(volume, 0); } + hammer_cursor_replaced_node(onode, nnode); hammer_delete_node(cursor->trans, onode); if (hammer_debug_general & 0x4000) { @@ -490,6 +491,7 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, * The new node replaces the current node in the cursor. The cursor * expects it to be locked so leave it locked. Discard onode. */ + hammer_cursor_replaced_node(onode, nnode); hammer_delete_node(cursor->trans, onode); if (hammer_debug_general & 0x4000) { diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index a732294ac4..e1abe632bb 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.29 2008/07/04 07:25:36 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.30 2008/07/05 18:59:28 dillon Exp $ */ /* * HAMMER structural locking @@ -229,6 +229,20 @@ hammer_unlock(struct hammer_lock *lock) crit_exit(); } +/* + * The calling thread must be holding a shared or exclusive lock. + * Returns < 0 if lock is held shared, and > 0 if held exlusively. + */ +int +hammer_lock_status(struct hammer_lock *lock) +{ + if (lock->lockcount < 0) + return(-1); + if (lock->lockcount > 0) + return(1); + panic("hammer_lock_status: lock must be held: %p", lock); +} + void hammer_ref(struct hammer_lock *lock) { @@ -336,6 +350,8 @@ hammer_get_vnode_type(u_int8_t obj_type) return(VDATABASE); case HAMMER_OBJTYPE_FIFO: return(VFIFO); + case HAMMER_OBJTYPE_SOCKET: + return(VSOCK); case HAMMER_OBJTYPE_CDEV: return(VCHR); case HAMMER_OBJTYPE_BDEV: @@ -360,6 +376,8 @@ hammer_get_dtype(u_int8_t obj_type) return(DT_DBF); case HAMMER_OBJTYPE_FIFO: return(DT_FIFO); + case HAMMER_OBJTYPE_SOCKET: + return(DT_SOCK); case HAMMER_OBJTYPE_CDEV: return(DT_CHR); case HAMMER_OBJTYPE_BDEV: @@ -384,6 +402,8 @@ hammer_get_obj_type(enum vtype vtype) return(HAMMER_OBJTYPE_DBFILE); case VFIFO: return(HAMMER_OBJTYPE_FIFO); + case VSOCK: + return(HAMMER_OBJTYPE_SOCKET); case VCHR: return(HAMMER_OBJTYPE_CDEV); case VBLK: -- 2.41.0