HAMMER 60D/Many: Mirroring, bug fixes
authorMatthew Dillon <dillon@dragonflybsd.org>
Sat, 5 Jul 2008 18:59:28 +0000 (18:59 +0000)
committerMatthew Dillon <dillon@dragonflybsd.org>
Sat, 5 Jul 2008 18:59:28 +0000 (18:59 +0000)
* Add more mirroring infrastructure.

* Fix support for unix domain sockets.

Reported-by: Gergo Szakal <bastyaelvtars@gmail.com>,
     Rumko <rumcic@gmail.com>

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_subs.c

index de04094..f38936e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.102 2008/07/04 07:25:36 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.103 2008/07/05 18:59:27 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -540,6 +540,7 @@ struct hammer_node {
        struct hammer_mount     *hmp;
        struct hammer_buffer    *buffer;        /* backing buffer */
        hammer_node_ondisk_t    ondisk;         /* ptr to on-disk structure */
+       TAILQ_HEAD(, hammer_cursor) cursor_list;  /* deadlock recovery */
        struct hammer_node_cache_list cache_list; /* passive caches */
        int                     flags;
        int                     loading;        /* load interlock */
@@ -828,6 +829,7 @@ void        hammer_lock_sh_lowpri(struct hammer_lock *lock);
 int    hammer_lock_sh_try(struct hammer_lock *lock);
 int    hammer_lock_upgrade(struct hammer_lock *lock);
 void   hammer_lock_downgrade(struct hammer_lock *lock);
+int    hammer_lock_status(struct hammer_lock *lock);
 void   hammer_unlock(struct hammer_lock *lock);
 void   hammer_ref(struct hammer_lock *lock);
 void   hammer_unref(struct hammer_lock *lock);
@@ -847,7 +849,7 @@ void hammer_clear_objid(hammer_inode_t dip);
 void hammer_destroy_objid_cache(hammer_mount_t hmp);
 
 int hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset,
-                             int bytes);
+                       int bytes);
 void hammer_clear_undo_history(hammer_mount_t hmp);
 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
 int hammer_get_dtype(u_int8_t obj_type);
@@ -856,10 +858,18 @@ int64_t hammer_directory_namekey(const void *name, int len);
 int    hammer_nohistory(hammer_inode_t ip);
 
 int    hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
-                          hammer_node_cache_t cache, hammer_inode_t ip);
-int    hammer_reinit_cursor(hammer_cursor_t cursor);
+                       hammer_node_cache_t cache, hammer_inode_t ip);
 void   hammer_normalize_cursor(hammer_cursor_t cursor);
 void   hammer_done_cursor(hammer_cursor_t cursor);
+int    hammer_recover_cursor(hammer_cursor_t cursor);
+
+void   hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode);
+void   hammer_cursor_removed_node(hammer_node_t onode, hammer_node_t parent,
+                       int index);
+void   hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode,
+                       int index);
+void   hammer_cursor_inserted_element(hammer_node_t node, int index);
+void   hammer_cursor_deleted_element(hammer_node_t node, int index);
 
 int    hammer_btree_lookup(hammer_cursor_t cursor);
 int    hammer_btree_first(hammer_cursor_t cursor);
index b71c63a..06896c2 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.62 2008/07/04 07:25:36 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.63 2008/07/05 18:59:27 dillon Exp $
  */
 
 /*
@@ -781,6 +781,7 @@ hammer_btree_delete(hammer_cursor_t cursor)
        }
        --ondisk->count;
        hammer_modify_node_done(node);
+       hammer_cursor_deleted_element(node, i);
 
        /*
         * Validate local parent
@@ -1443,6 +1444,7 @@ btree_split_internal(hammer_cursor_t cursor)
        new_node->ondisk->parent = parent->node_offset;
        new_node->ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
        KKASSERT(ondisk->type == new_node->ondisk->type);
+       hammer_cursor_split_node(node, new_node, split);
 
        /*
         * Cleanup the original node.  Elm (P) becomes the new boundary,
@@ -1470,6 +1472,7 @@ btree_split_internal(hammer_cursor_t cursor)
        parent_elm->internal.subtree_offset = new_node->node_offset;
        ++ondisk->count;
        hammer_modify_node_done(parent);
+       hammer_cursor_inserted_element(parent, parent_index + 1);
 
        /*
         * The children of new_node need their parent pointer set to new_node.
@@ -1508,7 +1511,6 @@ btree_split_internal(hammer_cursor_t cursor)
        }
        hammer_modify_node_done(node);
 
-
        /*
         * Ok, now adjust the cursor depending on which element the original
         * index was pointing at.  If we are >= the split point the push node
@@ -1672,6 +1674,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        new_leaf->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
        KKASSERT(ondisk->type == new_leaf->ondisk->type);
        hammer_modify_node_done(new_leaf);
+       hammer_cursor_split_node(leaf, new_leaf, split);
 
        /*
         * Cleanup the original node.  Because this is a leaf node and
@@ -1703,6 +1706,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        mid_boundary = &parent_elm->base;
        ++ondisk->count;
        hammer_modify_node_done(parent);
+       hammer_cursor_inserted_element(parent, parent_index + 1);
 
        /*
         * The filesystem's root B-Tree pointer may have to be updated.
@@ -2038,6 +2042,7 @@ btree_remove(hammer_cursor_t cursor)
        }
 
        parent = cursor->parent;
+       hammer_cursor_removed_node(node, parent, cursor->parent_index);
 
        /*
         * Attempt to remove the parent's reference to the child.  If the
@@ -2129,9 +2134,6 @@ hammer_btree_do_propagation(hammer_cursor_t cursor, hammer_inode_t ip,
                return;
        }
 
-       /*
-        * Get as far as we can without deadlocking.
-        */
        error = hammer_btree_mirror_propagate(cursor->trans,
                                        cursor->parent, cursor->parent_index,
                                        cursor->node->ondisk->mirror_tid);
index 549bf1e..b7f795d 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.36 2008/07/04 07:25:36 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.37 2008/07/05 18:59:27 dillon Exp $
  */
 
 /*
@@ -182,7 +182,6 @@ hammer_done_cursor(hammer_cursor_t cursor)
                 cursor->ip = NULL;
         }
 
-
        /*
         * If we deadlocked this node will be referenced.  Do a quick
         * lock/unlock to wait for the deadlock condition to clear.
@@ -370,8 +369,9 @@ hammer_cursor_up(hammer_cursor_t cursor)
 
 /*
  * Special cursor up given a locked cursor.  The orignal node is not
- * unlocked and released and the cursor is not downgraded.  If we are
- * unable to acquire and lock the parent, EDEADLK is returned.
+ * unlocked or released and the cursor is not downgraded.
+ *
+ * This function will recover from deadlocks.  EDEADLK cannot be returned.
  */
 int
 hammer_cursor_up_locked(hammer_cursor_t cursor)
@@ -480,3 +480,194 @@ hammer_cursor_down(hammer_cursor_t cursor)
        return(error);
 }
 
+/************************************************************************
+ *                             DEADLOCK RECOVERY                       *
+ ************************************************************************
+ *
+ * These are the new deadlock recovery functions.  Currently they are only
+ * used for the mirror propagation and physical node removal cases but
+ * ultimately the intention is to use them for all deadlock recovery
+ * operations.
+ */
+
+/*
+ * Recover from a deadlocked cursor, tracking any node removals or
+ * replacements.  If the cursor's current node is removed by another
+ * thread (via btree_remove()) the cursor will be seeked upwards.
+ */
+int
+hammer_recover_cursor(hammer_cursor_t cursor)
+{
+       hammer_inode_t ip;
+       hammer_node_t node;
+       int status;
+       int error;
+
+again:
+       KKASSERT((cursor->flags & HAMMER_CURSOR_DEADLK_RECOVER) == 0);
+       KKASSERT(cursor->node);
+       /*
+        * Release the cursor's locks and track B-Tree operations on node.
+        * While being tracked our cursor can be modified by other threads
+        * and node may be replaced.
+        */
+       if (cursor->parent) {
+               hammer_unlock(&cursor->parent->lock);
+               hammer_rel_node(cursor->parent);
+               cursor->parent = NULL;
+       }
+       node = cursor->node;
+       cursor->flags |= HAMMER_CURSOR_DEADLK_RECOVER;
+       TAILQ_INSERT_TAIL(&node->cursor_list, cursor, deadlk_entry);
+       status = hammer_lock_status(&node->lock);
+       hammer_unlock(&node->lock);
+
+       if ((ip = cursor->ip) != NULL)
+               hammer_unlock(&ip->lock);
+
+       /*
+        * Wait for the deadlock to clear
+        */
+       if (cursor->deadlk_node) {
+               hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk");
+               hammer_unlock(&cursor->deadlk_node->lock);
+               hammer_rel_node(cursor->deadlk_node);
+               cursor->deadlk_node = NULL;
+       }
+       if (cursor->deadlk_rec) {
+               hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr");
+               hammer_rel_mem_record(cursor->deadlk_rec);
+               cursor->deadlk_rec = NULL;
+       }
+
+       /*
+        * Get the cursor heated up again.  The cursor's node may have
+        * changed and we might have to locate the new parent.
+        *
+        * If the exact element we were on got deleted RIPOUT will be
+        * set and we must clear ATEDISK so an iteration does not skip
+        * the element after it.
+        */
+       KKASSERT(cursor->flags & HAMMER_CURSOR_DEADLK_RECOVER);
+       node = cursor->node;
+       TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry);
+       cursor->flags &= ~HAMMER_CURSOR_DEADLK_RECOVER;
+       if (cursor->flags & HAMMER_CURSOR_DEADLK_RIPOUT) {
+               cursor->flags &= ~HAMMER_CURSOR_DEADLK_RIPOUT;
+               cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
+       }
+       hammer_lock_sh(&node->lock);
+       error = hammer_load_cursor_parent(cursor, 0);
+       if (error == 0) {
+               if (status > 0) {
+                       error = hammer_cursor_upgrade(cursor);
+                       if (error == EDEADLK) {
+                               kprintf("r");
+                               goto again;
+                       }
+               }
+       }
+       return(error);
+}
+
+/*
+ * onode is being replaced by nnode by the reblocking code.
+ */
+void
+hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode)
+{
+       hammer_cursor_t cursor;
+
+       while ((cursor = TAILQ_FIRST(&onode->cursor_list)) != NULL) {
+               TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry);
+               TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry);
+               KKASSERT(cursor->node == onode);
+               cursor->node = nnode;
+               hammer_ref_node(nnode);
+               hammer_rel_node(onode);
+       }
+}
+
+/*
+ * node is being removed, cursors in deadlock recovery are seeked upward
+ * to the parent.
+ */
+void
+hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index)
+{
+       hammer_cursor_t cursor;
+
+       KKASSERT(parent != NULL);
+       while ((cursor = TAILQ_FIRST(&node->cursor_list)) != NULL) {
+               KKASSERT(cursor->node == node);
+               KKASSERT(cursor->index == 0);
+               TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry);
+               TAILQ_INSERT_TAIL(&parent->cursor_list, cursor, deadlk_entry);
+               cursor->flags |= HAMMER_CURSOR_DEADLK_RIPOUT;
+               cursor->node = parent;
+               cursor->index = index;
+               hammer_ref_node(parent);
+               hammer_rel_node(node);
+       }
+}
+
+/*
+ * node was split at (onode, index) with elements >= index moved to nnode.
+ */
+void
+hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode, int index)
+{
+       hammer_cursor_t cursor;
+
+again:
+       TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) {
+               KKASSERT(cursor->node == onode);
+               if (cursor->index < index)
+                       continue;
+               TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry);
+               TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry);
+               cursor->node = nnode;
+               cursor->index -= index;
+               hammer_ref_node(nnode);
+               hammer_rel_node(onode);
+               goto again;
+       }
+}
+
+/*
+ * Inserted element at (node, index)
+ *
+ * Shift indexes >= index
+ */
+void
+hammer_cursor_deleted_element(hammer_node_t node, int index)
+{
+       hammer_cursor_t cursor;
+
+       TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) {
+               KKASSERT(cursor->node == node);
+               if (cursor->index == index) {
+                       cursor->flags |= HAMMER_CURSOR_DEADLK_RIPOUT;
+               } else if (cursor->index > index) {
+                       --cursor->index;
+               }
+       }
+}
+
+/*
+ * Deleted element at (node, index)
+ *
+ * Shift indexes >= index
+ */
+void
+hammer_cursor_inserted_element(hammer_node_t node, int index)
+{
+       hammer_cursor_t cursor;
+
+       TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) {
+               KKASSERT(cursor->node == node);
+               if (cursor->index >= index)
+                       ++cursor->index;
+       }
+}
+
index 3bc2d7e..e7b815a 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.22 2008/06/26 04:06:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.23 2008/07/05 18:59:27 dillon Exp $
  */
 
 /*
@@ -61,8 +61,10 @@ struct hammer_cursor {
 
        /*
         * Set if a deadlock occurs.  hammer_done_cursor() will block on
-        * this after releasing parent and node, before returning.
+        * this after releasing parent and node, before returning.  See
+        * also hammer_recover_cursor().
         */
+       TAILQ_ENTRY(hammer_cursor) deadlk_entry;
        hammer_node_t deadlk_node;
        hammer_record_t deadlk_rec;
 
@@ -133,6 +135,8 @@ typedef struct hammer_cursor *hammer_cursor_t;
 
 #define HAMMER_CURSOR_PRUNING          0x00010000
 #define HAMMER_CURSOR_REBLOCKING       0x00020000
+#define HAMMER_CURSOR_DEADLK_RECOVER   0x00040000
+#define HAMMER_CURSOR_DEADLK_RIPOUT    0x00080000
 
 /*
  * Flags we can clear when reusing a cursor (we can clear all of them)
index 4fbf683..86dd1a1 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.44 2008/07/02 21:57:54 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.45 2008/07/05 18:59:27 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -554,6 +554,7 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t;
 #define HAMMER_OBJTYPE_BDEV            6
 #define HAMMER_OBJTYPE_SOFTLINK                7
 #define HAMMER_OBJTYPE_PSEUDOFS                8       /* pseudo filesystem obj */
+#define HAMMER_OBJTYPE_SOCKET          9
 
 /*
  * HAMMER inode attribute data
index a117b4b..1ce3aa7 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.64 2008/07/01 02:08:58 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.65 2008/07/05 18:59:27 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -957,6 +957,7 @@ again:
                node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO|M_USE_RESERVE);
                node->node_offset = node_offset;
                node->hmp = hmp;
+               TAILQ_INIT(&node->cursor_list);
                TAILQ_INIT(&node->cache_list);
                if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) {
                        --hammer_count_nodes;
index 8cb11b7..7a21bb1 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.23 2008/07/03 04:24:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.24 2008/07/05 18:59:28 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -400,6 +400,7 @@ hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
                 hammer_rel_volume(volume, 0);
         }
 
+       hammer_cursor_replaced_node(onode, nnode);
        hammer_delete_node(cursor->trans, onode);
 
        if (hammer_debug_general & 0x4000) {
@@ -490,6 +491,7 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
         * The new node replaces the current node in the cursor.  The cursor
         * expects it to be locked so leave it locked.  Discard onode.
         */
+       hammer_cursor_replaced_node(onode, nnode);
        hammer_delete_node(cursor->trans, onode);
 
        if (hammer_debug_general & 0x4000) {
index a732294..e1abe63 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.29 2008/07/04 07:25:36 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.30 2008/07/05 18:59:28 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -229,6 +229,20 @@ hammer_unlock(struct hammer_lock *lock)
        crit_exit();
 }
 
+/*
+ * The calling thread must be holding a shared or exclusive lock.
+ * Returns < 0 if lock is held shared, and > 0 if held exlusively.
+ */
+int
+hammer_lock_status(struct hammer_lock *lock)
+{
+       if (lock->lockcount < 0)
+               return(-1);
+       if (lock->lockcount > 0)
+               return(1);
+       panic("hammer_lock_status: lock must be held: %p", lock);
+}
+
 void
 hammer_ref(struct hammer_lock *lock)
 {
@@ -336,6 +350,8 @@ hammer_get_vnode_type(u_int8_t obj_type)
                return(VDATABASE);
        case HAMMER_OBJTYPE_FIFO:
                return(VFIFO);
+       case HAMMER_OBJTYPE_SOCKET:
+               return(VSOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(VCHR);
        case HAMMER_OBJTYPE_BDEV:
@@ -360,6 +376,8 @@ hammer_get_dtype(u_int8_t obj_type)
                return(DT_DBF);
        case HAMMER_OBJTYPE_FIFO:
                return(DT_FIFO);
+       case HAMMER_OBJTYPE_SOCKET:
+               return(DT_SOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(DT_CHR);
        case HAMMER_OBJTYPE_BDEV:
@@ -384,6 +402,8 @@ hammer_get_obj_type(enum vtype vtype)
                return(HAMMER_OBJTYPE_DBFILE);
        case VFIFO:
                return(HAMMER_OBJTYPE_FIFO);
+       case VSOCK:
+               return(HAMMER_OBJTYPE_SOCKET);
        case VCHR:
                return(HAMMER_OBJTYPE_CDEV);
        case VBLK: