HAMMER 44/Many: Stabilization pass, user-guaranteed transaction ids
authorMatthew Dillon <dillon@dragonflybsd.org>
Tue, 13 May 2008 20:46:55 +0000 (20:46 +0000)
committerMatthew Dillon <dillon@dragonflybsd.org>
Tue, 13 May 2008 20:46:55 +0000 (20:46 +0000)
* B-Tree changes:  Allow leaves to be empty.  Do not leave internal nodes
  with subtree_offsets of 0 when deleting a B-Tree element.  Do not try to
  clean up internal nodes with subtree_offsets of 0 while scanning the B-Tree.

  The pruner will be made responsible for such cleanups.  This way the
  front-end does not modify the B-Tree at all.

* Add a new ioctl to support the hammer utility 'synctid' command.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_ioctl.h
sys/vfs/hammer/hammer_ondisk.c

index 297d11c..9a0b363 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.66 2008/05/13 00:15:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.67 2008/05/13 20:46:54 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -546,6 +546,7 @@ struct hammer_mount {
        int     flusher_next;   /* next flush group */
        int     flusher_lock;   /* lock sequencing of the next flush */
        int     flusher_exiting;
+       hammer_tid_t flusher_tid; /* last flushed transaction id */
        hammer_off_t flusher_undo_start; /* UNDO window for flushes */
        int     reclaim_count;
        thread_t flusher_td;
@@ -635,6 +636,8 @@ int hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes);
 int    hammer_ip_check_directory_empty(hammer_transaction_t trans,
                        hammer_inode_t ip);
 int    hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
+int    hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor);
+
 
 hammer_record_t
        hammer_alloc_mem_record(hammer_inode_t ip, int data_len);
@@ -643,6 +646,7 @@ void        hammer_wait_mem_record(hammer_record_t record);
 void   hammer_rel_mem_record(hammer_record_t record);
 
 int    hammer_cursor_up(hammer_cursor_t cursor);
+int    hammer_cursor_up_locked(hammer_cursor_t cursor);
 int    hammer_cursor_down(hammer_cursor_t cursor);
 int    hammer_cursor_upgrade(hammer_cursor_t cursor);
 void   hammer_cursor_downgrade(hammer_cursor_t cursor);
index 21ace0b..42ea29e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.47 2008/05/13 05:04:39 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.48 2008/05/13 20:46:54 dillon Exp $
  */
 
 /*
  * B-Tree.
  *
  * DELETIONS: A deletion makes no attempt to proactively balance the
- * tree and will recursively remove nodes that become empty.  Empty
- * nodes are not allowed and a deletion may recurse upwards from the leaf.
- * Rather then allow a deadlock a deletion may terminate early by setting
- * an internal node's element's subtree_offset to 0.  The deletion will
- * then be resumed the next time a search encounters the element.
+ * tree and will recursively remove nodes that become empty.  If a
+ * deadlock occurs a deletion may not be able to remove an empty leaf.
+ * Deletions never allow internal nodes to become empty (that would blow
+ * up the boundaries).
  */
 #include "hammer.h"
 #include <sys/buf.h>
@@ -87,7 +86,6 @@ static int btree_search(hammer_cursor_t cursor, int flags);
 static int btree_split_internal(hammer_cursor_t cursor);
 static int btree_split_leaf(hammer_cursor_t cursor);
 static int btree_remove(hammer_cursor_t cursor);
-static int btree_remove_deleted_element(hammer_cursor_t cursor);
 static int btree_set_parent(hammer_transaction_t trans, hammer_node_t node,
                        hammer_btree_elm_t elm);
 static int btree_node_is_full(hammer_node_ondisk_t node);
@@ -214,20 +212,14 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        KKASSERT(s <= 0);
 
                        /*
-                        * When iterating try to clean up any deleted
-                        * internal elements left over from btree_remove()
-                        * deadlocks, but it is ok if we can't.
+                        * Better not be zero
                         */
-                       if (elm->internal.subtree_offset == 0) {
-                               hkprintf("REMOVE DELETED ELEMENT\n");
-                               btree_remove_deleted_element(cursor);
-                               /* note: elm also invalid */
-                       } else if (elm->internal.subtree_offset != 0) {
-                               error = hammer_cursor_down(cursor);
-                               if (error)
-                                       break;
-                               KKASSERT(cursor->index == 0);
-                       }
+                       KKASSERT(elm->internal.subtree_offset != 0);
+
+                       error = hammer_cursor_down(cursor);
+                       if (error)
+                               break;
+                       KKASSERT(cursor->index == 0);
                        /* reload stale pointer */
                        node = cursor->node->ondisk;
                        continue;
@@ -384,22 +376,19 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                        KKASSERT(r >= 0);
 
                        /*
-                        * When iterating try to clean up any deleted
-                        * internal elements left over from btree_remove()
-                        * deadlocks, but it is ok if we can't.
+                        * Better not be zero
                         */
-                       if (elm->internal.subtree_offset == 0) {
-                               btree_remove_deleted_element(cursor);
-                               /* note: elm also invalid */
-                       } else if (elm->internal.subtree_offset != 0) {
-                               error = hammer_cursor_down(cursor);
-                               if (error)
-                                       break;
-                               KKASSERT(cursor->index == 0);
-                               cursor->index = cursor->node->ondisk->count - 1;
-                       }
+                       KKASSERT(elm->internal.subtree_offset != 0);
+
+                       error = hammer_cursor_down(cursor);
+                       if (error)
+                               break;
+                       KKASSERT(cursor->index == 0);
                        /* reload stale pointer */
                        node = cursor->node->ondisk;
+
+                       /* this can assign -1 if the leaf was empty */
+                       cursor->index = node->count - 1;
                        continue;
                } else {
                        elm = &node->elms[cursor->index];
@@ -486,7 +475,8 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
  * not visible and thus causes ENOENT to be returned.  We really need
  * to check record 11 in LEAF1.  If it also fails then the search fails
  * (e.g. it might represent the range 11-16 and thus still not match our
- * AS-OF timestamp of 17).
+ * AS-OF timestamp of 17).  Note that LEAF1 could be empty, requiring
+ * further iterations.
  *
  * If this case occurs btree_search() will set HAMMER_CURSOR_CREATE_CHECK
  * and the cursor->create_check TID if an iteration might be needed.
@@ -705,10 +695,9 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_leaf_elm_t elm)
  * of an iteration but not for an insertion or deletion.
  *
  * Deletions will attempt to partially rebalance the B-Tree in an upward
- * direction, but will terminate rather then deadlock.  Empty leaves are
- * not allowed.  An early termination will leave an internal node with an
- * element whos subtree_offset is 0, a case detected and handled by
- * btree_search().
+ * direction, but will terminate rather then deadlock.  Empty internal nodes
+ * are never allowed by a deletion which deadlocks may end up giving us an
+ * empty leaf.  The pruner will clean up and rebalance the tree.
  *
  * This function can return EDEADLK, requiring the caller to retry the
  * operation after clearing the deadlock.
@@ -762,14 +751,11 @@ hammer_btree_delete(hammer_cursor_t cursor)
         * current node.
         *
         * Ignore deadlock errors, that simply means that btree_remove
-        * was unable to recurse and had to leave the subtree_offset 
-        * in the parent set to 0.
+        * was unable to recurse and had to leave us with an empty leaf. 
         */
        KKASSERT(cursor->index <= ondisk->count);
        if (ondisk->count == 0) {
-               do {
-                       error = btree_remove(cursor);
-               } while (error == EAGAIN);
+               error = btree_remove(cursor);
                if (error == EDEADLK)
                        error = 0;
        } else {
@@ -915,7 +901,6 @@ btree_search(hammer_cursor_t cursor, int flags)
                        goto done;
        }
 
-re_search:
        /*
         * Push down through internal nodes to locate the requested key.
         */
@@ -990,7 +975,9 @@ re_search:
                        /*
                         * Correct a left-hand boundary mismatch.
                         *
-                        * We can only do this if we can upgrade the lock.
+                        * We can only do this if we can upgrade the lock,
+                        * and synchronized as a background cursor (i.e.
+                        * inserting or pruning).
                         *
                         * WARNING: We can only do this if inserting, i.e.
                         * we are running on the backend.
@@ -1026,7 +1013,9 @@ re_search:
                         * (actual push-down record is i-2 prior to
                         * adjustments to i).
                         *
-                        * We can only do this if we can upgrade the lock.
+                        * We can only do this if we can upgrade the lock,
+                        * and synchronized as a background cursor (i.e.
+                        * inserting or pruning).
                         *
                         * WARNING: We can only do this if inserting, i.e.
                         * we are running on the backend.
@@ -1064,25 +1053,9 @@ re_search:
                }
 
                /*
-                * When searching try to clean up any deleted
-                * internal elements left over from btree_remove()
-                * deadlocks.
-                *
-                * If we fail and we are doing an insertion lookup,
-                * we have to return EDEADLK, because an insertion lookup
-                * must terminate at a leaf.
+                * We better have a valid subtree offset.
                 */
-               if (elm->internal.subtree_offset == 0) {
-                       error = btree_remove_deleted_element(cursor);
-                       if (error == 0)
-                               goto re_search;
-                       if (error == EDEADLK &&
-                           (flags & HAMMER_CURSOR_INSERT) == 0) {
-                               error = ENOENT;
-                       }
-                       return(error);
-               }
-
+               KKASSERT(elm->internal.subtree_offset != 0);
 
                /*
                 * Handle insertion and deletion requirements.
@@ -1130,9 +1103,6 @@ re_search:
        /*
         * We are at a leaf, do a linear search of the key array.
         *
-        * If we encounter a spike element type within the necessary
-        * range we push into it.
-        *
         * On success the index is set to the matching element and 0
         * is returned.
         *
@@ -1141,7 +1111,8 @@ re_search:
         *
         * Boundaries are not stored in leaf nodes, so the index can wind
         * up to the left of element 0 (index == 0) or past the end of
-        * the array (index == node->count).
+        * the array (index == node->count).  It is also possible that the
+        * leaf might be empty.
         */
        KKASSERT (node->type == HAMMER_BTREE_TYPE_LEAF);
        KKASSERT(node->count <= HAMMER_BTREE_LEAF_ELMS);
@@ -1920,20 +1891,17 @@ hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid)
 }
 
 /*
- * Attempt to remove the empty B-Tree node at (cursor->node).  Returns 0
- * on success, EAGAIN if we could not acquire the necessary locks, or some
- * other error.  This node can be a leaf node or an internal node.
- *
- * On return the cursor may end up pointing at an internal node, suitable
- * for further iteration but not for an immediate insertion or deletion.
+ * Attempt to remove the locked, empty or want-to-be-empty B-Tree node at
+ * (cursor->node).  Returns 0 on success, EDEADLK if we could not complete
+ * the operation due to a deadlock, or some other error.
  *
- * cursor->node may be an internal node or a leaf node.  The cursor must be
- * locked (node and parent).
+ * This routine is always called with an empty, locked leaf but may recurse
+ * into want-to-be-empty parents as part of its operation.
  *
- * NOTE: If cursor->node has one element it is the parent trying to delete
- * that element, make sure cursor->index is properly adjusted on success.
+ * On return the cursor may end up pointing to an internal node, suitable
+ * for further iteration but not for an immediate insertion or deletion.
  */
-int
+static int
 btree_remove(hammer_cursor_t cursor)
 {
        hammer_node_ondisk_t ondisk;
@@ -1950,6 +1918,7 @@ btree_remove(hammer_cursor_t cursor)
         * an empty leaf node.  Internal nodes cannot be empty.
         */
        if (node->ondisk->parent == 0) {
+               KKASSERT(cursor->parent == NULL);
                hammer_modify_node_all(cursor->trans, node);
                ondisk = node->ondisk;
                ondisk->type = HAMMER_BTREE_TYPE_LEAF;
@@ -1960,100 +1929,68 @@ btree_remove(hammer_cursor_t cursor)
        }
 
        /*
-        * Zero-out the parent's reference to the child and flag the
-        * child for destruction.  This ensures that the child is not
-        * reused while other references to it exist.
+        * Attempt to remove the parent's reference to the child.  If the
+        * parent would become empty we have to recurse.  If we fail we 
+        * leave the parent pointing to an empty leaf node.
         */
        parent = cursor->parent;
-       hammer_modify_node_all(cursor->trans, parent);
-       ondisk = parent->ondisk;
-       KKASSERT(ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
-       elm = &ondisk->elms[cursor->parent_index];
-       KKASSERT(elm->internal.subtree_offset == node->node_offset);
-       elm->internal.subtree_offset = 0;
-
-       hammer_flush_node(node);
-       hammer_delete_node(cursor->trans, node);
 
-       /*
-        * If the parent would otherwise not become empty we can physically
-        * remove the zero'd element.  Note however that in order to
-        * guarentee a valid cursor we still need to be able to cursor up
-        * because we no longer have a node.
-        *
-        * This collapse will change the parent's boundary elements, making
-        * them wider.  The new boundaries are recursively corrected in
-        * btree_search().
-        *
-        * XXX we can theoretically recalculate the midpoint but there isn't
-        * much of a reason to do it.
-        */
-       error = hammer_cursor_up(cursor);
-       if (error == 0)
-               error = hammer_cursor_upgrade(cursor);
+       if (parent->ondisk->count == 1) {
+               /*
+                * This special cursor_up_locked() call leaves the original
+                * node exclusively locked and referenced, leaves the
+                * original parent locked (as the new node), and locks the
+                * new parent.  It can return EDEADLK.
+                */
+               error = hammer_cursor_up_locked(cursor);
+               if (error == 0) {
+                       error = btree_remove(cursor);
+                       if (error == 0) {
+                               hammer_modify_node_all(cursor->trans, node);
+                               ondisk = node->ondisk;
+                               ondisk->type = HAMMER_BTREE_TYPE_DELETED;
+                               ondisk->count = 0;
+                               hammer_modify_node_done(node);
+                               hammer_flush_node(node);
+                               hammer_delete_node(cursor->trans, node);
+                       } else {
+                               kprintf("Warning: BTREE_REMOVE: Defering "
+                                       "parent removal1 @ %016llx, skipping\n",
+                                       node->node_offset);
+                       }
+                       hammer_unlock(&node->lock);
+                       hammer_rel_node(node);
+               } else {
+                       kprintf("Warning: BTREE_REMOVE: Defering parent "
+                               "removal2 @ %016llx, skipping\n",
+                               node->node_offset);
+               }
+       } else {
+               KKASSERT(parent->ondisk->count > 1);
 
-       if (error) {
-               kprintf("Warning: BTREE_REMOVE: Defering parent removal "
-                       "@ %016llx, skipping\n", cursor->parent->node_offset);
+               /*
+                * Delete the subtree reference in the parent
+                */
+               hammer_modify_node_all(cursor->trans, parent);
+               ondisk = parent->ondisk;
+               KKASSERT(ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
+               elm = &ondisk->elms[cursor->parent_index];
+               KKASSERT(elm->internal.subtree_offset == node->node_offset);
+               KKASSERT(ondisk->count > 0);
+               bcopy(&elm[1], &elm[0],
+                     (ondisk->count - cursor->parent_index) * esize);
+               --ondisk->count;
                hammer_modify_node_done(parent);
-               return (0);
-       }
-
-       /*
-        * Remove the internal element from the parent.  The bcopy must
-        * include the right boundary element.
-        */
-       KKASSERT(parent == cursor->node && ondisk == parent->ondisk);
-       node = parent;
-       parent = NULL;
-       /* ondisk is node's ondisk */
-       /* elm is node's element */
+               hammer_flush_node(node);
+               hammer_delete_node(cursor->trans, node);
 
-       /*
-        * Remove the internal element that we zero'd out.  Tell the caller
-        * to loop if it hits zero (to try to avoid eating up precious kernel
-        * stack).
-        */
-       KKASSERT(ondisk->count > 0);
-       bcopy(&elm[1], &elm[0], (ondisk->count - cursor->index) * esize);
-       --ondisk->count;
-       if (ondisk->count == 0)
-               error = EAGAIN;
-       hammer_modify_node_done(node);
-       return(error);
-}
-
-/*
- * Attempt to remove the deleted internal element at the current cursor
- * position.  If we are unable to remove the element we return EDEADLK.
- *
- * If the current internal node becomes empty we delete it in the parent
- * and cursor up, looping until we finish or we deadlock.
- *
- * On return, if successful, the cursor will be pointing at the next
- * iterative position in the B-Tree.  If unsuccessful the cursor will be
- * pointing at the last deleted internal element that could not be
- * removed.
- */
-static 
-int
-btree_remove_deleted_element(hammer_cursor_t cursor)
-{
-       hammer_node_t node;
-       hammer_btree_elm_t elm; 
-       int error;
-
-       if ((error = hammer_cursor_upgrade(cursor)) != 0)
-               return(error);
-       node = cursor->node;
-       elm = &node->ondisk->elms[cursor->index];
-       if (elm->internal.subtree_offset == 0) {
-               do {
-                       error = btree_remove(cursor);
-                       hkprintf("BTREE REMOVE DELETED ELEMENT %d\n", error);
-               } while (error == EAGAIN);
+               /*
+                * cursor->node is invalid, cursor up to make the cursor
+                * valid again.
+                */
+               error = hammer_cursor_up(cursor);
        }
-       return(error);
+       return (error);
 }
 
 /*
@@ -2124,6 +2061,7 @@ hammer_btree_lock_children(hammer_cursor_t cursor,
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_INTERNAL:
                case HAMMER_BTREE_TYPE_LEAF:
+                       KKASSERT(elm->internal.subtree_offset != 0);
                        child = hammer_get_node(node->hmp,
                                                elm->internal.subtree_offset,
                                                0, &error);
index fdc0472..25d7f2d 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.14 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.15 2008/05/13 20:46:55 dillon Exp $
  */
 
 /*
@@ -184,6 +184,7 @@ typedef union hammer_btree_elm *hammer_btree_elm_t;
 #define HAMMER_BTREE_TYPE_INTERNAL     ((u_int8_t)'I')
 #define HAMMER_BTREE_TYPE_LEAF         ((u_int8_t)'L')
 #define HAMMER_BTREE_TYPE_RECORD       ((u_int8_t)'R')
+#define HAMMER_BTREE_TYPE_DELETED      ((u_int8_t)'D')
 
 struct hammer_node_ondisk {
        /*
index 9de9f74..36e17e0 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.25 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.26 2008/05/13 20:46:55 dillon Exp $
  */
 
 /*
@@ -39,7 +39,7 @@
  */
 #include "hammer.h"
 
-static int hammer_load_cursor_parent(hammer_cursor_t cursor);
+static int hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive);
 
 /*
  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
@@ -124,7 +124,7 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
         */
        cursor->node = node;
        if (error == 0)
-               error = hammer_load_cursor_parent(cursor);
+               error = hammer_load_cursor_parent(cursor, 0);
        KKASSERT(error == 0);
        /* if (error) hammer_done_cursor(cursor); */
        return(error);
@@ -283,6 +283,7 @@ hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
                cursor->node = node;
                hammer_ref_node(node);
                hammer_lock_sh(&node->lock);
+               KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0);
 
                if (cursor->parent) {
                        hammer_unlock(&cursor->parent->lock);
@@ -290,7 +291,7 @@ hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
                        cursor->parent = NULL;
                        cursor->parent_index = 0;
                }
-               error = hammer_load_cursor_parent(cursor);
+               error = hammer_load_cursor_parent(cursor, 0);
        }
        cursor->index = index;
        return (error);
@@ -301,7 +302,7 @@ hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
  */
 static
 int
-hammer_load_cursor_parent(hammer_cursor_t cursor)
+hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive)
 {
        hammer_mount_t hmp;
        hammer_node_t parent;
@@ -317,7 +318,15 @@ hammer_load_cursor_parent(hammer_cursor_t cursor)
                parent = hammer_get_node(hmp, node->ondisk->parent, 0, &error);
                if (error)
                        return(error);
-               hammer_lock_sh(&parent->lock);
+               if (try_exclusive) {
+                       if (hammer_lock_ex_try(&parent->lock)) {
+                               hammer_rel_node(parent);
+                               return(EDEADLK);
+                       }
+               } else {
+                       hammer_lock_sh(&parent->lock);
+               }
+               KKASSERT ((parent->flags & HAMMER_NODE_DELETED) == 0);
                elm = NULL;
                for (i = 0; i < parent->ondisk->count; ++i) {
                        elm = &parent->ondisk->elms[i];
@@ -349,9 +358,6 @@ hammer_load_cursor_parent(hammer_cursor_t cursor)
 /*
  * Cursor up to our parent node.  Return ENOENT if we are at the root of
  * the filesystem.
- *
- * If doing a nonblocking cursor-up and we are unable to acquire the lock,
- * the cursor remains unchanged.
  */
 int
 hammer_cursor_up(hammer_cursor_t cursor)
@@ -377,10 +383,54 @@ hammer_cursor_up(hammer_cursor_t cursor)
        cursor->parent = NULL;
        cursor->parent_index = 0;
 
-       error = hammer_load_cursor_parent(cursor);
+       error = hammer_load_cursor_parent(cursor, 0);
        return(error);
 }
 
+/*
+ * Special cursor up given a locked cursor.  The orignal node is not
+ * unlocked and released and the cursor is not downgraded.  If we are
+ * unable to acquire and lock the parent, EDEADLK is returned.
+ */
+int
+hammer_cursor_up_locked(hammer_cursor_t cursor)
+{
+       hammer_node_t save;
+       int error;
+
+       /*
+        * If the parent is NULL we are at the root of the B-Tree and
+        * return ENOENT.
+        */
+       if (cursor->parent == NULL)
+               return (ENOENT);
+
+       save = cursor->node;
+
+       /*
+        * Set the node to its parent. 
+        */
+       cursor->node = cursor->parent;
+       cursor->index = cursor->parent_index;
+       cursor->parent = NULL;
+       cursor->parent_index = 0;
+
+       /*
+        * load the new parent, attempt to exclusively lock it.  Note that
+        * we are still holding the old parent (now cursor->node) exclusively
+        * locked.  This can return EDEADLK.
+        */
+       error = hammer_load_cursor_parent(cursor, 1);
+       if (error) {
+               cursor->parent = cursor->node;
+               cursor->parent_index = cursor->index;
+               cursor->node = save;
+               cursor->index = 0;
+       }
+       return(error);
+}
+
+
 /*
  * Cursor down through the current node, which must be an internal node.
  *
@@ -442,6 +492,7 @@ hammer_cursor_down(hammer_cursor_t cursor)
        }
        if (error == 0) {
                hammer_lock_sh(&node->lock);
+               KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0);
                cursor->node = node;
                cursor->index = 0;
        }
index 488ddb7..31fc2aa 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.14 2008/05/06 00:21:07 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.15 2008/05/13 20:46:55 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -203,6 +203,7 @@ hammer_flusher_flush(hammer_mount_t hmp)
                }
        }
        hammer_flusher_finalize(&trans);
+       hmp->flusher_tid = trans.tid;
        hammer_done_transaction(&trans);
 }
 
index 3fff7b2..03a1f70 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.58 2008/05/13 05:04:39 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.59 2008/05/13 20:46:55 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1562,10 +1562,13 @@ hammer_sync_inode(hammer_inode_t ip)
                        /*
                         * Adjust the inode count in the volume header
                         */
-                       hammer_modify_volume_field(&trans, trans.rootvol,
-                                                  vol0_stat_inodes);
-                       --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
-                       hammer_modify_volume_done(trans.rootvol);
+                       if (ip->flags & HAMMER_INODE_ONDISK) {
+                               hammer_modify_volume_field(&trans,
+                                                          trans.rootvol,
+                                                          vol0_stat_inodes);
+                               --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
+                               hammer_modify_volume_done(trans.rootvol);
+                       }
                } else {
                        ip->flags &= ~HAMMER_INODE_DELETED;
                        Debugger("hammer_ip_delete_range_all errored");
index 9ae0f21..8761efb 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.17 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.18 2008/05/13 20:46:55 dillon Exp $
  */
 
 #include "hammer.h"
 
 static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
                                struct hammer_ioc_history *hist);
+static int hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
+                               struct hammer_ioc_synctid *std);
 
 int
 hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
@@ -65,6 +67,10 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                error = hammer_ioc_reblock(&trans, ip,
                                        (struct hammer_ioc_reblock *)data);
                break;
+       case HAMMERIOC_SYNCTID:
+               error = hammer_ioc_synctid(&trans, ip,
+                                       (struct hammer_ioc_synctid *)data);
+               break;
        default:
                error = EOPNOTSUPP;
                break;
@@ -280,3 +286,41 @@ add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
        }
 }
 
+/*
+ * Acquire synchronization TID
+ */
+static
+int
+hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
+                  struct hammer_ioc_synctid *std)
+{
+       hammer_mount_t hmp = ip->hmp;
+       int error = 0;
+
+       switch(std->op) {
+       case HAMMER_SYNCTID_NONE:
+               std->tid = hmp->flusher_tid;    /* inaccurate */
+               break;
+       case HAMMER_SYNCTID_ASYNC:
+               hammer_queue_inodes_flusher(hmp, MNT_NOWAIT);
+               std->tid = hmp->flusher_tid;    /* inaccurate */
+               hammer_flusher_async(hmp);
+               break;
+       case HAMMER_SYNCTID_SYNC1:
+               hammer_queue_inodes_flusher(hmp, MNT_WAIT);
+               hammer_flusher_sync(hmp);
+               std->tid = hmp->flusher_tid;
+               break;
+       case HAMMER_SYNCTID_SYNC2:
+               hammer_queue_inodes_flusher(hmp, MNT_WAIT);
+               hammer_flusher_sync(hmp);
+               std->tid = hmp->flusher_tid;
+               hammer_flusher_sync(hmp);
+               break;
+       default:
+               error = EOPNOTSUPP;
+               break;
+       }
+       return(error);
+}
+
index 1ee5b9c..ee34d14 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.7 2008/05/12 05:13:11 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.8 2008/05/13 20:46:55 dillon Exp $
  */
 /*
  * HAMMER ioctl's.  This file can be #included from userland
@@ -168,8 +168,26 @@ struct hammer_ioc_reblock {
        int32_t         unused03;
 };
 
+/*
+ * HAMMER_IOC_SYNCTID
+ */
+enum hammer_synctid_op {
+       HAMMER_SYNCTID_NONE,    /* no sync (TID will not be accurate) */
+       HAMMER_SYNCTID_ASYNC,   /* async (TID will not be accurate) */
+       HAMMER_SYNCTID_SYNC1,   /* single sync - might undo after crash */
+       HAMMER_SYNCTID_SYNC2    /* double sync - guarantee no undo */
+};
+
+struct hammer_ioc_synctid {
+       struct hammer_ioc_head  head;
+       enum hammer_synctid_op  op;
+       hammer_tid_t            tid;
+};
+
+
 #define HAMMERIOC_PRUNE                _IOWR('h',1,struct hammer_ioc_prune)
 #define HAMMERIOC_GETHISTORY   _IOWR('h',2,struct hammer_ioc_history)
 #define HAMMERIOC_REBLOCK      _IOWR('h',3,struct hammer_ioc_reblock)
+#define HAMMERIOC_SYNCTID      _IOWR('h',4,struct hammer_ioc_synctid)
 
 #endif
index b79902f..fddafcf 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.43 2008/05/13 05:04:39 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.44 2008/05/13 20:46:55 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -1368,6 +1368,23 @@ hammer_alloc_data(hammer_transaction_t trans, int32_t data_len,
 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
 
+int
+hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor)
+{
+       struct hammer_sync_info info;
+
+       info.error = 0;
+       info.waitfor = waitfor;
+       if (waitfor == MNT_WAIT) {
+               vmntvnodescan(hmp->mp, VMSC_GETVP,
+                             hammer_sync_scan1, hammer_sync_scan2, &info);
+       } else {
+               vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
+                             hammer_sync_scan1, hammer_sync_scan2, &info);
+       }
+       return(info.error);
+}
+
 int
 hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
 {