HAMMER 13/many - Stabilization commit
author    Matthew Dillon <dillon@dragonflybsd.org>
Mon, 31 Dec 2007 05:33:12 +0000 (05:33 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Mon, 31 Dec 2007 05:33:12 +0000 (05:33 +0000)
* Clean up the in-memory record API.
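
  The cleaned-up API returns records referenced but not locked, and
  hammer_drop_mem_record() is folded into hammer_rel_mem_record() (see
  the hammer_object.c hunks below).  A typical caller, sketched from the
  call sites in this diff:

        record = hammer_alloc_mem_record(ip);   /* referenced, not locked */
        /* ... fill in record->rec and, if needed, record->data ... */
        error = hammer_ip_sync_record(record, &spike);
        record->flags |= HAMMER_RECF_DELETED;   /* consumed either way */
        hammer_rel_mem_record(record);          /* freed on last reference */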

* Add B-Tree boundary assertions and B-Tree debugging code.
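
  The new assertions pin each inserted element inside the parent's
  boundary elements; from the hammer_btree_insert() hunk below:

        KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->leaf.base) <= 0);
        KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->leaf.base) > 0);

  The debugging code (the SEARCH/BRACKET/ITERATE kprintfs) is gated on
  the new hammer_debug_btree global.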

* Delay freeing bits in the allocation bitmaps for B-Tree nodes and
  clusters until the last reference to the in-memory structure goes away.
  This avoids premature reallocation.
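
  Roughly: deletion now just sets HAMMER_NODE_DELETED on the in-memory
  node, and the bitmap bits are returned when the final reference is
  dropped.  A simplified sketch of the release path (the real code in
  hammer_ondisk.c also handles flushing and the passive node caches):

        void
        hammer_rel_node(hammer_node_t node)
        {
                hammer_unref(&node->lock);
                if (node->lock.refs == 0) {
                        if (node->flags & HAMMER_NODE_DELETED) {
                                /* no refs remain, safe to free the bits */
                                hammer_free_btree(node->cluster,
                                                  node->node_offset);
                        }
                        /* ... tear down the in-memory node ... */
                }
        }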

* Fix a bug in btree_split_leaf() - the cursor was not being properly
  adjusted in the boundary case where its index lands exactly on the
  split point; the old or new leaf must be chosen based on where
  key_beg's insertion point will be.

* Fix a recursive lock bug on a buffer cache buffer in hammer_io.c.

* Do not allow a non-empty directory to be removed.
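
  The check uses the new hammer_ip_check_directory_empty() helper (full
  body in the hammer_object.c hunk below), so the rmdir path can bail
  out before touching anything, roughly:

        error = hammer_ip_check_directory_empty(&trans, ip);
        if (error)              /* usually ENOTEMPTY */
                return (error);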

* Synthesize directory entries for "." and "..".  Adjust the
  hammer_directory_namekey() procedure to reserve key-space for the
  synthesized entries.
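
  The namekey change itself lands in hammer_subs.c.  As a hedged sketch
  of the idea (illustrative, not the literal routine): the 64 bit
  directory key keeps a hash of the name in its upper 32 bits and is
  forced non-zero, leaving the very bottom of the key space free for
  the synthesized "." and ".." entries:

        int64_t
        hammer_directory_namekey(void *name, int len)
        {
                int64_t key;

                /* 31 bit crc of the name in the high 32 bits */
                key = (int64_t)(crc32(name, len) & 0x7FFFFFFF) << 32;
                if (key == 0)
                        key |= 0x100000000LL;   /* keep 0 for "." / ".." */
                return(key);
        }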

* Fix memory leaks related to the hammer_node and hammer_record structures.
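
  The leak hunt is backed by the new hammer_count_* globals declared in
  hammer.h below; each allocation site pairs a counter bump with its
  kmalloc()/kfree(), e.g. from hammer_alloc_mem_record():

        ++hammer_count_records;
        record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO);
        /* ... and on the final release ... */
        --hammer_count_records;
        kfree(record, M_HAMMER);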

* Finish writing the rename code.

12 files changed:
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_spike.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 862126f..858bbf3 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.16 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.17 2007/12/31 05:33:12 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -202,6 +202,7 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_ONRBTREE           0x0002
 #define HAMMER_RECF_DELETED            0x0004
 #define HAMMER_RECF_EMBEDDED_DATA      0x0008
+#define HAMMER_RECF_SYNCING            0x0010
 
 /*
  * Structures used to internally represent a volume and a cluster
@@ -364,8 +365,13 @@ struct hammer_node {
        hammer_node_ondisk_t    ondisk;         /* ptr to on-disk structure */
        struct hammer_node      **cache1;       /* passive cache(s) */
        struct hammer_node      **cache2;
+       int                     flags;
 };
 
+#define HAMMER_NODE_DELETED    0x0001
+#define HAMMER_NODE_FLUSH      0x0002
+#define HAMMER_NODE_MODIFIED   0x0004
+
 typedef struct hammer_node     *hammer_node_t;
 
 /*
@@ -427,6 +433,17 @@ extern struct hammer_alist_config Clu_master_alist_config;
 extern struct hammer_alist_config Clu_slave_alist_config;
 extern struct bio_ops hammer_bioops;
 
+extern int hammer_debug_btree;
+extern int hammer_count_inodes;
+extern int hammer_count_records;
+extern int hammer_count_record_datas;
+extern int hammer_count_volumes;
+extern int hammer_count_supercls;
+extern int hammer_count_clusters;
+extern int hammer_count_buffers;
+extern int hammer_count_nodes;
+extern int hammer_count_spikes;
+
 int    hammer_vop_inactive(struct vop_inactive_args *);
 int    hammer_vop_reclaim(struct vop_reclaim_args *);
 int    hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
@@ -450,7 +467,8 @@ int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip);
 int    hammer_ip_next(hammer_cursor_t cursor);
 int    hammer_ip_resolve_data(hammer_cursor_t cursor);
 int    hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid);
-
+int    hammer_ip_check_directory_empty(hammer_transaction_t trans,
+                       hammer_inode_t ip);
 int    hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
 int    hammer_sync_volume(hammer_volume_t volume, void *data);
 int    hammer_sync_cluster(hammer_cluster_t cluster, void *data);
@@ -458,8 +476,7 @@ int hammer_sync_buffer(hammer_buffer_t buffer, void *data);
 
 hammer_record_t
        hammer_alloc_mem_record(hammer_inode_t ip);
-void   hammer_rel_mem_record(struct hammer_record **recordp);
-void   hammer_drop_mem_record(hammer_record_t record, int delete);
+void   hammer_rel_mem_record(hammer_record_t record);
 
 int    hammer_cursor_up(hammer_cursor_t cursor, int nonblock);
 int    hammer_cursor_toroot(hammer_cursor_t cursor);
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 322e9dc..e8389b4 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.13 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.14 2007/12/31 05:33:12 dillon Exp $
  */
 
 /*
@@ -181,6 +181,23 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        elm = &node->elms[cursor->index];
                        r = hammer_btree_cmp(&cursor->key_end, &elm[0].base);
                        s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base);
+                       if (hammer_debug_btree) {
+                               kprintf("BRACKETL %p:%d %016llx %02x %016llx %d\n",
+                                       cursor->node, cursor->index,
+                                       elm[0].internal.base.obj_id,
+                                       elm[0].internal.base.rec_type,
+                                       elm[0].internal.base.key,
+                                       r
+                               );
+                               kprintf("BRACKETR %p:%d %016llx %02x %016llx %d\n",
+                                       cursor->node, cursor->index + 1,
+                                       elm[1].internal.base.obj_id,
+                                       elm[1].internal.base.rec_type,
+                                       elm[1].internal.base.key,
+                                       s
+                               );
+                       }
+
                        if (r < 0) {
                                error = ENOENT;
                                break;
@@ -202,6 +219,15 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                } else {
                        elm = &node->elms[cursor->index];
                        r = hammer_btree_cmp(&cursor->key_end, &elm->base);
+                       if (hammer_debug_btree) {
+                               kprintf("ELEMENT  %p:%d %016llx %02x %016llx %d\n",
+                                       cursor->node, cursor->index,
+                                       elm[0].leaf.base.obj_id,
+                                       elm[0].leaf.base.rec_type,
+                                       elm[0].leaf.base.key,
+                                       r
+                               );
+                       }
                        if (r < 0) {
                                error = ENOENT;
                                break;
@@ -221,6 +247,16 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                /*
                 * Return entry
                 */
+               if (hammer_debug_btree) {
+                       int i = cursor->index;
+                       hammer_btree_elm_t elm = &cursor->node->ondisk->elms[i];
+                       kprintf("ITERATE  %p:%d %016llx %02x %016llx\n",
+                               cursor->node, i,
+                               elm->internal.base.obj_id,
+                               elm->internal.base.rec_type,
+                               elm->internal.base.key
+                       );
+               }
                return(0);
        }
        return(error);
@@ -427,6 +463,13 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
        ++node->count;
        hammer_modify_node_done(cursor->node);
 
+       KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->leaf.base) <= 0);
+       KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->leaf.base) > 0);
+       if (i)
+               KKASSERT(hammer_btree_cmp(&node->elms[i-1].leaf.base, &elm->leaf.base) < 0);
+       if (i != node->count - 1)
+               KKASSERT(hammer_btree_cmp(&node->elms[i+1].leaf.base, &elm->leaf.base) > 0);
+
        /*
         * Adjust the sub-tree count in the parent.  Note that the parent
         * may be in a different cluster.
@@ -520,6 +563,7 @@ hammer_btree_delete(hammer_cursor_t cursor)
         * This may reposition the cursor at one of the parent's of the
         * current node.
         */
+       KKASSERT(cursor->index <= ondisk->count);
        if (ondisk->count == 0) {
                error = btree_remove(cursor);
                if (error == EAGAIN)
@@ -565,6 +609,15 @@ btree_search(hammer_cursor_t cursor, int flags)
 
        flags |= cursor->flags;
 
+       if (hammer_debug_btree) {
+               kprintf("SEARCH   %p:%d %016llx %02x %016llx\n",
+                       cursor->node, cursor->index,
+                       cursor->key_beg.obj_id,
+                       cursor->key_beg.rec_type,
+                       cursor->key_beg.key
+               );
+       }
+
        /*
         * Move our cursor up the tree until we find a node whose range covers
         * the key we are trying to locate.  This may move us between
@@ -769,6 +822,16 @@ btree_search(hammer_cursor_t cursor, int flags)
                }
                cursor->index = i;
 
+               if (hammer_debug_btree) {
+                       hammer_btree_elm_t elm = &node->elms[i];
+                       kprintf("SEARCH-I %p:%d %016llx %02x %016llx\n",
+                               cursor->node, i,
+                               elm->internal.base.obj_id,
+                               elm->internal.base.rec_type,
+                               elm->internal.base.key
+                       );
+               }
+
                /*
                 * Handle insertion and deletion requirements.
                 *
@@ -879,10 +942,19 @@ btree_search(hammer_cursor_t cursor, int flags)
                if (r == 0) {
                        cursor->index = i;
                        error = 0;
+                       if (hammer_debug_btree) {
+                               kprintf("SEARCH-L %p:%d (SUCCESS)\n",
+                                       cursor->node, i);
+                       }
                        goto done;
                }
        }
 
+       if (hammer_debug_btree) {
+               kprintf("SEARCH-L %p:%d (FAILED)\n",
+                       cursor->node, i);
+       }
+
        /*
         * No exact match was found, i is now at the insertion point.
         *
@@ -1027,7 +1099,7 @@ btree_split_internal(hammer_cursor_t cursor)
        if (new_node == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
-                       hammer_free_btree(parent->cluster, parent->node_offset);
+                       parent->flags |= HAMMER_NODE_DELETED;
                        hammer_rel_node(parent);
                }
                return(error);
@@ -1143,6 +1215,10 @@ btree_split_internal(hammer_cursor_t cursor)
        parent_elm = &parent->ondisk->elms[cursor->parent_index];
        cursor->left_bound = &parent_elm[0].internal.base;
        cursor->right_bound = &parent_elm[1].internal.base;
+       KKASSERT(hammer_btree_cmp(cursor->left_bound,
+                &cursor->node->ondisk->elms[0].internal.base) <= 0);
+       KKASSERT(hammer_btree_cmp(cursor->right_bound,
+                &cursor->node->ondisk->elms[cursor->node->ondisk->count-1].internal.base) > 0);
 
        return (0);
 }
@@ -1160,6 +1236,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        hammer_node_t new_leaf;
        hammer_btree_elm_t elm;
        hammer_btree_elm_t parent_elm;
+       hammer_base_elm_t mid_boundary;
        int parent_index;
        int made_root;
        int split;
@@ -1223,7 +1300,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        if (new_leaf == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
-                       hammer_free_btree(parent->cluster, parent->node_offset);
+                       parent->flags |= HAMMER_NODE_DELETED;
                        hammer_rel_node(parent);
                }
                return(error);
@@ -1273,6 +1350,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        parent_elm->internal.subtree_type = new_leaf->ondisk->type;
        parent_elm->internal.subtree_vol_no = 0;
        parent_elm->internal.rec_offset = 0;
+       mid_boundary = &parent_elm->base;
        ++ondisk->count;
        hammer_modify_node_done(parent);
 
@@ -1298,11 +1376,14 @@ btree_split_leaf(hammer_cursor_t cursor)
         * index was pointing at.  If we are >= the split point the push node
         * is now in the new node.
         *
-        * NOTE: If we are at the split point itself we cannot stay with the
-        * original node because the push index will point at the right-hand
-        * boundary, which is illegal.
+        * NOTE: If we are at the split point itself we need to select the
+        * old or new node based on where key_beg's insertion point will be.
+        * If we pick the wrong side the inserted element will wind up in
+        * the wrong leaf node and outside that node's bounds.
         */
-       if (cursor->index >= split) {
+       if (cursor->index > split ||
+           (cursor->index == split &&
+            hammer_btree_cmp(&cursor->key_beg, mid_boundary) >= 0)) {
                cursor->parent_index = parent_index + 1;
                cursor->index -= split;
                hammer_unlock(&cursor->node->lock);
@@ -1320,6 +1401,10 @@ btree_split_leaf(hammer_cursor_t cursor)
        parent_elm = &parent->ondisk->elms[cursor->parent_index];
        cursor->left_bound = &parent_elm[0].internal.base;
        cursor->right_bound = &parent_elm[1].internal.base;
+       KKASSERT(hammer_btree_cmp(cursor->left_bound,
+                &cursor->node->ondisk->elms[0].leaf.base) <= 0);
+       KKASSERT(hammer_btree_cmp(cursor->right_bound,
+                &cursor->node->ondisk->elms[cursor->node->ondisk->count-1].leaf.base) > 0);
 
        return (0);
 }
@@ -1330,9 +1415,12 @@ btree_split_leaf(hammer_cursor_t cursor)
  * other error.
  *
  * On return the cursor may end up pointing at an internal node, suitable
- * for further iteration but not for insertion or deletion.
+ * for further iteration but not for an immediate insertion or deletion.
  *
  * cursor->node may be an internal node or a leaf node.
+ *
+ * NOTE: If cursor->node has one element, it is the parent trying to delete
+ * that element; make sure cursor->index is properly adjusted on success.
  */
 int
 btree_remove(hammer_cursor_t cursor)
@@ -1356,6 +1444,7 @@ btree_remove(hammer_cursor_t cursor)
                KKASSERT(ondisk->parent == 0);
                ondisk->type = HAMMER_BTREE_TYPE_LEAF;
                ondisk->count = 0;
+               cursor->index = 0;
                hammer_modify_node_done(cursor->node);
                kprintf("EMPTY ROOT OF ROOT CLUSTER -> LEAF\n");
                return(0);
@@ -1379,10 +1468,8 @@ btree_remove(hammer_cursor_t cursor)
         */
        error = hammer_cursor_up(cursor, 1);
        if (error) {
-               kprintf("BTREE_REMOVE: Cannot lock parent\n");
-               hammer_unlock(&save->lock);
-               hammer_rel_node(save);
-               return(error);
+               kprintf("BTREE_REMOVE: Cannot lock parent, skipping\n");
+               goto failure;
        }
 
        /*
@@ -1403,15 +1490,12 @@ btree_remove(hammer_cursor_t cursor)
        if (node->ondisk->count == 1) {
                error = btree_remove(cursor);
                if (error == 0) {
-                       kprintf("BTREE_REMOVE: Successful!\n");
-                       hammer_flush_node(save);
-                       hammer_free_btree(save->cluster, save->node_offset);
+                       /*kprintf("BTREE_REMOVE: Successful!\n");*/
+                       goto success;
                } else {
                        kprintf("BTREE_REMOVE: Recursion failed %d\n", error);
+                       goto failure;
                }
-               hammer_unlock(&save->lock);
-               hammer_rel_node(save);
-               return(error);
        }
 
        /*
@@ -1438,6 +1522,8 @@ btree_remove(hammer_cursor_t cursor)
 #if 0
        kprintf("BTREE_REMOVE: Removing element %d\n", cursor->index);
 #endif
+       KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
+       KKASSERT(cursor->index < node->ondisk->count);
        hammer_modify_node(node);
        ondisk = node->ondisk;
        i = cursor->index;
@@ -1464,15 +1550,20 @@ btree_remove(hammer_cursor_t cursor)
                        hammer_modify_node_done(parent);
                }
        }
-               
+
+success:
        /*
-        * Free the saved node.
+        * Free the saved node.  If the saved node was the root of a
+        * cluster, free the entire cluster.
         */
        hammer_flush_node(save);
-       hammer_free_btree(save->cluster, save->node_offset);
+       save->flags |= HAMMER_NODE_DELETED;
+
+       error = 0;
+failure:
        hammer_unlock(&save->lock);
        hammer_rel_node(save);
-       return(0);
+       return(error);
 }
 
 /*
@@ -1536,214 +1627,6 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
 
 #if 0
 
-/*
- * This routine is called on the internal node (node) prior to recursing down
- * through (node, index) when the node referenced by (node, index) MIGHT
- * have too few elements for the caller to perform a deletion.
- *
- * cursor->index is invalid on return because the separators may have gotten
- * adjusted, the caller must rescan the node's elements.  The caller may set
- * cursor->index to -1 if it wants us to do a general rebalancing.
- *
- * This routine rebalances the children of the (node), collapsing children
- * together if possible.  On return each child will have at least L/2-1
- * elements unless the node only has one child.
- * 
- * NOTE: Because we do not update the parent's parent in the split code,
- * the subtree_count used by the caller may be incorrect.  We correct it
- * here.  Also note that we cannot change the depth of the tree's leaf
- * nodes here (see btree_collapse()).
- *
- * NOTE: We make no attempt to rebalance inter-cluster elements.
- */
-static
-int
-btree_rebalance(hammer_cursor_t cursor)
-{
-       hammer_node_ondisk_t ondisk;
-       hammer_node_t node;
-       hammer_node_t children[HAMMER_BTREE_INT_ELMS];
-       hammer_node_t child;
-       hammer_btree_elm_t elm;
-       hammer_btree_elm_t elms;
-       int i, j, n, nelms, goal;
-       int maxelms, halfelms;
-       int error;
-
-       /*
-        * If the elm being recursed through is an inter-cluster reference,
-        * don't worry about it.
-        */
-       ondisk = cursor->node->ondisk;
-       elm = &ondisk->elms[cursor->index];
-       if (elm->internal.subtree_type == HAMMER_BTREE_TYPE_CLUSTER)
-               return(0);
-
-       KKASSERT(elm->internal.subtree_offset != 0);
-       error = 0;
-
-       /*
-        * Load the children of node and do any necessary corrections
-        * to subtree_count.  subtree_count may be too low due to the
-        * way insertions split nodes.  Get a count of the total number
-        * of actual elements held by our children.
-        */
-       error = 0;
-
-       for (i = n = 0; i < node->base.count; ++i) {
-               struct hammer_btree_internal_elm *elm;
-
-               elm = &node->elms[i];
-               children[i] = NULL;
-               child_buffer[i] = NULL; /* must be preinitialized for bread */
-               if (elm->subtree_offset == 0)
-                       continue;
-               child = hammer_bread(cursor->cluster, elm->subtree_offset,
-                                    HAMMER_FSBUF_BTREE, &error,
-                                    &child_buffer[i], XXX);
-               children[i] = child;
-               if (child == NULL)
-                       continue;
-               XXX
-               KKASSERT(node->base.subtype == child->base.type);
-
-               /*
-                * Accumulate n for a good child, update the node's count
-                * if it was wrong.
-                */
-               if (node->elms[i].subtree_count != child->base.count) {
-                       node->elms[i].subtree_count = child->base.count;
-               }
-               n += node->elms[i].subtree_count;
-       }
-       if (error)
-               goto failed;
-
-       /*
-        * Collect all the children's elements together
-        */
-       nelms = n;
-       elms = kmalloc(sizeof(*elms) * (nelms + 1), M_HAMMER, M_WAITOK|M_ZERO);
-       for (i = n = 0; i < node->base.count; ++i) {
-               child = children[i];
-               for (j = 0; j < child->base.count; ++j) {
-                       elms[n].owner = child;
-                       if (node->base.subtype == HAMMER_BTREE_TYPE_LEAF)
-                               elms[n].u.leaf = child->leaf.elms[j];
-                       else
-                               elms[n].u.internal = child->internal.elms[j];
-                       ++n;
-               }
-       }
-       KKASSERT(n == nelms);
-
-       /*
-        * Store a boundary in the elms array to ease the code below.  This
-        * is only used if the children are internal nodes.
-        */
-       elms[n].u.internal = node->elms[i];
-
-       /*
-        * Calculate the number of elements each child should have (goal) by
-        * reducing the number of elements until we achieve at least
-        * halfelms - 1 per child, unless we are a degenerate case.
-        */
-       maxelms = btree_max_elements(node->base.subtype);
-       halfelms = maxelms / 2;
-
-       goal = halfelms - 1;
-       while (i && n / i < goal)
-               --i;
-
-       /*
-        * Now rebalance using the specified goal
-        */
-       for (i = n = 0; i < node->base.count; ++i) {
-               struct hammer_buffer *subchild_buffer = NULL;
-               struct hammer_btree_internal_node *subchild;
-
-               child = children[i];
-               for (j = 0; j < goal && n < nelms; ++j) {
-                       if (node->base.subtype == HAMMER_BTREE_TYPE_LEAF) {
-                               child->leaf.elms[j] = elms[n].u.leaf;
-                       } else {
-                               child->internal.elms[j] = elms[n].u.internal;
-                       }
-
-                       /*
-                        * If the element's parent has changed we have to
-                        * update the parent pointer.  This is somewhat
-                        * expensive.
-                        */
-                       if (elms[n].owner != child &&
-                           node->base.subtype == HAMMER_BTREE_TYPE_INTERNAL) {
-                               subchild = hammer_bread(cursor->cluster,
-                                                       elms[n].u.internal.subtree_offset,
-                                                       HAMMER_FSBUF_BTREE,
-                                                       &error,
-                                                       &subchild_buffer, XXX);
-                               if (subchild) {
-                                       hammer_modify_buffer(subchild_buffer);
-                                       subchild->base.parent =
-                                           hammer_bclu_offset(child_buffer[i],
-                                                               child);
-                                       hammer_modify_buffer_done(subchild_buffer);
-                               }
-                               /* XXX error */
-                       }
-                       ++n;
-               }
-               /* 
-                * Set right boundary if the children are internal nodes.
-                */
-               if (node->base.subtype == HAMMER_BTREE_TYPE_INTERNAL)
-                       child->internal.elms[j] = elms[n].u.internal;
-               child->base.count = j;
-               hammer_modify_buffer(child_buffer[i]);
-               if (subchild_buffer)
-                       hammer_put_buffer(subchild_buffer, 0);
-
-               /*
-                * If we have run out of elements, break out
-                */
-               if (n == nelms)
-                       break;
-       }
-
-       /*
-        * Physically destroy any left-over children.  These children's
-        * elements have been packed into prior children.  The node's
-        * right hand boundary and count gets shifted to index i.
-        *
-        * The subtree count in the node's parent MUST be updated because
-        * we are removing elements.  The subtree_count field is allowed to
-        * be too small, but not too large!
-        */
-       if (i != node->base.count) {
-               n = i;
-               node->elms[n] = node->elms[node->base.count];
-               while (i < node->base.count) {
-                       hammer_free_btree_ptr(child_buffer[i], children[i]);
-                       hammer_put_buffer(child_buffer[i], 0);
-                       ++i;
-               }
-               node->base.count = n;
-               if (cursor->parent) {
-                       cursor->parent->elms[cursor->parent_index].subtree_count = n;
-                       hammer_modify_buffer(cursor->parent_buffer);
-               }
-       }
-
-       kfree(elms, M_HAMMER);
-failed:
-       hammer_modify_buffer(cursor->node_buffer);
-       for (i = 0; i < node->base.count; ++i) {
-               if (child_buffer[i])
-                       hammer_put_buffer(child_buffer[i], 0);
-       }
-       return (error);
-}
-
 /*
  * This routine is only called if the cursor is at the root node and the
  * root node is an internal node.  We attempt to collapse the root node
diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c
index ad574d3..6c6f786 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.8 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.9 2007/12/31 05:33:12 dillon Exp $
  */
 
 /*
@@ -346,9 +346,9 @@ hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
        cursor->right_bound = &elm[1].internal.base;
 
        KKASSERT(hammer_btree_cmp(cursor->left_bound,
-                &ccluster->ondisk->clu_btree_beg) == 0);
+                &ccluster->ondisk->clu_btree_beg) <= 0);
        KKASSERT(hammer_btree_cmp(cursor->right_bound,
-                &ccluster->ondisk->clu_btree_end) == 0);
+                &ccluster->ondisk->clu_btree_end) >= 0);
 
        if (hammer_lock_ex_try(&parent->lock) != 0) {
                hammer_unlock(&cursor->node->lock);
diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h
index 0952cd6..c950469 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.13 2007/12/30 00:47:22 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.14 2007/12/31 05:33:12 dillon Exp $
  */
 
 #ifndef _SYS_UUID_H_
@@ -447,6 +447,9 @@ struct hammer_base_record {
  * not set in the left or right boundary elements around the inter-cluster
  * reference of an internal node in the B-Tree (because doing so would
  * interfere with the boundary tests).
+ *
+ * NOTE: hammer_ip_delete_range_all() deletes all record types greater
+ * than HAMMER_RECTYPE_INODE.
  */
 #define HAMMER_RECTYPE_UNKNOWN         0
 #define HAMMER_RECTYPE_LOWEST          1       /* lowest record type avail */
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index 676a220..ae41173 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.14 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.15 2007/12/31 05:33:12 dillon Exp $
  */
 
 #include "hammer.h"
@@ -203,6 +203,7 @@ loop:
        }
 
        ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
+       ++hammer_count_inodes;
        ip->obj_id = obj_id;
        ip->obj_asof = iinfo.obj_asof;
        ip->hmp = hmp;
@@ -236,26 +237,32 @@ loop:
        } else if (cursor.node) {
                hammer_cache_node(cursor.node, &ip->cache);
        }
-       hammer_done_cursor(&cursor);
 
        /*
         * On success load the inode's record and data and insert the
         * inode into the B-Tree.  It is possible to race another lookup
         * insertion of the same inode so deal with that condition too.
+        *
+        * The cursor's locked node interlocks against others creating and
+        * destroying ip while we were blocked.
         */
        if (*errorp == 0) {
                hammer_ref(&ip->lock);
                if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
                        hammer_uncache_node(&ip->cache);
                        hammer_unref(&ip->lock);
+                       --hammer_count_inodes;
                        kfree(ip, M_HAMMER);
+                       hammer_done_cursor(&cursor);
                        goto loop;
                }
                ip->flags |= HAMMER_INODE_ONDISK;
        } else {
+               --hammer_count_inodes;
                kfree(ip, M_HAMMER);
                ip = NULL;
        }
+       hammer_done_cursor(&cursor);
        return (ip);
 }
 
@@ -277,6 +284,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
 
        hmp = trans->hmp;
        ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
+       ++hammer_count_inodes;
        ip->obj_id = hammer_alloc_tid(trans);
        KKASSERT(ip->obj_id != 0);
        ip->obj_asof = hmp->asof;
@@ -403,7 +411,8 @@ retry:
                record->rec.inode.base.data_len = sizeof(ip->ino_data);
                record->data = (void *)&ip->ino_data;
                error = hammer_ip_sync_record(record, &spike);
-               hammer_drop_mem_record(record, 1);
+               record->flags |= HAMMER_RECF_DELETED;
+               hammer_rel_mem_record(record);
                if (error == ENOSPC) {
                        error = hammer_spike(&spike);
                        if (error == 0)
@@ -457,10 +466,11 @@ hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
        error = hammer_sync_inode(ip, MNT_WAIT, 1);
        if (error)
                kprintf("hammer_sync_inode failed error %d\n", error);
-
+       KKASSERT(RB_EMPTY(&ip->rec_tree));
        RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
 
        hammer_uncache_node(&ip->cache);
+       --hammer_count_inodes;
        kfree(ip, M_HAMMER);
        return(0);
 }
@@ -499,25 +509,17 @@ hammer_sync_inode_callback(hammer_record_t rec, void *data)
        int error;
 
        hammer_ref(&rec->lock);
-       hammer_lock_ex(&rec->lock);
-       if ((rec->flags & HAMMER_RECF_DELETED) == 0)
-               error = hammer_ip_sync_record(rec, spike);
-       else
-               error = 0;
-
-       if (error == ENOSPC) {
-               hammer_drop_mem_record(rec, 0);
-               return(-error);
-       }
+       error = hammer_ip_sync_record(rec, spike);
+       hammer_rel_mem_record(rec);
 
        if (error) {
-               kprintf("hammer_sync_inode_callback: sync failed rec %p, error %d\n",
-                       rec, error);
-               hammer_drop_mem_record(rec, 0);
-               return(-error);
+               error = -error;
+               if (error != -ENOSPC) {
+                       kprintf("hammer_sync_inode_callback: sync failed rec "
+                               "%p, error %d\n", rec, error);
+               }
        }
-       hammer_drop_mem_record(rec, 1); /* ref & lock eaten by call */
-       return(0);
+       return(error);
 }
 
 /*
@@ -608,8 +610,8 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
                while (RB_ROOT(&ip->rec_tree)) {
                        hammer_record_t rec = RB_ROOT(&ip->rec_tree);
                        hammer_ref(&rec->lock);
-                       hammer_lock_ex(&rec->lock);
-                       hammer_drop_mem_record(rec, 1);
+                       rec->flags |= HAMMER_RECF_DELETED;
+                       hammer_rel_mem_record(rec);
                }
                break;
        case HAMMER_INODE_ONDISK:
diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c
index af40b27..b09a8d5 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.9 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.10 2007/12/31 05:33:12 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -61,9 +61,11 @@ hammer_io_disassociate(union hammer_io_structure *io)
 {
        struct buf *bp = io->io.bp;
 
+       KKASSERT(io->io.released && io->io.modified == 0);
        LIST_INIT(&bp->b_dep);  /* clear the association */
        bp->b_ops = NULL;
        io->io.bp = NULL;
+       bp->b_flags &= ~B_LOCKED;
 
        switch(io->io.type) {
        case HAMMER_STRUCTURE_VOLUME:
@@ -86,8 +88,6 @@ hammer_io_disassociate(union hammer_io_structure *io)
                io->buffer.alist.meta = NULL;
                break;
        }
-       io->io.modified = 0;
-       io->io.released = 1;
 }
 
 /*
@@ -187,16 +187,18 @@ hammer_io_notify_cluster(hammer_cluster_t cluster)
                        kprintf("MARK CLUSTER OPEN\n");
                        cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
                        cluster->state = HAMMER_CLUSTER_ASYNC;
+                       cluster->io.modified = 1;
                        bawrite(io->bp);
-                       /* leave cluster marked as modified */
                }
                hammer_unlock(&cluster->io.lock);
        }
 }
 
 /*
- * This routine is called on the last reference to a hammer structure.  If
- * flush is non-zero we have to completely disassociate the bp from the
+ * This routine is called on the last reference to a hammer structure.
+ * Regardless of the state, io->modified must be cleared when we return.
+ *
+ * If flush is non-zero we have to completely disassociate the bp from the
  * structure (which may involve blocking).  Otherwise we can leave the bp
  * passively associated with the structure.
  *
@@ -208,6 +210,7 @@ hammer_io_release(struct hammer_io *io, int flush)
        union hammer_io_structure *iou = (void *)io;
        hammer_cluster_t cluster;
        struct buf *bp;
+       int modified;
 
        if ((bp = io->bp) != NULL) {
                /*
@@ -222,17 +225,24 @@ hammer_io_release(struct hammer_io *io, int flush)
                        if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
                                if (io->released)
                                        regetblk(bp);
+                               else
+                                       io->released = 1;
+                               io->modified = 0;
                                bdwrite(bp);
-                               io->released = 1;
                        } else if (io->released == 0) {
-                               bqrelse(bp);
+                               /* buffer write state already synchronized */
+                               io->modified = 0;
                                io->released = 1;
+                               bqrelse(bp);
+                       } else {
+                               /* buffer write state already synchronized */
+                               io->modified = 0;
                        }
                        return;
                }
 
                /*
-                * We've been asked to flush the buffer.
+                * Either we want to flush the buffer or the kernel tried.
                 *
                 * If this is a hammer_buffer we may have to wait for the
                 * cluster header write to complete.
@@ -251,31 +261,30 @@ hammer_io_release(struct hammer_io *io, int flush)
                        hammer_close_cluster(&iou->cluster);
                }
 
-
                /*
-                * Ok the dependancies are all gone.  Check for the simple
-                * disassociation case.
+                * Gain ownership of the buffer.  Nothing can take it away
+                * from the io structure while we have it locked, so we
+                * can safely reget.
+                *
+                * Once our thread owns the buffer we can disassociate it
+                * from the io structure.
                 */
-               if (io->released && (bp->b_flags & B_LOCKED) == 0 &&
-                   (io->modified == 0 || (bp->b_flags & B_DELWRI))) {
-                       hammer_io_disassociate(iou);
-                       return;
-               }
+               if (io->released)
+                       regetblk(bp);
+               else
+                       io->released = 1;
+               modified = io->modified;
+               io->modified = 0;
+               hammer_io_disassociate(iou);
 
                /*
-                * Handle the more complex disassociation case.  Acquire the
-                * buffer, clean up B_LOCKED, and deal with the modified
-                * flag.
+                * Now dispose of the buffer.  Someone tried to flush, so
+                * issue the I/O immediately.
                 */
-               if (io->released)
-                       regetblk(bp);
-               io->released = 1;
-               bp->b_flags &= ~B_LOCKED;
-               if (io->modified || (bp->b_flags & B_DELWRI))
+               if (modified || (bp->b_flags & B_DELWRI))
                        bawrite(bp);
                else
                        bqrelse(bp);
-               hammer_io_disassociate(iou);
        }
 }
 
@@ -318,8 +327,9 @@ again:
                if (io->modified == 0)
                        goto done;
                regetblk(bp);
+       } else {
+               io->released = 1;
        }
-       io->released = 1;
 
        /*
         * Return the bp to the system, issuing I/O if necessary.  The
@@ -446,10 +456,17 @@ hammer_io_deallocate(struct buf *bp)
         * Buffers can have active references from cached hammer_node's,
         * even if those nodes are themselves passively cached.  Attempt
         * to clean them out.  This may not succeed.
+        *
+        * We have to do some magic with io.released because
+        * hammer_io_intend_modify() can be called indirectly from the
+        * flush code, otherwise we might panic with a recursive bp lock.
         */
        if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
            hammer_lock_ex_try(&io->io.lock) == 0) {
+               io->io.released = 0;
                hammer_flush_buffer_nodes(&io->buffer);
+               KKASSERT(io->io.released == 0);
+               io->io.released = 1;
                hammer_unlock(&io->io.lock);
        }
 
@@ -461,10 +478,8 @@ hammer_io_deallocate(struct buf *bp)
                 * modifications should have already been synchronized with
                 * the buffer.
                 */
-               KKASSERT(io->io.released);
+               KKASSERT(io->io.modified == 0);
                hammer_io_disassociate(io);
-               bp->b_flags &= ~B_LOCKED;
-               KKASSERT (io->io.modified == 0 || (bp->b_flags & B_DELWRI));
 
                /*
                 * Perform final rites on the structure.  This can cause
@@ -495,7 +510,6 @@ hammer_io_deallocate(struct buf *bp)
                bp->b_flags |= B_LOCKED;
                hammer_unref(&io->io.lock);
        }
-
        crit_exit();
 }
 
@@ -562,10 +576,13 @@ hammer_io_checkwrite(struct buf *bp)
                /*
                 * We're good, but before we can let the kernel proceed we
                 * may have to make some adjustments.
+                *
+                * Since there are no refs on the io structure, HAMMER must
+                * have already synchronized its modify state with the bp
+                * so iou->io.modified should be 0.
                 */
                if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
                        hammer_close_cluster(&iou->cluster);
-               KKASSERT(iou->io.released);
                hammer_io_disassociate(iou);
                return(0);
        }
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index ac89adc..350c7cf 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.12 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.13 2007/12/31 05:33:12 dillon Exp $
  */
 
 #include "hammer.h"
@@ -40,7 +40,6 @@ static int hammer_mem_add(hammer_transaction_t trans,
                             hammer_record_t record);
 static int hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
 static int hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip);
-static void hammer_free_mem_record(hammer_record_t record);
 
 /*
  * Red-black tree support.
@@ -135,77 +134,46 @@ RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
 
 /*
  * Allocate a record for the caller to finish filling in.  The record is
- * returned referenced and locked.
+ * returned referenced.
  */
 hammer_record_t
 hammer_alloc_mem_record(hammer_inode_t ip)
 {
        hammer_record_t record;
 
+       ++hammer_count_records;
        record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO);
        record->ip = ip;
        hammer_ref(&record->lock);
-       hammer_lock_ex(&record->lock);
        return (record);
 }
 
 /*
- * Release a memory record.  If the record was marked for defered deletion,
- * and no references remain, the record is physically destroyed.
+ * Release a memory record.  Records marked for deletion are immediately
+ * removed from the RB-Tree but otherwise left intact until the last ref
+ * goes away.
  */
 void
-hammer_rel_mem_record(struct hammer_record **recordp)
+hammer_rel_mem_record(struct hammer_record *record)
 {
-       hammer_record_t rec;
-
-       if ((rec = *recordp) != NULL) {
-               hammer_unref(&rec->lock);
-               if (rec->lock.refs == 0) {
-                       if (rec->flags & HAMMER_RECF_DELETED)
-                               hammer_free_mem_record(rec);
+       hammer_unref(&record->lock);
+       if (record->flags & HAMMER_RECF_DELETED) {
+               if (record->flags & HAMMER_RECF_ONRBTREE) {
+                       RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree,
+                                 record);
+                       record->flags &= ~HAMMER_RECF_ONRBTREE;
+               }
+               if (record->lock.refs == 0) {
+                       if (record->flags & HAMMER_RECF_ALLOCDATA) {
+                               --hammer_count_record_datas;
+                               kfree(record->data, M_HAMMER);
+                               record->flags &= ~HAMMER_RECF_ALLOCDATA;
+                       }
+                       record->data = NULL;
+                       --hammer_count_records;
+                       kfree(record, M_HAMMER);
                }
-               *recordp = NULL;
-       }
-}
-
-/*
- * Drop a locked hammer in-memory record.  This function unlocks and
- * dereferences the record.  If delete != 0 the record is marked for
- * deletion.  Physical deletion only occurs when the last reference goes
- * away.
- */
-void
-hammer_drop_mem_record(hammer_record_t rec, int delete)
-{
-       if (delete)
-               rec->flags |= HAMMER_RECF_DELETED;
-       hammer_unlock(&rec->lock);
-       hammer_rel_mem_record(&rec);
-}
-
-/*
- * Free a record.  Clean the structure up even though we are throwing it
- * away as a sanity check.  The actual free operation is delayed while
- * the record is referenced.  However, the record is removed from the RB
- * tree immediately.
- */
-static void
-hammer_free_mem_record(hammer_record_t record)
-{
-       if (record->flags & HAMMER_RECF_ONRBTREE) {
-               RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree, record);
-               record->flags &= ~HAMMER_RECF_ONRBTREE;
-       }
-       if (record->lock.refs) {
-               record->flags |= HAMMER_RECF_DELETED;
-               return;
-       }
-       if (record->flags & HAMMER_RECF_ALLOCDATA) {
-               kfree(record->data, M_HAMMER);
-               record->flags &= ~HAMMER_RECF_ALLOCDATA;
        }
-       record->data = NULL;
-       kfree(record, M_HAMMER);
 }
 
 /*
@@ -221,8 +189,10 @@ hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip)
 {
        int error;
 
-       if (cursor->iprec)
-               hammer_rel_mem_record(&cursor->iprec);
+       if (cursor->iprec) {
+               hammer_rel_mem_record(cursor->iprec);
+               cursor->iprec = NULL;
+       }
        if (cursor->ip) {
                hammer_rec_rb_tree_scan_info_done(&cursor->scan,
                                                  &cursor->ip->rec_tree);
@@ -282,14 +252,17 @@ static
 int
 hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip)
 {
-       if (cursor->iprec)
-               hammer_rel_mem_record(&cursor->iprec);
+       if (cursor->iprec) {
+               hammer_rel_mem_record(cursor->iprec);
+               cursor->iprec = NULL;
+       }
        if (cursor->ip) {
                hammer_rec_rb_tree_scan_info_done(&cursor->scan,
                                                  &cursor->ip->rec_tree);
        }
        cursor->ip = ip;
        hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree);
+
        cursor->scan.node = NULL;
        hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_scan_cmp,
                                   hammer_rec_scan_callback, cursor);
@@ -313,8 +286,10 @@ hammer_mem_done(hammer_cursor_t cursor)
                                                  &cursor->ip->rec_tree);
                cursor->ip = NULL;
        }
-        if (cursor->iprec)
-               hammer_rel_mem_record(&cursor->iprec);
+        if (cursor->iprec) {
+               hammer_rel_mem_record(cursor->iprec);
+               cursor->iprec = NULL;
+       }
 }
 
 /************************************************************************
@@ -360,6 +335,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
                record->data = (void *)record->rec.entry.den_name;
                record->flags |= HAMMER_RECF_EMBEDDED_DATA;
        } else {
+               ++hammer_count_record_datas;
                record->data = kmalloc(bytes, M_HAMMER, M_WAITOK);
                record->flags |= HAMMER_RECF_ALLOCDATA;
        }
@@ -430,6 +406,7 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
        if (record->data) {
                if ((char *)record->data < (char *)&record->rec ||
                    (char *)record->data >= (char *)(&record->rec + 1)) {
+                       ++hammer_count_record_datas;
                        data = kmalloc(bytes, M_HAMMER, M_WAITOK);
                        record->flags |= HAMMER_RECF_ALLOCDATA;
                        bcopy(record->data, data, bytes);
@@ -546,6 +523,7 @@ hammer_ip_sync_record(hammer_record_t record, struct hammer_cursor **spike)
 {
        struct hammer_cursor cursor;
        hammer_record_ondisk_t rec;
+       hammer_mount_t hmp;
        union hammer_btree_elm elm;
        void *bdata;
        int error;
@@ -558,21 +536,50 @@ hammer_ip_sync_record(hammer_record_t record, struct hammer_cursor **spike)
 
        /*
         * Issue a lookup to position the cursor and locate the cluster.  The
-        * target key should not exist.
+        * target key should not exist.  If we are creating a directory entry
+        * we may have to iterate the low 32 bits of the key to find an unused
+        * key.
         *
         * If we run out of space trying to adjust the B-Tree for the
         * insert, re-lookup without the insert flag so the cursor
         * is properly positioned for the spike.
         */
+again:
        error = hammer_btree_lookup(&cursor);
        if (error == 0) {
+               if (record->rec.base.base.rec_type == HAMMER_RECTYPE_DIRENTRY) {
+                       hmp = cursor.node->cluster->volume->hmp;
+                       if (++hmp->namekey_iterator == 0)
+                               ++hmp->namekey_iterator;
+                       record->rec.base.base.key &= ~(0xFFFFFFFFLL);
+                       record->rec.base.base.key |= hmp->namekey_iterator;
+                       goto again;
+               }
                kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n",
                        record->rec.base.base.key);
+               Debugger("duplicate record1");
                error = EIO;
        }
        if (error != ENOENT)
                goto done;
 
+       /*
+        * Mark the record as undergoing synchronization.  Our cursor is
+        * holding a locked B-Tree node for the insertion which interlocks
+        * anyone trying to access this record.
+        *
+        * XXX There is still a race present related to iterations.  An
+        * iteration may process the record, a sync may occur, and then
+        * later process the B-Tree element for the same record.
+        *
+        * We do not try to synchronize a deleted record.
+        */
+       if (record->flags & (HAMMER_RECF_DELETED | HAMMER_RECF_SYNCING)) {
+               error = 0;
+               goto done;
+       }
+       record->flags |= HAMMER_RECF_SYNCING;
+
        /*
         * Allocate record and data space now that we know which cluster
         * the B-Tree node ended up in.
@@ -585,7 +592,7 @@ hammer_ip_sync_record(hammer_record_t record, struct hammer_cursor **spike)
                                          record->rec.base.data_len, &error,
                                          &cursor.data_buffer);
                if (bdata == NULL)
-                       goto done;
+                       goto fail2;
        }
        rec = hammer_alloc_record(cursor.node->cluster, &error,
                                  &cursor.record_buffer);
@@ -632,8 +639,15 @@ hammer_ip_sync_record(hammer_record_t record, struct hammer_cursor **spike)
        elm.leaf.data_crc = rec->base.data_crc;
 
        error = hammer_btree_insert(&cursor, &elm);
-       if (error == 0)
+
+       /*
+        * Clean up on success, or fall through on error.
+        */
+       if (error == 0) {
+               record->flags |= HAMMER_RECF_DELETED;
+               record->flags &= ~HAMMER_RECF_SYNCING;
                goto done;
+       }
 
        hammer_free_record_ptr(cursor.record_buffer, rec);
 fail1:
@@ -641,6 +655,8 @@ fail1:
                hammer_free_data_ptr(cursor.data_buffer, bdata,
                                     record->rec.base.data_len);
        }
+fail2:
+       record->flags &= ~HAMMER_RECF_SYNCING;
 done:
        /*
         * If ENOSPC in cluster fill in the spike structure and return
@@ -684,6 +700,7 @@ hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t orec,
        if (error == 0) {
                kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n",
                        orec->base.base.key);
+               Debugger("duplicate record2");
                error = EIO;
        }
        if (error != ENOENT)
@@ -767,8 +784,8 @@ done:
  * A unique 64 bit key is generated in-memory and may be regenerated a
  * second time when the directory record is flushed to the on-disk B-Tree.
  *
- * A locked and referenced record is passed to this function.  This function
- * eats the lock and reference.
+ * A referenced record is passed to this function.  This function
+ * eats the reference.  If an error occurs the record will be deleted.
  */
 static
 int
@@ -776,7 +793,8 @@ hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
 {
        while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) {
                if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY){
-                       hammer_drop_mem_record(record, 1);
+                       record->flags |= HAMMER_RECF_DELETED;
+                       hammer_rel_mem_record(record);
                        return (EEXIST);
                }
                if (++trans->hmp->namekey_iterator == 0)
@@ -785,7 +803,7 @@ hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
                record->rec.base.base.key |= trans->hmp->namekey_iterator;
        }
        record->flags |= HAMMER_RECF_ONRBTREE;
-       hammer_drop_mem_record(record, 0);
+       hammer_rel_mem_record(record);
        return(0);
 }
 
@@ -850,8 +868,10 @@ hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip)
        cursor->flags &= ~HAMMER_CURSOR_DELBTREE;
        cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM;
        cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF;
-       if (cursor->iprec)
-               hammer_rel_mem_record(&cursor->iprec);
+       if (cursor->iprec) {
+               hammer_rel_mem_record(cursor->iprec);
+               cursor->iprec = NULL;
+       }
 
        /*
         * Search the on-disk B-Tree.  hammer_btree_lookup() only does an
@@ -927,6 +947,7 @@ hammer_ip_next(hammer_cursor_t cursor)
        if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_DELBTREE)) {
                if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) {
                        error = hammer_btree_iterate(cursor);
+                       cursor->flags &= ~HAMMER_CURSOR_DELBTREE;
                        if (error == 0)
                                cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
                        else
@@ -946,7 +967,10 @@ hammer_ip_next(hammer_cursor_t cursor)
         */
        if (cursor->flags & HAMMER_CURSOR_ATEMEM) {
                if ((cursor->flags & HAMMER_CURSOR_MEMEOF) == 0) {
-                       hammer_rel_mem_record(&cursor->iprec);
+                       if (cursor->iprec) {
+                               hammer_rel_mem_record(cursor->iprec);
+                               cursor->iprec = NULL;
+                       }
                        rec = cursor->scan.node;        /* next node */
                        while (rec) {
                                if (hammer_rec_scan_cmp(rec, cursor) != 0)
@@ -956,8 +980,8 @@ hammer_ip_next(hammer_cursor_t cursor)
                                rec = hammer_rec_rb_tree_RB_NEXT(rec);
                        }
                        if (cursor->iprec) {
+                               KKASSERT(cursor->iprec == rec);
                                cursor->flags &= ~HAMMER_CURSOR_ATEMEM;
-                               hammer_ref(&cursor->iprec->lock);
                                cursor->scan.node =
                                        hammer_rec_rb_tree_RB_NEXT(rec);
                        } else {
@@ -1161,6 +1185,10 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip,
        return(error);
 }
 
+/*
+ * Delete all records associated with an inode except the inode record
+ * itself.
+ */
 int
 hammer_ip_delete_range_all(hammer_transaction_t trans, hammer_inode_t ip)
 {
@@ -1175,7 +1203,7 @@ hammer_ip_delete_range_all(hammer_transaction_t trans, hammer_inode_t ip)
        cursor.key_beg.create_tid = ip->obj_asof;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
-       cursor.key_beg.rec_type = 0;
+       cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1;
        cursor.key_beg.key = HAMMER_MIN_KEY;
 
        cursor.key_end = cursor.key_beg;
@@ -1226,9 +1254,8 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
        /*
         * In-memory (unsynchronized) records can simply be freed.
         */
-       cursor->flags &= ~HAMMER_CURSOR_DELBTREE;
        if (cursor->record == &cursor->iprec->rec) {
-               hammer_free_mem_record(cursor->iprec); /* XXX */
+               cursor->iprec->flags |= HAMMER_RECF_DELETED;
                return(0);
        }
 
@@ -1288,10 +1315,44 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
                }
                hammer_rel_cluster(cluster, 0);
                if (error) {
-                       kprintf("hammer_ip_delete_record: unable to physically delete the record!\n");
+                       panic("hammer_ip_delete_record: unable to physically delete the record!\n");
                        error = 0;
                }
        }
        return(error);
 }
 
+/*
+ * Determine whether a directory is empty or not.  Returns 0 if the directory
+ * is empty, ENOTEMPTY if it isn't, plus other possible errors.
+ */
+int
+hammer_ip_check_directory_empty(hammer_transaction_t trans, hammer_inode_t ip)
+{
+       struct hammer_cursor cursor;
+       int error;
+
+       hammer_init_cursor_ip(&cursor, ip);
+
+       cursor.key_beg.obj_id = ip->obj_id;
+       cursor.key_beg.create_tid = ip->obj_asof;
+       cursor.key_beg.delete_tid = 0;
+       cursor.key_beg.obj_type = 0;
+       cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1;
+       cursor.key_beg.key = HAMMER_MIN_KEY;
+
+       cursor.key_end = cursor.key_beg;
+       cursor.key_end.rec_type = 0xFFFF;
+       cursor.key_end.key = HAMMER_MAX_KEY;
+
+       cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
+
+       error = hammer_ip_first(&cursor, ip);
+       if (error == ENOENT)
+               error = 0;
+       else if (error == 0)
+               error = ENOTEMPTY;
+       hammer_done_cursor(&cursor);
+       return(error);
+}
+
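The emptiness test relies on a key-range convention: starting the scan at
HAMMER_RECTYPE_INODE + 1 excludes the inode record itself, so anything the
cursor finds proves the directory is non-empty.  The same convention shows
up in the hammer_ip_delete_range_all() change above.  A minimal user-space
sketch of the idea, with stand-in types and a flat array in place of the
B-Tree cursor:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define RECTYPE_INODE   1       /* stand-in for HAMMER_RECTYPE_INODE */

struct rec { uint16_t rec_type; int64_t key; };

/*
 * Mirror of the scan above: skip the inode record by starting the
 * rec_type range just past RECTYPE_INODE; finding any record at all
 * in (RECTYPE_INODE, 0xFFFF] means the directory is not empty.
 */
static int
dir_check_empty(const struct rec *recs, int nrecs)
{
        int i;

        for (i = 0; i < nrecs; ++i) {
                if (recs[i].rec_type > RECTYPE_INODE)
                        return (ENOTEMPTY);
        }
        return (0);
}

int
main(void)
{
        struct rec recs[] = { { RECTYPE_INODE, 0 } };

        printf("%d\n", dir_check_empty(recs, 1));       /* prints 0 */
        return (0);
}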
index 2428005..d7cc4cf 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.13 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.14 2007/12/31 05:33:12 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -205,6 +205,7 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname)
        /*
         * Allocate a volume structure
         */
+       ++hammer_count_volumes;
        volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
        volume->vol_name = kstrdup(volname, M_HAMMER);
        volume->hmp = hmp;
@@ -382,6 +383,7 @@ hammer_free_volume(hammer_volume_t volume)
                vrele(volume->devvp);
                volume->devvp = NULL;
        }
+       --hammer_count_volumes;
        kfree(volume, M_HAMMER);
 }
 
@@ -544,6 +546,7 @@ hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
 again:
        supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
        if (supercl == NULL) {
+               ++hammer_count_supercls;
                supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
                supercl->scl_no = scl_no;
                supercl->volume = volume;
@@ -557,6 +560,7 @@ again:
                 */
                if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
                        hammer_unref(&supercl->io.lock);
+                       --hammer_count_supercls;
                        kfree(supercl, M_HAMMER);
                        goto again;
                }
@@ -661,6 +665,7 @@ hammer_rel_supercl(hammer_supercl_t supercl, int flush)
                                RB_REMOVE(hammer_scl_rb_tree,
                                          &volume->rb_scls_root, supercl);
                                supercl->volume = NULL; /* sanity */
+                               --hammer_count_supercls;
                                kfree(supercl, M_HAMMER);
                                hammer_rel_volume(volume, 0);
                                return;
@@ -685,6 +690,7 @@ hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
 again:
        cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
        if (cluster == NULL) {
+               ++hammer_count_clusters;
                cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
                cluster->clu_no = clu_no;
                cluster->volume = volume;
@@ -701,6 +707,7 @@ again:
                 */
                if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
                        hammer_unref(&cluster->io.lock);
+                       --hammer_count_clusters;
                        kfree(cluster, M_HAMMER);
                        goto again;
                }
@@ -962,6 +969,7 @@ hammer_rel_cluster(hammer_cluster_t cluster, int flush)
                                RB_REMOVE(hammer_clu_rb_tree,
                                          &volume->rb_clus_root, cluster);
                                cluster->volume = NULL; /* sanity */
+                               --hammer_count_clusters;
                                kfree(cluster, M_HAMMER);
                                hammer_rel_volume(volume, 0);
                                return;
@@ -1001,7 +1009,8 @@ hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
 again:
        buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
        if (buffer == NULL) {
-               buffer = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
+               ++hammer_count_buffers;
+               buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
                buffer->buf_no = buf_no;
                buffer->cluster = cluster;
                buffer->volume = cluster->volume;
@@ -1017,6 +1026,7 @@ again:
                 */
                if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
                        hammer_unref(&buffer->io.lock);
+                       --hammer_count_buffers;
                        kfree(buffer, M_HAMMER);
                        goto again;
                }
@@ -1161,6 +1171,7 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush)
                                RB_REMOVE(hammer_buf_rb_tree,
                                          &cluster->rb_bufs_root, buffer);
                                buffer->cluster = NULL; /* sanity */
+                               --hammer_count_buffers;
                                kfree(buffer, M_HAMMER);
                                hammer_rel_cluster(cluster, 0);
                                return;
@@ -1184,8 +1195,11 @@ hammer_flush_buffer_nodes(hammer_buffer_t buffer)
        node = TAILQ_FIRST(&buffer->clist);
        while (node) {
                buffer->save_scan = TAILQ_NEXT(node, entry);
-               if (node->lock.refs == 0)
-                       hammer_flush_node(node);
+               if (node->lock.refs == 0) {
+                       hammer_ref(&node->lock);
+                       node->flags |= HAMMER_NODE_FLUSH;
+                       hammer_rel_node(node);
+               }
                node = buffer->save_scan;
        }
 }
@@ -1230,11 +1244,13 @@ again:
        node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
                         node_offset);
        if (node == NULL) {
+               ++hammer_count_nodes;
                node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
                node->node_offset = node_offset;
                node->cluster = cluster;
                if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
                              node)) {
+                       --hammer_count_nodes;
                        kfree(node, M_HAMMER);
                        goto again;
                }
@@ -1320,18 +1336,36 @@ hammer_rel_node(hammer_node_t node)
 
        if (hammer_islastref(&node->lock)) {
                cluster = node->cluster;
+
                /*
-                * Clutter control, this case only occurs after a failed
-                * load since otherwise ondisk will be non-NULL.
+                * Destroy the node if it is being deleted.  Free the node's
+                * bits in the allocation bitmap after we have unhooked it.
                 */
-               if (node->cache1 == NULL && node->cache2 == NULL && 
-                   node->ondisk == NULL) {
+               if (node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) {
+                       hammer_flush_node(node);
                        RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
                                  node);
+                       hammer_ref_cluster(cluster);
                        if ((buffer = node->buffer) != NULL) {
                                node->buffer = NULL;
                                hammer_remove_node_clist(buffer, node);
+                               if (node->ondisk) {
+                                       node->ondisk = NULL;
+                                       hammer_rel_buffer(buffer, 0);
+                               }
                        }
+                       if (node->flags & HAMMER_NODE_DELETED) {
+                               hammer_free_btree(node->cluster,
+                                                 node->node_offset);
+                               if (node->node_offset ==
+                                   cluster->ondisk->clu_btree_root) {
+                                       kprintf("FREE CLUSTER %d\n", cluster->clu_no);
+                                       hammer_free_cluster(cluster);
+                                       /*hammer_io_undirty(&cluster->io);*/
+                               }
+                       }
+                       hammer_rel_cluster(cluster, 0);
+                       --hammer_count_nodes;
                        kfree(node, M_HAMMER);
                        return;
                }
@@ -1350,15 +1384,33 @@ hammer_rel_node(hammer_node_t node)
                 * remain intact.
                 */
                if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
+                       hammer_flush_node(node);
                        buffer = node->buffer;
                        node->buffer = NULL;
                        node->ondisk = NULL;
                        hammer_remove_node_clist(buffer, node);
-                       hammer_unref(&node->lock);
                        hammer_rel_buffer(buffer, 0);
-               } else {
-                       hammer_unref(&node->lock);
                }
+
+               /*
+                * Clutter control: this case only occurs after a failed
+                * load, since otherwise ondisk will be non-NULL.
+                */
+               if (node->cache1 == NULL && node->cache2 == NULL && 
+                   node->ondisk == NULL) {
+                       RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
+                                 node);
+                       if ((buffer = node->buffer) != NULL) {
+                               node->buffer = NULL; /* sanity */
+                               node->ondisk = NULL; /* sanity */
+                               hammer_remove_node_clist(buffer, node);
+                       }
+                       --hammer_count_nodes;
+                       kfree(node, M_HAMMER);
+                       return;
+               }
+
+               hammer_unref(&node->lock);
        } else {
                hammer_unref(&node->lock);
        }
@@ -1374,6 +1426,19 @@ hammer_rel_node(hammer_node_t node)
 void
 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
 {
+       hammer_node_t old;
+
+       /*
+        * If the node is being deleted, don't cache it!
+        */
+       if (node->flags & HAMMER_NODE_DELETED)
+               return;
+
+       /*
+        * Cache the node.  If we previously cached a different node, we
+        * have to give HAMMER a chance to destroy it.
+        */
+again:
        if (node->cache1 != cache) {
                if (node->cache2 == cache) {
                        struct hammer_node **tmp;
@@ -1381,6 +1446,11 @@ hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
                        node->cache1 = node->cache2;
                        node->cache2 = tmp;
                } else {
+                       if ((old = *cache) != NULL) {
+                               *cache = NULL;
+                               hammer_flush_node(old); /* can block */
+                               goto again;
+                       }
                        if (node->cache2)
                                *node->cache2 = NULL;
                        node->cache2 = node->cache1;
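The passive cache is a two-slot MRU: cache1 holds the most recently hooked
location and cache2 the runner-up, and hooking a new location rotates the
slots.  A simplified stand-alone restatement of just the rotation (stand-in
types, no locking, and without the DELETED check or the flush-and-retry of
a previously cached node):

#include <stddef.h>

struct node {
        struct node     **cache1;       /* most recently hooked slot */
        struct node     **cache2;       /* runner-up slot */
};

/*
 * Rotate the two passive-cache slots so cache1 always points at the
 * most recent hook; the evicted runner-up slot is cleared.
 */
static void
cache_node(struct node *node, struct node **cache)
{
        if (node->cache1 != cache) {
                if (node->cache2 == cache) {
                        struct node **tmp;

                        tmp = node->cache1;
                        node->cache1 = node->cache2;
                        node->cache2 = tmp;
                } else {
                        if (node->cache2)
                                *node->cache2 = NULL;
                        node->cache2 = node->cache1;
                        node->cache1 = cache;
                        *cache = node;
                }
        }
}

int
main(void)
{
        static struct node n;
        struct node *hook = NULL;

        cache_node(&n, &hook);          /* hook now points at n */
        return (hook == &n ? 0 : 1);
}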
@@ -1405,23 +1475,14 @@ hammer_uncache_node(struct hammer_node **cache)
                } else {
                        panic("hammer_uncache_node: missing cache linkage");
                }
-               if (node->cache1 == NULL && node->cache2 == NULL &&
-                   node->lock.refs == 0) {
+               if (node->cache1 == NULL && node->cache2 == NULL)
                        hammer_flush_node(node);
-               }
        }
 }
 
 /*
  * Remove a node's cache references and destroy the node if it has no
- * references.  This is typically called from the buffer handling code.
- *
- * The node may have an active buffer reference (ondisk != NULL) even
- * if the node itself has no references.
- *
- * Note that a caller iterating through nodes via a buffer must have its
- * own reference on the buffer or our hammer_rel_buffer() call below may
- * rip it out from under the caller.
+ * other references or backing store.
  */
 void
 hammer_flush_node(hammer_node_t node)
@@ -1432,20 +1493,16 @@ hammer_flush_node(hammer_node_t node)
                *node->cache1 = NULL;
        if (node->cache2)
                *node->cache2 = NULL;
-       if (node->lock.refs == 0) {
+       if (node->lock.refs == 0 && node->ondisk == NULL) {
                RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
                          node);
                if ((buffer = node->buffer) != NULL) {
                        node->buffer = NULL;
                        hammer_remove_node_clist(buffer, node);
-                       if (node->ondisk) {
-                               node->ondisk = NULL;
-                               hammer_rel_buffer(buffer, 0);
-                       }
+                       /* buffer is unreferenced because ondisk is NULL */
                }
+               --hammer_count_nodes;
                kfree(node, M_HAMMER);
-       } else {
-               kprintf("Cannot flush node: %p\n", node);
        }
 }
 
index 7b00a34..29f85cd 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.3 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.4 2007/12/31 05:33:12 dillon Exp $
  */
 
 #include "hammer.h"
@@ -48,6 +48,7 @@ hammer_load_spike(hammer_cursor_t cursor, struct hammer_cursor **spikep)
        KKASSERT(cursor->node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
        KKASSERT(*spikep == NULL);
        *spikep = spike = kmalloc(sizeof(*spike), M_HAMMER, M_WAITOK|M_ZERO);
+       ++hammer_count_spikes;
 
        spike->parent = cursor->parent;
        spike->parent_index = cursor->parent_index;
@@ -164,6 +165,7 @@ hammer_spike(struct hammer_cursor **spikep)
                elm->internal.subtree_vol_no = ncluster->volume->vol_no;
                elm->internal.subtree_count = onode->ondisk->count; /*XXX*/
                hammer_modify_node_done(spike->parent);
+               onode->flags |= HAMMER_NODE_MODIFIED;
                hammer_flush_node(onode);
        }
        {
@@ -199,7 +201,7 @@ hammer_spike(struct hammer_cursor **spikep)
                        }
                }
        }
-       hammer_free_btree(ocluster, onode->node_offset);
+       onode->flags |= HAMMER_NODE_DELETED;
 
        /*
         * XXX I/O dependency - new cluster must be flushed before current
@@ -223,6 +225,7 @@ failed3:
        kprintf("UNLOAD SPIKE %p %d\n", spike, error);
        hammer_unlock(&ocluster->io.lock);
        hammer_done_cursor(spike);
+       --hammer_count_spikes;
        kfree(spike, M_HAMMER);
        *spikep = NULL;
        return (error);
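Note the key change above: hammer_spike() no longer calls
hammer_free_btree() on the old node directly.  It sets HAMMER_NODE_DELETED
and the physical free happens in hammer_rel_node() (hammer_ondisk.c above)
once the last reference goes away, so the node's space cannot be
reallocated while someone still holds it.  A minimal sketch of the
mark-then-release idiom, with stand-in types and none of the buffer or
cluster handling:

#include <stdlib.h>

#define NODE_DELETED    0x0001          /* stand-in for HAMMER_NODE_DELETED */

struct node { int refs; int flags; };

static void
node_rel(struct node *node)
{
        if (--node->refs == 0) {
                if (node->flags & NODE_DELETED) {
                        /* real code: hammer_free_btree(), etc. */
                }
                free(node);
        }
}

int
main(void)
{
        struct node *n = calloc(1, sizeof(*n));

        n->refs = 2;
        n->flags |= NODE_DELETED;       /* mark now... */
        node_rel(n);                    /* still referenced, nothing freed */
        node_rel(n);                    /* ...free on the last release */
        return (0);
}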
index 167d78e..587f8c8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.8 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.9 2007/12/31 05:33:12 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -271,6 +271,9 @@ hammer_get_obj_type(enum vtype vtype)
  *
  * We strip bit 63 in order to provide a positive key; this way a seek
  * offset of 0 will represent the base of the directory.
+ *
+ * This function can never return 0.  We use the MSB-0 space to synthesize
+ * artificial directory entries such as "." and "..".
  */
 int64_t
 hammer_directory_namekey(void *name, int len)
@@ -278,6 +281,8 @@ hammer_directory_namekey(void *name, int len)
        int64_t key;
 
        key = (int64_t)(crc32(name, len) & 0x7FFFFFFF) << 32;
+       if (key == 0)
+               key |= 0x100000000LL;
        return(key);
 }
 
index d11036f..dadc3a2 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.11 2007/12/30 00:47:22 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.12 2007/12/31 05:33:12 dillon Exp $
  */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/nlookup.h>
 #include <sys/fcntl.h>
+#include <sys/sysctl.h>
 #include <sys/buf.h>
 #include <sys/buf2.h>
 #include "hammer.h"
 
+int hammer_debug_btree;
+int hammer_count_inodes;
+int hammer_count_records;
+int hammer_count_record_datas;
+int hammer_count_volumes;
+int hammer_count_supercls;
+int hammer_count_clusters;
+int hammer_count_buffers;
+int hammer_count_nodes;
+int hammer_count_spikes;
+
+SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
+          &hammer_debug_btree, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
+          &hammer_count_inodes, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
+          &hammer_count_records, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_record_datas, CTLFLAG_RD,
+          &hammer_count_record_datas, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_volumes, CTLFLAG_RD,
+          &hammer_count_volumes, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_supercls, CTLFLAG_RD,
+          &hammer_count_supercls, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_clusters, CTLFLAG_RD,
+          &hammer_count_clusters, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_buffers, CTLFLAG_RD,
+          &hammer_count_buffers, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_nodes, CTLFLAG_RD,
+          &hammer_count_nodes, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_spikes, CTLFLAG_RD,
+          &hammer_count_spikes, 0, "");
+
 /*
  * VFS ABI
  */
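Once the module is loaded these appear under vfs.hammer, so a plain
"sysctl vfs.hammer" dumps the whole set.  The count_* OIDs are read-only,
while debug_btree is writable to enable the new B-Tree debugging output; a
count_* value that fails to drop back to zero after the corresponding
structures should have been released points at exactly the kind of leak
this commit fixes.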
index 0341f86..fc2a054 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.12 2007/12/30 08:49:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.13 2007/12/31 05:33:12 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -43,6 +43,7 @@
 #include <sys/lockf.h>
 #include <sys/event.h>
 #include <sys/stat.h>
+#include <sys/dirent.h>
 #include <vm/vm_extern.h>
 #include <vfs/fifofs/fifo.h>
 #include "hammer.h"
@@ -870,36 +871,70 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
 
        ip = VTOI(ap->a_vp);
        uio = ap->a_uio;
-       hammer_init_cursor_ip(&cursor, ip);
+       saveoff = uio->uio_offset;
+
+       if (ap->a_ncookies) {
+               ncookies = uio->uio_resid / 16 + 1;
+               if (ncookies > 1024)
+                       ncookies = 1024;
+               cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
+               cookie_index = 0;
+       } else {
+               ncookies = -1;
+               cookies = NULL;
+               cookie_index = 0;
+       }
+
+       /*
+        * Handle artificial entries
+        */
+       error = 0;
+       if (saveoff == 0) {
+               r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
+               if (r)
+                       goto done;
+               if (cookies)
+                       cookies[cookie_index] = saveoff;
+               ++saveoff;
+               ++cookie_index;
+               if (cookie_index == ncookies)
+                       goto done;
+       }
+       if (saveoff == 1) {
+               if (ip->ino_data.parent_obj_id) {
+                       r = vop_write_dirent(&error, uio,
+                                            ip->ino_data.parent_obj_id,
+                                            DT_DIR, 2, "..");
+               } else {
+                       r = vop_write_dirent(&error, uio,
+                                            ip->obj_id, DT_DIR, 2, "..");
+               }
+               if (r)
+                       goto done;
+               if (cookies)
+                       cookies[cookie_index] = saveoff;
+               ++saveoff;
+               ++cookie_index;
+               if (cookie_index == ncookies)
+                       goto done;
+       }
 
        /*
         * Key range (begin and end inclusive) to scan.  Directory keys
         * directly translate to a 64 bit 'seek' position.
         */
+       hammer_init_cursor_ip(&cursor, ip);
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = ip->obj_asof;
        cursor.key_beg.delete_tid = 0;
         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;
-       cursor.key_beg.key = uio->uio_offset;
+       cursor.key_beg.key = saveoff;
 
        cursor.key_end = cursor.key_beg;
        cursor.key_end.key = HAMMER_MAX_KEY;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
 
-       if (ap->a_ncookies) {
-               ncookies = uio->uio_resid / 16 + 1;
-               if (ncookies > 1024)
-                       ncookies = 1024;
-               cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
-               cookie_index = 0;
-       } else {
-               ncookies = -1;
-               cookies = NULL;
-               cookie_index = 0;
-       }
-
-       saveoff = cursor.key_beg.key;
        error = hammer_ip_first(&cursor, ip);
 
        while (error == 0) {
@@ -930,10 +965,13 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
        }
        hammer_done_cursor(&cursor);
 
+done:
        if (ap->a_eofflag)
                *ap->a_eofflag = (error == ENOENT);
        uio->uio_offset = saveoff;
        if (error && cookie_index == 0) {
+               if (error == ENOENT)
+                       error = 0;
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
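The synthesized entries work because hammer_directory_namekey() (above)
never produces a key below 2^32: seek offsets 0 and 1 can never collide
with a real directory record, so they are reserved for "." and ".." and
the on-disk B-Tree scan simply resumes at saveoff.  A stand-alone sketch
of the resume logic, with printf standing in for vop_write_dirent():

#include <stdint.h>
#include <stdio.h>

static void
emit(int64_t off, const char *name)
{
        printf("%jd\t%s\n", (intmax_t)off, name);
}

static void
readdir_sim(int64_t saveoff)
{
        /*
         * Offsets 0 and 1 can never be real namekeys, so they are
         * reserved for the synthesized entries.
         */
        if (saveoff == 0) { emit(saveoff, ".");  ++saveoff; }
        if (saveoff == 1) { emit(saveoff, ".."); ++saveoff; }
        /* real code: start the B-Tree scan at key == saveoff */
}

int
main(void)
{
        readdir_sim(0);         /* fresh read: emits "." and ".." */
        readdir_sim(1);         /* resume after "." was consumed */
        return (0);
}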
@@ -1020,16 +1058,19 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
        tdip = VTOI(ap->a_tdvp);
        fncp = ap->a_fnch->ncp;
        tncp = ap->a_tnch->ncp;
+       ip = VTOI(fncp->nc_vp);
+       KKASSERT(ip != NULL);
        hammer_start_transaction(&trans, fdip->hmp);
 
        /*
-        * Extract the hammer_inode from fncp and add link to the target
-        * directory.
+        * Remove tncp from the target directory and then link ip as
+        * tncp. XXX pass trans to dounlink
         */
-       ip = VTOI(fncp->nc_vp);
-       KKASSERT(ip != NULL);
-
-       error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
+       error = hammer_dounlink(ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
+       if (error == 0 || error == ENOENT)
+               error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
+       if (error)
+               goto failed; /* XXX */
 
        /*
         * Locate the record in the originating directory and remove it.
@@ -1079,12 +1120,14 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
        if (error)
                goto done;
        error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
+
        if (error == 0) {
                cache_rename(ap->a_fnch, ap->a_tnch);
                cache_setvp(ap->a_tnch, ip->vp);
        }
 done:
         hammer_done_cursor(&cursor);
+failed:
        if (error == 0) {
                hammer_commit_transaction(&trans);
        } else {
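With this ordering the completed rename runs as a single transaction: any
existing target entry is unlinked first (a missing target, ENOENT, is
tolerated), ip is linked in under the new name, and only then is the old
entry located in the source directory and deleted.  A failure at any step
falls through to the failed:/done: labels and aborts the transaction
instead of committing it.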
@@ -1100,8 +1143,6 @@ static
 int
 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
 {
-       /* XXX check that directory is empty */
-
        return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
 }
 
@@ -1624,10 +1665,16 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
 
        /*
         * If all is ok we have to get the inode so we can adjust nlinks.
+        *
+        * If the target is a directory, it must be empty.
         */
        if (error == 0) {
                ip = hammer_get_inode(dip->hmp, rec->entry.obj_id,
                                      dip->hmp->asof, &error);
+               if (error == 0 && ip->ino_rec.base.base.obj_type ==
+                                 HAMMER_OBJTYPE_DIRECTORY) {
+                       error = hammer_ip_check_directory_empty(&trans, ip);
+               }
                if (error == 0)
                        error = hammer_ip_del_directory(&trans, &cursor, dip, ip);
                if (error == 0) {